├── LICENSE ├── README.md ├── STAgent_generated_report.md ├── assets └── images │ └── stagent_architecture.png ├── db └── chroma_squidpy_db │ └── chroma.sqlite3 ├── environment.yml └── src ├── .env ├── .streamlit └── config.toml ├── __pycache__ ├── custom_class.cpython-311.pyc ├── graph.cpython-311.pyc ├── graph_anthropic.cpython-311.pyc ├── graph_gemini.cpython-311.pyc ├── prompt.cpython-311.pyc ├── speech_to_text.cpython-311.pyc ├── squidpy_rag.cpython-311.pyc ├── tools.cpython-311.pyc ├── util.cpython-311.pyc ├── util_anthropic.cpython-311.pyc └── util_gemini.cpython-311.pyc ├── db └── chroma_squidpy_db │ ├── chroma.sqlite3 │ └── f219160f-16f0-4337-8401-5cefc3a7ae39 │ ├── data_level0.bin │ ├── header.bin │ ├── length.bin │ └── link_lists.bin ├── graph.py ├── graph_anthropic.py ├── prompt.py ├── speech_to_text.py ├── squidpy_rag.py ├── tmp └── plots │ ├── plot_20250524_230938_643335.png │ ├── plot_20250524_230939_230970.png │ ├── plot_20250524_230939_315409.png │ ├── plot_20250524_230939_437124.png │ ├── plot_20250524_231021_978237.png │ ├── plot_20250524_231021_997915.png │ ├── plot_20250524_231022_080390.png │ ├── plot_20250524_231106_072022.png │ ├── plot_20250524_231106_527042.png │ ├── plot_20250524_231106_719080.png │ ├── plot_20250524_231106_894313.png │ ├── plot_20250524_231107_098543.png │ ├── plot_20250524_231107_271860.png │ ├── plot_20250524_231107_507332.png │ ├── plot_20250524_231107_683073.png │ ├── plot_20250524_231107_859878.png │ ├── plot_20250524_231240_336695.png │ ├── plot_20250524_232828_376750.png │ ├── plot_20250524_232828_523789.png │ ├── plot_20250524_232828_774850.png │ ├── plot_20250524_233051_701835.png │ ├── plot_20250524_233051_796053.png │ ├── plot_20250524_233653_620074.png │ ├── plot_20250524_233653_789226.png │ ├── plot_20250524_233653_961432.png │ ├── plot_20250524_233654_148844.png │ ├── plot_20250524_233654_322050.png │ ├── plot_20250524_233654_530819.png │ ├── plot_20250524_233654_693983.png │ ├── plot_20250524_233654_875336.png │ ├── plot_20250524_233655_053762.png │ ├── plot_20250524_234033_996956.png │ ├── plot_20250524_234034_131464.png │ └── plot_20250524_234034_262805.png ├── tools.py ├── unified_app.py ├── util.py └── util_anthropic.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Zuwan Lin, Wenbo Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # STAgent 2 | 3 | [![GitHub stars](https://img.shields.io/github/stars/LiuLab-Bioelectronics-Harvard/STAgent)](https://github.com/LiuLab-Bioelectronics-Harvard/STAgent/stargazers) 4 | [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 5 | 6 | https://doi.org/10.1101/2025.04.01.646731 7 | 8 | ## Overview 9 | **STAgent** is a multimodal large language model (LLM)-based AI agent that automates spatial transcriptomics analysis from data to deep scientific insights. Built for end-to-end research autonomy, STAgent integrates: 10 | 11 | - Advanced vision-language models 12 | - Dynamic code generation 13 | - Contextualized literature integration 14 | - Structured report synthesis 15 | 16 | STAgent streamlines complex spatial biology workflows, removing the need for manual programming or domain-specific expertise. This enables rapid, reproducible, and interpretable discoveries in tissue biology. 17 | 18 | STAgent Architecture 19 | 20 | ## Related Work 21 | 22 | STAgent is part of a broader AI agent ecosystem we developed for specialized biological data analysis. A complementary tool in this ecosystem is [SpikeAgent](https://github.com/LiuLab-Bioelectronics-Harvard/SpikeAgent.git), which focuses on neuronal electrophysiology data. While STAgent specializes in spatial transcriptomics to reveal tissue architecture and gene expression patterns, SpikeAgent automates the analysis of neuronal firing patterns and network dynamics from electrophysiological recordings. 23 | 24 | SpikeAgent is described in our recent preprint: [https://www.biorxiv.org/content/10.1101/2025.02.11.637754v1](https://www.biorxiv.org/content/10.1101/2025.02.11.637754v1) 25 | 26 | ## Demo 27 | 28 | Check out our [demo video](https://www.youtube.com/watch?v=aEUop05RINY&t=2s) to see STAgent in action. 29 | 30 | ## Features 31 | 32 | ### End-to-End Automation 33 | Transforms spatial transcriptomics data into comprehensive, publication-style research reports without human intervention. STAgent autonomously executes the full analytical pipeline from image preprocessing to biological interpretation. 34 | 35 | ### Multimodal Interaction 36 | Supports text, voice, and image-based inputs, enabling intuitive natural language interfaces for researchers with no computational background. 37 | 38 | ### Autonomous Reasoning 39 | Leverages multimodal LLMs to perform visual reasoning on tissue images, generate and execute Python analysis code, interpret spatial maps, and integrate literature insights. 40 | 41 | ### Interpretable Results 42 | Produces structured scientific reports with methods, key findings, biological implications, and citation-supported context, resembling peer-reviewed publications. 43 | 44 | ### Context-Aware Gene Analysis 45 | Performs multimodal enrichment analyses that go beyond statistical significance, focusing on biologically relevant pathways tailored to the tissue context. 46 | 47 | ### Visual Reasoning Engine 48 | Analyzes spatial maps and cell architectures directly, detecting subtle morphogenetic patterns and tissue-level changes across timepoints or conditions. 49 | 50 | ### Scalable Knowledge Synthesis 51 | Converts spatially resolved gene expression data into coherent scientific narratives, uncovering developmental programs, cellular interactions, and signaling networks. 
52 | 53 | ## Installation 54 | 55 | ### Prerequisites 56 | - Python 3.11 57 | - Conda package manager 58 | 59 | ### Setup Instructions 60 | 61 | 1. **Clone the repository** 62 | ```bash 63 | git clone https://github.com/LiuLab-Bioelectronics-Harvard/STAgent.git 64 | cd STAgent 65 | ``` 66 | 67 | 2. **Install dependencies** 68 | 69 | We use conda to manage dependencies and have tested the setup on Mac systems with Apple M2 chips. 70 | 71 | ```bash 72 | # Create the environment from the file 73 | conda env create -f environment.yml 74 | 75 | # Activate the environment 76 | conda activate STAgent 77 | ``` 78 | 79 | 3. **Configure environment variables** 80 | - Modify the `.env` file (src/.env) with your own API keys: 81 | 82 | ``` 83 | # OpenAI models (https://platform.openai.com/api-keys) 84 | OPENAI_API_KEY= 85 | WHISPER_API_KEY= 86 | (Note: OPENAI_API_KEY is the same as WHISPER_API_KEY) 87 | 88 | # Claude models (https://www.anthropic.com/api) 89 | ANTHROPIC_API_KEY= 90 | 91 | # Google Scholar search via SerpAPI (https://serpapi.com/) 92 | SERP_API_KEY= 93 | ``` 94 | 💡 Important: Make sure your API accounts have sufficient balance or credits available; otherwise, the agent may not function properly. 95 | 96 | 4. **Set up the data folder** 97 | 98 | ```bash 99 | mkdir -p data 100 | ``` 101 | 102 | Download the .h5ad data files from [Google Drive](https://drive.google.com/drive/folders/1RqWGBhCia06-vQnqHUnid63MybQIKwFJ) and place them in the `./data` directory. 103 | 104 | ## Usage 105 | 106 | 1. **Launch the application** 107 | ```bash 108 | streamlit run src/unified_app.py 109 | ``` 110 | The app will open in your default web browser on localhost. 111 | 112 | 2. **Interact with the agent** 113 | - Select a model (claude-3.7-sonnet recommended) 114 | - You can start interacting with the agent by typing messages in the chat interface 115 | - Example prompts you can try: 116 | ``` 117 | "Can you help me perform an end-to-end analysis on my spatial transcriptomic datasets. Please also generate a report." 118 | ``` 119 | - The agent will respond to your queries and can perform complex analyses based on natural language instructions 120 | 121 | ## Project Structure 122 | 123 | - `src/`: Contains the source code for STAgent 124 | - `data/`: Directory for storing spatial transcriptomics datasets 125 | - `src/tmp/plots/`: Contains plots generated by the agent 126 | - `conversation_histories_{model}/`: Stores conversation history organized by model 127 | 128 | ## Example Output 129 | 130 | When you prompt the agent to perform an end-to-end analysis, it generates a comprehensive markdown report with peer-reviewed literature as references (one example output is "STAgent_generated_report.md"). 131 | 132 | 133 | 134 | ## Citation 135 | If you use STAgent in your research, please cite: 136 | > *Lin, Z., *Wang, W., et al. Spatial transcriptomics AI agent charts hPSC-pancreas maturation in vivo. (2025). _bioRxiv_. 137 | > https://doi.org/10.1101/2025.04.01.646731 138 | 139 | ## License 140 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 141 | -------------------------------------------------------------------------------- /STAgent_generated_report.md: -------------------------------------------------------------------------------- 1 | # Scientific Analysis Report: Temporal Evolution of Human Pancreatic Islet Xenotransplantation 2 | 3 | ## 1.
Objective 4 | 5 | This report aims to provide a comprehensive analysis of the temporal changes observed in human pancreatic islets transplanted into mouse kidney across three time points (Week 4, Week 16, and Week 20). The analysis focuses on characterizing cell type composition, spatial organization, and intercellular interaction patterns to understand the biological processes underlying xenograft adaptation and survival. The primary goal is to delineate the dynamic remodeling of cellular architecture that occurs during the post-transplantation period and identify key factors that may contribute to successful engraftment and function of the transplanted islets. 6 | 7 | ## 2. Study Overview 8 | 9 | ### Background 10 | 11 | Pancreatic islet transplantation represents a promising therapeutic approach for type 1 diabetes, offering the potential for improved glycemic control and reduced dependence on exogenous insulin. However, challenges including limited islet survival post-transplantation and immune rejection have restricted its widespread clinical application. Xenotransplantation, using non-human donor islets, presents an alternative strategy to address organ shortage but faces additional immunological barriers. 12 | 13 | ### Purpose 14 | 15 | This study investigated the temporal evolution of human pancreatic islet xenografts in mouse kidney using spatial transcriptomics. By analyzing cellular composition, spatial organization, and cell-cell interactions at three time points (Week 4, Week 16, and Week 20), the research aimed to characterize the adaptation processes that occur during xenograft integration with host tissue. 16 | 17 | ### Research Questions 18 | 19 | 1. How does cellular composition of transplanted islets change over time? 20 | 2. What spatial reorganization patterns emerge during graft adaptation? 21 | 3. How do interactions between different cell types evolve post-transplantation? 22 | 4. What mechanisms may contribute to graft survival and functional integration? 23 | 24 | ## 3. Methods Summary 25 | 26 | The analysis employed a systematic approach to characterize the xenotransplanted human pancreatic islets: 27 | 28 | 1. **Dimensionality Reduction Analysis**: UMAP visualization was used to examine cell type clustering patterns across time points, revealing population-level relationships between human donor and mouse host cells. 29 | 30 | 2. **Cell Type Composition Analysis**: Quantitative assessment of cell type proportions at each time point (Week 4, Week 16, Week 20) using normalized percentages, visualized through stacked bar plots and heatmaps. 31 | 32 | 3. **Spatial Distribution Mapping**: Scatter plots of cell coordinates colored by cell type were generated for each tissue slice, enabling visualization of the spatial organization of different cell populations. 33 | 34 | 4. **Cell-Cell Interaction Analysis**: Neighborhood enrichment analysis using spatial statistics to quantify preferential associations or avoidances between cell types, presented as heatmaps with z-score values. 35 | 36 | The dataset consisted of STARmap spatial transcriptomic data from human pancreatic islets grafted on mouse kidney, with multiple slices per time point. The analysis pipeline integrated cell type identification, compositional analysis, spatial mapping, and interaction quantification to provide a comprehensive characterization of xenograft evolution. 37 | 38 | ## 4. 
Key Findings 39 | 40 | ### 4.1 Cell Type Population Dynamics 41 | 42 | The UMAP visualization revealed distinct clustering of different cell populations with clear separation between human donor and mouse host cells: 43 | 44 | - Human endocrine cells (alpha, beta, delta) clustered together but maintained separate identities 45 | - Human mesenchymal and exocrine cells formed distinct clusters 46 | - Mouse kidney cells (nephron, vascular, ureteric epithelium) clustered separately from human cells 47 | 48 | Quantitative cell type composition analysis revealed significant temporal changes: 49 | 50 | - **Alpha Cells**: Dramatic fluctuation in proportion - 10.7% (Week 4) → 25.6% (Week 16) → 11.9% (Week 20) 51 | - **Beta Cells**: Progressive decline - 19.6% (Week 4) → 13.6% (Week 16) → 11.5% (Week 20) 52 | - **Delta Cells**: Steady increase - 0.9% (Week 4) → 1.8% (Week 16) → 3.0% (Week 20) 53 | - **Mesenchymal Cells**: Dramatic expansion - 0.1% (Week 4) → 4.4% (Week 16) → 19.1% (Week 20) 54 | - **Enterochromaffin Cells**: Substantial decline - 12.5% (Week 4) → 1.4% (Week 16) → 1.0% (Week 20) 55 | - **Exocrine Cells**: Fluctuation with overall increase - 3.6% (Week 4) → 2.2% (Week 16) → 8.7% (Week 20) 56 | 57 | The alpha-to-beta cell ratio shifted significantly: 0.55 (Week 4) → 1.88 (Week 16) → 1.04 (Week 20), indicating dynamic remodeling of the endocrine compartment. 58 | 59 | ### 4.2 Spatial Organization Patterns 60 | 61 | Spatial distribution maps revealed distinct organizational patterns: 62 | 63 | - **Islet-Like Structure Formation**: Human endocrine cells consistently clustered in islet-like structures across all time points 64 | - **Cell Type Zonation**: Alpha cells frequently positioned at the periphery of islet structures, with beta cells forming the core, reflecting native islet architecture 65 | - **Mesenchymal Expansion Pattern**: Progressive formation of a mesenchymal network surrounding islet structures, particularly evident by Week 20 66 | - **Host-Graft Interface**: Clear boundary between human islet structures and mouse kidney tissue, with selective vascular integration 67 | - **Exocrine Segregation**: Exocrine cells formed distinct clusters separate from islet structures, particularly in Week 20 samples 68 | 69 | Notably, delta cells increasingly positioned at the interface between alpha and beta cells over time, suggesting maturation of paracrine signaling networks. 
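The enrichment z-scores reported in Section 4.3 below are of the kind produced by squidpy's spatial-graph workflow described in the Methods Summary. The following minimal sketch illustrates how such scores are typically computed; it is not the agent's exact code: the file path and the `sample_name` column follow the dataset description in `src/prompt.py`, and the `cell_type` annotation key is an assumed placeholder that may differ from the actual column name.

```python
import scanpy as sc
import squidpy as sq

# Dataset path and .obs columns as described in src/prompt.py; "cell_type" is a
# placeholder for the cell type annotation key and may differ in the real data.
adata = sc.read_h5ad("./data/pancreas_processed_full.h5ad")

for sample in adata.obs["sample_name"].unique():
    # Subset to a single timepoint/sample so neighbors are not linked across samples
    adata_sample = adata[adata.obs["sample_name"] == sample].copy()
    # Build the spatial neighbor graph from coordinates stored in .obsm["spatial"]
    sq.gr.spatial_neighbors(adata_sample)
    # Permutation-based z-scores for cell type co-localization
    sq.gr.nhood_enrichment(adata_sample, cluster_key="cell_type")
    # Heatmap of the enrichment z-scores for this sample
    sq.pl.nhood_enrichment(adata_sample, cluster_key="cell_type")
```

Computing the graph and the enrichment separately for each sample, as in the loop above, avoids spurious neighbor links between slices from different timepoints, consistent with the per-sample guidance in `src/prompt.py`.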
70 | 71 | ### 4.3 Cell-Cell Interaction Dynamics 72 | 73 | Neighborhood enrichment analysis revealed evolving interaction patterns: 74 | 75 | - **Endocrine Cell Homotypic Interactions**: Strengthening over time 76 | - Alpha-alpha: 21.4 → 59.0 → 66.2 77 | - Beta-beta: 49.0 → 62.2 → 83.6 78 | - Delta-delta: 9.1 → 25.3 → 48.1 79 | 80 | - **Mesenchymal Cell Behavior**: 81 | - Mesenchymal-mesenchymal: -0.1 → 48.7 → 97.0 82 | - Initially neutral with endocrine cells, becoming increasingly negative by Week 20 83 | - Strong negative association with mouse nephron cells: 0.8 → -8.7 → -38.0 84 | 85 | - **Host-Graft Boundary**: 86 | - Increasing negative enrichment between human endocrine and mouse nephron cells 87 | - Alpha cells and mouse nephron: -20.2 → -56.3 → -41.9 88 | - Beta cells and mouse nephron: -39.8 → -47.0 → -44.3 89 | 90 | - **Exocrine Cell Isolation**: 91 | - Exocrine-exocrine: 71.6 → 90.3 → 117.8 92 | - Increasingly negative associations with all other cell types 93 | 94 | - **Delta Cell Integration**: 95 | - Increasing association with alpha cells: 5.3 → 8.9 → 15.3 96 | - Minimal association with beta cells across all time points 97 | 98 | - **Enterochromaffin Cell Behavior**: 99 | - Decreasing self-association: 29.2 → 9.0 → 2.7 100 | - Early association with alpha cells (14.6) diminishing over time (0.6 by Week 20) 101 | 102 | These interaction patterns reveal progressive compartmentalization and specialization of cellular neighborhoods within the xenograft. 103 | 104 | ## 5. Biological Implications 105 | 106 | ### 5.1 Endocrine Cell Remodeling 107 | 108 | The dynamic changes in alpha-to-beta cell ratio observed in this study reflect a significant remodeling of the endocrine compartment post-transplantation. The initial increase in alpha-to-beta ratio at Week 16, followed by normalization by Week 20, suggests a biphasic response to transplantation stress. This aligns with findings that alpha cells may be more resistant to stress during transplantation than beta cells, as documented in studies of islet transplantation outcomes ("Beta-cell function following human islet transplantation for type 1 diabetes"). 109 | 110 | The observed alpha-beta cell spatial organization, with alpha cells positioned peripherally and beta cells forming the core of islet structures, recapitulates aspects of native islet architecture. This arrangement facilitates paracrine signaling, which is critical for coordinated hormone secretion. As noted in research on islet architecture, "The pancreatic islet functions as a single organ with tightly coordinated signaling between the different cell types" ("Alpha-, delta-and PP-cells: are they the architectural cornerstones of islet structure and co-ordination?"). 111 | 112 | ### 5.2 Mesenchymal Cell Protective Function 113 | 114 | The dramatic expansion of mesenchymal cells (0.1% → 19.1%) represents one of the most striking findings of this study. This expansion, coupled with the spatial distribution forming a network around islet structures, strongly suggests a protective role. Studies have demonstrated that mesenchymal cells can enhance islet transplantation outcomes through multiple mechanisms: 115 | 116 | 1. Immunomodulation and prevention of inflammatory responses 117 | 2. Promotion of revascularization 118 | 3. 
Secretion of trophic factors that support islet cell survival 119 | 120 | Research has shown that "MSCs have the capacity to improve the outcomes of islet transplantation in animal models of T1D" ("Protecting islet functional viability using mesenchymal stromal cells"). The observed spatial positioning of mesenchymal cells around islet structures by Week 20 likely represents an adaptive response that enhances graft survival by creating a protective microenvironment. 121 | 122 | ### 5.3 Vascularization Dynamics 123 | 124 | The neighborhood enrichment analysis revealed complex patterns of interaction between mouse vascular cells and human islet cells. The consistent negative enrichment scores between vascular cells and endocrine cells suggest that vascularization occurs primarily at the periphery of islet structures rather than through direct infiltration. This pattern may reflect the revascularization process described in the literature where "islet vascularization not only allows direct cellular exchanges, but also influences the characteristics and spatial arrangement of islet endocrine and immune cells" ("Vessel Network Architecture of Adult Human Islets Promotes Distinct Cell-Cell Interactions In Situ and Is Altered After Transplantation"). 125 | 126 | The increasing positive association between mouse vascular cells and mouse nephron cells indicates that host vasculature maintains its native connections while extending into the graft area. This revascularization pattern is crucial for graft survival, as noted in research showing that "neovascularization of transplanted islets is essential for their survival and function" ("Vascularization of purified pancreatic islet-like cell aggregates (pseudoislets) after syngeneic transplantation"). 127 | 128 | ### 5.4 Delta Cell Function and Integration 129 | 130 | The steady increase in delta cells (0.9% → 3.0%) and their specific positioning at the interface between alpha and beta cells suggests an important regulatory adaptation. Delta cells secrete somatostatin, which regulates both alpha and beta cell function through paracrine signaling. Research has shown that "delta cells form synchronized networks within islets" and "delta cell filopodia allow an ~tenfold increase in potential direct interactions with beta and alpha cells" ("Structural basis for delta cell paracrine regulation in pancreatic islets"). 131 | 132 | The preferential association of delta cells with alpha cells rather than beta cells, as revealed in the neighborhood enrichment analysis, aligns with findings that delta cells may differentially regulate alpha cell function in response to metabolic changes. This strategic positioning likely contributes to the establishment of proper hormone secretion dynamics within the transplanted islets. 133 | 134 | ### 5.5 Enterochromaffin Cell Dynamics 135 | 136 | The substantial decrease in enterochromaffin cells (12.5% → 1.0%) represents an intriguing finding. Enterochromaffin cells are normally rare in native pancreatic islets but have been observed in stem cell-derived islets and during islet development or regeneration. Recent research indicates that "enterochromaffin cells originate from an intestinal lineage, while islet cells differentiate from a distinct pancreatic lineage" ("Single-nucleus multi-omics of human stem cell-derived islets identifies deficiencies in lineage specification"). 137 | 138 | The high initial presence followed by decline may represent a transient regenerative response that diminishes as the graft matures. 
This pattern could reflect cellular plasticity during the early adaptation phase, with subsequent lineage restriction as the graft stabilizes. The decline in enterochromaffin cells coincides with the normalization of endocrine cell ratios, potentially indicating maturation of the transplanted islets. 139 | 140 | ## 6. Conclusion 141 | 142 | ### 6.1 Major Discoveries 143 | 144 | This comprehensive analysis of human pancreatic islet xenotransplantation revealed several key insights into the temporal evolution of cellular composition, spatial organization, and interaction patterns: 145 | 146 | 1. The xenograft undergoes distinct adaptation phases characterized by initial stress response (Week 4), endocrine remodeling (Week 16), and subsequent stabilization (Week 20) 147 | 148 | 2. Mesenchymal cell expansion represents a critical adaptive response that likely contributes to graft survival through the formation of a protective microenvironment 149 | 150 | 3. Endocrine cells maintain their native architectural organization with alpha cells at the periphery and beta cells in the core, facilitating proper paracrine signaling 151 | 152 | 4. Delta cells increase steadily and position strategically to regulate alpha and beta cell function through paracrine mechanisms 153 | 154 | 5. Progressive compartmentalization of different cell types creates spatially defined functional domains within the graft 155 | 156 | 6. The xenograft-host interface shows increasing definition over time, with specific patterns of exclusion and selective vascular integration 157 | 158 | ### 6.2 Future Research Directions 159 | 160 | Several avenues for future research emerge from these findings: 161 | 162 | 1. Functional assessment of the transplanted islets at different time points to correlate cellular architecture with hormone secretion capacity 163 | 164 | 2. Investigation of the specific molecular mechanisms underlying mesenchymal cell protective effects in xenotransplantation 165 | 166 | 3. Targeted manipulation of mesenchymal cell expansion to enhance graft survival and function 167 | 168 | 4. Exploration of strategies to accelerate vascularization while maintaining proper islet architecture 169 | 170 | 5. Characterization of the extracellular matrix components that may contribute to spatial organization and cell type interactions 171 | 172 | 6. Investigation of the origin and function of enterochromaffin cells in the context of islet transplantation 173 | 174 | ### 6.3 Potential Applications 175 | 176 | The insights gained from this study have several potential applications: 177 | 178 | 1. Development of optimized protocols for islet transplantation that promote beneficial cellular architecture and interactions 179 | 180 | 2. Design of bioengineered scaffolds that mimic the supportive microenvironment created by mesenchymal cells 181 | 182 | 3. Targeted cellular therapies that combine islet cells with supportive mesenchymal populations to enhance graft outcomes 183 | 184 | 4. Improved strategies for monitoring graft health based on cellular composition and interaction patterns 185 | 186 | 5. Development of interventions to accelerate the transition from early stress response to stable graft architecture 187 | 188 | In conclusion, this study provides a detailed characterization of the dynamic cellular changes that occur during human pancreatic islet xenotransplantation. 
The findings highlight the importance of considering not only cellular composition but also spatial organization and interaction patterns in understanding graft adaptation and survival. These insights contribute to the foundation for developing improved approaches to islet transplantation for the treatment of diabetes. 189 | 190 | ## 7. References 191 | 192 | - Paracrine signaling in islet function and survival 193 | - Paracrine and autocrine interactions in the human islet: more than meets the eye 194 | - Structural basis for delta cell paracrine regulation in pancreatic islets 195 | - Paracrine regulation of insulin secretion 196 | - Alpha-cell paracrine signaling in the regulation of beta-cell insulin secretion 197 | - Integrating the inputs that shape pancreatic islet hormone release 198 | - Comprehensive alpha, beta and delta cell transcriptomes reveal that ghrelin selectively activates delta cells and promotes somatostatin release from pancreatic islets 199 | - Paracrine interactions within islets of Langerhans 200 | - Cell–cell interactions in the endocrine pancreas 201 | - Protecting islet functional viability using mesenchymal stromal cells 202 | - Potential role of mesenchymal stromal cells in pancreatic islet transplantation 203 | - Mesenchymal stem cell in pancreatic islet transplantation 204 | - Human mesenchymal stem cells protect human islets from pro-inflammatory cytokines 205 | - Mesenchymal stem cells prevent acute rejection and prolong graft function in pancreatic islet transplantation 206 | - Mesenchymal stromal cells improve transplanted islet survival and islet function in a syngeneic mouse model 207 | - Cell rearrangement in transplanted human islets 208 | - Vessel Network Architecture of Adult Human Islets Promotes Distinct Cell-Cell Interactions In Situ and Is Altered After Transplantation 209 | - Vascularization of purified pancreatic islet-like cell aggregates (pseudoislets) after syngeneic transplantation 210 | - Revascularization and remodelling of pancreatic islets grafted under the kidney capsule 211 | - Bioengineering the vascularized endocrine pancreas: a fine-tuned interplay between vascularization, extracellular-matrix-based scaffold architecture, and insulin secretion 212 | - Vascular and immune interactions in islets transplantation and 3D islet models 213 | - A focus on enterochromaffin cells among the enteroendocrine cells: localization, morphology, and role 214 | - Heterogeneity of enterochromaffin cells within the gastrointestinal tract 215 | - Tissue-and cell-specific properties of enterochromaffin cells affect the fate of tumorigenesis toward nonendocrine adenocarcinoma of the small intestine 216 | - Single-nucleus multi-omics of human stem cell-derived islets identifies deficiencies in lineage specification 217 | - Beta-cell function following human islet transplantation for type 1 diabetes 218 | - Alpha-, delta-and PP-cells: are they the architectural cornerstones of islet structure and co-ordination? 
-------------------------------------------------------------------------------- /assets/images/stagent_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/assets/images/stagent_architecture.png -------------------------------------------------------------------------------- /db/chroma_squidpy_db/chroma.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/db/chroma_squidpy_db/chroma.sqlite3 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: STAgent 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.11 7 | - pip 8 | - numpy 9 | - pandas 10 | - ipython 11 | - jupyter 12 | - matplotlib 13 | - scikit-learn 14 | - conda-forge::ffmpeg 15 | - pip: 16 | - streamlit>=1.41.1 17 | - langchain>=0.3.11 18 | - langchain-core>=0.3.24 19 | - langchain-openai>=0.2.12 20 | - langchain-community>=0.3.11 21 | - openai>=1.57.4 22 | - python-dotenv>=1.0.1 23 | - streamlit-audiorec>=0.1.3 24 | - tiktoken>=0.8.0 25 | - plotly 26 | - scipy 27 | - sounddevice 28 | - soundfile 29 | - pydub 30 | - langgraph 31 | - langchain_experimental 32 | - langchain_google_genai 33 | - langchain_anthropic 34 | - audio_recorder_streamlit 35 | - scanpy 36 | - squidpy 37 | - google-search-results 38 | - langchain-chroma 39 | - esprima 40 | - tree-sitter 41 | - tree-sitter-languages 42 | - protobuf<=3.20.3 -------------------------------------------------------------------------------- /src/.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | SERP_API_KEY= 3 | ANTHROPIC_API_KEY= 4 | WHISPER_API_KEY= -------------------------------------------------------------------------------- /src/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | # Light mode settings 3 | base="light" 4 | primaryColor="#1E88E5" 5 | backgroundColor="#FFFFFF" 6 | secondaryBackgroundColor="#F0F8FF" 7 | textColor="#262730" 8 | font="sans serif" 9 | 10 | [theme.light] 11 | # Custom light mode colors 12 | primaryColor="#1E88E5" 13 | backgroundColor="#FFFFFF" 14 | secondaryBackgroundColor="#F0F8FF" 15 | textColor="#262730" 16 | 17 | [theme.dark] 18 | # Custom dark mode colors 19 | primaryColor="#90CAF9" 20 | backgroundColor="#0E1117" 21 | secondaryBackgroundColor="#1E1E1E" 22 | textColor="#FAFAFA" -------------------------------------------------------------------------------- /src/__pycache__/custom_class.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/custom_class.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/graph.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph.cpython-311.pyc 
-------------------------------------------------------------------------------- /src/__pycache__/graph_anthropic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph_anthropic.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/graph_gemini.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph_gemini.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/prompt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/prompt.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/speech_to_text.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/speech_to_text.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/squidpy_rag.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/squidpy_rag.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/tools.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/tools.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util_anthropic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util_anthropic.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util_gemini.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util_gemini.cpython-311.pyc -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/chroma.sqlite3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/chroma.sqlite3 -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/header.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/header.bin -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/length.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/length.bin -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/link_lists.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/link_lists.bin -------------------------------------------------------------------------------- /src/graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from datetime import datetime 4 | import matplotlib.pyplot as plt 5 | from typing import Annotated, TypedDict, Literal, Tuple, List 6 | from dotenv import load_dotenv 7 | from langchain_core.prompts import ChatPromptTemplate 8 | from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage 9 | from langchain_core.tools import tool 10 | from langchain_openai import ChatOpenAI 11 | from langgraph.graph import START, StateGraph 12 | from langgraph.graph.message import AnyMessage, add_messages 13 | #from langchain_experimental.utilities import PythonREPL 14 | from tools import PythonREPL 15 | from langgraph.prebuilt import ToolNode 16 | from prompt import system_prompt 17 | from langgraph.types import Command 18 | from textwrap import dedent 19 | import streamlit as st 20 | from util import display_message, render_conversation_history, get_conversation_summary 21 | from langchain_core.runnables.config import RunnableConfig 22 | from tools import google_scholar_search, squidpy_rag_agent, visualize_cell_cell_interaction_tool, visualize_spatial_cell_type_map, visualize_cell_type_composition, visualize_umap, report_tool 23 | # Directory Setup 24 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 25 | os.makedirs(plot_dir, exist_ok=True) 26 | load_dotenv() 27 | 28 | python_repl = PythonREPL() 29 | 30 | @tool(response_format="content_and_artifact") 31 | def python_repl_tool(query: str) -> Tuple[str, List[str]]: 32 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 33 | If you want to see the output of a value, you should print it out with `print(...)`. 
""" 34 | 35 | plot_paths = [] # List to store file paths of generated plots 36 | result_parts = [] # List to store different parts of the output 37 | 38 | try: 39 | output = python_repl.run(query) 40 | if output and output.strip(): 41 | result_parts.append(output.strip()) 42 | 43 | figures = [plt.figure(i) for i in plt.get_fignums()] 44 | if figures: 45 | for fig in figures: 46 | fig.set_size_inches(10, 6) # Ensure figures are large enough 47 | #fig.tight_layout() # Prevent truncation# Generate filename 48 | plot_filename = f"plot_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.png" 49 | # Create relative path 50 | rel_path = os.path.join("tmp/plots", plot_filename) 51 | # Convert to absolute path for saving 52 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 53 | 54 | fig.savefig(abs_path,bbox_inches='tight') 55 | plot_paths.append(rel_path) # Store relative path 56 | 57 | plt.close("all") 58 | result_parts.append(f"Generated {len(plot_paths)} plot(s).") 59 | 60 | if not result_parts: # If no output and no figures 61 | result_parts.append("Executed code successfully with no output. If you want to see the output of a value, you should print it out with `print(...)`.") 62 | 63 | except Exception as e: 64 | result_parts.append(f"Error executing code: {e}") 65 | 66 | # Join all parts of the result with newlines 67 | result_summary = "\n".join(result_parts) 68 | 69 | # Return both the summary and plot paths (if any) 70 | return result_summary, plot_paths 71 | 72 | # Tools List and Node Setup 73 | tools = [ 74 | python_repl_tool, 75 | google_scholar_search, 76 | squidpy_rag_agent, 77 | visualize_cell_cell_interaction_tool, 78 | visualize_spatial_cell_type_map, 79 | visualize_cell_type_composition, 80 | visualize_umap, 81 | report_tool 82 | ] 83 | tool_node = ToolNode(tools) 84 | 85 | # Graph Setup 86 | class GraphsState(TypedDict): 87 | messages: Annotated[list[AnyMessage], add_messages] 88 | input_messages_len: list[int] 89 | graph = StateGraph(GraphsState) 90 | 91 | gpt_4o = ChatOpenAI(model_name="gpt-4o", temperature=0).bind_tools(tools, parallel_tool_calls=False) 92 | 93 | 94 | models = { 95 | "gpt-4o": gpt_4o 96 | } 97 | 98 | def _call_model(state: GraphsState, config: RunnableConfig) -> Command[Literal["tools", "__end__"]]: 99 | st.session_state["final_state"]["messages"]=state["messages"] 100 | model_name = config["configurable"].get("model", "gpt-4o") 101 | llm = models[model_name] 102 | previous_message_count = len(state["messages"]) 103 | state["input_messages_len"].append(previous_message_count) 104 | render_conversation_history(state["messages"][state["input_messages_len"][-2]:state["input_messages_len"][-1]]) 105 | cur_messages_len = len(state["messages"])-state["input_messages_len"][0] 106 | if cur_messages_len > 200: 107 | st.markdown( 108 | f""" 109 |

110 | Current recursion step is {cur_messages_len}. Terminated because the recursion limit of 200 steps was exceeded. 111 |

112 | """, 113 | unsafe_allow_html=True 114 | ) 115 | st.session_state["render_last_message"] = False 116 | return Command( 117 | update={"messages": []}, 118 | goto="__end__", 119 | ) 120 | last_message = state["messages"][-1] 121 | # Check if last message is a ToolMessage and has artifacts 122 | if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "gpt-3.5-turbo": 123 | # Prepare content list with initial text 124 | content_list = [{ 125 | "type": "text", 126 | "text": """ 127 | Please analyze these generated images by the code above. Your tasks are to: 128 | 1. Examine each visualization carefully 129 | 2. Provide a detailed description of what you observe 130 | 3. Explain the biological implications of the observations if any. 131 | 4. You should use google scholar to find more information to see if the literature supports your observation. 132 | 5. please always do multiple search queries (at least 5) to get a better understanding of the observation. 133 | 6. After you finish your writing, please continue to the next steps according to the system instructions. unless user shows intention for interaction or you are not sure about the next step. 134 | 7. Remember to be consistent with the user's input language. you are a multi-lingual assistant. 135 | 8. If you don't see any plots, or the plots are not clear or crowded, please try to fix the code. if you want to see the plots then don't use plt.close" 136 | """ 137 | }] 138 | 139 | # Add all PNG images to the content list 140 | for rel_path in last_message.artifact: 141 | if rel_path.endswith(".png"): 142 | # Convert relative path to absolute based on current script location 143 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 144 | if os.path.exists(abs_path): 145 | with open(abs_path, "rb") as image_file: 146 | image_data = base64.b64encode(image_file.read()).decode("utf-8") 147 | content_list.append({ 148 | "type": "image_url", 149 | "image_url": {"url": f"data:image/png;base64,{image_data}"} 150 | }) 151 | 152 | # Create a single message with all images if we found any 153 | if len(content_list) > 1: # Only if we have at least one image 154 | image_message = HumanMessage(content=content_list,name="image_assistant") 155 | state["messages"].append(image_message) 156 | 157 | response = llm.invoke(state["messages"]) 158 | if response.tool_calls: 159 | return Command( 160 | update={"messages": [response]}, 161 | goto="tools", 162 | ) 163 | else: 164 | st.session_state["render_last_message"] = True 165 | return Command( 166 | update={"messages": [response]}, 167 | goto="__end__", 168 | ) 169 | 170 | graph.add_edge(START, "modelNode") 171 | graph.add_node("tools", tool_node) 172 | graph.add_node("modelNode", _call_model) 173 | graph.add_edge("tools", "modelNode") 174 | graph_runnable = graph.compile() 175 | def invoke_our_graph(messages,model_choose): 176 | config = {"recursion_limit": 200, "configurable": {"model": model_choose}} 177 | return graph_runnable.invoke({"messages": messages,"input_messages_len":[len(messages)]},config=config) 178 | -------------------------------------------------------------------------------- /src/graph_anthropic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from datetime import datetime 4 | import matplotlib.pyplot as plt 5 | from typing import Annotated, TypedDict, Literal, Tuple, List 6 | from dotenv import load_dotenv 7 | from langchain_anthropic 
import ChatAnthropic 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage 10 | from langchain_core.tools import tool 11 | from langgraph.graph import START, StateGraph 12 | from langgraph.graph.message import AnyMessage, add_messages 13 | #from langchain_experimental.utilities import PythonREPL 14 | from tools import PythonREPL 15 | from langgraph.prebuilt import ToolNode 16 | from prompt import system_prompt 17 | from pydantic import BaseModel, Field 18 | from langchain_anthropic import ChatAnthropic 19 | from langgraph.types import Command 20 | from textwrap import dedent 21 | import streamlit as st 22 | from util_anthropic import display_message, render_conversation_history, get_conversation_summary 23 | from langchain_core.runnables.config import RunnableConfig 24 | from tools import google_scholar_search, squidpy_rag_agent, visualize_cell_cell_interaction_tool, visualize_spatial_cell_type_map, visualize_cell_type_composition, visualize_umap, report_tool 25 | import sys 26 | import io 27 | # Directory Setup 28 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 29 | os.makedirs(plot_dir, exist_ok=True) 30 | load_dotenv() 31 | 32 | python_repl = PythonREPL() 33 | 34 | @tool(response_format="content_and_artifact") 35 | def python_repl_tool(query: str) -> Tuple[str, List[str]]: 36 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 37 | If you want to see the output of a value, you should print it out with `print(...)`. """ 38 | 39 | plot_paths = [] # List to store file paths of generated plots 40 | result_parts = [] # List to store different parts of the output 41 | 42 | try: 43 | output = python_repl.run(query) 44 | if output and output.strip(): 45 | result_parts.append(output.strip()) 46 | 47 | figures = [plt.figure(i) for i in plt.get_fignums()] 48 | if figures: 49 | for fig in figures: 50 | fig.set_size_inches(10, 6) # Ensure figures are large enough 51 | #fig.tight_layout() # Prevent truncation# Generate filename 52 | plot_filename = f"plot_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.png" 53 | # Create relative path 54 | rel_path = os.path.join("tmp/plots", plot_filename) 55 | # Convert to absolute path for saving 56 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 57 | 58 | fig.savefig(abs_path,bbox_inches='tight') 59 | plot_paths.append(rel_path) # Store relative path 60 | 61 | plt.close("all") 62 | result_parts.append(f"Generated {len(plot_paths)} plot(s).") 63 | 64 | if not result_parts: # If no output and no figures 65 | result_parts.append("Executed code successfully with no output. 
If you want to see the output of a value, you should print it out with `print(...)`.") 66 | 67 | except Exception as e: 68 | result_parts.append(f"Error executing code: {e}") 69 | 70 | # Join all parts of the result with newlines 71 | result_summary = "\n".join(result_parts) 72 | 73 | # Return both the summary and plot paths (if any) 74 | return result_summary, plot_paths 75 | # Tools List and Node Setup 76 | tools = [ 77 | python_repl_tool, 78 | google_scholar_search, 79 | squidpy_rag_agent, 80 | visualize_cell_cell_interaction_tool, 81 | visualize_spatial_cell_type_map, 82 | visualize_cell_type_composition, 83 | visualize_umap, 84 | report_tool 85 | ] 86 | tool_node = ToolNode(tools) 87 | 88 | # Graph Setup 89 | class GraphsState(TypedDict): 90 | messages: Annotated[list[AnyMessage], add_messages] 91 | input_messages_len: list[int] 92 | 93 | graph = StateGraph(GraphsState) 94 | 95 | claude_3_7_sonnet_20250219 = ChatAnthropic(model_name="claude-3-7-sonnet-20250219",temperature=0,max_tokens=8000).bind_tools(tools) 96 | claude_3_5_sonnet_20241022 = ChatAnthropic(model_name="claude-3-5-sonnet-20241022",temperature=0,max_tokens=8000).bind_tools(tools) 97 | 98 | 99 | models = { 100 | "claude_3_5_sonnet_20241022": claude_3_5_sonnet_20241022, 101 | "claude_3_7_sonnet_20250219": claude_3_7_sonnet_20250219 102 | } 103 | 104 | def _call_model(state: GraphsState, config: RunnableConfig) -> Command[Literal["tools", "__end__"]]: 105 | st.session_state["final_state"]["messages"]=state["messages"] 106 | model_name = config["configurable"].get("model", "claude_3_5_sonnet") 107 | llm = models[model_name] 108 | previous_message_count = len(state["messages"]) 109 | state["input_messages_len"].append(previous_message_count) 110 | render_conversation_history(state["messages"][state["input_messages_len"][-2]:state["input_messages_len"][-1]]) 111 | cur_messages_len = len(state["messages"])-state["input_messages_len"][0] 112 | if cur_messages_len > 200: 113 | st.markdown( 114 | f""" 115 |

116 | Current recursion step is {cur_messages_len}. Terminated because the recursion limit of 200 steps was exceeded. 117 |

118 | """, 119 | unsafe_allow_html=True 120 | ) 121 | st.session_state["render_last_message"] = False 122 | return Command( 123 | update={"messages": []}, 124 | goto="__end__", 125 | ) 126 | last_message = state["messages"][-1] 127 | # Check if last message is a ToolMessage and has artifacts 128 | if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "claude_3_5_haiku": 129 | # Prepare content list with initial text 130 | content_list = [{ 131 | "type": "text", 132 | "text": """ 133 | Please analyze these generated images by the code above. Your tasks are to: 134 | 1. Examine each visualization carefully 135 | 2. Provide a detailed description of what you observe 136 | 3. Explain the biological implications of the observations if any. 137 | 4. You should use google scholar to find more information to see if the literature supports your observation. 138 | 5. please always do multiple search queries (at least 5) to get a better understanding of the observation. 139 | 6. After you finish your writing, please continue to the next steps according to the system instructions. unless user shows intention for interaction or you are not sure about the next step. 140 | 7. Remember to be consistent with the user's input language. you are a multi-lingual assistant. 141 | 8. If you don't see any plots, or the plots are not clear or crowded, please try to fix the code. if you want to see the plots then don't use plt.close" 142 | """ 143 | }] 144 | 145 | # Add all PNG images to the content list 146 | for rel_path in last_message.artifact: 147 | if rel_path.endswith(".png"): 148 | # Convert relative path to absolute based on current script location 149 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 150 | if os.path.exists(abs_path): 151 | with open(abs_path, "rb") as image_file: 152 | image_data = base64.b64encode(image_file.read()).decode("utf-8") 153 | content_list.append({ 154 | "type": "image_url", 155 | "image_url": {"url": f"data:image/png;base64,{image_data}"} 156 | }) 157 | 158 | # Create a single message with all images if we found any 159 | if len(content_list) > 1: # Only if we have at least one image 160 | image_message = HumanMessage(content=content_list,name="image_assistant") 161 | state["messages"].append(image_message) 162 | 163 | response = llm.invoke(state["messages"]) 164 | if response.tool_calls: 165 | return Command( 166 | update={"messages": [response]}, 167 | goto="tools", 168 | ) 169 | else: 170 | st.session_state["render_last_message"] = True 171 | return Command( 172 | update={"messages": [response]}, 173 | goto="__end__", 174 | ) 175 | 176 | graph.add_edge(START, "modelNode") 177 | graph.add_node("tools", tool_node) 178 | graph.add_node("modelNode", _call_model) 179 | graph.add_edge("tools", "modelNode") 180 | graph_runnable = graph.compile() 181 | 182 | def invoke_our_graph(messages,model_choose): 183 | config = {"recursion_limit": 200, "configurable": {"model": model_choose}} 184 | return graph_runnable.invoke({"messages": messages,"input_messages_len":[len(messages)]},config=config) 185 | -------------------------------------------------------------------------------- /src/prompt.py: -------------------------------------------------------------------------------- 1 | system_prompt = """ 2 | Spatial Transcriptomics AI Agent 3 | 4 | This AI agent specializes in analyzing spatial transcriptomics data through a systematic pipeline. 
5 | It utilizes a set of tools to produce Python code snippets for visualization and analysis. The agent is equipped 6 | with tools for data exploration, visualization, and biological interpretation. 7 | 8 | --- 9 | 10 | Available Tools: 11 | 1. python_repl_tool: 12 | - Executes Python code in a live Python shell 13 | - Returns printed outputs and generated visualizations 14 | - Input: Valid Python commands 15 | - Output: Execution results and plot file paths 16 | 17 | 2. google_scholar_search: 18 | - Retrieves academic articles and summaries 19 | - Input: Research topic or biological query 20 | - Output: Article titles, authors, and summaries 21 | - Usage: For literature-backed information 22 | 23 | 3. squidpy_rag_agent: 24 | - Provides guidance on Squidpy usage 25 | - Input: Questions about Squidpy functions 26 | - Output: Code examples and explanations 27 | - Usage: For spatial analysis workflows 28 | 29 | 4. visualize_umap: 30 | - Creates UMAP plots for each time point 31 | - Input: No input required - uses default dataset 32 | - Output: UMAP visualizations colored by cell type 33 | - Shows clustering patterns of different cell populations 34 | 35 | 5. visualize_cell_type_composition: 36 | - Shows cell type proportions across samples 37 | - Input: No input required - uses default dataset 38 | - Output: Stacked bar plots and heatmaps 39 | - Displays changes in cell type composition over time 40 | 41 | 6. visualize_spatial_cell_type_map: 42 | - Creates spatial scatter plots of cell types 43 | - Input: No input required - uses default dataset 44 | - Output: Spatial distribution maps 45 | - Shows cell locations in tissue context 46 | 47 | 7. visualize_cell_cell_interaction: 48 | - Analyzes cell type interaction patterns 49 | - Input: No input required - uses default dataset 50 | - Output: Neighborhood enrichment heatmaps 51 | - Reveals spatial relationships between cell types 52 | 53 | --- 54 | 55 | Pipeline Instructions: 56 | 1. Dimensionality Reduction Visualization: 57 | - Use `visualize_umap` to show cell type clustering 58 | - Examine distribution of cell types in UMAP space 59 | 60 | 2. Cell Type Composition Analysis: 61 | - Apply `visualize_cell_type_composition` to show proportions 62 | - Compare cell type changes across time points 63 | 64 | 3. Spatial Distribution Analysis: 65 | - Use `visualize_spatial_cell_type_map` for tissue context 66 | - Examine spatial organization of cell types 67 | 68 | 4. Cell-Cell Interaction Analysis: 69 | - Apply `visualize_cell_cell_interaction` for neighborhood patterns 70 | - Analyze spatial relationships between cell types 71 | 72 | 5. 
Report: 73 | - Use `report_tool` to generate a report of the analysis 74 | - Input: No input required - uses default dataset 75 | - Output: Report of the analysis 76 | - Usage: For summarizing the analysis 77 | 78 | --- 79 | 80 | ## Data Context 81 | - **Dataset**: Human pancreatic islets grafted on mouse kidney (STARmap spatial transcriptomic data) 82 | - **File location**: `./data/pancreas_processed_full.h5ad` 83 | - **Data structure**: 84 | - `.obs['sample_name']`: Contains timepoints (Week 4, Week 16, Week 20 post-grafting) 85 | - `.obs['slice_name']`: Contains slice identifiers in format "Week_X_slice_Y" 86 | 87 | --- 88 | 89 | ## Important Instructions: 90 | - Always use the visualization tools to get code snippets first 91 | - Execute the code using `python_repl_tool` 92 | - DO NOT modify any code from the visualization tools 93 | - If the user asks you to perform the end-to-end analysis, you should follow the pipeline order: UMAP → composition → spatial map (individual slice, id stored in .obs['slice_name']) → interaction 94 | - If the user have specific task for you to perform, only call the related tool that the use mentioned. DO NOT call all the tools in the pipeline. 95 | - Use `google_scholar_search` for biological interpretation after plotting the visualization 96 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 97 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 98 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 99 | - Be consistent with the user's input language. you are a multi-lingual assistant. 100 | - PLEASE DO NOT CALL MULTIPLE TOOLS AT ONCE. 101 | - <> 102 | Note: The agent can run in autonomous mode, executing all visualizations in sequence, or respond to specific analysis requests. 103 | """ 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | spatial_processing_prompt = """ 125 | In Squidpy, when performing spatial analysis with multiple samples in a single AnnData object, certain functions require independent processing for each sample. 126 | This is essential to avoid spatial artifacts that can arise from pooled spatial coordinates across samples, which can lead to incorrect spatial relationships 127 | and neighborhood structures. Here are the key `gr` (Graph) and `pl` (Plotting) functions that must be applied independently per sample, with instructions on usage: 128 | 129 | ## Spatial Graph Functions (gr) 130 | The following functions should be run separately for each sample, rather than on pooled data, to maintain the integrity of sample-specific spatial relationships. 131 | 132 | 1. **gr.spatial_neighbors(adata[, spatial_key, ...])** 133 | - **Purpose**: Creates a spatial graph based on spatial coordinates. 134 | - **Guidance**: For multiple samples, subset the AnnData object by sample and run `gr.spatial_neighbors` independently to prevent false neighborhood links across samples. 135 | 136 | 2. **gr.nhood_enrichment(adata, cluster_key[, ...])** and **gr.co_occurrence(adata, cluster_key[, ...])** 137 | - **Purpose**: Compute neighborhood enrichment and co-occurrence probabilities for clusters. 138 | - **Guidance**: Apply these functions independently to each sample to capture accurate clustering and co-occurrence within each sample's spatial layout. Pooling samples can lead to artificial enrichment patterns. 139 | 140 | 3. 
**gr.centrality_scores(adata, cluster_key[, ...])** 141 | - **Purpose**: Computes centrality scores per cluster or cell type. 142 | - **Guidance**: Calculate these scores individually per sample to reflect the spatial structure accurately within each sample's layout. 143 | 144 | 4. **gr.interaction_matrix(adata, cluster_key[, ...])** and **gr.ligrec(adata, cluster_key[, ...])** 145 | - **Purpose**: Compute interaction frequencies and test for ligand-receptor interactions based on spatial proximity. 146 | - **Guidance**: For reliable cell-type interactions, run these functions per sample to ensure interactions reflect true spatial proximity within each sample. 147 | 148 | 5. **gr.ripley(adata, cluster_key[, mode, ...])** 149 | - **Purpose**: Calculates Ripley's statistics to assess clustering at various distances. 150 | - **Guidance**: Ripley's clustering analysis should be applied separately to each sample, as pooling data can obscure sample-specific clustering patterns. 151 | 152 | 6. **gr.spatial_autocorr(adata[, ...])** 153 | - **Purpose**: Calculates global spatial autocorrelation metrics (e.g., Moran's I or Geary's C). 154 | - **Guidance**: Autocorrelation measures spatial dependency, so compute it individually per sample to prevent cross-sample biases. 155 | 156 | 7. **gr.mask_graph(sdata, table_key, polygon_mask)** 157 | - **Purpose**: Masks the spatial graph based on a polygon mask. 158 | - **Guidance**: Apply this function per sample only if each sample has a separate spatial graph. If applied to pooled data, ensure that independent graphs have already been created for each sample. 159 | 160 | ## Plotting Functions (pl) 161 | When visualizing results, it's essential to apply the following plotting functions individually to each sample to accurately represent sample-specific spatial patterns: 162 | 163 | 1. **pl.spatial_scatter(adata[, shape, color, ...])** VERY IMPORTANT, REMEMBER TO SPECIFY shape=None, if using STARmap spatial transcriptomic data (sq.pl.spatial_scatter(adata_sample, shape=None)) 164 | - **Purpose**: Visualizes spatial omics data with overlayed sample information. 165 | - **Guidance**: Plot each sample independently to avoid overlapping spatial coordinates from multiple samples. 166 | 167 | 2. **pl.spatial_segment(adata[, color, groups, ...])** 168 | - **Purpose**: Plots spatial data with segmentation masks. 169 | - **Guidance**: Generate segmentation plots per sample to accurately reflect spatial regions within each sample. 170 | 171 | 3. **pl.nhood_enrichment(adata, cluster_key[, ...])** 172 | - **Purpose**: Visualizes neighborhood enrichment. 173 | - **Guidance**: Plot neighborhood enrichment individually for each sample to capture enrichment patterns within each sample's spatial structure. 174 | 175 | 4. **pl.centrality_scores(adata, cluster_key[, ...])** 176 | - **Purpose**: Plots centrality scores. 177 | - **Guidance**: Centrality plots should be generated individually per sample to accurately represent spatial structure. 178 | 179 | 5. **pl.interaction_matrix(adata, cluster_key[, ...])** 180 | - **Purpose**: Plots the interaction matrix of clusters. 181 | - **Guidance**: Visualize the interaction matrix per sample to reflect true intra-sample interaction patterns. 182 | 183 | 6. **pl.ligrec(adata[, cluster_key, ...])** 184 | - **Purpose**: Plots ligand-receptor interactions. 185 | - **Guidance**: Visualize ligand-receptor interactions per sample to avoid mixing spatial proximity across samples. 186 | 187 | 7. 
**pl.ripley(adata, cluster_key[, mode, ...])** 188 | - **Purpose**: Plots Ripley's statistics for spatial clustering. 189 | - **Guidance**: Generate Ripley's plots per sample to capture sample-specific clustering without interference from pooled data. 190 | 191 | 8. **pl.co_occurrence(adata, cluster_key[, ...])** 192 | - **Purpose**: Plots co-occurrence probability of clusters. 193 | - **Guidance**: Plot per sample to reflect accurate co-occurrence within that sample. 194 | 195 | In summary, each of these functions should be applied independently to each sample to prevent spatial artifacts and maintain sample-specific spatial integrity. 196 | This approach ensures reliable spatial relationships within each sample, preserving the biological context in spatial analyses. 197 | """ 198 | 199 | 200 | -------------------------------------------------------------------------------- /src/speech_to_text.py: -------------------------------------------------------------------------------- 1 | # speech_to_text.py 2 | 3 | import base64 4 | import os 5 | from openai import OpenAI 6 | from audio_recorder_streamlit import audio_recorder 7 | import streamlit as st 8 | from dotenv import load_dotenv 9 | from typing import Optional 10 | from st_audiorec import st_audiorec 11 | # Load environment variables for API credentials 12 | load_dotenv() 13 | 14 | # Function to convert audio bytes to text using OpenAI's Whisper model 15 | def convert_audio_to_text(audio_bytes: bytes) -> Optional[str]: 16 | """ 17 | Convert audio bytes to text using OpenAI's Whisper model. 18 | 19 | Parameters: 20 | - audio_bytes (bytes): The audio data to convert to text. 21 | 22 | Returns: 23 | - str: Transcribed text if successful, None otherwise. 24 | """ 25 | try: 26 | # Initialize OpenAI client 27 | client = OpenAI( 28 | api_key=os.getenv("WHISPER_API_KEY") 29 | ) 30 | 31 | # Create a temporary file to store the audio bytes 32 | temp_filename = "temp_audio.wav" 33 | with open(temp_filename, "wb") as f: 34 | f.write(audio_bytes) 35 | 36 | # Open the temporary file and transcribe using Whisper 37 | with open(temp_filename, "rb") as audio_file: 38 | transcription = client.audio.transcriptions.create( 39 | model="whisper-1", 40 | file=audio_file 41 | ) 42 | 43 | # Clean up the temporary file 44 | os.remove(temp_filename) 45 | 46 | return transcription.text 47 | 48 | except Exception as e: 49 | st.sidebar.error(f"An error occurred: {e}", icon="🚨") 50 | return None 51 | 52 | # Function to record audio and get transcription 53 | def input_from_mic() -> Optional[str]: 54 | """ 55 | Record audio from the microphone in the Audio Options tab and convert it to text. 
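As a usage note, the transcription helper above can be driven by any widget that yields raw audio bytes; a minimal sketch with the `audio_recorder` widget imported at the top of this file is shown below (the widget parameters are assumptions, not the app's actual configuration).

```python
# Illustrative wiring only: record audio, then transcribe it with the helper above.
audio_bytes = audio_recorder(text="Click to record")
if audio_bytes:
    transcript = convert_audio_to_text(audio_bytes)
    if transcript:
        st.write(f"You said: {transcript}")
```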
56 | """ 57 | # Use st_audiorec within the Audio Options tab 58 | with st.spinner("Recording..."): 59 | st.session_state["audio_bytes"] = st_audiorec() 60 | 61 | # Check if audio was captured and proceed with transcription 62 | if st.session_state.get("audio_bytes"): 63 | with st.spinner("Transcribing..."): 64 | transcribed_text = convert_audio_to_text(st.session_state["audio_bytes"]) 65 | 66 | # Display the transcribed text in the sidebar Audio Options tab 67 | st.write("**Transcribed Text:**") 68 | st.write(transcribed_text) 69 | 70 | # Clear audio bytes after processing to avoid reuse 71 | st.session_state["audio_bytes"] = None 72 | return transcribed_text 73 | else: 74 | st.write("No audio recorded.") # Inform the user if no audio was captured 75 | return None 76 | 77 | 78 | def convert_text_to_speech(text: str, filename: str = "response.wav") -> Optional[str]: 79 | """ 80 | Convert text to speech using OpenAI's GPT-4o audio model and save as a WAV file. 81 | 82 | Parameters: 83 | - text (str): The text to convert to speech. 84 | - filename (str): The name of the file to save the audio output. Default is "response.wav". 85 | 86 | Returns: 87 | - str: Path to the saved audio file if successful, None otherwise. 88 | """ 89 | prompt_text = f'''Please convert the text between <<< and >>> into speech. 90 | Please be consistent with the user's input language. you are a multi-lingual assistant. 91 | If the text is too long to convert fully, create a summarized version. 92 | Start a summarized response with: "The original response is too long; here is a summary." 93 | Remember: the speech output should NOT exceed 1 minute. 94 | Text to convert: <<< {text} >>>''' 95 | 96 | try: 97 | # Initialize OpenAI client 98 | client = OpenAI( 99 | api_key=os.getenv("OPENAI_API_KEY") 100 | ) 101 | 102 | # Show spinner while processing 103 | with st.spinner("Generating voice response..."): 104 | # Prepare the API call 105 | completion = client.chat.completions.create( 106 | model="gpt-4o-audio-preview", 107 | modalities=["text", "audio"], 108 | audio={"voice": "alloy", "format": "wav"}, 109 | messages=[ 110 | { 111 | "role": "user", 112 | "content": prompt_text 113 | } 114 | ] 115 | ) 116 | 117 | # Decode and save the audio file 118 | wav_bytes = base64.b64decode(completion.choices[0].message.audio.data) 119 | with open(filename, "wb") as f: 120 | f.write(wav_bytes) 121 | 122 | return filename 123 | 124 | except Exception as e: 125 | st.sidebar.error(f"An error occurred: {e}", icon="🚨") 126 | return None 127 | 128 | -------------------------------------------------------------------------------- /src/squidpy_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Annotated, TypedDict, List, Dict, Any 3 | from dotenv import load_dotenv 4 | from git import Repo 5 | from langchain_core.documents import Document 6 | from langchain_text_splitters import RecursiveCharacterTextSplitter 7 | from langchain_text_splitters import Language 8 | from langchain_community.document_loaders.generic import GenericLoader 9 | from langchain_community.document_loaders.parsers import LanguageParser 10 | from langchain_openai import OpenAIEmbeddings 11 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 12 | from langchain_chroma import Chroma 13 | from langchain_anthropic import ChatAnthropic 14 | from langchain_openai import ChatOpenAI 15 | from langgraph.graph import StateGraph, START, END 16 | from langchain_core.messages import 
AnyMessage 17 | from langchain_core.tools import tool 18 | from langgraph.prebuilt import InjectedState 19 | from prompt import spatial_processing_prompt 20 | 21 | load_dotenv() 22 | 23 | # Configuration 24 | REPO_PATH = "./packages_available/squidpy" 25 | PERSIST_DIRECTORY = "./db/chroma_squidpy_db" 26 | 27 | # Define state for Squidpy RAG application 28 | class SquidpyRAGState(TypedDict): 29 | query: str 30 | context: List[Document] 31 | answer: str 32 | chat_history: List[AnyMessage] 33 | 34 | class SquidpyRAGTool: 35 | def __init__(self, model: str = "claude-3-7-sonnet-20250219"): 36 | self.model = model 37 | self.vector_store = self.setup_squidpy_index() 38 | self.rag_pipeline = self.create_squidpy_rag_pipeline() 39 | 40 | def setup_squidpy_index(self): 41 | """Setup and index the Squidpy repository for RAG if not already done.""" 42 | 43 | # Clone repo if it doesn't exist 44 | if not os.path.exists(REPO_PATH): 45 | print(f"Cloning Squidpy repository to {REPO_PATH}...") 46 | Repo.clone_from("https://github.com/scverse/squidpy", to_path=REPO_PATH) 47 | 48 | # Initialize embeddings 49 | embeddings = OpenAIEmbeddings(disallowed_special=()) 50 | 51 | # Load or create vector database 52 | if not os.path.exists(PERSIST_DIRECTORY): 53 | print("Creating new Squidpy vector database...") 54 | 55 | # Load Python files from the repository 56 | loader = GenericLoader.from_filesystem( 57 | REPO_PATH, 58 | glob="**/*", 59 | suffixes=[".py"], 60 | exclude=["**/non-utf8-encoding.py"], 61 | parser=LanguageParser(language=Language.PYTHON, parser_threshold=500), 62 | ) 63 | documents = loader.load() 64 | print(f"Loaded {len(documents)} documents from Squidpy") 65 | 66 | # Split documents into chunks 67 | splitter = RecursiveCharacterTextSplitter.from_language( 68 | language=Language.PYTHON, chunk_size=2000, chunk_overlap=200 69 | ) 70 | texts = splitter.split_documents(documents) 71 | print(f"Split into {len(texts)} text chunks for Squidpy") 72 | 73 | # Create vector store 74 | vector_store = Chroma.from_documents( 75 | documents=texts, 76 | embedding=embeddings, 77 | persist_directory=PERSIST_DIRECTORY 78 | ) 79 | print(f"Created new Chroma database at {PERSIST_DIRECTORY}") 80 | else: 81 | # Load existing vector store 82 | vector_store = Chroma( 83 | persist_directory=PERSIST_DIRECTORY, 84 | embedding_function=embeddings 85 | ) 86 | print(f"Loaded existing Chroma database from {PERSIST_DIRECTORY}") 87 | 88 | return vector_store 89 | 90 | def create_squidpy_rag_pipeline(self): 91 | """Create the RAG pipeline for Squidpy using LangGraph.""" 92 | 93 | # Initialize the LLM 94 | llm = ChatAnthropic(model=self.model) 95 | #llm = ChatOpenAI(model="gpt-4o") 96 | # Define the retrieval step 97 | def retrieve(state: SquidpyRAGState): 98 | """Retrieve relevant documents based on the query.""" 99 | squidpy_retriever = self.vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 8}) 100 | retrieved_docs = squidpy_retriever.invoke( 101 | state["query"] 102 | ) 103 | return {"context": retrieved_docs} 104 | 105 | # Define the generation step 106 | def generate(state: SquidpyRAGState): 107 | """Generate an answer using the retrieved context.""" 108 | # Combine all document content 109 | context_content = "\n\n".join(doc.page_content for doc in state["context"]) 110 | chat_history = state["chat_history"] 111 | 112 | # Create the prompt 113 | prompt = ChatPromptTemplate.from_messages([ 114 | MessagesPlaceholder("chat_history"), 115 | ("user", "The above are the CHAT HISTORY between the user and the spatial 
transcriptomics assistant. you should take into account the chat history when generating the response."), 116 | ("user", 117 | "You are an expert in Squidpy, specializing in providing authentic Squidpy code " 118 | "and explanations on its usage. IMPORTANT: do not use python bracket for the code. " 119 | "REPEAT: do not use python bracket for the code. " 120 | "For each query, respond with:\n" 121 | "1. Squidpy code to solve the user's question.\n" 122 | "2. A concise explanation of the code, focusing on Squidpy-specific concepts, " 123 | "methods, and relevant parameters.\n\n" 124 | "3. REMEMBER to specify shape = None for STARmap spatial transcriptomic data.\n" 125 | "The following are some additional instructions:\n" 126 | "{spatial_processing_prompt}\n\n" 127 | "CONTEXT ON SQUIDPY:\n{context_content}\n\n" 128 | ), 129 | ("user", "USER QUESTION: {query}"), 130 | ]) 131 | 132 | # Generate messages from the prompt 133 | messages = prompt.invoke({ 134 | "query": state["query"], 135 | "chat_history": chat_history, 136 | "context_content": context_content, 137 | "spatial_processing_prompt": spatial_processing_prompt 138 | }) 139 | 140 | # Get response from LLM 141 | response = llm.invoke(messages) 142 | 143 | return {"answer": response.content} 144 | 145 | # Build the graph 146 | graph_builder = StateGraph(SquidpyRAGState) 147 | graph_builder.add_node("retrieve", retrieve) 148 | graph_builder.add_node("generate", generate) 149 | 150 | # Define the flow 151 | graph_builder.add_edge(START, "retrieve") 152 | graph_builder.add_edge("retrieve", "generate") 153 | graph_builder.add_edge("generate", END) 154 | 155 | # Compile the graph 156 | return graph_builder.compile() 157 | 158 | def run(self, query: str, chat_history: List[AnyMessage] = None): 159 | """Run the Squidpy RAG pipeline with the given query and chat history.""" 160 | if chat_history is None: 161 | chat_history = [] 162 | 163 | response = self.rag_pipeline.invoke({ 164 | "query": query, 165 | "chat_history": chat_history, 166 | "context": [], # Will be populated by the retrieve step 167 | "answer": "" # Will be populated by the generate step 168 | }) 169 | 170 | return response["answer"] 171 | 172 | # Initialize the Squidpy RAG tool 173 | squidpy_rag = SquidpyRAGTool() 174 | 175 | @tool 176 | def squidpy_rag_agent(state: Annotated[Dict, InjectedState], query: str) -> str: 177 | """Tool that provides Squidpy code and explanations based on RAG. 178 | Uses the Squidpy codebase to generate accurate Squidpy code for spatial transcriptomics analysis. 179 | 180 | Args: 181 | query: The query to answer using Squidpy knowledge 182 | 183 | Returns: 184 | str: Code and explanation for the Squidpy query 185 | """ 186 | # Extract the chat history from the injected state 187 | #chat_history = state["messages"][:-1] 188 | chat_history = [] 189 | # Run the Squidpy RAG with the query and chat history 190 | #example_answer = squidpy_rag.run(query, chat_history) 191 | #final_answer = example_answer + "\n\nPlease modify the code based on the current context and use `python_repl_tool` to run the modified code above." 
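For orientation, the retrieve-then-generate pipeline above can be exercised on its own before it is wrapped as a tool. A minimal sketch follows; the query text is just an example, and constructing the class will clone and index the Squidpy repository on first run.

```python
# Illustrative stand-alone use of the SquidpyRAGTool class defined above.
rag = SquidpyRAGTool(model="claude-3-7-sonnet-20250219")
answer = rag.run(
    "How do I compute neighborhood enrichment for a single STARmap slice?",
    chat_history=[],
)
print(answer)
```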
192 | 193 | return squidpy_rag.run(query, chat_history) -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230938_643335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230938_643335.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_230970.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_230970.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_315409.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_315409.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_437124.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_437124.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231021_978237.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231021_978237.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231021_997915.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231021_997915.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231022_080390.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231022_080390.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_072022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_072022.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_527042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_527042.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_719080.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_719080.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_894313.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_894313.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_098543.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_098543.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_271860.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_271860.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_507332.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_507332.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_683073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_683073.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_859878.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_859878.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231240_336695.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231240_336695.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_376750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_376750.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_523789.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_523789.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_774850.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_774850.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233051_701835.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233051_701835.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233051_796053.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233051_796053.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_620074.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_620074.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_789226.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_789226.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_961432.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_961432.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_148844.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_148844.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_322050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_322050.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_530819.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_530819.png 
-------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_693983.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_693983.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_875336.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_875336.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233655_053762.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233655_053762.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234033_996956.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234033_996956.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234034_131464.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234034_131464.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234034_262805.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234034_262805.png -------------------------------------------------------------------------------- /src/tools.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from langchain_core.tools import tool 3 | from serpapi import GoogleSearch 4 | import os 5 | from squidpy_rag import squidpy_rag_agent 6 | from textwrap import dedent 7 | from langchain_anthropic import ChatAnthropic 8 | from langgraph.prebuilt import InjectedState 9 | from typing import Annotated, Dict 10 | from langchain_core.messages import HumanMessage 11 | from dotenv import load_dotenv 12 | from datetime import datetime 13 | import streamlit as st 14 | import functools 15 | import logging 16 | import multiprocessing 17 | import json 18 | import re 19 | import sys 20 | from io import StringIO 21 | from typing import Dict, Optional 22 | from pydantic import BaseModel, Field 23 | logger = logging.getLogger(__name__) 24 | load_dotenv() 25 | 26 | # Google Scholar Tool 27 | class GoogleScholarAPI: 28 | def __init__(self, serp_api_key: str = None, top_k_results: int = 40, hl: str = "en", lr: str = "lang_en"): 29 | self.serp_api_key = serp_api_key or os.environ.get("SERP_API_KEY") 30 | self.top_k_results = top_k_results 31 | self.hl = hl 32 | self.lr = lr 33 | 34 | def run(self, query: str) -> str: 35 | if 
not self.serp_api_key: 36 | return "API key missing for Google Scholar search." 37 | params = { 38 | "engine": "google_scholar", 39 | "q": query, 40 | "api_key": self.serp_api_key, 41 | "hl": self.hl, 42 | "lr": self.lr, 43 | "num": min(self.top_k_results, 40), 44 | } 45 | search = GoogleSearch(params) 46 | results = search.get_dict().get("organic_results", []) 47 | if not results: 48 | return "No good Google Scholar Result was found." 49 | return "\n\n".join([ 50 | f"Title: {result.get('title', '')}\n" 51 | f"Authors: {', '.join([a.get('name') for a in result.get('publication_info', {}).get('authors', [])])}\n" 52 | f"Summary: {result.get('snippet', '')}\n" 53 | f"Link: {result.get('link', '')}" 54 | for result in results 55 | ]) 56 | 57 | google_scholar = GoogleScholarAPI() 58 | 59 | 60 | @tool 61 | def google_scholar_search(query: str) -> str: 62 | """Searches Google Scholar for the provided query.""" 63 | return google_scholar.run(query) 64 | 65 | 66 | @tool 67 | def visualize_cell_cell_interaction_tool() -> str: 68 | """ 69 | Visualizes cell-cell interaction patterns from spatial transcriptomics data. 70 | 71 | This tool analyzes and visualizes how different cell types interact with each other 72 | in spatial proximity using neighborhood enrichment analysis. It: 73 | 74 | 1. Loads preprocessed pancreas spatial transcriptomics data 75 | 2. For each sample in the dataset: 76 | - Computes spatial neighbors between cells 77 | - Performs neighborhood enrichment analysis based on cell types 78 | - Creates a heatmap visualization showing interaction patterns 79 | 80 | The visualization shows: 81 | - Red colors indicate cell types that are more likely to be neighbors 82 | - Blue colors indicate cell types that tend to avoid each other 83 | - Color intensity represents the strength of attraction/avoidance 84 | 85 | No input parameters are required - the tool uses a default preprocessed dataset. 86 | 87 | Note: This code should be executed using the python_repl_tool. 
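The tool's full code block follows below; as a condensed illustration of the per-slice pattern it implements (and that `spatial_processing_prompt` in `prompt.py` prescribes), a minimal sketch might look like the following. The slice and cluster keys match the dataset description in the system prompt, while the plotting details are simplified.

```python
# Condensed per-slice neighborhood-enrichment sketch (illustrative; the actual tool
# additionally averages z-scores per time point before plotting).
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns
import squidpy as sq

adata = ad.read_h5ad("./data/pancreas_processed_full.h5ad")
for slice_id in adata.obs["slice_name"].unique():
    adata_slice = adata[adata.obs["slice_name"] == slice_id].copy()
    # Build the spatial graph and enrichment within this slice only,
    # so neighborhoods never cross sample boundaries.
    sq.gr.spatial_neighbors(adata_slice, coord_type="generic", spatial_key="spatial", delaunay=True)
    sq.gr.nhood_enrichment(adata_slice, cluster_key="cell_type")
    zscores = adata_slice.uns["cell_type_nhood_enrichment"]["zscore"]
    sns.heatmap(zscores, vmax=30, vmin=-30, cmap="RdBu_r")
    plt.title(f"cell_type neighborhood enrichment - {slice_id}")
    plt.show()
```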
88 | """ 89 | code = f""" 90 | import squidpy as sq 91 | import anndata as ad 92 | import scanpy as sc 93 | import seaborn as sns 94 | data_path = './data/pancreas_processed_full.h5ad' 95 | adata = ad.read_h5ad(data_path) 96 | # Neighborhood enrichment analysis 97 | id = adata.obs['slice_name'].unique() 98 | result_cell_type_csv = 99 | # set the NaN value to 0 100 | for sample_i in id: 101 | data_i = adata[adata.obs['slice_name']==sample_i] 102 | sq.gr.spatial_neighbors(data_i, coord_type="generic", spatial_key="spatial", delaunay=True) 103 | sq.gr.nhood_enrichment(data_i, cluster_key="cell_type") 104 | data_i.uns['cell_type_nhood_enrichment']['zscore'] = np.nan_to_num(data_i.uns['cell_type_nhood_enrichment']['zscore']) 105 | result_cell_type_csv[sample_i] = pd.DataFrame(data_i.uns['cell_type_nhood_enrichment']['zscore'], columns=data_i.obs['cell_type'].cat.categories, index=data_i.obs['cell_type'].cat.categories) 106 | 107 | week_4 = ['Week_4_slice_1', 'Week_4_slice_2'] 108 | week_16 = ['Week_16_slice_1', 'Week_16_slice_2', 'Week_16_slice_3'] 109 | week_20 = ['Week_20_slice_1', 'Week_20_slice_2', 'Week_20_slice_3', 'Week_20_slice_4'] 110 | 111 | week_4_result = 112 | for sample_i in week_4: 113 | week_4_result[sample_i] = result_cell_type_csv[sample_i] 114 | week_16_result = 115 | for sample_i in week_16: 116 | week_16_result[sample_i] = result_cell_type_csv[sample_i] 117 | week_20_result = 118 | for sample_i in week_20: 119 | week_20_result[sample_i] = result_cell_type_csv[sample_i] 120 | 121 | 122 | week_4_average = (week_4_result['(4, 1)'] + week_4_result['(4, 29)'])/2 123 | week_16_average = (week_16_result['(16, 185)'] + week_16_result['(16, 215)'] + week_16_result['(16, 242)'])/3 124 | week_20_average = (week_20_result['(20, 1)'] + week_20_result['(20, 46)'] + week_20_result['(20, 70)'] + week_20_result['(20, 160)'])/4 125 | 126 | enrichment_dict = 127 | 'week_4': week_4_average, 128 | 'week_16': week_16_average, 129 | 'week_20': week_20_average 130 | 131 | 132 | input_list = ['human_alpha', 133 | 'human_beta', 134 | 'human_delta', 135 | 'human_enterochromaffin', 136 | 'human_exocrine', 137 | 'human_mesenchymal', 138 | 'mouse_nephron', 139 | 'mouse_ureteric_epithelium', 140 | 'mouse_vascular'] 141 | 142 | for sample_i in enrichment_dict.keys(): 143 | a = enrichment_dict[sample_i].reindex(index=input_list, columns=input_list) 144 | ax = sns.heatmap(a, vmax=30, vmin=-30, cmap='RdBu_r', annot=True, fmt=".1f") 145 | ax.set_title('cell_type neighborhood enrichment of {{}}'.format(sample_i)) 146 | plt.show() 147 | 148 | # Please use python_repl_tool to execute the above code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 149 | <<>> 150 | """ 151 | return dedent(code) 152 | 153 | 154 | @tool 155 | def visualize_spatial_cell_type_map() -> str: 156 | """ 157 | Visualizes spatial distribution of cell types across different slices. 158 | 159 | This tool creates spatial scatter plots showing the location and identity 160 | of different cell types in the tissue space. It: 161 | 162 | 1. Loads preprocessed pancreas spatial transcriptomics data 163 | 2. 
For each slice (Week_4_slice_1, Week_4_slice_2, Week_16_slice_1, etc.): 164 | - Creates a spatial plot where each point represents a cell 165 | - Colors points based on cell type identity 166 | - Displays the spatial organization of different cell populations 167 | 168 | The visualization shows: 169 | - Each dot represents a single cell 170 | - Colors indicate different cell types 171 | - Spatial coordinates preserve the original tissue structure 172 | - Separate plots for each slice for detailed spatial comparison 173 | 174 | No input parameters are required - the tool uses a default preprocessed dataset. 175 | 176 | Note: This code should be executed using the python_repl_tool. 177 | """ 178 | 179 | code = f""" 180 | import scanpy as sc 181 | import squidpy as sq 182 | import anndata as ad 183 | import numpy as np 184 | import pandas as pd 185 | import matplotlib.pyplot as plt 186 | import os 187 | # Load data 188 | data_path = './data/pancreas_processed_full.h5ad' 189 | adata = ad.read_h5ad(data_path) 190 | # Define color dictionary for cell types 191 | cell_type_color_dict = 192 | 'human_enterochromaffin': '#fdbf6e', 193 | 'human_alpha': '#34a048', 194 | 'human_beta': '#f69999', 195 | 'human_delta': '#e21f26', 196 | 'human_exocrine': '#2078b4', 197 | 'human_mesenchymal': '#b4d88a', 198 | 'mouse_vascular': '#f57f20', 199 | 'mouse_nephron': '#6b3e98', 200 | 'mouse_ureteric_epithelium': '#fbf49c' 201 | 202 | # Get slice names 203 | slice_names = sorted(adata.obs['slice_name'].unique().tolist()) 204 | # Plot spatial distribution for each slice 205 | for slice_id in slice_names: 206 | adata_slice = adata[adata.obs['slice_name'] == slice_id].copy() 207 | fig, ax = plt.subplots(figsize=(10, 8)) 208 | # Get unique cell types in this slice 209 | cell_types_in_slice = adata_slice.obs['cell_type'].unique() 210 | # Plot each cell type with its color 211 | for cell_type in cell_types_in_slice: 212 | cells = adata_slice[adata_slice.obs['cell_type'] == cell_type] 213 | ax.scatter( 214 | cells.obsm['spatial'][:, 0], 215 | cells.obsm['spatial'][:, 1], 216 | c=cell_type_color_dict[cell_type], 217 | label=cell_type, 218 | s=20, 219 | alpha=0.8 220 | ) 221 | ax.set_title(f'Spatial Cell Type Distribution - {{slice_id}}') 222 | ax.set_xlabel('Spatial X') 223 | ax.set_ylabel('Spatial Y') 224 | ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') 225 | plt.tight_layout() 226 | plt.show() 227 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 228 | <<>> 229 | """ 230 | return dedent(code) 231 | 232 | 233 | @tool 234 | def visualize_cell_type_composition() -> str: 235 | """ 236 | Visualizes cell type composition changes across different time points. 237 | 238 | This tool creates both stacked bar plots and heatmaps to show how cell type 239 | proportions change over time. It: 240 | 241 | 1. Loads preprocessed pancreas spatial transcriptomics data 242 | 2. Calculates cell type proportions for each sample 243 | 3. Creates two complementary visualizations: 244 | - Stacked bar plot showing relative proportions 245 | - Heatmap showing exact percentage values 246 | 247 | The visualizations show: 248 | - Relative abundance of each cell type per sample 249 | - Changes in cell type composition across time points 250 | - Exact percentage values for each cell type 251 | 252 | No input parameters are required - the tool uses a default preprocessed dataset. 253 | 254 | Note: This code should be executed using the python_repl_tool. 
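In practice, each `visualize_*` tool returns a code string rather than producing plots itself; the agent is expected to pass that string, unchanged, to the Python REPL tool for execution. A minimal sketch of that handoff is shown below, using the `PythonREPL` helper defined later in this file; the timeout value is an assumption, and `python_repl_tool` itself is assumed to be a thin wrapper around this runner.

```python
# Illustrative handoff: the tool returns code, the REPL executes it verbatim.
snippet = visualize_cell_type_composition.invoke({})  # code string, not a figure
output = PythonREPL().run(snippet, timeout=600)
print(output)
```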
255 | """ 256 | code = f""" 257 | import matplotlib.pyplot as plt 258 | import pandas as pd 259 | import seaborn as sns 260 | import squidpy as sq 261 | import anndata as ad 262 | import scanpy as sc 263 | # Load the data 264 | data_path = './data/pancreas_processed_full.h5ad' 265 | adata = ad.read_h5ad(data_path) 266 | # Calculate cell type composition for each sample 267 | composition_df = pd.crosstab( 268 | adata.obs['sample_name'], 269 | adata.obs['cell_type'], 270 | normalize='index' # This gives proportions instead of raw counts 271 | ) * 100 # Convert to percentages 272 | 273 | plt.figure(figsize=(12, 6)) 274 | composition_df.plot(kind='bar', stacked=True) 275 | plt.title('Cell Type Composition Across Samples') 276 | plt.xlabel('Sample') 277 | plt.ylabel('Percentage of Cells') 278 | plt.legend(title='Cell Type', bbox_to_anchor=(1.05, 1), loc='upper left') 279 | plt.tight_layout() 280 | plt.xticks(rotation=45) 281 | plt.show() 282 | 283 | 284 | print("Cell type composition (%):") 285 | print(composition_df.round(2)) 286 | plt.figure(figsize=(10, 6)) 287 | sns.heatmap(composition_df, annot=True, fmt='.1f', cmap='YlOrRd') 288 | plt.title('Human Cell Type Composition Heatmap') 289 | plt.ylabel('Sample') 290 | plt.xlabel('Cell Type') 291 | plt.tight_layout() 292 | plt.show() 293 | 294 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 295 | <<>> 296 | <<>> 297 | """ 298 | return dedent(code) 299 | 300 | 301 | @tool 302 | def visualize_umap() -> str: 303 | """ 304 | Visualizes UMAP plots for cell types across different time points. 305 | 306 | This tool creates UMAP visualizations code showing the distribution of cell types 307 | in reduced dimensional space. It: 308 | 309 | 1. Loads preprocessed pancreas spatial transcriptomics data 310 | 2. For each sample (Week 4, Week 16, Week 20): 311 | - Creates a UMAP plot where each point represents a cell 312 | - Colors points based on cell type identity 313 | - Shows cell type clustering patterns 314 | 315 | The visualization shows: 316 | - Each dot represents a single cell 317 | - Colors indicate different cell types 318 | - Clustering patterns reveal relationships between cell types 319 | - Separate plots for each time point for temporal comparison 320 | 321 | No input parameters are required - the tool uses a default preprocessed dataset. 322 | 323 | Note: This code should be executed using the python_repl_tool. 
324 | """ 325 | code = f""" 326 | import squidpy as sq 327 | import anndata as ad 328 | import scanpy as sc 329 | 330 | # Load the data 331 | data_path = './data/pancreas_processed_full.h5ad' 332 | adata = ad.read_h5ad(data_path) 333 | 334 | cell_type_color_dict = 335 | 'human_enterochromaffin': '#fdbf6e', 336 | 'human_alpha': '#34a048', 337 | 'human_beta': '#f69999', 338 | 'human_delta': '#e21f26', 339 | 'human_exocrine': '#2078b4', 340 | 'human_mesenchymal': '#b4d88a', 341 | 'mouse_vascular': '#f57f20', 342 | 'mouse_nephron': '#6b3e98', 343 | 'mouse_ureteric_epithelium': '#fbf49c' 344 | 345 | # Plot the UMAP for the whole smaple 346 | sc.pl.umap( 347 | adata, 348 | color='cell_type', # Replace with your cell type annotation key 349 | title=f'umap for all samples', 350 | legend_loc='on data', 351 | legend_fontsize="small", 352 | legend_fontoutline=2, 353 | palette=cell_type_color_dict 354 | ) 355 | # Iterate over each sample in the AnnData object 356 | for sample_id in ['Week_4', 'Week_16', 'Week_20']: 357 | # Subset the AnnData object for the current sample 358 | adata_sample = adata[adata.obs['sample_name'] == sample_id] 359 | # Plot umap for the specific sample 360 | sc.pl.umap( 361 | adata_sample, 362 | color='cell_type', # Replace with your cell type annotation key 363 | title=f'umap for sample {{sample_id}}', 364 | legend_loc='on data', 365 | legend_fontsize="small", 366 | legend_fontoutline=2, 367 | palette=cell_type_color_dict 368 | ) 369 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 370 | <<>> 371 | """ 372 | return dedent(code) 373 | 374 | 375 | @tool 376 | def report_tool(state: Annotated[Dict, InjectedState], query: str) -> str: 377 | """Generates a comprehensive scientific report based on the conversation history. 378 | 379 | This tool takes the entire conversation history and generates a well-structured scientific report 380 | in academic paper format, covering the analysis performed and insights gathered from the spatial 381 | transcriptomics data. The report includes sections like Abstract, Introduction, Methods, Results, 382 | Discussion, Conclusion, and References. 383 | 384 | The tool saves the report as a PDF file. 385 | 386 | Args: 387 | state: The current conversation state containing message history 388 | query: Additional context or specific requirements for the report (optional) 389 | 390 | Returns: 391 | str: Confirmation message with the path to the saved PDF file 392 | """ 393 | 394 | # Extract the chat history from the injected state 395 | chat_history = state["messages"] 396 | 397 | 398 | 399 | report_prompt = """ 400 | # Scientific Analysis Report 401 | 402 | 403 | Generate a comprehensive scientific report (minimum 1000 words) based on the conversation history above. The report should be specific and avoid general statements. All analysis should be based on data presented in the conversation. 404 | 405 | 406 | 407 | ## 1. Objective 408 | - Clear statement of the research goals 409 | - Overview of what the report aims to address 410 | 411 | ## 2. Study Overview 412 | - Background on the research topic 413 | - Purpose of the study 414 | - Key research questions being investigated 415 | 416 | ## 3. Methods Summary 417 | - Description of analysis techniques employed 418 | - Outline of data processing approaches used 419 | 420 | ## 4. 
Key Findings 421 | - Detailed results from each visualization/analysis in the conversation 422 | - Specific observations with quantitative data where available 423 | - Identification of significant patterns or trends 424 | 425 | ## 5. Biological Implications 426 | - Interpretation of the biological significance of findings 427 | - Integration with existing literature (include inline citations) 428 | - Discussion of broader impacts and relevance 429 | 430 | ## 6. Conclusion 431 | - Summary of major discoveries 432 | - Future research directions 433 | - Potential applications 434 | 435 | ## 7. References 436 | - Relevant citations from literature searches 437 | - Format: Title only (NO author names or years or URL) 438 | 439 | 440 | 441 | 1. OUTPUT ONLY THE REPORT CONTENT, NO OTHER TEXT 442 | 2. Use specific data-driven insights rather than general statements 443 | 3. Maintain scientific tone throughout 444 | 4. Include inline citations where appropriate 445 | 5. Do not assume conclusions not supported by the data 446 | 6. Be consistent with the user's input language. you are a multi-lingual assistant. 447 | FORMAT: THE REPORT SHOULD BE IN MARKDOWN FORMAT. 448 | 449 | """ 450 | 451 | # Generate the report 452 | ins = chat_history[:-1] + [HumanMessage(content=report_prompt, name="report_tool")] 453 | st.write(ins) 454 | llm = ChatAnthropic(model="claude-3-7-sonnet-20250219",max_tokens=8000) 455 | report = llm.invoke(ins) 456 | try: 457 | # Save as markdown file 458 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 459 | os.makedirs('output_report', exist_ok=True) 460 | md_filename = f'./output_report/spatial_transcriptomics_report_{timestamp}.md' 461 | 462 | with open(md_filename, 'w', encoding='utf-8') as f: 463 | f.write(report.content) 464 | return f"Report has been saved as markdown file: {md_filename}" 465 | 466 | except Exception as e: 467 | return f"Error saving markdown file: {str(e)}" 468 | 469 | @functools.lru_cache(maxsize=None) 470 | def warn_once() -> None: 471 | """Warn once about the dangers of PythonREPL.""" 472 | logger.warning("Python REPL can execute arbitrary code. Use with caution.") 473 | 474 | 475 | class PythonREPL(BaseModel): 476 | """Simulates a standalone Python REPL.""" 477 | 478 | globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") # type: ignore[arg-type] 479 | locals: Optional[Dict] = None # type: ignore[arg-type] 480 | 481 | @staticmethod 482 | def sanitize_input(query: str) -> str: 483 | """Sanitize input to the python REPL. 484 | 485 | Remove whitespace, backtick & python 486 | (if llm mistakes python console as terminal) 487 | 488 | Args: 489 | query: The query to sanitize 490 | 491 | Returns: 492 | str: The sanitized query 493 | """ 494 | query = re.sub(r"^(\s|`)*(?i:python)?\s*", "", query) 495 | query = re.sub(r"(\s|`)*$", "", query) 496 | return query 497 | 498 | @classmethod 499 | def worker( 500 | cls, 501 | command: str, 502 | globals: Optional[Dict], 503 | locals: Optional[Dict], 504 | queue: multiprocessing.Queue, 505 | ) -> None: 506 | old_stdout = sys.stdout 507 | sys.stdout = mystdout = StringIO() 508 | try: 509 | cleaned_command = cls.sanitize_input(command) 510 | exec(cleaned_command, globals, locals) 511 | sys.stdout = old_stdout 512 | queue.put(mystdout.getvalue()) 513 | except Exception as e: 514 | sys.stdout = old_stdout 515 | queue.put(repr(e)) 516 | 517 | def run(self, command: str, timeout: Optional[int] = None) -> str: 518 | """Run command with own globals/locals and returns anything printed. 
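As a usage note for the class above, a sketch of how the REPL might be exercised directly is shown below; the commands and timeouts are examples only.

```python
# Illustrative use of the PythonREPL helper defined above.
repl = PythonREPL()
print(repl.run("print(2 + 2)", timeout=30))                  # -> "4\n"
print(repl.run("import time; time.sleep(60)", timeout=1))    # -> "Execution timed out"
```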
519 | Timeout after the specified number of seconds.""" 520 | 521 | # Warn against dangers of PythonREPL 522 | warn_once() 523 | 524 | queue: multiprocessing.Queue = multiprocessing.Queue() 525 | 526 | # Only use multiprocessing if we are enforcing a timeout 527 | if timeout is not None: 528 | # create a Process 529 | p = multiprocessing.Process( 530 | target=self.worker, args=(command, self.globals, self.locals, queue) 531 | ) 532 | 533 | # start it 534 | p.start() 535 | 536 | # wait for the process to finish or kill it after timeout seconds 537 | p.join(timeout) 538 | 539 | if p.is_alive(): 540 | p.terminate() 541 | return "Execution timed out" 542 | else: 543 | self.worker(command, self.globals, self.locals, queue) 544 | # get the result from the worker function 545 | return queue.get() 546 | 547 | -------------------------------------------------------------------------------- /src/unified_app.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | import streamlit as st 5 | from dotenv import load_dotenv 6 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage, SystemMessage 7 | from graph import invoke_our_graph as invoke_gpt_graph 8 | from graph_anthropic import invoke_our_graph as invoke_anthropic_graph 9 | from util import display_message as display_message_gpt, render_conversation_history as render_conversation_history_gpt, get_conversation_summary as get_conversation_summary_gpt 10 | from util_anthropic import display_message as display_message_anthropic, render_conversation_history as render_conversation_history_anthropic, get_conversation_summary as get_conversation_summary_anthropic 11 | from speech_to_text import input_from_mic, convert_text_to_speech 12 | from datetime import datetime 13 | from prompt import system_prompt 14 | 15 | # Load environment variables 16 | load_dotenv() 17 | 18 | # Initialize session state if not present 19 | if "page" not in st.session_state: 20 | st.session_state["page"] = "OpenAI" 21 | 22 | if "final_state" not in st.session_state: 23 | st.session_state["final_state"] = { 24 | "messages": [SystemMessage(content=system_prompt)] 25 | } 26 | if "audio_transcription" not in st.session_state: 27 | st.session_state["audio_transcription"] = None 28 | 29 | # Add custom CSS with theme-aware styling 30 | st.markdown(""" 31 | 202 | """, unsafe_allow_html=True) 203 | 204 | # Set up Streamlit layout 205 | st.markdown('

🤖 Spatial Transcriptomics Agent', unsafe_allow_html=True) 206 | 207 | # Navigation in sidebar with improved styling 208 | st.sidebar.markdown('
', unsafe_allow_html=True) 209 | st.sidebar.title("🎯 Navigation") 210 | 211 | PROVIDER_CONFIGS = { 212 | "Anthropic": { 213 | "icon": "🟣(Recommended)", 214 | "color": "#FF5722", 215 | "hover_color": "#E64A19" 216 | }, 217 | "OpenAI": { 218 | "icon": "🟢", 219 | "color": "#2196F3", 220 | "hover_color": "#1976D2" 221 | } 222 | } 223 | 224 | # Then update the provider selection 225 | provider_options = [f"{PROVIDER_CONFIGS[p]['icon']} {p}" for p in ["Anthropic", "OpenAI"]] 226 | selected = st.sidebar.radio("Select LLM Provider Family", provider_options) 227 | page = selected.split(" ")[1] # Extract provider name without emoji 228 | st.session_state["page"] = page 229 | 230 | # Set provider-specific functions and variables 231 | if page == "OpenAI": 232 | HISTORY_DIR = "conversation_histories_gpt" 233 | invoke_graph = invoke_gpt_graph 234 | display_message = display_message_gpt 235 | render_conversation_history = render_conversation_history_gpt 236 | get_conversation_summary = get_conversation_summary_gpt 237 | available_models = ["gpt-4o"] 238 | else: # Anthropic 239 | HISTORY_DIR = "conversation_histories_anthropic" 240 | invoke_graph = invoke_anthropic_graph 241 | display_message = display_message_anthropic 242 | render_conversation_history = render_conversation_history_anthropic 243 | get_conversation_summary = get_conversation_summary_anthropic 244 | available_models = [ 245 | "claude_3_7_sonnet_20250219", 246 | "claude_3_5_sonnet_20241022" 247 | ] 248 | 249 | # Add model selection with improved styling 250 | selected_model = st.sidebar.selectbox(f"🔧 Select {page} Model:", available_models, index=0) 251 | 252 | # Add New Chat button with custom styling 253 | st.sidebar.markdown('
', unsafe_allow_html=True) 254 | if st.sidebar.button("🔄 Start New Chat"): 255 | st.session_state["final_state"] = { 256 | "messages": [SystemMessage(content=system_prompt)] 257 | } 258 | st.session_state["last_summary_point"] = 0 259 | st.session_state["last_summary_title"] = "Default Title" 260 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 261 | st.rerun() 262 | st.sidebar.markdown('
', unsafe_allow_html=True) 263 | st.sidebar.markdown('
', unsafe_allow_html=True) 264 | 265 | # Set up environment for API keys 266 | if page == "OpenAI" and not os.getenv('OPENAI_API_KEY'): 267 | st.sidebar.markdown(""" 268 |
269 | 🔑 OpenAI API Key Setup 270 |
271 | """, unsafe_allow_html=True) 272 | api_key = st.sidebar.text_input(label="OpenAI API Key", type="password", label_visibility="collapsed") 273 | os.environ["OPENAI_API_KEY"] = api_key 274 | if not api_key: 275 | st.info("Please enter your OpenAI API Key in the sidebar.") 276 | st.stop() 277 | elif page == "Anthropic" and not os.getenv('ANTHROPIC_API_KEY'): 278 | st.sidebar.header("Anthropic API Key Setup") 279 | api_key = st.sidebar.text_input(label="Anthropic API Key", type="password", label_visibility="collapsed") 280 | os.environ["ANTHROPIC_API_KEY"] = api_key 281 | if not api_key: 282 | st.info("Please enter your Anthropic API Key in the sidebar.") 283 | st.stop() 284 | 285 | os.makedirs(HISTORY_DIR, exist_ok=True) 286 | 287 | # Helper Functions for Conversation Management 288 | def save_history(title: str, summary: str): 289 | """Save the current conversation history to a file with title and summary.""" 290 | history_data = { 291 | "title": title, 292 | "summary": summary, 293 | "timestamp": datetime.now().isoformat(), 294 | "messages": messages_to_dicts(st.session_state["final_state"]["messages"]) 295 | } 296 | filename = f"{HISTORY_DIR}/{title.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" 297 | with open(filename, "w") as f: 298 | json.dump(history_data, f) 299 | st.rerun() 300 | 301 | def load_all_histories(): 302 | """Load all saved conversation histories as a list of metadata for display.""" 303 | histories = [] 304 | for file in os.listdir(HISTORY_DIR): 305 | if file.endswith(".json"): 306 | with open(os.path.join(HISTORY_DIR, file), "r") as f: 307 | history = json.load(f) 308 | histories.append({ 309 | "title": history["title"], 310 | "summary": history["summary"], 311 | "timestamp": history["timestamp"], 312 | "filename": file 313 | }) 314 | return sorted(histories, key=lambda x: x["timestamp"], reverse=True) 315 | 316 | def load_history(filename: str): 317 | """Load a specific conversation history file into session state.""" 318 | try: 319 | with open(os.path.join(HISTORY_DIR, filename), "r") as f: 320 | history_data = json.load(f) 321 | st.session_state["final_state"]["messages"] = dicts_to_messages(history_data["messages"]) 322 | st.sidebar.success(f"Conversation '{history_data['title']}' loaded successfully") 323 | except FileNotFoundError: 324 | st.sidebar.error("Conversation history not found.") 325 | 326 | def delete_history(filename: str): 327 | """Delete a specific conversation history file.""" 328 | os.remove(os.path.join(HISTORY_DIR, filename)) 329 | st.sidebar.success("Conversation history deleted.") 330 | st.rerun() 331 | 332 | # Convert messages to serializable dictionaries and vice versa 333 | def messages_to_dicts(messages): 334 | return [msg.dict() for msg in messages] 335 | 336 | def dicts_to_messages(dicts): 337 | reconstructed_messages = [] 338 | for d in dicts: 339 | if d["type"] == "ai": 340 | reconstructed_messages.append(AIMessage(**d)) 341 | elif d["type"] == "human": 342 | reconstructed_messages.append(HumanMessage(**d)) 343 | elif d["type"] == "tool": 344 | reconstructed_messages.append(ToolMessage(**d)) 345 | return reconstructed_messages 346 | 347 | # Organize Sidebar with Tabs and improved styling 348 | st.sidebar.title("⚙️ Settings") 349 | tab1, tab2, tab3 = st.sidebar.tabs(["💬 Conversation", "🎤 Voice", "🖼️ Image"]) 350 | 351 | # Initialize session state variables 352 | if "last_summary_point" not in st.session_state: 353 | st.session_state["last_summary_point"] = 0 354 | if "last_summary_title" not in st.session_state: 
355 | st.session_state["last_summary_title"] = "Default Title" 356 | if "last_summary_summary" not in st.session_state: 357 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 358 | 359 | # Tab 1: Conversation Management 360 | with tab1: 361 | st.subheader("History") 362 | histories = load_all_histories() 363 | if histories: 364 | st.markdown("### Saved Histories") 365 | for history in histories: 366 | with st.expander(f"{history['title']} ({history['timestamp'][:10]})"): 367 | st.write(history["summary"]) 368 | if st.button("Load", key=f"load_{history['filename']}"): 369 | load_history(history["filename"]) 370 | if st.button("Delete", key=f"delete_{history['filename']}"): 371 | delete_history(history["filename"]) 372 | 373 | # Determine title and summary based on message count and last summary point 374 | message_count = len(st.session_state["final_state"]["messages"]) 375 | if message_count > 5 and (message_count - 5) % 10 == 0 and message_count != st.session_state["last_summary_point"]: 376 | #generated_title, generated_summary = get_conversation_summary(st.session_state["final_state"]["messages"]) 377 | #st.session_state["last_summary_title"] = generated_title 378 | st.session_state["last_summary_title"] = "Default Title" 379 | #st.session_state["last_summary_summary"] = generated_summary 380 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 381 | st.session_state["last_summary_point"] = message_count 382 | elif message_count <= 5: 383 | st.session_state["last_summary_title"] = "Default Title" 384 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 385 | 386 | title = st.text_input("Conversation Title", value=st.session_state["last_summary_title"]) 387 | summary = st.text_area("Conversation Summary", value=st.session_state["last_summary_summary"]) 388 | 389 | if st.button("Save Conversation"): 390 | save_history(title, summary) 391 | st.sidebar.success(f"Conversation saved as '{title}'") 392 | 393 | # Tab 2: Voice Options 394 | with tab2: 395 | st.subheader("Audio Options") 396 | use_audio_input = st.checkbox("Enable Voice Input", value=False) 397 | if use_audio_input: 398 | with st.form("audio_input_form", clear_on_submit=True): 399 | st.markdown(""" 400 |
401 | <b>Instructions for Recording Audio:</b>
402 | <ol>
403 | <li>Click Submit Audio below to activate the audio recorder.</li>
404 | <li>Once activated, click Start Recording to begin capturing audio.</li>
405 | <li>When finished, click Stop to end the recording.</li>
406 | <li>Finally, click Submit Audio again to use the recorded audio.</li>
407 | </ol>
408 | 
409 | """, unsafe_allow_html=True) 410 | submitted_audio = st.form_submit_button("Submit Audio") 411 | if submitted_audio: 412 | audio_transcript = input_from_mic() 413 | if audio_transcript: 414 | st.session_state["audio_transcription"] = audio_transcript 415 | prompt = st.session_state["audio_transcription"] 416 | else: 417 | st.session_state["audio_transcription"] = None 418 | 419 | use_voice_response = st.checkbox("Enable Voice Response", value=False) 420 | if use_voice_response: 421 | st.write("If the voice response is too long, a summarized version will generate.") 422 | 423 | # Tab 3: Image Upload 424 | with tab3: 425 | st.subheader("Image") 426 | with st.form("image_upload_form", clear_on_submit=True): 427 | uploaded_images = st.file_uploader("Upload one or more images (optional)", type=["jpg", "jpeg", "png"], accept_multiple_files=True) 428 | submitted = st.form_submit_button("Submit Images") 429 | if submitted: 430 | if uploaded_images: 431 | st.session_state["uploaded_images_data"] = [ 432 | base64.b64encode(image.read()).decode("utf-8") for image in uploaded_images 433 | ] 434 | else: 435 | st.session_state["uploaded_images_data"] = [] 436 | 437 | # Initialize prompt variable 438 | prompt = st.session_state.get("audio_transcription") 439 | 440 | # Main chat interface 441 | st.markdown(f""" 442 |
443 | 🤖
444 | Chat with Spatial Transcriptomics Agent
445 | 
446 | """, unsafe_allow_html=True) 447 | 448 | render_conversation_history(st.session_state["final_state"]["messages"][0:]) 449 | 450 | # Capture text input if no audio input 451 | if prompt is None: 452 | prompt = st.chat_input() 453 | 454 | # Process new user input if available 455 | if prompt: 456 | content_list = [{"type": "text", "text": prompt}] 457 | if "uploaded_images_data" in st.session_state and st.session_state["uploaded_images_data"]: 458 | content_list.extend([ 459 | {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_data}"}} 460 | for img_data in st.session_state["uploaded_images_data"] 461 | ]) 462 | st.session_state["uploaded_images_data"] = [] 463 | 464 | user_message = HumanMessage(content=content_list) 465 | st.session_state["final_state"]["messages"].append(user_message) 466 | render_conversation_history([user_message]) 467 | 468 | with st.spinner(f"Agent is thinking..."): 469 | previous_message_count = len(st.session_state["final_state"]["messages"]) 470 | updated_state = invoke_graph(st.session_state["final_state"]["messages"], selected_model) 471 | 472 | st.session_state["final_state"] = updated_state 473 | new_messages = st.session_state["final_state"]["messages"][previous_message_count:] 474 | 475 | if st.session_state.get("render_last_message", True): 476 | render_conversation_history([st.session_state["final_state"]["messages"][-1]]) 477 | 478 | if use_voice_response: 479 | audio_file = convert_text_to_speech(new_messages[-1].content) 480 | if audio_file: 481 | st.audio(audio_file) 482 | 483 | st.session_state["audio_transcription"] = None 484 | 485 | 486 | 487 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | # util.py 2 | 3 | import os 4 | import json 5 | import time 6 | import streamlit as st 7 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from typing import List, Tuple 10 | from pydantic import BaseModel, Field 11 | from langchain_openai import ChatOpenAI # Import ChatOpenAI where it's defined 12 | # Directory for temporary plot images 13 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 14 | os.makedirs(plot_dir, exist_ok=True) 15 | 16 | def display_message(content, sender="assistant"): 17 | """ 18 | Displays a message from the user or assistant with different styling. 19 | Supports displaying both text and image URLs for the user. 20 | """ 21 | if sender == "user": 22 | if isinstance(content, str): 23 | # Display plain text message from user 24 | st.markdown( 25 | f""" 26 |
27 | <div style="text-align: right;">
28 | <div style="display: inline-block; background-color: #F0F2F6; border-radius: 10px; padding: 8px 12px;">
{content}
29 | </div>
30 | </div>
31 | """, 32 | unsafe_allow_html=True 33 | ) 34 | elif isinstance(content, dict): 35 | # Check if the content has both text and image URL 36 | if "text" in content: 37 | st.markdown( 38 | f""" 39 |
40 | <div style="text-align: right;">
41 | <div style="display: inline-block; background-color: #F0F2F6; border-radius: 10px; padding: 8px 12px;">
{content["text"]}
42 | </div>
43 | </div>
44 | """, 45 | unsafe_allow_html=True 46 | ) 47 | if "url" in content: 48 | st.image(content["url"], caption="User Image", use_container_width=True) 49 | else: 50 | # Display assistant's message, converting LaTeX-style content 51 | modified_content = content.replace("\\(", "$").replace("\\)", "$") 52 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 53 | st.markdown(modified_content) 54 | 55 | def render_conversation_history(messages): 56 | """ 57 | Renders conversation history from a list of messages, handling multiple tool calls. 58 | """ 59 | tool_input_map = {} # Map to track tool_call_id to tool_input 60 | 61 | for entry in messages: 62 | # Skip if the message has name "image_assistant" 63 | if hasattr(entry, "name") and entry.name == "image_assistant": 64 | continue 65 | 66 | if isinstance(entry, HumanMessage): 67 | # Check if entry.content is list or string and handle appropriately 68 | if isinstance(entry.content, list): 69 | for item in entry.content: 70 | if isinstance(item, dict): 71 | # Display text or image URL in dictionary format 72 | if item["type"] == "text": 73 | display_message(item["text"], sender="user") 74 | elif item["type"] == "image_url": 75 | display_message({"url": item["image_url"]["url"]}, sender="user") 76 | elif isinstance(item, str): 77 | # Display plain text if it's a string 78 | display_message(item, sender="user") 79 | elif isinstance(entry.content, str): 80 | # Display single string content 81 | display_message(entry.content, sender="user") 82 | 83 | elif isinstance(entry, AIMessage): 84 | display_message(entry.content, sender="assistant") 85 | 86 | # Handle tool calls in AIMessage 87 | if entry.tool_calls: 88 | tool_calls = entry.tool_calls 89 | for tool_call in tool_calls: 90 | try: 91 | arguments_json = tool_call.get('args', '{}') 92 | tool_input = arguments_json 93 | tool_call_id = tool_call.get("id") 94 | if tool_call_id: 95 | tool_input_map[tool_call_id] = tool_input 96 | except json.JSONDecodeError: 97 | tool_input_map[tool_call.get("id", "unknown")] = "Error decoding tool input." 
98 | 99 | elif isinstance(entry, ToolMessage): 100 | display_tool_message(entry, tool_input_map) 101 | 102 | 103 | def display_tool_message(entry, tool_input_map): 104 | """Display a tool message with the corresponding tool input.""" 105 | tool_output = entry.content 106 | tool_call_id = getattr(entry, "tool_call_id", None) 107 | tool_input = tool_input_map.get(tool_call_id, "No matching tool input found") 108 | 109 | with st.expander(f"Tool Call: {entry.name}", expanded=False): 110 | if isinstance(tool_input, dict) and 'query' in tool_input: 111 | st.code(tool_input['query'], language="python") 112 | else: 113 | st.code(tool_input or "No tool input available", language="python") 114 | st.write("**Tool Output:**") 115 | st.code(tool_output) 116 | 117 | # Handle artifacts if they exist 118 | artifacts = getattr(entry, "artifact", []) 119 | if artifacts: 120 | st.write("**Generated Artifacts (e.g., Plots):**") 121 | for rel_path in artifacts: 122 | if rel_path.endswith(".png"): 123 | # Convert relative path to absolute 124 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 125 | if os.path.exists(abs_path): 126 | st.image(abs_path, caption="Generated Plot") 127 | else: 128 | st.write(f"Error: Plot file not found at {rel_path}") 129 | 130 | 131 | # Pydantic model for structured output 132 | class ConversationSummary(BaseModel): 133 | """Structure for conversation title and summary.""" 134 | title: str = Field(description="The title of the conversation") 135 | summary: str = Field(description="A concise summary of the conversation's main points") 136 | 137 | # Function to get conversation title and summary 138 | def get_conversation_summary(messages: List[BaseMessage]) -> Tuple[str, str]: 139 | # Initialize the LLM model within the function 140 | llm = ChatOpenAI(model_name="gpt-4o",temperature=0) 141 | prompt_template = ChatPromptTemplate.from_messages([ 142 | MessagesPlaceholder("msgs"), 143 | ("human", "Given the above messages between user and AI agent, return a title and concise summary of the conversation"), 144 | ]) 145 | structured_llm = llm.with_structured_output(ConversationSummary) 146 | summarized_chain = prompt_template | structured_llm 147 | response = summarized_chain.invoke(messages) 148 | return response.title, response.summary 149 | -------------------------------------------------------------------------------- /src/util_anthropic.py: -------------------------------------------------------------------------------- 1 | # util_anthropic.py 2 | 3 | import os 4 | import json 5 | import time 6 | import streamlit as st 7 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_anthropic import ChatAnthropic 10 | from typing import List, Tuple 11 | from pydantic import BaseModel, Field 12 | # Directory for temporary plot images 13 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 14 | os.makedirs(plot_dir, exist_ok=True) 15 | 16 | def display_message(content, sender="assistant"): 17 | """ 18 | Displays a message from the user or assistant with different styling. 19 | Supports displaying both text and image URLs for the user. 20 | """ 21 | if sender == "user": 22 | if isinstance(content, str): 23 | # Display plain text message from user 24 | st.markdown( 25 | f""" 26 |
27 | <div style="text-align: right;">
28 | <div style="display: inline-block; background-color: #F0F2F6; border-radius: 10px; padding: 8px 12px;">
{content}
29 | </div>
30 | </div>
31 | """, 32 | unsafe_allow_html=True 33 | ) 34 | elif isinstance(content, dict): 35 | # Check if the content has both text and image URL 36 | if "text" in content: 37 | st.markdown( 38 | f""" 39 |
40 | <div style="text-align: right;">
41 | <div style="display: inline-block; background-color: #F0F2F6; border-radius: 10px; padding: 8px 12px;">
{content["text"]}
42 | </div>
43 | </div>
44 | """, 45 | unsafe_allow_html=True 46 | ) 47 | if "url" in content: 48 | st.image(content["url"], caption="User Image", use_container_width=True) 49 | else: 50 | # Display assistant's message, handling various content formats 51 | if isinstance(content, str): 52 | # Process and display plain text with LaTeX-style content 53 | modified_content = content.replace("\\(", "$").replace("\\)", "$") 54 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 55 | st.markdown(modified_content) 56 | elif isinstance(content, list): 57 | # Iterate through list items if content is a list 58 | for item in content: 59 | if isinstance(item, dict): 60 | if item.get("type") == "text" and "text" in item: 61 | # Process LaTeX-style text 62 | modified_text = item["text"].replace("\\(", "$").replace("\\)", "$") 63 | modified_text = modified_text.replace("\\[", "$$").replace("\\]", "$$") 64 | st.markdown(modified_text) 65 | elif "url" in item: 66 | st.image(item["url"], caption="Assistant Image", use_container_width=True) 67 | elif isinstance(item, str): 68 | # Handle plain text items in the list 69 | modified_content = item.replace("\\(", "$").replace("\\)", "$") 70 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 71 | st.markdown(modified_content) 72 | elif isinstance(content, dict): 73 | # Display text if present in a single dictionary 74 | if "text" in content: 75 | modified_text = content["text"].replace("\\(", "$").replace("\\)", "$") 76 | modified_text = modified_text.replace("\\[", "$$").replace("\\]", "$$") 77 | st.markdown(modified_text) 78 | if "url" in content: 79 | st.image(content["url"], caption="Assistant Image", use_container_width=True) 80 | else: 81 | # Handle unexpected content type 82 | st.error("Unsupported content format from the assistant.") 83 | 84 | 85 | def render_conversation_history(messages): 86 | """ 87 | Renders conversation history from a list of messages, handling multiple tool calls. 
88 | """ 89 | tool_input_map = {} # Map to track tool_call_id to tool_input 90 | 91 | for entry in messages: 92 | # Skip if the message has name "image_assistant" 93 | if hasattr(entry, "name") and entry.name == "image_assistant": 94 | continue 95 | 96 | if isinstance(entry, HumanMessage): 97 | # Check if entry.content is list or string and handle appropriately 98 | if isinstance(entry.content, list): 99 | for item in entry.content: 100 | if isinstance(item, dict): 101 | # Display text or image URL in dictionary format 102 | if item["type"] == "text": 103 | display_message(item["text"], sender="user") 104 | elif item["type"] == "image_url": 105 | display_message({"url": item["image_url"]["url"]}, sender="user") 106 | elif isinstance(item, str): 107 | # Display plain text if it's a string 108 | display_message(item, sender="user") 109 | elif isinstance(entry.content, str): 110 | # Display single string content 111 | display_message(entry.content, sender="user") 112 | 113 | elif isinstance(entry, AIMessage): 114 | display_message(entry.content, sender="assistant") 115 | 116 | # Handle tool calls in AIMessage 117 | if entry.tool_calls: 118 | tool_calls = entry.tool_calls 119 | for tool_call in tool_calls: 120 | try: 121 | arguments_json = tool_call.get('args', '{}') 122 | #tool_input = arguments_json.get("query", "No tool input available") 123 | tool_input = arguments_json 124 | tool_call_id = tool_call.get("id") 125 | if tool_call_id: 126 | tool_input_map[tool_call_id] = tool_input 127 | except json.JSONDecodeError: 128 | tool_input_map[tool_call.get("id", "unknown")] = "Error decoding tool input." 129 | 130 | elif isinstance(entry, ToolMessage): 131 | display_tool_message(entry, tool_input_map) 132 | 133 | 134 | def display_tool_message(entry, tool_input_map): 135 | """ 136 | Display a tool message with the corresponding tool input based on the tool_call_id. 
137 | """ 138 | tool_output = entry.content 139 | tool_call_id = getattr(entry, "tool_call_id", None) # Get the tool_call_id from the ToolMessage 140 | tool_input = tool_input_map.get(tool_call_id, "No matching tool input found") # Match with tool_input_map 141 | 142 | with st.expander(f"Tool Call: {entry.name}", expanded=False): 143 | # Check if 'query' exists in tool_input and display accordingly 144 | if isinstance(tool_input, dict) and 'query' in tool_input: 145 | st.code(tool_input['query'], language="python") 146 | else: 147 | st.code(tool_input or "No tool input available", language="python") 148 | st.write("**Tool Output:**") 149 | st.code(tool_output) 150 | 151 | # Handle artifacts if they exist 152 | artifacts = getattr(entry, "artifact", []) 153 | if artifacts: 154 | st.write("**Generated Artifacts (e.g., Plots):**") 155 | for rel_path in artifacts: 156 | if rel_path.endswith(".png"): 157 | # Convert relative path to absolute 158 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 159 | if os.path.exists(abs_path): 160 | st.image(abs_path, caption="Generated Plot") 161 | else: 162 | st.write(f"Error: Plot file not found at {rel_path}") 163 | 164 | 165 | # Pydantic model for structured output 166 | class ConversationSummary(BaseModel): 167 | """Structure for conversation title and summary.""" 168 | title: str = Field(description="The title of the conversation") 169 | summary: str = Field(description="A concise summary of the conversation's main points") 170 | 171 | # Function to get conversation title and summary 172 | def get_conversation_summary(messages: List[BaseMessage]) -> Tuple[str, str]: 173 | # Initialize the LLM model within the function 174 | llm = ChatAnthropic(model_name="claude-3-5-sonnet-20240620",temperature=0) 175 | 176 | # Define the prompt template 177 | prompt_template = ChatPromptTemplate.from_messages([ 178 | MessagesPlaceholder("msgs"), 179 | ("human", "Given the above messages between user and AI agent, return a title and concise summary of the conversation"), 180 | ]) 181 | 182 | # Configure the structured output model 183 | structured_llm = llm.with_structured_output(ConversationSummary) 184 | summarized_chain = prompt_template | structured_llm 185 | 186 | # Invoke the chain with the messages and retrieve the response 187 | response = summarized_chain.invoke(messages) 188 | 189 | # Return the title and summary 190 | return response.title, response.summary 191 | --------------------------------------------------------------------------------
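
The `get_conversation_summary` helpers defined in `src/util.py` and `src/util_anthropic.py` are currently bypassed in `unified_app.py`, where the call is commented out in favor of a static "Default Title" and default summary. Below is a minimal sketch of how they could be wired back into that auto-titling step while keeping the existing fallback strings; the `safe_conversation_summary` wrapper is illustrative only and is not part of the repository.

```python
# Minimal sketch -- NOT part of the repository. Shows one way the
# get_conversation_summary helper could be re-enabled for the auto-titling
# step in unified_app.py, which currently uses the "Default Title" placeholder.
# The wrapper name and fallback strings below are illustrative assumptions.
from util import get_conversation_summary  # or util_anthropic, depending on the selected backend


def safe_conversation_summary(messages,
                              default_title="Default Title",
                              default_summary="This is the default summary for short conversations."):
    """Return (title, summary); fall back to the defaults if the LLM call fails."""
    try:
        return get_conversation_summary(messages)
    except Exception:
        # Keep the sidebar responsive even if the API call or structured-output parsing fails.
        return default_title, default_summary


# Possible use in the disabled block of unified_app.py:
# title, summary = safe_conversation_summary(st.session_state["final_state"]["messages"])
# st.session_state["last_summary_title"] = title
# st.session_state["last_summary_summary"] = summary
```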