├── LICENSE ├── README.md ├── STAgent_generated_report.md ├── assets └── images │ └── stagent_architecture.png ├── db └── chroma_squidpy_db │ └── chroma.sqlite3 ├── environment.yml └── src ├── .env ├── .streamlit └── config.toml ├── __pycache__ ├── custom_class.cpython-311.pyc ├── graph.cpython-311.pyc ├── graph_anthropic.cpython-311.pyc ├── graph_gemini.cpython-311.pyc ├── prompt.cpython-311.pyc ├── speech_to_text.cpython-311.pyc ├── squidpy_rag.cpython-311.pyc ├── tools.cpython-311.pyc ├── util.cpython-311.pyc ├── util_anthropic.cpython-311.pyc └── util_gemini.cpython-311.pyc ├── db └── chroma_squidpy_db │ ├── chroma.sqlite3 │ └── f219160f-16f0-4337-8401-5cefc3a7ae39 │ ├── data_level0.bin │ ├── header.bin │ ├── length.bin │ └── link_lists.bin ├── graph.py ├── graph_anthropic.py ├── prompt.py ├── speech_to_text.py ├── squidpy_rag.py ├── tmp └── plots │ ├── plot_20250524_230938_643335.png │ ├── plot_20250524_230939_230970.png │ ├── plot_20250524_230939_315409.png │ ├── plot_20250524_230939_437124.png │ ├── plot_20250524_231021_978237.png │ ├── plot_20250524_231021_997915.png │ ├── plot_20250524_231022_080390.png │ ├── plot_20250524_231106_072022.png │ ├── plot_20250524_231106_527042.png │ ├── plot_20250524_231106_719080.png │ ├── plot_20250524_231106_894313.png │ ├── plot_20250524_231107_098543.png │ ├── plot_20250524_231107_271860.png │ ├── plot_20250524_231107_507332.png │ ├── plot_20250524_231107_683073.png │ ├── plot_20250524_231107_859878.png │ ├── plot_20250524_231240_336695.png │ ├── plot_20250524_232828_376750.png │ ├── plot_20250524_232828_523789.png │ ├── plot_20250524_232828_774850.png │ ├── plot_20250524_233051_701835.png │ ├── plot_20250524_233051_796053.png │ ├── plot_20250524_233653_620074.png │ ├── plot_20250524_233653_789226.png │ ├── plot_20250524_233653_961432.png │ ├── plot_20250524_233654_148844.png │ ├── plot_20250524_233654_322050.png │ ├── plot_20250524_233654_530819.png │ ├── plot_20250524_233654_693983.png │ ├── plot_20250524_233654_875336.png │ ├── plot_20250524_233655_053762.png │ ├── plot_20250524_234033_996956.png │ ├── plot_20250524_234034_131464.png │ └── plot_20250524_234034_262805.png ├── tools.py ├── unified_app.py ├── util.py └── util_anthropic.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Zuwan Lin, Wenbo Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # STAgent 2 | 3 | [![GitHub stars](https://img.shields.io/github/stars/LiuLab-Bioelectronics-Harvard/STAgent)](https://github.com/LiuLab-Bioelectronics-Harvard/STAgent/stargazers) 4 | [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 5 | 6 | https://doi.org/10.1101/2025.04.01.646731 7 | 8 | ## Overview 9 | **STAgent** is a multimodal large language model (LLM)-based AI agent that automates spatial transcriptomics analysis from data to deep scientific insights. Built for end-to-end research autonomy, STAgent integrates: 10 | 11 | - Advanced vision-language models 12 | - Dynamic code generation 13 | - Contextualized literature integration 14 | - Structured report synthesis 15 | 16 | STAgent streamlines complex spatial biology workflows, removing the need for manual programming or domain-specific expertise. This enables rapid, reproducible, and interpretable discoveries in tissue biology. 17 | 18 | STAgent Architecture 19 | 20 | ## Related Work 21 | 22 | STAgent is part of a broader AI agent ecosystem we developed for specialized biological data analysis. A complementary tool in this ecosystem is [SpikeAgent](https://github.com/LiuLab-Bioelectronics-Harvard/SpikeAgent.git), which focuses on neuronal electrophysiology data. While STAgent specializes in spatial transcriptomics to reveal tissue architecture and gene expression patterns, SpikeAgent automates the analysis of neuronal firing patterns and network dynamics from electrophysiological recordings. 23 | 24 | SpikeAgent is described in our recent preprint: [https://www.biorxiv.org/content/10.1101/2025.02.11.637754v1](https://www.biorxiv.org/content/10.1101/2025.02.11.637754v1) 25 | 26 | ## Demo 27 | 28 | Check out our [demo video](https://www.youtube.com/watch?v=aEUop05RINY&t=2s) to see STAgent in action. 29 | 30 | ## Features 31 | 32 | ### End-to-End Automation 33 | Transforms spatial transcriptomics data into comprehensive, publication-style research reports without human intervention. STAgent autonomously executes the full analytical pipeline from image preprocessing to biological interpretation. 34 | 35 | ### Multimodal Interaction 36 | Supports text, voice, and image-based inputs, enabling intuitive natural language interfaces for researchers with no computational background. 37 | 38 | ### Autonomous Reasoning 39 | Leverages multimodal LLMs to perform visual reasoning on tissue images, generate and execute Python analysis code, interpret spatial maps, and integrate literature insights. 40 | 41 | ### Interpretable Results 42 | Produces structured scientific reports with methods, key findings, biological implications, and citation-supported context, resembling peer-reviewed publications. 43 | 44 | ### Context-Aware Gene Analysis 45 | Performs multimodal enrichment analyses that go beyond statistical significance, focusing on biologically relevant pathways tailored to the tissue context. 46 | 47 | ### Visual Reasoning Engine 48 | Analyzes spatial maps and cell architectures directly, detecting subtle morphogenetic patterns and tissue-level changes across timepoints or conditions. 49 | 50 | ### Scalable Knowledge Synthesis 51 | Converts spatially resolved gene expression data into coherent scientific narratives, uncovering developmental programs, cellular interactions, and signaling networks. 52 | 53 | ## Installation 54 | 55 | ### Prerequisites 56 | - Python 3.11 57 | - Conda package manager 58 | 59 | ### Setup Instructions 60 | 61 | 1. **Clone the repository** 62 | ```bash 63 | git clone https://github.com/LiuLab-Bioelectronics-Harvard/STAgent.git 64 | cd STAgent 65 | ``` 66 | 67 | 2. **Install dependencies** 68 | 69 | We use conda to manage dependencies and have tested on Mac systems with Apple M2 chips. 70 | 71 | ```bash 72 | # Create the environment from the file 73 | conda env create -f environment.yml 74 | 75 | # Activate the environment 76 | conda activate STAgent 77 | ``` 78 | 79 | 3. **Configure environment variables** 80 | - modify the `.env` file (src/.env) with your own API keys: 81 | 82 | ``` 83 | # OpenAI models (https://platform.openai.com/api-keys) 84 | OPENAI_API_KEY= 85 | WHISPER_API_KEY= 86 | (Note: OPENAI_API_KEY is the same as WHISPER_API_KEY) 87 | 88 | # Claude models (https://www.anthropic.com/api) 89 | ANTHROPIC_API_KEY= 90 | 91 | # Google Scholar search via SerpAPI (https://serpapi.com/) 92 | SERP_API_KEY= 93 | ``` 94 | 💡 Important: Make sure your API accounts have sufficient balance or credits available, otherwise the agent may not function properly. 95 | 96 | 4. **Set up the data folder** 97 | 98 | ```bash 99 | mkdir -p data 100 | ``` 101 | 102 | Download the .h5ad data files from [Google Drive](https://drive.google.com/drive/folders/1RqWGBhCia06-vQnqHUnid63MybQIKwFJ) and place them in the `./data` directory. 103 | 104 | ## Usage 105 | 106 | 1. **Launch the application** 107 | ```bash 108 | streamlit run src/unified_app.py 109 | ``` 110 | The app will open in your default web browser at the local host. 111 | 112 | 2. **Interact with the agent** 113 | - Select a model (claude-3.7-sonnet recommended) 114 | - You can start interacting with the agent by typing messages in the chat interface 115 | - Example prompts you can try: 116 | ``` 117 | "Can you help me perform an end-to-end analysis on my spatial transcriptomic datasets. Please also generate a report." 118 | ``` 119 | - The agent will respond to your queries and can perform complex analyses based on natural language instructions 120 | 121 | ## Project Structure 122 | 123 | - `src/`: Contains the source code for STAgent 124 | - `data/`: Directory for storing spatial transcriptomics datasets 125 | - `src/tmp/plots/`: Contains plots generated by the agent 126 | - `conversation_histories_{model}/`: Stores conversation history classified by model 127 | 128 | ## Example Output 129 | 130 | When you prompt the agent to perform an end-to-end analysis, it generates a comprehensive markdown report with peer-reviewed literatures as references (one example output is "STAgent_generated_report.md"). 131 | 132 | 133 | 134 | ## Citation 135 | If you use STAgent in your research, please cite: 136 | > *Lin, Z., *Wang, W., et al. Spatial transcriptomics AI agent charts hPSC-pancreas maturation in vivo. (2025). _bioRxiv_. 137 | > https://doi.org/10.1101/2025.04.01.646731 138 | 139 | ## License 140 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 141 | -------------------------------------------------------------------------------- /STAgent_generated_report.md: -------------------------------------------------------------------------------- 1 | # Scientific Analysis Report: Temporal Evolution of Human Pancreatic Islet Xenotransplantation 2 | 3 | ## 1. Objective 4 | 5 | This report aims to provide a comprehensive analysis of the temporal changes observed in human pancreatic islets transplanted into mouse kidney across three time points (Week 4, Week 16, and Week 20). The analysis focuses on characterizing cell type composition, spatial organization, and intercellular interaction patterns to understand the biological processes underlying xenograft adaptation and survival. The primary goal is to delineate the dynamic remodeling of cellular architecture that occurs during the post-transplantation period and identify key factors that may contribute to successful engraftment and function of the transplanted islets. 6 | 7 | ## 2. Study Overview 8 | 9 | ### Background 10 | 11 | Pancreatic islet transplantation represents a promising therapeutic approach for type 1 diabetes, offering the potential for improved glycemic control and reduced dependence on exogenous insulin. However, challenges including limited islet survival post-transplantation and immune rejection have restricted its widespread clinical application. Xenotransplantation, using non-human donor islets, presents an alternative strategy to address organ shortage but faces additional immunological barriers. 12 | 13 | ### Purpose 14 | 15 | This study investigated the temporal evolution of human pancreatic islet xenografts in mouse kidney using spatial transcriptomics. By analyzing cellular composition, spatial organization, and cell-cell interactions at three time points (Week 4, Week 16, and Week 20), the research aimed to characterize the adaptation processes that occur during xenograft integration with host tissue. 16 | 17 | ### Research Questions 18 | 19 | 1. How does cellular composition of transplanted islets change over time? 20 | 2. What spatial reorganization patterns emerge during graft adaptation? 21 | 3. How do interactions between different cell types evolve post-transplantation? 22 | 4. What mechanisms may contribute to graft survival and functional integration? 23 | 24 | ## 3. Methods Summary 25 | 26 | The analysis employed a systematic approach to characterize the xenotransplanted human pancreatic islets: 27 | 28 | 1. **Dimensionality Reduction Analysis**: UMAP visualization was used to examine cell type clustering patterns across time points, revealing population-level relationships between human donor and mouse host cells. 29 | 30 | 2. **Cell Type Composition Analysis**: Quantitative assessment of cell type proportions at each time point (Week 4, Week 16, Week 20) using normalized percentages, visualized through stacked bar plots and heatmaps. 31 | 32 | 3. **Spatial Distribution Mapping**: Scatter plots of cell coordinates colored by cell type were generated for each tissue slice, enabling visualization of the spatial organization of different cell populations. 33 | 34 | 4. **Cell-Cell Interaction Analysis**: Neighborhood enrichment analysis using spatial statistics to quantify preferential associations or avoidances between cell types, presented as heatmaps with z-score values. 35 | 36 | The dataset consisted of STARmap spatial transcriptomic data from human pancreatic islets grafted on mouse kidney, with multiple slices per time point. The analysis pipeline integrated cell type identification, compositional analysis, spatial mapping, and interaction quantification to provide a comprehensive characterization of xenograft evolution. 37 | 38 | ## 4. Key Findings 39 | 40 | ### 4.1 Cell Type Population Dynamics 41 | 42 | The UMAP visualization revealed distinct clustering of different cell populations with clear separation between human donor and mouse host cells: 43 | 44 | - Human endocrine cells (alpha, beta, delta) clustered together but maintained separate identities 45 | - Human mesenchymal and exocrine cells formed distinct clusters 46 | - Mouse kidney cells (nephron, vascular, ureteric epithelium) clustered separately from human cells 47 | 48 | Quantitative cell type composition analysis revealed significant temporal changes: 49 | 50 | - **Alpha Cells**: Dramatic fluctuation in proportion - 10.7% (Week 4) → 25.6% (Week 16) → 11.9% (Week 20) 51 | - **Beta Cells**: Progressive decline - 19.6% (Week 4) → 13.6% (Week 16) → 11.5% (Week 20) 52 | - **Delta Cells**: Steady increase - 0.9% (Week 4) → 1.8% (Week 16) → 3.0% (Week 20) 53 | - **Mesenchymal Cells**: Dramatic expansion - 0.1% (Week 4) → 4.4% (Week 16) → 19.1% (Week 20) 54 | - **Enterochromaffin Cells**: Substantial decline - 12.5% (Week 4) → 1.4% (Week 16) → 1.0% (Week 20) 55 | - **Exocrine Cells**: Fluctuation with overall increase - 3.6% (Week 4) → 2.2% (Week 16) → 8.7% (Week 20) 56 | 57 | The alpha-to-beta cell ratio shifted significantly: 0.55 (Week 4) → 1.88 (Week 16) → 1.04 (Week 20), indicating dynamic remodeling of the endocrine compartment. 58 | 59 | ### 4.2 Spatial Organization Patterns 60 | 61 | Spatial distribution maps revealed distinct organizational patterns: 62 | 63 | - **Islet-Like Structure Formation**: Human endocrine cells consistently clustered in islet-like structures across all time points 64 | - **Cell Type Zonation**: Alpha cells frequently positioned at the periphery of islet structures, with beta cells forming the core, reflecting native islet architecture 65 | - **Mesenchymal Expansion Pattern**: Progressive formation of a mesenchymal network surrounding islet structures, particularly evident by Week 20 66 | - **Host-Graft Interface**: Clear boundary between human islet structures and mouse kidney tissue, with selective vascular integration 67 | - **Exocrine Segregation**: Exocrine cells formed distinct clusters separate from islet structures, particularly in Week 20 samples 68 | 69 | Notably, delta cells increasingly positioned at the interface between alpha and beta cells over time, suggesting maturation of paracrine signaling networks. 70 | 71 | ### 4.3 Cell-Cell Interaction Dynamics 72 | 73 | Neighborhood enrichment analysis revealed evolving interaction patterns: 74 | 75 | - **Endocrine Cell Homotypic Interactions**: Strengthening over time 76 | - Alpha-alpha: 21.4 → 59.0 → 66.2 77 | - Beta-beta: 49.0 → 62.2 → 83.6 78 | - Delta-delta: 9.1 → 25.3 → 48.1 79 | 80 | - **Mesenchymal Cell Behavior**: 81 | - Mesenchymal-mesenchymal: -0.1 → 48.7 → 97.0 82 | - Initially neutral with endocrine cells, becoming increasingly negative by Week 20 83 | - Strong negative association with mouse nephron cells: 0.8 → -8.7 → -38.0 84 | 85 | - **Host-Graft Boundary**: 86 | - Increasing negative enrichment between human endocrine and mouse nephron cells 87 | - Alpha cells and mouse nephron: -20.2 → -56.3 → -41.9 88 | - Beta cells and mouse nephron: -39.8 → -47.0 → -44.3 89 | 90 | - **Exocrine Cell Isolation**: 91 | - Exocrine-exocrine: 71.6 → 90.3 → 117.8 92 | - Increasingly negative associations with all other cell types 93 | 94 | - **Delta Cell Integration**: 95 | - Increasing association with alpha cells: 5.3 → 8.9 → 15.3 96 | - Minimal association with beta cells across all time points 97 | 98 | - **Enterochromaffin Cell Behavior**: 99 | - Decreasing self-association: 29.2 → 9.0 → 2.7 100 | - Early association with alpha cells (14.6) diminishing over time (0.6 by Week 20) 101 | 102 | These interaction patterns reveal progressive compartmentalization and specialization of cellular neighborhoods within the xenograft. 103 | 104 | ## 5. Biological Implications 105 | 106 | ### 5.1 Endocrine Cell Remodeling 107 | 108 | The dynamic changes in alpha-to-beta cell ratio observed in this study reflect a significant remodeling of the endocrine compartment post-transplantation. The initial increase in alpha-to-beta ratio at Week 16, followed by normalization by Week 20, suggests a biphasic response to transplantation stress. This aligns with findings that alpha cells may be more resistant to stress during transplantation than beta cells, as documented in studies of islet transplantation outcomes ("Beta-cell function following human islet transplantation for type 1 diabetes"). 109 | 110 | The observed alpha-beta cell spatial organization, with alpha cells positioned peripherally and beta cells forming the core of islet structures, recapitulates aspects of native islet architecture. This arrangement facilitates paracrine signaling, which is critical for coordinated hormone secretion. As noted in research on islet architecture, "The pancreatic islet functions as a single organ with tightly coordinated signaling between the different cell types" ("Alpha-, delta-and PP-cells: are they the architectural cornerstones of islet structure and co-ordination?"). 111 | 112 | ### 5.2 Mesenchymal Cell Protective Function 113 | 114 | The dramatic expansion of mesenchymal cells (0.1% → 19.1%) represents one of the most striking findings of this study. This expansion, coupled with the spatial distribution forming a network around islet structures, strongly suggests a protective role. Studies have demonstrated that mesenchymal cells can enhance islet transplantation outcomes through multiple mechanisms: 115 | 116 | 1. Immunomodulation and prevention of inflammatory responses 117 | 2. Promotion of revascularization 118 | 3. Secretion of trophic factors that support islet cell survival 119 | 120 | Research has shown that "MSCs have the capacity to improve the outcomes of islet transplantation in animal models of T1D" ("Protecting islet functional viability using mesenchymal stromal cells"). The observed spatial positioning of mesenchymal cells around islet structures by Week 20 likely represents an adaptive response that enhances graft survival by creating a protective microenvironment. 121 | 122 | ### 5.3 Vascularization Dynamics 123 | 124 | The neighborhood enrichment analysis revealed complex patterns of interaction between mouse vascular cells and human islet cells. The consistent negative enrichment scores between vascular cells and endocrine cells suggest that vascularization occurs primarily at the periphery of islet structures rather than through direct infiltration. This pattern may reflect the revascularization process described in the literature where "islet vascularization not only allows direct cellular exchanges, but also influences the characteristics and spatial arrangement of islet endocrine and immune cells" ("Vessel Network Architecture of Adult Human Islets Promotes Distinct Cell-Cell Interactions In Situ and Is Altered After Transplantation"). 125 | 126 | The increasing positive association between mouse vascular cells and mouse nephron cells indicates that host vasculature maintains its native connections while extending into the graft area. This revascularization pattern is crucial for graft survival, as noted in research showing that "neovascularization of transplanted islets is essential for their survival and function" ("Vascularization of purified pancreatic islet-like cell aggregates (pseudoislets) after syngeneic transplantation"). 127 | 128 | ### 5.4 Delta Cell Function and Integration 129 | 130 | The steady increase in delta cells (0.9% → 3.0%) and their specific positioning at the interface between alpha and beta cells suggests an important regulatory adaptation. Delta cells secrete somatostatin, which regulates both alpha and beta cell function through paracrine signaling. Research has shown that "delta cells form synchronized networks within islets" and "delta cell filopodia allow an ~tenfold increase in potential direct interactions with beta and alpha cells" ("Structural basis for delta cell paracrine regulation in pancreatic islets"). 131 | 132 | The preferential association of delta cells with alpha cells rather than beta cells, as revealed in the neighborhood enrichment analysis, aligns with findings that delta cells may differentially regulate alpha cell function in response to metabolic changes. This strategic positioning likely contributes to the establishment of proper hormone secretion dynamics within the transplanted islets. 133 | 134 | ### 5.5 Enterochromaffin Cell Dynamics 135 | 136 | The substantial decrease in enterochromaffin cells (12.5% → 1.0%) represents an intriguing finding. Enterochromaffin cells are normally rare in native pancreatic islets but have been observed in stem cell-derived islets and during islet development or regeneration. Recent research indicates that "enterochromaffin cells originate from an intestinal lineage, while islet cells differentiate from a distinct pancreatic lineage" ("Single-nucleus multi-omics of human stem cell-derived islets identifies deficiencies in lineage specification"). 137 | 138 | The high initial presence followed by decline may represent a transient regenerative response that diminishes as the graft matures. This pattern could reflect cellular plasticity during the early adaptation phase, with subsequent lineage restriction as the graft stabilizes. The decline in enterochromaffin cells coincides with the normalization of endocrine cell ratios, potentially indicating maturation of the transplanted islets. 139 | 140 | ## 6. Conclusion 141 | 142 | ### 6.1 Major Discoveries 143 | 144 | This comprehensive analysis of human pancreatic islet xenotransplantation revealed several key insights into the temporal evolution of cellular composition, spatial organization, and interaction patterns: 145 | 146 | 1. The xenograft undergoes distinct adaptation phases characterized by initial stress response (Week 4), endocrine remodeling (Week 16), and subsequent stabilization (Week 20) 147 | 148 | 2. Mesenchymal cell expansion represents a critical adaptive response that likely contributes to graft survival through the formation of a protective microenvironment 149 | 150 | 3. Endocrine cells maintain their native architectural organization with alpha cells at the periphery and beta cells in the core, facilitating proper paracrine signaling 151 | 152 | 4. Delta cells increase steadily and position strategically to regulate alpha and beta cell function through paracrine mechanisms 153 | 154 | 5. Progressive compartmentalization of different cell types creates spatially defined functional domains within the graft 155 | 156 | 6. The xenograft-host interface shows increasing definition over time, with specific patterns of exclusion and selective vascular integration 157 | 158 | ### 6.2 Future Research Directions 159 | 160 | Several avenues for future research emerge from these findings: 161 | 162 | 1. Functional assessment of the transplanted islets at different time points to correlate cellular architecture with hormone secretion capacity 163 | 164 | 2. Investigation of the specific molecular mechanisms underlying mesenchymal cell protective effects in xenotransplantation 165 | 166 | 3. Targeted manipulation of mesenchymal cell expansion to enhance graft survival and function 167 | 168 | 4. Exploration of strategies to accelerate vascularization while maintaining proper islet architecture 169 | 170 | 5. Characterization of the extracellular matrix components that may contribute to spatial organization and cell type interactions 171 | 172 | 6. Investigation of the origin and function of enterochromaffin cells in the context of islet transplantation 173 | 174 | ### 6.3 Potential Applications 175 | 176 | The insights gained from this study have several potential applications: 177 | 178 | 1. Development of optimized protocols for islet transplantation that promote beneficial cellular architecture and interactions 179 | 180 | 2. Design of bioengineered scaffolds that mimic the supportive microenvironment created by mesenchymal cells 181 | 182 | 3. Targeted cellular therapies that combine islet cells with supportive mesenchymal populations to enhance graft outcomes 183 | 184 | 4. Improved strategies for monitoring graft health based on cellular composition and interaction patterns 185 | 186 | 5. Development of interventions to accelerate the transition from early stress response to stable graft architecture 187 | 188 | In conclusion, this study provides a detailed characterization of the dynamic cellular changes that occur during human pancreatic islet xenotransplantation. The findings highlight the importance of considering not only cellular composition but also spatial organization and interaction patterns in understanding graft adaptation and survival. These insights contribute to the foundation for developing improved approaches to islet transplantation for the treatment of diabetes. 189 | 190 | ## 7. References 191 | 192 | - Paracrine signaling in islet function and survival 193 | - Paracrine and autocrine interactions in the human islet: more than meets the eye 194 | - Structural basis for delta cell paracrine regulation in pancreatic islets 195 | - Paracrine regulation of insulin secretion 196 | - Alpha-cell paracrine signaling in the regulation of beta-cell insulin secretion 197 | - Integrating the inputs that shape pancreatic islet hormone release 198 | - Comprehensive alpha, beta and delta cell transcriptomes reveal that ghrelin selectively activates delta cells and promotes somatostatin release from pancreatic islets 199 | - Paracrine interactions within islets of Langerhans 200 | - Cell–cell interactions in the endocrine pancreas 201 | - Protecting islet functional viability using mesenchymal stromal cells 202 | - Potential role of mesenchymal stromal cells in pancreatic islet transplantation 203 | - Mesenchymal stem cell in pancreatic islet transplantation 204 | - Human mesenchymal stem cells protect human islets from pro-inflammatory cytokines 205 | - Mesenchymal stem cells prevent acute rejection and prolong graft function in pancreatic islet transplantation 206 | - Mesenchymal stromal cells improve transplanted islet survival and islet function in a syngeneic mouse model 207 | - Cell rearrangement in transplanted human islets 208 | - Vessel Network Architecture of Adult Human Islets Promotes Distinct Cell-Cell Interactions In Situ and Is Altered After Transplantation 209 | - Vascularization of purified pancreatic islet-like cell aggregates (pseudoislets) after syngeneic transplantation 210 | - Revascularization and remodelling of pancreatic islets grafted under the kidney capsule 211 | - Bioengineering the vascularized endocrine pancreas: a fine-tuned interplay between vascularization, extracellular-matrix-based scaffold architecture, and insulin secretion 212 | - Vascular and immune interactions in islets transplantation and 3D islet models 213 | - A focus on enterochromaffin cells among the enteroendocrine cells: localization, morphology, and role 214 | - Heterogeneity of enterochromaffin cells within the gastrointestinal tract 215 | - Tissue-and cell-specific properties of enterochromaffin cells affect the fate of tumorigenesis toward nonendocrine adenocarcinoma of the small intestine 216 | - Single-nucleus multi-omics of human stem cell-derived islets identifies deficiencies in lineage specification 217 | - Beta-cell function following human islet transplantation for type 1 diabetes 218 | - Alpha-, delta-and PP-cells: are they the architectural cornerstones of islet structure and co-ordination? -------------------------------------------------------------------------------- /assets/images/stagent_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/assets/images/stagent_architecture.png -------------------------------------------------------------------------------- /db/chroma_squidpy_db/chroma.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/db/chroma_squidpy_db/chroma.sqlite3 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: STAgent 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.11 7 | - pip 8 | - numpy 9 | - pandas 10 | - ipython 11 | - jupyter 12 | - matplotlib 13 | - scikit-learn 14 | - conda-forge::ffmpeg 15 | - pip: 16 | - streamlit>=1.41.1 17 | - langchain>=0.3.11 18 | - langchain-core>=0.3.24 19 | - langchain-openai>=0.2.12 20 | - langchain-community>=0.3.11 21 | - openai>=1.57.4 22 | - python-dotenv>=1.0.1 23 | - streamlit-audiorec>=0.1.3 24 | - tiktoken>=0.8.0 25 | - plotly 26 | - scipy 27 | - sounddevice 28 | - soundfile 29 | - pydub 30 | - langgraph 31 | - langchain_experimental 32 | - langchain_google_genai 33 | - langchain_anthropic 34 | - audio_recorder_streamlit 35 | - scanpy 36 | - squidpy 37 | - google-search-results 38 | - langchain-chroma 39 | - esprima 40 | - tree-sitter 41 | - tree-sitter-languages 42 | - protobuf<=3.20.3 -------------------------------------------------------------------------------- /src/.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | SERP_API_KEY= 3 | ANTHROPIC_API_KEY= 4 | WHISPER_API_KEY= -------------------------------------------------------------------------------- /src/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | # Light mode settings 3 | base="light" 4 | primaryColor="#1E88E5" 5 | backgroundColor="#FFFFFF" 6 | secondaryBackgroundColor="#F0F8FF" 7 | textColor="#262730" 8 | font="sans serif" 9 | 10 | [theme.light] 11 | # Custom light mode colors 12 | primaryColor="#1E88E5" 13 | backgroundColor="#FFFFFF" 14 | secondaryBackgroundColor="#F0F8FF" 15 | textColor="#262730" 16 | 17 | [theme.dark] 18 | # Custom dark mode colors 19 | primaryColor="#90CAF9" 20 | backgroundColor="#0E1117" 21 | secondaryBackgroundColor="#1E1E1E" 22 | textColor="#FAFAFA" -------------------------------------------------------------------------------- /src/__pycache__/custom_class.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/custom_class.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/graph.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/graph_anthropic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph_anthropic.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/graph_gemini.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/graph_gemini.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/prompt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/prompt.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/speech_to_text.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/speech_to_text.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/squidpy_rag.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/squidpy_rag.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/tools.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/tools.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util_anthropic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util_anthropic.cpython-311.pyc -------------------------------------------------------------------------------- /src/__pycache__/util_gemini.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/__pycache__/util_gemini.cpython-311.pyc -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/chroma.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/chroma.sqlite3 -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/header.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/header.bin -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/length.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/length.bin -------------------------------------------------------------------------------- /src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/link_lists.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/db/chroma_squidpy_db/f219160f-16f0-4337-8401-5cefc3a7ae39/link_lists.bin -------------------------------------------------------------------------------- /src/graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from datetime import datetime 4 | import matplotlib.pyplot as plt 5 | from typing import Annotated, TypedDict, Literal, Tuple, List 6 | from dotenv import load_dotenv 7 | from langchain_core.prompts import ChatPromptTemplate 8 | from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage 9 | from langchain_core.tools import tool 10 | from langchain_openai import ChatOpenAI 11 | from langgraph.graph import START, StateGraph 12 | from langgraph.graph.message import AnyMessage, add_messages 13 | #from langchain_experimental.utilities import PythonREPL 14 | from tools import PythonREPL 15 | from langgraph.prebuilt import ToolNode 16 | from prompt import system_prompt 17 | from langgraph.types import Command 18 | from textwrap import dedent 19 | import streamlit as st 20 | from util import display_message, render_conversation_history, get_conversation_summary 21 | from langchain_core.runnables.config import RunnableConfig 22 | from tools import google_scholar_search, squidpy_rag_agent, visualize_cell_cell_interaction_tool, visualize_spatial_cell_type_map, visualize_cell_type_composition, visualize_umap, report_tool 23 | # Directory Setup 24 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 25 | os.makedirs(plot_dir, exist_ok=True) 26 | load_dotenv() 27 | 28 | python_repl = PythonREPL() 29 | 30 | @tool(response_format="content_and_artifact") 31 | def python_repl_tool(query: str) -> Tuple[str, List[str]]: 32 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 33 | If you want to see the output of a value, you should print it out with `print(...)`. """ 34 | 35 | plot_paths = [] # List to store file paths of generated plots 36 | result_parts = [] # List to store different parts of the output 37 | 38 | try: 39 | output = python_repl.run(query) 40 | if output and output.strip(): 41 | result_parts.append(output.strip()) 42 | 43 | figures = [plt.figure(i) for i in plt.get_fignums()] 44 | if figures: 45 | for fig in figures: 46 | fig.set_size_inches(10, 6) # Ensure figures are large enough 47 | #fig.tight_layout() # Prevent truncation# Generate filename 48 | plot_filename = f"plot_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.png" 49 | # Create relative path 50 | rel_path = os.path.join("tmp/plots", plot_filename) 51 | # Convert to absolute path for saving 52 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 53 | 54 | fig.savefig(abs_path,bbox_inches='tight') 55 | plot_paths.append(rel_path) # Store relative path 56 | 57 | plt.close("all") 58 | result_parts.append(f"Generated {len(plot_paths)} plot(s).") 59 | 60 | if not result_parts: # If no output and no figures 61 | result_parts.append("Executed code successfully with no output. If you want to see the output of a value, you should print it out with `print(...)`.") 62 | 63 | except Exception as e: 64 | result_parts.append(f"Error executing code: {e}") 65 | 66 | # Join all parts of the result with newlines 67 | result_summary = "\n".join(result_parts) 68 | 69 | # Return both the summary and plot paths (if any) 70 | return result_summary, plot_paths 71 | 72 | # Tools List and Node Setup 73 | tools = [ 74 | python_repl_tool, 75 | google_scholar_search, 76 | squidpy_rag_agent, 77 | visualize_cell_cell_interaction_tool, 78 | visualize_spatial_cell_type_map, 79 | visualize_cell_type_composition, 80 | visualize_umap, 81 | report_tool 82 | ] 83 | tool_node = ToolNode(tools) 84 | 85 | # Graph Setup 86 | class GraphsState(TypedDict): 87 | messages: Annotated[list[AnyMessage], add_messages] 88 | input_messages_len: list[int] 89 | graph = StateGraph(GraphsState) 90 | 91 | gpt_4o = ChatOpenAI(model_name="gpt-4o", temperature=0).bind_tools(tools, parallel_tool_calls=False) 92 | 93 | 94 | models = { 95 | "gpt-4o": gpt_4o 96 | } 97 | 98 | def _call_model(state: GraphsState, config: RunnableConfig) -> Command[Literal["tools", "__end__"]]: 99 | st.session_state["final_state"]["messages"]=state["messages"] 100 | model_name = config["configurable"].get("model", "gpt-4o") 101 | llm = models[model_name] 102 | previous_message_count = len(state["messages"]) 103 | state["input_messages_len"].append(previous_message_count) 104 | render_conversation_history(state["messages"][state["input_messages_len"][-2]:state["input_messages_len"][-1]]) 105 | cur_messages_len = len(state["messages"])-state["input_messages_len"][0] 106 | if cur_messages_len > 200: 107 | st.markdown( 108 | f""" 109 |

110 | Current recursion step is {cur_messages_len}. Terminated because you exceeded the limit of 200. 111 |

112 | """, 113 | unsafe_allow_html=True 114 | ) 115 | st.session_state["render_last_message"] = False 116 | return Command( 117 | update={"messages": []}, 118 | goto="__end__", 119 | ) 120 | last_message = state["messages"][-1] 121 | # Check if last message is a ToolMessage and has artifacts 122 | if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "gpt-3.5-turbo": 123 | # Prepare content list with initial text 124 | content_list = [{ 125 | "type": "text", 126 | "text": """ 127 | Please analyze these generated images by the code above. Your tasks are to: 128 | 1. Examine each visualization carefully 129 | 2. Provide a detailed description of what you observe 130 | 3. Explain the biological implications of the observations if any. 131 | 4. You should use google scholar to find more information to see if the literature supports your observation. 132 | 5. please always do multiple search queries (at least 5) to get a better understanding of the observation. 133 | 6. After you finish your writing, please continue to the next steps according to the system instructions. unless user shows intention for interaction or you are not sure about the next step. 134 | 7. Remember to be consistent with the user's input language. you are a multi-lingual assistant. 135 | 8. If you don't see any plots, or the plots are not clear or crowded, please try to fix the code. if you want to see the plots then don't use plt.close" 136 | """ 137 | }] 138 | 139 | # Add all PNG images to the content list 140 | for rel_path in last_message.artifact: 141 | if rel_path.endswith(".png"): 142 | # Convert relative path to absolute based on current script location 143 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 144 | if os.path.exists(abs_path): 145 | with open(abs_path, "rb") as image_file: 146 | image_data = base64.b64encode(image_file.read()).decode("utf-8") 147 | content_list.append({ 148 | "type": "image_url", 149 | "image_url": {"url": f"data:image/png;base64,{image_data}"} 150 | }) 151 | 152 | # Create a single message with all images if we found any 153 | if len(content_list) > 1: # Only if we have at least one image 154 | image_message = HumanMessage(content=content_list,name="image_assistant") 155 | state["messages"].append(image_message) 156 | 157 | response = llm.invoke(state["messages"]) 158 | if response.tool_calls: 159 | return Command( 160 | update={"messages": [response]}, 161 | goto="tools", 162 | ) 163 | else: 164 | st.session_state["render_last_message"] = True 165 | return Command( 166 | update={"messages": [response]}, 167 | goto="__end__", 168 | ) 169 | 170 | graph.add_edge(START, "modelNode") 171 | graph.add_node("tools", tool_node) 172 | graph.add_node("modelNode", _call_model) 173 | graph.add_edge("tools", "modelNode") 174 | graph_runnable = graph.compile() 175 | def invoke_our_graph(messages,model_choose): 176 | config = {"recursion_limit": 200, "configurable": {"model": model_choose}} 177 | return graph_runnable.invoke({"messages": messages,"input_messages_len":[len(messages)]},config=config) 178 | -------------------------------------------------------------------------------- /src/graph_anthropic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from datetime import datetime 4 | import matplotlib.pyplot as plt 5 | from typing import Annotated, TypedDict, Literal, Tuple, List 6 | from dotenv import load_dotenv 7 | from langchain_anthropic import ChatAnthropic 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage 10 | from langchain_core.tools import tool 11 | from langgraph.graph import START, StateGraph 12 | from langgraph.graph.message import AnyMessage, add_messages 13 | #from langchain_experimental.utilities import PythonREPL 14 | from tools import PythonREPL 15 | from langgraph.prebuilt import ToolNode 16 | from prompt import system_prompt 17 | from pydantic import BaseModel, Field 18 | from langchain_anthropic import ChatAnthropic 19 | from langgraph.types import Command 20 | from textwrap import dedent 21 | import streamlit as st 22 | from util_anthropic import display_message, render_conversation_history, get_conversation_summary 23 | from langchain_core.runnables.config import RunnableConfig 24 | from tools import google_scholar_search, squidpy_rag_agent, visualize_cell_cell_interaction_tool, visualize_spatial_cell_type_map, visualize_cell_type_composition, visualize_umap, report_tool 25 | import sys 26 | import io 27 | # Directory Setup 28 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 29 | os.makedirs(plot_dir, exist_ok=True) 30 | load_dotenv() 31 | 32 | python_repl = PythonREPL() 33 | 34 | @tool(response_format="content_and_artifact") 35 | def python_repl_tool(query: str) -> Tuple[str, List[str]]: 36 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 37 | If you want to see the output of a value, you should print it out with `print(...)`. """ 38 | 39 | plot_paths = [] # List to store file paths of generated plots 40 | result_parts = [] # List to store different parts of the output 41 | 42 | try: 43 | output = python_repl.run(query) 44 | if output and output.strip(): 45 | result_parts.append(output.strip()) 46 | 47 | figures = [plt.figure(i) for i in plt.get_fignums()] 48 | if figures: 49 | for fig in figures: 50 | fig.set_size_inches(10, 6) # Ensure figures are large enough 51 | #fig.tight_layout() # Prevent truncation# Generate filename 52 | plot_filename = f"plot_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.png" 53 | # Create relative path 54 | rel_path = os.path.join("tmp/plots", plot_filename) 55 | # Convert to absolute path for saving 56 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 57 | 58 | fig.savefig(abs_path,bbox_inches='tight') 59 | plot_paths.append(rel_path) # Store relative path 60 | 61 | plt.close("all") 62 | result_parts.append(f"Generated {len(plot_paths)} plot(s).") 63 | 64 | if not result_parts: # If no output and no figures 65 | result_parts.append("Executed code successfully with no output. If you want to see the output of a value, you should print it out with `print(...)`.") 66 | 67 | except Exception as e: 68 | result_parts.append(f"Error executing code: {e}") 69 | 70 | # Join all parts of the result with newlines 71 | result_summary = "\n".join(result_parts) 72 | 73 | # Return both the summary and plot paths (if any) 74 | return result_summary, plot_paths 75 | # Tools List and Node Setup 76 | tools = [ 77 | python_repl_tool, 78 | google_scholar_search, 79 | squidpy_rag_agent, 80 | visualize_cell_cell_interaction_tool, 81 | visualize_spatial_cell_type_map, 82 | visualize_cell_type_composition, 83 | visualize_umap, 84 | report_tool 85 | ] 86 | tool_node = ToolNode(tools) 87 | 88 | # Graph Setup 89 | class GraphsState(TypedDict): 90 | messages: Annotated[list[AnyMessage], add_messages] 91 | input_messages_len: list[int] 92 | 93 | graph = StateGraph(GraphsState) 94 | 95 | claude_3_7_sonnet_20250219 = ChatAnthropic(model_name="claude-3-7-sonnet-20250219",temperature=0,max_tokens=8000).bind_tools(tools) 96 | claude_3_5_sonnet_20241022 = ChatAnthropic(model_name="claude-3-5-sonnet-20241022",temperature=0,max_tokens=8000).bind_tools(tools) 97 | 98 | 99 | models = { 100 | "claude_3_5_sonnet_20241022": claude_3_5_sonnet_20241022, 101 | "claude_3_7_sonnet_20250219": claude_3_7_sonnet_20250219 102 | } 103 | 104 | def _call_model(state: GraphsState, config: RunnableConfig) -> Command[Literal["tools", "__end__"]]: 105 | st.session_state["final_state"]["messages"]=state["messages"] 106 | model_name = config["configurable"].get("model", "claude_3_5_sonnet") 107 | llm = models[model_name] 108 | previous_message_count = len(state["messages"]) 109 | state["input_messages_len"].append(previous_message_count) 110 | render_conversation_history(state["messages"][state["input_messages_len"][-2]:state["input_messages_len"][-1]]) 111 | cur_messages_len = len(state["messages"])-state["input_messages_len"][0] 112 | if cur_messages_len > 200: 113 | st.markdown( 114 | f""" 115 |

116 | Current recursion step is {cur_messages_len}. Terminated because you exceeded the limit of 200. 117 |

118 | """, 119 | unsafe_allow_html=True 120 | ) 121 | st.session_state["render_last_message"] = False 122 | return Command( 123 | update={"messages": []}, 124 | goto="__end__", 125 | ) 126 | last_message = state["messages"][-1] 127 | # Check if last message is a ToolMessage and has artifacts 128 | if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "claude_3_5_haiku": 129 | # Prepare content list with initial text 130 | content_list = [{ 131 | "type": "text", 132 | "text": """ 133 | Please analyze these generated images by the code above. Your tasks are to: 134 | 1. Examine each visualization carefully 135 | 2. Provide a detailed description of what you observe 136 | 3. Explain the biological implications of the observations if any. 137 | 4. You should use google scholar to find more information to see if the literature supports your observation. 138 | 5. please always do multiple search queries (at least 5) to get a better understanding of the observation. 139 | 6. After you finish your writing, please continue to the next steps according to the system instructions. unless user shows intention for interaction or you are not sure about the next step. 140 | 7. Remember to be consistent with the user's input language. you are a multi-lingual assistant. 141 | 8. If you don't see any plots, or the plots are not clear or crowded, please try to fix the code. if you want to see the plots then don't use plt.close" 142 | """ 143 | }] 144 | 145 | # Add all PNG images to the content list 146 | for rel_path in last_message.artifact: 147 | if rel_path.endswith(".png"): 148 | # Convert relative path to absolute based on current script location 149 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 150 | if os.path.exists(abs_path): 151 | with open(abs_path, "rb") as image_file: 152 | image_data = base64.b64encode(image_file.read()).decode("utf-8") 153 | content_list.append({ 154 | "type": "image_url", 155 | "image_url": {"url": f"data:image/png;base64,{image_data}"} 156 | }) 157 | 158 | # Create a single message with all images if we found any 159 | if len(content_list) > 1: # Only if we have at least one image 160 | image_message = HumanMessage(content=content_list,name="image_assistant") 161 | state["messages"].append(image_message) 162 | 163 | response = llm.invoke(state["messages"]) 164 | if response.tool_calls: 165 | return Command( 166 | update={"messages": [response]}, 167 | goto="tools", 168 | ) 169 | else: 170 | st.session_state["render_last_message"] = True 171 | return Command( 172 | update={"messages": [response]}, 173 | goto="__end__", 174 | ) 175 | 176 | graph.add_edge(START, "modelNode") 177 | graph.add_node("tools", tool_node) 178 | graph.add_node("modelNode", _call_model) 179 | graph.add_edge("tools", "modelNode") 180 | graph_runnable = graph.compile() 181 | 182 | def invoke_our_graph(messages,model_choose): 183 | config = {"recursion_limit": 200, "configurable": {"model": model_choose}} 184 | return graph_runnable.invoke({"messages": messages,"input_messages_len":[len(messages)]},config=config) 185 | -------------------------------------------------------------------------------- /src/prompt.py: -------------------------------------------------------------------------------- 1 | system_prompt = """ 2 | Spatial Transcriptomics AI Agent 3 | 4 | This AI agent specializes in analyzing spatial transcriptomics data through a systematic pipeline. 5 | It utilizes a set of tools to produce Python code snippets for visualization and analysis. The agent is equipped 6 | with tools for data exploration, visualization, and biological interpretation. 7 | 8 | --- 9 | 10 | Available Tools: 11 | 1. python_repl_tool: 12 | - Executes Python code in a live Python shell 13 | - Returns printed outputs and generated visualizations 14 | - Input: Valid Python commands 15 | - Output: Execution results and plot file paths 16 | 17 | 2. google_scholar_search: 18 | - Retrieves academic articles and summaries 19 | - Input: Research topic or biological query 20 | - Output: Article titles, authors, and summaries 21 | - Usage: For literature-backed information 22 | 23 | 3. squidpy_rag_agent: 24 | - Provides guidance on Squidpy usage 25 | - Input: Questions about Squidpy functions 26 | - Output: Code examples and explanations 27 | - Usage: For spatial analysis workflows 28 | 29 | 4. visualize_umap: 30 | - Creates UMAP plots for each time point 31 | - Input: No input required - uses default dataset 32 | - Output: UMAP visualizations colored by cell type 33 | - Shows clustering patterns of different cell populations 34 | 35 | 5. visualize_cell_type_composition: 36 | - Shows cell type proportions across samples 37 | - Input: No input required - uses default dataset 38 | - Output: Stacked bar plots and heatmaps 39 | - Displays changes in cell type composition over time 40 | 41 | 6. visualize_spatial_cell_type_map: 42 | - Creates spatial scatter plots of cell types 43 | - Input: No input required - uses default dataset 44 | - Output: Spatial distribution maps 45 | - Shows cell locations in tissue context 46 | 47 | 7. visualize_cell_cell_interaction: 48 | - Analyzes cell type interaction patterns 49 | - Input: No input required - uses default dataset 50 | - Output: Neighborhood enrichment heatmaps 51 | - Reveals spatial relationships between cell types 52 | 53 | --- 54 | 55 | Pipeline Instructions: 56 | 1. Dimensionality Reduction Visualization: 57 | - Use `visualize_umap` to show cell type clustering 58 | - Examine distribution of cell types in UMAP space 59 | 60 | 2. Cell Type Composition Analysis: 61 | - Apply `visualize_cell_type_composition` to show proportions 62 | - Compare cell type changes across time points 63 | 64 | 3. Spatial Distribution Analysis: 65 | - Use `visualize_spatial_cell_type_map` for tissue context 66 | - Examine spatial organization of cell types 67 | 68 | 4. Cell-Cell Interaction Analysis: 69 | - Apply `visualize_cell_cell_interaction` for neighborhood patterns 70 | - Analyze spatial relationships between cell types 71 | 72 | 5. Report: 73 | - Use `report_tool` to generate a report of the analysis 74 | - Input: No input required - uses default dataset 75 | - Output: Report of the analysis 76 | - Usage: For summarizing the analysis 77 | 78 | --- 79 | 80 | ## Data Context 81 | - **Dataset**: Human pancreatic islets grafted on mouse kidney (STARmap spatial transcriptomic data) 82 | - **File location**: `./data/pancreas_processed_full.h5ad` 83 | - **Data structure**: 84 | - `.obs['sample_name']`: Contains timepoints (Week 4, Week 16, Week 20 post-grafting) 85 | - `.obs['slice_name']`: Contains slice identifiers in format "Week_X_slice_Y" 86 | 87 | --- 88 | 89 | ## Important Instructions: 90 | - Always use the visualization tools to get code snippets first 91 | - Execute the code using `python_repl_tool` 92 | - DO NOT modify any code from the visualization tools 93 | - If the user asks you to perform the end-to-end analysis, you should follow the pipeline order: UMAP → composition → spatial map (individual slice, id stored in .obs['slice_name']) → interaction 94 | - If the user have specific task for you to perform, only call the related tool that the use mentioned. DO NOT call all the tools in the pipeline. 95 | - Use `google_scholar_search` for biological interpretation after plotting the visualization 96 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 97 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 98 | - REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS 99 | - Be consistent with the user's input language. you are a multi-lingual assistant. 100 | - PLEASE DO NOT CALL MULTIPLE TOOLS AT ONCE. 101 | - <> 102 | Note: The agent can run in autonomous mode, executing all visualizations in sequence, or respond to specific analysis requests. 103 | """ 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | spatial_processing_prompt = """ 125 | In Squidpy, when performing spatial analysis with multiple samples in a single AnnData object, certain functions require independent processing for each sample. 126 | This is essential to avoid spatial artifacts that can arise from pooled spatial coordinates across samples, which can lead to incorrect spatial relationships 127 | and neighborhood structures. Here are the key `gr` (Graph) and `pl` (Plotting) functions that must be applied independently per sample, with instructions on usage: 128 | 129 | ## Spatial Graph Functions (gr) 130 | The following functions should be run separately for each sample, rather than on pooled data, to maintain the integrity of sample-specific spatial relationships. 131 | 132 | 1. **gr.spatial_neighbors(adata[, spatial_key, ...])** 133 | - **Purpose**: Creates a spatial graph based on spatial coordinates. 134 | - **Guidance**: For multiple samples, subset the AnnData object by sample and run `gr.spatial_neighbors` independently to prevent false neighborhood links across samples. 135 | 136 | 2. **gr.nhood_enrichment(adata, cluster_key[, ...])** and **gr.co_occurrence(adata, cluster_key[, ...])** 137 | - **Purpose**: Compute neighborhood enrichment and co-occurrence probabilities for clusters. 138 | - **Guidance**: Apply these functions independently to each sample to capture accurate clustering and co-occurrence within each sample's spatial layout. Pooling samples can lead to artificial enrichment patterns. 139 | 140 | 3. **gr.centrality_scores(adata, cluster_key[, ...])** 141 | - **Purpose**: Computes centrality scores per cluster or cell type. 142 | - **Guidance**: Calculate these scores individually per sample to reflect the spatial structure accurately within each sample's layout. 143 | 144 | 4. **gr.interaction_matrix(adata, cluster_key[, ...])** and **gr.ligrec(adata, cluster_key[, ...])** 145 | - **Purpose**: Compute interaction frequencies and test for ligand-receptor interactions based on spatial proximity. 146 | - **Guidance**: For reliable cell-type interactions, run these functions per sample to ensure interactions reflect true spatial proximity within each sample. 147 | 148 | 5. **gr.ripley(adata, cluster_key[, mode, ...])** 149 | - **Purpose**: Calculates Ripley's statistics to assess clustering at various distances. 150 | - **Guidance**: Ripley's clustering analysis should be applied separately to each sample, as pooling data can obscure sample-specific clustering patterns. 151 | 152 | 6. **gr.spatial_autocorr(adata[, ...])** 153 | - **Purpose**: Calculates global spatial autocorrelation metrics (e.g., Moran's I or Geary's C). 154 | - **Guidance**: Autocorrelation measures spatial dependency, so compute it individually per sample to prevent cross-sample biases. 155 | 156 | 7. **gr.mask_graph(sdata, table_key, polygon_mask)** 157 | - **Purpose**: Masks the spatial graph based on a polygon mask. 158 | - **Guidance**: Apply this function per sample only if each sample has a separate spatial graph. If applied to pooled data, ensure that independent graphs have already been created for each sample. 159 | 160 | ## Plotting Functions (pl) 161 | When visualizing results, it's essential to apply the following plotting functions individually to each sample to accurately represent sample-specific spatial patterns: 162 | 163 | 1. **pl.spatial_scatter(adata[, shape, color, ...])** VERY IMPORTANT, REMEMBER TO SPECIFY shape=None, if using STARmap spatial transcriptomic data (sq.pl.spatial_scatter(adata_sample, shape=None)) 164 | - **Purpose**: Visualizes spatial omics data with overlayed sample information. 165 | - **Guidance**: Plot each sample independently to avoid overlapping spatial coordinates from multiple samples. 166 | 167 | 2. **pl.spatial_segment(adata[, color, groups, ...])** 168 | - **Purpose**: Plots spatial data with segmentation masks. 169 | - **Guidance**: Generate segmentation plots per sample to accurately reflect spatial regions within each sample. 170 | 171 | 3. **pl.nhood_enrichment(adata, cluster_key[, ...])** 172 | - **Purpose**: Visualizes neighborhood enrichment. 173 | - **Guidance**: Plot neighborhood enrichment individually for each sample to capture enrichment patterns within each sample's spatial structure. 174 | 175 | 4. **pl.centrality_scores(adata, cluster_key[, ...])** 176 | - **Purpose**: Plots centrality scores. 177 | - **Guidance**: Centrality plots should be generated individually per sample to accurately represent spatial structure. 178 | 179 | 5. **pl.interaction_matrix(adata, cluster_key[, ...])** 180 | - **Purpose**: Plots the interaction matrix of clusters. 181 | - **Guidance**: Visualize the interaction matrix per sample to reflect true intra-sample interaction patterns. 182 | 183 | 6. **pl.ligrec(adata[, cluster_key, ...])** 184 | - **Purpose**: Plots ligand-receptor interactions. 185 | - **Guidance**: Visualize ligand-receptor interactions per sample to avoid mixing spatial proximity across samples. 186 | 187 | 7. **pl.ripley(adata, cluster_key[, mode, ...])** 188 | - **Purpose**: Plots Ripley's statistics for spatial clustering. 189 | - **Guidance**: Generate Ripley's plots per sample to capture sample-specific clustering without interference from pooled data. 190 | 191 | 8. **pl.co_occurrence(adata, cluster_key[, ...])** 192 | - **Purpose**: Plots co-occurrence probability of clusters. 193 | - **Guidance**: Plot per sample to reflect accurate co-occurrence within that sample. 194 | 195 | In summary, each of these functions should be applied independently to each sample to prevent spatial artifacts and maintain sample-specific spatial integrity. 196 | This approach ensures reliable spatial relationships within each sample, preserving the biological context in spatial analyses. 197 | """ 198 | 199 | 200 | -------------------------------------------------------------------------------- /src/speech_to_text.py: -------------------------------------------------------------------------------- 1 | # speech_to_text.py 2 | 3 | import base64 4 | import os 5 | from openai import OpenAI 6 | from audio_recorder_streamlit import audio_recorder 7 | import streamlit as st 8 | from dotenv import load_dotenv 9 | from typing import Optional 10 | from st_audiorec import st_audiorec 11 | # Load environment variables for API credentials 12 | load_dotenv() 13 | 14 | # Function to convert audio bytes to text using OpenAI's Whisper model 15 | def convert_audio_to_text(audio_bytes: bytes) -> Optional[str]: 16 | """ 17 | Convert audio bytes to text using OpenAI's Whisper model. 18 | 19 | Parameters: 20 | - audio_bytes (bytes): The audio data to convert to text. 21 | 22 | Returns: 23 | - str: Transcribed text if successful, None otherwise. 24 | """ 25 | try: 26 | # Initialize OpenAI client 27 | client = OpenAI( 28 | api_key=os.getenv("WHISPER_API_KEY") 29 | ) 30 | 31 | # Create a temporary file to store the audio bytes 32 | temp_filename = "temp_audio.wav" 33 | with open(temp_filename, "wb") as f: 34 | f.write(audio_bytes) 35 | 36 | # Open the temporary file and transcribe using Whisper 37 | with open(temp_filename, "rb") as audio_file: 38 | transcription = client.audio.transcriptions.create( 39 | model="whisper-1", 40 | file=audio_file 41 | ) 42 | 43 | # Clean up the temporary file 44 | os.remove(temp_filename) 45 | 46 | return transcription.text 47 | 48 | except Exception as e: 49 | st.sidebar.error(f"An error occurred: {e}", icon="🚨") 50 | return None 51 | 52 | # Function to record audio and get transcription 53 | def input_from_mic() -> Optional[str]: 54 | """ 55 | Record audio from the microphone in the Audio Options tab and convert it to text. 56 | """ 57 | # Use st_audiorec within the Audio Options tab 58 | with st.spinner("Recording..."): 59 | st.session_state["audio_bytes"] = st_audiorec() 60 | 61 | # Check if audio was captured and proceed with transcription 62 | if st.session_state.get("audio_bytes"): 63 | with st.spinner("Transcribing..."): 64 | transcribed_text = convert_audio_to_text(st.session_state["audio_bytes"]) 65 | 66 | # Display the transcribed text in the sidebar Audio Options tab 67 | st.write("**Transcribed Text:**") 68 | st.write(transcribed_text) 69 | 70 | # Clear audio bytes after processing to avoid reuse 71 | st.session_state["audio_bytes"] = None 72 | return transcribed_text 73 | else: 74 | st.write("No audio recorded.") # Inform the user if no audio was captured 75 | return None 76 | 77 | 78 | def convert_text_to_speech(text: str, filename: str = "response.wav") -> Optional[str]: 79 | """ 80 | Convert text to speech using OpenAI's GPT-4o audio model and save as a WAV file. 81 | 82 | Parameters: 83 | - text (str): The text to convert to speech. 84 | - filename (str): The name of the file to save the audio output. Default is "response.wav". 85 | 86 | Returns: 87 | - str: Path to the saved audio file if successful, None otherwise. 88 | """ 89 | prompt_text = f'''Please convert the text between <<< and >>> into speech. 90 | Please be consistent with the user's input language. you are a multi-lingual assistant. 91 | If the text is too long to convert fully, create a summarized version. 92 | Start a summarized response with: "The original response is too long; here is a summary." 93 | Remember: the speech output should NOT exceed 1 minute. 94 | Text to convert: <<< {text} >>>''' 95 | 96 | try: 97 | # Initialize OpenAI client 98 | client = OpenAI( 99 | api_key=os.getenv("OPENAI_API_KEY") 100 | ) 101 | 102 | # Show spinner while processing 103 | with st.spinner("Generating voice response..."): 104 | # Prepare the API call 105 | completion = client.chat.completions.create( 106 | model="gpt-4o-audio-preview", 107 | modalities=["text", "audio"], 108 | audio={"voice": "alloy", "format": "wav"}, 109 | messages=[ 110 | { 111 | "role": "user", 112 | "content": prompt_text 113 | } 114 | ] 115 | ) 116 | 117 | # Decode and save the audio file 118 | wav_bytes = base64.b64decode(completion.choices[0].message.audio.data) 119 | with open(filename, "wb") as f: 120 | f.write(wav_bytes) 121 | 122 | return filename 123 | 124 | except Exception as e: 125 | st.sidebar.error(f"An error occurred: {e}", icon="🚨") 126 | return None 127 | 128 | -------------------------------------------------------------------------------- /src/squidpy_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Annotated, TypedDict, List, Dict, Any 3 | from dotenv import load_dotenv 4 | from git import Repo 5 | from langchain_core.documents import Document 6 | from langchain_text_splitters import RecursiveCharacterTextSplitter 7 | from langchain_text_splitters import Language 8 | from langchain_community.document_loaders.generic import GenericLoader 9 | from langchain_community.document_loaders.parsers import LanguageParser 10 | from langchain_openai import OpenAIEmbeddings 11 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 12 | from langchain_chroma import Chroma 13 | from langchain_anthropic import ChatAnthropic 14 | from langchain_openai import ChatOpenAI 15 | from langgraph.graph import StateGraph, START, END 16 | from langchain_core.messages import AnyMessage 17 | from langchain_core.tools import tool 18 | from langgraph.prebuilt import InjectedState 19 | from prompt import spatial_processing_prompt 20 | 21 | load_dotenv() 22 | 23 | # Configuration 24 | REPO_PATH = "./packages_available/squidpy" 25 | PERSIST_DIRECTORY = "./db/chroma_squidpy_db" 26 | 27 | # Define state for Squidpy RAG application 28 | class SquidpyRAGState(TypedDict): 29 | query: str 30 | context: List[Document] 31 | answer: str 32 | chat_history: List[AnyMessage] 33 | 34 | class SquidpyRAGTool: 35 | def __init__(self, model: str = "claude-3-7-sonnet-20250219"): 36 | self.model = model 37 | self.vector_store = self.setup_squidpy_index() 38 | self.rag_pipeline = self.create_squidpy_rag_pipeline() 39 | 40 | def setup_squidpy_index(self): 41 | """Setup and index the Squidpy repository for RAG if not already done.""" 42 | 43 | # Clone repo if it doesn't exist 44 | if not os.path.exists(REPO_PATH): 45 | print(f"Cloning Squidpy repository to {REPO_PATH}...") 46 | Repo.clone_from("https://github.com/scverse/squidpy", to_path=REPO_PATH) 47 | 48 | # Initialize embeddings 49 | embeddings = OpenAIEmbeddings(disallowed_special=()) 50 | 51 | # Load or create vector database 52 | if not os.path.exists(PERSIST_DIRECTORY): 53 | print("Creating new Squidpy vector database...") 54 | 55 | # Load Python files from the repository 56 | loader = GenericLoader.from_filesystem( 57 | REPO_PATH, 58 | glob="**/*", 59 | suffixes=[".py"], 60 | exclude=["**/non-utf8-encoding.py"], 61 | parser=LanguageParser(language=Language.PYTHON, parser_threshold=500), 62 | ) 63 | documents = loader.load() 64 | print(f"Loaded {len(documents)} documents from Squidpy") 65 | 66 | # Split documents into chunks 67 | splitter = RecursiveCharacterTextSplitter.from_language( 68 | language=Language.PYTHON, chunk_size=2000, chunk_overlap=200 69 | ) 70 | texts = splitter.split_documents(documents) 71 | print(f"Split into {len(texts)} text chunks for Squidpy") 72 | 73 | # Create vector store 74 | vector_store = Chroma.from_documents( 75 | documents=texts, 76 | embedding=embeddings, 77 | persist_directory=PERSIST_DIRECTORY 78 | ) 79 | print(f"Created new Chroma database at {PERSIST_DIRECTORY}") 80 | else: 81 | # Load existing vector store 82 | vector_store = Chroma( 83 | persist_directory=PERSIST_DIRECTORY, 84 | embedding_function=embeddings 85 | ) 86 | print(f"Loaded existing Chroma database from {PERSIST_DIRECTORY}") 87 | 88 | return vector_store 89 | 90 | def create_squidpy_rag_pipeline(self): 91 | """Create the RAG pipeline for Squidpy using LangGraph.""" 92 | 93 | # Initialize the LLM 94 | llm = ChatAnthropic(model=self.model) 95 | #llm = ChatOpenAI(model="gpt-4o") 96 | # Define the retrieval step 97 | def retrieve(state: SquidpyRAGState): 98 | """Retrieve relevant documents based on the query.""" 99 | squidpy_retriever = self.vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 8}) 100 | retrieved_docs = squidpy_retriever.invoke( 101 | state["query"] 102 | ) 103 | return {"context": retrieved_docs} 104 | 105 | # Define the generation step 106 | def generate(state: SquidpyRAGState): 107 | """Generate an answer using the retrieved context.""" 108 | # Combine all document content 109 | context_content = "\n\n".join(doc.page_content for doc in state["context"]) 110 | chat_history = state["chat_history"] 111 | 112 | # Create the prompt 113 | prompt = ChatPromptTemplate.from_messages([ 114 | MessagesPlaceholder("chat_history"), 115 | ("user", "The above are the CHAT HISTORY between the user and the spatial transcriptomics assistant. you should take into account the chat history when generating the response."), 116 | ("user", 117 | "You are an expert in Squidpy, specializing in providing authentic Squidpy code " 118 | "and explanations on its usage. IMPORTANT: do not use python bracket for the code. " 119 | "REPEAT: do not use python bracket for the code. " 120 | "For each query, respond with:\n" 121 | "1. Squidpy code to solve the user's question.\n" 122 | "2. A concise explanation of the code, focusing on Squidpy-specific concepts, " 123 | "methods, and relevant parameters.\n\n" 124 | "3. REMEMBER to specify shape = None for STARmap spatial transcriptomic data.\n" 125 | "The following are some additional instructions:\n" 126 | "{spatial_processing_prompt}\n\n" 127 | "CONTEXT ON SQUIDPY:\n{context_content}\n\n" 128 | ), 129 | ("user", "USER QUESTION: {query}"), 130 | ]) 131 | 132 | # Generate messages from the prompt 133 | messages = prompt.invoke({ 134 | "query": state["query"], 135 | "chat_history": chat_history, 136 | "context_content": context_content, 137 | "spatial_processing_prompt": spatial_processing_prompt 138 | }) 139 | 140 | # Get response from LLM 141 | response = llm.invoke(messages) 142 | 143 | return {"answer": response.content} 144 | 145 | # Build the graph 146 | graph_builder = StateGraph(SquidpyRAGState) 147 | graph_builder.add_node("retrieve", retrieve) 148 | graph_builder.add_node("generate", generate) 149 | 150 | # Define the flow 151 | graph_builder.add_edge(START, "retrieve") 152 | graph_builder.add_edge("retrieve", "generate") 153 | graph_builder.add_edge("generate", END) 154 | 155 | # Compile the graph 156 | return graph_builder.compile() 157 | 158 | def run(self, query: str, chat_history: List[AnyMessage] = None): 159 | """Run the Squidpy RAG pipeline with the given query and chat history.""" 160 | if chat_history is None: 161 | chat_history = [] 162 | 163 | response = self.rag_pipeline.invoke({ 164 | "query": query, 165 | "chat_history": chat_history, 166 | "context": [], # Will be populated by the retrieve step 167 | "answer": "" # Will be populated by the generate step 168 | }) 169 | 170 | return response["answer"] 171 | 172 | # Initialize the Squidpy RAG tool 173 | squidpy_rag = SquidpyRAGTool() 174 | 175 | @tool 176 | def squidpy_rag_agent(state: Annotated[Dict, InjectedState], query: str) -> str: 177 | """Tool that provides Squidpy code and explanations based on RAG. 178 | Uses the Squidpy codebase to generate accurate Squidpy code for spatial transcriptomics analysis. 179 | 180 | Args: 181 | query: The query to answer using Squidpy knowledge 182 | 183 | Returns: 184 | str: Code and explanation for the Squidpy query 185 | """ 186 | # Extract the chat history from the injected state 187 | #chat_history = state["messages"][:-1] 188 | chat_history = [] 189 | # Run the Squidpy RAG with the query and chat history 190 | #example_answer = squidpy_rag.run(query, chat_history) 191 | #final_answer = example_answer + "\n\nPlease modify the code based on the current context and use `python_repl_tool` to run the modified code above." 192 | 193 | return squidpy_rag.run(query, chat_history) -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230938_643335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230938_643335.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_230970.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_230970.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_315409.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_315409.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_230939_437124.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_230939_437124.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231021_978237.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231021_978237.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231021_997915.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231021_997915.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231022_080390.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231022_080390.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_072022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_072022.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_527042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_527042.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_719080.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_719080.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231106_894313.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231106_894313.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_098543.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_098543.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_271860.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_271860.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_507332.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_507332.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_683073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_683073.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231107_859878.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231107_859878.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_231240_336695.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_231240_336695.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_376750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_376750.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_523789.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_523789.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_232828_774850.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_232828_774850.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233051_701835.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233051_701835.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233051_796053.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233051_796053.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_620074.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_620074.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_789226.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_789226.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233653_961432.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233653_961432.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_148844.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_148844.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_322050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_322050.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_530819.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_530819.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_693983.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_693983.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233654_875336.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233654_875336.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_233655_053762.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_233655_053762.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234033_996956.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234033_996956.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234034_131464.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234034_131464.png -------------------------------------------------------------------------------- /src/tmp/plots/plot_20250524_234034_262805.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiuLab-Bioelectronics-Harvard/STAgent/5716aa43c512b2ac0d10f642104dc8f791088675/src/tmp/plots/plot_20250524_234034_262805.png -------------------------------------------------------------------------------- /src/tools.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from langchain_core.tools import tool 3 | from serpapi import GoogleSearch 4 | import os 5 | from squidpy_rag import squidpy_rag_agent 6 | from textwrap import dedent 7 | from langchain_anthropic import ChatAnthropic 8 | from langgraph.prebuilt import InjectedState 9 | from typing import Annotated, Dict 10 | from langchain_core.messages import HumanMessage 11 | from dotenv import load_dotenv 12 | from datetime import datetime 13 | import streamlit as st 14 | import functools 15 | import logging 16 | import multiprocessing 17 | import json 18 | import re 19 | import sys 20 | from io import StringIO 21 | from typing import Dict, Optional 22 | from pydantic import BaseModel, Field 23 | logger = logging.getLogger(__name__) 24 | load_dotenv() 25 | 26 | # Google Scholar Tool 27 | class GoogleScholarAPI: 28 | def __init__(self, serp_api_key: str = None, top_k_results: int = 40, hl: str = "en", lr: str = "lang_en"): 29 | self.serp_api_key = serp_api_key or os.environ.get("SERP_API_KEY") 30 | self.top_k_results = top_k_results 31 | self.hl = hl 32 | self.lr = lr 33 | 34 | def run(self, query: str) -> str: 35 | if not self.serp_api_key: 36 | return "API key missing for Google Scholar search." 37 | params = { 38 | "engine": "google_scholar", 39 | "q": query, 40 | "api_key": self.serp_api_key, 41 | "hl": self.hl, 42 | "lr": self.lr, 43 | "num": min(self.top_k_results, 40), 44 | } 45 | search = GoogleSearch(params) 46 | results = search.get_dict().get("organic_results", []) 47 | if not results: 48 | return "No good Google Scholar Result was found." 49 | return "\n\n".join([ 50 | f"Title: {result.get('title', '')}\n" 51 | f"Authors: {', '.join([a.get('name') for a in result.get('publication_info', {}).get('authors', [])])}\n" 52 | f"Summary: {result.get('snippet', '')}\n" 53 | f"Link: {result.get('link', '')}" 54 | for result in results 55 | ]) 56 | 57 | google_scholar = GoogleScholarAPI() 58 | 59 | 60 | @tool 61 | def google_scholar_search(query: str) -> str: 62 | """Searches Google Scholar for the provided query.""" 63 | return google_scholar.run(query) 64 | 65 | 66 | @tool 67 | def visualize_cell_cell_interaction_tool() -> str: 68 | """ 69 | Visualizes cell-cell interaction patterns from spatial transcriptomics data. 70 | 71 | This tool analyzes and visualizes how different cell types interact with each other 72 | in spatial proximity using neighborhood enrichment analysis. It: 73 | 74 | 1. Loads preprocessed pancreas spatial transcriptomics data 75 | 2. For each sample in the dataset: 76 | - Computes spatial neighbors between cells 77 | - Performs neighborhood enrichment analysis based on cell types 78 | - Creates a heatmap visualization showing interaction patterns 79 | 80 | The visualization shows: 81 | - Red colors indicate cell types that are more likely to be neighbors 82 | - Blue colors indicate cell types that tend to avoid each other 83 | - Color intensity represents the strength of attraction/avoidance 84 | 85 | No input parameters are required - the tool uses a default preprocessed dataset. 86 | 87 | Note: This code should be executed using the python_repl_tool. 88 | """ 89 | code = f""" 90 | import squidpy as sq 91 | import anndata as ad 92 | import scanpy as sc 93 | import seaborn as sns 94 | data_path = './data/pancreas_processed_full.h5ad' 95 | adata = ad.read_h5ad(data_path) 96 | # Neighborhood enrichment analysis 97 | id = adata.obs['slice_name'].unique() 98 | result_cell_type_csv = 99 | # set the NaN value to 0 100 | for sample_i in id: 101 | data_i = adata[adata.obs['slice_name']==sample_i] 102 | sq.gr.spatial_neighbors(data_i, coord_type="generic", spatial_key="spatial", delaunay=True) 103 | sq.gr.nhood_enrichment(data_i, cluster_key="cell_type") 104 | data_i.uns['cell_type_nhood_enrichment']['zscore'] = np.nan_to_num(data_i.uns['cell_type_nhood_enrichment']['zscore']) 105 | result_cell_type_csv[sample_i] = pd.DataFrame(data_i.uns['cell_type_nhood_enrichment']['zscore'], columns=data_i.obs['cell_type'].cat.categories, index=data_i.obs['cell_type'].cat.categories) 106 | 107 | week_4 = ['Week_4_slice_1', 'Week_4_slice_2'] 108 | week_16 = ['Week_16_slice_1', 'Week_16_slice_2', 'Week_16_slice_3'] 109 | week_20 = ['Week_20_slice_1', 'Week_20_slice_2', 'Week_20_slice_3', 'Week_20_slice_4'] 110 | 111 | week_4_result = 112 | for sample_i in week_4: 113 | week_4_result[sample_i] = result_cell_type_csv[sample_i] 114 | week_16_result = 115 | for sample_i in week_16: 116 | week_16_result[sample_i] = result_cell_type_csv[sample_i] 117 | week_20_result = 118 | for sample_i in week_20: 119 | week_20_result[sample_i] = result_cell_type_csv[sample_i] 120 | 121 | 122 | week_4_average = (week_4_result['(4, 1)'] + week_4_result['(4, 29)'])/2 123 | week_16_average = (week_16_result['(16, 185)'] + week_16_result['(16, 215)'] + week_16_result['(16, 242)'])/3 124 | week_20_average = (week_20_result['(20, 1)'] + week_20_result['(20, 46)'] + week_20_result['(20, 70)'] + week_20_result['(20, 160)'])/4 125 | 126 | enrichment_dict = 127 | 'week_4': week_4_average, 128 | 'week_16': week_16_average, 129 | 'week_20': week_20_average 130 | 131 | 132 | input_list = ['human_alpha', 133 | 'human_beta', 134 | 'human_delta', 135 | 'human_enterochromaffin', 136 | 'human_exocrine', 137 | 'human_mesenchymal', 138 | 'mouse_nephron', 139 | 'mouse_ureteric_epithelium', 140 | 'mouse_vascular'] 141 | 142 | for sample_i in enrichment_dict.keys(): 143 | a = enrichment_dict[sample_i].reindex(index=input_list, columns=input_list) 144 | ax = sns.heatmap(a, vmax=30, vmin=-30, cmap='RdBu_r', annot=True, fmt=".1f") 145 | ax.set_title('cell_type neighborhood enrichment of {{}}'.format(sample_i)) 146 | plt.show() 147 | 148 | # Please use python_repl_tool to execute the above code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 149 | <<>> 150 | """ 151 | return dedent(code) 152 | 153 | 154 | @tool 155 | def visualize_spatial_cell_type_map() -> str: 156 | """ 157 | Visualizes spatial distribution of cell types across different slices. 158 | 159 | This tool creates spatial scatter plots showing the location and identity 160 | of different cell types in the tissue space. It: 161 | 162 | 1. Loads preprocessed pancreas spatial transcriptomics data 163 | 2. For each slice (Week_4_slice_1, Week_4_slice_2, Week_16_slice_1, etc.): 164 | - Creates a spatial plot where each point represents a cell 165 | - Colors points based on cell type identity 166 | - Displays the spatial organization of different cell populations 167 | 168 | The visualization shows: 169 | - Each dot represents a single cell 170 | - Colors indicate different cell types 171 | - Spatial coordinates preserve the original tissue structure 172 | - Separate plots for each slice for detailed spatial comparison 173 | 174 | No input parameters are required - the tool uses a default preprocessed dataset. 175 | 176 | Note: This code should be executed using the python_repl_tool. 177 | """ 178 | 179 | code = f""" 180 | import scanpy as sc 181 | import squidpy as sq 182 | import anndata as ad 183 | import numpy as np 184 | import pandas as pd 185 | import matplotlib.pyplot as plt 186 | import os 187 | # Load data 188 | data_path = './data/pancreas_processed_full.h5ad' 189 | adata = ad.read_h5ad(data_path) 190 | # Define color dictionary for cell types 191 | cell_type_color_dict = 192 | 'human_enterochromaffin': '#fdbf6e', 193 | 'human_alpha': '#34a048', 194 | 'human_beta': '#f69999', 195 | 'human_delta': '#e21f26', 196 | 'human_exocrine': '#2078b4', 197 | 'human_mesenchymal': '#b4d88a', 198 | 'mouse_vascular': '#f57f20', 199 | 'mouse_nephron': '#6b3e98', 200 | 'mouse_ureteric_epithelium': '#fbf49c' 201 | 202 | # Get slice names 203 | slice_names = sorted(adata.obs['slice_name'].unique().tolist()) 204 | # Plot spatial distribution for each slice 205 | for slice_id in slice_names: 206 | adata_slice = adata[adata.obs['slice_name'] == slice_id].copy() 207 | fig, ax = plt.subplots(figsize=(10, 8)) 208 | # Get unique cell types in this slice 209 | cell_types_in_slice = adata_slice.obs['cell_type'].unique() 210 | # Plot each cell type with its color 211 | for cell_type in cell_types_in_slice: 212 | cells = adata_slice[adata_slice.obs['cell_type'] == cell_type] 213 | ax.scatter( 214 | cells.obsm['spatial'][:, 0], 215 | cells.obsm['spatial'][:, 1], 216 | c=cell_type_color_dict[cell_type], 217 | label=cell_type, 218 | s=20, 219 | alpha=0.8 220 | ) 221 | ax.set_title(f'Spatial Cell Type Distribution - {{slice_id}}') 222 | ax.set_xlabel('Spatial X') 223 | ax.set_ylabel('Spatial Y') 224 | ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') 225 | plt.tight_layout() 226 | plt.show() 227 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 228 | <<>> 229 | """ 230 | return dedent(code) 231 | 232 | 233 | @tool 234 | def visualize_cell_type_composition() -> str: 235 | """ 236 | Visualizes cell type composition changes across different time points. 237 | 238 | This tool creates both stacked bar plots and heatmaps to show how cell type 239 | proportions change over time. It: 240 | 241 | 1. Loads preprocessed pancreas spatial transcriptomics data 242 | 2. Calculates cell type proportions for each sample 243 | 3. Creates two complementary visualizations: 244 | - Stacked bar plot showing relative proportions 245 | - Heatmap showing exact percentage values 246 | 247 | The visualizations show: 248 | - Relative abundance of each cell type per sample 249 | - Changes in cell type composition across time points 250 | - Exact percentage values for each cell type 251 | 252 | No input parameters are required - the tool uses a default preprocessed dataset. 253 | 254 | Note: This code should be executed using the python_repl_tool. 255 | """ 256 | code = f""" 257 | import matplotlib.pyplot as plt 258 | import pandas as pd 259 | import seaborn as sns 260 | import squidpy as sq 261 | import anndata as ad 262 | import scanpy as sc 263 | # Load the data 264 | data_path = './data/pancreas_processed_full.h5ad' 265 | adata = ad.read_h5ad(data_path) 266 | # Calculate cell type composition for each sample 267 | composition_df = pd.crosstab( 268 | adata.obs['sample_name'], 269 | adata.obs['cell_type'], 270 | normalize='index' # This gives proportions instead of raw counts 271 | ) * 100 # Convert to percentages 272 | 273 | plt.figure(figsize=(12, 6)) 274 | composition_df.plot(kind='bar', stacked=True) 275 | plt.title('Cell Type Composition Across Samples') 276 | plt.xlabel('Sample') 277 | plt.ylabel('Percentage of Cells') 278 | plt.legend(title='Cell Type', bbox_to_anchor=(1.05, 1), loc='upper left') 279 | plt.tight_layout() 280 | plt.xticks(rotation=45) 281 | plt.show() 282 | 283 | 284 | print("Cell type composition (%):") 285 | print(composition_df.round(2)) 286 | plt.figure(figsize=(10, 6)) 287 | sns.heatmap(composition_df, annot=True, fmt='.1f', cmap='YlOrRd') 288 | plt.title('Human Cell Type Composition Heatmap') 289 | plt.ylabel('Sample') 290 | plt.xlabel('Cell Type') 291 | plt.tight_layout() 292 | plt.show() 293 | 294 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 295 | <<>> 296 | <<>> 297 | """ 298 | return dedent(code) 299 | 300 | 301 | @tool 302 | def visualize_umap() -> str: 303 | """ 304 | Visualizes UMAP plots for cell types across different time points. 305 | 306 | This tool creates UMAP visualizations code showing the distribution of cell types 307 | in reduced dimensional space. It: 308 | 309 | 1. Loads preprocessed pancreas spatial transcriptomics data 310 | 2. For each sample (Week 4, Week 16, Week 20): 311 | - Creates a UMAP plot where each point represents a cell 312 | - Colors points based on cell type identity 313 | - Shows cell type clustering patterns 314 | 315 | The visualization shows: 316 | - Each dot represents a single cell 317 | - Colors indicate different cell types 318 | - Clustering patterns reveal relationships between cell types 319 | - Separate plots for each time point for temporal comparison 320 | 321 | No input parameters are required - the tool uses a default preprocessed dataset. 322 | 323 | Note: This code should be executed using the python_repl_tool. 324 | """ 325 | code = f""" 326 | import squidpy as sq 327 | import anndata as ad 328 | import scanpy as sc 329 | 330 | # Load the data 331 | data_path = './data/pancreas_processed_full.h5ad' 332 | adata = ad.read_h5ad(data_path) 333 | 334 | cell_type_color_dict = 335 | 'human_enterochromaffin': '#fdbf6e', 336 | 'human_alpha': '#34a048', 337 | 'human_beta': '#f69999', 338 | 'human_delta': '#e21f26', 339 | 'human_exocrine': '#2078b4', 340 | 'human_mesenchymal': '#b4d88a', 341 | 'mouse_vascular': '#f57f20', 342 | 'mouse_nephron': '#6b3e98', 343 | 'mouse_ureteric_epithelium': '#fbf49c' 344 | 345 | # Plot the UMAP for the whole smaple 346 | sc.pl.umap( 347 | adata, 348 | color='cell_type', # Replace with your cell type annotation key 349 | title=f'umap for all samples', 350 | legend_loc='on data', 351 | legend_fontsize="small", 352 | legend_fontoutline=2, 353 | palette=cell_type_color_dict 354 | ) 355 | # Iterate over each sample in the AnnData object 356 | for sample_id in ['Week_4', 'Week_16', 'Week_20']: 357 | # Subset the AnnData object for the current sample 358 | adata_sample = adata[adata.obs['sample_name'] == sample_id] 359 | # Plot umap for the specific sample 360 | sc.pl.umap( 361 | adata_sample, 362 | color='cell_type', # Replace with your cell type annotation key 363 | title=f'umap for sample {{sample_id}}', 364 | legend_loc='on data', 365 | legend_fontsize="small", 366 | legend_fontoutline=2, 367 | palette=cell_type_color_dict 368 | ) 369 | # Please use python_repl_tool to execute this code directly. IMPORTANT: PLEASE DON'T CHANGE THE CODE. 370 | <<>> 371 | """ 372 | return dedent(code) 373 | 374 | 375 | @tool 376 | def report_tool(state: Annotated[Dict, InjectedState], query: str) -> str: 377 | """Generates a comprehensive scientific report based on the conversation history. 378 | 379 | This tool takes the entire conversation history and generates a well-structured scientific report 380 | in academic paper format, covering the analysis performed and insights gathered from the spatial 381 | transcriptomics data. The report includes sections like Abstract, Introduction, Methods, Results, 382 | Discussion, Conclusion, and References. 383 | 384 | The tool saves the report as a PDF file. 385 | 386 | Args: 387 | state: The current conversation state containing message history 388 | query: Additional context or specific requirements for the report (optional) 389 | 390 | Returns: 391 | str: Confirmation message with the path to the saved PDF file 392 | """ 393 | 394 | # Extract the chat history from the injected state 395 | chat_history = state["messages"] 396 | 397 | 398 | 399 | report_prompt = """ 400 | # Scientific Analysis Report 401 | 402 | 403 | Generate a comprehensive scientific report (minimum 1000 words) based on the conversation history above. The report should be specific and avoid general statements. All analysis should be based on data presented in the conversation. 404 | 405 | 406 | 407 | ## 1. Objective 408 | - Clear statement of the research goals 409 | - Overview of what the report aims to address 410 | 411 | ## 2. Study Overview 412 | - Background on the research topic 413 | - Purpose of the study 414 | - Key research questions being investigated 415 | 416 | ## 3. Methods Summary 417 | - Description of analysis techniques employed 418 | - Outline of data processing approaches used 419 | 420 | ## 4. Key Findings 421 | - Detailed results from each visualization/analysis in the conversation 422 | - Specific observations with quantitative data where available 423 | - Identification of significant patterns or trends 424 | 425 | ## 5. Biological Implications 426 | - Interpretation of the biological significance of findings 427 | - Integration with existing literature (include inline citations) 428 | - Discussion of broader impacts and relevance 429 | 430 | ## 6. Conclusion 431 | - Summary of major discoveries 432 | - Future research directions 433 | - Potential applications 434 | 435 | ## 7. References 436 | - Relevant citations from literature searches 437 | - Format: Title only (NO author names or years or URL) 438 | 439 | 440 | 441 | 1. OUTPUT ONLY THE REPORT CONTENT, NO OTHER TEXT 442 | 2. Use specific data-driven insights rather than general statements 443 | 3. Maintain scientific tone throughout 444 | 4. Include inline citations where appropriate 445 | 5. Do not assume conclusions not supported by the data 446 | 6. Be consistent with the user's input language. you are a multi-lingual assistant. 447 | FORMAT: THE REPORT SHOULD BE IN MARKDOWN FORMAT. 448 | 449 | """ 450 | 451 | # Generate the report 452 | ins = chat_history[:-1] + [HumanMessage(content=report_prompt, name="report_tool")] 453 | st.write(ins) 454 | llm = ChatAnthropic(model="claude-3-7-sonnet-20250219",max_tokens=8000) 455 | report = llm.invoke(ins) 456 | try: 457 | # Save as markdown file 458 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 459 | os.makedirs('output_report', exist_ok=True) 460 | md_filename = f'./output_report/spatial_transcriptomics_report_{timestamp}.md' 461 | 462 | with open(md_filename, 'w', encoding='utf-8') as f: 463 | f.write(report.content) 464 | return f"Report has been saved as markdown file: {md_filename}" 465 | 466 | except Exception as e: 467 | return f"Error saving markdown file: {str(e)}" 468 | 469 | @functools.lru_cache(maxsize=None) 470 | def warn_once() -> None: 471 | """Warn once about the dangers of PythonREPL.""" 472 | logger.warning("Python REPL can execute arbitrary code. Use with caution.") 473 | 474 | 475 | class PythonREPL(BaseModel): 476 | """Simulates a standalone Python REPL.""" 477 | 478 | globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") # type: ignore[arg-type] 479 | locals: Optional[Dict] = None # type: ignore[arg-type] 480 | 481 | @staticmethod 482 | def sanitize_input(query: str) -> str: 483 | """Sanitize input to the python REPL. 484 | 485 | Remove whitespace, backtick & python 486 | (if llm mistakes python console as terminal) 487 | 488 | Args: 489 | query: The query to sanitize 490 | 491 | Returns: 492 | str: The sanitized query 493 | """ 494 | query = re.sub(r"^(\s|`)*(?i:python)?\s*", "", query) 495 | query = re.sub(r"(\s|`)*$", "", query) 496 | return query 497 | 498 | @classmethod 499 | def worker( 500 | cls, 501 | command: str, 502 | globals: Optional[Dict], 503 | locals: Optional[Dict], 504 | queue: multiprocessing.Queue, 505 | ) -> None: 506 | old_stdout = sys.stdout 507 | sys.stdout = mystdout = StringIO() 508 | try: 509 | cleaned_command = cls.sanitize_input(command) 510 | exec(cleaned_command, globals, locals) 511 | sys.stdout = old_stdout 512 | queue.put(mystdout.getvalue()) 513 | except Exception as e: 514 | sys.stdout = old_stdout 515 | queue.put(repr(e)) 516 | 517 | def run(self, command: str, timeout: Optional[int] = None) -> str: 518 | """Run command with own globals/locals and returns anything printed. 519 | Timeout after the specified number of seconds.""" 520 | 521 | # Warn against dangers of PythonREPL 522 | warn_once() 523 | 524 | queue: multiprocessing.Queue = multiprocessing.Queue() 525 | 526 | # Only use multiprocessing if we are enforcing a timeout 527 | if timeout is not None: 528 | # create a Process 529 | p = multiprocessing.Process( 530 | target=self.worker, args=(command, self.globals, self.locals, queue) 531 | ) 532 | 533 | # start it 534 | p.start() 535 | 536 | # wait for the process to finish or kill it after timeout seconds 537 | p.join(timeout) 538 | 539 | if p.is_alive(): 540 | p.terminate() 541 | return "Execution timed out" 542 | else: 543 | self.worker(command, self.globals, self.locals, queue) 544 | # get the result from the worker function 545 | return queue.get() 546 | 547 | -------------------------------------------------------------------------------- /src/unified_app.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | import streamlit as st 5 | from dotenv import load_dotenv 6 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage, SystemMessage 7 | from graph import invoke_our_graph as invoke_gpt_graph 8 | from graph_anthropic import invoke_our_graph as invoke_anthropic_graph 9 | from util import display_message as display_message_gpt, render_conversation_history as render_conversation_history_gpt, get_conversation_summary as get_conversation_summary_gpt 10 | from util_anthropic import display_message as display_message_anthropic, render_conversation_history as render_conversation_history_anthropic, get_conversation_summary as get_conversation_summary_anthropic 11 | from speech_to_text import input_from_mic, convert_text_to_speech 12 | from datetime import datetime 13 | from prompt import system_prompt 14 | 15 | # Load environment variables 16 | load_dotenv() 17 | 18 | # Initialize session state if not present 19 | if "page" not in st.session_state: 20 | st.session_state["page"] = "OpenAI" 21 | 22 | if "final_state" not in st.session_state: 23 | st.session_state["final_state"] = { 24 | "messages": [SystemMessage(content=system_prompt)] 25 | } 26 | if "audio_transcription" not in st.session_state: 27 | st.session_state["audio_transcription"] = None 28 | 29 | # Add custom CSS with theme-aware styling 30 | st.markdown(""" 31 | 202 | """, unsafe_allow_html=True) 203 | 204 | # Set up Streamlit layout 205 | st.markdown('

🤖 Spatial Transcriptomics Agent

', unsafe_allow_html=True) 206 | 207 | # Navigation in sidebar with improved styling 208 | st.sidebar.markdown('
', unsafe_allow_html=True) 209 | st.sidebar.title("🎯 Navigation") 210 | 211 | PROVIDER_CONFIGS = { 212 | "Anthropic": { 213 | "icon": "🟣(Recommended)", 214 | "color": "#FF5722", 215 | "hover_color": "#E64A19" 216 | }, 217 | "OpenAI": { 218 | "icon": "🟢", 219 | "color": "#2196F3", 220 | "hover_color": "#1976D2" 221 | } 222 | } 223 | 224 | # Then update the provider selection 225 | provider_options = [f"{PROVIDER_CONFIGS[p]['icon']} {p}" for p in ["Anthropic", "OpenAI"]] 226 | selected = st.sidebar.radio("Select LLM Provider Family", provider_options) 227 | page = selected.split(" ")[1] # Extract provider name without emoji 228 | st.session_state["page"] = page 229 | 230 | # Set provider-specific functions and variables 231 | if page == "OpenAI": 232 | HISTORY_DIR = "conversation_histories_gpt" 233 | invoke_graph = invoke_gpt_graph 234 | display_message = display_message_gpt 235 | render_conversation_history = render_conversation_history_gpt 236 | get_conversation_summary = get_conversation_summary_gpt 237 | available_models = ["gpt-4o"] 238 | else: # Anthropic 239 | HISTORY_DIR = "conversation_histories_anthropic" 240 | invoke_graph = invoke_anthropic_graph 241 | display_message = display_message_anthropic 242 | render_conversation_history = render_conversation_history_anthropic 243 | get_conversation_summary = get_conversation_summary_anthropic 244 | available_models = [ 245 | "claude_3_7_sonnet_20250219", 246 | "claude_3_5_sonnet_20241022" 247 | ] 248 | 249 | # Add model selection with improved styling 250 | selected_model = st.sidebar.selectbox(f"🔧 Select {page} Model:", available_models, index=0) 251 | 252 | # Add New Chat button with custom styling 253 | st.sidebar.markdown('
', unsafe_allow_html=True) 254 | if st.sidebar.button("🔄 Start New Chat"): 255 | st.session_state["final_state"] = { 256 | "messages": [SystemMessage(content=system_prompt)] 257 | } 258 | st.session_state["last_summary_point"] = 0 259 | st.session_state["last_summary_title"] = "Default Title" 260 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 261 | st.rerun() 262 | st.sidebar.markdown('
', unsafe_allow_html=True) 263 | st.sidebar.markdown('
', unsafe_allow_html=True) 264 | 265 | # Set up environment for API keys 266 | if page == "OpenAI" and not os.getenv('OPENAI_API_KEY'): 267 | st.sidebar.markdown(""" 268 |
269 |

🔑 OpenAI API Key Setup

270 |
271 | """, unsafe_allow_html=True) 272 | api_key = st.sidebar.text_input(label="OpenAI API Key", type="password", label_visibility="collapsed") 273 | os.environ["OPENAI_API_KEY"] = api_key 274 | if not api_key: 275 | st.info("Please enter your OpenAI API Key in the sidebar.") 276 | st.stop() 277 | elif page == "Anthropic" and not os.getenv('ANTHROPIC_API_KEY'): 278 | st.sidebar.header("Anthropic API Key Setup") 279 | api_key = st.sidebar.text_input(label="Anthropic API Key", type="password", label_visibility="collapsed") 280 | os.environ["ANTHROPIC_API_KEY"] = api_key 281 | if not api_key: 282 | st.info("Please enter your Anthropic API Key in the sidebar.") 283 | st.stop() 284 | 285 | os.makedirs(HISTORY_DIR, exist_ok=True) 286 | 287 | # Helper Functions for Conversation Management 288 | def save_history(title: str, summary: str): 289 | """Save the current conversation history to a file with title and summary.""" 290 | history_data = { 291 | "title": title, 292 | "summary": summary, 293 | "timestamp": datetime.now().isoformat(), 294 | "messages": messages_to_dicts(st.session_state["final_state"]["messages"]) 295 | } 296 | filename = f"{HISTORY_DIR}/{title.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" 297 | with open(filename, "w") as f: 298 | json.dump(history_data, f) 299 | st.rerun() 300 | 301 | def load_all_histories(): 302 | """Load all saved conversation histories as a list of metadata for display.""" 303 | histories = [] 304 | for file in os.listdir(HISTORY_DIR): 305 | if file.endswith(".json"): 306 | with open(os.path.join(HISTORY_DIR, file), "r") as f: 307 | history = json.load(f) 308 | histories.append({ 309 | "title": history["title"], 310 | "summary": history["summary"], 311 | "timestamp": history["timestamp"], 312 | "filename": file 313 | }) 314 | return sorted(histories, key=lambda x: x["timestamp"], reverse=True) 315 | 316 | def load_history(filename: str): 317 | """Load a specific conversation history file into session state.""" 318 | try: 319 | with open(os.path.join(HISTORY_DIR, filename), "r") as f: 320 | history_data = json.load(f) 321 | st.session_state["final_state"]["messages"] = dicts_to_messages(history_data["messages"]) 322 | st.sidebar.success(f"Conversation '{history_data['title']}' loaded successfully") 323 | except FileNotFoundError: 324 | st.sidebar.error("Conversation history not found.") 325 | 326 | def delete_history(filename: str): 327 | """Delete a specific conversation history file.""" 328 | os.remove(os.path.join(HISTORY_DIR, filename)) 329 | st.sidebar.success("Conversation history deleted.") 330 | st.rerun() 331 | 332 | # Convert messages to serializable dictionaries and vice versa 333 | def messages_to_dicts(messages): 334 | return [msg.dict() for msg in messages] 335 | 336 | def dicts_to_messages(dicts): 337 | reconstructed_messages = [] 338 | for d in dicts: 339 | if d["type"] == "ai": 340 | reconstructed_messages.append(AIMessage(**d)) 341 | elif d["type"] == "human": 342 | reconstructed_messages.append(HumanMessage(**d)) 343 | elif d["type"] == "tool": 344 | reconstructed_messages.append(ToolMessage(**d)) 345 | return reconstructed_messages 346 | 347 | # Organize Sidebar with Tabs and improved styling 348 | st.sidebar.title("⚙️ Settings") 349 | tab1, tab2, tab3 = st.sidebar.tabs(["💬 Conversation", "🎤 Voice", "🖼️ Image"]) 350 | 351 | # Initialize session state variables 352 | if "last_summary_point" not in st.session_state: 353 | st.session_state["last_summary_point"] = 0 354 | if "last_summary_title" not in st.session_state: 355 | st.session_state["last_summary_title"] = "Default Title" 356 | if "last_summary_summary" not in st.session_state: 357 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 358 | 359 | # Tab 1: Conversation Management 360 | with tab1: 361 | st.subheader("History") 362 | histories = load_all_histories() 363 | if histories: 364 | st.markdown("### Saved Histories") 365 | for history in histories: 366 | with st.expander(f"{history['title']} ({history['timestamp'][:10]})"): 367 | st.write(history["summary"]) 368 | if st.button("Load", key=f"load_{history['filename']}"): 369 | load_history(history["filename"]) 370 | if st.button("Delete", key=f"delete_{history['filename']}"): 371 | delete_history(history["filename"]) 372 | 373 | # Determine title and summary based on message count and last summary point 374 | message_count = len(st.session_state["final_state"]["messages"]) 375 | if message_count > 5 and (message_count - 5) % 10 == 0 and message_count != st.session_state["last_summary_point"]: 376 | #generated_title, generated_summary = get_conversation_summary(st.session_state["final_state"]["messages"]) 377 | #st.session_state["last_summary_title"] = generated_title 378 | st.session_state["last_summary_title"] = "Default Title" 379 | #st.session_state["last_summary_summary"] = generated_summary 380 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 381 | st.session_state["last_summary_point"] = message_count 382 | elif message_count <= 5: 383 | st.session_state["last_summary_title"] = "Default Title" 384 | st.session_state["last_summary_summary"] = "This is the default summary for short conversations." 385 | 386 | title = st.text_input("Conversation Title", value=st.session_state["last_summary_title"]) 387 | summary = st.text_area("Conversation Summary", value=st.session_state["last_summary_summary"]) 388 | 389 | if st.button("Save Conversation"): 390 | save_history(title, summary) 391 | st.sidebar.success(f"Conversation saved as '{title}'") 392 | 393 | # Tab 2: Voice Options 394 | with tab2: 395 | st.subheader("Audio Options") 396 | use_audio_input = st.checkbox("Enable Voice Input", value=False) 397 | if use_audio_input: 398 | with st.form("audio_input_form", clear_on_submit=True): 399 | st.markdown(""" 400 |
401 | Instructions for Recording Audio: 402 |
    403 |
  1. Click Submit Audio below to activate the audio recorder.
  2. 404 |
  3. Once activated, click Start Recording to begin capturing audio.
  4. 405 |
  5. When finished, click Stop to end the recording.
  6. 406 |
  7. Finally, click Submit Audio again to use the recorded audio.
  8. 407 |
408 |
409 | """, unsafe_allow_html=True) 410 | submitted_audio = st.form_submit_button("Submit Audio") 411 | if submitted_audio: 412 | audio_transcript = input_from_mic() 413 | if audio_transcript: 414 | st.session_state["audio_transcription"] = audio_transcript 415 | prompt = st.session_state["audio_transcription"] 416 | else: 417 | st.session_state["audio_transcription"] = None 418 | 419 | use_voice_response = st.checkbox("Enable Voice Response", value=False) 420 | if use_voice_response: 421 | st.write("If the voice response is too long, a summarized version will generate.") 422 | 423 | # Tab 3: Image Upload 424 | with tab3: 425 | st.subheader("Image") 426 | with st.form("image_upload_form", clear_on_submit=True): 427 | uploaded_images = st.file_uploader("Upload one or more images (optional)", type=["jpg", "jpeg", "png"], accept_multiple_files=True) 428 | submitted = st.form_submit_button("Submit Images") 429 | if submitted: 430 | if uploaded_images: 431 | st.session_state["uploaded_images_data"] = [ 432 | base64.b64encode(image.read()).decode("utf-8") for image in uploaded_images 433 | ] 434 | else: 435 | st.session_state["uploaded_images_data"] = [] 436 | 437 | # Initialize prompt variable 438 | prompt = st.session_state.get("audio_transcription") 439 | 440 | # Main chat interface 441 | st.markdown(f""" 442 |
443 | 🤖 444 | Chat with Spatial Transcriptomics Agent 445 |
446 | """, unsafe_allow_html=True) 447 | 448 | render_conversation_history(st.session_state["final_state"]["messages"][0:]) 449 | 450 | # Capture text input if no audio input 451 | if prompt is None: 452 | prompt = st.chat_input() 453 | 454 | # Process new user input if available 455 | if prompt: 456 | content_list = [{"type": "text", "text": prompt}] 457 | if "uploaded_images_data" in st.session_state and st.session_state["uploaded_images_data"]: 458 | content_list.extend([ 459 | {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_data}"}} 460 | for img_data in st.session_state["uploaded_images_data"] 461 | ]) 462 | st.session_state["uploaded_images_data"] = [] 463 | 464 | user_message = HumanMessage(content=content_list) 465 | st.session_state["final_state"]["messages"].append(user_message) 466 | render_conversation_history([user_message]) 467 | 468 | with st.spinner(f"Agent is thinking..."): 469 | previous_message_count = len(st.session_state["final_state"]["messages"]) 470 | updated_state = invoke_graph(st.session_state["final_state"]["messages"], selected_model) 471 | 472 | st.session_state["final_state"] = updated_state 473 | new_messages = st.session_state["final_state"]["messages"][previous_message_count:] 474 | 475 | if st.session_state.get("render_last_message", True): 476 | render_conversation_history([st.session_state["final_state"]["messages"][-1]]) 477 | 478 | if use_voice_response: 479 | audio_file = convert_text_to_speech(new_messages[-1].content) 480 | if audio_file: 481 | st.audio(audio_file) 482 | 483 | st.session_state["audio_transcription"] = None 484 | 485 | 486 | 487 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | # util.py 2 | 3 | import os 4 | import json 5 | import time 6 | import streamlit as st 7 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from typing import List, Tuple 10 | from pydantic import BaseModel, Field 11 | from langchain_openai import ChatOpenAI # Import ChatOpenAI where it's defined 12 | # Directory for temporary plot images 13 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 14 | os.makedirs(plot_dir, exist_ok=True) 15 | 16 | def display_message(content, sender="assistant"): 17 | """ 18 | Displays a message from the user or assistant with different styling. 19 | Supports displaying both text and image URLs for the user. 20 | """ 21 | if sender == "user": 22 | if isinstance(content, str): 23 | # Display plain text message from user 24 | st.markdown( 25 | f""" 26 |
27 |
28 |

{content}

29 |
30 |
31 | """, 32 | unsafe_allow_html=True 33 | ) 34 | elif isinstance(content, dict): 35 | # Check if the content has both text and image URL 36 | if "text" in content: 37 | st.markdown( 38 | f""" 39 |
40 |
41 |

{content["text"]}

42 |
43 |
44 | """, 45 | unsafe_allow_html=True 46 | ) 47 | if "url" in content: 48 | st.image(content["url"], caption="User Image", use_container_width=True) 49 | else: 50 | # Display assistant's message, converting LaTeX-style content 51 | modified_content = content.replace("\\(", "$").replace("\\)", "$") 52 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 53 | st.markdown(modified_content) 54 | 55 | def render_conversation_history(messages): 56 | """ 57 | Renders conversation history from a list of messages, handling multiple tool calls. 58 | """ 59 | tool_input_map = {} # Map to track tool_call_id to tool_input 60 | 61 | for entry in messages: 62 | # Skip if the message has name "image_assistant" 63 | if hasattr(entry, "name") and entry.name == "image_assistant": 64 | continue 65 | 66 | if isinstance(entry, HumanMessage): 67 | # Check if entry.content is list or string and handle appropriately 68 | if isinstance(entry.content, list): 69 | for item in entry.content: 70 | if isinstance(item, dict): 71 | # Display text or image URL in dictionary format 72 | if item["type"] == "text": 73 | display_message(item["text"], sender="user") 74 | elif item["type"] == "image_url": 75 | display_message({"url": item["image_url"]["url"]}, sender="user") 76 | elif isinstance(item, str): 77 | # Display plain text if it's a string 78 | display_message(item, sender="user") 79 | elif isinstance(entry.content, str): 80 | # Display single string content 81 | display_message(entry.content, sender="user") 82 | 83 | elif isinstance(entry, AIMessage): 84 | display_message(entry.content, sender="assistant") 85 | 86 | # Handle tool calls in AIMessage 87 | if entry.tool_calls: 88 | tool_calls = entry.tool_calls 89 | for tool_call in tool_calls: 90 | try: 91 | arguments_json = tool_call.get('args', '{}') 92 | tool_input = arguments_json 93 | tool_call_id = tool_call.get("id") 94 | if tool_call_id: 95 | tool_input_map[tool_call_id] = tool_input 96 | except json.JSONDecodeError: 97 | tool_input_map[tool_call.get("id", "unknown")] = "Error decoding tool input." 98 | 99 | elif isinstance(entry, ToolMessage): 100 | display_tool_message(entry, tool_input_map) 101 | 102 | 103 | def display_tool_message(entry, tool_input_map): 104 | """Display a tool message with the corresponding tool input.""" 105 | tool_output = entry.content 106 | tool_call_id = getattr(entry, "tool_call_id", None) 107 | tool_input = tool_input_map.get(tool_call_id, "No matching tool input found") 108 | 109 | with st.expander(f"Tool Call: {entry.name}", expanded=False): 110 | if isinstance(tool_input, dict) and 'query' in tool_input: 111 | st.code(tool_input['query'], language="python") 112 | else: 113 | st.code(tool_input or "No tool input available", language="python") 114 | st.write("**Tool Output:**") 115 | st.code(tool_output) 116 | 117 | # Handle artifacts if they exist 118 | artifacts = getattr(entry, "artifact", []) 119 | if artifacts: 120 | st.write("**Generated Artifacts (e.g., Plots):**") 121 | for rel_path in artifacts: 122 | if rel_path.endswith(".png"): 123 | # Convert relative path to absolute 124 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 125 | if os.path.exists(abs_path): 126 | st.image(abs_path, caption="Generated Plot") 127 | else: 128 | st.write(f"Error: Plot file not found at {rel_path}") 129 | 130 | 131 | # Pydantic model for structured output 132 | class ConversationSummary(BaseModel): 133 | """Structure for conversation title and summary.""" 134 | title: str = Field(description="The title of the conversation") 135 | summary: str = Field(description="A concise summary of the conversation's main points") 136 | 137 | # Function to get conversation title and summary 138 | def get_conversation_summary(messages: List[BaseMessage]) -> Tuple[str, str]: 139 | # Initialize the LLM model within the function 140 | llm = ChatOpenAI(model_name="gpt-4o",temperature=0) 141 | prompt_template = ChatPromptTemplate.from_messages([ 142 | MessagesPlaceholder("msgs"), 143 | ("human", "Given the above messages between user and AI agent, return a title and concise summary of the conversation"), 144 | ]) 145 | structured_llm = llm.with_structured_output(ConversationSummary) 146 | summarized_chain = prompt_template | structured_llm 147 | response = summarized_chain.invoke(messages) 148 | return response.title, response.summary 149 | -------------------------------------------------------------------------------- /src/util_anthropic.py: -------------------------------------------------------------------------------- 1 | # util_anthropic.py 2 | 3 | import os 4 | import json 5 | import time 6 | import streamlit as st 7 | from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, BaseMessage 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_anthropic import ChatAnthropic 10 | from typing import List, Tuple 11 | from pydantic import BaseModel, Field 12 | # Directory for temporary plot images 13 | plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots") 14 | os.makedirs(plot_dir, exist_ok=True) 15 | 16 | def display_message(content, sender="assistant"): 17 | """ 18 | Displays a message from the user or assistant with different styling. 19 | Supports displaying both text and image URLs for the user. 20 | """ 21 | if sender == "user": 22 | if isinstance(content, str): 23 | # Display plain text message from user 24 | st.markdown( 25 | f""" 26 |
27 |
28 |

{content}

29 |
30 |
31 | """, 32 | unsafe_allow_html=True 33 | ) 34 | elif isinstance(content, dict): 35 | # Check if the content has both text and image URL 36 | if "text" in content: 37 | st.markdown( 38 | f""" 39 |
40 |
41 |

{content["text"]}

42 |
43 |
44 | """, 45 | unsafe_allow_html=True 46 | ) 47 | if "url" in content: 48 | st.image(content["url"], caption="User Image", use_container_width=True) 49 | else: 50 | # Display assistant's message, handling various content formats 51 | if isinstance(content, str): 52 | # Process and display plain text with LaTeX-style content 53 | modified_content = content.replace("\\(", "$").replace("\\)", "$") 54 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 55 | st.markdown(modified_content) 56 | elif isinstance(content, list): 57 | # Iterate through list items if content is a list 58 | for item in content: 59 | if isinstance(item, dict): 60 | if item.get("type") == "text" and "text" in item: 61 | # Process LaTeX-style text 62 | modified_text = item["text"].replace("\\(", "$").replace("\\)", "$") 63 | modified_text = modified_text.replace("\\[", "$$").replace("\\]", "$$") 64 | st.markdown(modified_text) 65 | elif "url" in item: 66 | st.image(item["url"], caption="Assistant Image", use_container_width=True) 67 | elif isinstance(item, str): 68 | # Handle plain text items in the list 69 | modified_content = item.replace("\\(", "$").replace("\\)", "$") 70 | modified_content = modified_content.replace("\\[", "$$").replace("\\]", "$$") 71 | st.markdown(modified_content) 72 | elif isinstance(content, dict): 73 | # Display text if present in a single dictionary 74 | if "text" in content: 75 | modified_text = content["text"].replace("\\(", "$").replace("\\)", "$") 76 | modified_text = modified_text.replace("\\[", "$$").replace("\\]", "$$") 77 | st.markdown(modified_text) 78 | if "url" in content: 79 | st.image(content["url"], caption="Assistant Image", use_container_width=True) 80 | else: 81 | # Handle unexpected content type 82 | st.error("Unsupported content format from the assistant.") 83 | 84 | 85 | def render_conversation_history(messages): 86 | """ 87 | Renders conversation history from a list of messages, handling multiple tool calls. 88 | """ 89 | tool_input_map = {} # Map to track tool_call_id to tool_input 90 | 91 | for entry in messages: 92 | # Skip if the message has name "image_assistant" 93 | if hasattr(entry, "name") and entry.name == "image_assistant": 94 | continue 95 | 96 | if isinstance(entry, HumanMessage): 97 | # Check if entry.content is list or string and handle appropriately 98 | if isinstance(entry.content, list): 99 | for item in entry.content: 100 | if isinstance(item, dict): 101 | # Display text or image URL in dictionary format 102 | if item["type"] == "text": 103 | display_message(item["text"], sender="user") 104 | elif item["type"] == "image_url": 105 | display_message({"url": item["image_url"]["url"]}, sender="user") 106 | elif isinstance(item, str): 107 | # Display plain text if it's a string 108 | display_message(item, sender="user") 109 | elif isinstance(entry.content, str): 110 | # Display single string content 111 | display_message(entry.content, sender="user") 112 | 113 | elif isinstance(entry, AIMessage): 114 | display_message(entry.content, sender="assistant") 115 | 116 | # Handle tool calls in AIMessage 117 | if entry.tool_calls: 118 | tool_calls = entry.tool_calls 119 | for tool_call in tool_calls: 120 | try: 121 | arguments_json = tool_call.get('args', '{}') 122 | #tool_input = arguments_json.get("query", "No tool input available") 123 | tool_input = arguments_json 124 | tool_call_id = tool_call.get("id") 125 | if tool_call_id: 126 | tool_input_map[tool_call_id] = tool_input 127 | except json.JSONDecodeError: 128 | tool_input_map[tool_call.get("id", "unknown")] = "Error decoding tool input." 129 | 130 | elif isinstance(entry, ToolMessage): 131 | display_tool_message(entry, tool_input_map) 132 | 133 | 134 | def display_tool_message(entry, tool_input_map): 135 | """ 136 | Display a tool message with the corresponding tool input based on the tool_call_id. 137 | """ 138 | tool_output = entry.content 139 | tool_call_id = getattr(entry, "tool_call_id", None) # Get the tool_call_id from the ToolMessage 140 | tool_input = tool_input_map.get(tool_call_id, "No matching tool input found") # Match with tool_input_map 141 | 142 | with st.expander(f"Tool Call: {entry.name}", expanded=False): 143 | # Check if 'query' exists in tool_input and display accordingly 144 | if isinstance(tool_input, dict) and 'query' in tool_input: 145 | st.code(tool_input['query'], language="python") 146 | else: 147 | st.code(tool_input or "No tool input available", language="python") 148 | st.write("**Tool Output:**") 149 | st.code(tool_output) 150 | 151 | # Handle artifacts if they exist 152 | artifacts = getattr(entry, "artifact", []) 153 | if artifacts: 154 | st.write("**Generated Artifacts (e.g., Plots):**") 155 | for rel_path in artifacts: 156 | if rel_path.endswith(".png"): 157 | # Convert relative path to absolute 158 | abs_path = os.path.join(os.path.dirname(__file__), rel_path) 159 | if os.path.exists(abs_path): 160 | st.image(abs_path, caption="Generated Plot") 161 | else: 162 | st.write(f"Error: Plot file not found at {rel_path}") 163 | 164 | 165 | # Pydantic model for structured output 166 | class ConversationSummary(BaseModel): 167 | """Structure for conversation title and summary.""" 168 | title: str = Field(description="The title of the conversation") 169 | summary: str = Field(description="A concise summary of the conversation's main points") 170 | 171 | # Function to get conversation title and summary 172 | def get_conversation_summary(messages: List[BaseMessage]) -> Tuple[str, str]: 173 | # Initialize the LLM model within the function 174 | llm = ChatAnthropic(model_name="claude-3-5-sonnet-20240620",temperature=0) 175 | 176 | # Define the prompt template 177 | prompt_template = ChatPromptTemplate.from_messages([ 178 | MessagesPlaceholder("msgs"), 179 | ("human", "Given the above messages between user and AI agent, return a title and concise summary of the conversation"), 180 | ]) 181 | 182 | # Configure the structured output model 183 | structured_llm = llm.with_structured_output(ConversationSummary) 184 | summarized_chain = prompt_template | structured_llm 185 | 186 | # Invoke the chain with the messages and retrieve the response 187 | response = summarized_chain.invoke(messages) 188 | 189 | # Return the title and summary 190 | return response.title, response.summary 191 | --------------------------------------------------------------------------------