            Current recursion step is {cur_messages_len}. Terminated because it exceeded the limit of 200.
            </div>
            """,
            unsafe_allow_html=True
        )
        st.session_state["render_last_message"] = False
        return Command(
            update={"messages": []},
            goto="__end__",
        )
    last_message = state["messages"][-1]
    # Check if the last message is a ToolMessage with artifacts (skipped for gpt-3.5-turbo, which has no vision support)
    if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "gpt-3.5-turbo":
        # Prepare the multimodal content list with the initial instruction text
        content_list = [{
            "type": "text",
            "text": """
            Please analyze the images generated by the code above. Your tasks are to:
            1. Examine each visualization carefully.
            2. Provide a detailed description of what you observe.
            3. Explain the biological implications of the observations, if any.
            4. Use Google Scholar to check whether the literature supports your observations.
            5. Always run multiple search queries (at least five) to build a better understanding of the observation.
            6. After you finish writing, continue to the next steps according to the system instructions, unless the user shows intent to interact or you are unsure about the next step.
            7. Respond in the user's input language; you are a multilingual assistant.
            8. If you don't see any plots, or the plots are unclear or crowded, try to fix the code. If you want the plots to render, do not call `plt.close()`.
            """
        }]

        # Add all generated PNG images to the content list as base64 data URLs
        for rel_path in last_message.artifact:
            if rel_path.endswith(".png"):
                # Convert the relative path to an absolute one based on this script's location
                abs_path = os.path.join(os.path.dirname(__file__), rel_path)
                if os.path.exists(abs_path):
                    with open(abs_path, "rb") as image_file:
                        image_data = base64.b64encode(image_file.read()).decode("utf-8")
                    content_list.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_data}"}
                    })

        # Send a single message carrying all images, but only if at least one was found
        if len(content_list) > 1:
            image_message = HumanMessage(content=content_list, name="image_assistant")
            state["messages"].append(image_message)

    response = llm.invoke(state["messages"])
    if response.tool_calls:
        return Command(
            update={"messages": [response]},
            goto="tools",
        )
    else:
        st.session_state["render_last_message"] = True
        return Command(
            update={"messages": [response]},
            goto="__end__",
        )

graph.add_edge(START, "modelNode")
graph.add_node("tools", tool_node)
graph.add_node("modelNode", _call_model)
graph.add_edge("tools", "modelNode")
graph_runnable = graph.compile()

def invoke_our_graph(messages, model_choose):
    config = {"recursion_limit": 200, "configurable": {"model": model_choose}}
    return graph_runnable.invoke({"messages": messages, "input_messages_len": [len(messages)]}, config=config)
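
# --- Usage sketch (editor's illustration, not part of the original file) ---
# A minimal example of how this module's entry point might be driven. It assumes
# this file is the OpenAI counterpart of graph_anthropic.py, that "gpt-4o" is a
# hypothetical key in this module's `models` dict, and that the Streamlit session
# state the graph nodes touch (e.g. st.session_state["final_state"]) has already
# been initialized by the surrounding app.
if __name__ == "__main__":
    from langchain_core.messages import SystemMessage
    from prompt import system_prompt

    seed_messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content="Please run the end-to-end spatial analysis."),
    ]
    final_state = invoke_our_graph(seed_messages, "gpt-4o")  # model key is an assumption
    print(final_state["messages"][-1].content)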

-------------------------------------------------------------------------------- /src/graph_anthropic.py: --------------------------------------------------------------------------------
import os
import base64
from datetime import datetime
import matplotlib.pyplot as plt
from typing import Annotated, TypedDict, Literal, Tuple, List
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
from langgraph.graph import START, StateGraph
from langgraph.graph.message import AnyMessage, add_messages
# from langchain_experimental.utilities import PythonREPL
from tools import PythonREPL
from langgraph.prebuilt import ToolNode
from prompt import system_prompt
from pydantic import BaseModel, Field
from langgraph.types import Command
from textwrap import dedent
import streamlit as st
from util_anthropic import display_message, render_conversation_history, get_conversation_summary
from langchain_core.runnables.config import RunnableConfig
from tools import google_scholar_search, squidpy_rag_agent, visualize_cell_cell_interaction_tool, visualize_spatial_cell_type_map, visualize_cell_type_composition, visualize_umap, report_tool
import sys
import io

# Directory setup
plot_dir = os.path.join(os.path.dirname(__file__), "tmp/plots")
os.makedirs(plot_dir, exist_ok=True)
load_dotenv()

python_repl = PythonREPL()

@tool(response_format="content_and_artifact")
def python_repl_tool(query: str) -> Tuple[str, List[str]]:
    """A Python shell. Use this to execute python commands. Input should be a valid python command.
    If you want to see the output of a value, you should print it out with `print(...)`."""

    plot_paths = []    # File paths of generated plots
    result_parts = []  # Parts of the textual output

    try:
        output = python_repl.run(query)
        if output and output.strip():
            result_parts.append(output.strip())

        figures = [plt.figure(i) for i in plt.get_fignums()]
        if figures:
            for fig in figures:
                fig.set_size_inches(10, 6)  # Ensure figures are large enough
                # fig.tight_layout()  # Prevent truncation
                # Generate a unique filename
                plot_filename = f"plot_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.png"
                # Store a relative path; convert to absolute only for saving
                rel_path = os.path.join("tmp/plots", plot_filename)
                abs_path = os.path.join(os.path.dirname(__file__), rel_path)

                fig.savefig(abs_path, bbox_inches='tight')
                plot_paths.append(rel_path)

            plt.close("all")
            result_parts.append(f"Generated {len(plot_paths)} plot(s).")

        if not result_parts:  # No output and no figures
            result_parts.append("Executed code successfully with no output. If you want to see the output of a value, you should print it out with `print(...)`.")

    except Exception as e:
        result_parts.append(f"Error executing code: {e}")

    # Join all parts of the result with newlines
    result_summary = "\n".join(result_parts)

    # Return both the summary and plot paths (if any)
    return result_summary, plot_paths

# Tools list and node setup
tools = [
    python_repl_tool,
    google_scholar_search,
    squidpy_rag_agent,
    visualize_cell_cell_interaction_tool,
    visualize_spatial_cell_type_map,
    visualize_cell_type_composition,
    visualize_umap,
    report_tool
]
tool_node = ToolNode(tools)

# Graph setup
class GraphsState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    input_messages_len: list[int]

graph = StateGraph(GraphsState)

claude_3_7_sonnet_20250219 = ChatAnthropic(model_name="claude-3-7-sonnet-20250219", temperature=0, max_tokens=8000).bind_tools(tools)
claude_3_5_sonnet_20241022 = ChatAnthropic(model_name="claude-3-5-sonnet-20241022", temperature=0, max_tokens=8000).bind_tools(tools)

models = {
    "claude_3_5_sonnet_20241022": claude_3_5_sonnet_20241022,
    "claude_3_7_sonnet_20250219": claude_3_7_sonnet_20250219
}

def _call_model(state: GraphsState, config: RunnableConfig) -> Command[Literal["tools", "__end__"]]:
    # Mirror the running message list into the Streamlit session for the UI
    st.session_state["final_state"]["messages"] = state["messages"]
    # The fallback must be a key of `models`
    model_name = config["configurable"].get("model", "claude_3_5_sonnet_20241022")
    llm = models[model_name]
    previous_message_count = len(state["messages"])
    state["input_messages_len"].append(previous_message_count)
    # Render only the messages added since the previous model call
    render_conversation_history(state["messages"][state["input_messages_len"][-2]:state["input_messages_len"][-1]])
    cur_messages_len = len(state["messages"]) - state["input_messages_len"][0]
    if cur_messages_len > 200:
        st.markdown(
            f"""
            <div>
            Current recursion step is {cur_messages_len}. Terminated because it exceeded the limit of 200.
            </div>
            """,
            unsafe_allow_html=True
        )
        st.session_state["render_last_message"] = False
        return Command(
            update={"messages": []},
            goto="__end__",
        )
    last_message = state["messages"][-1]
    # Check if the last message is a ToolMessage with artifacts (skipped for models without vision support)
    if isinstance(last_message, ToolMessage) and hasattr(last_message, "artifact") and last_message.artifact and model_name != "claude_3_5_haiku":
        # Prepare the multimodal content list with the initial instruction text
        content_list = [{
            "type": "text",
            "text": """
            Please analyze the images generated by the code above. Your tasks are to:
            1. Examine each visualization carefully.
            2. Provide a detailed description of what you observe.
            3. Explain the biological implications of the observations, if any.
            4. Use Google Scholar to check whether the literature supports your observations.
            5. Always run multiple search queries (at least five) to build a better understanding of the observation.
            6. After you finish writing, continue to the next steps according to the system instructions, unless the user shows intent to interact or you are unsure about the next step.
            7. Respond in the user's input language; you are a multilingual assistant.
            8. If you don't see any plots, or the plots are unclear or crowded, try to fix the code. If you want the plots to render, do not call `plt.close()`.
            """
        }]

        # Add all generated PNG images to the content list as base64 data URLs
        for rel_path in last_message.artifact:
            if rel_path.endswith(".png"):
                # Convert the relative path to an absolute one based on this script's location
                abs_path = os.path.join(os.path.dirname(__file__), rel_path)
                if os.path.exists(abs_path):
                    with open(abs_path, "rb") as image_file:
                        image_data = base64.b64encode(image_file.read()).decode("utf-8")
                    content_list.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_data}"}
                    })

        # Send a single message carrying all images, but only if at least one was found
        if len(content_list) > 1:
            image_message = HumanMessage(content=content_list, name="image_assistant")
            state["messages"].append(image_message)

    response = llm.invoke(state["messages"])
    if response.tool_calls:
        return Command(
            update={"messages": [response]},
            goto="tools",
        )
    else:
        st.session_state["render_last_message"] = True
        return Command(
            update={"messages": [response]},
            goto="__end__",
        )

graph.add_edge(START, "modelNode")
graph.add_node("tools", tool_node)
graph.add_node("modelNode", _call_model)
graph.add_edge("tools", "modelNode")
graph_runnable = graph.compile()

def invoke_our_graph(messages, model_choose):
    config = {"recursion_limit": 200, "configurable": {"model": model_choose}}
    return graph_runnable.invoke({"messages": messages, "input_messages_len": [len(messages)]}, config=config)
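
# --- Usage sketch (editor's illustration, not part of the original file) ---
# How the compiled Anthropic graph might be invoked. The two model keys in the
# `models` dict above are real; the message text is illustrative, and the
# Streamlit session state the nodes touch (st.session_state["final_state"],
# "render_last_message") is assumed to be initialized by the surrounding app.
if __name__ == "__main__":
    seed = [HumanMessage(content="Show the UMAP of the default dataset.")]
    result = invoke_our_graph(seed, "claude_3_7_sonnet_20250219")
    print(result["messages"][-1].content)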

-------------------------------------------------------------------------------- /src/prompt.py: --------------------------------------------------------------------------------
system_prompt = """
Spatial Transcriptomics AI Agent

This AI agent specializes in analyzing spatial transcriptomics data through a systematic pipeline.
It uses a set of tools to produce Python code snippets for visualization and analysis. The agent is equipped
with tools for data exploration, visualization, and biological interpretation.

---

Available Tools:
1. python_repl_tool:
   - Executes Python code in a live Python shell
   - Returns printed outputs and generated visualizations
   - Input: Valid Python commands
   - Output: Execution results and plot file paths

2. google_scholar_search:
   - Retrieves academic articles and summaries
   - Input: Research topic or biological query
   - Output: Article titles, authors, and summaries
   - Usage: For literature-backed information

3. squidpy_rag_agent:
   - Provides guidance on Squidpy usage
   - Input: Questions about Squidpy functions
   - Output: Code examples and explanations
   - Usage: For spatial analysis workflows

4. visualize_umap:
   - Creates UMAP plots for each time point
   - Input: No input required - uses the default dataset
   - Output: UMAP visualizations colored by cell type
   - Shows clustering patterns of different cell populations

5. visualize_cell_type_composition:
   - Shows cell type proportions across samples
   - Input: No input required - uses the default dataset
   - Output: Stacked bar plots and heatmaps
   - Displays changes in cell type composition over time

6. visualize_spatial_cell_type_map:
   - Creates spatial scatter plots of cell types
   - Input: No input required - uses the default dataset
   - Output: Spatial distribution maps
   - Shows cell locations in tissue context

7. visualize_cell_cell_interaction:
   - Analyzes cell type interaction patterns
   - Input: No input required - uses the default dataset
   - Output: Neighborhood enrichment heatmaps
   - Reveals spatial relationships between cell types

---

Pipeline Instructions:
1. Dimensionality Reduction Visualization:
   - Use `visualize_umap` to show cell type clustering
   - Examine the distribution of cell types in UMAP space

2. Cell Type Composition Analysis:
   - Apply `visualize_cell_type_composition` to show proportions
   - Compare cell type changes across time points

3. Spatial Distribution Analysis:
   - Use `visualize_spatial_cell_type_map` for tissue context
   - Examine the spatial organization of cell types

4. Cell-Cell Interaction Analysis:
   - Apply `visualize_cell_cell_interaction` for neighborhood patterns
   - Analyze spatial relationships between cell types

5. Report:
   - Use `report_tool` to generate a report of the analysis
   - Input: No input required - uses the default dataset
   - Output: Report of the analysis
   - Usage: For summarizing the analysis

---

## Data Context
- **Dataset**: Human pancreatic islets grafted on mouse kidney (STARmap spatial transcriptomic data)
- **File location**: `./data/pancreas_processed_full.h5ad`
- **Data structure**:
  - `.obs['sample_name']`: Contains timepoints (Week 4, Week 16, Week 20 post-grafting)
  - `.obs['slice_name']`: Contains slice identifiers in the format "Week_X_slice_Y"

---

## Important Instructions:
- Always use the visualization tools to get code snippets first
- Execute the code using `python_repl_tool`
- DO NOT modify any code from the visualization tools
- If the user asks you to perform the end-to-end analysis, follow the pipeline order: UMAP → composition → spatial map (individual slices; ids are stored in .obs['slice_name']) → interaction
- If the user has a specific task for you to perform, call only the tool the user mentioned. DO NOT call all the tools in the pipeline.
- Use `google_scholar_search` for biological interpretation after plotting each visualization
- REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS
- REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS
- REPEAT: DO NOT CHANGE ANY CODE FROM THE VISUALIZATION TOOLS
- Respond in the user's input language; you are a multilingual assistant.
- PLEASE DO NOT CALL MULTIPLE TOOLS AT ONCE.
"""
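
# --- Usage sketch (editor's illustration, not part of the original file) ---
# How this prompt is typically consumed: the graph modules appear to prepend it
# as the first (system) message before invoking the graph; the message text
# below is illustrative.
if __name__ == "__main__":
    from langchain_core.messages import SystemMessage, HumanMessage

    seed_messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content="Please run the end-to-end analysis."),
    ]
    print(f"System prompt: {len(system_prompt)} characters; {len(seed_messages)} seed messages prepared.")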