├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── app ├── README_tournament_viewer.md ├── background.py ├── common.py ├── configuration_page.py ├── final_report_page.py ├── literature_review_page.py ├── meta_reviews_page.py ├── proximity_page.py ├── resume_page.py ├── supervisor_page.py ├── tournament_page.py ├── tournament_viewer.py └── viewer_requirements.txt ├── assets ├── agent_graph.png ├── app_demo.gif └── overview.png ├── coscientist ├── __init__.py ├── common.py ├── configuration_agent.py ├── custom_types.py ├── evolution_agent.py ├── final_report_agent.py ├── framework.py ├── generation_agent.py ├── global_state.py ├── literature_review_agent.py ├── meta_review_agent.py ├── multiturn.py ├── prompts │ ├── assumption_decomposer.md │ ├── cause_and_effect.md │ ├── collaborative_generation.md │ ├── deep_verification.md │ ├── desk_reject.md │ ├── evolve_from_feedback.md │ ├── final_report.md │ ├── independent_generation.md │ ├── meta_review_tournament.md │ ├── observation_reflection.md │ ├── out_of_the_box.md │ ├── research_config.md │ ├── simulated_debate.md │ ├── supervisor_decision.md │ ├── top_hypotheses_review.md │ ├── topic_decomposition.md │ └── tournament.md ├── proximity_agent.py ├── ranking_agent.py ├── reasoning_types.py ├── reflection_agent.py ├── research_plan.py ├── researcher_config.json └── supervisor_agent.py ├── notebooks └── coscientist.ipynb ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files 
are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.6.9 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | args: [--extend-select, I, --fix] 9 | name: ruff-check-imports 10 | # Run the formatter. 11 | - id: ruff-format 12 | name: ruff-format -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ryan Conrad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🧪 Open CoScientist Agents 2 | 3 | A comprehensive multi-agent system for AI-driven scientific discovery based on Google DeepMind's [AI co-scientist](https://arxiv.org/abs/2502.18864), built with LangGraph and [GPT Researcher](https://github.com/assafelovic/gpt-researcher). The aim is for this system to accelerate scientific research through collaborative AI agents that generate, critique, rank, and evolve scientific hypotheses using tournament-style competition. 4 | 5 | This implementation uses `Gemini 2.5 Pro`, `Claude Sonnet 4`, and `o3` in collaboration and competition. 6 | 7 | ![App Demo](assets/app_demo.gif) 8 | 9 | ## Key Features 10 | 11 | ### Multi-Agent Architecture 12 | - **Literature Review Agent**: Systematically decomposes research goals and conducts comprehensive literature analysis 13 | - **Generation Agents**: Create novel scientific hypotheses using multiple reasoning approaches 14 | - **Reflection Agents**: Perform deep verification and causal reasoning analysis 15 | - **Evolution Agents**: Refine and improve hypotheses based on feedback and competition 16 | - **Meta-Review Agent**: Synthesizes insights across multiple research directions 17 | - **Supervisor Agent**: Orchestrates the entire research workflow -- decides which actions to take next and when to finish the research. 
18 | - **Final Report Agent**: Generates comprehensive research summaries 19 | 20 | ### Tournament-Style Hypothesis Competition 21 | - **ELO Rating System**: Ranks hypotheses through head-to-head competitive analysis 22 | - **Debate Transcripts**: Full records of why one hypothesis outperforms another 23 | - **Win-Loss Statistics**: Track performance across multiple evaluation rounds 24 | - **Hypothesis Evolution**: See how ideas improve through iterative refinement 25 | 26 | ### Interactive Web Interface 27 | - **Streamlit Dashboard**: Comprehensive visualization of research results 28 | - **Real-time Monitoring**: Track research progress and agent activities 29 | - **Hypothesis Explorer**: Deep dive into individual hypotheses and their reasoning 30 | - **Tournament Viewer**: Analyze competitive dynamics between ideas 31 | 32 | ## Installation 33 | 34 | ### Prerequisites 35 | - Python 3.12 or higher 36 | - A boatload of API keys 37 | 38 | ### Install from PyPI (Coming Soon) 39 | ```bash 40 | pip install open-coscientist-agents 41 | ``` 42 | 43 | ### Install from Source 44 | ```bash 45 | git clone https://github.com/conradry/open-coscientist-agents.git 46 | cd open-coscientist-agents 47 | pip install -e . 
48 | ``` 49 | 50 | ## Configuration 51 | 52 | ### Environment Variables 53 | Set up your API keys for model providers: 54 | ```bash 55 | export OPENAI_API_KEY="your-openai-key" 56 | export ANTHROPIC_API_KEY="your-anthropic-key" 57 | export GOOGLE_API_KEY="your-google-key" 58 | ``` 59 | 60 | Set up your API key for Tavily search: 61 | ```bash 62 | export TAVILY_API_KEY='your-api-key' 63 | ``` 64 | 65 | Optional, but highly recommended for monitoring and debugging, set up API keys for LangSmith: 66 | ```bash 67 | export LANGSMITH_ENDPOINT="https://api.smith.langchain.com" 68 | export LANGSMITH_API_KEY="your-langsmith-api-key" 69 | export LANGSMITH_PROJECT="your-langsmith-project" 70 | ``` 71 | 72 | ### Web Interface 73 | Launch the interactive dashboard: 74 | ```bash 75 | cd app 76 | pip install -r viewer_requirements.txt 77 | streamlit run tournament_viewer.py 78 | ``` 79 | 80 | Features include: 81 | - **Configuration Agent**: Set up research parameters 82 | - **Literature Review**: Explore research foundation 83 | - **Tournament Rankings**: View hypothesis competition results 84 | - **Proximity Graph**: Semantic relationship visualization 85 | - **Meta-Reviews**: Synthesized research insights 86 | - **Supervisor Decisions**: Workflow orchestration logs 87 | - **Final Report**: Comprehensive research summary 88 | 89 | ### Start a research run in Python 90 | ```python 91 | import asyncio 92 | from coscientist.framework import CoscientistConfig, CoscientistFramework 93 | from coscientist.global_state import CoscientistState, CoscientistStateManager 94 | 95 | goal = "How does the gut microbiome influence rheumatoid arthritis and can probiotics help to mitigate symptoms? If so, which ones are promising?" 
96 | initial_state = CoscientistState(goal=goal) 97 | 98 | config = CoscientistConfig() 99 | state_manager = CoscientistStateManager(initial_state) 100 | cosci = CoscientistFramework(config, state_manager) 101 | 102 | final_report, final_meta_review = asyncio.run(cosci.run()) 103 | ``` 104 | 105 | ## Performance & Scalability 106 | 107 | In principle, this system can be easily scaled with asynchronous execution of many tasks. In practice, API rate limits make it difficult to run in parallel. Future work will explore ways to get around this by smartly allocating work to different providers. 108 | 109 | Currently designed to work with 20-30 hypotheses in a tournament. Scaling that to more will require optimizations like smarter prioritization of head-to-head matches, summarizing context to make meta-review tractable, and actually supporting asynchronous execution. 110 | 111 | 112 | ## Caveats and sharp edges 113 | 114 | - The system isn't fully configurable and there are fields that are hardcoded (like number of hypotheses, subtopics for literature review, etc.). 115 | - Obviously no tests or evaluations yet. Getting feedback will help to steer this project in the right direction for research usefulness. 116 | 117 | ## Contributing 118 | 119 | We welcome contributions! 120 | 121 | ## License 122 | 123 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
124 | 125 | ## Acknowledgments 126 | 127 | - Inspired by Google DeepMind's research on AI-assisted scientific discovery 128 | - Built with [LangGraph](https://github.com/langchain-ai/langgraph) for agent orchestration 129 | - Uses [GPT Researcher](https://github.com/assafelovic/gpt-researcher) for literature analysis 130 | - Visualization powered by [Streamlit](https://streamlit.io/) and [Plotly](https://plotly.com/) 131 | -------------------------------------------------------------------------------- /app/README_tournament_viewer.md: -------------------------------------------------------------------------------- 1 | # Coscientist Viewer App 2 | 3 | A comprehensive Streamlit application for visualizing and exploring Coscientist research results, including tournament rankings and semantic proximity graphs. 4 | 5 | ## Features 6 | 7 | ### 🏆 Tournament Rankings Page 8 | - **ELO Rating System**: View hypotheses ranked by their tournament performance 9 | - **Detailed Hypothesis View**: Explore individual hypotheses with full context 10 | - **Match History**: See complete debate transcripts between competing hypotheses 11 | - **Hypothesis Lineage**: Track which hypotheses evolved from others 12 | - **Win-Loss Records**: Performance statistics for each hypothesis 13 | 14 | ### 📊 Proximity Graph Page 15 | - **Interactive Network Visualization**: Explore semantic relationships between hypotheses 16 | - **Community Detection**: Automatically discover groups of similar hypotheses using Louvain clustering 17 | - **Hover Interactions**: View full hypothesis descriptions by hovering over nodes 18 | - **Adjustable Parameters**: Control community detection sensitivity and edge filtering 19 | - **Graph Statistics**: View network metrics including node count, edges, and average similarity 20 | 21 | ## Installation 22 | 23 | ```bash 24 | pip install -r viewer_requirements.txt 25 | ``` 26 | 27 | ## Usage 28 | 29 | ### Starting the App 30 | 31 | ```bash 32 | streamlit run 
tournament_viewer.py 33 | ``` 34 | 35 | ### Loading Data 36 | 37 | 1. **Recent Files**: Select from automatically discovered Coscientist state files 38 | 2. **File Upload**: Upload a `.pkl` state file directly through the interface 39 | 40 | ### Navigation 41 | 42 | Use the sidebar to switch between: 43 | - **Tournament Rankings**: Competitive analysis of hypotheses 44 | - **Proximity Graph**: Semantic similarity visualization 45 | 46 | ## Proximity Graph Features 47 | 48 | ### Interactive Visualization 49 | - **Nodes**: Represent individual hypotheses 50 | - **Edges**: Show cosine similarity between hypothesis embeddings 51 | - **Colors**: Different colors indicate semantic communities 52 | - **Layout**: Spring-force layout for optimal node positioning 53 | 54 | ### Community Detection Controls 55 | - **Resolution**: Higher values create more, smaller communities 56 | - **Minimum Edge Weight**: Filter weak connections for cleaner clustering 57 | 58 | ### Graph Statistics 59 | - **Number of Hypotheses**: Total nodes in the graph 60 | - **Number of Connections**: Total edges between hypotheses 61 | - **Average Similarity**: Mean cosine similarity across all connections 62 | 63 | ## Data Requirements 64 | 65 | The app expects Coscientist state files (`.pkl`) containing: 66 | - **Tournament data**: For rankings and match analysis 67 | - **Proximity graph**: For semantic similarity visualization 68 | - **Reviewed hypotheses**: With detailed reasoning and predictions 69 | 70 | ## Technical Details 71 | 72 | ### Visualization Libraries 73 | - **Plotly**: Interactive graph visualization with zoom, pan, and hover 74 | - **NetworkX**: Graph processing and community detection algorithms 75 | - **Streamlit**: Web application framework 76 | 77 | ### Graph Layout 78 | - Uses spring-force layout algorithm for optimal node positioning 79 | - Nodes are sized uniformly but could be weighted by ELO rating 80 | - Edge opacity indicates connection strength 81 | 82 | ### Community 
Detection 83 | - Louvain method for community detection 84 | - Configurable resolution parameter 85 | - Edge filtering by minimum weight threshold 86 | 87 | ## File Structure 88 | 89 | ``` 90 | app/ 91 | ├── tournament_viewer.py # Main application with both pages 92 | ├── viewer_requirements.txt # Python dependencies 93 | └── README_tournament_viewer.md # This documentation 94 | ``` 95 | 96 | ## Dependencies 97 | 98 | - `streamlit>=1.28.0`: Web application framework 99 | - `pandas>=2.0.0`: Data manipulation and analysis 100 | - `plotly>=5.0.0`: Interactive visualizations 101 | - `networkx>=3.0`: Graph processing and algorithms 102 | 103 | ## Tips for Best Results 104 | 105 | ### Tournament Page 106 | - Use the detailed view to understand hypothesis evolution 107 | - Check match history to see reasoning behind rankings 108 | - Look for patterns in win-loss records 109 | 110 | ### Proximity Graph Page 111 | - Adjust resolution to find meaningful community sizes 112 | - Increase minimum edge weight to focus on strongest similarities 113 | - Hover over nodes to quickly compare similar hypotheses 114 | - Use the zoom and pan features to explore dense areas 115 | 116 | ## Troubleshooting 117 | 118 | ### Common Issues 119 | - **Empty Graph**: Check that the state file contains proximity graph data 120 | - **No Communities**: Try lowering the minimum edge weight or resolution 121 | - **Performance**: Large graphs (>50 nodes) may be slow to render 122 | 123 | ### File Format Requirements 124 | - State files must be valid Python pickle files 125 | - Must contain either tournament or proximity_graph data 126 | - Compatible with Coscientist framework output format 127 | 128 | ## Future Enhancements 129 | 130 | Potential improvements could include: 131 | - Node sizing based on ELO ratings or other metrics 132 | - Edge thickness proportional to similarity strength 133 | - Filtering by community or hypothesis attributes 134 | - Export functionality for graphs and rankings 135 | - 
import asyncio
import os

from coscientist.framework import CoscientistConfig, CoscientistFramework
from coscientist.global_state import CoscientistState, CoscientistStateManager


def _get_output_dir(goal_hash: str) -> str:
    """Return the per-goal output directory for an already-hashed goal.

    Centralizes the path logic that was previously duplicated in four
    functions of this module, so the directory layout is defined in
    exactly one place.
    """
    return os.path.join(
        os.environ.get("COSCIENTIST_DIR", os.path.expanduser("~/.coscientist")),
        goal_hash,
    )


def _get_done_file_path(goal: str) -> str:
    """Gets the path for the 'done' file for a given goal."""
    return os.path.join(
        _get_output_dir(CoscientistState._hash_goal(goal)), "done.txt"
    )


def coscientist_process_target(goal: str):
    """The target function for the multiprocessing.Process.

    Runs a full Coscientist research loop for ``goal``. On failure the
    exception text is written to ``error.log`` in the goal directory; in
    all cases a ``done.txt`` marker is written so the UI can detect
    completion via ``check_coscientist_status``.
    """
    try:
        # This will fail if the directory exists, which is what we want.
        initial_state = CoscientistState(goal=goal)
        config = CoscientistConfig()
        state_manager = CoscientistStateManager(initial_state)
        cosci = CoscientistFramework(config, state_manager)

        # Run the framework
        asyncio.run(cosci.run())

    except Exception as e:
        # Log error to a file in the goal directory.
        output_dir = _get_output_dir(CoscientistState._hash_goal(goal))
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...)` pattern.
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, "error.log"), "w") as f:
            f.write(str(e))
    finally:
        # Create a "done" file to signal completion.
        done_file = _get_done_file_path(goal)
        with open(done_file, "w") as f:
            f.write("done")


def check_coscientist_status(goal: str) -> str:
    """Checks the status of a Coscientist run.

    Returns ``"done"``, ``"error: <message>"`` (done with a logged
    failure), or ``"running"`` (no done marker yet — also returned when
    the goal directory does not exist at all).
    """
    output_dir = _get_output_dir(CoscientistState._hash_goal(goal))

    done_file = os.path.join(output_dir, "done.txt")
    error_file = os.path.join(output_dir, "error.log")

    if os.path.exists(done_file):
        if os.path.exists(error_file):
            with open(error_file, "r") as f:
                error_message = f.read()
            return f"error: {error_message}"
        return "done"
    return "running"


def get_coscientist_results(goal: str) -> tuple[str, str]:
    """Gets the results from a completed Coscientist run.

    Returns ``(final_report_text, meta_review_text)``; both elements are
    the fallback string ``"Results not found."`` when no usable state
    can be loaded.
    """
    state = CoscientistState.load_latest(goal=goal)
    if state and state.final_report and state.meta_reviews:
        # These are TypedDicts, access by key.
        final_report_text = state.final_report.get(
            "result", "Final report not generated."
        )
        meta_review_text = state.meta_reviews[-1].get(
            "result", "Meta review not generated."
        )
        return final_report_text, meta_review_text
    return "Results not found.", "Results not found."


def cleanup_coscientist_run(goal: str):
    """Cleans up the done/error marker files after a run is consumed."""
    output_dir = _get_output_dir(CoscientistState._hash_goal(goal))
    for filename in ("done.txt", "error.log"):
        marker = os.path.join(output_dir, filename)
        if os.path.exists(marker):
            os.remove(marker)


# -------------------------------------------------------------- app/common.py

import pickle
from typing import Optional

import streamlit as st

# Import the necessary types from the coscientist package
from coscientist.global_state import CoscientistState


def load_coscientist_state(filepath: str) -> Optional[CoscientistState]:
    """Load a CoscientistState from a pickle file.

    NOTE(review): ``pickle.load`` executes arbitrary code during
    deserialization — only load state files from trusted sources.
    """
    try:
        with open(filepath, "rb") as f:
            return pickle.load(f)
    except Exception as e:
        st.error(f"Error loading state file: {e}")
        return None


def load_coscientist_state_by_goal(goal: str) -> Optional[CoscientistState]:
    """Load the latest CoscientistState for a given research goal."""
    try:
        return CoscientistState.load_latest(goal=goal)
    except Exception as e:
        st.error(f"Error loading state for goal '{goal}': {e}")
        return None


def get_available_states() -> list[str]:
    """Get all available research goals from the goal-based directory structure."""
    try:
        # Use the CoscientistState method to get all available goals
        goals_and_dirs = CoscientistState.list_all_goals()
        # Return just the goal texts (first element of each tuple)
        return [goal for goal, _ in goals_and_dirs]
    except Exception as e:
        st.error(f"Error getting available states: {e}")
        return []
[goal for goal, _ in goals_and_dirs] 36 | except Exception as e: 37 | st.error(f"Error getting available states: {e}") 38 | return [] 39 | -------------------------------------------------------------------------------- /app/configuration_page.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import time 3 | 4 | import streamlit as st 5 | from langchain_anthropic import ChatAnthropic 6 | from langchain_google_genai import ChatGoogleGenerativeAI 7 | from langchain_openai import ChatOpenAI 8 | 9 | # Import the background process functions 10 | from background import ( 11 | check_coscientist_status, 12 | cleanup_coscientist_run, 13 | coscientist_process_target, 14 | get_coscientist_results, 15 | ) 16 | 17 | # Import the configuration agent and required models 18 | from coscientist.configuration_agent import ConfigurationChatManager 19 | 20 | # Import coscientist framework components 21 | from coscientist.global_state import CoscientistState 22 | 23 | 24 | def get_llm_options(): 25 | """Get available LLM options for the chat interface.""" 26 | return { 27 | "o3": ChatOpenAI(model="o3", max_tokens=5000, max_retries=3), 28 | "Gemini 2.5 Pro": ChatGoogleGenerativeAI( 29 | model="gemini-2.5-pro", 30 | temperature=1.0, 31 | max_retries=3, 32 | max_tokens=5000, 33 | ), 34 | "Claude Sonnet 4": ChatAnthropic( 35 | model="claude-sonnet-4-20250514", max_tokens=5000, max_retries=3 36 | ), 37 | } 38 | 39 | 40 | def display_configuration_page(): 41 | """Display the configuration agent chat page.""" 42 | st.markdown("### 🤖 Configuration Agent Chat") 43 | st.markdown( 44 | "Refine your research goal through an interactive conversation with the configuration agent." 
45 | ) 46 | 47 | # Initialize session state for chat 48 | if "chat_manager" not in st.session_state: 49 | st.session_state.chat_manager = None 50 | if "chat_history" not in st.session_state: 51 | st.session_state.chat_history = [] 52 | if "conversation_started" not in st.session_state: 53 | st.session_state.conversation_started = False 54 | if "refined_goal" not in st.session_state: 55 | st.session_state.refined_goal = "" 56 | if "coscientist_running" not in st.session_state: 57 | st.session_state.coscientist_running = False 58 | if "coscientist_result" not in st.session_state: 59 | st.session_state.coscientist_result = None 60 | if "coscientist_process" not in st.session_state: 61 | st.session_state.coscientist_process = None 62 | if "coscientist_error" not in st.session_state: 63 | st.session_state.coscientist_error = None 64 | 65 | # Configuration section 66 | st.subheader("🔧 Configuration") 67 | 68 | col1, col2 = st.columns([2, 1]) 69 | 70 | with col1: 71 | # Research goal input 72 | initial_goal = st.text_area( 73 | "Enter your initial research goal:", 74 | height=100, 75 | placeholder="e.g., Investigate the relationship between protein misfolding and neurodegeneration...", 76 | help="Provide a research question or goal that you'd like to refine through conversation.", 77 | ) 78 | 79 | with col2: 80 | # Model selection 81 | llm_options = get_llm_options() 82 | selected_model = st.selectbox( 83 | "Select Language Model:", 84 | options=list(llm_options.keys()), 85 | index=1, # Default to GPT-4o-mini 86 | help="Choose the language model for the configuration agent.", 87 | ) 88 | 89 | # Start/Reset buttons 90 | if st.button("🚀 Start New Conversation", type="primary"): 91 | if initial_goal.strip(): 92 | try: 93 | with st.spinner("Initializing conversation..."): 94 | llm = llm_options[selected_model] 95 | st.session_state.chat_manager = ConfigurationChatManager( 96 | llm, initial_goal.strip() 97 | ) 98 | st.session_state.conversation_started = True 99 | 
st.session_state.chat_history = [] 100 | st.session_state.refined_goal = "" 101 | 102 | # Get the initial agent message 103 | initial_message = ( 104 | st.session_state.chat_manager.get_latest_agent_message() 105 | ) 106 | st.session_state.chat_history.append(("Agent", initial_message)) 107 | 108 | st.success("Conversation started! 🎉") 109 | st.rerun() 110 | except Exception as e: 111 | st.error(f"Error starting conversation: {str(e)}") 112 | else: 113 | st.warning("Please enter a research goal first.") 114 | 115 | if st.session_state.conversation_started: 116 | if st.button("🔄 Reset Conversation"): 117 | if ( 118 | st.session_state.coscientist_process 119 | and st.session_state.coscientist_process.is_alive() 120 | ): 121 | st.session_state.coscientist_process.terminate() 122 | 123 | # Clear the goal directory if a goal was set 124 | if st.session_state.refined_goal: 125 | try: 126 | CoscientistState.clear_goal_directory( 127 | st.session_state.refined_goal 128 | ) 129 | st.info( 130 | f"Cleared data for goal: {st.session_state.refined_goal}" 131 | ) 132 | except Exception as e: 133 | st.warning(f"Could not clear goal directory: {e}") 134 | 135 | st.session_state.chat_manager = None 136 | st.session_state.conversation_started = False 137 | st.session_state.chat_history = [] 138 | st.session_state.refined_goal = "" 139 | st.session_state.coscientist_running = False 140 | st.session_state.coscientist_result = None 141 | st.session_state.coscientist_process = None 142 | st.session_state.coscientist_error = None 143 | st.rerun() 144 | 145 | # Chat interface 146 | if st.session_state.conversation_started and st.session_state.chat_manager: 147 | st.markdown("---") 148 | st.subheader("💬 Conversation") 149 | 150 | # Display chat history 151 | chat_container = st.container() 152 | with chat_container: 153 | for sender, message in st.session_state.chat_history: 154 | if sender == "Agent": 155 | with st.chat_message("assistant", avatar="🤖"): 156 | st.markdown(message) 157 | 
else: 158 | with st.chat_message("user", avatar="👤"): 159 | st.markdown(message) 160 | 161 | # Check if conversation is complete 162 | if st.session_state.chat_manager.is_conversation_complete(): 163 | st.success("🎉 Configuration complete!") 164 | refined_goal = st.session_state.chat_manager.get_refined_goal() 165 | st.session_state.refined_goal = refined_goal 166 | 167 | st.markdown("### 🎯 Final Refined Goal") 168 | st.markdown(f"**{refined_goal}**") 169 | 170 | # Buttons row 171 | col1, col2 = st.columns(2) 172 | 173 | with col1: 174 | # Option to copy the refined goal 175 | if st.button("📋 Copy Refined Goal"): 176 | st.code(refined_goal, language="text") 177 | st.info( 178 | "Refined goal displayed above - you can select and copy it." 179 | ) 180 | 181 | with col2: 182 | # Launch coscientist button 183 | if not st.session_state.coscientist_running: 184 | if st.button("🚀 Launch Coscientist", type="primary"): 185 | try: 186 | # Ensure the directory is clean before starting 187 | CoscientistState.clear_goal_directory(refined_goal) 188 | 189 | process = multiprocessing.Process( 190 | target=coscientist_process_target, args=(refined_goal,) 191 | ) 192 | process.start() 193 | st.session_state.coscientist_process = process 194 | st.session_state.coscientist_running = True 195 | st.session_state.refined_goal = refined_goal 196 | st.rerun() 197 | except Exception as e: 198 | st.error(f"Failed to launch Coscientist: {e}") 199 | 200 | else: 201 | st.button("🚀 Coscientist Running...", disabled=True) 202 | 203 | # Handle coscientist execution 204 | if st.session_state.coscientist_running: 205 | with st.spinner("🔬 Coscientist is running in the background..."): 206 | # Give it a moment before the first check 207 | time.sleep(5) 208 | st.rerun() # Rerun to check status 209 | 210 | # Check status if it was running 211 | if ( 212 | st.session_state.refined_goal 213 | and not st.session_state.coscientist_result 214 | ): 215 | status = 
check_coscientist_status(st.session_state.refined_goal) 216 | 217 | if status == "done": 218 | st.session_state.coscientist_running = False 219 | try: 220 | with st.spinner("Fetching results..."): 221 | final_report, meta_review = get_coscientist_results( 222 | st.session_state.refined_goal 223 | ) 224 | st.session_state.coscientist_result = { 225 | "final_report": final_report, 226 | "meta_review": meta_review, 227 | } 228 | cleanup_coscientist_run(st.session_state.refined_goal) 229 | st.success("🎉 Coscientist completed successfully!") 230 | st.rerun() 231 | except Exception as e: 232 | st.error(f"Error fetching results: {e}") 233 | st.session_state.coscientist_error = str(e) 234 | 235 | elif status.startswith("error:"): 236 | st.session_state.coscientist_running = False 237 | error_message = status.replace("error: ", "") 238 | st.session_state.coscientist_error = error_message 239 | cleanup_coscientist_run(st.session_state.refined_goal) 240 | st.error(f"Coscientist run failed: {error_message}") 241 | st.rerun() 242 | 243 | elif status == "running" and st.session_state.coscientist_running: 244 | st.info( 245 | "Coscientist is running. Feel free to navigate away or check back later." 
246 | ) 247 | if st.button("Refresh Status"): 248 | st.rerun() 249 | 250 | # Display error if it occurred 251 | if st.session_state.coscientist_error: 252 | st.error(f"Coscientist failed: {st.session_state.coscientist_error}") 253 | 254 | # Display results if available 255 | if st.session_state.coscientist_result is not None: 256 | st.markdown("### 📊 Coscientist Results") 257 | st.json(st.session_state.coscientist_result) 258 | 259 | # Reset button to run again 260 | if st.button("🔄 Run Coscientist Again"): 261 | st.session_state.coscientist_result = None 262 | st.session_state.coscientist_running = False 263 | st.session_state.coscientist_process = None 264 | st.session_state.coscientist_error = None 265 | st.rerun() 266 | 267 | else: 268 | # Chat input 269 | user_input = st.chat_input("Type your message here...") 270 | 271 | if user_input: 272 | try: 273 | with st.spinner("Agent is thinking..."): 274 | # Add user message to history 275 | st.session_state.chat_history.append(("User", user_input)) 276 | 277 | # Get agent response 278 | agent_response = ( 279 | st.session_state.chat_manager.send_human_message(user_input) 280 | ) 281 | 282 | # Add agent response to history 283 | st.session_state.chat_history.append(("Agent", agent_response)) 284 | 285 | st.rerun() 286 | except Exception as e: 287 | st.error(f"Error sending message: {str(e)}") 288 | 289 | # Instructions when no conversation is active 290 | if not st.session_state.conversation_started: 291 | st.markdown("---") 292 | st.info( 293 | "👆 Enter your research goal above and click 'Start New Conversation' to begin." 294 | ) 295 | 296 | st.markdown(""" 297 | ## How to Use the Configuration Agent 298 | 299 | 1. **Enter your research goal** in the text area above 300 | 2. **Select a language model** that will power the configuration agent 301 | 3. **Click "Start New Conversation"** to begin the interactive refinement process 302 | 4. **Chat with the agent** to refine and improve your research goal 303 | 5. 
**Receive your refined goal** when the conversation is complete 304 | 6. **Launch Coscientist** with your refined goal to begin the research process 305 | 306 | ### What the Configuration Agent Does 307 | 308 | The configuration agent helps you: 309 | - **Clarify vague research questions** by asking targeted questions 310 | - **Identify key variables and parameters** relevant to your research 311 | - **Suggest specific methodological approaches** that might be appropriate 312 | - **Refine the scope** of your research to make it more focused and actionable 313 | - **Ensure your goal is well-defined** for the subsequent research agents 314 | 315 | ### Tips for Better Results 316 | 317 | - **Be specific** about your domain of interest (e.g., biology, chemistry, physics) 318 | - **Mention any constraints** or limitations you're aware of 319 | - **Indicate your level of expertise** if relevant 320 | - **Ask questions** if you need clarification on the agent's suggestions 321 | - **Iterate** - don't hesitate to refine multiple times until you're satisfied 322 | """) 323 | -------------------------------------------------------------------------------- /app/final_report_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | 4 | def display_final_report_page(state): 5 | """ 6 | Display the final report page. 
def display_final_report_page(state):
    """
    Display the final report page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing the final report.
        Expected attributes (all optional; guarded with ``hasattr``):
        ``final_report`` (dict with a "result" key), ``actions``,
        ``supervisor_decisions``, ``meta_reviews``, and ``tournament``.
    """
    st.header("📋 Final Report")

    # The final report only exists once the supervisor has decided the run is done.
    if not hasattr(state, "final_report") or not state.final_report:
        st.warning("No final report found in this research state.")
        st.markdown("""
        ## Final Report Page

        This page displays the final research report generated when the Coscientist system completes its research:

        - **Comprehensive Summary**: Complete analysis of all hypotheses and findings
        - **Top Hypotheses**: Detailed review of the highest-ranked hypotheses
        - **Research Conclusions**: Final insights and recommendations
        - **Methodology Summary**: Overview of the research process and evaluation methods

        The final report is generated only when the supervisor agent decides the research process
        is complete and has achieved sufficient depth and quality in hypothesis exploration.
        """)
        return

    final_report_content = state.final_report.get("result", "")

    # Guard clause: a report entry with no text is an error condition.
    if not final_report_content:
        st.error("Final report exists but contains no content.")
        return

    st.markdown("### 📊 Research Summary")
    st.info("✅ Research process completed successfully!")

    # Display the final report content
    st.markdown(final_report_content)

    # Show some basic statistics if available
    with st.expander("📈 Research Statistics"):
        col1, col2 = st.columns(2)

        with col1:
            st.markdown("**Process Overview:**")
            if hasattr(state, "actions"):
                st.write(f"• Total Actions Taken: {len(state.actions)}")
            if hasattr(state, "supervisor_decisions"):
                st.write(
                    f"• Supervisor Decisions: {len(state.supervisor_decisions)}"
                )
            if hasattr(state, "meta_reviews"):
                st.write(f"• Meta-Reviews Completed: {len(state.meta_reviews)}")

        with col2:
            st.markdown("**Hypothesis Statistics:**")
            if hasattr(state, "tournament") and state.tournament:
                st.write(
                    f"• Tournament Hypotheses: {len(state.tournament.hypotheses)}"
                )
                # Tournament stats come from a project helper; tolerate failure
                # but do not swallow KeyboardInterrupt/SystemExit (was a bare except).
                try:
                    tournament_stats = (
                        state.tournament.summarize_tournament_trajectory()
                    )
                    st.write(
                        f"• Total Matches Played: {tournament_stats.get('total_matches_played', 'N/A')}"
                    )
                    # Look the rating up once instead of calling .get() twice.
                    max_elo = tournament_stats.get("max_elo_rating")
                    st.write(
                        f"• Max ELO Rating: {max_elo[0] if max_elo else 'N/A'}"
                    )
                except Exception:
                    st.write("• Tournament statistics unavailable")
def display_literature_review_page(state):
    """
    Display the literature review page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing literature review data
    """
    st.header("📚 Literature Review")

    # Without a stored review there is nothing to browse; show help text instead.
    if not hasattr(state, "literature_review") or not state.literature_review:
        st.warning("No literature review found in this research state.")
        st.markdown("""
        ## Literature Review Page

        This page displays the comprehensive literature review conducted for the research:

        - **Research Subtopics**: Systematic decomposition of the main research goal
        - **Subtopic Reports**: Detailed literature analysis for each research area
        - **Knowledge Foundation**: Scientific background that informs hypothesis generation
        - **Research Context**: Current state of knowledge in relevant fields

        The literature review is one of the first steps in the research process, providing
        the scientific foundation for generating well-informed research hypotheses.
        """)
        return

    review = state.literature_review
    topics = review.get("subtopics", [])
    reports = review.get("subtopic_reports", [])

    # Each subtopic must be paired with exactly one report; bail out loudly otherwise.
    if len(topics) != len(reports):
        st.error(
            f"Data inconsistency: {len(topics)} subtopics but {len(reports)} reports"
        )
        return

    if not topics:
        st.warning("Literature review exists but contains no subtopics.")
        return

    # Page header with the overall goal and review size.
    st.markdown(f"**Research Goal:** {state.goal}")
    st.markdown(f"**Total Subtopics:** {len(topics)}")

    st.subheader("🔍 Select Subtopic")

    # Remember the chosen subtopic across Streamlit reruns.
    if "selected_subtopic_index" not in st.session_state:
        st.session_state.selected_subtopic_index = 0

    def _label(idx):
        # Render dropdown entries as "1. <subtopic>".
        return f"{idx + 1}. {topics[idx]}"

    chosen = st.selectbox(
        "Choose a research subtopic:",
        range(len(topics)),
        format_func=_label,
        index=st.session_state.selected_subtopic_index,
        key="subtopic_selector",
    )

    # Persist any change of selection.
    if chosen != st.session_state.selected_subtopic_index:
        st.session_state.selected_subtopic_index = chosen

    st.subheader("📖 Subtopic Report")

    # Show subtopic header
    st.markdown(f"### {chosen + 1}. {topics[chosen]}")

    # Render the report body, or a placeholder when it is empty.
    report_text = reports[chosen]
    if report_text:
        with st.container():
            st.markdown(report_text)
    else:
        st.info("No report content available for this subtopic.")

    # Navigation help and summary stats.
    with st.expander("📊 Literature Review Summary"):
        left, right = st.columns(2)

        with left:
            st.markdown("**Review Statistics:**")
            st.write(f"• Total Subtopics: {len(topics)}")
            st.write(f"• Current Selection: #{chosen + 1}")
            st.write(f"• Reports Available: {sum(1 for r in reports if r)}")

        with right:
            st.markdown("**Navigation:**")
            st.write("• Use the dropdown above to browse subtopics")
            st.write("• Each subtopic represents a focused research area")
            st.write("• Reports provide scientific context for hypothesis generation")

        # Quick reference list; the current selection gets a filled marker.
        st.markdown("**All Research Subtopics:**")
        for idx, topic in enumerate(topics):
            bullet = "🔹" if idx == chosen else "◦"
            st.write(f"{bullet} {idx + 1}. {topic}")
def display_meta_reviews_page(state):
    """
    Display the meta-reviews page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing meta-reviews.
        ``state.meta_reviews`` is expected to be a list of dicts (oldest
        first) with at least a "result" key; "goal", "top_k" and
        "tournament" entries are optional — TODO confirm against the
        meta_review_agent output schema.
    """
    st.header("🔍 Meta-Reviews")

    # Nothing to show until at least one meta-review has been generated.
    if not hasattr(state, "meta_reviews") or not state.meta_reviews:
        st.warning("No meta-reviews found in this research state.")
        st.markdown("""
        ## Meta-Reviews Page

        This page displays the meta-review analyses generated throughout the research process:

        - **Strategic Analysis**: Comprehensive review of hypothesis quality and research progress
        - **Pattern Recognition**: Identification of strengths, weaknesses, and knowledge gaps
        - **Research Direction**: Guidance for future hypothesis generation and evolution
        - **Quality Assessment**: Evaluation of tournament results and hypothesis performance

        Meta-reviews are generated periodically to analyze the current state of research and guide
        the supervisor agent's strategic decisions about what actions to take next.
        """)
        return

    meta_reviews = state.meta_reviews
    # Reviews are stored oldest-first; the UI lists them latest-first.
    # Materialize the reversed view once instead of rebuilding it per use.
    latest_first = list(reversed(meta_reviews))

    # Two columns: meta-reviews list (left) and content display (right).
    col1, col2 = st.columns([1, 2])

    with col1:
        st.subheader("📚 Reviews History")
        st.markdown(f"**Total Meta-Reviews:** {len(meta_reviews)}")

        reviews_container = st.container()

        # Index into the latest-first list; 0 == most recent review.
        if "selected_meta_review_index" not in st.session_state:
            st.session_state.selected_meta_review_index = 0

        with reviews_container:
            for i in range(len(latest_first)):
                review_number = len(meta_reviews) - i  # Number from latest to oldest
                button_label = f"Meta-Review #{review_number}"

                if i == st.session_state.selected_meta_review_index:
                    # Highlight the selected meta-review instead of a button.
                    st.markdown(f"**🔹 {button_label}**")
                elif st.button(button_label, key=f"meta_review_{i}"):
                    st.session_state.selected_meta_review_index = i
                    st.rerun()

    with col2:
        st.subheader("📖 Meta-Review Content")

        if meta_reviews:
            selected_meta_review = latest_first[
                st.session_state.selected_meta_review_index
            ]
            review_number = (
                len(meta_reviews) - st.session_state.selected_meta_review_index
            )

            st.markdown(f"### Meta-Review #{review_number}")

            # Show the meta-review content.
            meta_review_content = selected_meta_review.get("result", "")
            if meta_review_content:
                st.markdown(meta_review_content)
            else:
                st.info("No content available for this meta-review.")

            # Show additional context in an expander.
            with st.expander("📊 Meta-Review Context"):
                context_cols = st.columns(2)

                with context_cols[0]:
                    st.markdown("**Review Information:**")
                    st.write(f"• Review Number: {review_number} of {len(meta_reviews)}")

                    # Surface other fields stored alongside the review, if present.
                    if "goal" in selected_meta_review:
                        st.write("• Research Goal Available: ✅")
                    if "top_k" in selected_meta_review:
                        st.write(
                            f"• Top K Analyzed: {selected_meta_review.get('top_k', 'N/A')}"
                        )

                with context_cols[1]:
                    st.markdown("**System State:**")
                    # Show tournament info if available.
                    if (
                        "tournament" in selected_meta_review
                        and selected_meta_review["tournament"]
                    ):
                        tournament = selected_meta_review["tournament"]
                        if hasattr(tournament, "hypotheses"):
                            st.write(
                                f"• Hypotheses in Tournament: {len(tournament.hypotheses)}"
                            )
                            # Narrowed from a bare except so KeyboardInterrupt
                            # and SystemExit still propagate.
                            try:
                                win_loss_records = tournament.get_win_loss_records()
                                st.write(f"• Ranked Hypotheses: {len(win_loss_records)}")
                            except Exception:
                                st.write("• Tournament statistics unavailable")
                    else:
                        st.write("• Tournament data not available")
        else:
            st.info("No meta-reviews available to display.")
def create_cytoscape_elements(graph, communities):
    """Convert NetworkX graph to Cytoscape elements format.

    Parameters
    ----------
    graph : networkx.Graph or None
        Graph whose nodes carry an optional "hypothesis" text attribute and
        whose edges carry an optional "weight" attribute.
    communities : list[Iterable]
        Node-id groups; community ``i`` is rendered with palette color
        ``i % 10``.

    Returns
    -------
    tuple[list, list]
        ``(elements, stylesheet)`` ready to pass to ``st_cytoscape.cytoscape``;
        both empty when the graph is missing or has no nodes.
    """
    if graph is None or len(graph.nodes()) == 0:
        return [], []

    G = graph

    # Fixed 10-color palette; stylesheet class "community-i" uses colors[i].
    colors = [
        "#FF6B6B",
        "#4ECDC4",
        "#45B7D1",
        "#96CEB4",
        "#FFEAA7",
        "#DDA0DD",
        "#98D8C8",
        "#FFA07A",
        "#B19CD9",
        "#FFB6C1",
    ]
    community_colors = {}
    for i, community in enumerate(communities):
        color = colors[i % len(colors)]
        for node_id in community:
            community_colors[node_id] = color

    # Create nodes
    elements = []

    for node_id in G.nodes():
        hypothesis_text = G.nodes[node_id].get("hypothesis", f"Hypothesis {node_id}")

        # Short label on the node; (truncated) full text kept for the tooltip.
        label = f"H{node_id}"
        if len(hypothesis_text) > 80:
            tooltip = hypothesis_text[:80] + "..."
        else:
            tooltip = hypothesis_text

        # BUG FIX: the class index was previously hash(color) % 10. Python
        # salts str hashes per process, so colors were nondeterministic across
        # runs and could collide. Using the palette index guarantees the node
        # class matches the stylesheet entry for its community's color.
        node_color = community_colors.get(node_id, colors[0])
        elements.append(
            {
                "data": {
                    "id": str(node_id),
                    "label": label,
                    "hypothesis": hypothesis_text,
                    "tooltip": tooltip,
                },
                "classes": f"community-{colors.index(node_color)}",
            }
        )

    # Create edges
    for edge in G.edges(data=True):
        weight = edge[2].get("weight", 0)
        elements.append(
            {
                "data": {
                    "id": f"{edge[0]}-{edge[1]}",
                    "source": str(edge[0]),
                    "target": str(edge[1]),
                    "weight": weight,
                }
            }
        )

    # Create stylesheet: one style per community class, colored from the palette.
    node_styles = []
    for i in range(10):
        color = colors[i % len(colors)]
        node_styles.append(
            {
                "selector": f".community-{i}",
                "style": {
                    "background-color": color,
                    "border-width": 2,
                    "border-color": "#ffffff",
                    "color": "#ffffff",
                    "text-valign": "center",
                    "text-halign": "center",
                    "font-size": "12px",
                    "font-weight": "bold",
                    "width": 50,
                    "height": 50,
                },
            }
        )

    stylesheet = [
        {
            "selector": "node",
            "style": {
                "content": "data(label)",
                "text-valign": "center",
                "text-halign": "center",
                "font-size": "12px",
                "font-weight": "bold",
                "width": 50,
                "height": 50,
                "border-width": 2,
                "border-color": "#ffffff",
            },
        },
        {
            "selector": "edge",
            "style": {
                "width": 2,
                "line-color": "#cccccc",
                "opacity": 0.6,
                "curve-style": "bezier",
            },
        },
        {
            "selector": "node:selected",
            "style": {
                "border-width": 4,
                "border-color": "#333333",
                "background-color": "#333333",
            },
        },
        {
            "selector": "edge:selected",
            "style": {"line-color": "#333333", "width": 4, "opacity": 1.0},
        },
    ] + node_styles

    return elements, stylesheet
def display_proximity_graph_page(state):
    """Display the proximity graph page."""
    st.markdown(
        "Explore the semantic similarity between hypotheses and their communities."
    )

    # Guard clauses: need a loaded state with a non-empty proximity graph.
    if state is None:
        st.info(
            "👈 Please select or upload a Coscientist state file from the sidebar to view the proximity graph."
        )
        return

    if state.proximity_graph is None:
        st.warning("No proximity graph data found in this state file.")
        return

    prox = state.proximity_graph

    if len(prox.graph.nodes()) == 0:
        st.warning(
            "The proximity graph is empty - no hypotheses have been added to it yet."
        )
        return

    # Sliders controlling community detection and edge filtering.
    st.subheader("Graph Filtering & Community Detection")
    slider_left, slider_right = st.columns(2)

    with slider_left:
        resolution = st.slider(
            "Resolution (higher = more communities)",
            min_value=0.1,
            max_value=2.0,
            value=1.0,
            step=0.1,
            help="Controls the size of communities. Higher values create more, smaller communities.",
        )

    with slider_right:
        min_weight = st.slider(
            "Minimum Edge Weight",
            min_value=0.0,
            max_value=1.0,
            value=0.85,
            step=0.05,
            help="Only edges with similarity above this threshold will be shown in the graph.",
        )

    # Drop edges below the similarity threshold.
    filtered_graph = prox.get_pruned_graph(min_weight)
    node_count = len(filtered_graph.nodes())
    edge_count = len(filtered_graph.edges())

    # Warn (or stop) when filtering removed too much of the graph.
    if node_count == 0:
        st.warning(
            f"⚠️ No hypotheses remain after filtering with minimum edge weight {min_weight:.2f}. Try lowering the threshold."
        )
        return
    elif edge_count == 0:
        st.warning(
            f"⚠️ No connections remain after filtering with minimum edge weight {min_weight:.2f}. The graph will show isolated nodes."
        )

    # Mean similarity over the surviving edges (0 when none remain).
    if edge_count > 0:
        weights = [
            attrs.get("weight", 0) for _, _, attrs in filtered_graph.edges(data=True)
        ]
        avg_similarity = sum(weights) / len(weights)
    else:
        avg_similarity = 0

    metric_a, metric_b, metric_c = st.columns(3)
    with metric_a:
        st.metric("Hypotheses (Filtered)", node_count)
    with metric_b:
        st.metric("Connections (Filtered)", edge_count)
    with metric_c:
        st.metric("Avg Similarity (Filtered)", f"{avg_similarity:.3f}")

    # Community detection on the filtered graph.
    communities = prox.get_semantic_communities(
        resolution=resolution, min_weight=min_weight
    )

    st.subheader(f"Semantic Communities ({len(communities)} found)")

    if communities:
        for idx, members in enumerate(communities):
            with st.expander(f"Community {idx+1} ({len(members)} hypotheses)"):
                for node_id in members:
                    text = filtered_graph.nodes[node_id].get(
                        "hypothesis", f"Hypothesis {node_id}"
                    )
                    st.markdown(f"**H{node_id}:** {text}")
    else:
        st.info(
            "No communities detected with current settings. Try lowering the minimum edge weight or adjusting the resolution parameter."
        )

    # Interactive Cytoscape rendering of the filtered graph.
    st.subheader("Interactive Graph Visualization")

    elements, stylesheet = create_cytoscape_elements(filtered_graph, communities)

    if not elements:
        st.error("Could not create visualization. Please check the data.")
        return

    # fcose layout parameters.
    layout_options = {
        "name": "fcose",
        "animationDuration": 1000,
        "fit": True,
        "padding": 50,
        "nodeSeparation": 100,
        "idealEdgeLength": 100,
        "edgeElasticity": 0.1,
        "nestingFactor": 0.1,
        "numIter": 1000,
        "initialEnergyOnIncremental": 0.3,
        "gravityRangeCompound": 1.5,
        "gravityCompound": 1.0,
        "gravityRange": 3.8,
    }

    # Widget key embeds the slider values so the graph redraws on changes.
    selected = cytoscape(
        elements=elements,
        stylesheet=stylesheet,
        layout=layout_options,
        selection_type="additive",
        width="100%",
        height="600px",
        key=f"proximity_graph_{min_weight}_{resolution}",
    )

    # Echo details for whatever the user clicked in the graph.
    if selected and (selected["nodes"] or selected["edges"]):
        st.subheader("🎯 Selected Elements")

        if selected["nodes"]:
            st.markdown("**Selected Hypotheses:**")
            for node_id in selected["nodes"]:
                match = next(
                    (el for el in elements if el["data"]["id"] == node_id), None
                )
                if match is not None:
                    st.markdown(f"**H{node_id}:** {match['data']['hypothesis']}")

        if selected["edges"]:
            st.markdown(f"**Selected Connections:** {len(selected['edges'])} edge(s)")

    st.info("""
    **How to interact with the graph:**
    - **Click nodes** to select them and see full hypothesis text below
    - **Drag nodes** to rearrange the layout
    - **Zoom and pan** to explore different areas
    - **Different colors** represent different semantic communities
    - **Hold Ctrl/Cmd + click** to select multiple nodes
    - **Double-click empty space** to fit the graph to view
    - **Adjust sliders above** to dynamically filter the graph and update communities
    """)

    st.success(
        f"📊 **Graph Status:** Showing {node_count} hypotheses and {edge_count} connections with similarity ≥ {min_weight:.2f}"
    )
def coscientist_resume_target(goal: str):
    """The target function for resuming a Coscientist process.

    Runs in a child process (see ``display_resume_page``): loads the latest
    saved checkpoint for *goal*, rebuilds the framework, and runs it to
    completion. On failure the traceback is written to ``error.log`` inside
    the goal's output directory; a "done" marker file is always written so
    ``check_coscientist_status`` can detect termination.

    Parameters
    ----------
    goal : str
        The research goal whose checkpoint should be resumed.
    """
    try:
        # Load the existing state instead of creating a new one.
        initial_state = CoscientistState.load_latest(goal=goal)
        if initial_state is None:
            raise Exception(f"No existing state found for goal: {goal}")

        config = CoscientistConfig()
        state_manager = CoscientistStateManager(initial_state)
        cosci = CoscientistFramework(config, state_manager)

        # Run the framework
        asyncio.run(cosci.run())

    except Exception:
        import traceback

        # Log the full traceback (not just the message) to a file in the
        # goal directory so failures are diagnosable after the process dies.
        goal_hash = CoscientistState._hash_goal(goal)
        output_dir = os.path.join(
            os.environ.get("COSCIENTIST_DIR", os.path.expanduser("~/.coscientist")),
            goal_hash,
        )
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...): os.makedirs(...)` pattern.
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, "error.log"), "w") as f:
            f.write(traceback.format_exc())
    finally:
        # Create a "done" file to signal completion either way.
        done_file = _get_done_file_path(goal)
        with open(done_file, "w") as f:
            f.write("done")
def display_resume_page():
    """Render the "Resume from Checkpoint" page.

    Lets the user pick a previously started research goal, inspect whether it
    has finished or is running elsewhere, and relaunch the Coscientist
    framework from its latest checkpoint in a background process.
    """
    st.header("🔄 Resume from Checkpoint")

    st.markdown("""
    Resume a Coscientist research process from where it left off. This page allows you to:

    - Select an existing research goal that has been started
    - Check if the research is already completed
    - Resume the research process from the latest checkpoint
    """)

    # Ensure the background-process bookkeeping keys exist across reruns.
    if "resume_process" not in st.session_state:
        st.session_state.resume_process = None
    if "resume_goal" not in st.session_state:
        st.session_state.resume_goal = None

    goals = get_available_states()
    if not goals:
        st.warning(
            "No existing research goals found. Please start a new research goal first."
        )
        return

    st.subheader("📋 Select Research Goal")

    def _shorten(text):
        # Keep very long goals readable in the dropdown.
        return text[:100] + "..." if len(text) > 100 else text

    chosen_goal = st.selectbox(
        "Choose a research goal to resume:",
        options=goals,
        format_func=_shorten,
        help="Select an existing research goal to resume from its latest checkpoint",
    )

    if chosen_goal:
        status_col, action_col = st.columns([2, 1])

        with status_col:
            st.subheader("📊 Goal Status")

            try:
                # Load the latest checkpoint to see how far the run got.
                checkpoint = CoscientistState.load_latest(goal=chosen_goal)
                if checkpoint is None:
                    st.error("❌ No state found for this goal. Cannot resume.")
                    return

                manager = CoscientistStateManager(checkpoint)
                finished = manager.is_finished
                status = check_coscientist_status(chosen_goal)

                if finished:
                    st.success("✅ This research goal has already been completed!")
                    st.info(
                        "The research process for this goal has finished. You can view the results in the Tournament Rankings or Proximity Graph pages."
                    )
                elif status == "running":
                    st.warning("⏳ This goal is currently running in another process.")
                    st.info(
                        "Please wait for the current process to finish before resuming."
                    )
                elif status.startswith("error"):
                    # status looks like "error: <message>"; strip the prefix.
                    st.error(f"❌ Previous run ended with error: {status[7:]}")
                    st.info(
                        "You can try resuming to continue from the last successful checkpoint."
                    )
                else:
                    st.info("🔄 This goal can be resumed.")
                    st.success("Ready to resume from the latest checkpoint.")

                # Summarize what the checkpoint contains.
                with st.expander("📈 Current State Information"):
                    st.write(f"**Goal:** {chosen_goal}")
                    st.write(f"**Finished:** {'Yes ✅' if finished else 'No ❌'}")
                    if hasattr(checkpoint, "hypotheses") and checkpoint.hypotheses:
                        st.write(
                            f"**Number of Hypotheses:** {len(checkpoint.hypotheses)}"
                        )
                    if (
                        hasattr(checkpoint, "tournament_results")
                        and checkpoint.tournament_results
                    ):
                        st.write(
                            f"**Tournament Matches:** {len(checkpoint.tournament_results)}"
                        )

            except Exception as e:
                st.error(f"❌ Error checking goal status: {str(e)}")
                return

        with action_col:
            st.subheader("🚀 Resume Action")

            # Resumable only if: a checkpoint exists, the run is unfinished,
            # nothing else is running it, and no worker of ours is alive.
            worker = st.session_state.resume_process
            can_resume = (
                checkpoint is not None
                and not finished
                and status != "running"
                and (worker is None or not worker.is_alive())
            )

            resume_help = (
                "Resume the research process from the latest checkpoint"
                if can_resume
                else "Cannot resume: check the status information"
            )
            if st.button("🔄 Resume Research", disabled=not can_resume, help=resume_help):
                try:
                    # Spawn the background worker process.
                    new_proc = multiprocessing.Process(
                        target=coscientist_resume_target, args=(chosen_goal,)
                    )
                    new_proc.start()
                    st.session_state.resume_process = new_proc
                    st.session_state.resume_goal = chosen_goal
                    st.success(f"🚀 Resumed research for: {chosen_goal[:50]}...")
                    st.info(
                        "The research process is now running in the background. You can check the status below or refresh the page to see updates."
                    )

                except Exception as e:
                    st.error(f"❌ Failed to resume research: {str(e)}")

    # Report on any worker we launched earlier.
    if st.session_state.resume_process is not None and st.session_state.resume_goal:
        st.subheader("🔄 Resume Process Status")

        if st.session_state.resume_process.is_alive():
            st.info(
                f"⏳ Research is currently running for: {st.session_state.resume_goal[:50]}..."
            )
            if st.button("🔄 Refresh Status"):
                st.rerun()
        else:
            # Worker exited; translate its final status for the user.
            final_status = check_coscientist_status(st.session_state.resume_goal)
            if final_status == "done":
                st.success(
                    f"✅ Research completed successfully for: {st.session_state.resume_goal[:50]}..."
                )
            elif final_status.startswith("error"):
                st.error(f"❌ Research ended with error: {final_status[7:]}")

            # Forget the finished worker.
            st.session_state.resume_process = None
            st.session_state.resume_goal = None

    with st.expander("💡 Tips for Resuming Research"):
        st.markdown("""
        **Before resuming:**
        - Make sure the research goal is not already completed
        - Check that no other process is currently running for this goal
        - Review the current state information to understand progress

        **During resume:**
        - The process runs in the background - you can navigate to other pages
        - Use the refresh button to check status updates
        - Check the Tournament Rankings page to see new results as they appear

        **After completion:**
        - View results in the Tournament Rankings page
        - Explore hypothesis relationships in the Proximity Graph page
        - Results are automatically saved and can be viewed later
        """)
import streamlit as st


def display_supervisor_page(state):
    """
    Display the supervisor decisions page.

    Shows every action taken by the supervisor agent (latest first) alongside
    the reasoning and system metrics that were recorded at decision time.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing supervisor decisions and actions
    """
    st.header("🎯 Supervisor Decisions")

    # Nothing to show for states that never reached a supervisor decision.
    if not hasattr(state, "supervisor_decisions") or not state.supervisor_decisions:
        st.warning("No supervisor decisions found in this research state.")
        st.markdown("""
        ## Supervisor Decisions Page

        This page displays the decision-making process of the supervisor agent:

        - **Actions Taken**: See all actions decided by the supervisor in chronological order
        - **Decision Reasoning**: View the detailed reasoning behind each decision
        - **Strategic Context**: Understand the system state that influenced each decision

        The supervisor agent analyzes the research progress and decides what actions to take next,
        such as generating hypotheses, running tournaments, or finishing the research.
        """)
        return

    # Get supervisor decisions and actions (parallel lists).
    supervisor_decisions = state.supervisor_decisions
    actions = state.actions

    # Verify they are correlated before indexing one by the other.
    if len(supervisor_decisions) != len(actions):
        st.error(
            f"Mismatch between supervisor decisions ({len(supervisor_decisions)}) and actions ({len(actions)})"
        )
        return

    # Reverse once (latest first); both columns index into these same lists.
    reversed_actions = list(reversed(actions))
    reversed_decisions = list(reversed(supervisor_decisions))

    # Create two columns: actions list and reasoning display
    col1, col2 = st.columns([1, 2])

    with col1:
        st.subheader("📋 Actions History")
        st.markdown(f"**Total Actions:** {len(actions)}")

        # Container for the scrollable actions list.
        actions_container = st.container()

        # Initialize session state for selected action
        if "selected_action_index" not in st.session_state:
            st.session_state.selected_action_index = 0  # Default to latest action

        # BUGFIX: the selected index persists in session state across state
        # files; clamp a stale value so switching to a run with fewer actions
        # cannot raise an IndexError in the indexing below.
        if st.session_state.selected_action_index >= len(actions):
            st.session_state.selected_action_index = 0

        with actions_container:
            # Display actions in reverse order (latest first) with numbering.
            for i, action in enumerate(reversed_actions):
                action_number = len(actions) - i  # Number from latest to oldest

                button_key = f"action_{i}"
                button_label = f"#{action_number}: {action}"

                if i == st.session_state.selected_action_index:
                    # Highlight the selected action instead of rendering a button.
                    st.markdown(f"**🔹 {button_label}**")
                elif st.button(button_label, key=button_key):
                    st.session_state.selected_action_index = i
                    st.rerun()

    with col2:
        st.subheader("💭 Decision Reasoning")

        if supervisor_decisions:
            # Both lists are already reversed, so one index addresses the pair.
            selected_index = st.session_state.selected_action_index
            selected_decision = reversed_decisions[selected_index]
            selected_action = reversed_actions[selected_index]
            action_number = len(actions) - selected_index

            # Display the action and reasoning
            st.markdown(f"### Action #{action_number}: `{selected_action}`")

            # Show the reasoning, if the decision recorded any.
            if (
                "decision_reasoning" in selected_decision
                and selected_decision["decision_reasoning"]
            ):
                st.markdown("**Reasoning:**")
                st.markdown(selected_decision["decision_reasoning"])
            else:
                st.info("No detailed reasoning available for this action.")

            # Show additional context in an expander. Values come from the
            # decision dict; .get() keeps older states without a key readable.
            with st.expander("📊 System Context at Decision Time"):
                context_cols = st.columns(2)

                with context_cols[0]:
                    st.markdown("**Research Metrics:**")
                    st.write(
                        f"• Total Hypotheses: {selected_decision.get('total_hypotheses', 'N/A')}"
                    )
                    st.write(
                        f"• Unranked Hypotheses: {selected_decision.get('num_unranked_hypotheses', 'N/A')}"
                    )
                    st.write(
                        f"• Meta-Reviews: {selected_decision.get('num_meta_reviews', 'N/A')}"
                    )
                    st.write(
                        f"• Literature Subtopics: {selected_decision.get('literature_review_subtopics_completed', 'N/A')}"
                    )

                with context_cols[1]:
                    st.markdown("**Tournament Metrics:**")
                    st.write(
                        f"• Total Matches: {selected_decision.get('total_matches_played', 'N/A')}"
                    )
                    st.write(
                        f"• Tournament Rounds: {selected_decision.get('total_rounds_played', 'N/A')}"
                    )
                    st.write(
                        f"• New Hypotheses Since Meta-Review: {selected_decision.get('new_hypotheses_since_meta_review', 'N/A')}"
                    )

                # Show recent actions context
                if (
                    "latest_actions" in selected_decision
                    and selected_decision["latest_actions"]
                ):
                    st.markdown("**Recent Actions Context:**")
                    st.text(selected_decision["latest_actions"])
        else:
            st.info("No supervisor decisions available to display.")
import pandas as pd
import streamlit as st

from coscientist.custom_types import ReviewedHypothesis


def format_hypothesis_summary(hypothesis: ReviewedHypothesis, elo_rating: float) -> str:
    """Format a brief summary of the hypothesis for the list view.

    Long texts are hard-truncated at 150 characters; shorter texts are cut at
    the first sentence when that actually shortens them.
    """
    hypothesis_text = hypothesis.hypothesis
    if len(hypothesis_text) > 150:
        hypothesis_text = hypothesis_text[:150] + "..."
    elif "." in hypothesis_text:
        first_sentence = hypothesis_text.split(".")[0] + "."
        if len(first_sentence) < len(hypothesis_text):
            hypothesis_text = first_sentence

    return f"**ELO: {elo_rating:.1f}** | {hypothesis_text}"


def display_hypothesis_details(
    hypothesis: ReviewedHypothesis,
    elo_rating: float,
    win_loss_record: dict,
    available_uids: list[str],
):
    """Display detailed information about a hypothesis.

    Parameters
    ----------
    hypothesis : ReviewedHypothesis
        The hypothesis to render.
    elo_rating : float
        Current ELO rating of the hypothesis.
    win_loss_record : dict
        Mapping with "wins"/"losses" counts for this hypothesis.
    available_uids : list[str]
        UIDs present in the tournament; used to decide whether the parent
        link can be made clickable.
    """
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown(f"### Hypothesis {hypothesis.uid}")
        st.markdown(f"**Full Hypothesis:** {hypothesis.hypothesis}")

        if hypothesis.parent_uid:
            # Only link to the parent if it is still part of this tournament.
            if hypothesis.parent_uid in available_uids:
                if st.button(
                    f"🔗 **Evolved from:** {hypothesis.parent_uid}",
                    key=f"parent_link_{hypothesis.uid}",
                ):
                    st.session_state.selected_hypothesis = hypothesis.parent_uid
                    st.rerun()
            else:
                st.info(f"🔗 **Evolved from:** {hypothesis.parent_uid} (not available)")

    with col2:
        st.metric("ELO Rating", f"{elo_rating:.1f}")
        col2_1, col2_2 = st.columns(2)
        with col2_1:
            st.metric("Wins", win_loss_record.get("wins", 0))
        with col2_2:
            st.metric("Losses", win_loss_record.get("losses", 0))

    # Detailed sections in tabs
    tab1, tab2, tab3, tab4 = st.tabs(
        ["🔬 Predictions", "🧠 Reasoning", "📚 Verification", "🏛️ Assumptions"]
    )

    with tab1:
        st.markdown("**Testable Predictions:**")
        for i, prediction in enumerate(hypothesis.predictions, 1):
            st.markdown(f"{i}. {prediction}")

    with tab2:
        st.markdown("**Causal Reasoning:**")
        st.markdown(hypothesis.causal_reasoning)

    with tab3:
        st.markdown("**Deep Verification Result:**")
        st.markdown(hypothesis.verification_result)

    with tab4:
        st.markdown("**Core Assumptions:**")
        for i, assumption in enumerate(hypothesis.assumptions, 1):
            st.markdown(f"{i}. {assumption}")

        if hypothesis.assumption_research_results:
            st.markdown("**Research on Assumptions:**")
            for assumption, research in hypothesis.assumption_research_results.items():
                with st.expander(f"Research: {assumption[:100]}..."):
                    st.markdown(research)


def display_match_history(tournament, hypothesis_uid: str):
    """Display match history (with debate transcripts) for one hypothesis."""
    matches = []

    for match_key, match_result in tournament.match_history.items():
        # Keep only matches this hypothesis participated in.
        if hypothesis_uid in [match_result.uid1, match_result.uid2]:
            opponent_uid = (
                match_result.uid2
                if match_result.uid1 == hypothesis_uid
                else match_result.uid1
            )
            # Winner is encoded as 1 or 2, referring to uid1/uid2 respectively.
            won = (
                match_result.uid1 == hypothesis_uid and match_result.winner == 1
            ) or (match_result.uid2 == hypothesis_uid and match_result.winner == 2)

            # NOTE(review): assumes match_key[2] == 1 marks round-robin play;
            # verify against the tournament implementation.
            stage = "Round Robin" if match_key[2] == 1 else "Bracket"

            matches.append(
                {
                    "Stage": stage,
                    "Opponent": opponent_uid,
                    "Result": "Win" if won else "Loss",
                    "Debate": match_result.debate,
                }
            )

    if matches:
        st.markdown("### 🥊 Match History")
        for i, match in enumerate(matches):
            result_emoji = "🏆" if match["Result"] == "Win" else "❌"
            with st.expander(
                f"{result_emoji} {match['Stage']} vs {match['Opponent']} - {match['Result']}"
            ):
                st.markdown("**Debate Transcript:**")
                st.markdown(match["Debate"])
    else:
        st.info("No matches found for this hypothesis.")


def display_tournament_page(state):
    """Display the tournament rankings page."""
    st.markdown(
        "Explore hypotheses ranked by ELO rating with detailed information and match history."
    )

    if state is None:
        return

    # Display basic info
    st.markdown(f"**Research Goal:** {state.goal}")

    if state.tournament is None:
        st.warning("No tournament data found in this state file.")
        return

    tournament = state.tournament
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    win_loss_records = tournament.get_win_loss_records()

    if not sorted_hypotheses:
        st.warning("No hypotheses found in the tournament.")
        return

    # PERF: build the uid -> elo lookup once. Previously dict(sorted_hypotheses)
    # was reconstructed inside the selectbox format_func (once per rendered
    # option) and again for each later lookup.
    elo_by_uid = dict(sorted_hypotheses)

    st.markdown(
        f"**Total Hypotheses:** {len(sorted_hypotheses)} | **Total Matches:** {len(tournament.match_history)}"
    )

    # Tournament Rankings
    st.header("🏆 Tournament Rankings")

    # Create a summary table
    df_data = []
    for uid, elo_rating in sorted_hypotheses:
        hypothesis = tournament.hypotheses[uid]
        record = win_loss_records.get(uid, {"wins": 0, "losses": 0})
        df_data.append(
            {
                "Rank": len(df_data) + 1,
                "UID": uid,
                "ELO": f"{elo_rating:.1f}",
                "W-L": f"{record['wins']}-{record['losses']}",
                "Hypothesis": hypothesis.hypothesis[:100] + "..."
                if len(hypothesis.hypothesis) > 100
                else hypothesis.hypothesis,
            }
        )

    df = pd.DataFrame(df_data)
    st.dataframe(df, use_container_width=True, hide_index=True)

    # Detailed view
    st.header("📊 Detailed Hypothesis View")

    # Initialize session state for selected hypothesis if not exists
    available_uids = [uid for uid, _ in sorted_hypotheses]
    if "selected_hypothesis" not in st.session_state:
        st.session_state.selected_hypothesis = (
            available_uids[0] if available_uids else None
        )

    # Ensure the selected hypothesis is still valid (in case state file changed)
    if st.session_state.selected_hypothesis not in available_uids:
        st.session_state.selected_hypothesis = (
            available_uids[0] if available_uids else None
        )

    # Let user select which hypothesis to view in detail
    selected_uid = st.selectbox(
        "Select a hypothesis for detailed view:",
        options=available_uids,
        format_func=lambda uid: f"{uid} (ELO: {elo_by_uid[uid]:.1f})",
        index=available_uids.index(st.session_state.selected_hypothesis)
        if st.session_state.selected_hypothesis in available_uids
        else 0,
        key="hypothesis_selector",
    )

    # Update session state when selectbox changes
    if selected_uid != st.session_state.selected_hypothesis:
        st.session_state.selected_hypothesis = selected_uid

    if selected_uid:
        hypothesis = tournament.hypotheses[selected_uid]
        elo_rating = elo_by_uid[selected_uid]
        win_loss_record = win_loss_records.get(selected_uid, {"wins": 0, "losses": 0})

        # Display detailed information
        display_hypothesis_details(
            hypothesis, elo_rating, win_loss_record, available_uids
        )

        # Display match history
        display_match_history(tournament, selected_uid)
import os

import streamlit as st

# Import from modular pages
from common import (
    get_available_states,
    load_coscientist_state,
    load_coscientist_state_by_goal,
)
from configuration_page import display_configuration_page
from final_report_page import display_final_report_page
from literature_review_page import display_literature_review_page
from meta_reviews_page import display_meta_reviews_page
from proximity_page import display_proximity_graph_page
from resume_page import display_resume_page
from supervisor_page import display_supervisor_page
from tournament_page import display_tournament_page

st.set_page_config(page_title="Coscientist Viewer", page_icon="🧪", layout="wide")

# Sidebar navigation
st.sidebar.title("🧪 Coscientist Viewer")
page = st.sidebar.selectbox(
    "Select Page",
    [
        "Configuration Agent",
        "Literature Review",
        "Tournament Rankings",
        "Proximity Graph",
        "Meta-Reviews",
        "Supervisor Decisions",
        "Final Report",
        "Resume from Checkpoint",
    ],
)

# Shared prompt shown whenever a state-backed page has no state loaded yet.
_SELECT_STATE_PROMPT = (
    "👈 Please select a research goal or upload a Coscientist state file "
    "from the sidebar to get started."
)

# Pages that operate on a loaded CoscientistState, mapped to their display
# function and the placeholder markdown shown before a state is selected.
# The markdown is flush-left so st.markdown renders it as formatted text
# rather than an indented code block.
_STATE_PAGES = {
    "Literature Review": (
        display_literature_review_page,
        """
## Literature Review Page

View the comprehensive literature review that forms the foundation of the research:

1. **Research Subtopics** - see how the main research goal was systematically decomposed
2. **Subtopic Reports** - select any subtopic to view its detailed literature analysis
3. **Knowledge Foundation** - understand the scientific background informing hypothesis generation

**What you'll see:**
- **Dropdown Navigation**: Select from numbered subtopics to explore different research areas
- **Detailed Reports**: Comprehensive literature analysis for each subtopic
- **Research Context**: Scientific foundation that guides hypothesis generation
- **Summary Statistics**: Overview of subtopics covered and reports available

The literature review is one of the first steps in the research process, providing
the scientific foundation for generating well-informed, evidence-based research hypotheses.
""",
    ),
    "Tournament Rankings": (
        display_tournament_page,
        """
## Tournament Rankings Page

View and explore hypotheses ranked by ELO rating:

1. **Browse tournament rankings** - see all hypotheses ranked by ELO rating
2. **Select a hypothesis** for detailed view to see:
   - Full hypothesis text and predictions
   - Causal reasoning and verification results
   - Assumptions and supporting research
   - Complete match history with debate transcripts

**What you'll see:**
- **ELO Ratings**: Higher scores indicate stronger performance in head-to-head comparisons
- **Win-Loss Records**: Track record against other hypotheses
- **Match History**: Full debate transcripts showing why one hypothesis beat another
- **Hypothesis Lineage**: See which hypotheses evolved from others
""",
    ),
    "Proximity Graph": (
        display_proximity_graph_page,
        """
## Proximity Graph Page

Explore the semantic relationships between hypotheses using advanced network visualization:

1. **Interactive Cytoscape.js graph** with hypotheses as nodes and similarities as edges
2. **Community detection** to find groups of semantically similar hypotheses
3. **Click nodes** to select them and see full hypothesis text
4. **Drag and rearrange** nodes to explore relationships
5. **Adjust parameters** to control community detection sensitivity

**What you'll see:**
- **Node colors**: Different colors represent different semantic communities
- **Interactive layout**: Force-directed positioning based on similarity
- **Edges**: Connections show cosine similarity between hypothesis embeddings
- **Statistics**: Number of hypotheses, connections, and average similarity
- **Selection feedback**: Click nodes to see their full hypothesis text below the graph

**Advanced Features:**
- Multi-node selection with Ctrl/Cmd + click
- Smooth animations and transitions
- Professional network graph layout algorithms
- Real-time interaction feedback
""",
    ),
    "Meta-Reviews": (
        display_meta_reviews_page,
        """
## Meta-Reviews Page

View the strategic analysis and review process of the research:

1. **Reviews Timeline** - see all meta-reviews generated during the research process
2. **Strategic Analysis** - click on any meta-review to see the full analysis
3. **Research Guidance** - understand how each review guided future research directions

**What you'll see:**
- **Numbered Reviews**: Latest meta-reviews appear first with sequential numbering
- **Strategic Analysis**: Full text of the meta-review analysis and insights
- **Research Context**: Tournament state and hypothesis counts at review time
- **Quality Assessment**: Evaluation of hypothesis performance and research progress

Meta-reviews are generated periodically to analyze the current state of research,
identify patterns and gaps, and guide the supervisor agent's strategic decisions.
""",
    ),
    "Supervisor Decisions": (
        display_supervisor_page,
        """
## Supervisor Decisions Page

View the decision-making process of the supervisor agent:

1. **Actions Timeline** - see all actions taken by the supervisor in chronological order
2. **Decision Reasoning** - click on any action to see the detailed reasoning behind it
3. **System Context** - understand the research state that influenced each decision

**What you'll see:**
- **Numbered Actions**: Latest actions appear first with sequential numbering
- **Decision Reasoning**: Full text of the supervisor's strategic thinking
- **System Metrics**: Research state, hypothesis counts, tournament progress at decision time
- **Recent Context**: What other actions were taken recently that influenced the decision

The supervisor agent analyzes the research progress and decides what actions to take next,
such as generating new hypotheses, evolving existing ones, running tournaments, or finishing the research.
""",
    ),
    "Final Report": (
        display_final_report_page,
        """
## Final Report Page

View the comprehensive final research report generated upon completion:

1. **Complete Analysis** - comprehensive summary of all research findings
2. **Top Hypotheses** - detailed review of the highest-ranked hypotheses
3. **Research Conclusions** - final insights and recommendations
4. **Process Summary** - overview of the research methodology and evaluation

**What you'll see:**
- **Final Report**: Complete research summary and conclusions
- **Research Statistics**: Overview of hypotheses generated, tournaments run, and key metrics
- **Process Completion**: Confirmation that the research process finished successfully

The final report is generated only when the supervisor agent determines that the research
has achieved sufficient depth and quality, and further investigation would yield diminishing returns.
""",
    ),
}


def _select_state_file():
    """
    Render the sidebar research-goal selector and optional file uploader.

    Returns
    -------
    tuple
        ``(selected_file, temp_path)`` where ``selected_file`` is either a
        goal string, the path of an uploaded temp file, or None; and
        ``temp_path`` is the uploaded temp file path (None when nothing was
        uploaded).
    """
    selected_file = None
    temp_path = None

    with st.sidebar:
        st.header("📁 Select Research Goal")

        # Update button
        col1, col2 = st.columns([2, 1])
        with col1:
            st.markdown("**Available Goals:**")
        with col2:
            if st.button("Update", help="Refresh file list and load latest"):
                st.session_state.file_list_updated = True
                st.rerun()

        # Get available state files
        available_states = get_available_states()

        # Auto-select most recent file if update was clicked or none selected.
        if st.session_state.file_list_updated or st.session_state.current_file is None:
            if available_states:
                st.session_state.current_file = available_states[0]  # Most recent
                if st.session_state.file_list_updated:
                    st.success(
                        f"📁 Updated! Latest goal: {available_states[0][:50]}{'...' if len(available_states[0]) > 50 else ''}"
                    )
        # Always clear the one-shot update flag, even when no states exist,
        # so a click cannot leave the flag permanently set.
        st.session_state.file_list_updated = False

        if available_states:
            # Find index of current file in the list (in case files changed).
            current_index = 0
            if st.session_state.current_file in available_states:
                current_index = available_states.index(st.session_state.current_file)

            selected_file = st.selectbox(
                "Choose a research goal:",
                options=available_states,
                format_func=lambda x: x,  # Display the goal text directly
                index=current_index,
                key="file_selector",
            )

            # Update session state when selection changes
            if selected_file != st.session_state.current_file:
                st.session_state.current_file = selected_file
        else:
            st.warning("No Coscientist research goals found.")
            selected_file = None
            st.session_state.current_file = None

        # File upload option
        st.markdown("**Or upload a file:**")
        uploaded_file = st.file_uploader("Upload .pkl file", type="pkl")

        if uploaded_file is not None:
            # Save uploaded file temporarily; main() removes it after rendering.
            temp_path = f"temp_{uploaded_file.name}"
            with open(temp_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            selected_file = temp_path

    return selected_file, temp_path


def main():
    st.title("🧪 Coscientist Viewer")

    # Initialize session state for file selection
    if "current_file" not in st.session_state:
        st.session_state.current_file = None
    if "file_list_updated" not in st.session_state:
        st.session_state.file_list_updated = False

    selected_file = None
    temp_path = None
    state = None

    # Sidebar file selection only for pages that read a saved state.
    if page in _STATE_PAGES:
        selected_file, temp_path = _select_state_file()

        if selected_file:
            # BUGFIX: detect uploads by comparing against the actual temp
            # path rather than a "temp_" prefix test, which could
            # misclassify a research goal whose text starts with "temp_".
            if temp_path is not None and selected_file == temp_path:
                state = load_coscientist_state(selected_file)
            else:
                # It's a goal text, load the matching saved state.
                state = load_coscientist_state_by_goal(selected_file)

    # Display appropriate page based on navigation
    if page == "Configuration Agent":
        display_configuration_page()
    elif page == "Resume from Checkpoint":
        display_resume_page()
    else:
        # All remaining pages are state-backed and share the same
        # "no state yet" placeholder pattern.
        display_fn, placeholder_markdown = _STATE_PAGES[page]
        if state is None:
            st.info(_SELECT_STATE_PROMPT)
            st.markdown(placeholder_markdown)
        else:
            display_fn(state)

    # Clean up temp file if it was uploaded
    if temp_path and os.path.exists(temp_path):
        os.remove(temp_path)


if __name__ == "__main__":
    main()
import os
import re

from jinja2 import Environment, FileSystemLoader, select_autoescape

from coscientist.custom_types import ParsedHypothesis

# Single shared Jinja environment rooted at this package's prompts/ directory.
# trim_blocks/lstrip_blocks keep rendered markdown free of stray whitespace
# around template tags.
_env = Environment(
    loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), "prompts")),
    autoescape=select_autoescape(),
    trim_blocks=True,
    lstrip_blocks=True,
)


def load_prompt(name: str, **kwargs) -> str:
    """
    Load a template from the prompts directory and renders
    it with the given kwargs.

    Parameters
    ----------
    name: str
        The name of the template to load, without the .md extension.
    **kwargs: dict
        The kwargs to render the template with.

    Returns
    -------
    str
        The rendered template.
    """
    return _env.get_template(f"{name}.md").render(**kwargs)


def parse_hypothesis_markdown(markdown_text: str) -> ParsedHypothesis:
    """
    Parse markdown text with # headings to extract Hypothesis, Predictions,
    and Assumptions sections.

    Parameters
    ----------
    markdown_text : str
        Markdown text containing sections with # headings for Hypothesis,
        Predictions, and Assumptions

    Returns
    -------
    ParsedHypothesis
        Structured output with hypothesis, predictions, and assumptions
        fields extracted from markdown

    Raises
    ------
    AssertionError
        If any of the three required sections is missing or empty.
    """
    # Keep only the content after the report marker, if present.
    # BUGFIX: maxsplit=1 keeps everything after the first marker; the
    # previous unbounded split discarded content that followed a second
    # occurrence of "#FINAL REPORT#".
    if "#FINAL REPORT#" in markdown_text:
        markdown_text = markdown_text.split("#FINAL REPORT#", 1)[1]

    # Split the text by '#' to get heading-delimited sections.
    sections = markdown_text.split("#")

    # Initialize fields
    hypothesis = ""
    predictions: list[str] = []
    assumptions: list[str] = []

    for section in sections:
        section = section.strip()
        if not section:
            continue

        # First line is the heading title, the remainder is its content.
        lines = section.split("\n", 1)
        if len(lines) < 2:
            continue

        title = lines[0].strip().lower()
        content = lines[1].strip()

        # Match section titles (case-insensitive, substring match so
        # e.g. "Core Assumptions" also hits).
        if "hypothesis" in title:
            hypothesis = content
        elif "prediction" in title:
            predictions = _parse_numbered_list(content)
        elif "assumption" in title:
            assumptions = _parse_numbered_list(content)

    # NOTE: asserts kept (rather than raising ValueError) so any existing
    # callers that catch AssertionError keep working; they do strip under -O.
    assert hypothesis, f"Hypothesis section is required: {markdown_text}"
    assert predictions, f"Predictions section is required: {markdown_text}"
    assert assumptions, f"Assumptions section is required: {markdown_text}"

    return ParsedHypothesis(
        hypothesis=hypothesis, predictions=predictions, assumptions=assumptions
    )


def _parse_numbered_list(content: str) -> list[str]:
    """
    Parse a numbered list from text content into a list of strings.

    Parameters
    ----------
    content : str
        Text containing a numbered list (e.g., "1. First item\\n2. Second item")

    Returns
    -------
    list[str]
        List of individual items with numbering removed. Unnumbered lines are
        treated as continuations of the current item (or as the first item
        when nothing is open yet).
    """
    if not content.strip():
        return []

    # Matches "1. item", "1) item", "1- item"; leading whitespace allowed.
    number_pattern = re.compile(r"^\s*\d+[\.\)\-]\s*(.+)", re.MULTILINE)

    items: list[str] = []
    current_item = ""

    for line in content.split("\n"):
        line = line.strip()
        if not line:
            continue

        match = number_pattern.match(line)
        if match:
            # A new numbered entry starts: flush the previous one.
            if current_item:
                items.append(current_item.strip())
            current_item = match.group(1)
        elif current_item:
            # Continuation line of the current item.
            current_item += " " + line
        else:
            # First line doesn't start with a number; treat it as an item.
            current_item = line

    # Add the last item
    if current_item:
        items.append(current_item.strip())

    return items
"""

import uuid
from typing import Sequence, TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated

from coscientist.common import load_prompt


class ConfigurationState(TypedDict):
    """
    Represents the state of the interactive configuration process.

    Uses LangGraph's standard message-based state management for better
    conversation handling and persistence.

    Parameters
    ----------
    messages: Annotated[Sequence[BaseMessage], add_messages]
        The conversation messages between agent and user
    goal: str
        The initial research goal to refine
    refined_goal: str
        The final refined goal (set when process is complete)
    is_complete: bool
        Whether the configuration process is complete
    """

    # The `add_messages` reducer makes LangGraph append newly returned
    # messages to the stored history instead of overwriting it.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    goal: str
    refined_goal: str
    is_complete: bool


def build_configuration_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for the interactive configuration agent process.

    The graph uses LangGraph's built-in message persistence and follows best
    practices for chatbot development including:
    - Proper message state management (via the `add_messages` reducer)
    - Built-in checkpointer (InMemorySaver) for conversation persistence

    Parameters
    ----------
    llm: BaseChatModel
        The language model to use for the agent responses

    Returns
    -------
    StateGraph
        A compiled LangGraph for the interactive configuration agent
    """
    # Create the workflow
    workflow = StateGraph(state_schema=ConfigurationState)

    # Add the single configuration node; the llm is bound via closure.
    workflow.add_node("configuration", lambda state: _configuration_node(state, llm))

    # Set up the flow
    workflow.add_edge(START, "configuration")

    # Add memory for conversation persistence.
    # NOTE: InMemorySaver is process-local; conversation state is lost when
    # the process exits — swap in a durable checkpointer if persistence
    # across restarts is needed.
    memory = InMemorySaver()

    return workflow.compile(checkpointer=memory)


def _configuration_node(
    state: ConfigurationState, llm: BaseChatModel
) -> ConfigurationState:
    """
    Node that processes the conversation and generates the agent's response.
    """
    prompt = load_prompt("research_config", goal=state["goal"])

    # Ensure we have messages to work with
    messages = state.get("messages", [])
    if not messages:
        # If no messages, create a default user message to start the conversation
        messages = [HumanMessage(content="Please help me refine my research goal.")]

    # System prompt carries the research goal; the running conversation is
    # injected through the MessagesPlaceholder.
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", prompt), MessagesPlaceholder(variable_name="messages")]
    )

    # Prepare the input for the prompt template
    template_input = {"messages": messages}
    formatted_prompt = prompt_template.invoke(template_input)

    response = llm.invoke(formatted_prompt)

    # The agent signals completion by emitting a "FINAL GOAL:" marker.
    is_complete = "FINAL GOAL:" in response.content
    refined_goal = state.get("refined_goal", "")

    if is_complete:
        # Extract the final goal (everything after the marker).
        try:
            refined_goal = response.content.split("FINAL GOAL:")[1].strip()
        except IndexError:
            # Fallback if parsing fails
            refined_goal = response.content

    # Only the new response is returned in "messages"; the add_messages
    # reducer appends it to the persisted history.
    return {
        "messages": [response],
        "goal": state["goal"],
        "refined_goal": refined_goal,
        "is_complete": is_complete,
    }


class ConfigurationChatManager:
    """
    Manages the interactive chat process for configuration refinement.

    This class handles the conversation flow between the user and the configuration
    agent, maintaining state and managing the workflow execution until completion.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for agent responses
    research_goal : str
        The initial research goal to be refined through conversation
    """

    def __init__(self, llm: BaseChatModel, research_goal: str):
        """
        Initialize the chat manager with an LLM and research goal.

        Parameters
        ----------
        llm : BaseChatModel
            The language model for the configuration agent
        research_goal : str
            The initial research goal to refine
        """
        self.llm = llm
        self.research_goal = research_goal
        self.agent = build_configuration_agent(llm)
        # A fresh thread id isolates this conversation in the checkpointer.
        self.config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        self.current_state = None
        self.is_complete = False
        self.refined_goal = ""

        # Initialize the conversation
        self._initialize_conversation()

    def _initialize_conversation(self):
        """Initialize the conversation with the research goal."""
        # Start with an initial user message to trigger the agent's response
        initial_message = HumanMessage(
            content="Please help me refine my research goal and ask clarifying questions if needed."
        )
        initial_state = ConfigurationState(
            messages=[initial_message],
            goal=self.research_goal,
            refined_goal="",
            is_complete=False,
        )
        self.current_state = self.agent.invoke(initial_state, self.config)
        self.is_complete = self.current_state.get("is_complete", False)
        self.refined_goal = self.current_state.get("refined_goal", "")

    def send_human_message(self, message: str) -> str:
        """
        Send a human message to the agent and get the response.

        Parameters
        ----------
        message : str
            The human message to send to the agent

        Returns
        -------
        str
            The agent's response message

        Raises
        ------
        RuntimeError
            If the conversation is already complete
        """
        if self.is_complete:
            raise RuntimeError(
                "Conversation is already complete. The refined goal is available."
            )

        # Send human message to the agent; the checkpointer (keyed by
        # thread_id in self.config) restores goal and prior messages.
        input_messages = [HumanMessage(message)]
        output = self.agent.invoke({"messages": input_messages}, self.config)

        # Update state
        self.current_state = output
        self.is_complete = output.get("is_complete", False)
        self.refined_goal = output.get("refined_goal", "")

        # Get the latest AI message
        messages = output.get("messages", [])
        if messages:
            latest_message = messages[-1]
            if hasattr(latest_message, "content"):
                return latest_message.content

        return "No response received from agent."

    def get_latest_agent_message(self) -> str:
        """
        Get the latest message from the agent.

        Returns
        -------
        str
            The latest agent message content
        """
        if not self.current_state:
            return "No messages yet."

        messages = self.current_state.get("messages", [])
        if messages:
            latest_message = messages[-1]
            if hasattr(latest_message, "content"):
                return latest_message.content

        return "No agent messages found."

    def is_conversation_complete(self) -> bool:
        """
        Check if the configuration conversation is complete.

        Returns
        -------
        bool
            True if the conversation is complete, False otherwise
        """
        return self.is_complete

    def get_refined_goal(self) -> str:
        """
        Get the refined research goal.

        Returns
        -------
        str
            The refined goal if conversation is complete, empty string otherwise
        """
        return self.refined_goal if self.is_complete else ""

    def get_conversation_history(self) -> Sequence[BaseMessage]:
        """
        Get the full conversation history.

        Returns
        -------
        Sequence[BaseMessage]
            All messages in the conversation
        """
        if not self.current_state:
            return []

        return self.current_state.get("messages", [])


# --------------------------------------------------------------------------
# /coscientist/custom_types.py
# --------------------------------------------------------------------------
import uuid

from pydantic import BaseModel, Field


class ParsedHypothesis(BaseModel):
    """Structured output for parsed hypothesis."""

    # Auto-generated UUID so each hypothesis can be tracked through
    # review, ranking, and evolution.
    uid: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the hypothesis",
    )
    hypothesis: str = Field(description="The main hypothesis statement")
    predictions: list[str] = Field(
        description="A list of predictions that could be tested to disprove the hypothesis"
    )
    assumptions: list[str] = Field(
        description="A list of assumptions that are implicit or explicit in the hypothesis"
    )
    # None for first-generation hypotheses; set when evolved from a parent.
    parent_uid: str | None = Field(
        default=None,
        description="The unique identifier of the parent hypothesis, if applicable",
    )


class ReviewedHypothesis(ParsedHypothesis):
    """Structured output for reviewed hypothesis."""

    causal_reasoning: str = Field(description="The causal reasoning for the hypothesis")
    assumption_research_results: dict[str, str] = Field(
        description="A dictionary of assumption research results"
    )
    verification_result: str = Field(
        description="The result of the deep verification process"
    )


class RankingMatchResult(BaseModel):
    """Result of a match between two hypotheses."""

    uid1: str = Field(description="Unique identifier for the first hypothesis")
    uid2: str = Field(description="Unique identifier for the second hypothesis")
    winner: int = Field(description="The winner of the match (1 or 2)")
    debate: str = Field(description="The debate between the two hypotheses")


# --------------------------------------------------------------------------
# /coscientist/evolution_agent.py
# --------------------------------------------------------------------------
"""
Evolution agent
--------------
- Inspiration from other ideas
- Simplification
- Research extension

More details:
- Looks for weaknesses in a hypothesis, makes search queries to
  evaluate them and suggests improvements to fill in reasoning gaps.
- Tries to fix invalid assumptions identified by the Reflection agent
  and to improve practicality and feasibility for testing.
- Creates new hypotheses using multiple top-ranked ones as inspiration or
  by combining them in new ways.
- Intentionally tries to generate out-of-the-box ideas that are
  divergent from existing ones.
- Never replaces an existing hypothesis, but always adds a new one
  that should in principle be better.
"""

from typing import TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt, parse_hypothesis_markdown
from coscientist.custom_types import ParsedHypothesis, ReviewedHypothesis


class EvolveFromFeedbackState(TypedDict):
    """
    State for the `evolve_from_feedback` prompt agent.
    """

    goal: str
    parent_hypothesis: ReviewedHypothesis
    meta_review: str
    # Output slot filled by the evolution node.
    evolved_hypothesis: ParsedHypothesis


class OutOfTheBoxState(TypedDict):
    """
    State for the `out_of_the_box` prompt agent.
44 | """ 45 | 46 | goal: str 47 | top_hypotheses: list[ReviewedHypothesis] 48 | elo_ratings: list[float] 49 | evolved_hypothesis: ParsedHypothesis 50 | 51 | 52 | def build_evolution_agent( 53 | mode: str, 54 | llm: BaseChatModel, 55 | ) -> StateGraph: 56 | """ 57 | Unified builder function for evolution agents that supports both evolve_from_feedback and out_of_the_box modes. 58 | 59 | Parameters 60 | ---------- 61 | mode : str 62 | The mode of operation, either "evolve_from_feedback" or "out_of_the_box". 63 | llm : BaseChatModel 64 | The language model to use for both evolution and standardization. 65 | 66 | Returns 67 | ------- 68 | StateGraph 69 | A compiled LangGraph for the evolution agent. 70 | 71 | Raises 72 | ------ 73 | ValueError 74 | If mode is invalid. 75 | """ 76 | if mode == "evolve_from_feedback": 77 | return _build_evolve_from_feedback_agent(llm) 78 | elif mode == "out_of_the_box": 79 | return _build_out_of_the_box_agent(llm) 80 | else: 81 | raise ValueError( 82 | "mode must be either 'evolve_from_feedback' or 'out_of_the_box'" 83 | ) 84 | 85 | 86 | def _evolve_from_feedback_node( 87 | state: EvolveFromFeedbackState, 88 | llm: BaseChatModel, 89 | ) -> EvolveFromFeedbackState: 90 | """ 91 | Evolution node for evolving a hypothesis based on feedback. 
92 | """ 93 | prompt = load_prompt( 94 | "evolve_from_feedback", 95 | goal=state["goal"], 96 | hypothesis=state["parent_hypothesis"].hypothesis, 97 | review=state["parent_hypothesis"].verification_result, 98 | meta_review=state["meta_review"], 99 | ) 100 | response_content = llm.invoke(prompt).content 101 | parsed_hypothesis = parse_hypothesis_markdown(response_content) 102 | parsed_hypothesis.parent_uid = state["parent_hypothesis"].uid 103 | return {**state, "evolved_hypothesis": parsed_hypothesis} 104 | 105 | 106 | def _out_of_the_box_node( 107 | state: OutOfTheBoxState, 108 | llm: BaseChatModel, 109 | ) -> OutOfTheBoxState: 110 | """ 111 | Evolution node for generating out-of-the-box ideas from top hypotheses. 112 | """ 113 | # Convert list of hypotheses to formatted string 114 | hypotheses_text = "\n".join( 115 | [ 116 | f"- {hyp.hypothesis} (Elo rating: {elo_rating})" 117 | for hyp, elo_rating in zip(state["top_hypotheses"], state["elo_ratings"]) 118 | ] 119 | ) 120 | 121 | prompt = load_prompt( 122 | "out_of_the_box", 123 | goal=state["goal"], 124 | hypotheses=hypotheses_text, 125 | ) 126 | response_content = llm.invoke(prompt).content 127 | parsed_hypothesis = parse_hypothesis_markdown(response_content) 128 | return {**state, "evolved_hypothesis": parsed_hypothesis} 129 | 130 | 131 | def _build_evolve_from_feedback_agent(llm: BaseChatModel) -> StateGraph: 132 | """ 133 | Builds and configures a LangGraph for evolving hypotheses from feedback. 134 | 135 | Parameters 136 | ---------- 137 | llm : BaseChatModel 138 | The language model to use for both evolution and standardization. 139 | 140 | Returns 141 | ------- 142 | StateGraph 143 | A compiled LangGraph for the evolve-from-feedback agent. 
    """
    graph = StateGraph(EvolveFromFeedbackState)

    graph.add_node(
        "evolution",
        lambda state: _evolve_from_feedback_node(state, llm),
    )
    graph.add_edge("evolution", END)

    graph.set_entry_point("evolution")
    return graph.compile()


def _build_out_of_the_box_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for generating out-of-the-box ideas.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for both evolution and standardization.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the out-of-the-box agent.
    """
    graph = StateGraph(OutOfTheBoxState)

    graph.add_node(
        "evolution",
        lambda state: _out_of_the_box_node(state, llm),
    )

    graph.add_edge("evolution", END)

    graph.set_entry_point("evolution")
    return graph.compile()


# --------------------------------------------------------------------------
# /coscientist/final_report_agent.py
# --------------------------------------------------------------------------
"""
Final report agent
------------------
- Generates a comprehensive scientific research report
- Takes tournament results and formats them into a professional report
- Provides detailed analysis of top-ranked hypotheses with experimental suggestions

More details:
- Formats all hypotheses by ELO ranking for overview
- Provides detailed information for top k hypotheses including causal reasoning,
  verification results, and falsifiable predictions
- Generates a structured scientific report suitable for domain experts
"""

from typing import TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt
from coscientist.custom_types import ReviewedHypothesis
from coscientist.ranking_agent import EloTournament


class FinalReportState(TypedDict):
    """
    State for the final report agent.
    """

    goal: str
    tournament: EloTournament
    # Number of top-ranked hypotheses to describe in detail in the report.
    top_k: int
    # Output slot: the generated report text.
    result: str


def build_final_report_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for final report generation.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for final report generation.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the final report agent.
    """
    graph = StateGraph(FinalReportState)

    graph.add_node(
        "final_report",
        lambda state: _final_report_node(state, llm),
    )

    graph.add_edge("final_report", END)
    graph.set_entry_point("final_report")
    return graph.compile()


def _format_hypothesis_with_rating(
    hypothesis: ReviewedHypothesis, rating: float
) -> str:
    """Helper function to format a hypothesis with its ELO rating."""
    return f"Hypothesis {hypothesis.uid} (ELO: {rating:.2f}): {hypothesis.hypothesis}"


def _format_detailed_hypothesis(hypothesis: ReviewedHypothesis, rating: float) -> str:
    """Helper function to format a hypothesis with detailed information."""
    sections = [
        f"## Hypothesis {hypothesis.uid} (ELO: {rating:.2f})",
        f"**Hypothesis Statement:** {hypothesis.hypothesis}",
        f"**Causal Reasoning:** {hypothesis.causal_reasoning}",
        f"**Verification Result:** {hypothesis.verification_result}",
        f"**Falsifiable Predictions:** {' '.join(hypothesis.predictions)}",
    ]
    return "\n\n".join(sections)


def _get_top_hypotheses_data(
    tournament: EloTournament, top_k: int
) -> list[tuple[str, float]]:
    """Helper function to get top k hypotheses sorted by ELO rating."""
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    return sorted_hypotheses[:top_k]


def _final_report_node(
    state: FinalReportState,
    llm: BaseChatModel,
) -> FinalReportState:
    """
    Final report node that generates a comprehensive scientific research report.
    """
    tournament = state["tournament"]
    top_k = state.get("top_k", 3)  # Default to top 3 hypotheses

    # Build hypotheses by ranking - all hypotheses sorted by ELO rating
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    hypotheses_by_ranking_entries = []
    for hyp_id, rating in sorted_hypotheses:
        hypothesis = tournament.hypotheses[hyp_id]
        hypotheses_by_ranking_entries.append(
            _format_hypothesis_with_rating(hypothesis, rating)
        )
    hypotheses_by_ranking_text = "\n".join(hypotheses_by_ranking_entries)

    # Build detailed top hypotheses information
    top_hypotheses_data = _get_top_hypotheses_data(tournament, top_k)
    top_ranked_hypotheses_entries = []
    for hyp_id, rating in top_hypotheses_data:
        hypothesis = tournament.hypotheses[hyp_id]
        top_ranked_hypotheses_entries.append(
            _format_detailed_hypothesis(hypothesis, rating)
        )
    top_ranked_hypotheses_text = "\n\n".join(top_ranked_hypotheses_entries)

    prompt = load_prompt(
        "final_report",
        goal=state["goal"],
        hypotheses_by_ranking=hypotheses_by_ranking_text,
        top_ranked_hypotheses=top_ranked_hypotheses_text,
    )
    response_content = llm.invoke(prompt).content
    return {**state, "result": response_content}


# --------------------------------------------------------------------------
# /coscientist/generation_agent.py
# --------------------------------------------------------------------------
"""
Generation agent
---------------
- Literature exploration
- Simulated scientific debates
"""

from dataclasses import dataclass
from typing import TypedDict, Union

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist import multiturn
from coscientist.common import load_prompt, parse_hypothesis_markdown
from coscientist.custom_types import ParsedHypothesis
from coscientist.reasoning_types import ReasoningType


class IndependentState(TypedDict):
    # Inputs to the generation node.
    goal: str
    literature_review: str
    meta_review: str
    # Output: structured hypothesis parsed from the raw markdown result.
    hypothesis: ParsedHypothesis
    _raw_result: str  # Private temporary field for markdown output


class CollaborativeState(IndependentState, multiturn.MultiTurnState):
    pass


@dataclass
class IndependentConfig:
    """Configuration for independent generation mode."""

    field: str
    reasoning_type: ReasoningType
    llm: BaseChatModel


@dataclass
class CollaborativeConfig:
    """Configuration for collaborative generation mode."""

    agent_names: list[str]
    agent_fields: dict[str, str]
    agent_reasoning_types: dict[str, ReasoningType]
    llms: dict[str, BaseChatModel]
    max_turns: int = 10


def build_generation_agent(
    mode: str,
    config: Union[IndependentConfig, CollaborativeConfig],
) -> StateGraph:
    """
    Unified builder function for generation agents that supports both independent
    and collaborative modes.

    Parameters
    ----------
    mode : str
        The mode of operation, either "independent" or "collaborative".
    config : Union[IndependentConfig, CollaborativeConfig]
        Configuration object containing all necessary parameters for the selected mode.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the generation agent.

    Raises
    ------
    ValueError
        If mode is invalid or required parameters are missing for the selected mode.
    """
    if mode == "independent":
        if not isinstance(config, IndependentConfig):
            raise ValueError("config must be an IndependentConfig instance")
        return _build_independent_generation_agent(
            config.field, config.reasoning_type, config.llm
        )
    elif mode == "collaborative":
        if not isinstance(config, CollaborativeConfig):
            raise ValueError("config must be a CollaborativeConfig instance")
        # Use the simplified multi-turn system
        return _build_collaborative_generation_agent(
            config.agent_names,
            config.agent_fields,
            config.agent_reasoning_types,
            config.llms,
            config.max_turns,
        )
    else:
        raise ValueError("mode must be either 'independent' or 'collaborative'")


def _independent_generation_node(
    state: IndependentState,
    field: str,
    reasoning_type: ReasoningType,
    llm: BaseChatModel,
) -> IndependentState:
    """
    Represents the action of a single generation agent using the
    independent_generation.md template. The output is expected to be markdown
    with sections: Evidence, Hypothesis, Reasoning, Assumptions Table.
    """
    # Handle meta_review field with fallback (absent on the first round).
    meta_review = state.get("meta_review", "Not Available")

    prompt = load_prompt(
        "independent_generation",
        goal=state["goal"],
        field=field,
        literature_review=state["literature_review"],
        meta_review=meta_review,
        reasoning_type=reasoning_type.value,
    )
    response_content = llm.invoke(prompt).content
    return {**state, "_raw_result": response_content}


def _parsing_node(state: IndependentState) -> IndependentState:
    """
    Parse the raw markdown result into a structured ParsedHypothesis object.
    """
    parsed_hypothesis = parse_hypothesis_markdown(state["_raw_result"])
    return {**state, "hypothesis": parsed_hypothesis}


def _build_independent_generation_agent(
    field: str, reasoning_type: ReasoningType, llm: BaseChatModel
):
    """
    Builds and configures a LangGraph for a single-agent generation process using
    the independent_generation.md template. The agent's output is parsed into a
    structured ParsedHypothesis object.

    Parameters
    ----------
    field : str
        Field or domain of expertise.
    reasoning_type : ReasoningType
        Reasoning type for the agent.
    llm : BaseChatModel
        The language model to use.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the generation agent.
    """
    graph = StateGraph(IndependentState)
    graph.add_node(
        "generator",
        lambda state: _independent_generation_node(state, field, reasoning_type, llm),
    )
    graph.add_node("parser", _parsing_node)

    graph.add_edge("generator", "parser")
    graph.add_edge("parser", END)

    graph.set_entry_point("generator")
    return graph.compile()


def _collaborative_parsing_node(state: CollaborativeState) -> CollaborativeState:
    """
    Parse the final result from collaborative generation into a structured
    ParsedHypothesis object.
    """
    transcript_str = "\n".join([f"{name}: {msg}" for name, msg in state["transcript"]])
    parsed_hypothesis = parse_hypothesis_markdown(transcript_str)
    return {**state, "hypothesis": parsed_hypothesis}


def _build_collaborative_generation_agent(
    agent_names: list[str],
    agent_fields: dict[str, str],
    agent_reasoning_types: dict[str, ReasoningType],
    llms: dict[str, BaseChatModel],
    max_turns: int = 10,
) -> StateGraph:
    """Build collaborative generation agent with structured output parsing."""

    # Create one node function per participating agent.
    agent_node_fns = {}
    for agent_name in agent_names:
        agent_node_fns[agent_name] = multiturn.create_agent_node_fn(
            agent_name=agent_name,
            llm=llms[agent_name],
            prompt_name="collaborative_generation",
            prompt_keys_from_state=["goal", "literature_review", "meta_review"],
            # kwargs for the prompt
            field=agent_fields[agent_name],
            reasoning_type=agent_reasoning_types[agent_name].value,
        )

    # Create the moderator that decides which agent speaks next and when
    # the debate terminates (via _termination_fn or max_turns).
    moderator_fn = multiturn.create_moderator_node_fn(
        agent_names, _termination_fn, max_turns
    )

    # Build the base multi-turn agent graph (without compiling it yet)
    base_graph = StateGraph(CollaborativeState)

    # Add agent nodes
    for agent_name, agent_fn in agent_node_fns.items():
        base_graph.add_node(agent_name, agent_fn)

    # Add moderator node
    base_graph.add_node("moderator", moderator_fn)

    # Add our custom parsing node
    base_graph.add_node("parser", _collaborative_parsing_node)

    # Define edges: agents -> moderator
    for agent_name in agent_node_fns.keys():
        base_graph.add_edge(agent_name, "moderator")

    # Conditional edges from moderator
    def route_after_moderator(state: CollaborativeState):
        if state["finished"]:
            return "parser"
        return state["next_agent"]

    routing_map = {name: name for name in agent_node_fns.keys()}
    routing_map["parser"] = "parser"

    base_graph.add_conditional_edges("moderator", route_after_moderator, routing_map)

    # Parser goes to END
    base_graph.add_edge("parser", END)

    # Set entry point: the first named agent opens the debate.
    base_graph.set_entry_point(list(agent_node_fns.keys())[0])

    return base_graph.compile()


def _termination_fn(msg: str) -> bool:
    """
    Check if the message contains all required sections to prevent parser assertions.
    Returns True if the message has hypothesis, predictions, and assumptions sections.
    """
    # Only consider text after the #FINAL REPORT# marker.
    if "#FINAL REPORT#" in msg:
        text = msg.split("#FINAL REPORT#")[1]
    else:
        return False

    # Split the text by # to get sections
    sections = text.split("#")

    # Check for required sections
    has_hypothesis = False
    has_predictions = False
    has_assumptions = False

    for section in sections:
        section = section.strip()
        if not section:
            continue

        # Split section into title and content
        lines = section.split("\n", 1)
        if len(lines) < 2:
            continue

        title = lines[0].strip().lower()
        content = lines[1].strip()

        # Check if content is not empty
        if not content:
            continue

        # Match section titles (case-insensitive)
        if "hypothesis" in title:
            has_hypothesis = True
        elif "prediction" in title:
            has_predictions = True
        elif "assumption" in title:
            has_assumptions = True

    return has_hypothesis and has_predictions and has_assumptions


# --------------------------------------------------------------------------
# /coscientist/literature_review_agent.py
# --------------------------------------------------------------------------
"""
System for agentic literature review that's used by other agents.

Implementation uses LangGraph to:
1. Decompose research goals into modular topics
2. Dispatch each topic to GPTResearcher workers in parallel
3. Synthesize topic reports into executive summary
"""

import asyncio
import os
import re
from typing import TypedDict

from gpt_researcher import GPTResearcher
from gpt_researcher.utils.enum import Tone
from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt


class LiteratureReviewState(TypedDict):
    """State for the literature review agent."""

    goal: str
    # Upper bound on how many subtopics the decomposition prompt may produce.
    max_subtopics: int
    subtopics: list[str]
    # Parallel to `subtopics`: one research report per subtopic.
    subtopic_reports: list[str]
    meta_review: str


def parse_topic_decomposition(markdown_text: str) -> list[str]:
    """
    Parse the topic decomposition markdown into strings.

    Parameters
    ----------
    markdown_text : str
        The markdown output from topic_decomposition prompt

    Returns
    -------
    list[str]
        Parsed subtopics strings
    """
    # Split by subtopic headers (### Subtopic N); sections[0] is any preamble
    # before the first header and is discarded.
    sections = re.split(r"### Subtopic \d+", markdown_text)
    return [section.strip() for section in sections[1:]]


def _topic_decomposition_node(
    state: LiteratureReviewState,
    llm: BaseChatModel,
) -> LiteratureReviewState:
    """
    Node that decomposes the research goal into focused subtopics.

    Raises
    ------
    ValueError
        If no subtopics could be parsed from the LLM response.
    """
    prompt = load_prompt(
        "topic_decomposition",
        goal=state["goal"],
        max_subtopics=state["max_subtopics"],
        subtopics=state.get("subtopics", ""),
        meta_review=state.get("meta_review", ""),
    )
    response_content = llm.invoke(prompt).content

    # Parse the topics from the markdown response
    subtopics = parse_topic_decomposition(response_content)

    if not subtopics:
        raise ValueError("Failed to parse any topics from decomposition response")

    # On later rounds, append the new subtopics to those already explored.
    if state.get("subtopics", False):
        subtopics = state["subtopics"] + subtopics

    return {"subtopics": subtopics}


async def _write_subtopic_report(subtopic: str, main_goal: str) -> str:
    """
    Conduct research for a single subtopic using GPTResearcher.

    Parameters
    ----------
    subtopic : str
        The subtopic to research
    main_goal : str
        The main research goal for context

    Returns
    -------
    str
        The research report
    """
    # Create a focused query combining the research focus and key terms
    researcher = GPTResearcher(
        query=subtopic,
        report_type="subtopic_report",
        report_format="markdown",
        parent_query=main_goal,
        verbose=False,
        tone=Tone.Objective,
        config_path=os.path.join(os.path.dirname(__file__), "researcher_config.json"),
    )

    # Conduct research and generate report
    _ = await researcher.conduct_research()
    return await researcher.write_report()


async def _parallel_research_node(
    state: LiteratureReviewState,
) -> LiteratureReviewState:
    """
    Node that conducts parallel research for all subtopics using GPTResearcher.
117 | """ 118 | subtopics = state["subtopics"] 119 | main_goal = state["goal"] 120 | 121 | # Create research tasks for all subtopics 122 | research_tasks = [_write_subtopic_report(topic, main_goal) for topic in subtopics] 123 | 124 | # Execute all research tasks in parallel 125 | try: 126 | subtopic_reports = await asyncio.gather(*research_tasks) 127 | except Exception as e: 128 | raise RuntimeError(f"Failed to conduct research for subtopics: {str(e)}") 129 | 130 | if state.get("subtopic_reports", False): 131 | subtopic_reports = state["subtopic_reports"] + subtopic_reports 132 | 133 | return {"subtopic_reports": subtopic_reports} 134 | 135 | 136 | def build_literature_review_agent(llm: BaseChatModel) -> StateGraph: 137 | """ 138 | Builds and configures a LangGraph for literature review. 139 | 140 | Parameters 141 | ---------- 142 | llm : BaseChatModel 143 | The language model to use for topic decomposition and executive summary. 144 | 145 | Returns 146 | ------- 147 | StateGraph 148 | A compiled LangGraph for the literature review agent. 149 | """ 150 | graph = StateGraph(LiteratureReviewState) 151 | 152 | # Add nodes 153 | graph.add_node( 154 | "topic_decomposition", 155 | lambda state: _topic_decomposition_node(state, llm), 156 | ) 157 | 158 | graph.add_node( 159 | "parallel_research", 160 | _parallel_research_node, 161 | ) 162 | 163 | graph.add_edge("topic_decomposition", "parallel_research") 164 | graph.add_edge("parallel_research", END) 165 | 166 | graph.set_entry_point("topic_decomposition") 167 | 168 | return graph.compile() 169 | -------------------------------------------------------------------------------- /coscientist/meta_review_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Meta review agent 3 | ----------------- 4 | - Formulates a research overview with memory 5 | - Feedback from this agent is appended to the prompts of the 6 | others in subsequent rounds. 
7 | 8 | More details: 9 | - Takes in the tournament state with all debates and ELO ratings, 10 | summarizes common patterns in the reviews and debates to synthesize 11 | the meta-review feedback. 12 | - Feedback helps to steer the Reflection agent so that it accounts 13 | for common reasoning failures. 14 | - Writes top hypotheses into a research overview that highlights 15 | areas to follow up with real and specific experiments. This 16 | gets fed to the Generation agent in later rounds. Format of the 17 | overview can match the style of a review paper or a grant proposal 18 | (like an NIH Specific Aims Page). 19 | - Decides topics for additional research to follow up on. 20 | """ 21 | 22 | from typing import TypedDict 23 | 24 | from langchain_core.language_models.chat_models import BaseChatModel 25 | from langgraph.graph import END, StateGraph 26 | 27 | from coscientist.common import load_prompt 28 | from coscientist.custom_types import ReviewedHypothesis 29 | from coscientist.ranking_agent import EloTournament 30 | 31 | 32 | class MetaReviewTournamentState(TypedDict): 33 | """ 34 | State for the `meta_review_tournament` prompt agent. 35 | """ 36 | 37 | goal: str 38 | tournament: EloTournament 39 | top_k: int 40 | result: str 41 | 42 | 43 | def build_meta_review_agent(llm: BaseChatModel) -> StateGraph: 44 | """ 45 | Builds and configures a LangGraph for meta-review analysis. 46 | 47 | Parameters 48 | ---------- 49 | llm : BaseChatModel 50 | The language model to use for meta-review generation. 51 | 52 | Returns 53 | ------- 54 | StateGraph 55 | A compiled LangGraph for the meta-review agent. 
56 | """ 57 | graph = StateGraph(MetaReviewTournamentState) 58 | 59 | graph.add_node( 60 | "meta_review", 61 | lambda state: _meta_review_node(state, llm), 62 | ) 63 | 64 | graph.add_edge("meta_review", END) 65 | graph.set_entry_point("meta_review") 66 | return graph.compile() 67 | 68 | 69 | def build_top_hypotheses_review_agent(llm: BaseChatModel) -> StateGraph: 70 | """ 71 | Builds and configures a LangGraph for top hypotheses review analysis. 72 | 73 | Parameters 74 | ---------- 75 | llm : BaseChatModel 76 | The language model to use for top hypotheses review generation. 77 | 78 | Returns 79 | ------- 80 | StateGraph 81 | A compiled LangGraph for the top hypotheses review agent. 82 | """ 83 | graph = StateGraph(MetaReviewTournamentState) 84 | 85 | graph.add_node( 86 | "top_hypotheses_review", 87 | lambda state: _top_hypotheses_review_node(state, llm), 88 | ) 89 | 90 | graph.add_edge("top_hypotheses_review", END) 91 | graph.set_entry_point("top_hypotheses_review") 92 | return graph.compile() 93 | 94 | 95 | def _format_hypothesis_with_rating( 96 | hypothesis: ReviewedHypothesis, rating: float 97 | ) -> str: 98 | """Helper function to format a hypothesis with its ELO rating.""" 99 | return f"Hypothesis {hypothesis.uid} (ELO: {rating:.2f}): {hypothesis.hypothesis}" 100 | 101 | 102 | def _get_top_hypotheses_data( 103 | tournament: EloTournament, top_k: int 104 | ) -> list[tuple[str, float]]: 105 | """Helper function to get top k hypotheses sorted by ELO rating.""" 106 | sorted_hypotheses = tournament.get_sorted_hypotheses() 107 | return sorted_hypotheses[:top_k] 108 | 109 | 110 | def _meta_review_node( 111 | state: MetaReviewTournamentState, 112 | llm: BaseChatModel, 113 | ) -> MetaReviewTournamentState: 114 | """ 115 | Meta-review node that synthesizes tournament data into a comprehensive meta-analysis. 
116 | """ 117 | tournament = state["tournament"] 118 | 119 | # Build ratings text - hypotheses sorted by ELO rating (highest to lowest) 120 | sorted_hypotheses = tournament.get_sorted_hypotheses() 121 | ratings_entries = [] 122 | for hyp_id, rating in sorted_hypotheses: 123 | hypothesis = tournament.hypotheses[hyp_id] 124 | ratings_entries.append(_format_hypothesis_with_rating(hypothesis, rating)) 125 | ratings_text = "\n".join(ratings_entries) 126 | 127 | # Build debates text from match history 128 | debates_entries = [] 129 | for i, match_result in enumerate(tournament.match_history.values(), 1): 130 | debate_header = ( 131 | f"Debate {i}: Hypothesis {match_result.uid1} vs Hypothesis {match_result.uid2} " 132 | f"(Winner: {match_result.winner})" 133 | ) 134 | debates_entries.append(f"{debate_header}\n{match_result.debate}") 135 | debates_text = "\n\n".join(debates_entries) 136 | 137 | prompt = load_prompt( 138 | "meta_review_tournament", 139 | goal=state["goal"], 140 | ratings=ratings_text, 141 | debates=debates_text, 142 | ) 143 | response_content = llm.invoke(prompt).content 144 | return {**state, "result": response_content} 145 | 146 | 147 | def _top_hypotheses_review_node( 148 | state: MetaReviewTournamentState, 149 | llm: BaseChatModel, 150 | ) -> MetaReviewTournamentState: 151 | """ 152 | Top hypotheses review node that creates a research overview from top-ranked hypotheses. 
153 | """ 154 | tournament = state["tournament"] 155 | top_k = state["top_k"] 156 | 157 | # Get top k hypotheses 158 | top_hypotheses_data = _get_top_hypotheses_data(tournament, top_k) 159 | 160 | # Build top hypotheses text with ratings 161 | top_hypotheses_entries = [] 162 | for hyp_id, rating in top_hypotheses_data: 163 | hypothesis = tournament.hypotheses[hyp_id] 164 | top_hypotheses_entries.append( 165 | _format_hypothesis_with_rating(hyp_id, hypothesis, rating) 166 | ) 167 | top_hypotheses_text = "\n".join(top_hypotheses_entries) 168 | 169 | # Build reviews text for top hypotheses 170 | reviews_entries = [] 171 | for hyp_id, rating in top_hypotheses_data: 172 | hypothesis = tournament.hypotheses[hyp_id] 173 | reviews_entries.append(f"Review for Hypothesis {hyp_id}\n{hypothesis.review}") 174 | reviews_text = "\n\n".join(reviews_entries) 175 | 176 | prompt = load_prompt( 177 | "top_hypotheses_review", 178 | goal=state["goal"], 179 | top_hypotheses=top_hypotheses_text, 180 | reviews=reviews_text, 181 | ) 182 | response_content = llm.invoke(prompt).content 183 | return {**state, "result": response_content} 184 | -------------------------------------------------------------------------------- /coscientist/multiturn.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Optional, Type, TypedDict 2 | 3 | from langchain_core.language_models.chat_models import BaseChatModel 4 | from langgraph.graph import END, StateGraph 5 | 6 | from coscientist.common import load_prompt 7 | 8 | 9 | class MultiTurnState(TypedDict): 10 | """Generalized state for multi-turn agent conversations.""" 11 | 12 | transcript: list[tuple[str, str]] 13 | turn: int 14 | next_agent: str 15 | finished: bool 16 | 17 | 18 | def create_agent_node_fn( 19 | agent_name: str, 20 | llm: BaseChatModel, 21 | prompt_name: str, 22 | prompt_keys_from_state: list[str], 23 | **prompt_kwargs: dict[str, Any], 24 | ) -> Callable[[MultiTurnState], 
MultiTurnState]: 25 | """Create an agent node function.""" 26 | assert ( 27 | "transcript" not in prompt_kwargs 28 | ), "transcript will be added from state and should not be in prompt_kwargs" 29 | 30 | def agent_fn(state): 31 | # Build prompt args from state 32 | # Add transcript 33 | transcript_str = "\n".join( 34 | [f"{name}: {msg}" for name, msg in state["transcript"]] 35 | ) 36 | prompt_kwargs["transcript"] = transcript_str 37 | 38 | # Add prompt keys from state 39 | for key in prompt_keys_from_state: 40 | prompt_kwargs[key] = state.get(key, "Not Available") 41 | 42 | # Generate response 43 | prompt = load_prompt(prompt_name, **prompt_kwargs) 44 | response = llm.invoke(prompt).content 45 | 46 | return {**state, "transcript": state["transcript"] + [(agent_name, response)]} 47 | 48 | return agent_fn 49 | 50 | 51 | def create_moderator_node_fn( 52 | agent_names: list[str], 53 | termination_fn: Callable[[str], bool], 54 | max_turns: int = 10, 55 | ) -> Callable[[MultiTurnState], MultiTurnState]: 56 | """Create a moderator node function.""" 57 | 58 | def moderator_fn(state: MultiTurnState) -> MultiTurnState: 59 | # Check termination conditions 60 | if state["turn"] >= max_turns: 61 | return {**state, "finished": True, "next_agent": ""} 62 | 63 | if state["transcript"] and termination_fn(state["transcript"][-1][1]): 64 | return {**state, "finished": True, "next_agent": ""} 65 | 66 | # Round-robin scheduling 67 | current_index = agent_names.index(state["next_agent"]) 68 | next_index = (current_index + 1) % len(agent_names) 69 | 70 | return { 71 | **state, 72 | "finished": False, 73 | "next_agent": agent_names[next_index], 74 | "turn": state["turn"] + 1, 75 | } 76 | 77 | return moderator_fn 78 | 79 | 80 | def build_multi_turn_agent( 81 | state_type: Type[MultiTurnState], 82 | agent_node_fns: dict[str, Callable[[MultiTurnState], MultiTurnState]], 83 | moderator_node_fn: Callable[[MultiTurnState], MultiTurnState], 84 | post_processor_node_fn: 
Optional[Callable[[MultiTurnState], MultiTurnState]] = None, 85 | ) -> StateGraph: 86 | """Build a multi-turn agent from pre-built node functions.""" 87 | graph = StateGraph(state_type) 88 | 89 | # Add agent nodes 90 | for agent_name, agent_fn in agent_node_fns.items(): 91 | graph.add_node(agent_name, agent_fn) 92 | 93 | # Add moderator node 94 | graph.add_node("moderator", moderator_node_fn) 95 | 96 | # Add post-processor if provided 97 | if post_processor_node_fn: 98 | graph.add_node("post_processor", post_processor_node_fn) 99 | graph.add_edge("post_processor", END) 100 | 101 | # Define edges: agents -> moderator 102 | for agent_name in agent_node_fns.keys(): 103 | graph.add_edge(agent_name, "moderator") 104 | 105 | # Conditional edges from moderator 106 | def route_after_moderator(state: state_type): 107 | if state["finished"]: 108 | return "post_processor" if post_processor_node_fn else END 109 | return state["next_agent"] 110 | 111 | routing_map = {name: name for name in agent_node_fns.keys()} 112 | if post_processor_node_fn: 113 | routing_map["post_processor"] = "post_processor" 114 | else: 115 | routing_map[END] = END 116 | 117 | graph.add_conditional_edges("moderator", route_after_moderator, routing_map) 118 | graph.set_entry_point(list(agent_node_fns.keys())[0]) 119 | 120 | return graph.compile() 121 | -------------------------------------------------------------------------------- /coscientist/prompts/assumption_decomposer.md: -------------------------------------------------------------------------------- 1 | You are a scientific assumption analyzer tasked with thoroughly decomposing hypotheses into their underlying assumptions and sub-assumptions. You are an expert in logical analysis and scientific reasoning. 2 | 3 | # Goal 4 | To systematically break down the provided hypothesis into a comprehensive list of assumptions and sub-assumptions, using the initial assumptions as inspiration for deeper analysis. 
Your analysis should be exhaustive and methodical. Every claim, mechanism, or relationship implied by the hypothesis should be explicitly identified as an assumption that can be independently verified or challenged with experiments or literature review. Aim for no more than 10 assumptions. 5 | 6 | # Hypothesis to decompose 7 | {{ hypothesis }} 8 | 9 | # Initial assumptions (use as inspiration for refinement) 10 | {{ assumptions }} 11 | 12 | # Instructions 13 | * When decomposing the hypothesis, consider two kinds of assumptions: 14 | - **Explicit assumptions** high-level claims that must be true for the hypothesis to hold. 15 | - **Implicit assumptions** that are implied but not explicitly stated in the hypothesis or initial assumptions list. 16 | * For each kind of assumption, identify the underlying sub-assumptions. These are the more granular claims that support the primary assumption. Typically there should be 2-4 sub-assumptions per assumption. 17 | 18 | # Output Format 19 | Structure your response as a nested list in markdown format. 20 | 21 | ## Assumptions 22 | 1. **[Assumption 1]** 23 | - Sub-assumption 1.1: [detailed description] 24 | - Sub-assumption 1.2: [detailed description] 25 | - ... 26 | 27 | 2. **[Assumption 2]** 28 | - Sub-assumption 2.1: [detailed description] 29 | - Sub-assumption 2.2: [detailed description] 30 | - ... 31 | 32 | Do not distinguish between explicit and implicit assumptions in the final list. -------------------------------------------------------------------------------- /coscientist/prompts/cause_and_effect.md: -------------------------------------------------------------------------------- 1 | You are an expert in causality. You reason about mechanisms by carefully tracing out causal chains from initial conditions to final outcomes and communicating them to domain experts. 2 | 3 | # Goal 4 | Create a detailed causal chain that thoroughly explains the causal proposition entailed by a scientific hypothesis. 
Your goal is not to change the hypothesis. Instead it is to propose the most plausible causal chain that would be consistent and supportive. 5 | 6 | # Hypothesis to analyze 7 | {{ hypothesis }} 8 | 9 | # Instructions 10 | * Break down the hypothesis into discrete, sequential steps. Use the steps given in the hypothesis as a starting point. Add intermediate steps to make the causal chain more detailed; emphasize direct and specific causal links. 11 | * For each step, state the cause, effect, and mechanism. 12 | * Descriptions of the mechanism should be highly detailed in describing how precisely the cause leads to the effect. 13 | * If a cause has multiple effects detail them in the same step. Likewise, when a single effect has multiple causes, it's acceptable to repeat it in a different step. 14 | * If a cause, effect, or mechanism is uncertain, say so. Then make your best guess. 15 | * Use as many steps as needed to fully detail the causal chain. 16 | 17 | # Output format (markdown) 18 | ## Causal Chain 19 | ### Step 1: [cause] -> [effect] 20 | [Exposition of the mechanism] 21 | 22 | ### Step 2: [cause] -> [effect] 23 | [Exposition of the mechanism] 24 | 25 | -------------------------------------------------------------------------------- /coscientist/prompts/collaborative_generation.md: -------------------------------------------------------------------------------- 1 | You are an expert participating in a collaborative discourse concerning the generation of a scientific hypothesis. The overarching objective of this discourse is to collaboratively develop a novel and robust hypothesis. You will engage in a discussion with other experts. You are a specialist in {{ field }} and you approach problems through this lens. {{ reasoning_type }} 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Criteria 7 | A strong hypothesis must be novel, robust, and falsifiable. It must also be specific and clear to domain experts, who will analyze and critique your proposals. 
8 | 9 | General guidelines: 10 | * Exhibit boldness and creativity in your contributions. 11 | * Maintain a helpful and collaborative approach but do not be afraid to disagree with other experts. Seeking the truth requires a willingness to challenge and be challenged. 12 | * Always prioritize the generation of a high-quality hypothesis. Novelty is the key criterion, but it should not be at the expense of robustness or falsifiability. 13 | * Building consensus in science is a process. Do not expect to resolve all disagreements or uncertainties in this single discussion. 14 | 15 | # Review of relevant literature 16 | {{ literature_review }} 17 | 18 | # Additional Notes (optional) 19 | A panel of reviewers may have put together a meta-analysis of previously proposed hypotheses, highlighting common strengths and weaknesses. When available, you can use this to inform your contributions: 20 | {{ meta_review }} 21 | 22 | # Procedure 23 | If initiating the discussion from a blank transcript, then propose three distinct hypotheses. 24 | 25 | For subsequent contributions that continue an existing discussion: 26 | * Pose clarifying questions if ambiguities or uncertainties arise. 27 | * Critically evaluate the hypotheses proposed thus far, addressing the following aspects: 28 | - Adherence to the criteria for a strong hypothesis 29 | - Utility and practicality 30 | - Level of detail and specificity 31 | - Implicit and explicit assumptions and sub-assumptions 32 | - Novelty 33 | * Identify any weaknesses or potential limitations. 34 | * Propose concrete improvements and refinements to address identified weaknesses and improve novelty. 35 | * Conclude your response with a suggested refinement of the hypothesis. 
36 | 37 | When sufficient discussion has transpired (typically 3-5 conversational turns, with a maximum of 10 turns) and all relevant questions and points have been thoroughly addressed and clarified, conclude the process by writing up a final hypothesis report in markdown format. 38 | 39 | # Final hypothesis report format 40 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters, this is critical to let a moderator know when your discussion is finished). ONLY WRITE #FINAL REPORT# IMMEDIATELY BEFORE WRITING THE REPORT. If still in discussion simply refer to it as the "final report", without caps and without the hashtags. The report should be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 41 | 42 | 1. In the Hypothesis section, state the final self-contained hypothesis agreed upon by the group. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes. 43 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 44 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 45 | 46 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. 
Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. 47 | 48 | #BEGIN TRANSCRIPT# 49 | {{ transcript }} 50 | #END TRANSCRIPT# 51 | 52 | Your Turn: -------------------------------------------------------------------------------- /coscientist/prompts/deep_verification.md: -------------------------------------------------------------------------------- 1 | You are a scientific hypothesis verifier tasked with conducting a deep verification of hypotheses proposed by other scientists. You are an expert in methodical analysis and critical thinking. 2 | 3 | # Goal 4 | To thoroughly evaluate the scientific validity, logical consistency, and empirical support for the provided hypothesis by examining its provided reasoning and assumptions. Do not be unnecessarily charitable in your assessment. Scientific progress requires rigorous verification, and identifying weaknesses is as valuable as confirming strengths. Effective verification must be systematic, objective, and detailed. 5 | 6 | # Hypothesis to verify 7 | {{ hypothesis }} 8 | 9 | # Causal reasoning to evaluate 10 | {{ reasoning }} 11 | 12 | # Reviewed assumptions to assess 13 | {{ assumption_research }} 14 | 15 | # Instructions 16 | 1. Examine the core hypothesis for scientific validity, specificity, and testability. 17 | 2. Analyze the provided causal reasoning for logical consistency, gaps, and potential fallacies. 18 | 3. Write a summary evaluation of whether the assumptions and sub-assumptions on which the hypothesis rests are overall well-founded and supported by research. Identify the weakest ones. 19 | 4. Conclude by highlighting strengths and weaknesses uncovered during this reflection process. Suggest areas for refinement. Do not pass a final judgement. 
20 | 21 | ## Tone 22 | Your response should: 23 | - Maintain scientific objectivity and intellectual rigor 24 | - Be direct about weaknesses without being dismissive 25 | - Use clear, precise language appropriate for scientific discourse 26 | -------------------------------------------------------------------------------- /coscientist/prompts/desk_reject.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific hypothesis evaluation. Your task is to analyze a hypothesis and determine if it is correct, novel, and high-quality. 2 | 3 | # Instructions 4 | 5 | 1. Correctness: Assess if the hypothesis is consistent with your extensive knowledge of the field. Your primary concern is plausibility; the hypothesis itself may be speculative and unproven. 6 | 2. Novelty: Assess if the hypothesis is a meaningfully new idea. 7 | 3. Quality: A high-quality hypothesis is well-motivated, clear, concise, and scientifically sound. 8 | 9 | Provide your reasoning for each of the three criteria. When these evaluations are complete, conclude by writing either "FINAL EVALUATION: PASS" or "FINAL EVALUATION: FAIL" (in all capital letters). To pass, the hypothesis must receive a pass rating for each of the three criteria. Do not write anything after the final evaluation. 10 | 11 | # Hypothesis to evaluate 12 | {{ hypothesis }} -------------------------------------------------------------------------------- /coscientist/prompts/evolve_from_feedback.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research and epistemic iteration. Your task is to refine the provided hypothesis to address feedback from other scientists, while ensuring the revised concept retains novelty, logical coherence, alignment with the research goal, and its original intent. Your refined hypothesis will compete in a tournament with other hypotheses to select the best one, try hard to win! 
2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Original Hypothesis 7 | {{ hypothesis }} 8 | 9 | # Reviewer Feedback 10 | {{ verification_result }} 11 | 12 | # Competitive Intelligence 13 | {{ meta_review }} 14 | 15 | # Instructions 16 | 1. Critically evaluate the original hypothesis, reviewer feedback, and your competitive intelligence. The competitive intelligence is a meta-review of the tournament, and it will help you understand the strengths and weaknesses of the other hypotheses against which you will compete. 17 | 2. Suggest concrete improvements and refinements to address identified weaknesses while retaining strengths of the original concept. Improvements should address reviewer comments in addition to: 18 | - Improving detail and specificity 19 | - Clearing away dubious assumptions 20 | - Increasing utility, practicality, and feasibility 21 | - Avoiding the pitfalls of other hypotheses in the tournament 22 | 3. Conclude your response by selecting the best refinement and writing a final hypothesis report in the format detailed below. 23 | 4. Remember that your purpose is to make the existing hypothesis as competitive as possible, not to come up with something completely new. 24 | 25 | # Final hypothesis report format 26 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters). The report must be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 27 | 28 | 1. In the Hypothesis section, state the final self-contained hypothesis. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes without referencing the original hypothesis. 29 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. 
Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 30 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 31 | 32 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/final_report.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research communication. Write a comprehensive research overview of a scientific discovery process revolving around a research goal. 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # All hypotheses by ranking 7 | {{ hypotheses_by_ranking }} 8 | 9 | # Detailed information for top ranked hypotheses 10 | {{ top_ranked_hypotheses }} 11 | 12 | # Instructions 13 | 14 | Write a comprehensive scientific research report in markdown format that synthesizes the research discovery process. The report should be professional, well-structured, and targeted at domain experts. 15 | 16 | ## Report Structure 17 | 18 | ### 1. 
Executive Summary 19 | - Provide a concise overview (3-4 paragraphs) of the research goal and discovery process 20 | - Identify and briefly describe the main research directions that were explored (based on the semantic groupings of hypotheses) 21 | - Highlight the most promising findings and their potential significance 22 | - State the key conclusions and recommendations for future research 23 | 24 | ### 2. Research Directions Explored 25 | - Analyze all the hypotheses to identify distinct research directions or themes 26 | - For each major direction: 27 | - Describe the underlying scientific rationale 28 | - Explain how this direction relates to and addresses the overall research goal 29 | - Summarize the key insights and thinking from hypotheses in this group 30 | 31 | ### 3. Top-Ranked Hypotheses Analysis 32 | For each hypothesis in the top-ranked list: 33 | - **Hypothesis Statement**: Clearly state the hypothesis 34 | - **Scientific Rationale**: Summarize the reasoning and evidence supporting this hypothesis 35 | - **Experimental Design**: Propose specific, feasible experiments to test or falsify the hypothesis 36 | - Include experimental methodology, key variables to measure, and expected outcomes 37 | - Consider both positive and negative controls where applicable 38 | - **Potential Impact**: Explain the implications if this hypothesis is confirmed or refuted 39 | 40 | ### 4. 
Conclusions and Future Directions 41 | - Synthesize the overall findings and their significance for the research goal 42 | - Identify the most promising hypotheses and research directions for continued investigation 43 | - Discuss potential challenges and limitations in the current approach 44 | - Recommend specific next steps for advancing the research 45 | - Consider broader implications for the field and potential applications 46 | 47 | ## Writing Guidelines 48 | - Use clear, precise scientific language appropriate for a research report 49 | - Include proper markdown formatting with headers, bullet points, and emphasis where appropriate 50 | - Maintain objectivity while highlighting the most significant findings 51 | - Ensure logical flow between sections with appropriate transitions 52 | - Include specific details from the provided data while maintaining readability 53 | - Aim for approximately 2000-3000 words total 54 | -------------------------------------------------------------------------------- /coscientist/prompts/independent_generation.md: -------------------------------------------------------------------------------- 1 | You are a member of a team of scientists tasked with formulating creative and falsifiable scientific hypothesis. You are a specialist in {{ field }} and you approach problems through this lens. {{ reasoning_type }} 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Criteria 7 | A strong hypothesis must be novel, robust, and falsifiable. It must also be specific and clear to domain experts, who will analyze and critique your proposals. 8 | 9 | # Review of relevant literature 10 | {{ literature_review }} 11 | 12 | # Additional Notes (optional) 13 | A panel of reviewers may have put together a meta-analysis of previously proposed hypotheses, highlighting common strengths and weaknesses. When available, you can use this to inform your contributions: 14 | {{ meta_review }} 15 | 16 | # Instructions 17 | 1. 
State a hypothesis that addresses the research goal and criteria while staying grounded in evidence from literature and feedback from reviewers. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes. 18 | 2. Make a list of self-contained falsifiable predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 19 | 3. Make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 20 | 21 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. 22 | 23 | # Output Format 24 | Structure your response in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/meta_review_tournament.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research and meta-analysis. Synthesize a comprehensive meta-review of provided reviews pertaining to the following research goal. 2 | 3 | # Instructions 4 | * Generate a structured meta-analysis report of the provided reviews. 
5 | * Focus on identifying: 6 | - Common strengths across highly-rated hypotheses and recurring themes in successful arguments 7 | - Recurring weaknesses, critique points, and common issues raised by reviewers 8 | - Common evaluation criteria being emphasized 9 | - Bias patterns in review processes 10 | * The generated meta-analysis should provide actionable insights for researchers developing future proposals. 11 | * Refrain from evaluating individual proposals or reviews; focus on producing a synthesized meta-analysis. 12 | 13 | # Goal 14 | {{ goal }} 15 | 16 | # Hypothesis and Elo ratings 17 | {{ ratings }} 18 | 19 | # Provided reviews for meta-analysis 20 | {{ debates }} -------------------------------------------------------------------------------- /coscientist/prompts/observation_reflection.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific hypothesis evaluation. Your task is to analyze the 2 | relationship between a provided hypothesis and observations from a scientific article. 3 | Specifically, determine if the hypothesis provides a novel causal explanation 4 | for the observations, or if they contradict it. 5 | 6 | Instructions: 7 | 8 | 1. Observation extraction: list relevant observations from the article. 9 | 2. Causal analysis (individual): for each observation: 10 | a. State if its cause is already established. 11 | b. Assess if the hypothesis could be a causal factor (hypothesis => observation). 12 | c. Start with: "would we see this observation if the hypothesis was true:". 13 | d. Explain if it’s a novel explanation. If not, or if a better explanation exists, 14 | state: "not a missing piece." 15 | 3. Causal analysis (summary): determine if the hypothesis offers a novel explanation 16 | for a subset of observations. Include reasoning. Start with: "would we see some of 17 | the observations if the hypothesis was true:". 18 | 4. 
Disproof analysis: determine if any observations contradict the hypothesis. 19 | Start with: "does some observations disprove the hypothesis:". 20 | 5. Conclusion: state: "hypothesis: ". 22 | 23 | Scoring: 24 | * Already explained: hypothesis consistent, but causes are known. No novel explanation. 25 | * Other explanations more likely: hypothesis *could* explain, but better explanations exist. 26 | * Missing piece: hypothesis offers a novel, plausible explanation. 27 | * Neutral: hypothesis neither explains nor is contradicted. 28 | * Disproved: observations contradict the hypothesis. 29 | 30 | Important: if observations are expected regardless of the hypothesis, and don’t disprove it, 31 | it’s neutral. 32 | 33 | Article: 34 | {article} 35 | 36 | Hypothesis: 37 | {hypothesis} 38 | 39 | Response {provide reasoning. end with: "hypothesis: ".) -------------------------------------------------------------------------------- /coscientist/prompts/out_of_the_box.md: -------------------------------------------------------------------------------- 1 | You are an expert researcher tasked with generating a novel, singular hypothesis inspired by analogous elements from provided concepts. 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Concepts 7 | Inspiration may be drawn from the following concepts (utilize analogy and inspiration, not direct replication): 8 | {{ hypotheses }} 9 | 10 | # Instructions 11 | 1. Provide a concise introduction to the relevant scientific domain. 12 | 2. Summarize recent findings and pertinent research, highlighting successful approaches. 13 | 3. Identify promising avenues for exploration that may yield innovative hypotheses. 14 | 4. Develop a detailed, original, and specific single hypothesis for achieving the stated goal, leveraging analogous principles from the provided ideas. This should not be a mere aggregation of existing methods or entities. Think out-of-the-box. 15 | 5. 
Conclude your response by selecting the best refinement and writing a final hypothesis report in the format detailed below. 16 | 17 | # Final hypothesis report format 18 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters). The report must be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 19 | 20 | 1. In the Hypothesis section, state the final self-contained hypothesis. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes without explicitly referencing the original concepts. 21 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 22 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 23 | 24 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/research_config.md: -------------------------------------------------------------------------------- 1 | You are an expert scientific communicator and researcher. 
You are tasked with assisting a scientist in clarifying and solidifying a research goal through an interactive conversation. This refined research goal will be given to a scientific research agent that will propose hypotheses, review literature, and write a final report. 2 | 3 | # Suggested Goal for Refinement 4 | {{ goal }} 5 | 6 | # Instructions 7 | 8 | 1. Analyze the scientist's goal and restate it in your own words to ensure understanding. 9 | 2. Evaluate whether the goal is clear, specific, and well-formulated. 10 | 3. If the goal needs clarification or refinement: 11 | - Ask specific clarifying questions to better understand the scientist's intent 12 | - Suggest improvements or refinements 13 | - Wait for the scientist's response before proceeding 14 | 4. If the goal is already clear and well-formulated, suggest any minor improvements if needed. 15 | 5. Continue this interactive process until the scientist confirms they are satisfied with the refined goal. 16 | 6. Once the scientist confirms satisfaction, conclude by writing "FINAL GOAL:" (in all capital letters) followed by the final refined goal statement. Do not include any text or comments after the final goal statement. 17 | 7. Try to maintain the style of the original goal. If it is a question, keep it as a question. If it is a statement, keep it as a statement. And if it is phrased as a goal, keep that phrasing. -------------------------------------------------------------------------------- /coscientist/prompts/simulated_debate.md: -------------------------------------------------------------------------------- 1 | You are an expert in comparative analysis, engaging with a panel of domain experts in a structured discussion to evaluate two competing hypotheses. The objective is to rigorously determine which hypothesis is superior based on a predefined set of attributes and criteria. 
The experts possess no pre-existing biases toward either hypothesis and are solely focused on identifying the optimal choice because only one can be implemented. 2 | 3 | # Procedure 4 | If initiating the discussion from a blank transcript, begin with a concise summary of both hypotheses and their respective initial reviews, and then write a few pro and con arguments for each. 5 | 6 | For subsequent contributions that continue an existing discussion: 7 | * Pose clarifying questions to address any ambiguities or uncertainties. 8 | * Critically evaluate each hypothesis in relation to the stated research goal. This evaluation should consider aspects such as: 9 | - Potential for correctness/validity. 10 | - Utility and practical applicability. 11 | - Sufficiency of detail and specificity. 12 | - Novelty and originality. 13 | - Desirability for implementation. 14 | * Identify and articulate any weaknesses, limitations, or potential flaws in either hypothesis. 15 | * Do not be unnecessarily charitable in your assessments of either hypothesis. Scientific progress requires rigor. We're seeking the truth and have limited resources to chase unproductive leads. 16 | * Exhibit boldness and creativity in your contributions. 17 | * Maintain a helpful and collaborative approach. 18 | * Consider the reviews of the hypotheses but remember that absence of evidence is not evidence of absence. 19 | 20 | Once the discussion has reached a point of sufficient depth (typically 3-5 turns, up to 10 turns) and all relevant questions, concerns, and arguments have been thoroughly addressed, provide a consensus judgment for the better hypothesis. The judgment should succinctly state the rationale for the selection. Conclude with the phrase "WINNER: <1 or 2>" (in all capital letters), denoting the id of the superior hypothesis. Write nothing after this declaration. 
21 | 22 | # Research goal for hypotheses 23 | {{ goal }} 24 | 25 | ## Hypothesis 1 26 | {{ hypothesis_1 }} 27 | 28 | ## Hypothesis 2 29 | {{ hypothesis_2 }} 30 | 31 | ## Review of hypothesis 1 32 | {{ review_1 }} 33 | 34 | ## Review of hypothesis 2 35 | {{ review_2 }} 36 | 37 | #BEGIN TRANSCRIPT# 38 | {{ transcript }} 39 | #END TRANSCRIPT# 40 | 41 | Your Turn: -------------------------------------------------------------------------------- /coscientist/prompts/supervisor_decision.md: -------------------------------------------------------------------------------- 1 | You are the **Supervisor Agent** for the Coscientist multi-agent research system. Your role is to analyze the current state of the research process and decide what actions to take next to advance scientific hypothesis generation, evaluation, and refinement. 2 | 3 | # Research Goal 4 | {{ goal }} 5 | 6 | # Research Meta Reviews 7 | Here are the two latest meta reviews of the research process. Use them to understand whether progress is continuing or leveling off. 8 | 9 | ## Latest Meta Review 10 | {{ meta_review }} 11 | 12 | ## Previous Meta Review 13 | {{ previous_meta_review }} 14 | 15 | # Available Actions 16 | You may choose from the following actions: 17 | 1. generate_new_hypotheses - Create new hypotheses through independent or collaborative generation. Perform this action to increase diversity and explore new research directions. 18 | 2. evolve_hypotheses - Refine and improve existing hypotheses based on feedback and rankings. Perform this action to improve the quality of existing hypotheses in existing research directions. 19 | 3. expand_literature_review - Broaden the literature review to cover new research directions. Perform this action to explore the literature for new ideas. 20 | 4. run_tournament - Rank unranked hypotheses through scientific debate and comparison. Perform this action to rank the hypotheses and determine which ones are the most promising. 21 | 5. 
run_meta_review - Review all the evaluations and debates that have happened in the tournament so far. Perform this action to synthesize strengths and weaknesses of existing hypotheses. This will inform the generation and evolution of new hypotheses. 22 | 6. finish - Complete the research process and generate a final report. Finish when the research process seems to be making diminishing returns based on the meta-review and changes in Elo ratings. 23 | 24 | # Current System Statistics 25 | **Total actions taken:** {{ total_actions }} 26 | **Latest actions (most recent first):** {{ latest_actions }} 27 | 28 | ## Hypothesis Inventory 29 | These statistics are updated after hypothesis generation, evolution, and tournament running. 30 | - **Total Hypotheses (including unranked):** {{ total_hypotheses }} 31 | - **Unranked Hypotheses:** {{ num_unranked_hypotheses }} 32 | 33 | ## Meta-Review History 34 | These statistics are updated after each meta-review. 35 | - **Number of Meta-Reviews Completed:** {{ num_meta_reviews }} 36 | - **Newly Ranked Hypotheses Since Last Meta-Review:** {{ new_hypotheses_since_meta_review }} 37 | 38 | ## Tournament Trajectory 39 | These statistics are updated after each tournament run. 40 | - **Total matches played:** {{ total_matches_played }} 41 | - **Total tournaments played:** {{ total_rounds_played }} 42 | - **Current Top 3 Elo Ratings:** {{ top_3_elo_ratings }} 43 | - **Max Elo Rating Per Tournament (most recent first):** {{ max_elo_rating }} 44 | - **Count of Elo Ratings over 1400 Per Tournament (most recent first):** {{ num_elo_ratings_over_1400 }} 45 | - **Median Elo Rating Per Tournament (most recent first):** {{ median_elo_rating }} 46 | 47 | ## Quality & Diversity Metrics 48 | These statistics are updated after every hypothesis generation and evolution.
49 | - **Average pairwise cosine similarity of hypotheses:** {{ cosine_similarity_trajectory }} 50 | - **Number of distinct hypothesis clusters:** {{ cluster_count_trajectory }} 51 | 52 | ## Literature Review Status 53 | These statistics are updated after each literature review. 54 | - **Literature Review Subtopics Completed:** {{ literature_review_subtopics_completed }} 55 | 56 | # Decision-Making Framework 57 | **Consider recent actions:** Review the latest actions to avoid repeating the same action too frequently and to understand the current research trajectory. 58 | 59 | ## When to generate_new_hypotheses: 60 | - Total hypotheses < 8-10 (insufficient exploration) 61 | - Average cosine similarity score is high (>0.85) indicating hypotheses are too similar 62 | - All current hypotheses have poor performance (median Elo < 1300) 63 | 64 | ## When to evolve_hypotheses: 65 | - Have 4+ hypotheses with strong performance (Elo > 1300) 66 | - Sufficient diversity exists to avoid over-optimization (average cosine similarity score <0.85) 67 | - Meta-review suggests promising directions worth refining 68 | 69 | ## When to run_tournament: 70 | - Several unranked hypotheses exist (>4) 71 | - Before deciding to finish 72 | 73 | ## When to run_meta_review: 74 | - At least 4+ new hypotheses ranked since last meta-review 75 | - Always if there are 10 or more new hypotheses since last meta-review 76 | - Before major strategic decisions (literature expansion, evolution, finishing) 77 | - Performance plateau suggests need for strategic insight 78 | 79 | ## When to expand_literature_review: 80 | - Meta-review identifies significant and persistent knowledge gaps 81 | - Current hypotheses cluster around limited research approaches (few distinct clusters) 82 | - Similarity score remains high despite multiple generation attempts 83 | - Never when there are 20+ subtopics currently in the literature review 84 | 85 | ## When to finish: 86 | - At least 3+ high-quality hypotheses (Elo > 
1400) identified 87 | - Diminishing returns evident (trajectory shows max/median Elo plateauing over last 3+ meta-reviews) 88 | - Research goal appears sufficiently addressed 89 | - The most recent action must have been `run_meta_review` 90 | 91 | # Strategic Considerations 92 | ## Exploration vs. Exploitation Balance: 93 | - **Early Stage (< 12 hypotheses):** Prioritize exploration through generation and literature expansion 94 | - **Mid Stage (12-25 hypotheses):** Balance generation with evolution of promising candidates 95 | - **Late Stage (25+ hypotheses):** Focus on evolution of top performers 96 | 97 | ## Key Decision Factors: 98 | - **Diversity:** Use cosine similarity and cluster count trajectories to assess if diversity efforts are working 99 | - **Quality:** Analyze Elo trajectories to detect plateaus, improvements, or declines 100 | - **Momentum:** Look for patterns in recent actions and avoid repetitive sequences 101 | 102 | # Output Format 103 | Provide your decision in the following structured format: 104 | 105 | ``` 106 | DECISION: [chosen_action] 107 | 108 | REASONING: 109 | - Primary factors influencing this decision 110 | - Key metrics that support this choice 111 | - Strategic rationale for timing 112 | ``` 113 | 114 | # Important Notes 115 | - **Always justify your decision** with specific reference to the current state metrics 116 | - **Consider the research workflow holistically** - don't optimize for single metrics 117 | - **Balance exploration and exploitation** based on the research stage 118 | - **Monitor for diminishing returns** and know when to conclude 119 | - **Prioritize scientific rigor** over speed or efficiency alone 120 | 121 | Choose the single most appropriate action based on the current state and provide your structured decision. 
122 | -------------------------------------------------------------------------------- /coscientist/prompts/top_hypotheses_review.md: -------------------------------------------------------------------------------- 1 | You are creating a comprehensive research overview for a human scientist. 2 | 3 | # Instructions 4 | * Create a structured research overview that includes: 5 | - Executive Summary 6 | - Key Hypotheses and Their Strengths 7 | - Recommended Next Steps for Experimental Validation 8 | - Identified Knowledge Gaps 9 | - Risk Assessment and Mitigation Strategies 10 | * Format this as a professional research report suitable for grant applications or research planning. 11 | 12 | # Research Goal 13 | {{ goal }} 14 | 15 | # Top-Ranked Hypotheses 16 | {{ top_hypotheses }} 17 | 18 | # Reviews of Top Hypotheses 19 | {{ reviews }} -------------------------------------------------------------------------------- /coscientist/prompts/topic_decomposition.md: -------------------------------------------------------------------------------- 1 | You are a senior research strategist known for designing rigorous, unbiased study programs. 2 | 3 | # Task 4 | Decompose the following research goal into a set of **focused, researchable subtopics** that can each be independently investigated through literature review. Each subtopic should be specific enough to generate a comprehensive literature review report. 5 | 6 | # Research goal 7 | {{ goal }} 8 | 9 | # Previously researched subtopics (if any) 10 | {{ subtopics }} 11 | 12 | # Meta-review to consider for finding research gaps (if any) 13 | {{ meta_review }} 14 | 15 | # Instructions 16 | 1. Read the research goal carefully, identifying every distinct concept or dimension it contains (mechanisms, variables, populations, methods, temporality, etc.). 17 | 2. If previously researched subtopics are provided, carefully review them to avoid duplicating already investigated areas. 18 | 3. 
If a meta-review is provided, analyze it to identify: 19 | - Research gaps or limitations mentioned 20 | - Areas flagged as under-explored or requiring further investigation 21 | - Novel angles or perspectives suggested for future research 22 | 4. Create focused subtopics that: 23 | - Are narrow enough for independent literature review 24 | - Are broad enough to yield substantial research findings 25 | - **Do not duplicate or significantly overlap with previously researched subtopics** 26 | - **Prioritize novel areas and research gaps identified in the meta-review** 27 | - Minimally overlap with each other 28 | - Collectively cover all aspects needed to meaningfully investigate the research goal with a well-informed perspective and evidence-grounded background. 29 | 5. Maintain neutrality: do not judge which subtopics are "more promising," and do not predict results. 30 | 6. Aim for at least one and no more than {{ max_subtopics }} total, use fewer if the research goal is narrow enough or existing subtopics are sufficient. 31 | 7. Present each subtopic as a what, where, when, or why question that needs to be answered in order to better understand the context of the research goal and create robust hypotheses and insights. The subtopic should only be 1-2 sentences long. If you feel that length is too short, that might be an indication that the subtopic is too broad and should be further decomposed. 32 | 33 | # Output format (markdown) 34 | ## Research Subtopics 35 | ### Subtopic 1 36 | [Focused research subtopic] 37 | 38 | ### Subtopic 2 39 | [Focused research subtopic] 40 | 41 | -------------------------------------------------------------------------------- /coscientist/prompts/tournament.md: -------------------------------------------------------------------------------- 1 | You are an expert evaluator tasked with comparing two hypotheses. 2 | 3 | # Instructions 4 | You will be given a research goal and two hypotheses. 
Each hypothesis includes an independent review. These reviews may contain numerical scores or confidence ratings. Disregard these scores and ratings in your comparative analysis, as they may not be directly comparable across reviews. Your task is to evaluate the two hypotheses and determine which one better addresses the research goal and adheres to the evaluation criteria (detailed in the next section). Your analysis should include: 5 | 6 | 1. An assessment of each hypothesis's adherence to the evaluation criteria. 7 | 2. A comparison of the two hypotheses' strengths and weaknesses. 8 | 3. A recommendation and concise rationale for the overall superior hypothesis. 9 | 10 | Conclude your response with the phrase "WINNER: <1 or 2>" (in all capital letters), denoting the id of the superior hypothesis, based on the outcome of your analysis. Write nothing after this declaration. 11 | 12 | # Evaluation Criteria 13 | Criteria ordered by importance: 14 | 1. Alignment with the research goal. Does the hypothesis address each aspect of the goal with directness and specificity? 15 | 2. Novelty. Is the hypothesis a trivial restatement of existing scientific knowledge or does it bring forward new insights? 16 | 3. Falsifiability. Is the hypothesis testable and could it be falsified with laboratory experiments or field observations? 17 | 4. Robustness. Does the hypothesis rely too heavily on one or a few improbable assumptions? 18 | 5. Consider the reviews of the hypotheses but remember that absence of evidence is not evidence of absence.
"""
Proximity agent
--------------
- Calculates similarity between hypotheses and builds a graph
"""

import networkx as nx
import numpy as np
from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

from coscientist.custom_types import ParsedHypothesis


def create_embedding(text: str, dimensions: int = 256) -> np.ndarray:
    """Create a vector embedding for a text.

    Instantiates a fresh ``OpenAIEmbeddings`` client on every call, so
    OpenAI credentials must be configured in the environment.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=dimensions)
    return np.array(embeddings.embed_query(text))


class ProximityGraph:
    """A graph of hypotheses and their similarity scores.

    Nodes are keyed by hypothesis ``uid`` and carry the hypothesis text and
    its embedding; edge weights are cosine similarities between embeddings.
    """

    def __init__(self):
        self.graph = nx.Graph()

    def add_hypothesis(self, hypothesis: ParsedHypothesis):
        """Add a hypothesis node to the graph (edges are added lazily by
        ``update_edges``)."""
        embedding = create_embedding(hypothesis.hypothesis)
        self.graph.add_node(
            hypothesis.uid, hypothesis=hypothesis.hypothesis, embedding=embedding
        )

    def _compute_weighted_edges(
        self, hypothesis_ids_x: list[int], hypothesis_ids_y: list[int]
    ):
        """Compute cosine-similarity weighted edges between two sets of hypotheses."""
        embeddings_x = [self.graph.nodes[id]["embedding"] for id in hypothesis_ids_x]
        embeddings_y = [self.graph.nodes[id]["embedding"] for id in hypothesis_ids_y]
        similarities = cosine_similarity(embeddings_x, embeddings_y)
        # Add the edges with weights to the graph; skip self-loops.
        for i, id_x in enumerate(hypothesis_ids_x):
            for j, id_y in enumerate(hypothesis_ids_y):
                if id_x == id_y:
                    continue
                self.graph.add_edge(id_x, id_y, weight=similarities[i, j])

    def update_edges(self):
        """
        Finds all nodes without an edge and all nodes with an edge and
        computes the weighted edges between them. If no nodes have edges,
        it will compute the weighted edges between all nodes.
        """
        # Nodes that already participate in at least one edge ...
        hypothesis_ids_x = [
            node for node in self.graph.nodes if self.graph.degree(node) > 0
        ]
        # ... versus newly added, still-isolated nodes.
        hypothesis_ids_y = [
            node for node in self.graph.nodes if self.graph.degree(node) == 0
        ]
        if len(hypothesis_ids_y) == 0:
            # Nothing to do, we're already up to date
            return
        elif len(hypothesis_ids_x) == 0:
            # No nodes with edges, compute all edges
            self._compute_weighted_edges(hypothesis_ids_y, hypothesis_ids_y)
        else:
            # New-to-new edges first, then new-to-existing edges.
            self._compute_weighted_edges(hypothesis_ids_y, hypothesis_ids_y)
            self._compute_weighted_edges(hypothesis_ids_x, hypothesis_ids_y)

    def get_pruned_graph(self, min_weight: float = 0.85) -> nx.Graph:
        """Get a pruned copy of the graph with edges below ``min_weight`` removed."""
        pruned_graph = self.graph.copy()
        edges_to_remove = [
            (u, v)
            for u, v, d in pruned_graph.edges(data=True)
            if d["weight"] < min_weight
        ]
        pruned_graph.remove_edges_from(edges_to_remove)
        return pruned_graph

    def get_semantic_communities(
        self, resolution: float = 1.0, min_weight: float = 0.85
    ) -> list[set[int]]:
        """Get the partitions of the graph using the Louvain method."""
        # Prune edges from the graph with weight less than min_weight
        pruned_graph = self.get_pruned_graph(min_weight)
        return nx.community.louvain_communities(pruned_graph, resolution=resolution)

    @property
    def average_cosine_similarity(self) -> float:
        """Average edge weight of the graph.

        Returns 0.0 when the graph has no edges yet; previously this took
        ``np.mean`` of an empty list, producing NaN plus a RuntimeWarning.
        """
        weights = [d["weight"] for _, _, d in self.graph.edges(data=True)]
        if not weights:
            return 0.0
        return float(np.mean(weights))
You explore alternative scenarios and 'what if' questions to assess outcomes, uncover dependencies, or guide future planning." 14 | HEURISTIC = "You are a heuristic thinker. You use experience-based rules of thumb to make fast, efficient decisions when time, information, or computational power is limited." 15 | -------------------------------------------------------------------------------- /coscientist/research_plan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates a research plan from the user's query. Must: 3 | 4 | 1. Align with the research goal 5 | 2. Be plausible and consistent with existing research, or justify why not 6 | 3. Be novel 7 | 4. Be testable with user-provided resources/constraints 8 | 5. Be safe 9 | 10 | """ 11 | -------------------------------------------------------------------------------- /coscientist/researcher_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "RETRIEVER": "tavily", 3 | "EMBEDDING": "openai:text-embedding-3-small", 4 | "SIMILARITY_THRESHOLD": 0.42, 5 | "FAST_LLM": "google_genai:gemini-2.5-flash", 6 | "SMART_LLM": "anthropic:claude-sonnet-4-20250514", 7 | "STRATEGIC_LLM": "openai:o3-mini", 8 | "FAST_TOKEN_LIMIT": 3000, 9 | "SMART_TOKEN_LIMIT": 6000, 10 | "STRATEGIC_TOKEN_LIMIT": 4000, 11 | "BROWSE_CHUNK_MAX_LENGTH": 8192, 12 | "CURATE_SOURCES": false, 13 | "SUMMARY_TOKEN_LIMIT": 700, 14 | "TEMPERATURE": 0.4, 15 | "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", 16 | "MAX_SEARCH_RESULTS_PER_QUERY": 5, 17 | "MEMORY_BACKEND": "local", 18 | "TOTAL_WORDS": 1200, 19 | "REPORT_FORMAT": "APA", 20 | "MAX_ITERATIONS": 3, 21 | "AGENT_ROLE": null, 22 | "SCRAPER": "bs", 23 | "MAX_SCRAPER_WORKERS": 15, 24 | "MAX_SUBTOPICS": 3, 25 | "LANGUAGE": "english", 26 | "REPORT_SOURCE": "web", 27 | "DOC_PATH": "./my-docs", 28 | "PROMPT_FAMILY": 
"default", 29 | "LLM_KWARGS": {"max_tokens": 20000}, 30 | "EMBEDDING_KWARGS": {}, 31 | "VERBOSE": false, 32 | "DEEP_RESEARCH_BREADTH": 3, 33 | "DEEP_RESEARCH_DEPTH": 2, 34 | "DEEP_RESEARCH_CONCURRENCY": 4, 35 | "MCP_SERVERS": [], 36 | "MCP_AUTO_TOOL_SELECTION": true, 37 | "MCP_ALLOWED_ROOT_PATHS": [], 38 | "MCP_STRATEGY": "fast", 39 | "REASONING_EFFORT": "medium" 40 | } -------------------------------------------------------------------------------- /coscientist/supervisor_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Supervisor agent 3 | ---------------- 4 | - Analyzes the current state of the research process 5 | - Decides what actions to take next to advance scientific hypothesis 6 | generation, evaluation, and refinement 7 | - Uses strategic decision-making framework to balance exploration vs exploitation 8 | 9 | More details: 10 | - Takes in comprehensive system statistics and meta-reviews 11 | - Makes strategic decisions about next steps in the research process 12 | - Balances between generating new hypotheses, evolving existing ones, 13 | running tournaments, expanding literature review, or finishing 14 | - Considers quality metrics, diversity metrics, and research momentum 15 | """ 16 | 17 | import re 18 | from typing import TypedDict 19 | 20 | from langchain_core.language_models.chat_models import BaseChatModel 21 | from langgraph.graph import END, StateGraph 22 | 23 | from coscientist.common import load_prompt 24 | 25 | 26 | class SupervisorDecisionState(TypedDict): 27 | """ 28 | State for the supervisor decision agent. 
29 | """ 30 | 31 | goal: str 32 | meta_review: str 33 | previous_meta_review: str 34 | total_actions: int 35 | latest_actions: str 36 | total_hypotheses: int 37 | num_unranked_hypotheses: int 38 | num_meta_reviews: int 39 | new_hypotheses_since_meta_review: int 40 | total_matches_played: int 41 | total_rounds_played: int 42 | top_3_elo_ratings: str 43 | max_elo_rating: str 44 | num_elo_ratings_over_1400: str 45 | median_elo_rating: str 46 | cosine_similarity_trajectory: str 47 | cluster_count_trajectory: str 48 | literature_review_subtopics_completed: int 49 | action: str 50 | decision_reasoning: str 51 | 52 | 53 | def build_supervisor_agent(llm: BaseChatModel) -> StateGraph: 54 | """ 55 | Builds and configures a LangGraph for supervisor decision-making. 56 | 57 | Parameters 58 | ---------- 59 | llm : BaseChatModel 60 | The language model to use for supervisor decisions. 61 | 62 | Returns 63 | ------- 64 | StateGraph 65 | A compiled LangGraph for the supervisor agent. 66 | """ 67 | graph = StateGraph(SupervisorDecisionState) 68 | 69 | graph.add_node( 70 | "supervisor_decision", 71 | lambda state: _supervisor_decision_node(state, llm), 72 | ) 73 | 74 | graph.add_edge("supervisor_decision", END) 75 | graph.set_entry_point("supervisor_decision") 76 | return graph.compile() 77 | 78 | 79 | def _parse_supervisor_response(response: str) -> tuple[str, str]: 80 | """ 81 | Parse the structured supervisor response to extract action and reasoning. 
82 | 83 | Expected format: 84 | DECISION: [chosen_action] 85 | 86 | REASONING: 87 | - Primary factors influencing this decision 88 | - Key metrics that support this choice 89 | - Strategic rationale for timing 90 | 91 | Parameters 92 | ---------- 93 | response : str 94 | The raw response from the LLM 95 | 96 | Returns 97 | ------- 98 | tuple[str, str] 99 | A tuple of (action, decision_reasoning) 100 | """ 101 | # Extract action from DECISION line 102 | decision_match = re.search(r"DECISION:\s*(.+)", response, re.IGNORECASE) 103 | action = decision_match.group(1).strip() if decision_match else "" 104 | 105 | # Extract reasoning section 106 | reasoning_match = re.search( 107 | r"REASONING:\s*(.*)", response, re.IGNORECASE | re.DOTALL 108 | ) 109 | decision_reasoning = reasoning_match.group(1).strip() if reasoning_match else "" 110 | 111 | return action, decision_reasoning 112 | 113 | 114 | def _supervisor_decision_node( 115 | state: SupervisorDecisionState, 116 | llm: BaseChatModel, 117 | ) -> SupervisorDecisionState: 118 | """ 119 | Supervisor decision node that analyzes system state and decides next action. 
120 | """ 121 | prompt = load_prompt( 122 | "supervisor_decision", 123 | goal=state["goal"], 124 | meta_review=state["meta_review"], 125 | previous_meta_review=state["previous_meta_review"], 126 | total_actions=state["total_actions"], 127 | latest_actions=state["latest_actions"], 128 | total_hypotheses=state["total_hypotheses"], 129 | num_unranked_hypotheses=state["num_unranked_hypotheses"], 130 | num_meta_reviews=state["num_meta_reviews"], 131 | new_hypotheses_since_meta_review=state["new_hypotheses_since_meta_review"], 132 | total_matches_played=state["total_matches_played"], 133 | total_rounds_played=state["total_rounds_played"], 134 | top_3_elo_ratings=state["top_3_elo_ratings"], 135 | max_elo_rating=state["max_elo_rating"], 136 | num_elo_ratings_over_1400=state["num_elo_ratings_over_1400"], 137 | median_elo_rating=state["median_elo_rating"], 138 | cosine_similarity_trajectory=state["cosine_similarity_trajectory"], 139 | cluster_count_trajectory=state["cluster_count_trajectory"], 140 | literature_review_subtopics_completed=state[ 141 | "literature_review_subtopics_completed" 142 | ], 143 | ) 144 | 145 | response_content = llm.invoke(prompt).content 146 | action, decision_reasoning = _parse_supervisor_response(response_content) 147 | return {**state, "action": action, "decision_reasoning": decision_reasoning} 148 | -------------------------------------------------------------------------------- /notebooks/coscientist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7db78a54-5e6c-4b1f-9504-12dd82de59a8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Example Usage" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "0774df25-40e8-41cc-89a2-3ca4bf7bc66b", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%load_ext autoreload\n", 19 | "%autoreload 2" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | 
"execution_count": null, 25 | "id": "61f3bc36-20c4-4f35-b53b-42e02ff4b4e3", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "from coscientist.framework import CoscientistConfig, CoscientistFramework\n", 30 | "from coscientist.global_state import CoscientistState, CoscientistStateManager" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "f06d0b82-dc37-4958-9b15-4bd4027a3c68", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "goal = \"How does the gut microbiome influence rheumatoid arthritis and can probiotics help to mitigate symptoms? If so, which ones are promising?\"\n", 41 | "initial_state = CoscientistState(goal=goal)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "eed3f916-bdc6-434a-b046-423a05ca360b", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "config = CoscientistConfig()\n", 52 | "state_manager = CoscientistStateManager(initial_state)\n", 53 | "cosci = CoscientistFramework(config, state_manager)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "a759291e-a22b-4027-82a9-a82619a425ea", 60 | "metadata": { 61 | "scrolled": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "final_report, final_meta_review = await cosci.run()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "a3e08ffb-a4da-4c24-9be4-afb7a4913ba7", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3 (ipykernel)", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.13.2" 94 | } 95 | }, 96 | "nbformat": 4, 97 | 
"nbformat_minor": 5 98 | } 99 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2>=3.1.2 2 | networkx>=3.5 3 | scikit-learn>=1.7.0 4 | typing-extensions>=4.5.0 5 | gpt-researcher @ git+https://github.com/assafelovic/gpt-researcher@v3.3.0 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="open-coscientist-agents", 8 | version="0.0.1", 9 | author="conradry", 10 | author_email="", # Add your email if you want to include it 11 | description="Implementation of multi-agent system for AI co-scientist", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/conradry/open-coscientist-agents", 15 | packages=find_packages(), 16 | classifiers=[ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Science/Research", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.9", 23 | "Programming Language :: Python :: 3.10", 24 | "Programming Language :: Python :: 3.11", 25 | "Programming Language :: Python :: 3.12", 26 | "Programming Language :: Python :: 3.13", 27 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 28 | ], 29 | python_requires=">=3.9", 30 | install_requires=[ 31 | "langchain>=0.3.25", 32 | "langchain-community>=0.3.24", 33 | "langgraph>=0.4.7", 34 | "typing-extensions>=4.0.0", 35 | "ipython>=8.0.0", # For notebook support 36 | "gpt-researcher @ git+https://github.com/assafelovic/gpt-researcher@v3.3.0", 37 | "langchain-core>=0.3.65", 
38 | # NOTE: langchain-community is already pinned above (>=0.3.24); the looser duplicate ">=0.3.2" was removed. 39 | "langchain-openai>=0.3.18", 40 | "langchain-anthropic>=0.3.15", 41 | "langchain-google-genai>=2.1.5", 42 | "networkx>=3.5", 43 | "scikit-learn>=1.7.0", 44 | ], 45 | extras_require={ 46 | "dev": [ 47 | "pytest>=7.0.0", 48 | "pytest-cov>=4.0.0", 49 | "black>=23.0.0", 50 | "isort>=5.0.0", 51 | "mypy>=1.0.0", 52 | "ruff>=0.0.1", 53 | "pre-commit>=3.0.0", 54 | ], 55 | "docs": [ 56 | "sphinx>=7.0.0", 57 | "sphinx-rtd-theme>=1.0.0", 58 | ], 59 | }, 60 | ) 61 | --------------------------------------------------------------------------------