├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── app ├── README_tournament_viewer.md ├── background.py ├── common.py ├── configuration_page.py ├── final_report_page.py ├── literature_review_page.py ├── meta_reviews_page.py ├── proximity_page.py ├── resume_page.py ├── supervisor_page.py ├── tournament_page.py ├── tournament_viewer.py └── viewer_requirements.txt ├── assets ├── agent_graph.png ├── app_demo.gif └── overview.png ├── coscientist ├── __init__.py ├── common.py ├── configuration_agent.py ├── custom_types.py ├── evolution_agent.py ├── final_report_agent.py ├── framework.py ├── generation_agent.py ├── global_state.py ├── literature_review_agent.py ├── meta_review_agent.py ├── multiturn.py ├── prompts │ ├── assumption_decomposer.md │ ├── cause_and_effect.md │ ├── collaborative_generation.md │ ├── deep_verification.md │ ├── desk_reject.md │ ├── evolve_from_feedback.md │ ├── final_report.md │ ├── independent_generation.md │ ├── meta_review_tournament.md │ ├── observation_reflection.md │ ├── out_of_the_box.md │ ├── research_config.md │ ├── simulated_debate.md │ ├── supervisor_decision.md │ ├── top_hypotheses_review.md │ ├── topic_decomposition.md │ └── tournament.md ├── proximity_agent.py ├── ranking_agent.py ├── reasoning_types.py ├── reflection_agent.py ├── research_plan.py ├── researcher_config.json └── supervisor_agent.py ├── notebooks └── coscientist.ipynb ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files 
are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.6.9 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | args: [--extend-select, I, --fix] 9 | name: ruff-check-imports 10 | # Run the formatter. 11 | - id: ruff-format 12 | name: ruff-format -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ryan Conrad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🧪 Open CoScientist Agents 2 | 3 | A comprehensive multi-agent system for AI-driven scientific discovery based on Google DeepMind's [AI co-scientist](https://arxiv.org/abs/2502.18864), built with LangGraph and [GPT Researcher](https://github.com/assafelovic/gpt-researcher). The aim is for this system to accelerate scientific research through collaborative AI agents that generate, critique, rank, and evolve scientific hypotheses using tournament-style competition. 4 | 5 | This implementation uses `Gemini 2.5 Pro`, `Claude Sonnet 4`, and `o3` in collaboration and competition. 6 | 7 | ![App Demo](assets/app_demo.gif) 8 | 9 | ## Key Features 10 | 11 | ### Multi-Agent Architecture 12 | - **Literature Review Agent**: Systematically decomposes research goals and conducts comprehensive literature analysis 13 | - **Generation Agents**: Create novel scientific hypotheses using multiple reasoning approaches 14 | - **Reflection Agents**: Perform deep verification and causal reasoning analysis 15 | - **Evolution Agents**: Refine and improve hypotheses based on feedback and competition 16 | - **Meta-Review Agent**: Synthesizes insights across multiple research directions 17 | - **Supervisor Agent**: Orchestrates the entire research workflow -- decides which actions to take next and when to finish the research. 
18 | - **Final Report Agent**: Generates comprehensive research summaries 19 | 20 | ### Tournament-Style Hypothesis Competition 21 | - **ELO Rating System**: Ranks hypotheses through head-to-head competitive analysis 22 | - **Debate Transcripts**: Full records of why one hypothesis outperforms another 23 | - **Win-Loss Statistics**: Track performance across multiple evaluation rounds 24 | - **Hypothesis Evolution**: See how ideas improve through iterative refinement 25 | 26 | ### Interactive Web Interface 27 | - **Streamlit Dashboard**: Comprehensive visualization of research results 28 | - **Real-time Monitoring**: Track research progress and agent activities 29 | - **Hypothesis Explorer**: Deep dive into individual hypotheses and their reasoning 30 | - **Tournament Viewer**: Analyze competitive dynamics between ideas 31 | 32 | ## Installation 33 | 34 | ### Prerequisites 35 | - Python 3.12 or higher 36 | - A boatload of API keys 37 | 38 | ### Install from PyPI (Coming Soon) 39 | ```bash 40 | pip install open-coscientist-agents 41 | ``` 42 | 43 | ### Install from Source 44 | ```bash 45 | git clone https://github.com/conradry/open-coscientist-agents.git 46 | cd open-coscientist-agents 47 | pip install -e . 
48 | ``` 49 | 50 | ## Configuration 51 | 52 | ### Environment Variables 53 | Set up your API keys for model providers: 54 | ```bash 55 | export OPENAI_API_KEY="your-openai-key" 56 | export ANTHROPIC_API_KEY="your-anthropic-key" 57 | export GOOGLE_API_KEY="your-google-key" 58 | ``` 59 | 60 | Set up your API key for Tavily search: 61 | ```bash 62 | export TAVILY_API_KEY='your-api-key' 63 | ``` 64 | 65 | Optional, but highly recommended for monitoring and debugging, set up API keys for LangSmith: 66 | ```bash 67 | export LANGSMITH_ENDPOINT="https://api.smith.langchain.com" 68 | export LANGSMITH_API_KEY="your-langsmith-api-key" 69 | export LANGSMITH_PROJECT="your-langsmith-project" 70 | ``` 71 | 72 | ### Web Interface 73 | Launch the interactive dashboard: 74 | ```bash 75 | cd app 76 | pip install -r viewer_requirements.txt 77 | streamlit run tournament_viewer.py 78 | ``` 79 | 80 | Features include: 81 | - **Configuration Agent**: Set up research parameters 82 | - **Literature Review**: Explore research foundation 83 | - **Tournament Rankings**: View hypothesis competition results 84 | - **Proximity Graph**: Semantic relationship visualization 85 | - **Meta-Reviews**: Synthesized research insights 86 | - **Supervisor Decisions**: Workflow orchestration logs 87 | - **Final Report**: Comprehensive research summary 88 | 89 | ### Start a research run in Python 90 | ```python 91 | import asyncio 92 | from coscientist.framework import CoscientistConfig, CoscientistFramework 93 | from coscientist.global_state import CoscientistState, CoscientistStateManager 94 | 95 | goal = "How does the gut microbiome influence rheumatoid arthritis and can probiotics help to mitigate symptoms? If so, which ones are promising?" 
96 | initial_state = CoscientistState(goal=goal) 97 | 98 | config = CoscientistConfig() 99 | state_manager = CoscientistStateManager(initial_state) 100 | cosci = CoscientistFramework(config, state_manager) 101 | 102 | final_report, final_meta_review = asyncio.run(cosci.run()) 103 | ``` 104 | 105 | ## Performance & Scalability 106 | 107 | In principle, this system can be easily scaled with asynchronous execution of many tasks. In practice, API rate limits make it difficult to run in parallel. Future work will explore ways to get around this by smartly allocating work to different providers. 108 | 109 | Currently designed to work with 20-30 hypotheses in a tournament. Scaling that to more will require optimizations like smarter prioritization of head-to-head matches, summarizing context to make meta-review tractable, and actually supporting asynchronous execution. 110 | 111 | 112 | ## Caveats and sharp edges 113 | 114 | - The system isn't fully configurable and there are fields that are hardcoded (like number of hypotheses, subtopics for literature review, etc.). 115 | - Obviously no tests or evaluations yet. Getting feedback will help to steer this project in the right direction for research usefulness. 116 | 117 | ## Contributing 118 | 119 | We welcome contributions! 120 | 121 | ## License 122 | 123 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
124 | 125 | ## Acknowledgments 126 | 127 | - Inspired by Google DeepMind's research on AI-assisted scientific discovery 128 | - Built with [LangGraph](https://github.com/langchain-ai/langgraph) for agent orchestration 129 | - Uses [GPT Researcher](https://github.com/assafelovic/gpt-researcher) for literature analysis 130 | - Visualization powered by [Streamlit](https://streamlit.io/) and [Plotly](https://plotly.com/) 131 | -------------------------------------------------------------------------------- /app/README_tournament_viewer.md: -------------------------------------------------------------------------------- 1 | # Coscientist Viewer App 2 | 3 | A comprehensive Streamlit application for visualizing and exploring Coscientist research results, including tournament rankings and semantic proximity graphs. 4 | 5 | ## Features 6 | 7 | ### 🏆 Tournament Rankings Page 8 | - **ELO Rating System**: View hypotheses ranked by their tournament performance 9 | - **Detailed Hypothesis View**: Explore individual hypotheses with full context 10 | - **Match History**: See complete debate transcripts between competing hypotheses 11 | - **Hypothesis Lineage**: Track which hypotheses evolved from others 12 | - **Win-Loss Records**: Performance statistics for each hypothesis 13 | 14 | ### 📊 Proximity Graph Page 15 | - **Interactive Network Visualization**: Explore semantic relationships between hypotheses 16 | - **Community Detection**: Automatically discover groups of similar hypotheses using Louvain clustering 17 | - **Hover Interactions**: View full hypothesis descriptions by hovering over nodes 18 | - **Adjustable Parameters**: Control community detection sensitivity and edge filtering 19 | - **Graph Statistics**: View network metrics including node count, edges, and average similarity 20 | 21 | ## Installation 22 | 23 | ```bash 24 | pip install -r viewer_requirements.txt 25 | ``` 26 | 27 | ## Usage 28 | 29 | ### Starting the App 30 | 31 | ```bash 32 | streamlit run 
tournament_viewer.py 33 | ``` 34 | 35 | ### Loading Data 36 | 37 | 1. **Recent Files**: Select from automatically discovered Coscientist state files 38 | 2. **File Upload**: Upload a `.pkl` state file directly through the interface 39 | 40 | ### Navigation 41 | 42 | Use the sidebar to switch between: 43 | - **Tournament Rankings**: Competitive analysis of hypotheses 44 | - **Proximity Graph**: Semantic similarity visualization 45 | 46 | ## Proximity Graph Features 47 | 48 | ### Interactive Visualization 49 | - **Nodes**: Represent individual hypotheses 50 | - **Edges**: Show cosine similarity between hypothesis embeddings 51 | - **Colors**: Different colors indicate semantic communities 52 | - **Layout**: Spring-force layout for optimal node positioning 53 | 54 | ### Community Detection Controls 55 | - **Resolution**: Higher values create more, smaller communities 56 | - **Minimum Edge Weight**: Filter weak connections for cleaner clustering 57 | 58 | ### Graph Statistics 59 | - **Number of Hypotheses**: Total nodes in the graph 60 | - **Number of Connections**: Total edges between hypotheses 61 | - **Average Similarity**: Mean cosine similarity across all connections 62 | 63 | ## Data Requirements 64 | 65 | The app expects Coscientist state files (`.pkl`) containing: 66 | - **Tournament data**: For rankings and match analysis 67 | - **Proximity graph**: For semantic similarity visualization 68 | - **Reviewed hypotheses**: With detailed reasoning and predictions 69 | 70 | ## Technical Details 71 | 72 | ### Visualization Libraries 73 | - **Plotly**: Interactive graph visualization with zoom, pan, and hover 74 | - **NetworkX**: Graph processing and community detection algorithms 75 | - **Streamlit**: Web application framework 76 | 77 | ### Graph Layout 78 | - Uses spring-force layout algorithm for optimal node positioning 79 | - Nodes are sized uniformly but could be weighted by ELO rating 80 | - Edge opacity indicates connection strength 81 | 82 | ### Community 
Detection 83 | - Louvain method for community detection 84 | - Configurable resolution parameter 85 | - Edge filtering by minimum weight threshold 86 | 87 | ## File Structure 88 | 89 | ``` 90 | app/ 91 | ├── tournament_viewer.py # Main application with both pages 92 | ├── viewer_requirements.txt # Python dependencies 93 | └── README_tournament_viewer.md # This documentation 94 | ``` 95 | 96 | ## Dependencies 97 | 98 | - `streamlit>=1.28.0`: Web application framework 99 | - `pandas>=2.0.0`: Data manipulation and analysis 100 | - `plotly>=5.0.0`: Interactive visualizations 101 | - `networkx>=3.0`: Graph processing and algorithms 102 | 103 | ## Tips for Best Results 104 | 105 | ### Tournament Page 106 | - Use the detailed view to understand hypothesis evolution 107 | - Check match history to see reasoning behind rankings 108 | - Look for patterns in win-loss records 109 | 110 | ### Proximity Graph Page 111 | - Adjust resolution to find meaningful community sizes 112 | - Increase minimum edge weight to focus on strongest similarities 113 | - Hover over nodes to quickly compare similar hypotheses 114 | - Use the zoom and pan features to explore dense areas 115 | 116 | ## Troubleshooting 117 | 118 | ### Common Issues 119 | - **Empty Graph**: Check that the state file contains proximity graph data 120 | - **No Communities**: Try lowering the minimum edge weight or resolution 121 | - **Performance**: Large graphs (>50 nodes) may be slow to render 122 | 123 | ### File Format Requirements 124 | - State files must be valid Python pickle files 125 | - Must contain either tournament or proximity_graph data 126 | - Compatible with Coscientist framework output format 127 | 128 | ## Future Enhancements 129 | 130 | Potential improvements could include: 131 | - Node sizing based on ELO ratings or other metrics 132 | - Edge thickness proportional to similarity strength 133 | - Filtering by community or hypothesis attributes 134 | - Export functionality for graphs and rankings 135 | - 
import asyncio
import os

from coscientist.framework import CoscientistConfig, CoscientistFramework
from coscientist.global_state import CoscientistState, CoscientistStateManager


def _get_output_dir(goal_hash: str) -> str:
    """Return the per-goal output directory for an already-hashed goal.

    Centralizes the path logic that was previously duplicated in four
    functions of this module, so the directory layout is defined in
    exactly one place.
    """
    return os.path.join(
        os.environ.get("COSCIENTIST_DIR", os.path.expanduser("~/.coscientist")),
        goal_hash,
    )


def _get_done_file_path(goal: str) -> str:
    """Gets the path for the 'done' file for a given goal."""
    return os.path.join(
        _get_output_dir(CoscientistState._hash_goal(goal)), "done.txt"
    )


def coscientist_process_target(goal: str):
    """The target function for the multiprocessing.Process.

    Runs a full Coscientist research loop for ``goal``. On failure the
    exception text is written to ``error.log`` in the goal directory; in
    all cases a ``done.txt`` marker is written so the UI can detect
    completion via ``check_coscientist_status``.
    """
    try:
        # This will fail if the directory exists, which is what we want.
        initial_state = CoscientistState(goal=goal)
        config = CoscientistConfig()
        state_manager = CoscientistStateManager(initial_state)
        cosci = CoscientistFramework(config, state_manager)

        # Run the framework
        asyncio.run(cosci.run())

    except Exception as e:
        # Log error to a file in the goal directory.
        output_dir = _get_output_dir(CoscientistState._hash_goal(goal))
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...)` pattern.
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, "error.log"), "w") as f:
            f.write(str(e))
    finally:
        # Create a "done" file to signal completion.
        done_file = _get_done_file_path(goal)
        with open(done_file, "w") as f:
            f.write("done")


def check_coscientist_status(goal: str) -> str:
    """Checks the status of a Coscientist run.

    Returns ``"done"``, ``"error: <message>"`` (done with a logged
    failure), or ``"running"`` (no done marker yet — also returned when
    the goal directory does not exist at all).
    """
    output_dir = _get_output_dir(CoscientistState._hash_goal(goal))

    done_file = os.path.join(output_dir, "done.txt")
    error_file = os.path.join(output_dir, "error.log")

    if os.path.exists(done_file):
        if os.path.exists(error_file):
            with open(error_file, "r") as f:
                error_message = f.read()
            return f"error: {error_message}"
        return "done"
    return "running"


def get_coscientist_results(goal: str) -> tuple[str, str]:
    """Gets the results from a completed Coscientist run.

    Returns ``(final_report_text, meta_review_text)``; both elements are
    the fallback string ``"Results not found."`` when no usable state
    can be loaded.
    """
    state = CoscientistState.load_latest(goal=goal)
    if state and state.final_report and state.meta_reviews:
        # These are TypedDicts, access by key.
        final_report_text = state.final_report.get(
            "result", "Final report not generated."
        )
        meta_review_text = state.meta_reviews[-1].get(
            "result", "Meta review not generated."
        )
        return final_report_text, meta_review_text
    return "Results not found.", "Results not found."


def cleanup_coscientist_run(goal: str):
    """Cleans up the done/error marker files after a run is consumed."""
    output_dir = _get_output_dir(CoscientistState._hash_goal(goal))
    for filename in ("done.txt", "error.log"):
        marker = os.path.join(output_dir, filename)
        if os.path.exists(marker):
            os.remove(marker)


# -------------------------------------------------------------- app/common.py

import pickle
from typing import Optional

import streamlit as st

# Import the necessary types from the coscientist package
from coscientist.global_state import CoscientistState


def load_coscientist_state(filepath: str) -> Optional[CoscientistState]:
    """Load a CoscientistState from a pickle file.

    NOTE(review): ``pickle.load`` executes arbitrary code during
    deserialization — only load state files from trusted sources.
    """
    try:
        with open(filepath, "rb") as f:
            return pickle.load(f)
    except Exception as e:
        st.error(f"Error loading state file: {e}")
        return None


def load_coscientist_state_by_goal(goal: str) -> Optional[CoscientistState]:
    """Load the latest CoscientistState for a given research goal."""
    try:
        return CoscientistState.load_latest(goal=goal)
    except Exception as e:
        st.error(f"Error loading state for goal '{goal}': {e}")
        return None


def get_available_states() -> list[str]:
    """Get all available research goals from the goal-based directory structure."""
    try:
        # Use the CoscientistState method to get all available goals
        goals_and_dirs = CoscientistState.list_all_goals()
        # Return just the goal texts (first element of each tuple)
        return [goal for goal, _ in goals_and_dirs]
    except Exception as e:
        st.error(f"Error getting available states: {e}")
        return []
[goal for goal, _ in goals_and_dirs] 36 | except Exception as e: 37 | st.error(f"Error getting available states: {e}") 38 | return [] 39 | -------------------------------------------------------------------------------- /app/configuration_page.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import time 3 | 4 | import streamlit as st 5 | from langchain_anthropic import ChatAnthropic 6 | from langchain_google_genai import ChatGoogleGenerativeAI 7 | from langchain_openai import ChatOpenAI 8 | 9 | # Import the background process functions 10 | from background import ( 11 | check_coscientist_status, 12 | cleanup_coscientist_run, 13 | coscientist_process_target, 14 | get_coscientist_results, 15 | ) 16 | 17 | # Import the configuration agent and required models 18 | from coscientist.configuration_agent import ConfigurationChatManager 19 | 20 | # Import coscientist framework components 21 | from coscientist.global_state import CoscientistState 22 | 23 | 24 | def get_llm_options(): 25 | """Get available LLM options for the chat interface.""" 26 | return { 27 | "o3": ChatOpenAI(model="o3", max_tokens=5000, max_retries=3), 28 | "Gemini 2.5 Pro": ChatGoogleGenerativeAI( 29 | model="gemini-2.5-pro", 30 | temperature=1.0, 31 | max_retries=3, 32 | max_tokens=5000, 33 | ), 34 | "Claude Sonnet 4": ChatAnthropic( 35 | model="claude-sonnet-4-20250514", max_tokens=5000, max_retries=3 36 | ), 37 | } 38 | 39 | 40 | def display_configuration_page(): 41 | """Display the configuration agent chat page.""" 42 | st.markdown("### 🤖 Configuration Agent Chat") 43 | st.markdown( 44 | "Refine your research goal through an interactive conversation with the configuration agent." 
45 | ) 46 | 47 | # Initialize session state for chat 48 | if "chat_manager" not in st.session_state: 49 | st.session_state.chat_manager = None 50 | if "chat_history" not in st.session_state: 51 | st.session_state.chat_history = [] 52 | if "conversation_started" not in st.session_state: 53 | st.session_state.conversation_started = False 54 | if "refined_goal" not in st.session_state: 55 | st.session_state.refined_goal = "" 56 | if "coscientist_running" not in st.session_state: 57 | st.session_state.coscientist_running = False 58 | if "coscientist_result" not in st.session_state: 59 | st.session_state.coscientist_result = None 60 | if "coscientist_process" not in st.session_state: 61 | st.session_state.coscientist_process = None 62 | if "coscientist_error" not in st.session_state: 63 | st.session_state.coscientist_error = None 64 | 65 | # Configuration section 66 | st.subheader("🔧 Configuration") 67 | 68 | col1, col2 = st.columns([2, 1]) 69 | 70 | with col1: 71 | # Research goal input 72 | initial_goal = st.text_area( 73 | "Enter your initial research goal:", 74 | height=100, 75 | placeholder="e.g., Investigate the relationship between protein misfolding and neurodegeneration...", 76 | help="Provide a research question or goal that you'd like to refine through conversation.", 77 | ) 78 | 79 | with col2: 80 | # Model selection 81 | llm_options = get_llm_options() 82 | selected_model = st.selectbox( 83 | "Select Language Model:", 84 | options=list(llm_options.keys()), 85 | index=1, # Default to GPT-4o-mini 86 | help="Choose the language model for the configuration agent.", 87 | ) 88 | 89 | # Start/Reset buttons 90 | if st.button("🚀 Start New Conversation", type="primary"): 91 | if initial_goal.strip(): 92 | try: 93 | with st.spinner("Initializing conversation..."): 94 | llm = llm_options[selected_model] 95 | st.session_state.chat_manager = ConfigurationChatManager( 96 | llm, initial_goal.strip() 97 | ) 98 | st.session_state.conversation_started = True 99 | 
st.session_state.chat_history = [] 100 | st.session_state.refined_goal = "" 101 | 102 | # Get the initial agent message 103 | initial_message = ( 104 | st.session_state.chat_manager.get_latest_agent_message() 105 | ) 106 | st.session_state.chat_history.append(("Agent", initial_message)) 107 | 108 | st.success("Conversation started! 🎉") 109 | st.rerun() 110 | except Exception as e: 111 | st.error(f"Error starting conversation: {str(e)}") 112 | else: 113 | st.warning("Please enter a research goal first.") 114 | 115 | if st.session_state.conversation_started: 116 | if st.button("🔄 Reset Conversation"): 117 | if ( 118 | st.session_state.coscientist_process 119 | and st.session_state.coscientist_process.is_alive() 120 | ): 121 | st.session_state.coscientist_process.terminate() 122 | 123 | # Clear the goal directory if a goal was set 124 | if st.session_state.refined_goal: 125 | try: 126 | CoscientistState.clear_goal_directory( 127 | st.session_state.refined_goal 128 | ) 129 | st.info( 130 | f"Cleared data for goal: {st.session_state.refined_goal}" 131 | ) 132 | except Exception as e: 133 | st.warning(f"Could not clear goal directory: {e}") 134 | 135 | st.session_state.chat_manager = None 136 | st.session_state.conversation_started = False 137 | st.session_state.chat_history = [] 138 | st.session_state.refined_goal = "" 139 | st.session_state.coscientist_running = False 140 | st.session_state.coscientist_result = None 141 | st.session_state.coscientist_process = None 142 | st.session_state.coscientist_error = None 143 | st.rerun() 144 | 145 | # Chat interface 146 | if st.session_state.conversation_started and st.session_state.chat_manager: 147 | st.markdown("---") 148 | st.subheader("💬 Conversation") 149 | 150 | # Display chat history 151 | chat_container = st.container() 152 | with chat_container: 153 | for sender, message in st.session_state.chat_history: 154 | if sender == "Agent": 155 | with st.chat_message("assistant", avatar="🤖"): 156 | st.markdown(message) 157 | 
else: 158 | with st.chat_message("user", avatar="👤"): 159 | st.markdown(message) 160 | 161 | # Check if conversation is complete 162 | if st.session_state.chat_manager.is_conversation_complete(): 163 | st.success("🎉 Configuration complete!") 164 | refined_goal = st.session_state.chat_manager.get_refined_goal() 165 | st.session_state.refined_goal = refined_goal 166 | 167 | st.markdown("### 🎯 Final Refined Goal") 168 | st.markdown(f"**{refined_goal}**") 169 | 170 | # Buttons row 171 | col1, col2 = st.columns(2) 172 | 173 | with col1: 174 | # Option to copy the refined goal 175 | if st.button("📋 Copy Refined Goal"): 176 | st.code(refined_goal, language="text") 177 | st.info( 178 | "Refined goal displayed above - you can select and copy it." 179 | ) 180 | 181 | with col2: 182 | # Launch coscientist button 183 | if not st.session_state.coscientist_running: 184 | if st.button("🚀 Launch Coscientist", type="primary"): 185 | try: 186 | # Ensure the directory is clean before starting 187 | CoscientistState.clear_goal_directory(refined_goal) 188 | 189 | process = multiprocessing.Process( 190 | target=coscientist_process_target, args=(refined_goal,) 191 | ) 192 | process.start() 193 | st.session_state.coscientist_process = process 194 | st.session_state.coscientist_running = True 195 | st.session_state.refined_goal = refined_goal 196 | st.rerun() 197 | except Exception as e: 198 | st.error(f"Failed to launch Coscientist: {e}") 199 | 200 | else: 201 | st.button("🚀 Coscientist Running...", disabled=True) 202 | 203 | # Handle coscientist execution 204 | if st.session_state.coscientist_running: 205 | with st.spinner("🔬 Coscientist is running in the background..."): 206 | # Give it a moment before the first check 207 | time.sleep(5) 208 | st.rerun() # Rerun to check status 209 | 210 | # Check status if it was running 211 | if ( 212 | st.session_state.refined_goal 213 | and not st.session_state.coscientist_result 214 | ): 215 | status = 
check_coscientist_status(st.session_state.refined_goal) 216 | 217 | if status == "done": 218 | st.session_state.coscientist_running = False 219 | try: 220 | with st.spinner("Fetching results..."): 221 | final_report, meta_review = get_coscientist_results( 222 | st.session_state.refined_goal 223 | ) 224 | st.session_state.coscientist_result = { 225 | "final_report": final_report, 226 | "meta_review": meta_review, 227 | } 228 | cleanup_coscientist_run(st.session_state.refined_goal) 229 | st.success("🎉 Coscientist completed successfully!") 230 | st.rerun() 231 | except Exception as e: 232 | st.error(f"Error fetching results: {e}") 233 | st.session_state.coscientist_error = str(e) 234 | 235 | elif status.startswith("error:"): 236 | st.session_state.coscientist_running = False 237 | error_message = status.replace("error: ", "") 238 | st.session_state.coscientist_error = error_message 239 | cleanup_coscientist_run(st.session_state.refined_goal) 240 | st.error(f"Coscientist run failed: {error_message}") 241 | st.rerun() 242 | 243 | elif status == "running" and st.session_state.coscientist_running: 244 | st.info( 245 | "Coscientist is running. Feel free to navigate away or check back later." 
246 | ) 247 | if st.button("Refresh Status"): 248 | st.rerun() 249 | 250 | # Display error if it occurred 251 | if st.session_state.coscientist_error: 252 | st.error(f"Coscientist failed: {st.session_state.coscientist_error}") 253 | 254 | # Display results if available 255 | if st.session_state.coscientist_result is not None: 256 | st.markdown("### 📊 Coscientist Results") 257 | st.json(st.session_state.coscientist_result) 258 | 259 | # Reset button to run again 260 | if st.button("🔄 Run Coscientist Again"): 261 | st.session_state.coscientist_result = None 262 | st.session_state.coscientist_running = False 263 | st.session_state.coscientist_process = None 264 | st.session_state.coscientist_error = None 265 | st.rerun() 266 | 267 | else: 268 | # Chat input 269 | user_input = st.chat_input("Type your message here...") 270 | 271 | if user_input: 272 | try: 273 | with st.spinner("Agent is thinking..."): 274 | # Add user message to history 275 | st.session_state.chat_history.append(("User", user_input)) 276 | 277 | # Get agent response 278 | agent_response = ( 279 | st.session_state.chat_manager.send_human_message(user_input) 280 | ) 281 | 282 | # Add agent response to history 283 | st.session_state.chat_history.append(("Agent", agent_response)) 284 | 285 | st.rerun() 286 | except Exception as e: 287 | st.error(f"Error sending message: {str(e)}") 288 | 289 | # Instructions when no conversation is active 290 | if not st.session_state.conversation_started: 291 | st.markdown("---") 292 | st.info( 293 | "👆 Enter your research goal above and click 'Start New Conversation' to begin." 294 | ) 295 | 296 | st.markdown(""" 297 | ## How to Use the Configuration Agent 298 | 299 | 1. **Enter your research goal** in the text area above 300 | 2. **Select a language model** that will power the configuration agent 301 | 3. **Click "Start New Conversation"** to begin the interactive refinement process 302 | 4. **Chat with the agent** to refine and improve your research goal 303 | 5. 
**Receive your refined goal** when the conversation is complete 304 | 6. **Launch Coscientist** with your refined goal to begin the research process 305 | 306 | ### What the Configuration Agent Does 307 | 308 | The configuration agent helps you: 309 | - **Clarify vague research questions** by asking targeted questions 310 | - **Identify key variables and parameters** relevant to your research 311 | - **Suggest specific methodological approaches** that might be appropriate 312 | - **Refine the scope** of your research to make it more focused and actionable 313 | - **Ensure your goal is well-defined** for the subsequent research agents 314 | 315 | ### Tips for Better Results 316 | 317 | - **Be specific** about your domain of interest (e.g., biology, chemistry, physics) 318 | - **Mention any constraints** or limitations you're aware of 319 | - **Indicate your level of expertise** if relevant 320 | - **Ask questions** if you need clarification on the agent's suggestions 321 | - **Iterate** - don't hesitate to refine multiple times until you're satisfied 322 | """) 323 | -------------------------------------------------------------------------------- /app/final_report_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | 4 | def display_final_report_page(state): 5 | """ 6 | Display the final report page. 
def display_final_report_page(state):
    """
    Display the final report page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing the final report.
        Expected attributes (all optional; guarded with ``hasattr``):
        ``final_report`` (dict with a "result" key), ``actions``,
        ``supervisor_decisions``, ``meta_reviews``, and ``tournament``.
    """
    st.header("📋 Final Report")

    # The final report only exists once the supervisor has decided the run is done.
    if not hasattr(state, "final_report") or not state.final_report:
        st.warning("No final report found in this research state.")
        st.markdown("""
        ## Final Report Page

        This page displays the final research report generated when the Coscientist system completes its research:

        - **Comprehensive Summary**: Complete analysis of all hypotheses and findings
        - **Top Hypotheses**: Detailed review of the highest-ranked hypotheses
        - **Research Conclusions**: Final insights and recommendations
        - **Methodology Summary**: Overview of the research process and evaluation methods

        The final report is generated only when the supervisor agent decides the research process
        is complete and has achieved sufficient depth and quality in hypothesis exploration.
        """)
        return

    final_report_content = state.final_report.get("result", "")

    # Guard clause: a report entry with no text is an error condition.
    if not final_report_content:
        st.error("Final report exists but contains no content.")
        return

    st.markdown("### 📊 Research Summary")
    st.info("✅ Research process completed successfully!")

    # Display the final report content
    st.markdown(final_report_content)

    # Show some basic statistics if available
    with st.expander("📈 Research Statistics"):
        col1, col2 = st.columns(2)

        with col1:
            st.markdown("**Process Overview:**")
            if hasattr(state, "actions"):
                st.write(f"• Total Actions Taken: {len(state.actions)}")
            if hasattr(state, "supervisor_decisions"):
                st.write(
                    f"• Supervisor Decisions: {len(state.supervisor_decisions)}"
                )
            if hasattr(state, "meta_reviews"):
                st.write(f"• Meta-Reviews Completed: {len(state.meta_reviews)}")

        with col2:
            st.markdown("**Hypothesis Statistics:**")
            if hasattr(state, "tournament") and state.tournament:
                st.write(
                    f"• Tournament Hypotheses: {len(state.tournament.hypotheses)}"
                )
                # Tournament stats come from a project helper; tolerate failure
                # but do not swallow KeyboardInterrupt/SystemExit (was a bare except).
                try:
                    tournament_stats = (
                        state.tournament.summarize_tournament_trajectory()
                    )
                    st.write(
                        f"• Total Matches Played: {tournament_stats.get('total_matches_played', 'N/A')}"
                    )
                    # Look the rating up once instead of calling .get() twice.
                    max_elo = tournament_stats.get("max_elo_rating")
                    st.write(
                        f"• Max ELO Rating: {max_elo[0] if max_elo else 'N/A'}"
                    )
                except Exception:
                    st.write("• Tournament statistics unavailable")
def display_literature_review_page(state):
    """
    Display the literature review page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing literature review data
    """
    st.header("📚 Literature Review")

    # Without a stored review there is nothing to browse; show help text instead.
    if not hasattr(state, "literature_review") or not state.literature_review:
        st.warning("No literature review found in this research state.")
        st.markdown("""
        ## Literature Review Page

        This page displays the comprehensive literature review conducted for the research:

        - **Research Subtopics**: Systematic decomposition of the main research goal
        - **Subtopic Reports**: Detailed literature analysis for each research area
        - **Knowledge Foundation**: Scientific background that informs hypothesis generation
        - **Research Context**: Current state of knowledge in relevant fields

        The literature review is one of the first steps in the research process, providing
        the scientific foundation for generating well-informed research hypotheses.
        """)
        return

    review = state.literature_review
    topics = review.get("subtopics", [])
    reports = review.get("subtopic_reports", [])

    # Each subtopic must be paired with exactly one report; bail out loudly otherwise.
    if len(topics) != len(reports):
        st.error(
            f"Data inconsistency: {len(topics)} subtopics but {len(reports)} reports"
        )
        return

    if not topics:
        st.warning("Literature review exists but contains no subtopics.")
        return

    # Page header with the overall goal and review size.
    st.markdown(f"**Research Goal:** {state.goal}")
    st.markdown(f"**Total Subtopics:** {len(topics)}")

    st.subheader("🔍 Select Subtopic")

    # Remember the chosen subtopic across Streamlit reruns.
    if "selected_subtopic_index" not in st.session_state:
        st.session_state.selected_subtopic_index = 0

    def _label(idx):
        # Render dropdown entries as "1. <subtopic>".
        return f"{idx + 1}. {topics[idx]}"

    chosen = st.selectbox(
        "Choose a research subtopic:",
        range(len(topics)),
        format_func=_label,
        index=st.session_state.selected_subtopic_index,
        key="subtopic_selector",
    )

    # Persist any change of selection.
    if chosen != st.session_state.selected_subtopic_index:
        st.session_state.selected_subtopic_index = chosen

    st.subheader("📖 Subtopic Report")

    # Show subtopic header
    st.markdown(f"### {chosen + 1}. {topics[chosen]}")

    # Render the report body, or a placeholder when it is empty.
    report_text = reports[chosen]
    if report_text:
        with st.container():
            st.markdown(report_text)
    else:
        st.info("No report content available for this subtopic.")

    # Navigation help and summary stats.
    with st.expander("📊 Literature Review Summary"):
        left, right = st.columns(2)

        with left:
            st.markdown("**Review Statistics:**")
            st.write(f"• Total Subtopics: {len(topics)}")
            st.write(f"• Current Selection: #{chosen + 1}")
            st.write(f"• Reports Available: {sum(1 for r in reports if r)}")

        with right:
            st.markdown("**Navigation:**")
            st.write("• Use the dropdown above to browse subtopics")
            st.write("• Each subtopic represents a focused research area")
            st.write("• Reports provide scientific context for hypothesis generation")

        # Quick reference list; the current selection gets a filled marker.
        st.markdown("**All Research Subtopics:**")
        for idx, topic in enumerate(topics):
            bullet = "🔹" if idx == chosen else "◦"
            st.write(f"{bullet} {idx + 1}. {topic}")
def display_meta_reviews_page(state):
    """
    Display the meta-reviews page.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing meta-reviews.
        ``state.meta_reviews`` is expected to be a list of dicts (oldest
        first) with at least a "result" key; "goal", "top_k" and
        "tournament" entries are optional — TODO confirm against the
        meta_review_agent output schema.
    """
    st.header("🔍 Meta-Reviews")

    # Nothing to show until at least one meta-review has been generated.
    if not hasattr(state, "meta_reviews") or not state.meta_reviews:
        st.warning("No meta-reviews found in this research state.")
        st.markdown("""
        ## Meta-Reviews Page

        This page displays the meta-review analyses generated throughout the research process:

        - **Strategic Analysis**: Comprehensive review of hypothesis quality and research progress
        - **Pattern Recognition**: Identification of strengths, weaknesses, and knowledge gaps
        - **Research Direction**: Guidance for future hypothesis generation and evolution
        - **Quality Assessment**: Evaluation of tournament results and hypothesis performance

        Meta-reviews are generated periodically to analyze the current state of research and guide
        the supervisor agent's strategic decisions about what actions to take next.
        """)
        return

    meta_reviews = state.meta_reviews
    # Reviews are stored oldest-first; the UI lists them latest-first.
    # Materialize the reversed view once instead of rebuilding it per use.
    latest_first = list(reversed(meta_reviews))

    # Two columns: meta-reviews list (left) and content display (right).
    col1, col2 = st.columns([1, 2])

    with col1:
        st.subheader("📚 Reviews History")
        st.markdown(f"**Total Meta-Reviews:** {len(meta_reviews)}")

        reviews_container = st.container()

        # Index into the latest-first list; 0 == most recent review.
        if "selected_meta_review_index" not in st.session_state:
            st.session_state.selected_meta_review_index = 0

        with reviews_container:
            for i in range(len(latest_first)):
                review_number = len(meta_reviews) - i  # Number from latest to oldest
                button_label = f"Meta-Review #{review_number}"

                if i == st.session_state.selected_meta_review_index:
                    # Highlight the selected meta-review instead of a button.
                    st.markdown(f"**🔹 {button_label}**")
                elif st.button(button_label, key=f"meta_review_{i}"):
                    st.session_state.selected_meta_review_index = i
                    st.rerun()

    with col2:
        st.subheader("📖 Meta-Review Content")

        if meta_reviews:
            selected_meta_review = latest_first[
                st.session_state.selected_meta_review_index
            ]
            review_number = (
                len(meta_reviews) - st.session_state.selected_meta_review_index
            )

            st.markdown(f"### Meta-Review #{review_number}")

            # Show the meta-review content.
            meta_review_content = selected_meta_review.get("result", "")
            if meta_review_content:
                st.markdown(meta_review_content)
            else:
                st.info("No content available for this meta-review.")

            # Show additional context in an expander.
            with st.expander("📊 Meta-Review Context"):
                context_cols = st.columns(2)

                with context_cols[0]:
                    st.markdown("**Review Information:**")
                    st.write(f"• Review Number: {review_number} of {len(meta_reviews)}")

                    # Surface other fields stored alongside the review, if present.
                    if "goal" in selected_meta_review:
                        st.write("• Research Goal Available: ✅")
                    if "top_k" in selected_meta_review:
                        st.write(
                            f"• Top K Analyzed: {selected_meta_review.get('top_k', 'N/A')}"
                        )

                with context_cols[1]:
                    st.markdown("**System State:**")
                    # Show tournament info if available.
                    if (
                        "tournament" in selected_meta_review
                        and selected_meta_review["tournament"]
                    ):
                        tournament = selected_meta_review["tournament"]
                        if hasattr(tournament, "hypotheses"):
                            st.write(
                                f"• Hypotheses in Tournament: {len(tournament.hypotheses)}"
                            )
                            # Narrowed from a bare except so KeyboardInterrupt
                            # and SystemExit still propagate.
                            try:
                                win_loss_records = tournament.get_win_loss_records()
                                st.write(f"• Ranked Hypotheses: {len(win_loss_records)}")
                            except Exception:
                                st.write("• Tournament statistics unavailable")
                    else:
                        st.write("• Tournament data not available")
        else:
            st.info("No meta-reviews available to display.")
def create_cytoscape_elements(graph, communities):
    """Convert NetworkX graph to Cytoscape elements format.

    Parameters
    ----------
    graph : networkx.Graph or None
        Graph whose nodes carry an optional "hypothesis" text attribute and
        whose edges carry an optional "weight" attribute.
    communities : list[Iterable]
        Node-id groups; community ``i`` is rendered with palette color
        ``i % 10``.

    Returns
    -------
    tuple[list, list]
        ``(elements, stylesheet)`` ready to pass to ``st_cytoscape.cytoscape``;
        both empty when the graph is missing or has no nodes.
    """
    if graph is None or len(graph.nodes()) == 0:
        return [], []

    G = graph

    # Fixed 10-color palette; stylesheet class "community-i" uses colors[i].
    colors = [
        "#FF6B6B",
        "#4ECDC4",
        "#45B7D1",
        "#96CEB4",
        "#FFEAA7",
        "#DDA0DD",
        "#98D8C8",
        "#FFA07A",
        "#B19CD9",
        "#FFB6C1",
    ]
    community_colors = {}
    for i, community in enumerate(communities):
        color = colors[i % len(colors)]
        for node_id in community:
            community_colors[node_id] = color

    # Create nodes
    elements = []

    for node_id in G.nodes():
        hypothesis_text = G.nodes[node_id].get("hypothesis", f"Hypothesis {node_id}")

        # Short label on the node; (truncated) full text kept for the tooltip.
        label = f"H{node_id}"
        if len(hypothesis_text) > 80:
            tooltip = hypothesis_text[:80] + "..."
        else:
            tooltip = hypothesis_text

        # BUG FIX: the class index was previously hash(color) % 10. Python
        # salts str hashes per process, so colors were nondeterministic across
        # runs and could collide. Using the palette index guarantees the node
        # class matches the stylesheet entry for its community's color.
        node_color = community_colors.get(node_id, colors[0])
        elements.append(
            {
                "data": {
                    "id": str(node_id),
                    "label": label,
                    "hypothesis": hypothesis_text,
                    "tooltip": tooltip,
                },
                "classes": f"community-{colors.index(node_color)}",
            }
        )

    # Create edges
    for edge in G.edges(data=True):
        weight = edge[2].get("weight", 0)
        elements.append(
            {
                "data": {
                    "id": f"{edge[0]}-{edge[1]}",
                    "source": str(edge[0]),
                    "target": str(edge[1]),
                    "weight": weight,
                }
            }
        )

    # Create stylesheet: one style per community class, colored from the palette.
    node_styles = []
    for i in range(10):
        color = colors[i % len(colors)]
        node_styles.append(
            {
                "selector": f".community-{i}",
                "style": {
                    "background-color": color,
                    "border-width": 2,
                    "border-color": "#ffffff",
                    "color": "#ffffff",
                    "text-valign": "center",
                    "text-halign": "center",
                    "font-size": "12px",
                    "font-weight": "bold",
                    "width": 50,
                    "height": 50,
                },
            }
        )

    stylesheet = [
        {
            "selector": "node",
            "style": {
                "content": "data(label)",
                "text-valign": "center",
                "text-halign": "center",
                "font-size": "12px",
                "font-weight": "bold",
                "width": 50,
                "height": 50,
                "border-width": 2,
                "border-color": "#ffffff",
            },
        },
        {
            "selector": "edge",
            "style": {
                "width": 2,
                "line-color": "#cccccc",
                "opacity": 0.6,
                "curve-style": "bezier",
            },
        },
        {
            "selector": "node:selected",
            "style": {
                "border-width": 4,
                "border-color": "#333333",
                "background-color": "#333333",
            },
        },
        {
            "selector": "edge:selected",
            "style": {"line-color": "#333333", "width": 4, "opacity": 1.0},
        },
    ] + node_styles

    return elements, stylesheet
def display_proximity_graph_page(state):
    """Display the proximity graph page."""
    st.markdown(
        "Explore the semantic similarity between hypotheses and their communities."
    )

    # Guard clauses: need a loaded state with a non-empty proximity graph.
    if state is None:
        st.info(
            "👈 Please select or upload a Coscientist state file from the sidebar to view the proximity graph."
        )
        return

    if state.proximity_graph is None:
        st.warning("No proximity graph data found in this state file.")
        return

    prox = state.proximity_graph

    if len(prox.graph.nodes()) == 0:
        st.warning(
            "The proximity graph is empty - no hypotheses have been added to it yet."
        )
        return

    # Sliders controlling community detection and edge filtering.
    st.subheader("Graph Filtering & Community Detection")
    slider_left, slider_right = st.columns(2)

    with slider_left:
        resolution = st.slider(
            "Resolution (higher = more communities)",
            min_value=0.1,
            max_value=2.0,
            value=1.0,
            step=0.1,
            help="Controls the size of communities. Higher values create more, smaller communities.",
        )

    with slider_right:
        min_weight = st.slider(
            "Minimum Edge Weight",
            min_value=0.0,
            max_value=1.0,
            value=0.85,
            step=0.05,
            help="Only edges with similarity above this threshold will be shown in the graph.",
        )

    # Drop edges below the similarity threshold.
    filtered_graph = prox.get_pruned_graph(min_weight)
    node_count = len(filtered_graph.nodes())
    edge_count = len(filtered_graph.edges())

    # Warn (or stop) when filtering removed too much of the graph.
    if node_count == 0:
        st.warning(
            f"⚠️ No hypotheses remain after filtering with minimum edge weight {min_weight:.2f}. Try lowering the threshold."
        )
        return
    elif edge_count == 0:
        st.warning(
            f"⚠️ No connections remain after filtering with minimum edge weight {min_weight:.2f}. The graph will show isolated nodes."
        )

    # Mean similarity over the surviving edges (0 when none remain).
    if edge_count > 0:
        weights = [
            attrs.get("weight", 0) for _, _, attrs in filtered_graph.edges(data=True)
        ]
        avg_similarity = sum(weights) / len(weights)
    else:
        avg_similarity = 0

    metric_a, metric_b, metric_c = st.columns(3)
    with metric_a:
        st.metric("Hypotheses (Filtered)", node_count)
    with metric_b:
        st.metric("Connections (Filtered)", edge_count)
    with metric_c:
        st.metric("Avg Similarity (Filtered)", f"{avg_similarity:.3f}")

    # Community detection on the filtered graph.
    communities = prox.get_semantic_communities(
        resolution=resolution, min_weight=min_weight
    )

    st.subheader(f"Semantic Communities ({len(communities)} found)")

    if communities:
        for idx, members in enumerate(communities):
            with st.expander(f"Community {idx+1} ({len(members)} hypotheses)"):
                for node_id in members:
                    text = filtered_graph.nodes[node_id].get(
                        "hypothesis", f"Hypothesis {node_id}"
                    )
                    st.markdown(f"**H{node_id}:** {text}")
    else:
        st.info(
            "No communities detected with current settings. Try lowering the minimum edge weight or adjusting the resolution parameter."
        )

    # Interactive Cytoscape rendering of the filtered graph.
    st.subheader("Interactive Graph Visualization")

    elements, stylesheet = create_cytoscape_elements(filtered_graph, communities)

    if not elements:
        st.error("Could not create visualization. Please check the data.")
        return

    # fcose layout parameters.
    layout_options = {
        "name": "fcose",
        "animationDuration": 1000,
        "fit": True,
        "padding": 50,
        "nodeSeparation": 100,
        "idealEdgeLength": 100,
        "edgeElasticity": 0.1,
        "nestingFactor": 0.1,
        "numIter": 1000,
        "initialEnergyOnIncremental": 0.3,
        "gravityRangeCompound": 1.5,
        "gravityCompound": 1.0,
        "gravityRange": 3.8,
    }

    # Widget key embeds the slider values so the graph redraws on changes.
    selected = cytoscape(
        elements=elements,
        stylesheet=stylesheet,
        layout=layout_options,
        selection_type="additive",
        width="100%",
        height="600px",
        key=f"proximity_graph_{min_weight}_{resolution}",
    )

    # Echo details for whatever the user clicked in the graph.
    if selected and (selected["nodes"] or selected["edges"]):
        st.subheader("🎯 Selected Elements")

        if selected["nodes"]:
            st.markdown("**Selected Hypotheses:**")
            for node_id in selected["nodes"]:
                match = next(
                    (el for el in elements if el["data"]["id"] == node_id), None
                )
                if match is not None:
                    st.markdown(f"**H{node_id}:** {match['data']['hypothesis']}")

        if selected["edges"]:
            st.markdown(f"**Selected Connections:** {len(selected['edges'])} edge(s)")

    st.info("""
    **How to interact with the graph:**
    - **Click nodes** to select them and see full hypothesis text below
    - **Drag nodes** to rearrange the layout
    - **Zoom and pan** to explore different areas
    - **Different colors** represent different semantic communities
    - **Hold Ctrl/Cmd + click** to select multiple nodes
    - **Double-click empty space** to fit the graph to view
    - **Adjust sliders above** to dynamically filter the graph and update communities
    """)

    st.success(
        f"📊 **Graph Status:** Showing {node_count} hypotheses and {edge_count} connections with similarity ≥ {min_weight:.2f}"
    )
def coscientist_resume_target(goal: str):
    """The target function for resuming a Coscientist process.

    Runs in a child process (see ``display_resume_page``): loads the latest
    saved checkpoint for *goal*, rebuilds the framework, and runs it to
    completion. On failure the traceback is written to ``error.log`` inside
    the goal's output directory; a "done" marker file is always written so
    ``check_coscientist_status`` can detect termination.

    Parameters
    ----------
    goal : str
        The research goal whose checkpoint should be resumed.
    """
    try:
        # Load the existing state instead of creating a new one.
        initial_state = CoscientistState.load_latest(goal=goal)
        if initial_state is None:
            raise Exception(f"No existing state found for goal: {goal}")

        config = CoscientistConfig()
        state_manager = CoscientistStateManager(initial_state)
        cosci = CoscientistFramework(config, state_manager)

        # Run the framework
        asyncio.run(cosci.run())

    except Exception:
        import traceback

        # Log the full traceback (not just the message) to a file in the
        # goal directory so failures are diagnosable after the process dies.
        goal_hash = CoscientistState._hash_goal(goal)
        output_dir = os.path.join(
            os.environ.get("COSCIENTIST_DIR", os.path.expanduser("~/.coscientist")),
            goal_hash,
        )
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...): os.makedirs(...)` pattern.
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, "error.log"), "w") as f:
            f.write(traceback.format_exc())
    finally:
        # Create a "done" file to signal completion either way.
        done_file = _get_done_file_path(goal)
        with open(done_file, "w") as f:
            f.write("done")
def display_resume_page():
    """Render the "Resume from Checkpoint" page.

    Lets the user pick a previously started research goal, inspect whether it
    has finished or is running elsewhere, and relaunch the Coscientist
    framework from its latest checkpoint in a background process.
    """
    st.header("🔄 Resume from Checkpoint")

    st.markdown("""
    Resume a Coscientist research process from where it left off. This page allows you to:

    - Select an existing research goal that has been started
    - Check if the research is already completed
    - Resume the research process from the latest checkpoint
    """)

    # Ensure the background-process bookkeeping keys exist across reruns.
    if "resume_process" not in st.session_state:
        st.session_state.resume_process = None
    if "resume_goal" not in st.session_state:
        st.session_state.resume_goal = None

    goals = get_available_states()
    if not goals:
        st.warning(
            "No existing research goals found. Please start a new research goal first."
        )
        return

    st.subheader("📋 Select Research Goal")

    def _shorten(text):
        # Keep very long goals readable in the dropdown.
        return text[:100] + "..." if len(text) > 100 else text

    chosen_goal = st.selectbox(
        "Choose a research goal to resume:",
        options=goals,
        format_func=_shorten,
        help="Select an existing research goal to resume from its latest checkpoint",
    )

    if chosen_goal:
        status_col, action_col = st.columns([2, 1])

        with status_col:
            st.subheader("📊 Goal Status")

            try:
                # Load the latest checkpoint to see how far the run got.
                checkpoint = CoscientistState.load_latest(goal=chosen_goal)
                if checkpoint is None:
                    st.error("❌ No state found for this goal. Cannot resume.")
                    return

                manager = CoscientistStateManager(checkpoint)
                finished = manager.is_finished
                status = check_coscientist_status(chosen_goal)

                if finished:
                    st.success("✅ This research goal has already been completed!")
                    st.info(
                        "The research process for this goal has finished. You can view the results in the Tournament Rankings or Proximity Graph pages."
                    )
                elif status == "running":
                    st.warning("⏳ This goal is currently running in another process.")
                    st.info(
                        "Please wait for the current process to finish before resuming."
                    )
                elif status.startswith("error"):
                    # status looks like "error: <message>"; strip the prefix.
                    st.error(f"❌ Previous run ended with error: {status[7:]}")
                    st.info(
                        "You can try resuming to continue from the last successful checkpoint."
                    )
                else:
                    st.info("🔄 This goal can be resumed.")
                    st.success("Ready to resume from the latest checkpoint.")

                # Summarize what the checkpoint contains.
                with st.expander("📈 Current State Information"):
                    st.write(f"**Goal:** {chosen_goal}")
                    st.write(f"**Finished:** {'Yes ✅' if finished else 'No ❌'}")
                    if hasattr(checkpoint, "hypotheses") and checkpoint.hypotheses:
                        st.write(
                            f"**Number of Hypotheses:** {len(checkpoint.hypotheses)}"
                        )
                    if (
                        hasattr(checkpoint, "tournament_results")
                        and checkpoint.tournament_results
                    ):
                        st.write(
                            f"**Tournament Matches:** {len(checkpoint.tournament_results)}"
                        )

            except Exception as e:
                st.error(f"❌ Error checking goal status: {str(e)}")
                return

        with action_col:
            st.subheader("🚀 Resume Action")

            # Resumable only if: a checkpoint exists, the run is unfinished,
            # nothing else is running it, and no worker of ours is alive.
            worker = st.session_state.resume_process
            can_resume = (
                checkpoint is not None
                and not finished
                and status != "running"
                and (worker is None or not worker.is_alive())
            )

            resume_help = (
                "Resume the research process from the latest checkpoint"
                if can_resume
                else "Cannot resume: check the status information"
            )
            if st.button("🔄 Resume Research", disabled=not can_resume, help=resume_help):
                try:
                    # Spawn the background worker process.
                    new_proc = multiprocessing.Process(
                        target=coscientist_resume_target, args=(chosen_goal,)
                    )
                    new_proc.start()
                    st.session_state.resume_process = new_proc
                    st.session_state.resume_goal = chosen_goal
                    st.success(f"🚀 Resumed research for: {chosen_goal[:50]}...")
                    st.info(
                        "The research process is now running in the background. You can check the status below or refresh the page to see updates."
                    )

                except Exception as e:
                    st.error(f"❌ Failed to resume research: {str(e)}")

    # Report on any worker we launched earlier.
    if st.session_state.resume_process is not None and st.session_state.resume_goal:
        st.subheader("🔄 Resume Process Status")

        if st.session_state.resume_process.is_alive():
            st.info(
                f"⏳ Research is currently running for: {st.session_state.resume_goal[:50]}..."
            )
            if st.button("🔄 Refresh Status"):
                st.rerun()
        else:
            # Worker exited; translate its final status for the user.
            final_status = check_coscientist_status(st.session_state.resume_goal)
            if final_status == "done":
                st.success(
                    f"✅ Research completed successfully for: {st.session_state.resume_goal[:50]}..."
                )
            elif final_status.startswith("error"):
                st.error(f"❌ Research ended with error: {final_status[7:]}")

            # Forget the finished worker.
            st.session_state.resume_process = None
            st.session_state.resume_goal = None

    with st.expander("💡 Tips for Resuming Research"):
        st.markdown("""
        **Before resuming:**
        - Make sure the research goal is not already completed
        - Check that no other process is currently running for this goal
        - Review the current state information to understand progress

        **During resume:**
        - The process runs in the background - you can navigate to other pages
        - Use the refresh button to check status updates
        - Check the Tournament Rankings page to see new results as they appear

        **After completion:**
        - View results in the Tournament Rankings page
        - Explore hypothesis relationships in the Proximity Graph page
        - Results are automatically saved and can be viewed later
        """)
import streamlit as st


def display_supervisor_page(state):
    """
    Display the supervisor decisions page.

    Shows every action taken by the supervisor agent (latest first) alongside
    the reasoning and system metrics that were recorded at decision time.

    Parameters
    ----------
    state : CoscientistState
        The loaded Coscientist state containing supervisor decisions and actions
    """
    st.header("🎯 Supervisor Decisions")

    # Nothing to show for states that never reached a supervisor decision.
    if not hasattr(state, "supervisor_decisions") or not state.supervisor_decisions:
        st.warning("No supervisor decisions found in this research state.")
        st.markdown("""
        ## Supervisor Decisions Page

        This page displays the decision-making process of the supervisor agent:

        - **Actions Taken**: See all actions decided by the supervisor in chronological order
        - **Decision Reasoning**: View the detailed reasoning behind each decision
        - **Strategic Context**: Understand the system state that influenced each decision

        The supervisor agent analyzes the research progress and decides what actions to take next,
        such as generating hypotheses, running tournaments, or finishing the research.
        """)
        return

    # Get supervisor decisions and actions (parallel lists).
    supervisor_decisions = state.supervisor_decisions
    actions = state.actions

    # Verify they are correlated before indexing one by the other.
    if len(supervisor_decisions) != len(actions):
        st.error(
            f"Mismatch between supervisor decisions ({len(supervisor_decisions)}) and actions ({len(actions)})"
        )
        return

    # Reverse once (latest first); both columns index into these same lists.
    reversed_actions = list(reversed(actions))
    reversed_decisions = list(reversed(supervisor_decisions))

    # Create two columns: actions list and reasoning display
    col1, col2 = st.columns([1, 2])

    with col1:
        st.subheader("📋 Actions History")
        st.markdown(f"**Total Actions:** {len(actions)}")

        # Container for the scrollable actions list.
        actions_container = st.container()

        # Initialize session state for selected action
        if "selected_action_index" not in st.session_state:
            st.session_state.selected_action_index = 0  # Default to latest action

        # BUGFIX: the selected index persists in session state across state
        # files; clamp a stale value so switching to a run with fewer actions
        # cannot raise an IndexError in the indexing below.
        if st.session_state.selected_action_index >= len(actions):
            st.session_state.selected_action_index = 0

        with actions_container:
            # Display actions in reverse order (latest first) with numbering.
            for i, action in enumerate(reversed_actions):
                action_number = len(actions) - i  # Number from latest to oldest

                button_key = f"action_{i}"
                button_label = f"#{action_number}: {action}"

                if i == st.session_state.selected_action_index:
                    # Highlight the selected action instead of rendering a button.
                    st.markdown(f"**🔹 {button_label}**")
                elif st.button(button_label, key=button_key):
                    st.session_state.selected_action_index = i
                    st.rerun()

    with col2:
        st.subheader("💭 Decision Reasoning")

        if supervisor_decisions:
            # Both lists are already reversed, so one index addresses the pair.
            selected_index = st.session_state.selected_action_index
            selected_decision = reversed_decisions[selected_index]
            selected_action = reversed_actions[selected_index]
            action_number = len(actions) - selected_index

            # Display the action and reasoning
            st.markdown(f"### Action #{action_number}: `{selected_action}`")

            # Show the reasoning, if the decision recorded any.
            if (
                "decision_reasoning" in selected_decision
                and selected_decision["decision_reasoning"]
            ):
                st.markdown("**Reasoning:**")
                st.markdown(selected_decision["decision_reasoning"])
            else:
                st.info("No detailed reasoning available for this action.")

            # Show additional context in an expander. Values come from the
            # decision dict; .get() keeps older states without a key readable.
            with st.expander("📊 System Context at Decision Time"):
                context_cols = st.columns(2)

                with context_cols[0]:
                    st.markdown("**Research Metrics:**")
                    st.write(
                        f"• Total Hypotheses: {selected_decision.get('total_hypotheses', 'N/A')}"
                    )
                    st.write(
                        f"• Unranked Hypotheses: {selected_decision.get('num_unranked_hypotheses', 'N/A')}"
                    )
                    st.write(
                        f"• Meta-Reviews: {selected_decision.get('num_meta_reviews', 'N/A')}"
                    )
                    st.write(
                        f"• Literature Subtopics: {selected_decision.get('literature_review_subtopics_completed', 'N/A')}"
                    )

                with context_cols[1]:
                    st.markdown("**Tournament Metrics:**")
                    st.write(
                        f"• Total Matches: {selected_decision.get('total_matches_played', 'N/A')}"
                    )
                    st.write(
                        f"• Tournament Rounds: {selected_decision.get('total_rounds_played', 'N/A')}"
                    )
                    st.write(
                        f"• New Hypotheses Since Meta-Review: {selected_decision.get('new_hypotheses_since_meta_review', 'N/A')}"
                    )

                # Show recent actions context
                if (
                    "latest_actions" in selected_decision
                    and selected_decision["latest_actions"]
                ):
                    st.markdown("**Recent Actions Context:**")
                    st.text(selected_decision["latest_actions"])
        else:
            st.info("No supervisor decisions available to display.")
import pandas as pd
import streamlit as st

from coscientist.custom_types import ReviewedHypothesis


def format_hypothesis_summary(hypothesis: ReviewedHypothesis, elo_rating: float) -> str:
    """Format a brief summary of the hypothesis for the list view.

    Long texts are hard-truncated at 150 characters; shorter texts are cut at
    the first sentence when that actually shortens them.
    """
    hypothesis_text = hypothesis.hypothesis
    if len(hypothesis_text) > 150:
        hypothesis_text = hypothesis_text[:150] + "..."
    elif "." in hypothesis_text:
        first_sentence = hypothesis_text.split(".")[0] + "."
        if len(first_sentence) < len(hypothesis_text):
            hypothesis_text = first_sentence

    return f"**ELO: {elo_rating:.1f}** | {hypothesis_text}"


def display_hypothesis_details(
    hypothesis: ReviewedHypothesis,
    elo_rating: float,
    win_loss_record: dict,
    available_uids: list[str],
):
    """Display detailed information about a hypothesis.

    Parameters
    ----------
    hypothesis : ReviewedHypothesis
        The hypothesis to render.
    elo_rating : float
        Current ELO rating of the hypothesis.
    win_loss_record : dict
        Mapping with "wins"/"losses" counts for this hypothesis.
    available_uids : list[str]
        UIDs present in the tournament; used to decide whether the parent
        link can be made clickable.
    """
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown(f"### Hypothesis {hypothesis.uid}")
        st.markdown(f"**Full Hypothesis:** {hypothesis.hypothesis}")

        if hypothesis.parent_uid:
            # Only link to the parent if it is still part of this tournament.
            if hypothesis.parent_uid in available_uids:
                if st.button(
                    f"🔗 **Evolved from:** {hypothesis.parent_uid}",
                    key=f"parent_link_{hypothesis.uid}",
                ):
                    st.session_state.selected_hypothesis = hypothesis.parent_uid
                    st.rerun()
            else:
                st.info(f"🔗 **Evolved from:** {hypothesis.parent_uid} (not available)")

    with col2:
        st.metric("ELO Rating", f"{elo_rating:.1f}")
        col2_1, col2_2 = st.columns(2)
        with col2_1:
            st.metric("Wins", win_loss_record.get("wins", 0))
        with col2_2:
            st.metric("Losses", win_loss_record.get("losses", 0))

    # Detailed sections in tabs
    tab1, tab2, tab3, tab4 = st.tabs(
        ["🔬 Predictions", "🧠 Reasoning", "📚 Verification", "🏛️ Assumptions"]
    )

    with tab1:
        st.markdown("**Testable Predictions:**")
        for i, prediction in enumerate(hypothesis.predictions, 1):
            st.markdown(f"{i}. {prediction}")

    with tab2:
        st.markdown("**Causal Reasoning:**")
        st.markdown(hypothesis.causal_reasoning)

    with tab3:
        st.markdown("**Deep Verification Result:**")
        st.markdown(hypothesis.verification_result)

    with tab4:
        st.markdown("**Core Assumptions:**")
        for i, assumption in enumerate(hypothesis.assumptions, 1):
            st.markdown(f"{i}. {assumption}")

        if hypothesis.assumption_research_results:
            st.markdown("**Research on Assumptions:**")
            for assumption, research in hypothesis.assumption_research_results.items():
                with st.expander(f"Research: {assumption[:100]}..."):
                    st.markdown(research)


def display_match_history(tournament, hypothesis_uid: str):
    """Display match history (with debate transcripts) for one hypothesis."""
    matches = []

    for match_key, match_result in tournament.match_history.items():
        # Keep only matches this hypothesis participated in.
        if hypothesis_uid in [match_result.uid1, match_result.uid2]:
            opponent_uid = (
                match_result.uid2
                if match_result.uid1 == hypothesis_uid
                else match_result.uid1
            )
            # Winner is encoded as 1 or 2, referring to uid1/uid2 respectively.
            won = (
                match_result.uid1 == hypothesis_uid and match_result.winner == 1
            ) or (match_result.uid2 == hypothesis_uid and match_result.winner == 2)

            # NOTE(review): assumes match_key[2] == 1 marks round-robin play;
            # verify against the tournament implementation.
            stage = "Round Robin" if match_key[2] == 1 else "Bracket"

            matches.append(
                {
                    "Stage": stage,
                    "Opponent": opponent_uid,
                    "Result": "Win" if won else "Loss",
                    "Debate": match_result.debate,
                }
            )

    if matches:
        st.markdown("### 🥊 Match History")
        for i, match in enumerate(matches):
            result_emoji = "🏆" if match["Result"] == "Win" else "❌"
            with st.expander(
                f"{result_emoji} {match['Stage']} vs {match['Opponent']} - {match['Result']}"
            ):
                st.markdown("**Debate Transcript:**")
                st.markdown(match["Debate"])
    else:
        st.info("No matches found for this hypothesis.")


def display_tournament_page(state):
    """Display the tournament rankings page."""
    st.markdown(
        "Explore hypotheses ranked by ELO rating with detailed information and match history."
    )

    if state is None:
        return

    # Display basic info
    st.markdown(f"**Research Goal:** {state.goal}")

    if state.tournament is None:
        st.warning("No tournament data found in this state file.")
        return

    tournament = state.tournament
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    win_loss_records = tournament.get_win_loss_records()

    if not sorted_hypotheses:
        st.warning("No hypotheses found in the tournament.")
        return

    # PERF: build the uid -> elo lookup once. Previously dict(sorted_hypotheses)
    # was reconstructed inside the selectbox format_func (once per rendered
    # option) and again for each later lookup.
    elo_by_uid = dict(sorted_hypotheses)

    st.markdown(
        f"**Total Hypotheses:** {len(sorted_hypotheses)} | **Total Matches:** {len(tournament.match_history)}"
    )

    # Tournament Rankings
    st.header("🏆 Tournament Rankings")

    # Create a summary table
    df_data = []
    for uid, elo_rating in sorted_hypotheses:
        hypothesis = tournament.hypotheses[uid]
        record = win_loss_records.get(uid, {"wins": 0, "losses": 0})
        df_data.append(
            {
                "Rank": len(df_data) + 1,
                "UID": uid,
                "ELO": f"{elo_rating:.1f}",
                "W-L": f"{record['wins']}-{record['losses']}",
                "Hypothesis": hypothesis.hypothesis[:100] + "..."
                if len(hypothesis.hypothesis) > 100
                else hypothesis.hypothesis,
            }
        )

    df = pd.DataFrame(df_data)
    st.dataframe(df, use_container_width=True, hide_index=True)

    # Detailed view
    st.header("📊 Detailed Hypothesis View")

    # Initialize session state for selected hypothesis if not exists
    available_uids = [uid for uid, _ in sorted_hypotheses]
    if "selected_hypothesis" not in st.session_state:
        st.session_state.selected_hypothesis = (
            available_uids[0] if available_uids else None
        )

    # Ensure the selected hypothesis is still valid (in case state file changed)
    if st.session_state.selected_hypothesis not in available_uids:
        st.session_state.selected_hypothesis = (
            available_uids[0] if available_uids else None
        )

    # Let user select which hypothesis to view in detail
    selected_uid = st.selectbox(
        "Select a hypothesis for detailed view:",
        options=available_uids,
        format_func=lambda uid: f"{uid} (ELO: {elo_by_uid[uid]:.1f})",
        index=available_uids.index(st.session_state.selected_hypothesis)
        if st.session_state.selected_hypothesis in available_uids
        else 0,
        key="hypothesis_selector",
    )

    # Update session state when selectbox changes
    if selected_uid != st.session_state.selected_hypothesis:
        st.session_state.selected_hypothesis = selected_uid

    if selected_uid:
        hypothesis = tournament.hypotheses[selected_uid]
        elo_rating = elo_by_uid[selected_uid]
        win_loss_record = win_loss_records.get(selected_uid, {"wins": 0, "losses": 0})

        # Display detailed information
        display_hypothesis_details(
            hypothesis, elo_rating, win_loss_record, available_uids
        )

        # Display match history
        display_match_history(tournament, selected_uid)
import os

import streamlit as st

# Import from modular pages
from common import (
    get_available_states,
    load_coscientist_state,
    load_coscientist_state_by_goal,
)
from configuration_page import display_configuration_page
from final_report_page import display_final_report_page
from literature_review_page import display_literature_review_page
from meta_reviews_page import display_meta_reviews_page
from proximity_page import display_proximity_graph_page
from resume_page import display_resume_page
from supervisor_page import display_supervisor_page
from tournament_page import display_tournament_page

st.set_page_config(page_title="Coscientist Viewer", page_icon="🧪", layout="wide")

# Sidebar navigation
st.sidebar.title("🧪 Coscientist Viewer")
page = st.sidebar.selectbox(
    "Select Page",
    [
        "Configuration Agent",
        "Literature Review",
        "Tournament Rankings",
        "Proximity Graph",
        "Meta-Reviews",
        "Supervisor Decisions",
        "Final Report",
        "Resume from Checkpoint",
    ],
)

# Shared prompt shown whenever a state-backed page has no state loaded yet.
_SELECT_STATE_PROMPT = (
    "👈 Please select a research goal or upload a Coscientist state file "
    "from the sidebar to get started."
)

# Pages that operate on a loaded CoscientistState, mapped to their display
# function and the placeholder markdown shown before a state is selected.
# The markdown is flush-left so st.markdown renders it as formatted text
# rather than an indented code block.
_STATE_PAGES = {
    "Literature Review": (
        display_literature_review_page,
        """
## Literature Review Page

View the comprehensive literature review that forms the foundation of the research:

1. **Research Subtopics** - see how the main research goal was systematically decomposed
2. **Subtopic Reports** - select any subtopic to view its detailed literature analysis
3. **Knowledge Foundation** - understand the scientific background informing hypothesis generation

**What you'll see:**
- **Dropdown Navigation**: Select from numbered subtopics to explore different research areas
- **Detailed Reports**: Comprehensive literature analysis for each subtopic
- **Research Context**: Scientific foundation that guides hypothesis generation
- **Summary Statistics**: Overview of subtopics covered and reports available

The literature review is one of the first steps in the research process, providing
the scientific foundation for generating well-informed, evidence-based research hypotheses.
""",
    ),
    "Tournament Rankings": (
        display_tournament_page,
        """
## Tournament Rankings Page

View and explore hypotheses ranked by ELO rating:

1. **Browse tournament rankings** - see all hypotheses ranked by ELO rating
2. **Select a hypothesis** for detailed view to see:
   - Full hypothesis text and predictions
   - Causal reasoning and verification results
   - Assumptions and supporting research
   - Complete match history with debate transcripts

**What you'll see:**
- **ELO Ratings**: Higher scores indicate stronger performance in head-to-head comparisons
- **Win-Loss Records**: Track record against other hypotheses
- **Match History**: Full debate transcripts showing why one hypothesis beat another
- **Hypothesis Lineage**: See which hypotheses evolved from others
""",
    ),
    "Proximity Graph": (
        display_proximity_graph_page,
        """
## Proximity Graph Page

Explore the semantic relationships between hypotheses using advanced network visualization:

1. **Interactive Cytoscape.js graph** with hypotheses as nodes and similarities as edges
2. **Community detection** to find groups of semantically similar hypotheses
3. **Click nodes** to select them and see full hypothesis text
4. **Drag and rearrange** nodes to explore relationships
5. **Adjust parameters** to control community detection sensitivity

**What you'll see:**
- **Node colors**: Different colors represent different semantic communities
- **Interactive layout**: Force-directed positioning based on similarity
- **Edges**: Connections show cosine similarity between hypothesis embeddings
- **Statistics**: Number of hypotheses, connections, and average similarity
- **Selection feedback**: Click nodes to see their full hypothesis text below the graph

**Advanced Features:**
- Multi-node selection with Ctrl/Cmd + click
- Smooth animations and transitions
- Professional network graph layout algorithms
- Real-time interaction feedback
""",
    ),
    "Meta-Reviews": (
        display_meta_reviews_page,
        """
## Meta-Reviews Page

View the strategic analysis and review process of the research:

1. **Reviews Timeline** - see all meta-reviews generated during the research process
2. **Strategic Analysis** - click on any meta-review to see the full analysis
3. **Research Guidance** - understand how each review guided future research directions

**What you'll see:**
- **Numbered Reviews**: Latest meta-reviews appear first with sequential numbering
- **Strategic Analysis**: Full text of the meta-review analysis and insights
- **Research Context**: Tournament state and hypothesis counts at review time
- **Quality Assessment**: Evaluation of hypothesis performance and research progress

Meta-reviews are generated periodically to analyze the current state of research,
identify patterns and gaps, and guide the supervisor agent's strategic decisions.
""",
    ),
    "Supervisor Decisions": (
        display_supervisor_page,
        """
## Supervisor Decisions Page

View the decision-making process of the supervisor agent:

1. **Actions Timeline** - see all actions taken by the supervisor in chronological order
2. **Decision Reasoning** - click on any action to see the detailed reasoning behind it
3. **System Context** - understand the research state that influenced each decision

**What you'll see:**
- **Numbered Actions**: Latest actions appear first with sequential numbering
- **Decision Reasoning**: Full text of the supervisor's strategic thinking
- **System Metrics**: Research state, hypothesis counts, tournament progress at decision time
- **Recent Context**: What other actions were taken recently that influenced the decision

The supervisor agent analyzes the research progress and decides what actions to take next,
such as generating new hypotheses, evolving existing ones, running tournaments, or finishing the research.
""",
    ),
    "Final Report": (
        display_final_report_page,
        """
## Final Report Page

View the comprehensive final research report generated upon completion:

1. **Complete Analysis** - comprehensive summary of all research findings
2. **Top Hypotheses** - detailed review of the highest-ranked hypotheses
3. **Research Conclusions** - final insights and recommendations
4. **Process Summary** - overview of the research methodology and evaluation

**What you'll see:**
- **Final Report**: Complete research summary and conclusions
- **Research Statistics**: Overview of hypotheses generated, tournaments run, and key metrics
- **Process Completion**: Confirmation that the research process finished successfully

The final report is generated only when the supervisor agent determines that the research
has achieved sufficient depth and quality, and further investigation would yield diminishing returns.
""",
    ),
}


def _select_state_file():
    """
    Render the sidebar research-goal selector and optional file uploader.

    Returns
    -------
    tuple
        ``(selected_file, temp_path)`` where ``selected_file`` is either a
        goal string, the path of an uploaded temp file, or None; and
        ``temp_path`` is the uploaded temp file path (None when nothing was
        uploaded).
    """
    selected_file = None
    temp_path = None

    with st.sidebar:
        st.header("📁 Select Research Goal")

        # Update button
        col1, col2 = st.columns([2, 1])
        with col1:
            st.markdown("**Available Goals:**")
        with col2:
            if st.button("Update", help="Refresh file list and load latest"):
                st.session_state.file_list_updated = True
                st.rerun()

        # Get available state files
        available_states = get_available_states()

        # Auto-select most recent file if update was clicked or none selected.
        if st.session_state.file_list_updated or st.session_state.current_file is None:
            if available_states:
                st.session_state.current_file = available_states[0]  # Most recent
                if st.session_state.file_list_updated:
                    st.success(
                        f"📁 Updated! Latest goal: {available_states[0][:50]}{'...' if len(available_states[0]) > 50 else ''}"
                    )
        # Always clear the one-shot update flag, even when no states exist,
        # so a click cannot leave the flag permanently set.
        st.session_state.file_list_updated = False

        if available_states:
            # Find index of current file in the list (in case files changed).
            current_index = 0
            if st.session_state.current_file in available_states:
                current_index = available_states.index(st.session_state.current_file)

            selected_file = st.selectbox(
                "Choose a research goal:",
                options=available_states,
                format_func=lambda x: x,  # Display the goal text directly
                index=current_index,
                key="file_selector",
            )

            # Update session state when selection changes
            if selected_file != st.session_state.current_file:
                st.session_state.current_file = selected_file
        else:
            st.warning("No Coscientist research goals found.")
            selected_file = None
            st.session_state.current_file = None

        # File upload option
        st.markdown("**Or upload a file:**")
        uploaded_file = st.file_uploader("Upload .pkl file", type="pkl")

        if uploaded_file is not None:
            # Save uploaded file temporarily; main() removes it after rendering.
            temp_path = f"temp_{uploaded_file.name}"
            with open(temp_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            selected_file = temp_path

    return selected_file, temp_path


def main():
    st.title("🧪 Coscientist Viewer")

    # Initialize session state for file selection
    if "current_file" not in st.session_state:
        st.session_state.current_file = None
    if "file_list_updated" not in st.session_state:
        st.session_state.file_list_updated = False

    selected_file = None
    temp_path = None
    state = None

    # Sidebar file selection only for pages that read a saved state.
    if page in _STATE_PAGES:
        selected_file, temp_path = _select_state_file()

        if selected_file:
            # BUGFIX: detect uploads by comparing against the actual temp
            # path rather than a "temp_" prefix test, which could
            # misclassify a research goal whose text starts with "temp_".
            if temp_path is not None and selected_file == temp_path:
                state = load_coscientist_state(selected_file)
            else:
                # It's a goal text, load the matching saved state.
                state = load_coscientist_state_by_goal(selected_file)

    # Display appropriate page based on navigation
    if page == "Configuration Agent":
        display_configuration_page()
    elif page == "Resume from Checkpoint":
        display_resume_page()
    else:
        # All remaining pages are state-backed and share the same
        # "no state yet" placeholder pattern.
        display_fn, placeholder_markdown = _STATE_PAGES[page]
        if state is None:
            st.info(_SELECT_STATE_PROMPT)
            st.markdown(placeholder_markdown)
        else:
            display_fn(state)

    # Clean up temp file if it was uploaded
    if temp_path and os.path.exists(temp_path):
        os.remove(temp_path)


if __name__ == "__main__":
    main()
import os
import re

from jinja2 import Environment, FileSystemLoader, select_autoescape

from coscientist.custom_types import ParsedHypothesis

# Single shared Jinja environment rooted at this package's prompts/ directory.
# trim_blocks/lstrip_blocks keep rendered markdown free of stray whitespace
# around template tags.
_env = Environment(
    loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), "prompts")),
    autoescape=select_autoescape(),
    trim_blocks=True,
    lstrip_blocks=True,
)


def load_prompt(name: str, **kwargs) -> str:
    """
    Load a template from the prompts directory and renders
    it with the given kwargs.

    Parameters
    ----------
    name: str
        The name of the template to load, without the .md extension.
    **kwargs: dict
        The kwargs to render the template with.

    Returns
    -------
    str
        The rendered template.
    """
    return _env.get_template(f"{name}.md").render(**kwargs)


def parse_hypothesis_markdown(markdown_text: str) -> ParsedHypothesis:
    """
    Parse markdown text with # headings to extract Hypothesis, Predictions,
    and Assumptions sections.

    Parameters
    ----------
    markdown_text : str
        Markdown text containing sections with # headings for Hypothesis,
        Predictions, and Assumptions

    Returns
    -------
    ParsedHypothesis
        Structured output with hypothesis, predictions, and assumptions
        fields extracted from markdown

    Raises
    ------
    AssertionError
        If any of the three required sections is missing or empty.
    """
    # Keep only the content after the report marker, if present.
    # BUGFIX: maxsplit=1 keeps everything after the first marker; the
    # previous unbounded split discarded content that followed a second
    # occurrence of "#FINAL REPORT#".
    if "#FINAL REPORT#" in markdown_text:
        markdown_text = markdown_text.split("#FINAL REPORT#", 1)[1]

    # Split the text by '#' to get heading-delimited sections.
    sections = markdown_text.split("#")

    # Initialize fields
    hypothesis = ""
    predictions: list[str] = []
    assumptions: list[str] = []

    for section in sections:
        section = section.strip()
        if not section:
            continue

        # First line is the heading title, the remainder is its content.
        lines = section.split("\n", 1)
        if len(lines) < 2:
            continue

        title = lines[0].strip().lower()
        content = lines[1].strip()

        # Match section titles (case-insensitive, substring match so
        # e.g. "Core Assumptions" also hits).
        if "hypothesis" in title:
            hypothesis = content
        elif "prediction" in title:
            predictions = _parse_numbered_list(content)
        elif "assumption" in title:
            assumptions = _parse_numbered_list(content)

    # NOTE: asserts kept (rather than raising ValueError) so any existing
    # callers that catch AssertionError keep working; they do strip under -O.
    assert hypothesis, f"Hypothesis section is required: {markdown_text}"
    assert predictions, f"Predictions section is required: {markdown_text}"
    assert assumptions, f"Assumptions section is required: {markdown_text}"

    return ParsedHypothesis(
        hypothesis=hypothesis, predictions=predictions, assumptions=assumptions
    )


def _parse_numbered_list(content: str) -> list[str]:
    """
    Parse a numbered list from text content into a list of strings.

    Parameters
    ----------
    content : str
        Text containing a numbered list (e.g., "1. First item\\n2. Second item")

    Returns
    -------
    list[str]
        List of individual items with numbering removed. Unnumbered lines are
        treated as continuations of the current item (or as the first item
        when nothing is open yet).
    """
    if not content.strip():
        return []

    # Matches "1. item", "1) item", "1- item"; leading whitespace allowed.
    number_pattern = re.compile(r"^\s*\d+[\.\)\-]\s*(.+)", re.MULTILINE)

    items: list[str] = []
    current_item = ""

    for line in content.split("\n"):
        line = line.strip()
        if not line:
            continue

        match = number_pattern.match(line)
        if match:
            # A new numbered entry starts: flush the previous one.
            if current_item:
                items.append(current_item.strip())
            current_item = match.group(1)
        elif current_item:
            # Continuation line of the current item.
            current_item += " " + line
        else:
            # First line doesn't start with a number; treat it as an item.
            current_item = line

    # Add the last item
    if current_item:
        items.append(current_item.strip())

    return items
"""

import uuid
from typing import Sequence, TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated

from coscientist.common import load_prompt


class ConfigurationState(TypedDict):
    """
    Represents the state of the interactive configuration process.

    Uses LangGraph's standard message-based state management for better
    conversation handling and persistence.

    Parameters
    ----------
    messages: Annotated[Sequence[BaseMessage], add_messages]
        The conversation messages between agent and user
    goal: str
        The initial research goal to refine
    refined_goal: str
        The final refined goal (set when process is complete)
    is_complete: bool
        Whether the configuration process is complete
    """

    # The `add_messages` reducer makes LangGraph append newly returned
    # messages to the stored history instead of overwriting it.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    goal: str
    refined_goal: str
    is_complete: bool


def build_configuration_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for the interactive configuration agent process.

    The graph uses LangGraph's built-in message persistence and follows best
    practices for chatbot development including:
    - Proper message state management (via the `add_messages` reducer)
    - Built-in checkpointer (InMemorySaver) for conversation persistence

    Parameters
    ----------
    llm: BaseChatModel
        The language model to use for the agent responses

    Returns
    -------
    StateGraph
        A compiled LangGraph for the interactive configuration agent
    """
    # Create the workflow
    workflow = StateGraph(state_schema=ConfigurationState)

    # Add the single configuration node; the llm is bound via closure.
    workflow.add_node("configuration", lambda state: _configuration_node(state, llm))

    # Set up the flow
    workflow.add_edge(START, "configuration")

    # Add memory for conversation persistence.
    # NOTE: InMemorySaver is process-local; conversation state is lost when
    # the process exits — swap in a durable checkpointer if persistence
    # across restarts is needed.
    memory = InMemorySaver()

    return workflow.compile(checkpointer=memory)


def _configuration_node(
    state: ConfigurationState, llm: BaseChatModel
) -> ConfigurationState:
    """
    Node that processes the conversation and generates the agent's response.
    """
    prompt = load_prompt("research_config", goal=state["goal"])

    # Ensure we have messages to work with
    messages = state.get("messages", [])
    if not messages:
        # If no messages, create a default user message to start the conversation
        messages = [HumanMessage(content="Please help me refine my research goal.")]

    # System prompt carries the research goal; the running conversation is
    # injected through the MessagesPlaceholder.
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", prompt), MessagesPlaceholder(variable_name="messages")]
    )

    # Prepare the input for the prompt template
    template_input = {"messages": messages}
    formatted_prompt = prompt_template.invoke(template_input)

    response = llm.invoke(formatted_prompt)

    # The agent signals completion by emitting a "FINAL GOAL:" marker.
    is_complete = "FINAL GOAL:" in response.content
    refined_goal = state.get("refined_goal", "")

    if is_complete:
        # Extract the final goal (everything after the marker).
        try:
            refined_goal = response.content.split("FINAL GOAL:")[1].strip()
        except IndexError:
            # Fallback if parsing fails
            refined_goal = response.content

    # Only the new response is returned in "messages"; the add_messages
    # reducer appends it to the persisted history.
    return {
        "messages": [response],
        "goal": state["goal"],
        "refined_goal": refined_goal,
        "is_complete": is_complete,
    }


class ConfigurationChatManager:
    """
    Manages the interactive chat process for configuration refinement.

    This class handles the conversation flow between the user and the configuration
    agent, maintaining state and managing the workflow execution until completion.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for agent responses
    research_goal : str
        The initial research goal to be refined through conversation
    """

    def __init__(self, llm: BaseChatModel, research_goal: str):
        """
        Initialize the chat manager with an LLM and research goal.

        Parameters
        ----------
        llm : BaseChatModel
            The language model for the configuration agent
        research_goal : str
            The initial research goal to refine
        """
        self.llm = llm
        self.research_goal = research_goal
        self.agent = build_configuration_agent(llm)
        # A fresh thread id isolates this conversation in the checkpointer.
        self.config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        self.current_state = None
        self.is_complete = False
        self.refined_goal = ""

        # Initialize the conversation
        self._initialize_conversation()

    def _initialize_conversation(self):
        """Initialize the conversation with the research goal."""
        # Start with an initial user message to trigger the agent's response
        initial_message = HumanMessage(
            content="Please help me refine my research goal and ask clarifying questions if needed."
        )
        initial_state = ConfigurationState(
            messages=[initial_message],
            goal=self.research_goal,
            refined_goal="",
            is_complete=False,
        )
        self.current_state = self.agent.invoke(initial_state, self.config)
        self.is_complete = self.current_state.get("is_complete", False)
        self.refined_goal = self.current_state.get("refined_goal", "")

    def send_human_message(self, message: str) -> str:
        """
        Send a human message to the agent and get the response.

        Parameters
        ----------
        message : str
            The human message to send to the agent

        Returns
        -------
        str
            The agent's response message

        Raises
        ------
        RuntimeError
            If the conversation is already complete
        """
        if self.is_complete:
            raise RuntimeError(
                "Conversation is already complete. The refined goal is available."
            )

        # Send human message to the agent; the checkpointer (keyed by
        # thread_id in self.config) restores goal and prior messages.
        input_messages = [HumanMessage(message)]
        output = self.agent.invoke({"messages": input_messages}, self.config)

        # Update state
        self.current_state = output
        self.is_complete = output.get("is_complete", False)
        self.refined_goal = output.get("refined_goal", "")

        # Get the latest AI message
        messages = output.get("messages", [])
        if messages:
            latest_message = messages[-1]
            if hasattr(latest_message, "content"):
                return latest_message.content

        return "No response received from agent."

    def get_latest_agent_message(self) -> str:
        """
        Get the latest message from the agent.

        Returns
        -------
        str
            The latest agent message content
        """
        if not self.current_state:
            return "No messages yet."

        messages = self.current_state.get("messages", [])
        if messages:
            latest_message = messages[-1]
            if hasattr(latest_message, "content"):
                return latest_message.content

        return "No agent messages found."

    def is_conversation_complete(self) -> bool:
        """
        Check if the configuration conversation is complete.

        Returns
        -------
        bool
            True if the conversation is complete, False otherwise
        """
        return self.is_complete

    def get_refined_goal(self) -> str:
        """
        Get the refined research goal.

        Returns
        -------
        str
            The refined goal if conversation is complete, empty string otherwise
        """
        return self.refined_goal if self.is_complete else ""

    def get_conversation_history(self) -> Sequence[BaseMessage]:
        """
        Get the full conversation history.

        Returns
        -------
        Sequence[BaseMessage]
            All messages in the conversation
        """
        if not self.current_state:
            return []

        return self.current_state.get("messages", [])


# --------------------------------------------------------------------------
# /coscientist/custom_types.py
# --------------------------------------------------------------------------
import uuid

from pydantic import BaseModel, Field


class ParsedHypothesis(BaseModel):
    """Structured output for parsed hypothesis."""

    # Auto-generated UUID so each hypothesis can be tracked through
    # review, ranking, and evolution.
    uid: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the hypothesis",
    )
    hypothesis: str = Field(description="The main hypothesis statement")
    predictions: list[str] = Field(
        description="A list of predictions that could be tested to disprove the hypothesis"
    )
    assumptions: list[str] = Field(
        description="A list of assumptions that are implicit or explicit in the hypothesis"
    )
    # None for first-generation hypotheses; set when evolved from a parent.
    parent_uid: str | None = Field(
        default=None,
        description="The unique identifier of the parent hypothesis, if applicable",
    )


class ReviewedHypothesis(ParsedHypothesis):
    """Structured output for reviewed hypothesis."""

    causal_reasoning: str = Field(description="The causal reasoning for the hypothesis")
    assumption_research_results: dict[str, str] = Field(
        description="A dictionary of assumption research results"
    )
    verification_result: str = Field(
        description="The result of the deep verification process"
    )


class RankingMatchResult(BaseModel):
    """Result of a match between two hypotheses."""

    uid1: str = Field(description="Unique identifier for the first hypothesis")
    uid2: str = Field(description="Unique identifier for the second hypothesis")
    winner: int = Field(description="The winner of the match (1 or 2)")
    debate: str = Field(description="The debate between the two hypotheses")


# --------------------------------------------------------------------------
# /coscientist/evolution_agent.py
# --------------------------------------------------------------------------
"""
Evolution agent
--------------
- Inspiration from other ideas
- Simplification
- Research extension

More details:
- Looks for weaknesses in a hypothesis, makes search queries to
  evaluate them and suggests improvements to fill in reasoning gaps.
- Tries to fix invalid assumptions identified by the Reflection agent
  and to improve practicality and feasibility for testing.
- Creates new hypotheses using multiple top-ranked ones as inspiration or
  by combining them in new ways.
- Intentionally tries to generate out-of-the-box ideas that are
  divergent from existing ones.
- Never replaces an existing hypothesis, but always adds a new one
  that should in principle be better.
"""

from typing import TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt, parse_hypothesis_markdown
from coscientist.custom_types import ParsedHypothesis, ReviewedHypothesis


class EvolveFromFeedbackState(TypedDict):
    """
    State for the `evolve_from_feedback` prompt agent.
    """

    goal: str
    parent_hypothesis: ReviewedHypothesis
    meta_review: str
    # Output slot filled by the evolution node.
    evolved_hypothesis: ParsedHypothesis


class OutOfTheBoxState(TypedDict):
    """
    State for the `out_of_the_box` prompt agent.
44 | """ 45 | 46 | goal: str 47 | top_hypotheses: list[ReviewedHypothesis] 48 | elo_ratings: list[float] 49 | evolved_hypothesis: ParsedHypothesis 50 | 51 | 52 | def build_evolution_agent( 53 | mode: str, 54 | llm: BaseChatModel, 55 | ) -> StateGraph: 56 | """ 57 | Unified builder function for evolution agents that supports both evolve_from_feedback and out_of_the_box modes. 58 | 59 | Parameters 60 | ---------- 61 | mode : str 62 | The mode of operation, either "evolve_from_feedback" or "out_of_the_box". 63 | llm : BaseChatModel 64 | The language model to use for both evolution and standardization. 65 | 66 | Returns 67 | ------- 68 | StateGraph 69 | A compiled LangGraph for the evolution agent. 70 | 71 | Raises 72 | ------ 73 | ValueError 74 | If mode is invalid. 75 | """ 76 | if mode == "evolve_from_feedback": 77 | return _build_evolve_from_feedback_agent(llm) 78 | elif mode == "out_of_the_box": 79 | return _build_out_of_the_box_agent(llm) 80 | else: 81 | raise ValueError( 82 | "mode must be either 'evolve_from_feedback' or 'out_of_the_box'" 83 | ) 84 | 85 | 86 | def _evolve_from_feedback_node( 87 | state: EvolveFromFeedbackState, 88 | llm: BaseChatModel, 89 | ) -> EvolveFromFeedbackState: 90 | """ 91 | Evolution node for evolving a hypothesis based on feedback. 
92 | """ 93 | prompt = load_prompt( 94 | "evolve_from_feedback", 95 | goal=state["goal"], 96 | hypothesis=state["parent_hypothesis"].hypothesis, 97 | review=state["parent_hypothesis"].verification_result, 98 | meta_review=state["meta_review"], 99 | ) 100 | response_content = llm.invoke(prompt).content 101 | parsed_hypothesis = parse_hypothesis_markdown(response_content) 102 | parsed_hypothesis.parent_uid = state["parent_hypothesis"].uid 103 | return {**state, "evolved_hypothesis": parsed_hypothesis} 104 | 105 | 106 | def _out_of_the_box_node( 107 | state: OutOfTheBoxState, 108 | llm: BaseChatModel, 109 | ) -> OutOfTheBoxState: 110 | """ 111 | Evolution node for generating out-of-the-box ideas from top hypotheses. 112 | """ 113 | # Convert list of hypotheses to formatted string 114 | hypotheses_text = "\n".join( 115 | [ 116 | f"- {hyp.hypothesis} (Elo rating: {elo_rating})" 117 | for hyp, elo_rating in zip(state["top_hypotheses"], state["elo_ratings"]) 118 | ] 119 | ) 120 | 121 | prompt = load_prompt( 122 | "out_of_the_box", 123 | goal=state["goal"], 124 | hypotheses=hypotheses_text, 125 | ) 126 | response_content = llm.invoke(prompt).content 127 | parsed_hypothesis = parse_hypothesis_markdown(response_content) 128 | return {**state, "evolved_hypothesis": parsed_hypothesis} 129 | 130 | 131 | def _build_evolve_from_feedback_agent(llm: BaseChatModel) -> StateGraph: 132 | """ 133 | Builds and configures a LangGraph for evolving hypotheses from feedback. 134 | 135 | Parameters 136 | ---------- 137 | llm : BaseChatModel 138 | The language model to use for both evolution and standardization. 139 | 140 | Returns 141 | ------- 142 | StateGraph 143 | A compiled LangGraph for the evolve-from-feedback agent. 
    """
    graph = StateGraph(EvolveFromFeedbackState)

    graph.add_node(
        "evolution",
        lambda state: _evolve_from_feedback_node(state, llm),
    )
    graph.add_edge("evolution", END)

    graph.set_entry_point("evolution")
    return graph.compile()


def _build_out_of_the_box_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for generating out-of-the-box ideas.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for both evolution and standardization.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the out-of-the-box agent.
    """
    graph = StateGraph(OutOfTheBoxState)

    graph.add_node(
        "evolution",
        lambda state: _out_of_the_box_node(state, llm),
    )

    graph.add_edge("evolution", END)

    graph.set_entry_point("evolution")
    return graph.compile()


# --------------------------------------------------------------------------
# /coscientist/final_report_agent.py
# --------------------------------------------------------------------------
"""
Final report agent
------------------
- Generates a comprehensive scientific research report
- Takes tournament results and formats them into a professional report
- Provides detailed analysis of top-ranked hypotheses with experimental suggestions

More details:
- Formats all hypotheses by ELO ranking for overview
- Provides detailed information for top k hypotheses including causal reasoning,
  verification results, and falsifiable predictions
- Generates a structured scientific report suitable for domain experts
"""

from typing import TypedDict

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt
from coscientist.custom_types import ReviewedHypothesis
from coscientist.ranking_agent import EloTournament


class FinalReportState(TypedDict):
    """
    State for the final report agent.
    """

    goal: str
    tournament: EloTournament
    # Number of top-ranked hypotheses to describe in detail in the report.
    top_k: int
    # Output slot: the generated report text.
    result: str


def build_final_report_agent(llm: BaseChatModel) -> StateGraph:
    """
    Builds and configures a LangGraph for final report generation.

    Parameters
    ----------
    llm : BaseChatModel
        The language model to use for final report generation.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the final report agent.
    """
    graph = StateGraph(FinalReportState)

    graph.add_node(
        "final_report",
        lambda state: _final_report_node(state, llm),
    )

    graph.add_edge("final_report", END)
    graph.set_entry_point("final_report")
    return graph.compile()


def _format_hypothesis_with_rating(
    hypothesis: ReviewedHypothesis, rating: float
) -> str:
    """Helper function to format a hypothesis with its ELO rating."""
    return f"Hypothesis {hypothesis.uid} (ELO: {rating:.2f}): {hypothesis.hypothesis}"


def _format_detailed_hypothesis(hypothesis: ReviewedHypothesis, rating: float) -> str:
    """Helper function to format a hypothesis with detailed information."""
    sections = [
        f"## Hypothesis {hypothesis.uid} (ELO: {rating:.2f})",
        f"**Hypothesis Statement:** {hypothesis.hypothesis}",
        f"**Causal Reasoning:** {hypothesis.causal_reasoning}",
        f"**Verification Result:** {hypothesis.verification_result}",
        f"**Falsifiable Predictions:** {' '.join(hypothesis.predictions)}",
    ]
    return "\n\n".join(sections)


def _get_top_hypotheses_data(
    tournament: EloTournament, top_k: int
) -> list[tuple[str, float]]:
    """Helper function to get top k hypotheses sorted by ELO rating."""
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    return sorted_hypotheses[:top_k]


def _final_report_node(
    state: FinalReportState,
    llm: BaseChatModel,
) -> FinalReportState:
    """
    Final report node that generates a comprehensive scientific research report.
    """
    tournament = state["tournament"]
    top_k = state.get("top_k", 3)  # Default to top 3 hypotheses

    # Build hypotheses by ranking - all hypotheses sorted by ELO rating
    sorted_hypotheses = tournament.get_sorted_hypotheses()
    hypotheses_by_ranking_entries = []
    for hyp_id, rating in sorted_hypotheses:
        hypothesis = tournament.hypotheses[hyp_id]
        hypotheses_by_ranking_entries.append(
            _format_hypothesis_with_rating(hypothesis, rating)
        )
    hypotheses_by_ranking_text = "\n".join(hypotheses_by_ranking_entries)

    # Build detailed top hypotheses information
    top_hypotheses_data = _get_top_hypotheses_data(tournament, top_k)
    top_ranked_hypotheses_entries = []
    for hyp_id, rating in top_hypotheses_data:
        hypothesis = tournament.hypotheses[hyp_id]
        top_ranked_hypotheses_entries.append(
            _format_detailed_hypothesis(hypothesis, rating)
        )
    top_ranked_hypotheses_text = "\n\n".join(top_ranked_hypotheses_entries)

    prompt = load_prompt(
        "final_report",
        goal=state["goal"],
        hypotheses_by_ranking=hypotheses_by_ranking_text,
        top_ranked_hypotheses=top_ranked_hypotheses_text,
    )
    response_content = llm.invoke(prompt).content
    return {**state, "result": response_content}


# --------------------------------------------------------------------------
# /coscientist/generation_agent.py
# --------------------------------------------------------------------------
"""
Generation agent
---------------
- Literature exploration
- Simulated scientific debates
"""

from dataclasses import dataclass
from typing import TypedDict, Union

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist import multiturn
from coscientist.common import load_prompt, parse_hypothesis_markdown
from coscientist.custom_types import ParsedHypothesis
from coscientist.reasoning_types import ReasoningType


class IndependentState(TypedDict):
    # Inputs to the generation node.
    goal: str
    literature_review: str
    meta_review: str
    # Output: structured hypothesis parsed from the raw markdown result.
    hypothesis: ParsedHypothesis
    _raw_result: str  # Private temporary field for markdown output


class CollaborativeState(IndependentState, multiturn.MultiTurnState):
    pass


@dataclass
class IndependentConfig:
    """Configuration for independent generation mode."""

    field: str
    reasoning_type: ReasoningType
    llm: BaseChatModel


@dataclass
class CollaborativeConfig:
    """Configuration for collaborative generation mode."""

    agent_names: list[str]
    agent_fields: dict[str, str]
    agent_reasoning_types: dict[str, ReasoningType]
    llms: dict[str, BaseChatModel]
    max_turns: int = 10


def build_generation_agent(
    mode: str,
    config: Union[IndependentConfig, CollaborativeConfig],
) -> StateGraph:
    """
    Unified builder function for generation agents that supports both independent
    and collaborative modes.

    Parameters
    ----------
    mode : str
        The mode of operation, either "independent" or "collaborative".
    config : Union[IndependentConfig, CollaborativeConfig]
        Configuration object containing all necessary parameters for the selected mode.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the generation agent.

    Raises
    ------
    ValueError
        If mode is invalid or required parameters are missing for the selected mode.
    """
    if mode == "independent":
        if not isinstance(config, IndependentConfig):
            raise ValueError("config must be an IndependentConfig instance")
        return _build_independent_generation_agent(
            config.field, config.reasoning_type, config.llm
        )
    elif mode == "collaborative":
        if not isinstance(config, CollaborativeConfig):
            raise ValueError("config must be a CollaborativeConfig instance")
        # Use the simplified multi-turn system
        return _build_collaborative_generation_agent(
            config.agent_names,
            config.agent_fields,
            config.agent_reasoning_types,
            config.llms,
            config.max_turns,
        )
    else:
        raise ValueError("mode must be either 'independent' or 'collaborative'")


def _independent_generation_node(
    state: IndependentState,
    field: str,
    reasoning_type: ReasoningType,
    llm: BaseChatModel,
) -> IndependentState:
    """
    Represents the action of a single generation agent using the
    independent_generation.md template. The output is expected to be markdown
    with sections: Evidence, Hypothesis, Reasoning, Assumptions Table.
    """
    # Handle meta_review field with fallback (absent on the first round).
    meta_review = state.get("meta_review", "Not Available")

    prompt = load_prompt(
        "independent_generation",
        goal=state["goal"],
        field=field,
        literature_review=state["literature_review"],
        meta_review=meta_review,
        reasoning_type=reasoning_type.value,
    )
    response_content = llm.invoke(prompt).content
    return {**state, "_raw_result": response_content}


def _parsing_node(state: IndependentState) -> IndependentState:
    """
    Parse the raw markdown result into a structured ParsedHypothesis object.
    """
    parsed_hypothesis = parse_hypothesis_markdown(state["_raw_result"])
    return {**state, "hypothesis": parsed_hypothesis}


def _build_independent_generation_agent(
    field: str, reasoning_type: ReasoningType, llm: BaseChatModel
):
    """
    Builds and configures a LangGraph for a single-agent generation process using
    the independent_generation.md template. The agent's output is parsed into a
    structured ParsedHypothesis object.

    Parameters
    ----------
    field : str
        Field or domain of expertise.
    reasoning_type : ReasoningType
        Reasoning type for the agent.
    llm : BaseChatModel
        The language model to use.

    Returns
    -------
    StateGraph
        A compiled LangGraph for the generation agent.
    """
    graph = StateGraph(IndependentState)
    graph.add_node(
        "generator",
        lambda state: _independent_generation_node(state, field, reasoning_type, llm),
    )
    graph.add_node("parser", _parsing_node)

    graph.add_edge("generator", "parser")
    graph.add_edge("parser", END)

    graph.set_entry_point("generator")
    return graph.compile()


def _collaborative_parsing_node(state: CollaborativeState) -> CollaborativeState:
    """
    Parse the final result from collaborative generation into a structured
    ParsedHypothesis object.
    """
    transcript_str = "\n".join([f"{name}: {msg}" for name, msg in state["transcript"]])
    parsed_hypothesis = parse_hypothesis_markdown(transcript_str)
    return {**state, "hypothesis": parsed_hypothesis}


def _build_collaborative_generation_agent(
    agent_names: list[str],
    agent_fields: dict[str, str],
    agent_reasoning_types: dict[str, ReasoningType],
    llms: dict[str, BaseChatModel],
    max_turns: int = 10,
) -> StateGraph:
    """Build collaborative generation agent with structured output parsing."""

    # Create one node function per participating agent.
    agent_node_fns = {}
    for agent_name in agent_names:
        agent_node_fns[agent_name] = multiturn.create_agent_node_fn(
            agent_name=agent_name,
            llm=llms[agent_name],
            prompt_name="collaborative_generation",
            prompt_keys_from_state=["goal", "literature_review", "meta_review"],
            # kwargs for the prompt
            field=agent_fields[agent_name],
            reasoning_type=agent_reasoning_types[agent_name].value,
        )

    # Create the moderator that decides which agent speaks next and when
    # the debate terminates (via _termination_fn or max_turns).
    moderator_fn = multiturn.create_moderator_node_fn(
        agent_names, _termination_fn, max_turns
    )

    # Build the base multi-turn agent graph (without compiling it yet)
    base_graph = StateGraph(CollaborativeState)

    # Add agent nodes
    for agent_name, agent_fn in agent_node_fns.items():
        base_graph.add_node(agent_name, agent_fn)

    # Add moderator node
    base_graph.add_node("moderator", moderator_fn)

    # Add our custom parsing node
    base_graph.add_node("parser", _collaborative_parsing_node)

    # Define edges: agents -> moderator
    for agent_name in agent_node_fns.keys():
        base_graph.add_edge(agent_name, "moderator")

    # Conditional edges from moderator
    def route_after_moderator(state: CollaborativeState):
        if state["finished"]:
            return "parser"
        return state["next_agent"]

    routing_map = {name: name for name in agent_node_fns.keys()}
    routing_map["parser"] = "parser"

    base_graph.add_conditional_edges("moderator", route_after_moderator, routing_map)

    # Parser goes to END
    base_graph.add_edge("parser", END)

    # Set entry point: the first named agent opens the debate.
    base_graph.set_entry_point(list(agent_node_fns.keys())[0])

    return base_graph.compile()


def _termination_fn(msg: str) -> bool:
    """
    Check if the message contains all required sections to prevent parser assertions.
    Returns True if the message has hypothesis, predictions, and assumptions sections.
    """
    # Only consider text after the #FINAL REPORT# marker.
    if "#FINAL REPORT#" in msg:
        text = msg.split("#FINAL REPORT#")[1]
    else:
        return False

    # Split the text by # to get sections
    sections = text.split("#")

    # Check for required sections
    has_hypothesis = False
    has_predictions = False
    has_assumptions = False

    for section in sections:
        section = section.strip()
        if not section:
            continue

        # Split section into title and content
        lines = section.split("\n", 1)
        if len(lines) < 2:
            continue

        title = lines[0].strip().lower()
        content = lines[1].strip()

        # Check if content is not empty
        if not content:
            continue

        # Match section titles (case-insensitive)
        if "hypothesis" in title:
            has_hypothesis = True
        elif "prediction" in title:
            has_predictions = True
        elif "assumption" in title:
            has_assumptions = True

    return has_hypothesis and has_predictions and has_assumptions


# --------------------------------------------------------------------------
# /coscientist/literature_review_agent.py
# --------------------------------------------------------------------------
"""
System for agentic literature review that's used by other agents.

Implementation uses LangGraph to:
1. Decompose research goals into modular topics
2. Dispatch each topic to GPTResearcher workers in parallel
3. Synthesize topic reports into executive summary
"""

import asyncio
import os
import re
from typing import TypedDict

from gpt_researcher import GPTResearcher
from gpt_researcher.utils.enum import Tone
from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.graph import END, StateGraph

from coscientist.common import load_prompt


class LiteratureReviewState(TypedDict):
    """State for the literature review agent."""

    goal: str
    # Upper bound on how many subtopics the decomposition prompt may produce.
    max_subtopics: int
    subtopics: list[str]
    # Parallel to `subtopics`: one research report per subtopic.
    subtopic_reports: list[str]
    meta_review: str


def parse_topic_decomposition(markdown_text: str) -> list[str]:
    """
    Parse the topic decomposition markdown into strings.

    Parameters
    ----------
    markdown_text : str
        The markdown output from topic_decomposition prompt

    Returns
    -------
    list[str]
        Parsed subtopics strings
    """
    # Split by subtopic headers (### Subtopic N); sections[0] is any preamble
    # before the first header and is discarded.
    sections = re.split(r"### Subtopic \d+", markdown_text)
    return [section.strip() for section in sections[1:]]


def _topic_decomposition_node(
    state: LiteratureReviewState,
    llm: BaseChatModel,
) -> LiteratureReviewState:
    """
    Node that decomposes the research goal into focused subtopics.

    Raises
    ------
    ValueError
        If no subtopics could be parsed from the LLM response.
    """
    prompt = load_prompt(
        "topic_decomposition",
        goal=state["goal"],
        max_subtopics=state["max_subtopics"],
        subtopics=state.get("subtopics", ""),
        meta_review=state.get("meta_review", ""),
    )
    response_content = llm.invoke(prompt).content

    # Parse the topics from the markdown response
    subtopics = parse_topic_decomposition(response_content)

    if not subtopics:
        raise ValueError("Failed to parse any topics from decomposition response")

    # On later rounds, append the new subtopics to those already explored.
    if state.get("subtopics", False):
        subtopics = state["subtopics"] + subtopics

    return {"subtopics": subtopics}


async def _write_subtopic_report(subtopic: str, main_goal: str) -> str:
    """
    Conduct research for a single subtopic using GPTResearcher.

    Parameters
    ----------
    subtopic : str
        The subtopic to research
    main_goal : str
        The main research goal for context

    Returns
    -------
    str
        The research report
    """
    # Create a focused query combining the research focus and key terms
    researcher = GPTResearcher(
        query=subtopic,
        report_type="subtopic_report",
        report_format="markdown",
        parent_query=main_goal,
        verbose=False,
        tone=Tone.Objective,
        config_path=os.path.join(os.path.dirname(__file__), "researcher_config.json"),
    )

    # Conduct research and generate report
    _ = await researcher.conduct_research()
    return await researcher.write_report()


async def _parallel_research_node(
    state: LiteratureReviewState,
) -> LiteratureReviewState:
    """
    Node that conducts parallel research for all subtopics using GPTResearcher.
117 | """ 118 | subtopics = state["subtopics"] 119 | main_goal = state["goal"] 120 | 121 | # Create research tasks for all subtopics 122 | research_tasks = [_write_subtopic_report(topic, main_goal) for topic in subtopics] 123 | 124 | # Execute all research tasks in parallel 125 | try: 126 | subtopic_reports = await asyncio.gather(*research_tasks) 127 | except Exception as e: 128 | raise RuntimeError(f"Failed to conduct research for subtopics: {str(e)}") 129 | 130 | if state.get("subtopic_reports", False): 131 | subtopic_reports = state["subtopic_reports"] + subtopic_reports 132 | 133 | return {"subtopic_reports": subtopic_reports} 134 | 135 | 136 | def build_literature_review_agent(llm: BaseChatModel) -> StateGraph: 137 | """ 138 | Builds and configures a LangGraph for literature review. 139 | 140 | Parameters 141 | ---------- 142 | llm : BaseChatModel 143 | The language model to use for topic decomposition and executive summary. 144 | 145 | Returns 146 | ------- 147 | StateGraph 148 | A compiled LangGraph for the literature review agent. 149 | """ 150 | graph = StateGraph(LiteratureReviewState) 151 | 152 | # Add nodes 153 | graph.add_node( 154 | "topic_decomposition", 155 | lambda state: _topic_decomposition_node(state, llm), 156 | ) 157 | 158 | graph.add_node( 159 | "parallel_research", 160 | _parallel_research_node, 161 | ) 162 | 163 | graph.add_edge("topic_decomposition", "parallel_research") 164 | graph.add_edge("parallel_research", END) 165 | 166 | graph.set_entry_point("topic_decomposition") 167 | 168 | return graph.compile() 169 | -------------------------------------------------------------------------------- /coscientist/meta_review_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Meta review agent 3 | ----------------- 4 | - Formulates a research overview with memory 5 | - Feedback from this agent is appended to the prompts of the 6 | others in subsequent rounds. 
7 | 8 | More details: 9 | - Takes in the tournament state with all debates and ELO ratings, 10 | summarizes common patterns in the reviews and debates to synthesize 11 | the meta-review feedback. 12 | - Feedback helps to steer the Reflection agent so that it accounts 13 | for common reasoning failures. 14 | - Writes top hypotheses into a research overview that highlights 15 | areas to follow up with real and specific experiments. This 16 | gets fed to the Generation agent in later rounds. Format of the 17 | overview can match the style of a review paper or a grant proposal 18 | (like an NIH Specific Aims Page). 19 | - Decides topics for additional research to follow up on. 20 | """ 21 | 22 | from typing import TypedDict 23 | 24 | from langchain_core.language_models.chat_models import BaseChatModel 25 | from langgraph.graph import END, StateGraph 26 | 27 | from coscientist.common import load_prompt 28 | from coscientist.custom_types import ReviewedHypothesis 29 | from coscientist.ranking_agent import EloTournament 30 | 31 | 32 | class MetaReviewTournamentState(TypedDict): 33 | """ 34 | State for the `meta_review_tournament` prompt agent. 35 | """ 36 | 37 | goal: str 38 | tournament: EloTournament 39 | top_k: int 40 | result: str 41 | 42 | 43 | def build_meta_review_agent(llm: BaseChatModel) -> StateGraph: 44 | """ 45 | Builds and configures a LangGraph for meta-review analysis. 46 | 47 | Parameters 48 | ---------- 49 | llm : BaseChatModel 50 | The language model to use for meta-review generation. 51 | 52 | Returns 53 | ------- 54 | StateGraph 55 | A compiled LangGraph for the meta-review agent. 
56 | """ 57 | graph = StateGraph(MetaReviewTournamentState) 58 | 59 | graph.add_node( 60 | "meta_review", 61 | lambda state: _meta_review_node(state, llm), 62 | ) 63 | 64 | graph.add_edge("meta_review", END) 65 | graph.set_entry_point("meta_review") 66 | return graph.compile() 67 | 68 | 69 | def build_top_hypotheses_review_agent(llm: BaseChatModel) -> StateGraph: 70 | """ 71 | Builds and configures a LangGraph for top hypotheses review analysis. 72 | 73 | Parameters 74 | ---------- 75 | llm : BaseChatModel 76 | The language model to use for top hypotheses review generation. 77 | 78 | Returns 79 | ------- 80 | StateGraph 81 | A compiled LangGraph for the top hypotheses review agent. 82 | """ 83 | graph = StateGraph(MetaReviewTournamentState) 84 | 85 | graph.add_node( 86 | "top_hypotheses_review", 87 | lambda state: _top_hypotheses_review_node(state, llm), 88 | ) 89 | 90 | graph.add_edge("top_hypotheses_review", END) 91 | graph.set_entry_point("top_hypotheses_review") 92 | return graph.compile() 93 | 94 | 95 | def _format_hypothesis_with_rating( 96 | hypothesis: ReviewedHypothesis, rating: float 97 | ) -> str: 98 | """Helper function to format a hypothesis with its ELO rating.""" 99 | return f"Hypothesis {hypothesis.uid} (ELO: {rating:.2f}): {hypothesis.hypothesis}" 100 | 101 | 102 | def _get_top_hypotheses_data( 103 | tournament: EloTournament, top_k: int 104 | ) -> list[tuple[str, float]]: 105 | """Helper function to get top k hypotheses sorted by ELO rating.""" 106 | sorted_hypotheses = tournament.get_sorted_hypotheses() 107 | return sorted_hypotheses[:top_k] 108 | 109 | 110 | def _meta_review_node( 111 | state: MetaReviewTournamentState, 112 | llm: BaseChatModel, 113 | ) -> MetaReviewTournamentState: 114 | """ 115 | Meta-review node that synthesizes tournament data into a comprehensive meta-analysis. 
116 | """ 117 | tournament = state["tournament"] 118 | 119 | # Build ratings text - hypotheses sorted by ELO rating (highest to lowest) 120 | sorted_hypotheses = tournament.get_sorted_hypotheses() 121 | ratings_entries = [] 122 | for hyp_id, rating in sorted_hypotheses: 123 | hypothesis = tournament.hypotheses[hyp_id] 124 | ratings_entries.append(_format_hypothesis_with_rating(hypothesis, rating)) 125 | ratings_text = "\n".join(ratings_entries) 126 | 127 | # Build debates text from match history 128 | debates_entries = [] 129 | for i, match_result in enumerate(tournament.match_history.values(), 1): 130 | debate_header = ( 131 | f"Debate {i}: Hypothesis {match_result.uid1} vs Hypothesis {match_result.uid2} " 132 | f"(Winner: {match_result.winner})" 133 | ) 134 | debates_entries.append(f"{debate_header}\n{match_result.debate}") 135 | debates_text = "\n\n".join(debates_entries) 136 | 137 | prompt = load_prompt( 138 | "meta_review_tournament", 139 | goal=state["goal"], 140 | ratings=ratings_text, 141 | debates=debates_text, 142 | ) 143 | response_content = llm.invoke(prompt).content 144 | return {**state, "result": response_content} 145 | 146 | 147 | def _top_hypotheses_review_node( 148 | state: MetaReviewTournamentState, 149 | llm: BaseChatModel, 150 | ) -> MetaReviewTournamentState: 151 | """ 152 | Top hypotheses review node that creates a research overview from top-ranked hypotheses. 
153 | """ 154 | tournament = state["tournament"] 155 | top_k = state["top_k"] 156 | 157 | # Get top k hypotheses 158 | top_hypotheses_data = _get_top_hypotheses_data(tournament, top_k) 159 | 160 | # Build top hypotheses text with ratings 161 | top_hypotheses_entries = [] 162 | for hyp_id, rating in top_hypotheses_data: 163 | hypothesis = tournament.hypotheses[hyp_id] 164 | top_hypotheses_entries.append( 165 | _format_hypothesis_with_rating(hyp_id, hypothesis, rating) 166 | ) 167 | top_hypotheses_text = "\n".join(top_hypotheses_entries) 168 | 169 | # Build reviews text for top hypotheses 170 | reviews_entries = [] 171 | for hyp_id, rating in top_hypotheses_data: 172 | hypothesis = tournament.hypotheses[hyp_id] 173 | reviews_entries.append(f"Review for Hypothesis {hyp_id}\n{hypothesis.review}") 174 | reviews_text = "\n\n".join(reviews_entries) 175 | 176 | prompt = load_prompt( 177 | "top_hypotheses_review", 178 | goal=state["goal"], 179 | top_hypotheses=top_hypotheses_text, 180 | reviews=reviews_text, 181 | ) 182 | response_content = llm.invoke(prompt).content 183 | return {**state, "result": response_content} 184 | -------------------------------------------------------------------------------- /coscientist/multiturn.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Optional, Type, TypedDict 2 | 3 | from langchain_core.language_models.chat_models import BaseChatModel 4 | from langgraph.graph import END, StateGraph 5 | 6 | from coscientist.common import load_prompt 7 | 8 | 9 | class MultiTurnState(TypedDict): 10 | """Generalized state for multi-turn agent conversations.""" 11 | 12 | transcript: list[tuple[str, str]] 13 | turn: int 14 | next_agent: str 15 | finished: bool 16 | 17 | 18 | def create_agent_node_fn( 19 | agent_name: str, 20 | llm: BaseChatModel, 21 | prompt_name: str, 22 | prompt_keys_from_state: list[str], 23 | **prompt_kwargs: dict[str, Any], 24 | ) -> Callable[[MultiTurnState], 
MultiTurnState]: 25 | """Create an agent node function.""" 26 | assert ( 27 | "transcript" not in prompt_kwargs 28 | ), "transcript will be added from state and should not be in prompt_kwargs" 29 | 30 | def agent_fn(state): 31 | # Build prompt args from state 32 | # Add transcript 33 | transcript_str = "\n".join( 34 | [f"{name}: {msg}" for name, msg in state["transcript"]] 35 | ) 36 | prompt_kwargs["transcript"] = transcript_str 37 | 38 | # Add prompt keys from state 39 | for key in prompt_keys_from_state: 40 | prompt_kwargs[key] = state.get(key, "Not Available") 41 | 42 | # Generate response 43 | prompt = load_prompt(prompt_name, **prompt_kwargs) 44 | response = llm.invoke(prompt).content 45 | 46 | return {**state, "transcript": state["transcript"] + [(agent_name, response)]} 47 | 48 | return agent_fn 49 | 50 | 51 | def create_moderator_node_fn( 52 | agent_names: list[str], 53 | termination_fn: Callable[[str], bool], 54 | max_turns: int = 10, 55 | ) -> Callable[[MultiTurnState], MultiTurnState]: 56 | """Create a moderator node function.""" 57 | 58 | def moderator_fn(state: MultiTurnState) -> MultiTurnState: 59 | # Check termination conditions 60 | if state["turn"] >= max_turns: 61 | return {**state, "finished": True, "next_agent": ""} 62 | 63 | if state["transcript"] and termination_fn(state["transcript"][-1][1]): 64 | return {**state, "finished": True, "next_agent": ""} 65 | 66 | # Round-robin scheduling 67 | current_index = agent_names.index(state["next_agent"]) 68 | next_index = (current_index + 1) % len(agent_names) 69 | 70 | return { 71 | **state, 72 | "finished": False, 73 | "next_agent": agent_names[next_index], 74 | "turn": state["turn"] + 1, 75 | } 76 | 77 | return moderator_fn 78 | 79 | 80 | def build_multi_turn_agent( 81 | state_type: Type[MultiTurnState], 82 | agent_node_fns: dict[str, Callable[[MultiTurnState], MultiTurnState]], 83 | moderator_node_fn: Callable[[MultiTurnState], MultiTurnState], 84 | post_processor_node_fn: 
Optional[Callable[[MultiTurnState], MultiTurnState]] = None, 85 | ) -> StateGraph: 86 | """Build a multi-turn agent from pre-built node functions.""" 87 | graph = StateGraph(state_type) 88 | 89 | # Add agent nodes 90 | for agent_name, agent_fn in agent_node_fns.items(): 91 | graph.add_node(agent_name, agent_fn) 92 | 93 | # Add moderator node 94 | graph.add_node("moderator", moderator_node_fn) 95 | 96 | # Add post-processor if provided 97 | if post_processor_node_fn: 98 | graph.add_node("post_processor", post_processor_node_fn) 99 | graph.add_edge("post_processor", END) 100 | 101 | # Define edges: agents -> moderator 102 | for agent_name in agent_node_fns.keys(): 103 | graph.add_edge(agent_name, "moderator") 104 | 105 | # Conditional edges from moderator 106 | def route_after_moderator(state: state_type): 107 | if state["finished"]: 108 | return "post_processor" if post_processor_node_fn else END 109 | return state["next_agent"] 110 | 111 | routing_map = {name: name for name in agent_node_fns.keys()} 112 | if post_processor_node_fn: 113 | routing_map["post_processor"] = "post_processor" 114 | else: 115 | routing_map[END] = END 116 | 117 | graph.add_conditional_edges("moderator", route_after_moderator, routing_map) 118 | graph.set_entry_point(list(agent_node_fns.keys())[0]) 119 | 120 | return graph.compile() 121 | -------------------------------------------------------------------------------- /coscientist/prompts/assumption_decomposer.md: -------------------------------------------------------------------------------- 1 | You are a scientific assumption analyzer tasked with thoroughly decomposing hypotheses into their underlying assumptions and sub-assumptions. You are an expert in logical analysis and scientific reasoning. 2 | 3 | # Goal 4 | To systematically break down the provided hypothesis into a comprehensive list of assumptions and sub-assumptions, using the initial assumptions as inspiration for deeper analysis. 
Your analysis should be exhaustive and methodical. Every claim, mechanism, or relationship implied by the hypothesis should be explicitly identified as an assumption that can be independently verified or challenged with experiments or literature review. Aim for no more than 10 assumptions. 5 | 6 | # Hypothesis to decompose 7 | {{ hypothesis }} 8 | 9 | # Initial assumptions (use as inspiration for refinement) 10 | {{ assumptions }} 11 | 12 | # Instructions 13 | * When decomposing the hypothesis, consider two kinds of assumptions: 14 | - **Explicit assumptions** high-level claims that must be true for the hypothesis to hold. 15 | - **Implicit assumptions** that are implied but not explicitly stated in the hypothesis or initial assumptions list. 16 | * For each kind of assumption, identify the underlying sub-assumptions. These are the more granular claims that support the primary assumption. Typically there should be 2-4 sub-assumptions per assumption. 17 | 18 | # Output Format 19 | Structure your response as a nested list in markdown format. 20 | 21 | ## Assumptions 22 | 1. **[Assumption 1]** 23 | - Sub-assumption 1.1: [detailed description] 24 | - Sub-assumption 1.2: [detailed description] 25 | - ... 26 | 27 | 2. **[Assumption 2]** 28 | - Sub-assumption 2.1: [detailed description] 29 | - Sub-assumption 2.2: [detailed description] 30 | - ... 31 | 32 | Do not distinguish between explicit and implicit assumptions in the final list. -------------------------------------------------------------------------------- /coscientist/prompts/cause_and_effect.md: -------------------------------------------------------------------------------- 1 | You are an expert in causality. You reason about mechanisms by carefully tracing out causal chains from initial conditions to final outcomes and communicating them to domain experts. 2 | 3 | # Goal 4 | Create a detailed causal chain that thoroughly explains the causal proposition entailed by a scientific hypothesis. 
Your goal is not to change the hypothesis. Instead it is to propose the most plausible causal chain that would be consistent and supportive. 5 | 6 | # Hypothesis to analyze 7 | {{ hypothesis }} 8 | 9 | # Instructions 10 | * Break down the hypothesis into discrete, sequential steps. Use the steps given in the hypothesis as a starting point. Add intermediate steps to make the causal chain more detailed; emphasize direct and specific causal links. 11 | * For each step, state the cause, effect, and mechanism. 12 | * Descriptions of the mechanism should be highly detailed in describing how precisely the cause leads to the effect. 13 | * If a cause has multiple effects detail them in the same step. Likewise, when a single effect has multiple causes, it's acceptable to repeat it in a different step. 14 | * If a cause, effect, or mechanism is uncertain, say so. Then make your best guess. 15 | * Use as many steps as needed to fully detail the causal chain. 16 | 17 | # Output format (markdown) 18 | ## Causal Chain 19 | ### Step 1: [cause] -> [effect] 20 | [Exposition of the mechanism] 21 | 22 | ### Step 2: [cause] -> [effect] 23 | [Exposition of the mechanism] 24 | 25 | -------------------------------------------------------------------------------- /coscientist/prompts/collaborative_generation.md: -------------------------------------------------------------------------------- 1 | You are an expert participating in a collaborative discourse concerning the generation of a scientific hypothesis. The overarching objective of this discourse is to collaboratively develop a novel and robust hypothesis. You will engage in a discussion with other experts. You are a specialist in {{ field }} and you approach problems through this lens. {{ reasoning_type }} 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Criteria 7 | A strong hypothesis must be novel, robust, and falsifiable. It must also be specific and clear to domain experts, who will analyze and critique your proposals. 
8 | 9 | General guidelines: 10 | * Exhibit boldness and creativity in your contributions. 11 | * Maintain a helpful and collaborative approach but do not be afraid to disagree with other experts. Seeking the truth requires a willingness to challenge and be challenged. 12 | * Always prioritize the generation of a high-quality hypothesis. Novelty is the key criterion, but it should not be at the expense of robustness or falsifiability. 13 | * Building consensus in science is a process. Do not expect to resolve all disagreements or uncertainties in this single discussion. 14 | 15 | # Review of relevant literature 16 | {{ literature_review }} 17 | 18 | # Additional Notes (optional) 19 | A panel of reviewers may have put together a meta-analysis of previously proposed hypotheses, highlighting common strengths and weaknesses. When available, you can use this to inform your contributions: 20 | {{ meta_review }} 21 | 22 | # Procedure 23 | If initiating the discussion from a blank transcript, then propose three distinct hypotheses. 24 | 25 | For subsequent contributions that continue an existing discussion: 26 | * Pose clarifying questions if ambiguities or uncertainties arise. 27 | * Critically evaluate the hypotheses proposed thus far, addressing the following aspects: 28 | - Adherence to the criteria for a strong hypothesis 29 | - Utility and practicality 30 | - Level of detail and specificity 31 | - Implicit and explicit assumptions and sub-assumptions 32 | - Novelty 33 | * Identify any weaknesses or potential limitations. 34 | * Propose concrete improvements and refinements to address identified weaknesses and improve novelty. 35 | * Conclude your response with a suggested refinement of the hypothesis. 
36 | 37 | When sufficient discussion has transpired (typically 3-5 conversational turns, with a maximum of 10 turns) and all relevant questions and points have been thoroughly addressed and clarified, conclude the process by writing up a final hypothesis report in markdown format. 38 | 39 | # Final hypothesis report format 40 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters, this is critical to let a moderator know when your discussion is finished). ONLY WRITE #FINAL REPORT# IMMEDIATELY BEFORE WRITING THE REPORT. If still in discussion simply refer to it as the "final report", without caps and without the hashtags. The report should be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 41 | 42 | 1. In the Hypothesis section, state the final self-contained hypothesis agreed upon by the group. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes. 43 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 44 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 45 | 46 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. 
Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. 47 | 48 | #BEGIN TRANSCRIPT# 49 | {{ transcript }} 50 | #END TRANSCRIPT# 51 | 52 | Your Turn: -------------------------------------------------------------------------------- /coscientist/prompts/deep_verification.md: -------------------------------------------------------------------------------- 1 | You are a scientific hypothesis verifier tasked with conducting a deep verification of hypotheses proposed by other scientists. You are an expert in methodical analysis and critical thinking. 2 | 3 | # Goal 4 | To thoroughly evaluate the scientific validity, logical consistency, and empirical support for the provided hypothesis by examining its provided reasoning and assumptions. Do not be unnecessarily charitable in your assessment. Scientific progress requires rigorous verification, and identifying weaknesses is as valuable as confirming strengths. Effective verification must be systematic, objective, and detailed. 5 | 6 | # Hypothesis to verify 7 | {{ hypothesis }} 8 | 9 | # Causal reasoning to evaluate 10 | {{ reasoning }} 11 | 12 | # Reviewed assumptions to assess 13 | {{ assumption_research }} 14 | 15 | # Instructions 16 | 1. Examine the core hypothesis for scientific validity, specificity, and testability. 17 | 2. Analyze the provided causal reasoning for logical consistency, gaps, and potential fallacies. 18 | 3. Write a summary evaluation of whether the assumptions and sub-assumptions on which the hypothesis rests are overall well-founded and supported by research. Identify the weakest ones. 19 | 4. Conclude by highlighting strengths and weaknesses uncovered during this reflection process. Suggest areas for refinement. Do not pass a final judgement. 
20 | 21 | ## Tone 22 | Your response should: 23 | - Maintain scientific objectivity and intellectual rigor 24 | - Be direct about weaknesses without being dismissive 25 | - Use clear, precise language appropriate for scientific discourse 26 | -------------------------------------------------------------------------------- /coscientist/prompts/desk_reject.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific hypothesis evaluation. Your task is to analyze a hypothesis and determine if it is correct, novel, and high-quality. 2 | 3 | # Instructions 4 | 5 | 1. Correctness: Assess if the hypothesis is consistent with your extensive knowledge of the field. Your primary concern is plausibility; the hypothesis itself may be speculative and unproven. 6 | 2. Novelty: Assess if the hypothesis is a meaningfully new idea. 7 | 3. Quality: A high-quality hypothesis is well-motivated, clear, concise, and scientifically sound. 8 | 9 | Provide your reasoning for each of the three criteria. When these evaluations are complete, conclude by writing either "FINAL EVALUATION: PASS" or "FINAL EVALUATION: FAIL" (in all capital letters). To pass, the hypothesis must receive a pass rating for each of the three criteria. Do not write anything after the final evaluation. 10 | 11 | # Hypothesis to evaluate 12 | {{ hypothesis }} -------------------------------------------------------------------------------- /coscientist/prompts/evolve_from_feedback.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research and epistemic iteration. Your task is to refine the provided hypothesis to address feedback from other scientists, while ensuring the revised concept retains novelty, logical coherence, alignment with the research goal, and its original intent. Your refined hypothesis will compete in a tournament with other hypotheses to select the best one, try hard to win! 
2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Original Hypothesis 7 | {{ hypothesis }} 8 | 9 | # Reviewer Feedback 10 | {{ verification_result }} 11 | 12 | # Competitive Intelligence 13 | {{ meta_review }} 14 | 15 | # Instructions 16 | 1. Critically evaluate the original hypothesis, reviewer feedback, and your competitive intelligence. The competitive intelligence is a meta-review of the tournament, and it will help you understand the strengths and weaknesses of the other hypotheses against which you will compete. 17 | 2. Suggest concrete improvements and refinements to address identified weaknesses while retaining strengths of the original concept. Improvements should address reviewer comments in addition to: 18 | - Improving detail and specificity 19 | - Clearing away dubious assumptions 20 | - Increasing utility, practicality, and feasibility 21 | - Avoiding the pitfalls of other hypotheses in the tournament 22 | 3. Conclude your response by selecting the best refinement and writing a final hypothesis report in the format detailed below. 23 | 4. Remember that your purpose is to make the existing hypothesis as competitive as possible, not to come up with something completely new. 24 | 25 | # Final hypothesis report format 26 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters). The report must be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 27 | 28 | 1. In the Hypothesis section, state the final self-contained hypothesis. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes without referencing the original hypothesis. 29 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. 
Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 30 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 31 | 32 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/final_report.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research communication. Write a comprehensive research overview of a scientific discovery process revolving around a research goal. 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # All hypotheses by ranking 7 | {{ hypotheses_by_ranking }} 8 | 9 | # Detailed information for top ranked hypotheses 10 | {{ top_ranked_hypotheses }} 11 | 12 | # Instructions 13 | 14 | Write a comprehensive scientific research report in markdown format that synthesizes the research discovery process. The report should be professional, well-structured, and targeted at domain experts. 15 | 16 | ## Report Structure 17 | 18 | ### 1. 
Executive Summary 19 | - Provide a concise overview (3-4 paragraphs) of the research goal and discovery process 20 | - Identify and briefly describe the main research directions that were explored (based on the semantic groupings of hypotheses) 21 | - Highlight the most promising findings and their potential significance 22 | - State the key conclusions and recommendations for future research 23 | 24 | ### 2. Research Directions Explored 25 | - Analyze all the hypotheses to identify distinct research directions or themes 26 | - For each major direction: 27 | - Describe the underlying scientific rationale 28 | - Explain how this direction relates to and addresses the overall research goal 29 | - Summarize the key insights and thinking from hypotheses in this group 30 | 31 | ### 3. Top-Ranked Hypotheses Analysis 32 | For each hypothesis in the top-ranked list: 33 | - **Hypothesis Statement**: Clearly state the hypothesis 34 | - **Scientific Rationale**: Summarize the reasoning and evidence supporting this hypothesis 35 | - **Experimental Design**: Propose specific, feasible experiments to test or falsify the hypothesis 36 | - Include experimental methodology, key variables to measure, and expected outcomes 37 | - Consider both positive and negative controls where applicable 38 | - **Potential Impact**: Explain the implications if this hypothesis is confirmed or refuted 39 | 40 | ### 4. 
Conclusions and Future Directions 41 | - Synthesize the overall findings and their significance for the research goal 42 | - Identify the most promising hypotheses and research directions for continued investigation 43 | - Discuss potential challenges and limitations in the current approach 44 | - Recommend specific next steps for advancing the research 45 | - Consider broader implications for the field and potential applications 46 | 47 | ## Writing Guidelines 48 | - Use clear, precise scientific language appropriate for a research report 49 | - Include proper markdown formatting with headers, bullet points, and emphasis where appropriate 50 | - Maintain objectivity while highlighting the most significant findings 51 | - Ensure logical flow between sections with appropriate transitions 52 | - Include specific details from the provided data while maintaining readability 53 | - Aim for approximately 2000-3000 words total 54 | -------------------------------------------------------------------------------- /coscientist/prompts/independent_generation.md: -------------------------------------------------------------------------------- 1 | You are a member of a team of scientists tasked with formulating creative and falsifiable scientific hypothesis. You are a specialist in {{ field }} and you approach problems through this lens. {{ reasoning_type }} 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Criteria 7 | A strong hypothesis must be novel, robust, and falsifiable. It must also be specific and clear to domain experts, who will analyze and critique your proposals. 8 | 9 | # Review of relevant literature 10 | {{ literature_review }} 11 | 12 | # Additional Notes (optional) 13 | A panel of reviewers may have put together a meta-analysis of previously proposed hypotheses, highlighting common strengths and weaknesses. When available, you can use this to inform your contributions: 14 | {{ meta_review }} 15 | 16 | # Instructions 17 | 1. 
State a hypothesis that addresses the research goal and criteria while staying grounded in evidence from literature and feedback from reviewers. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes. 18 | 2. Make a list of self-contained falsifiable predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 19 | 3. Make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 20 | 21 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. 22 | 23 | # Output Format 24 | Structure your response in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/meta_review_tournament.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific research and meta-analysis. Synthesize a comprehensive meta-review of provided reviews pertaining to the following research goal. 2 | 3 | # Instructions 4 | * Generate a structured meta-analysis report of the provided reviews. 
5 | * Focus on identifying: 6 | - Common strengths across highly-rated hypotheses and recurring themes in successful arguments 7 | - Recurring weaknesses, critique points, and common issues raised by reviewers 8 | - Common evaluation criteria being emphasized 9 | - Bias patterns in review processes 10 | * The generated meta-analysis should provide actionable insights for researchers developing future proposals. 11 | * Refrain from evaluating individual proposals or reviews; focus on producing a synthesized meta-analysis. 12 | 13 | # Goal 14 | {{ goal }} 15 | 16 | # Hypothesis and Elo ratings 17 | {{ ratings }} 18 | 19 | # Provided reviews for meta-analysis 20 | {{ debates }} -------------------------------------------------------------------------------- /coscientist/prompts/observation_reflection.md: -------------------------------------------------------------------------------- 1 | You are an expert in scientific hypothesis evaluation. Your task is to analyze the 2 | relationship between a provided hypothesis and observations from a scientific article. 3 | Specifically, determine if the hypothesis provides a novel causal explanation 4 | for the observations, or if they contradict it. 5 | 6 | Instructions: 7 | 8 | 1. Observation extraction: list relevant observations from the article. 9 | 2. Causal analysis (individual): for each observation: 10 | a. State if its cause is already established. 11 | b. Assess if the hypothesis could be a causal factor (hypothesis => observation). 12 | c. Start with: "would we see this observation if the hypothesis was true:". 13 | d. Explain if it’s a novel explanation. If not, or if a better explanation exists, 14 | state: "not a missing piece." 15 | 3. Causal analysis (summary): determine if the hypothesis offers a novel explanation 16 | for a subset of observations. Include reasoning. Start with: "would we see some of 17 | the observations if the hypothesis was true:". 18 | 4. 
Disproof analysis: determine if any observations contradict the hypothesis. 19 | Start with: "does some observations disprove the hypothesis:". 20 | 5. Conclusion: state: "hypothesis: ". 22 | 23 | Scoring: 24 | * Already explained: hypothesis consistent, but causes are known. No novel explanation. 25 | * Other explanations more likely: hypothesis *could* explain, but better explanations exist. 26 | * Missing piece: hypothesis offers a novel, plausible explanation. 27 | * Neutral: hypothesis neither explains nor is contradicted. 28 | * Disproved: observations contradict the hypothesis. 29 | 30 | Important: if observations are expected regardless of the hypothesis, and don’t disprove it, 31 | it’s neutral. 32 | 33 | Article: 34 | {article} 35 | 36 | Hypothesis: 37 | {hypothesis} 38 | 39 | Response {provide reasoning. end with: "hypothesis: ".) -------------------------------------------------------------------------------- /coscientist/prompts/out_of_the_box.md: -------------------------------------------------------------------------------- 1 | You are an expert researcher tasked with generating a novel, singular hypothesis inspired by analogous elements from provided concepts. 2 | 3 | # Goal 4 | {{ goal }} 5 | 6 | # Concepts 7 | Inspiration may be drawn from the following concepts (utilize analogy and inspiration, not direct replication): 8 | {{ hypotheses }} 9 | 10 | # Instructions 11 | 1. Provide a concise introduction to the relevant scientific domain. 12 | 2. Summarize recent findings and pertinent research, highlighting successful approaches. 13 | 3. Identify promising avenues for exploration that may yield innovative hypotheses. 14 | 4. Develop a detailed, original, and specific single hypothesis for achieving the stated goal, leveraging analogous principles from the provided ideas. This should not be a mere aggregation of existing methods or entities. Think out-of-the-box. 15 | 5. 
Conclude your response by selecting the best refinement and writing a final hypothesis report in the format detailed below. 16 | 17 | # Final hypothesis report format 18 | You must indicate the start of the report with "#FINAL REPORT#" (in all capital letters). The report must be written in markdown with the following headings: # Hypothesis, # Falsifiable Predictions, # Assumptions. 19 | 20 | 1. In the Hypothesis section, state the final self-contained hypothesis. Describe the hypothesis in detail, including specific entities, mechanisms, and anticipated outcomes without explicitly referencing the original concepts. 21 | 2. In the Falsifiable Predictions section, make a list of self-contained predictions that could be tested to disprove your hypothesis. Aim for at least 1 prediction and no more than 3. Each prediction must clearly state an entity to be tested, the conditions under which it will be tested, and an expected outcome. Later, another scientist will decide how to implement a test (e.g., clinical or in vitro) for each prediction. 22 | 3. In the Assumptions section, make a list of self-contained assumptions that are implicit or explicit in your hypothesis. 23 | 24 | Each falsifiable prediction and assumption will be sent to an experimentalist or verifier to check validity. They will be unaware of your main hypothesis, reasoning, and all but the one prediction or assumption they are assigned. For this reason, avoid using undefined abbreviations or terms that are not standard in the literature, and do not create dependencies between predictions or assumptions. Write the predictions and assumptions as numbered lists. Do not write introductions or summaries for any of the sections. -------------------------------------------------------------------------------- /coscientist/prompts/research_config.md: -------------------------------------------------------------------------------- 1 | You are an expert scientific communicator and researcher. 
You are tasked with assisting a scientist in clarifying and solidifying a research goal through an interactive conversation. This refined research goal will be given to a scientific research agent that will propose hypotheses, review literature, and write a final report. 2 | 3 | # Suggested Goal for Refinement 4 | {{ goal }} 5 | 6 | # Instructions 7 | 8 | 1. Analyze the scientist's goal and restate it in your own words to ensure understanding. 9 | 2. Evaluate whether the goal is clear, specific, and well-formulated. 10 | 3. If the goal needs clarification or refinement: 11 | - Ask specific clarifying questions to better understand the scientist's intent 12 | - Suggest improvements or refinements 13 | - Wait for the scientist's response before proceeding 14 | 4. If the goal is already clear and well-formulated, suggest any minor improvements if needed. 15 | 5. Continue this interactive process until the scientist confirms they are satisfied with the refined goal. 16 | 6. Once the scientist confirms satisfaction, conclude by writing "FINAL GOAL:" (in all capital letters) followed by the final refined goal statement. Do not include any text or comments after the final goal statement. 17 | 7. Try to maintain the style of the original goal. If it is a question, keep it as a question. If it is a statement, keep it as a statement. And if it is phrased as a goal, keep that phrasing. -------------------------------------------------------------------------------- /coscientist/prompts/simulated_debate.md: -------------------------------------------------------------------------------- 1 | You are an expert in comparative analysis, engaging with a panel of domain experts in a structured discussion to evaluate two competing hypotheses. The objective is to rigorously determine which hypothesis is superior based on a predefined set of attributes and criteria. 
The experts possess no pre-existing biases toward either hypothesis and are solely focused on identifying the optimal choice because only one can be implemented. 2 | 3 | # Procedure 4 | If initiating the discussion from a blank transcript, begin with a concise summary of both hypotheses and their respective initial reviews, and then write a few pro and con arguments for each. 5 | 6 | For subsequent contributions that continue an existing discussion: 7 | * Pose clarifying questions to address any ambiguities or uncertainties. 8 | * Critically evaluate each hypothesis in relation to the stated research goal. This evaluation should consider aspects such as: 9 | - Potential for correctness/validity. 10 | - Utility and practical applicability. 11 | - Sufficiency of detail and specificity. 12 | - Novelty and originality. 13 | - Desirability for implementation. 14 | * Identify and articulate any weaknesses, limitations, or potential flaws in either hypothesis. 15 | * Do not be unnecessarily charitable in your assessments of either hypothesis. Scientific progress requires rigor. We're seeking the truth and have limited resources to chase unproductive leads. 16 | * Exhibit boldness and creativity in your contributions. 17 | * Maintain a helpful and collaborative approach. 18 | * Consider the reviews of the hypotheses but remember that absence of evidence is not evidence of absence. 19 | 20 | Once the discussion has reached a point of sufficient depth (typically 3-5 turns, up to 10 turns) and all relevant questions, concerns, and arguments have been thoroughly addressed, provide a consensus judgment for the better hypothesis. The judgment should succinctly state the rationale for the selection. Conclude with the phrase "WINNER: <1 or 2>" (in all capital letters), denoting the id of the superior hypothesis. Write nothing after this declaration. 
21 | 22 | # Research goal for hypotheses 23 | {{ goal }} 24 | 25 | ## Hypothesis 1 26 | {{ hypothesis_1 }} 27 | 28 | ## Hypothesis 2 29 | {{ hypothesis_2 }} 30 | 31 | ## Review of hypothesis 1 32 | {{ review_1 }} 33 | 34 | ## Review of hypothesis 2 35 | {{ review_2 }} 36 | 37 | #BEGIN TRANSCRIPT# 38 | {{ transcript }} 39 | #END TRANSCRIPT# 40 | 41 | Your Turn: -------------------------------------------------------------------------------- /coscientist/prompts/supervisor_decision.md: -------------------------------------------------------------------------------- 1 | You are the **Supervisor Agent** for the Coscientist multi-agent research system. Your role is to analyze the current state of the research process and decide what actions to take next to advance scientific hypothesis generation, evaluation, and refinement. 2 | 3 | # Research Goal 4 | {{ goal }} 5 | 6 | # Research Meta Reviews 7 | Here are the two latest meta reviews of the research process. Use them to understand whether progress is continuing or leveling off. 8 | 9 | ## Latest Meta Review 10 | {{ meta_review }} 11 | 12 | ## Previous Meta Review 13 | {{ previous_meta_review }} 14 | 15 | # Available Actions 16 | You may choose from the following actions: 17 | 1. generate_new_hypotheses - Create new hypotheses through independent or collaborative generation. Perform this action to increase diversity and explore new research directions. 18 | 2. evolve_hypotheses - Refine and improve existing hypotheses based on feedback and rankings. Perform this action to improve the quality of existing hypotheses in existing research directions. 19 | 3. expand_literature_review - Broaden the literature review to cover new research directions. Perform this action to explore the literature for new ideas. 20 | 4. run_tournament - Rank unranked hypotheses through scientific debate and comparison. Perform this action to rank the hypotheses and determine which ones are the most promising. 21 | 5. 
run_meta_review - Review all the evaluations and debates that have happened in the tournament so far. Perform this action to synthesize strengths and weaknesses of existing hypotheses. This will inform the generation and evolution of new hypotheses. 22 | 6. finish - Complete the research process and generate a final report. Finish when the research process seems to be making diminishing returns based on the meta-review and changes in Elo ratings. 23 | 24 | # Current System Statistics 25 | **Total actions taken:** {{ total_actions }} 26 | **Latest actions (most recent first):** {{ latest_actions }} 27 | 28 | ## Hypothesis Inventory 29 | These statistics are updated after hypothesis generation, evolution, and tournament running. 30 | - **Total Hypotheses (including unranked):** {{ total_hypotheses }} 31 | - **Unranked Hypotheses:** {{ num_unranked_hypotheses }} 32 | 33 | ## Meta-Review History 34 | These statistics are updated after each meta-review. 35 | - **Number of Meta-Reviews Completed:** {{ num_meta_reviews }} 36 | - **Newly Ranked Hypotheses Since Last Meta-Review:** {{ new_hypotheses_since_meta_review }} 37 | 38 | ## Tournament Trajectory 39 | These statistics are updated after each tournament run. 40 | - **Total matches played:** {{ total_matches_played }} 41 | - **Total tournaments played:** {{ total_rounds_played }} 42 | - **Current Top 3 Elo Ratings:** {{ top_3_elo_ratings }} 43 | - **Max Elo Rating Per Tournament (most recent first):** {{ max_elo_rating }} 44 | - **Count of Elo Ratings over 1400 Per Tournament (most recent first):** {{ num_elo_ratings_over_1400 }} 45 | - **Median Elo Rating Per Tournament (most recent first):** {{ median_elo_rating }} 46 | 47 | ## Quality & Diversity Metrics 48 | These statistics are updated after every hypothesis generation and evolution.
49 | - **Average pairwise cosine similarity of hypotheses:** {{ cosine_similarity_trajectory }} 50 | - **Number of distinct hypothesis clusters:** {{ cluster_count_trajectory }} 51 | 52 | ## Literature Review Status 53 | These statistics are updated after each literature review. 54 | - **Literature Review Subtopics Completed:** {{ literature_review_subtopics_completed }} 55 | 56 | # Decision-Making Framework 57 | **Consider recent actions:** Review the latest actions to avoid repeating the same action too frequently and to understand the current research trajectory. 58 | 59 | ## When to generate_new_hypotheses: 60 | - Total hypotheses < 8-10 (insufficient exploration) 61 | - Average cosine similarity score is high (>0.85) indicating hypotheses are too similar 62 | - All current hypotheses have poor performance (median Elo < 1300) 63 | 64 | ## When to evolve_hypotheses: 65 | - Have 4+ hypotheses with strong performance (Elo > 1300) 66 | - Sufficient diversity exists to avoid over-optimization (average cosine similarity score <0.85) 67 | - Meta-review suggests promising directions worth refining 68 | 69 | ## When to run_tournament: 70 | - Several unranked hypotheses exist (>4) 71 | - Before deciding to finish 72 | 73 | ## When to run_meta_review: 74 | - At least 4+ new hypotheses ranked since last meta-review 75 | - Always if there are 10 or more new hypotheses since last meta-review 76 | - Before major strategic decisions (literature expansion, evolution, finishing) 77 | - Performance plateau suggests need for strategic insight 78 | 79 | ## When to expand_literature_review: 80 | - Meta-review identifies significant and persistent knowledge gaps 81 | - Current hypotheses cluster around limited research approaches (few distinct clusters) 82 | - Similarity score remains high despite multiple generation attempts 83 | - Never when there are 20+ subtopics currently in the literature review 84 | 85 | ## When to finish: 86 | - At least 3+ high-quality hypotheses (Elo > 
1400) identified 87 | - Diminishing returns evident (trajectory shows max/median Elo plateauing over last 3+ meta-reviews) 88 | - Research goal appears sufficiently addressed 89 | - The most recent action must have been `run_meta_review` 90 | 91 | # Strategic Considerations 92 | ## Exploration vs. Exploitation Balance: 93 | - **Early Stage (< 12 hypotheses):** Prioritize exploration through generation and literature expansion 94 | - **Mid Stage (12-25 hypotheses):** Balance generation with evolution of promising candidates 95 | - **Late Stage (25+ hypotheses):** Focus on evolution of top performers 96 | 97 | ## Key Decision Factors: 98 | - **Diversity:** Use cosine similarity and cluster count trajectories to assess if diversity efforts are working 99 | - **Quality:** Analyze Elo trajectories to detect plateaus, improvements, or declines 100 | - **Momentum:** Look for patterns in recent actions and avoid repetitive sequences 101 | 102 | # Output Format 103 | Provide your decision in the following structured format: 104 | 105 | ``` 106 | DECISION: [chosen_action] 107 | 108 | REASONING: 109 | - Primary factors influencing this decision 110 | - Key metrics that support this choice 111 | - Strategic rationale for timing 112 | ``` 113 | 114 | # Important Notes 115 | - **Always justify your decision** with specific reference to the current state metrics 116 | - **Consider the research workflow holistically** - don't optimize for single metrics 117 | - **Balance exploration and exploitation** based on the research stage 118 | - **Monitor for diminishing returns** and know when to conclude 119 | - **Prioritize scientific rigor** over speed or efficiency alone 120 | 121 | Choose the single most appropriate action based on the current state and provide your structured decision. 
122 | -------------------------------------------------------------------------------- /coscientist/prompts/top_hypotheses_review.md: -------------------------------------------------------------------------------- 1 | You are creating a comprehensive research overview for a human scientist. 2 | 3 | # Instructions 4 | * Create a structured research overview that includes: 5 | - Executive Summary 6 | - Key Hypotheses and Their Strengths 7 | - Recommended Next Steps for Experimental Validation 8 | - Identified Knowledge Gaps 9 | - Risk Assessment and Mitigation Strategies 10 | * Format this as a professional research report suitable for grant applications or research planning. 11 | 12 | # Research Goal 13 | {{ goal }} 14 | 15 | # Top-Ranked Hypotheses 16 | {{ top_hypotheses }} 17 | 18 | # Reviews of Top Hypotheses 19 | {{ reviews }} -------------------------------------------------------------------------------- /coscientist/prompts/topic_decomposition.md: -------------------------------------------------------------------------------- 1 | You are a senior research strategist known for designing rigorous, unbiased study programs. 2 | 3 | # Task 4 | Decompose the following research goal into a set of **focused, researchable subtopics** that can each be independently investigated through literature review. Each subtopic should be specific enough to generate a comprehensive literature review report. 5 | 6 | # Research goal 7 | {{ goal }} 8 | 9 | # Previously researched subtopics (if any) 10 | {{ subtopics }} 11 | 12 | # Meta-review to consider for finding research gaps (if any) 13 | {{ meta_review }} 14 | 15 | # Instructions 16 | 1. Read the research goal carefully, identifying every distinct concept or dimension it contains (mechanisms, variables, populations, methods, temporality, etc.). 17 | 2. If previously researched subtopics are provided, carefully review them to avoid duplicating already investigated areas. 18 | 3. 
If a meta-review is provided, analyze it to identify: 19 | - Research gaps or limitations mentioned 20 | - Areas flagged as under-explored or requiring further investigation 21 | - Novel angles or perspectives suggested for future research 22 | 4. Create focused subtopics that: 23 | - Are narrow enough for independent literature review 24 | - Are broad enough to yield substantial research findings 25 | - **Do not duplicate or significantly overlap with previously researched subtopics** 26 | - **Prioritize novel areas and research gaps identified in the meta-review** 27 | - Minimally overlap with each other 28 | - Collectively cover all aspects needed to meaningfully investigate the research goal with a well-informed perspective and evidence-grounded background. 29 | 5. Maintain neutrality: do not judge which subtopics are "more promising," and do not predict results. 30 | 6. Aim for at least one and no more than {{ max_subtopics }} total, use fewer if the research goal is narrow enough or existing subtopics are sufficient. 31 | 7. Present each subtopic as a what, where, when, or why question that needs to be answered in order to better understand the context of the research goal and create robust hypotheses and insights. The subtopic should only be 1-2 sentences long. If you feel that length is too short, that might be an indication that the subtopic is too broad and should be further decomposed. 32 | 33 | # Output format (markdown) 34 | ## Research Subtopics 35 | ### Subtopic 1 36 | [Focused research subtopic] 37 | 38 | ### Subtopic 2 39 | [Focused research subtopic] 40 | 41 | -------------------------------------------------------------------------------- /coscientist/prompts/tournament.md: -------------------------------------------------------------------------------- 1 | You are an expert evaluator tasked with comparing two hypotheses. 2 | 3 | # Instructions 4 | You will be given a research goal and two hypotheses. 
Each hypothesis includes an independent review. These reviews may contain numerical scores or confidence ratings. Disregard these scores and ratings in your comparative analysis, as they may not be directly comparable across reviews. Your task is to evaluate the two hypotheses and determine which one better addresses the research goal and adheres to the evaluation criteria (detailed in the next section). Your analysis should include: 5 | 6 | 1. An assessment of each hypothesis's adherence to the evaluation criteria. 7 | 2. A comparison of the two hypotheses' strengths and weaknesses. 8 | 3. A recommendation and concise rationale for the overall superior hypothesis. 9 | 10 | Conclude your response with the phrase "WINNER: <1 or 2>" (in all capital letters), denoting the id of the superior hypothesis, based on the outcome of your analysis. Write nothing after this declaration. 11 | 12 | # Evaluation Criteria 13 | Criteria ordered by importance: 14 | 1. Alignment with the research goal. Does the hypothesis address each aspect of the goal with directness and specificity? 15 | 2. Novelty. Is the hypothesis a trivial restatement of existing scientific knowledge or does it bring forward new insights? 16 | 3. Falsifiability. Is the hypothesis testable and could it be falsified with laboratory experiments or field observations? 17 | 4. Robustness. Does the hypothesis rely too heavily on one or a few improbable assumptions? 18 | 5. Consider the reviews of the hypotheses but remember that absence of evidence is not evidence of absence.
"""
Proximity agent
--------------
- Calculates similarity between hypotheses and builds a graph
"""

import networkx as nx
import numpy as np
from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

from coscientist.custom_types import ParsedHypothesis


def create_embedding(text: str, dimensions: int = 256) -> np.ndarray:
    """Create a vector embedding for a text.

    Instantiates a fresh ``OpenAIEmbeddings`` client on every call, so
    OpenAI credentials must be configured in the environment.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=dimensions)
    return np.array(embeddings.embed_query(text))


class ProximityGraph:
    """A graph of hypotheses and their similarity scores.

    Nodes are keyed by hypothesis ``uid`` and carry the hypothesis text and
    its embedding; edge weights are cosine similarities between embeddings.
    """

    def __init__(self):
        self.graph = nx.Graph()

    def add_hypothesis(self, hypothesis: ParsedHypothesis):
        """Add a hypothesis node to the graph (edges are added lazily by
        ``update_edges``)."""
        embedding = create_embedding(hypothesis.hypothesis)
        self.graph.add_node(
            hypothesis.uid, hypothesis=hypothesis.hypothesis, embedding=embedding
        )

    def _compute_weighted_edges(
        self, hypothesis_ids_x: list[int], hypothesis_ids_y: list[int]
    ):
        """Compute cosine-similarity weighted edges between two sets of hypotheses."""
        embeddings_x = [self.graph.nodes[id]["embedding"] for id in hypothesis_ids_x]
        embeddings_y = [self.graph.nodes[id]["embedding"] for id in hypothesis_ids_y]
        similarities = cosine_similarity(embeddings_x, embeddings_y)
        # Add the edges with weights to the graph; skip self-loops.
        for i, id_x in enumerate(hypothesis_ids_x):
            for j, id_y in enumerate(hypothesis_ids_y):
                if id_x == id_y:
                    continue
                self.graph.add_edge(id_x, id_y, weight=similarities[i, j])

    def update_edges(self):
        """
        Finds all nodes without an edge and all nodes with an edge and
        computes the weighted edges between them. If no nodes have edges,
        it will compute the weighted edges between all nodes.
        """
        # Nodes that already participate in at least one edge ...
        hypothesis_ids_x = [
            node for node in self.graph.nodes if self.graph.degree(node) > 0
        ]
        # ... versus newly added, still-isolated nodes.
        hypothesis_ids_y = [
            node for node in self.graph.nodes if self.graph.degree(node) == 0
        ]
        if len(hypothesis_ids_y) == 0:
            # Nothing to do, we're already up to date
            return
        elif len(hypothesis_ids_x) == 0:
            # No nodes with edges, compute all edges
            self._compute_weighted_edges(hypothesis_ids_y, hypothesis_ids_y)
        else:
            # New-to-new edges first, then new-to-existing edges.
            self._compute_weighted_edges(hypothesis_ids_y, hypothesis_ids_y)
            self._compute_weighted_edges(hypothesis_ids_x, hypothesis_ids_y)

    def get_pruned_graph(self, min_weight: float = 0.85) -> nx.Graph:
        """Get a pruned copy of the graph with edges below ``min_weight`` removed."""
        pruned_graph = self.graph.copy()
        edges_to_remove = [
            (u, v)
            for u, v, d in pruned_graph.edges(data=True)
            if d["weight"] < min_weight
        ]
        pruned_graph.remove_edges_from(edges_to_remove)
        return pruned_graph

    def get_semantic_communities(
        self, resolution: float = 1.0, min_weight: float = 0.85
    ) -> list[set[int]]:
        """Get the partitions of the graph using the Louvain method."""
        # Prune edges from the graph with weight less than min_weight
        pruned_graph = self.get_pruned_graph(min_weight)
        return nx.community.louvain_communities(pruned_graph, resolution=resolution)

    @property
    def average_cosine_similarity(self) -> float:
        """Average edge weight of the graph.

        Returns 0.0 when the graph has no edges yet; previously this took
        ``np.mean`` of an empty list, producing NaN plus a RuntimeWarning.
        """
        weights = [d["weight"] for _, _, d in self.graph.edges(data=True)]
        if not weights:
            return 0.0
        return float(np.mean(weights))
You explore alternative scenarios and 'what if' questions to assess outcomes, uncover dependencies, or guide future planning." 14 | HEURISTIC = "You are a heuristic thinker. You use experience-based rules of thumb to make fast, efficient decisions when time, information, or computational power is limited." 15 | -------------------------------------------------------------------------------- /coscientist/research_plan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates a research plan from the user's query. Must: 3 | 4 | 1. Align with the research goal 5 | 2. Be plausible and consistent with existing research, or justify why not 6 | 3. Be novel 7 | 4. Be testable with user-provided resources/constraints 8 | 5. Be safe 9 | 10 | """ 11 | -------------------------------------------------------------------------------- /coscientist/researcher_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "RETRIEVER": "tavily", 3 | "EMBEDDING": "openai:text-embedding-3-small", 4 | "SIMILARITY_THRESHOLD": 0.42, 5 | "FAST_LLM": "google_genai:gemini-2.5-flash", 6 | "SMART_LLM": "anthropic:claude-sonnet-4-20250514", 7 | "STRATEGIC_LLM": "openai:o3-mini", 8 | "FAST_TOKEN_LIMIT": 3000, 9 | "SMART_TOKEN_LIMIT": 6000, 10 | "STRATEGIC_TOKEN_LIMIT": 4000, 11 | "BROWSE_CHUNK_MAX_LENGTH": 8192, 12 | "CURATE_SOURCES": false, 13 | "SUMMARY_TOKEN_LIMIT": 700, 14 | "TEMPERATURE": 0.4, 15 | "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", 16 | "MAX_SEARCH_RESULTS_PER_QUERY": 5, 17 | "MEMORY_BACKEND": "local", 18 | "TOTAL_WORDS": 1200, 19 | "REPORT_FORMAT": "APA", 20 | "MAX_ITERATIONS": 3, 21 | "AGENT_ROLE": null, 22 | "SCRAPER": "bs", 23 | "MAX_SCRAPER_WORKERS": 15, 24 | "MAX_SUBTOPICS": 3, 25 | "LANGUAGE": "english", 26 | "REPORT_SOURCE": "web", 27 | "DOC_PATH": "./my-docs", 28 | "PROMPT_FAMILY": 
"default", 29 | "LLM_KWARGS": {"max_tokens": 20000}, 30 | "EMBEDDING_KWARGS": {}, 31 | "VERBOSE": false, 32 | "DEEP_RESEARCH_BREADTH": 3, 33 | "DEEP_RESEARCH_DEPTH": 2, 34 | "DEEP_RESEARCH_CONCURRENCY": 4, 35 | "MCP_SERVERS": [], 36 | "MCP_AUTO_TOOL_SELECTION": true, 37 | "MCP_ALLOWED_ROOT_PATHS": [], 38 | "MCP_STRATEGY": "fast", 39 | "REASONING_EFFORT": "medium" 40 | } -------------------------------------------------------------------------------- /coscientist/supervisor_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Supervisor agent 3 | ---------------- 4 | - Analyzes the current state of the research process 5 | - Decides what actions to take next to advance scientific hypothesis 6 | generation, evaluation, and refinement 7 | - Uses strategic decision-making framework to balance exploration vs exploitation 8 | 9 | More details: 10 | - Takes in comprehensive system statistics and meta-reviews 11 | - Makes strategic decisions about next steps in the research process 12 | - Balances between generating new hypotheses, evolving existing ones, 13 | running tournaments, expanding literature review, or finishing 14 | - Considers quality metrics, diversity metrics, and research momentum 15 | """ 16 | 17 | import re 18 | from typing import TypedDict 19 | 20 | from langchain_core.language_models.chat_models import BaseChatModel 21 | from langgraph.graph import END, StateGraph 22 | 23 | from coscientist.common import load_prompt 24 | 25 | 26 | class SupervisorDecisionState(TypedDict): 27 | """ 28 | State for the supervisor decision agent. 
29 | """ 30 | 31 | goal: str 32 | meta_review: str 33 | previous_meta_review: str 34 | total_actions: int 35 | latest_actions: str 36 | total_hypotheses: int 37 | num_unranked_hypotheses: int 38 | num_meta_reviews: int 39 | new_hypotheses_since_meta_review: int 40 | total_matches_played: int 41 | total_rounds_played: int 42 | top_3_elo_ratings: str 43 | max_elo_rating: str 44 | num_elo_ratings_over_1400: str 45 | median_elo_rating: str 46 | cosine_similarity_trajectory: str 47 | cluster_count_trajectory: str 48 | literature_review_subtopics_completed: int 49 | action: str 50 | decision_reasoning: str 51 | 52 | 53 | def build_supervisor_agent(llm: BaseChatModel) -> StateGraph: 54 | """ 55 | Builds and configures a LangGraph for supervisor decision-making. 56 | 57 | Parameters 58 | ---------- 59 | llm : BaseChatModel 60 | The language model to use for supervisor decisions. 61 | 62 | Returns 63 | ------- 64 | StateGraph 65 | A compiled LangGraph for the supervisor agent. 66 | """ 67 | graph = StateGraph(SupervisorDecisionState) 68 | 69 | graph.add_node( 70 | "supervisor_decision", 71 | lambda state: _supervisor_decision_node(state, llm), 72 | ) 73 | 74 | graph.add_edge("supervisor_decision", END) 75 | graph.set_entry_point("supervisor_decision") 76 | return graph.compile() 77 | 78 | 79 | def _parse_supervisor_response(response: str) -> tuple[str, str]: 80 | """ 81 | Parse the structured supervisor response to extract action and reasoning. 
82 | 83 | Expected format: 84 | DECISION: [chosen_action] 85 | 86 | REASONING: 87 | - Primary factors influencing this decision 88 | - Key metrics that support this choice 89 | - Strategic rationale for timing 90 | 91 | Parameters 92 | ---------- 93 | response : str 94 | The raw response from the LLM 95 | 96 | Returns 97 | ------- 98 | tuple[str, str] 99 | A tuple of (action, decision_reasoning) 100 | """ 101 | # Extract action from DECISION line 102 | decision_match = re.search(r"DECISION:\s*(.+)", response, re.IGNORECASE) 103 | action = decision_match.group(1).strip() if decision_match else "" 104 | 105 | # Extract reasoning section 106 | reasoning_match = re.search( 107 | r"REASONING:\s*(.*)", response, re.IGNORECASE | re.DOTALL 108 | ) 109 | decision_reasoning = reasoning_match.group(1).strip() if reasoning_match else "" 110 | 111 | return action, decision_reasoning 112 | 113 | 114 | def _supervisor_decision_node( 115 | state: SupervisorDecisionState, 116 | llm: BaseChatModel, 117 | ) -> SupervisorDecisionState: 118 | """ 119 | Supervisor decision node that analyzes system state and decides next action. 
120 | """ 121 | prompt = load_prompt( 122 | "supervisor_decision", 123 | goal=state["goal"], 124 | meta_review=state["meta_review"], 125 | previous_meta_review=state["previous_meta_review"], 126 | total_actions=state["total_actions"], 127 | latest_actions=state["latest_actions"], 128 | total_hypotheses=state["total_hypotheses"], 129 | num_unranked_hypotheses=state["num_unranked_hypotheses"], 130 | num_meta_reviews=state["num_meta_reviews"], 131 | new_hypotheses_since_meta_review=state["new_hypotheses_since_meta_review"], 132 | total_matches_played=state["total_matches_played"], 133 | total_rounds_played=state["total_rounds_played"], 134 | top_3_elo_ratings=state["top_3_elo_ratings"], 135 | max_elo_rating=state["max_elo_rating"], 136 | num_elo_ratings_over_1400=state["num_elo_ratings_over_1400"], 137 | median_elo_rating=state["median_elo_rating"], 138 | cosine_similarity_trajectory=state["cosine_similarity_trajectory"], 139 | cluster_count_trajectory=state["cluster_count_trajectory"], 140 | literature_review_subtopics_completed=state[ 141 | "literature_review_subtopics_completed" 142 | ], 143 | ) 144 | 145 | response_content = llm.invoke(prompt).content 146 | action, decision_reasoning = _parse_supervisor_response(response_content) 147 | return {**state, "action": action, "decision_reasoning": decision_reasoning} 148 | -------------------------------------------------------------------------------- /notebooks/coscientist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7db78a54-5e6c-4b1f-9504-12dd82de59a8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Example Usage" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "0774df25-40e8-41cc-89a2-3ca4bf7bc66b", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%load_ext autoreload\n", 19 | "%autoreload 2" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | 
"execution_count": null, 25 | "id": "61f3bc36-20c4-4f35-b53b-42e02ff4b4e3", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "from coscientist.framework import CoscientistConfig, CoscientistFramework\n", 30 | "from coscientist.global_state import CoscientistState, CoscientistStateManager" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "f06d0b82-dc37-4958-9b15-4bd4027a3c68", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "goal = \"How does the gut microbiome influence rheumatoid arthritis and can probiotics help to mitigate symptoms? If so, which ones are promising?\"\n", 41 | "initial_state = CoscientistState(goal=goal)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "eed3f916-bdc6-434a-b046-423a05ca360b", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "config = CoscientistConfig()\n", 52 | "state_manager = CoscientistStateManager(initial_state)\n", 53 | "cosci = CoscientistFramework(config, state_manager)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "a759291e-a22b-4027-82a9-a82619a425ea", 60 | "metadata": { 61 | "scrolled": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "final_report, final_meta_review = await cosci.run()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "a3e08ffb-a4da-4c24-9be4-afb7a4913ba7", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3 (ipykernel)", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.13.2" 94 | } 95 | }, 96 | "nbformat": 4, 97 | 
"nbformat_minor": 5 98 | } 99 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2>=3.1.2 2 | networkx>=3.5 3 | scikit-learn>=1.7.0 4 | typing-extensions>=4.5.0 5 | gpt-researcher @ git+https://github.com/assafelovic/gpt-researcher@v3.3.0 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="open-coscientist-agents", 8 | version="0.0.1", 9 | author="conradry", 10 | author_email="", # Add your email if you want to include it 11 | description="Implementation of multi-agent system for AI co-scientist", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/conradry/open-coscientist-agents", 15 | packages=find_packages(), 16 | classifiers=[ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Science/Research", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.9", 23 | "Programming Language :: Python :: 3.10", 24 | "Programming Language :: Python :: 3.11", 25 | "Programming Language :: Python :: 3.12", 26 | "Programming Language :: Python :: 3.13", 27 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 28 | ], 29 | python_requires=">=3.9", 30 | install_requires=[ 31 | "langchain>=0.3.25", 32 | "langchain-community>=0.3.24", 33 | "langgraph>=0.4.7", 34 | "typing-extensions>=4.0.0", 35 | "ipython>=8.0.0", # For notebook support 36 | "gpt-researcher @ git+https://github.com/assafelovic/gpt-researcher@v3.3.0", 37 | "langchain-core>=0.3.65", 
38 | # NOTE: langchain-community is already pinned above (>=0.3.24); the looser duplicate ">=0.3.2" was removed. 39 | "langchain-openai>=0.3.18", 40 | "langchain-anthropic>=0.3.15", 41 | "langchain-google-genai>=2.1.5", 42 | "networkx>=3.5", 43 | "scikit-learn>=1.7.0", 44 | ], 45 | extras_require={ 46 | "dev": [ 47 | "pytest>=7.0.0", 48 | "pytest-cov>=4.0.0", 49 | "black>=23.0.0", 50 | "isort>=5.0.0", 51 | "mypy>=1.0.0", 52 | "ruff>=0.0.1", 53 | "pre-commit>=3.0.0", 54 | ], 55 | "docs": [ 56 | "sphinx>=7.0.0", 57 | "sphinx-rtd-theme>=1.0.0", 58 | ], 59 | }, 60 | ) 61 | --------------------------------------------------------------------------------