├── ScienceDiscovery
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── utils.cpython-310.pyc
│   │   └── __init__.cpython-310.pyc
│   ├── graph.py
│   ├── llm_config.py
│   ├── agents.py
│   └── utils.py
├── setup.py
├── Notebooks
│   ├── SciAgents_ScienceDiscovery_GraphReasoning_non-automated.ipynb
│   └── SciAgents_ScienceDiscovery_GraphReasoning_automated.ipynb
├── README.md
└── LICENSE.txt

--------------------------------------------------------------------------------
/ScienceDiscovery/__init__.py:
--------------------------------------------------------------------------------
 1 | from ScienceDiscovery.utils import *
 2 | from ScienceDiscovery.agents import *

--------------------------------------------------------------------------------
/ScienceDiscovery/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lamm-mit/SciAgentsDiscovery/HEAD/ScienceDiscovery/__pycache__/utils.cpython-310.pyc

--------------------------------------------------------------------------------
/ScienceDiscovery/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lamm-mit/SciAgentsDiscovery/HEAD/ScienceDiscovery/__pycache__/__init__.cpython-310.pyc

--------------------------------------------------------------------------------
/ScienceDiscovery/graph.py:
--------------------------------------------------------------------------------
 1 | from ScienceDiscovery.utils import *
 2 | import os
 3 | 
 4 | data_dir_source='./graph_giant_component/'
 5 | 
 6 | embeddings_name='embeddings_simple_giant_ge-large-en-v1.5.pkl'
 7 | graph_name='large_graph_simple_giant.graphml'
 8 | tokenizer_model="BAAI/bge-large-en-v1.5"
 9 | 
10 | embedding_tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
11 | embedding_model = AutoModel.from_pretrained(tokenizer_model)
12 | 
13 | G = load_graph_with_text_as_JSON(data_dir=data_dir_source, graph_name=graph_name)
14 | G = return_giant_component_of_graph(G)
15 | G = nx.Graph(G)
16 | try:
17 |     node_embeddings = load_embeddings(f'{data_dir_source}/{embeddings_name}')
18 | except Exception:  # embeddings file missing or unreadable
19 |     print("Node embeddings not loaded, need to regenerate.")
20 |     node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model)

--------------------------------------------------------------------------------
/ScienceDiscovery/llm_config.py:
--------------------------------------------------------------------------------
 1 | import autogen
 2 | 
 3 | config_list_4o = autogen.config_list_from_models(model_list=["gpt-4o"])
 4 | 
 5 | config_list_4turbo = autogen.config_list_from_models(model_list=["gpt-4-turbo"])
 6 | 
 7 | gpt4o_config = {
 8 |     "cache_seed": 42,  # change the cache_seed for different trials
 9 |     "temperature": 0.0,
10 |     "config_list": config_list_4o,
11 |     "timeout": 540000,
12 | }
13 | 
14 | 
15 | gpt4o_config_graph = {
16 |     "cache_seed": 42,  # change the cache_seed for different trials
17 |     "temperature": 0.1,
18 |     "config_list": config_list_4o,
19 |     "timeout": 540000,
20 |     "max_tokens": 2048
21 | }
22 | 
23 | gpt4turbo_config_graph = {
24 |     "cache_seed": 42,  # change the cache_seed for different trials
25 |     "temperature": 0.2,
26 |     "config_list": config_list_4turbo,
27 |     "timeout": 540000,
28 | }
29 | 
30 | gpt4turbo_config = {
31 |     "cache_seed": 42,  # change the cache_seed for different trials
32 |     "temperature": 0,
33 |     "config_list": config_list_4turbo,
34 |     "timeout": 540000,
35 | }
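These config dictionaries are passed to the agents in `agents.py` via their `llm_config` argument. A minimal sketch of how they are consumed, assuming `OPENAI_API_KEY` is set in the environment (the agent below is illustrative and not part of this repository):
```
import os
import autogen

os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder key

# config_list_from_models builds a config list for the named models,
# resolving credentials from the environment
config_list = autogen.config_list_from_models(model_list=["gpt-4o"])
llm_config = {"cache_seed": 42, "temperature": 0.0, "config_list": config_list}

demo_agent = autogen.AssistantAgent(name="demo_agent", llm_config=llm_config)
```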
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | # It's a good practice to read long descriptions outside the setup function
 4 | with open('README.md', 'r', encoding='utf-8') as f:
 5 |     long_description = f.read()
 6 | 
 7 | setup(
 8 |     name='ScienceDiscovery',
 9 |     version='0.1.0',
10 |     author='Markus J. Buehler',
11 |     author_email='mbuehler@mit.edu',
12 |     packages=find_packages(),
13 |     install_requires=[
14 |         'numpy',
15 |         'networkx',
16 |         'matplotlib',
17 |         'pandas',
18 |         'transformers>=4.39',
19 |         'pyautogen>=0.2.28',
20 |         'powerlaw',
21 |         'markdown2',
22 |         'pdfkit',
23 |         'bitsandbytes',
24 |         'peft',
25 |         'accelerate',
26 |         'torch',
27 |         'torchvision',
28 |         'torchaudio',
29 |         'huggingface_hub',
30 |         'langchain',
31 |         'pyvis',
32 |         'yachalk',
33 |         'pytesseract',
34 |         'llama-index',
35 |         'tqdm',
36 |         'ipython',
37 |         'scikit-learn',
38 |         'scipy',
39 |         'seaborn',
40 |         # 'uuid' is not listed: it ships with the Python standard library
41 |         'pdfminer.six',
42 |         'community',
43 |         'guidance',
44 |         'python-louvain',
45 |         'wkhtmltopdf',  # the wkhtmltopdf system binary must also be installed (see README)
46 |         'weasyprint',
47 |         'llama-index-embeddings-huggingface',
48 |         'langchain-community',
49 |     ],
50 |     description='ScienceDiscovery: Uses an LLM-based multi-agent system to reason over graphs and generate novel research ideas.',
51 |     long_description=long_description,
52 |     long_description_content_type='text/markdown',
53 |     url='https://github.com/lamm-mit/SciAgentsDiscovery',
54 |     classifiers=[
55 |         'License :: OSI Approved :: Apache Software License',
56 |         'Programming Language :: Python :: 3.11'
57 |     ],
58 |     python_requires='>=3.10',
59 | )
60 | 

--------------------------------------------------------------------------------
/Notebooks/SciAgents_ScienceDiscovery_GraphReasoning_non-automated.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "1be16592-06df-42d3-834e-92001c455abb",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# SciAgents\n",
  9 |     "## Automating scientific discovery through multi-agent intelligent graph reasoning\n",
 10 |     "\n",
 11 |     "#### Alireza Ghafarollahi, Markus J. Buehler, MIT, 2024 mbuehler@MIT.EDU"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "id": "e88a310b-5971-4111-9f0e-ac6eef990594",
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "!git clone https://github.com/lamm-mit/SciAgentsDiscovery.git\n",
 22 |     "%cd SciAgentsDiscovery\n",
 23 |     "!pip install -e ."
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "id": "24cbd5ab-8985-443a-abd4-bde904dcd389", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import os\n", 34 | "\n", 35 | "OpenAI_key='sk-'\n", 36 | "os.environ['OPENAI_API_KEY']=OpenAI_key\n", 37 | "\n", 38 | "SemanticScholar_api_key = ''\n", 39 | "os.environ['SEMANTIC_SCHOLAR_API_KEY']=SemanticScholar_api_key\n", 40 | "\n", 41 | "data_dir_output='./graph_giant_component_LLMdiscovery_example/'" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "0b37b47a-bbf2-4bcc-802f-4203146a2946", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from ScienceDiscovery import *\n", 52 | "make_dir_if_needed(data_dir_output)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "dc4c7e01-48aa-426a-b1ff-cea65d5d6427", 58 | "metadata": {}, 59 | "source": [ 60 | "### Setting up OpenAI GPT model for the LLM" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "id": "c9e805ab-7609-4b2c-babc-bf51279574b3", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "default_generate_OpenAIGPT = partial(\n", 71 | " generate_OpenAIGPT,\n", 72 | " openai_api_key=OpenAI_key,\n", 73 | " #gpt_model='gpt-4-turbo',\n", 74 | " gpt_model='gpt-4o',\n", 75 | " temperature=0.2,\n", 76 | " max_tokens=2048,\n", 77 | ")" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "3b261e57-2f27-4588-8e5e-6774d654b85e", 83 | "metadata": {}, 84 | "source": [ 85 | "## Research idea generation using the non-automated multi-agent model" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "d8276cc2-4ff3-4112-ac80-4ce744e80a23", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "research_generation(G=G, \n", 96 | " embedding_tokenizer=embedding_tokenizer,\n", 97 | " embedding_model=embedding_model,\n", 98 | " node_embeddings=node_embeddings,\n", 99 | " generate=default_generate_OpenAIGPT,\n", 100 | " generate_graph_expansion=default_generate_OpenAIGPT,\n", 101 | " randomness_factor=0.2, num_random_waypoints=4,shortest_path=False,\n", 102 | " second_hop=False, data_dir=data_dir_output, save_files=False, verbatim=True,\n", 103 | " keyword_1='energy-intensive', keyword_2='protein')" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 3 (ipykernel)", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.10.12" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 5 128 | } 129 | -------------------------------------------------------------------------------- /Notebooks/SciAgents_ScienceDiscovery_GraphReasoning_automated.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1be16592-06df-42d3-834e-92001c455abb", 6 | "metadata": {}, 7 | "source": [ 8 | "# SciAgents\n", 9 | "## Automating scientific discovery through multi-agent intelligent graph reasoning\n", 10 | "\n", 11 | "#### Alireza Ghafarollahi, Markus J. 
Buehler, MIT, 2024 mbuehler@MIT.EDU"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "id": "e88a310b-5971-4111-9f0e-ac6eef990594",
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "!git clone https://github.com/lamm-mit/SciAgentsDiscovery.git\n",
 22 |     "%cd SciAgentsDiscovery\n",
 23 |     "!pip install -e ."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "id": "24cbd5ab-8985-443a-abd4-bde904dcd389",
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "import os\n",
 34 |     "\n",
 35 |     "OpenAI_key='sk-'\n",
 36 |     "os.environ['OPENAI_API_KEY']=OpenAI_key\n",
 37 |     "\n",
 38 |     "SemanticScholar_api_key = ''\n",
 39 |     "os.environ['SEMANTIC_SCHOLAR_API_KEY']=SemanticScholar_api_key\n",
 40 |     "\n",
 41 |     "data_dir_output='./graph_giant_component_LLMdiscovery_example/'"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "id": "0b37b47a-bbf2-4bcc-802f-4203146a2946",
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "from ScienceDiscovery import *\n",
 52 |     "make_dir_if_needed(data_dir_output)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "id": "4e29d660-93b2-4ec4-a9e5-0366cf502515",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "## Research idea generation using the automated multi-agent model"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "id": "9a5d67fa-b6c2-4259-a8ee-8db4b1801b05",
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "res = user.initiate_chat(recipient=manager,\n",
 71 |     "message='''Develop a research proposal using random concepts. In the end, rate the novelty and feasibility of the research idea.''',\n",
 72 |     "                         clear_history=True)"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "id": "02261047-d40f-4bfc-a400-cd2057bc6c20",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "### Saving the output"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 5,
 86 |    "id": "bb50b7c2-6063-410c-b1a2-04c1b74246d5",
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "formatted_text = \"\"\n",
 91 |     "formatted_text_summary = \"\"\n",
 92 |     "for i in range(len(res.chat_history)):\n",
 93 |     "    try:\n",
 94 |     "        formatted_text += f'''{res.chat_history[i]['tool_calls'][0]['function']['name']}-{res.chat_history[i]['tool_calls'][0]['function']['arguments']}\\n\\n'''\n",
 95 |     "    except:\n",
 96 |     "        if i==0:\n",
 97 |     "            formatted_text += '### ' + f'''{res.chat_history[i]['content']}\\n\\n'''\n",
 98 |     "        else:\n",
 99 |     "            formatted_text += f'''{res.chat_history[i]['content']}\\n\\n'''\n",
100 |     "        if re.search(\"Summary of the Initial Research Hypothesis\", f'''{res.chat_history[i]['content']}'''):\n",
101 |     "            formatted_text_summary += f'''{res.chat_history[i]['content']}'''\n",
102 |     "\n",
103 |     "text_markdown = Markdown(formatted_text)\n",
104 |     "\n",
105 |     "markdown_to_pdf(formatted_text, 'output_research')"
106 |    ]
107 |   }
108 |  ],
109 |  "metadata": {
110 |   "kernelspec": {
111 |    "display_name": "Python 3 (ipykernel)",
112 |    "language": "python",
113 |    "name": "python3"
114 |   },
115 |   "language_info": {
116 |    "codemirror_mode": {
117 |     "name": "ipython",
118 |     "version": 3
119 |    },
120 |    "file_extension": ".py",
121 |    "mimetype": "text/x-python",
122 |    "name": "python",
123 |    "nbconvert_exporter": "python",
124 |    "pygments_lexer": "ipython3",
125 |    "version": "3.10.12"
126 |   }
127 |  },
128 |  "nbformat": 4,
129 |  "nbformat_minor": 5
130 | }
131 | 
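A note for adapting the saving cell above: each entry in `res.chat_history` is a message dict, and only tool-call messages carry a `tool_calls` list, which is why the cell falls back to `content` when indexing fails. A hedged sketch of the two shapes (field values here are illustrative examples only):
```
# Illustrative message shapes found in res.chat_history:
tool_call_msg = {"role": "assistant",
                 "tool_calls": [{"function": {"name": "generate_path",
                                              "arguments": '{"keyword_1": null, "keyword_2": null}'}}]}
text_msg = {"role": "assistant",
            "content": "Summary of the Initial Research Hypothesis ..."}
```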
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SciAgents
 2 | ## Automating scientific discovery through multi-agent intelligent graph reasoning
 3 | A. Ghafarollahi, M.J. Buehler*
 4 | 
 5 | Massachusetts Institute of Technology
 6 | 
 7 | *mbuehler@MIT.EDU
 8 | 
 9 | ## Summary
10 | A key challenge in artificial intelligence is the creation of systems capable of autonomously advancing scientific understanding by exploring novel domains, identifying complex patterns, and uncovering previously unseen connections in vast scientific data. In this work, we present SciAgents, an approach that leverages three core concepts: (1) the use of large-scale ontological knowledge graphs to organize and interconnect diverse scientific concepts, (2) a suite of large language models (LLMs) and data retrieval tools, and (3) multi-agent systems with in-situ learning capabilities. Applied to biologically inspired materials, SciAgents reveals hidden interdisciplinary relationships that were previously considered unrelated, achieving a scale, precision, and exploratory power that surpasses traditional human-driven research methods. The framework autonomously generates and refines research hypotheses, elucidating underlying mechanisms, design principles, and unexpected material properties. By integrating these capabilities in a modular fashion, the intelligent system yields material discoveries, critiques and improves existing hypotheses, retrieves up-to-date data about existing research, and highlights strengths and limitations. Our case studies demonstrate scalable capabilities to combine generative AI, ontological representations, and multi-agent modeling, harnessing a 'swarm of intelligence' similar to that found in biological systems. This provides new avenues for materials discovery and accelerates the development of advanced materials by unlocking Nature’s design principles.
11 | 
12 | ![Fig_1](https://github.com/user-attachments/assets/3cae1052-427a-407c-8c9d-629111a3c070)
13 | 
14 | Figure 1. **Overview of the multi-agent graph-reasoning system developed here.**
15 | **Panel a**: Overview of graph construction, as reported in [M.J. Buehler et al., 2024](https://iopscience.iop.org/article/10.1088/2632-2153/ad7228/meta). The visual shows the progression from scientific papers as a data source to graph construction, with the image on the right showing a zoomed-in view of the graph.
16 | **Panels b and c**: Two distinct approaches are presented. In **b**, a multi-agent system based on a pre-programmed sequence of interactions between agents ensures consistency and reliability. In **c**, a fully automated, flexible multi-agent framework adapts dynamically to the evolving research context. Both systems leverage a sampled path within a global knowledge graph as context to guide the research idea generation process. Each agent plays a specialized role: the **Ontologist** defines key concepts and relationships, **Scientist 1** crafts a detailed research proposal, **Scientist 2** expands and refines the proposal, and the **Critic agent** conducts a thorough review and suggests improvements. In the second approach, a **Planner** develops a detailed plan, and the **Assistant** checks the novelty of the generated research hypotheses.
17 | This collaborative framework enables the generation of innovative and well-rounded scientific hypotheses that extend beyond conventional human-driven methods.
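The pre-programmed sequence in panel b amounts to a fixed pipeline. A minimal runnable sketch of that control flow, with stub functions standing in for the actual agents defined in `ScienceDiscovery/agents.py` (all names and strings here are illustrative only):
```
# Schematic of the fixed agent sequence in panel b; stubs replace real agents.
def ontologist(path): return f"definitions for: {path}"
def scientist_1(defs): return f"proposal from: {defs}"
def scientist_2(proposal): return f"expanded: {proposal}"
def critic(draft): return f"review of: {draft}"

path = "silk -- relates to -- energy-intensive"   # sampled subgraph context
print(critic(scientist_2(scientist_1(ontologist(path)))))
```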
18 | 
19 | ![silk_energy_results](https://github.com/user-attachments/assets/19c5e9d9-d6d1-4d9b-9a66-8bda742c7579)
20 | 
21 | Figure 2: Results from our multi-agent model, illustrating a novel research hypothesis based on a knowledge
22 | graph connecting the keywords “silk” and “energy-intensive”, as an example. This visual overview shows that the
23 | system produces detailed, well-organized documentation of research development with multiple pages and detailed text
24 | (the example shown here includes 8,100 words).
25 | 
26 | ### Codes
27 | This repository contains code for generating novel research ideas in the field of bio-inspired materials.
28 | 
29 | The notebook files ```SciAgents_ScienceDiscovery_GraphReasoning_non-automated.ipynb``` and ```SciAgents_ScienceDiscovery_GraphReasoning_automated.ipynb``` in the Notebooks directory correspond to the non-automated and automated multi-agent frameworks, respectively, as explained in the accompanying paper.
30 | 
31 | The automated multi-agent model is implemented with [AG2](https://github.com/ag2ai/ag2?tab=readme-ov-file) (formerly AutoGen), an open-source ecosystem for agent-based AI modeling.
32 | This project is also collected in [Build with AG2](https://github.com/ag2ai/build-with-ag2), where you can check out more projects built with AG2.
33 | 
34 | ### Audio file generation (podcast style, lecture, summary and others)
35 | 
36 | Please see: [lamm-mit/PDF2Audio](https://github.com/lamm-mit/PDF2Audio) or use the version at 🤗 Hugging Face Spaces [lamm-mit/PDF2Audio](https://huggingface.co/spaces/lamm-mit/PDF2Audio).
37 | 
38 | ### Example
39 | https://github.com/user-attachments/assets/d5a972f8-5308-4e42-b7dc-d68ba84e2140
40 | 
41 | 
42 | ### Requirements
43 | 
44 | You need to install the GraphReasoning package, as described below. Further, (a) OpenAI and (b) Semantic Scholar API keys are required to run the code.
45 | 
46 | #### Graph Reasoning installation
47 | 
48 | Install directly from GitHub:
49 | ```
50 | pip install git+https://github.com/lamm-mit/GraphReasoning
51 | ```
52 | Or, as an editable install:
53 | ```
54 | pip install -e git+https://github.com/lamm-mit/GraphReasoning.git#egg=GraphReasoning
55 | ```
56 | You may need wkhtmltopdf:
57 | ```
58 | sudo apt-get install wkhtmltopdf
59 | ```
60 | #### Graph file:
61 | ```
62 | from huggingface_hub import hf_hub_download
63 | graph_name='large_graph_simple_giant.graphml'
64 | filename = f"{graph_name}"
65 | file_path = hf_hub_download(repo_id='lamm-mit/bio-graph-1K', filename=filename, local_dir='./graph_giant_component')
66 | ```
67 | 
68 | #### Embeddings:
69 | ```
70 | from huggingface_hub import hf_hub_download
71 | embedding_name='embeddings_simple_giant_ge-large-en-v1.5.pkl'
72 | filename = f"{embedding_name}"
73 | file_path = hf_hub_download(repo_id='lamm-mit/bio-graph-1K', filename=filename, local_dir='./graph_giant_component')
74 | ```
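The two download snippets above can be combined with the loading logic used in `ScienceDiscovery/graph.py`; a sketch, assuming the `GraphReasoning` package is installed and exports the loader functions used there:
```
from huggingface_hub import hf_hub_download
from GraphReasoning import load_graph_with_text_as_JSON, load_embeddings

data_dir = './graph_giant_component'
for fname in ['large_graph_simple_giant.graphml',
              'embeddings_simple_giant_ge-large-en-v1.5.pkl']:
    hf_hub_download(repo_id='lamm-mit/bio-graph-1K', filename=fname, local_dir=data_dir)

G = load_graph_with_text_as_JSON(data_dir=f'{data_dir}/', graph_name='large_graph_simple_giant.graphml')
node_embeddings = load_embeddings(f'{data_dir}/embeddings_simple_giant_ge-large-en-v1.5.pkl')
```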
75 | 
76 | ### Additional background
77 | 
78 | ![Fig_2](https://github.com/user-attachments/assets/88f6a9f3-77b5-4b9c-ad7a-73e4b0841f0b)
79 | 
80 | Figure 3. Overview of the entire process from initial keyword selection to the final document, following a hierarchical expansion strategy where answers are successively refined and improved, enriched with retrieved data, critiqued, and amended through the identification of critical modeling, simulation, and experimental tasks. The process begins with initial keyword identification or random exploration within a graph, followed by path sampling to create a subgraph of relevant concepts and relationships. This subgraph forms the basis for generating structured output in JSON, including the hypothesis, outcome, mechanisms, design principles, unexpected properties, comparison, and novelty. Each component is subsequently expanded through individual prompting to yield a significant amount of additional detail, forming a comprehensive draft. This draft then undergoes a critical review process, including amendments for modeling and simulation priorities (e.g., molecular dynamics) and experimental priorities (e.g., synthetic biology). The final integrated draft, along with critical analyses, results in a document that guides further scientific inquiry.
81 | 
82 | ![Fig_3](https://github.com/user-attachments/assets/c356a6da-7218-42d0-b0f2-966193436f4c)
83 | 
84 | 
85 | Figure 4. SciAgents presents a framework for generative materials informatics, showcasing the iterative process of ideation and reasoning driven by input data, questions, and context. The cycle of ideation and reasoning leads to predictive outcomes, offering insights into new material designs and properties. The visual elements on the edges represent various data modalities such as images, documents, scientific data, DNA sequences, video content, and microscopy, illustrating the diverse sources of information feeding into this process.
86 | 
87 | ![image](https://github.com/user-attachments/assets/c11b7448-2c7b-43ae-89f2-f0e8ecac6849)
88 | 
89 | Figure 5. Visualization of the ontological knowledge graph (left: whole graph, right: sub-graph) that organizes information.
90 | 
91 | ### Original papers
92 | 
93 | Please cite this work as:
94 | ```
95 | @article{ghafarollahi2024sciagents,
96 |   title={SciAgents: Automating Scientific Discovery Through Bioinspired Multi-Agent Intelligent Graph Reasoning},
97 |   author={Ghafarollahi, Alireza and Buehler, Markus J},
98 |   journal={Advanced Materials},
99 |   pages={2413523},
100 |   year={2024},
101 |   publisher={Wiley Online Library}
102 | }
103 | 
104 | @article{buehler2024graphreasoning,
105 |   author={Markus J. Buehler},
106 |   title={Accelerating Scientific Discovery with Generative Knowledge Extraction, Graph-Based Representation, and Multimodal Intelligent Graph Reasoning},
107 |   journal={Machine Learning: Science and Technology},
108 |   year={2024},
109 |   url={http://iopscience.iop.org/article/10.1088/2632-2153/ad7228},
110 | }
111 | ```
112 | 

--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Apache License
 2 | Version 2.0, January 2004
 3 | http://www.apache.org/licenses/
 4 | 
 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 6 | 
 7 | 1. Definitions.
 8 | 
 9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /ScienceDiscovery/agents.py: -------------------------------------------------------------------------------- 1 | from ScienceDiscovery.utils import * 2 | from ScienceDiscovery.llm_config import * 3 | from ScienceDiscovery.graph import * 4 | 5 | 6 | from typing import Union 7 | import autogen 8 | from autogen import AssistantAgent 9 | from autogen.agentchat.contrib.img_utils import get_pil_image, pil_to_data_uri 10 | from autogen import register_function 11 | from autogen import ConversableAgent 12 | from typing import Dict, List 13 | from typing import Annotated, TypedDict 14 | from autogen import Agent 15 | 16 | user = autogen.UserProxyAgent( 17 | name="user", 18 | is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"), 19 | human_input_mode="ALWAYS", 20 | system_message="user. You are a human admin. You pose the task.", 21 | llm_config=False, 22 | code_execution_config=False, 23 | ) 24 | 25 | planner = AssistantAgent( 26 | name="planner", 27 | system_message = '''Planner. You are a helpful AI assistant. Your task is to suggest a comprehensive plan to solve a given task. 28 | 29 | Explain the Plan: Begin by providing a clear overview of the plan. 30 | Break Down the Plan: For each part of the plan, explain the reasoning behind it, and describe the specific actions that need to be taken. 31 | No Execution: Your role is strictly to suggest the plan. Do not take any actions to execute it. 32 | No Tool Call: If tool call is required, you must include the name of the tool and the agent who calls it in the plan. However, you are not allowed to call any Tool or function yourself. 33 | 34 | ''', 35 | llm_config=gpt4turbo_config, 36 | description='Who can suggest a step-by-step plan to solve the task by breaking down the task into simpler sub-tasks.', 37 | ) 38 | 39 | assistant = AssistantAgent( 40 | name="assistant", 41 | system_message = '''You are a helpful AI assistant. 42 | 43 | Your role is to call the appropriate tools and functions as suggested in the plan. You act as an intermediary between the planner's suggested plan and the execution of specific tasks using the available tools. You ensure that the correct parameters are passed to each tool and that the results are accurately reported back to the team. 44 | 45 | Return "TERMINATE" in the end when the task is over. 46 | ''', 47 | llm_config=gpt4turbo_config, 48 | description='''An assistant who calls the tools and functions as needed and returns the results. Tools include "rate_novelty_feasibility" and "generate_path".''', 49 | ) 50 | 51 | 52 | ontologist = AssistantAgent( 53 | name="ontologist", 54 | system_message = '''ontologist. You must follow the plan from planner. You are a sophisticated ontologist. 55 | 56 | Given some key concepts extracted from a comprehensive knowledge graph, your task is to define each one of the terms and discuss the relationships identified in the graph. 57 | 58 | The format of the knowledge graph is "node_1 -- relationship between node_1 and node_2 -- node_2 -- relationship between node_2 and node_3 -- node_3...." 59 | 60 | Make sure to incorporate EACH of the concepts in the knowledge graph in your response. 61 | 62 | Do not add any introductory phrases. First, define each term in the knowledge graph and then, secondly, discuss each of the relationships, with context. 
63 | 
64 | Here is an example structure for your response, in the following format:
65 | 
66 | {{
67 | ### Definitions:
68 | A clear definition of each term in the knowledge graph.
69 | ### Relationships
70 | A thorough discussion of all the relationships in the graph.
71 | }}
72 | 
73 | Further Instructions:
74 | Perform only the tasks assigned to you in the plan; do not undertake tasks assigned to other agents. Additionally, do not execute any functions or tools.
75 | ''',
76 |     llm_config=gpt4turbo_config,
77 |     description='I can define each of the terms and discuss the relationships in the path.',
78 | )
79 | 
80 | 
81 | scientist = AssistantAgent(
82 |     name="scientist",
83 |     system_message = '''scientist. You must follow the plan from the planner.
84 | 
85 | You are a sophisticated scientist trained in scientific research and innovation.
86 | 
87 | Given the definitions and relationships acquired from a comprehensive knowledge graph, your task is to synthesize a novel research proposal with these initial key aspects: hypothesis, outcome, mechanisms, design_principles, unexpected_properties, comparison, and novelty. Your response should not only demonstrate deep understanding and rational thinking but also explore imaginative and unconventional applications of these concepts.
88 | 
89 | Analyze the graph deeply and carefully, then craft a detailed research proposal that investigates a likely groundbreaking aspect that incorporates EACH of the concepts and relationships identified in the knowledge graph by the ontologist.
90 | 
91 | Consider the implications of your proposal and predict the outcome or behavior that might result from this line of investigation. Your creativity in linking these concepts to address unsolved problems or propose new, unexplored areas of study, emergent or unexpected behaviors, will be highly valued.
92 | 
93 | Be as quantitative as possible and include details such as numbers, sequences, or chemical formulas.
94 | 
95 | Your response should include the following SEVEN keys in great detail:
96 | 
97 | "hypothesis" clearly delineates the hypothesis that forms the basis of the proposed research question. The hypothesis should be well-defined, novel, and feasible, with a clear purpose and components. Your hypothesis should be as detailed as possible.
98 | 
99 | "outcome" describes the expected findings or impact of the research. Be quantitative and include numbers, material properties, sequences, or chemical formulas.
100 | 
101 | "mechanisms" provides details about anticipated chemical, biological or physical behaviors. Be as specific as possible, across all scales from molecular to macroscale.
102 | 
103 | "design_principles" should list out detailed design principles, focused on novel concepts, and include a high level of detail. Be creative and give this a lot of thought, and be exhaustive in your response.
104 | 
105 | "unexpected_properties" should predict unexpected properties of the new material or system. Include specific predictions, and explain the rationale behind these clearly using logic and reasoning. Think carefully.
106 | 
107 | "comparison" should provide a detailed comparison with other materials, technologies or scientific concepts. Be detailed and quantitative.
108 | 
109 | "novelty" should discuss novel aspects of the proposed idea, specifically highlighting how this advances over existing knowledge and technology.
110 | 
111 | Ensure your scientific proposal is both innovative and grounded in logical reasoning, capable of advancing our understanding or application of the concepts provided.
112 | 
113 | Here is an example structure for your response, in the following order:
114 | 
115 | {{
116 |   "1- hypothesis": "...",
117 |   "2- outcome": "...",
118 |   "3- mechanisms": "...",
119 |   "4- design_principles": "...",
120 |   "5- unexpected_properties": "...",
121 |   "6- comparison": "...",
122 |   "7- novelty": "...",
123 | }}
124 | 
125 | Remember, the value of your response lies in scientific discovery, new avenues of scientific inquiry, and potential technological breakthroughs, with detailed and solid reasoning.
126 | 
127 | Further Instructions:
128 | Make sure to incorporate EACH of the concepts in the knowledge graph in your response.
129 | Perform only the tasks assigned to you in the plan; do not undertake tasks assigned to other agents.
130 | Additionally, do not execute any functions or tools.
131 | ''',
132 |     llm_config=gpt4turbo_config_graph,
133 |     description='I can craft the research proposal with key aspects based on the definitions and relationships acquired by the ontologist. I am **ONLY** allowed to speak after `Ontologist`',
134 | )
135 | 
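The seven-key JSON structure requested above is what the utilities in `ScienceDiscovery/utils.py` post-process. A sketch of that round-trip on a stubbed reply (note that `json_to_formatted_text` expects plain keys such as `hypothesis`, without the `1- ` numbering prefixes used in the prompt):
```
import json
from ScienceDiscovery.utils import convert_response_to_JSON, json_to_formatted_text

# Stubbed scientist reply; real replies contain long, detailed values.
reply = ('Here is the proposal: {"hypothesis": "...", "outcome": "...", '
         '"mechanisms": "...", "design_principles": ["..."], '
         '"unexpected_properties": "...", "comparison": "...", "novelty": "..."}')

clean = convert_response_to_JSON(reply)          # extracts the {...} span and re-serializes it
print(json_to_formatted_text(json.loads(clean)))  # markdown-formatted proposal text
```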
136 | 
137 | hypothesis_agent = AssistantAgent(
138 |     name="hypothesis_agent",
139 |     system_message = '''hypothesis_agent. Carefully expand on the ```{hypothesis}``` of the research proposal.
140 | 
141 | Critically assess the original content and improve on it. \
142 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
143 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
144 | 
145 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
146 | 
147 | <hypothesis>
148 | where <hypothesis> is the hypothesis aspect of the research proposal.
149 | 
150 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
151 | ''',
152 |     llm_config=gpt4o_config_graph,
153 |     description='''I can expand the "hypothesis" aspect of the research proposal crafted by the "scientist".''',
154 | )
155 | 
156 | 
157 | outcome_agent = AssistantAgent(
158 |     name="outcome_agent",
159 |     system_message = '''outcome_agent. Carefully expand on the ```{outcome}``` of the research proposal developed by the scientist.
160 | 
161 | Critically assess the original content and improve on it. \
162 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
163 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
164 | 
165 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
166 | 
167 | <outcome>
168 | where <outcome> is the outcome aspect of the research proposal.
169 | 
170 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
171 | ''',
172 |     llm_config=gpt4o_config_graph,
173 |     description='''I can expand the "outcome" aspect of the research proposal crafted by the "scientist".''',
174 | )
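The five remaining expansion agents below follow exactly the same template, differing only in the aspect they expand. Purely as an illustration of that pattern, they could be generated in a loop; the `make_expansion_agent` helper here is hypothetical and not part of the repository, which defines each agent explicitly:
```
# Hypothetical refactoring sketch (not repository code).
def make_expansion_agent(aspect: str) -> AssistantAgent:
    return AssistantAgent(
        name=f"{aspect}_agent",
        system_message=(
            f"{aspect}_agent. Carefully expand on this particular aspect: "
            f"{{{aspect}}} of the research proposal. "
            "Critically assess the original content and improve on it. ..."  # abbreviated
        ),
        llm_config=gpt4o_config_graph,
        description=f'I can expand the "{aspect}" aspect of the research proposal crafted by the "scientist".',
    )

expansion_agents = [make_expansion_agent(a) for a in
                    ("mechanism", "design_principles", "unexpected_properties",
                     "comparison", "novelty")]
```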
175 | 
176 | mechanism_agent = AssistantAgent(
177 |     name="mechanism_agent",
178 |     system_message = '''mechanism_agent. Carefully expand on this particular aspect: ```{mechanism}``` of the research proposal.
179 | 
180 | Critically assess the original content and improve on it. \
181 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
182 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
183 | 
184 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
185 | 
186 | <mechanism>
187 | where <mechanism> is the mechanism aspect of the research proposal.
188 | 
189 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
190 | ''',
191 |     llm_config=gpt4o_config_graph,
192 |     description='''I can expand the "mechanism" aspect of the research proposal crafted by the "scientist"''',
193 | )
194 | 
195 | design_principles_agent = AssistantAgent(
196 |     name="design_principles_agent",
197 |     system_message = '''design_principles_agent. Carefully expand on this particular aspect: ```{design_principles}``` of the research proposal.
198 | 
199 | Critically assess the original content and improve on it. \
200 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
201 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
202 | 
203 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
204 | 
205 | <design_principles>
206 | where <design_principles> is the design_principles aspect of the research proposal.
207 | 
208 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
209 | ''',
210 |     llm_config=gpt4o_config_graph,
211 |     description='''I can expand the "design_principle" aspect of the research proposal crafted by the "scientist".''',
212 | )
213 | 
214 | unexpected_properties_agent = AssistantAgent(
215 |     name="unexpected_properties_agent",
216 |     system_message = '''unexpected_properties_agent. Carefully expand on this particular aspect: ```{unexpected_properties}``` of the research proposal.
217 | 
218 | Critically assess the original content and improve on it. \
219 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
220 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
221 | 
222 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
223 | 
224 | <unexpected_properties>
225 | where <unexpected_properties> is the unexpected_properties aspect of the research proposal.
226 | 
227 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
228 | ''',
229 |     llm_config=gpt4o_config_graph,
230 |     description='''I can expand the "unexpected_properties" aspect of the research proposal crafted by the "scientist".''',
231 | )
232 | 
233 | comparison_agent = AssistantAgent(
234 |     name="comparison_agent",
235 |     system_message = '''comparison_agent. Carefully expand on this particular aspect: ```{comparison}``` of the research proposal.
236 | 
237 | Critically assess the original content and improve on it. \
238 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
239 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
240 | 
241 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
242 | 
243 | <comparison>
244 | where <comparison> is the comparison aspect of the research proposal.
245 | 
246 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
247 | ''',
248 |     llm_config=gpt4o_config_graph,
249 |     description='''I can expand the "comparison" aspect of the research proposal crafted by the "scientist".''',
250 | )
251 | 
252 | novelty_agent = AssistantAgent(
253 |     name="novelty_agent",
254 |     system_message = '''novelty_agent. Carefully expand on this particular aspect: ```{novelty}``` of the research proposal.
255 | 
256 | Critically assess the original content and improve on it. \
257 | Add more specifics, quantitative scientific information (such as chemical formulas, numbers, sequences, processing conditions, microstructures, etc.), \
258 | rationale, and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques, experimental methods, or particular analyses.
259 | 
260 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science of the following:
261 | 
262 | <novelty>
263 | where <novelty> is the novelty aspect of the research proposal.
264 | 
265 | Do not add any introductory phrases. Begin your response directly with a heading: ### Expanded ...
266 | ''',
267 |     llm_config=gpt4o_config_graph,
268 |     description='''I can expand the "novelty" aspect of the research proposal crafted by the "scientist".''',
269 | )
270 | 
271 | critic_agent = AssistantAgent(
272 |     name="critic_agent",
273 |     system_message = '''critic_agent. You are a helpful AI agent who provides accurate, detailed and valuable responses.
274 | 
275 | You read the whole proposal with all its details and expanded aspects and provide:
276 | 
277 | (1) a summary of the document (in one paragraph, but including sufficient detail such as mechanisms, \
278 | related technologies, models and experiments, methods to be used, and so on), \
279 | 
280 | (2) a thorough critical scientific review with strengths and weaknesses, and suggested improvements. Include logical reasoning and scientific approaches.
281 | 
282 | Next, from within this document,
283 | 
284 | (1) identify the single most impactful scientific question that can be tackled with molecular modeling. \
285 | \n\nOutline key steps to set up and conduct such modeling and simulation, with details and include unique aspects of the planned work.
286 | 
287 | (2) identify the single most impactful scientific question that can be tackled with synthetic biology. \
288 | \n\nOutline key steps to set up and conduct such experimental work, with details and include unique aspects of the planned work.
289 | 
290 | Important Note:
291 | ***You do not rate Novelty and Feasibility. You are not to rate the novelty and feasibility.***
292 | ''',
293 |     llm_config=gpt4o_config_graph,
294 |     description='''I can summarize, critique, and suggest improvements after all seven aspects of the proposal have been expanded by the agents.''',
295 | )
296 | 
297 | 
298 | novelty_assistant = autogen.AssistantAgent(
299 |     name="novelty_assistant",
300 |     system_message = '''You are a critical AI assistant collaborating with a group of scientists to assess the potential impact of a research proposal. Your primary task is to evaluate a proposed research hypothesis for its novelty and feasibility, ensuring it does not overlap significantly with existing literature or delve into areas that are already well-explored.
301 | 
302 | You will have access to the Semantic Scholar API, which you can use to survey relevant literature and retrieve the top 10 results for any search query, along with their abstracts. Based on this information, you will critically assess the idea, rating its novelty and feasibility on a scale from 1 to 10 (with 1 being the lowest and 10 the highest).
303 | 
304 | Your goal is to be a stringent evaluator, especially regarding novelty. Only ideas with a sufficient contribution that could justify a new conference or peer-reviewed research paper should pass your scrutiny.
305 | 
306 | After careful analysis, return your estimations for the novelty and feasibility rates.
307 | 
308 | If the tool call was not successful, please re-call the tool until you get a valid response.
309 | 
310 | After the evaluation, conclude with a recommendation and end the conversation by stating "TERMINATE".''',
311 |     llm_config=gpt4turbo_config,
312 | )
313 | 
314 | # create a UserProxyAgent instance named "novelty_admin"
315 | novelty_admin = autogen.UserProxyAgent(
316 |     name="novelty_admin",
317 |     human_input_mode="NEVER",
318 |     max_consecutive_auto_reply=10,
319 |     is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
320 |     code_execution_config=False,  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.
321 |     llm_config=False,
322 | )
323 | 
324 | @novelty_admin.register_for_execution()
325 | @novelty_assistant.register_for_llm(description='''This function is designed to search for academic papers using the Semantic Scholar API based on a specified query.
326 | The query should be constructed with relevant keywords separated by "+". ''')
327 | def response_to_query(query: Annotated[str, '''the query for the paper search.
The query must consist of relevant keywords separated by +''']) -> str:
328 |     import requests  # imported locally; utils.py does not import requests at module level
329 |     # Define the API endpoint URL
330 |     url = 'https://api.semanticscholar.org/graph/v1/paper/search'
331 | 
332 |     # Query parameters: 'query' must be the plain string, not a set
333 |     query_params = {
334 |         'query': query,
335 |         'fields': 'title,abstract,openAccessPdf,url'
336 |     }
337 | 
338 |     # The API key is read from the environment (set SEMANTIC_SCHOLAR_API_KEY);
339 |     # handle API keys securely in production environments
340 |     # Define headers with API key
341 |     api_key = os.getenv("SEMANTIC_SCHOLAR_API_KEY")
342 |     headers = {'x-api-key': api_key}
343 | 
344 |     # Send the API request
345 |     response = requests.get(url, params=query_params, headers=headers)
346 | 
347 |     # Check response status
348 |     if response.status_code == 200:
349 |         response_data = response.json()
350 |         # Process the response data as needed
351 |     else:
352 |         response_data = f"Request failed with status code {response.status_code}: {response.text}"
353 | 
354 |     return response_data
355 | 
356 | @user.register_for_execution()
357 | @planner.register_for_llm()
358 | @assistant.register_for_llm(description='''This function can be used to create a knowledge path. The function either takes two keywords as input or assigns them randomly, and returns a path between these nodes.
359 | The path contains several concepts (nodes) and the relationships between them (edges).
360 | Do not use this function if the path is already provided. If neither the path nor the keywords are provided, select None for the keywords so that a path will be generated between randomly selected nodes.''')
361 | def generate_path(keyword_1: Annotated[Union[str, None], 'the first node in the knowledge graph. None for random selection.'],
362 |                   keyword_2: Annotated[Union[str, None], 'the second node in the knowledge graph. None for random selection.'],
363 |                   ) -> str:
364 | 
365 |     path_list_for_vis, path_list_for_vis_string = create_path(G, embedding_tokenizer,
366 |                                     embedding_model, node_embeddings, generate_graph_expansion=None,
367 |                                     randomness_factor=0.2, num_random_waypoints=4, shortest_path=False,
368 |                                     second_hop=False, data_dir='./', save_files=False, verbatim=True,
369 |                                     keyword_1=keyword_1, keyword_2=keyword_2,)
370 | 
371 |     return path_list_for_vis_string
372 | 
373 | @user.register_for_execution()
374 | @planner.register_for_llm()
375 | @assistant.register_for_llm(description='''Use this function to rate the novelty and feasibility of a research idea against the literature. The function uses Semantic Scholar to access the literature articles.
376 | The function will return the novelty and feasibility rate from 1 to 10 (lowest to highest). The input to the function is the hypothesis with its details.''')
377 | def rate_novelty_feasibility(hypothesis: Annotated[str, 'the research hypothesis.']) -> str:
378 |     res = novelty_admin.initiate_chat(
379 |         novelty_assistant,
380 |         clear_history=True,
381 |         silent=False,
382 |         max_turns=10,
383 |         message=f'''Rate the following research hypothesis\n\n{hypothesis}. \n\nCall the function three times at most, but not in parallel. Wait for the results before calling the next function. ''',
384 |         summary_method="reflection_with_llm",
385 |         summary_args={"summary_prompt": "Return all the results of the analysis as is."}
386 |     )
387 | 
388 |     return res.summary
389 | 
390 | 
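Both registered tools can also be exercised directly, outside the group chat, as a quick sanity check. A sketch, assuming the graph, tokenizer, and embeddings in `graph.py` loaded successfully (the keywords echo the paper's silk case study; the hypothesis string is a placeholder):
```
# Illustrative direct calls to the registered tools (bypassing the agents)
path_str = generate_path(keyword_1='silk', keyword_2='energy-intensive')
print(path_str)

summary = rate_novelty_feasibility(hypothesis="<full hypothesis text here>")
print(summary)
```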
''', 384 | summary_method="reflection_with_llm", 385 | summary_args={"summary_prompt" : "Return all the results of the analysis as is."} 386 | ) 387 | 388 | return res.summary 389 | 390 | 391 | planner.reset() 392 | assistant.reset() 393 | ontologist.reset() 394 | scientist.reset() 395 | critic_agent.reset() 396 | 397 | 398 | groupchat = autogen.GroupChat( 399 | agents=[user, planner, assistant, ontologist, scientist, 400 | hypothesis_agent, outcome_agent, mechanism_agent, design_principles_agent, unexpected_properties_agent, comparison_agent, novelty_agent, critic_agent, #sequence_retriever, 401 | ], messages=[], max_round=50, admin_name='user', send_introductions=True, allow_repeat_speaker=True, 402 | speaker_selection_method='auto', 403 | ) 404 | 405 | manager = autogen.GroupChatManager(groupchat=groupchat, 406 | llm_config=gpt4turbo_config, 407 | system_message='You dynamically select a speaker.') -------------------------------------------------------------------------------- /ScienceDiscovery/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | # os.environ["CUDA_VISIBLE_DEVICES"] = "0" 3 | 4 | # Markdown and PDF Handling 5 | import markdown 6 | import markdown2 7 | from weasyprint import HTML 8 | import pdfkit 9 | 10 | # Utility and System Libraries 11 | import random 12 | import re 13 | import uuid 14 | import time 15 | import glob 16 | from datetime import datetime 17 | from copy import deepcopy 18 | from pathlib import Path 19 | 20 | # Progress Bars 21 | # (use the notebook-aware tqdm under IPython/Jupyter, the console version otherwise) 22 | try: 23 | get_ipython # only defined when running under IPython 24 | from tqdm.notebook import tqdm 25 | except NameError: 26 | from tqdm import tqdm 27 | 28 | # Visualization Libraries 29 | import matplotlib.pyplot as plt 30 | import seaborn as sns # For more attractive plotting 31 | sns.set_palette("hls") 32 | 33 | # PyVis for Graph Visualization 34 | from pyvis.network import Network 35 | 36 | # IPython Display 37 | from IPython.display import display, Markdown 38 | 39 | # Data Processing Libraries 40 | import pandas as pd 41 | import numpy as np 42 | 43 | # Machine Learning and AI 44 | import torch 45 | from transformers import AutoTokenizer, AutoModel 46 | from scipy.spatial.distance import cosine 47 | from sklearn.decomposition import PCA 48 | from sklearn.cluster import KMeans 49 | 50 | # Graph Libraries 51 | import networkx as nx 52 | 53 | # LangChain Document Loaders and Splitters 54 | from langchain.document_loaders import ( 55 | PyPDFLoader, UnstructuredPDFLoader, PyPDFium2Loader, 56 | PyPDFDirectoryLoader, DirectoryLoader 57 | ) 58 | from langchain.text_splitter import RecursiveCharacterTextSplitter 59 | 60 | # Custom Graph Reasoning Module 61 | from GraphReasoning import * 62 | 63 | # JSON Handling 64 | import json 65 | 66 | from functools import partial 67 | 68 | 69 | 70 | def markdown_to_pdf(markdown_text, output_pdf_path): 71 | """ 72 | Convert a Markdown string to a PDF file using markdown2 and pdfkit. 73 | 74 | Args: 75 | markdown_text (str): The Markdown text to convert. 76 | output_pdf_path (str): The path where the output PDF should be saved.
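Returns:
    str: Path of the generated PDF. A timestamp is appended to the requested
        name, and the raw Markdown is also saved to a matching .md file.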
77 | """ 78 | # Convert Markdown to HTML 79 | html_content = markdown2.markdown(markdown_text) 80 | 81 | # Define CSS for smaller font size 82 | css = """ 83 | 88 | """ 89 | 90 | # Combine CSS and HTML content 91 | full_html = f"{css}{html_content}" 92 | 93 | # Convert HTML to PDF 94 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 95 | output_md_path = f"{output_pdf_path}_{timestamp}.md" 96 | output_pdf_path = f"{output_pdf_path}_{timestamp}.pdf" 97 | 98 | # Save the Markdown text to a .md file 99 | with open(output_md_path, 'w') as md_file: 100 | md_file.write(markdown_text) 101 | 102 | pdfkit.from_string(full_html, output_pdf_path) 103 | 104 | return output_pdf_path 105 | 106 | 107 | def convert_response_to_JSON (text_with_json): 108 | match = re.search(r"\{.*\}", text_with_json, re.DOTALL) 109 | if match: 110 | json_str = match.group(0) # This is the extracted JSON string 111 | 112 | # Step 2: Parse the JSON string into a dictionary (also performs a cleanup) 113 | json_obj = json.loads(json_str) 114 | 115 | # Step 3: Convert the dictionary back into a JSON-formatted string 116 | cleaned_json_str = json.dumps(json_obj, ensure_ascii=False) 117 | 118 | #print("JSONL file created with the extracted JSON.") 119 | else: 120 | print("No JSON content found.") 121 | cleaned_json_str='' 122 | return cleaned_json_str 123 | 124 | 125 | def json_to_formatted_text(json_data): 126 | formatted_text = "" 127 | 128 | formatted_text += f"### Hypothesis\n{json_data['hypothesis']}\n\n" 129 | formatted_text += f"### Outcome\n{json_data['outcome']}\n\n" 130 | formatted_text += f"### Mechanisms\n{json_data['mechanisms']}\n\n" 131 | 132 | formatted_text += "### Design Principles\n" 133 | 134 | design_principles_list=json_data['design_principles'] 135 | 136 | if isinstance(design_principles_list, list): 137 | for principle in design_principles_list: 138 | formatted_text += f"- {principle}\n" 139 | else: 140 | formatted_text += f"- {design_principles_list}\n" 141 | 142 | formatted_text += "\n" 143 | 144 | formatted_text += f"### Unexpected Properties\n{json_data['unexpected_properties']}\n\n" 145 | formatted_text += f"### Comparison\n{json_data['comparison']}\n\n" 146 | formatted_text += f"### Novelty\n{json_data['novelty']}\n" 147 | 148 | return formatted_text 149 | 150 | 151 | 152 | 153 | def create_path(G, embedding_tokenizer, embedding_model, node_embeddings, 154 | generate_graph_expansion=None, 155 | second_hop=False, data_dir='./', save_files=False, verbatim=False, 156 | keyword_1 = None, keyword_2 = None, 157 | shortest_path=True, #if set to False, do NOT use shortest path but sample a random path 158 | top_k=5, #for random walk, if shortest_path=False 159 | randomness_factor=0, 160 | num_random_waypoints=0, 161 | ): 162 | 163 | if keyword_1==None or keyword_2==None: 164 | # Randomly pick two distinct nodes 165 | random_nodes = random.sample(list(G.nodes()), 2) 166 | 167 | if keyword_1==None: 168 | keyword_1 = random_nodes[0] 169 | if keyword_2==None: 170 | keyword_2 = random_nodes[1] 171 | 172 | if verbatim: 173 | print("Randomly selected nodes:", keyword_1, "and", keyword_2) 174 | 175 | print(">>> Selected nodes:", keyword_1, "and", keyword_2) 176 | ''' 177 | try: 178 | keyword_1=keyword_1[0] 179 | except: 180 | keyword_1=keyword_1 181 | try: 182 | keyword_2 =keyword_2[0] 183 | except: 184 | keyword_2 =keyword_2 185 | ''' 186 | if shortest_path: 187 | (best_node_1, best_similarity_1, best_node_2, best_similarity_2), path, path_graph, shortest_path_length, fname, graph_GraphML=find_path( G, 
node_embeddings, 188 | embedding_tokenizer, embedding_model , second_hop=False, 189 | data_dir=data_dir, save_files=False, 190 | keyword_1 = keyword_1, keyword_2 = keyword_2, ) 191 | 192 | else: #random path 193 | print ("Random walk to get path:", keyword_1, "and", keyword_2) 194 | 195 | if randomness_factor>0 or num_random_waypoints>0: 196 | path, path_graph, shortest_path_length, _, _= heuristic_path_with_embeddings_with_randomization_waypoints( 197 | G, 198 | embedding_tokenizer, 199 | embedding_model, 200 | keyword_1, 201 | keyword_2, 202 | node_embeddings, 203 | top_k=5, 204 | #perturbation_factor=0.1, 205 | second_hop=False, 206 | data_dir=data_dir, 207 | verbatim=True, 208 | save_files=False, 209 | randomness_factor=randomness_factor, 210 | num_random_waypoints=num_random_waypoints, 211 | ) 212 | 213 | else: 214 | path, path_graph, shortest_path_length, _, _ = heuristic_path_with_embeddings(G, embedding_tokenizer, embedding_model, 215 | keyword_1, keyword_2, 216 | node_embeddings, top_k=top_k, 217 | second_hop=False,data_dir=data_dir, 218 | verbatim=verbatim, 219 | save_files=save_files) 220 | 221 | 222 | print ("Done random walk to get path") 223 | 224 | print("Path:", path) 225 | 226 | path_list_for_vis, path_list_for_vis_string=path_list=print_path_with_edges_as_list(G, path, keywords_separator=' -- ') 227 | print ( path_list_for_vis_string ) 228 | 229 | return path_list_for_vis, path_list_for_vis_string 230 | 231 | 232 | 233 | 234 | 235 | def develop_qa_over_path (G, embedding_tokenizer, embedding_model,node_embeddings, 236 | generate, generate_graph_expansion=None, 237 | second_hop=False, data_dir='./', save_files=False, verbatim=False, 238 | keyword_1 = None, keyword_2 = None, 239 | shortest_path=True, #if set to False, do NOT use shortest path but sample a random path 240 | top_k=5, #for random walk, if shortest_path=False 241 | randomness_factor=0, 242 | num_random_waypoints=0, 243 | ): 244 | 245 | if generate_graph_expansion==None: 246 | generate_graph_expansion=generate 247 | if keyword_1==None or keyword_2==None: 248 | # Randomly pick two distinct nodes 249 | random_nodes = random.sample(list(G.nodes()), 2) 250 | 251 | if keyword_1==None: 252 | keyword_1 = random_nodes[0] 253 | if keyword_2==None: 254 | keyword_2 = random_nodes[1] 255 | 256 | if verbatim: 257 | print("Randomly selected nodes:", keyword_1, "and", keyword_2) 258 | 259 | print(">>> Selected nodes:", keyword_1, "and", keyword_2) 260 | ''' 261 | try: 262 | keyword_1=keyword_1[0] 263 | except: 264 | keyword_1=keyword_1 265 | try: 266 | keyword_2 =keyword_2[0] 267 | except: 268 | keyword_2 =keyword_2 269 | ''' 270 | if shortest_path: 271 | (best_node_1, best_similarity_1, best_node_2, best_similarity_2), path, path_graph, shortest_path_length, fname, graph_GraphML=find_path(G, node_embeddings, 272 | embedding_tokenizer, embedding_model , second_hop=False, 273 | data_dir=data_dir, save_files=False, 274 | keyword_1 = keyword_1, keyword_2 = keyword_2, ) 275 | 276 | else: #random path 277 | print ("Random walk to get path:", keyword_1, "and", keyword_2) 278 | 279 | if randomness_factor>0 or num_random_waypoints>0: 280 | path, path_graph, shortest_path_length, _, _= heuristic_path_with_embeddings_with_randomization_waypoints( 281 | G, 282 | embedding_tokenizer, 283 | embedding_model, 284 | keyword_1, 285 | keyword_2, 286 | node_embeddings, 287 | top_k=5, 288 | #perturbation_factor=0.1, 289 | second_hop=False, 290 | data_dir=data_dir, 291 | verbatim=True, 292 | save_files=False, 293 | 
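# a nonzero randomness_factor perturbs the embedding-guided walk, and
# num_random_waypoints routes it through randomly chosen intermediate nodes,
# yielding more diverse, non-shortest paths between the two concepts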
randomness_factor=randomness_factor, 294 | num_random_waypoints=num_random_waypoints, 295 | ) 296 | 297 | else: 298 | path, path_graph, shortest_path_length, _, _ = heuristic_path_with_embeddings(G, embedding_tokenizer, embedding_model, 299 | keyword_1, keyword_2, 300 | node_embeddings, top_k=top_k, 301 | second_hop=False,data_dir=data_dir, 302 | verbatim=verbatim, 303 | save_files=save_files) 304 | 305 | 306 | print ("Done random walk to get path") 307 | 308 | print("Path:", path) 309 | 310 | path_list_for_vis, path_list_for_vis_string=path_list=print_path_with_edges_as_list(G, path, keywords_separator=' -- ') 311 | print ( path_list_for_vis_string ) 312 | 313 | print ("---------------------------------------------") 314 | 315 | prompt=f"""You are a sophisticated ontologist trained in scientific research, engineering, and innovation. 316 | 317 | Given the following key concepts extracted from a comprehensive knowledge graph, your task is to define each one of the terms and discuss the relationships identified in the graph. 318 | 319 | Consider this list of nodes and relationships from a knowledge graph between "{keyword_1}" and "{keyword_2}". 320 | 321 | The format of the knowledge graph is "node_1 -- relationship between node_1 and node_2 -- node_2 -- relationship between node_2 and node_3 -- node_3...." 322 | 323 | Here is the graph: 324 | 325 | {path_list_for_vis_string} 326 | 327 | Make sure to incorporate EACH of the concepts in the knowledge graph in your response. 328 | 329 | Do not add any introductory phrases. First, define each term in the knowledge graph; second, discuss each of the relationships, with context. """ 330 | 331 | expanded='' 332 | expanded=generate_graph_expansion( system_prompt='You are a creative scientist who provides accurate, detailed and valuable responses.', 333 | prompt=prompt, max_tokens=1024, temperature=.1, ) 334 | 335 | print ("EXPANDED: ", expanded, "\n\n") 336 | 337 | if expanded != "": 338 | expanded = f"Here is an analysis of the concepts and relationships in the graph:\n\n{expanded}\n\n" 339 | 340 | prompt=f"""You are a sophisticated scientist trained in scientific research and innovation. 341 | 342 | Given the following key concepts extracted from a comprehensive knowledge graph, your task is to synthesize a novel research hypothesis. Your response should not only demonstrate deep understanding and rational thinking but also explore imaginative and unconventional applications of these concepts. 343 | 344 | Consider this list of nodes and relationships from a knowledge graph between "{keyword_1}" and "{keyword_2}". \ 345 | The format of the graph is "node_1 -- relationship between node_1 and node_2 -- node_2 -- relationship between node_2 and node_3 -- node_3...." 346 | 347 | Here is the graph: 348 | 349 | {path_list_for_vis_string} 350 | 351 | {expanded}Analyze the graph deeply and carefully, then craft a detailed research hypothesis that investigates a likely groundbreaking aspect that incorporates EACH of these concepts. Consider the implications of your hypothesis and predict the outcome or behavior that might result from this line of investigation. Your creativity in linking these concepts to address unsolved problems or propose new, unexplored areas of study, emergent or unexpected behaviors, will be highly valued. 352 | 353 | Be as quantitative as possible and include details such as numbers, sequences, or chemical formulas.
Please structure your response in JSON format, with SEVEN keys: 354 | 355 | "hypothesis" clearly delineates the hypothesis at the basis of the proposed research question. 356 | 357 | "outcome" describes the expected findings or impact of the research. Be quantitative and include numbers, material properties, sequences, or chemical formulas. 358 | 359 | "mechanisms" provides details about anticipated chemical, biological or physical behaviors. Be as specific as possible, across all scales from molecular to macroscale. 360 | 361 | "design_principles" should list detailed design principles, focused on novel concepts, and include a high level of detail. Be creative and give this a lot of thought, and be exhaustive in your response. 362 | 363 | "unexpected_properties" should predict unexpected properties of the new material or system. Include specific predictions, and explain the rationale behind these clearly using logic and reasoning. Think carefully. 364 | 365 | "comparison" should provide a detailed comparison with other materials, technologies or scientific concepts. Be detailed and quantitative. 366 | 367 | "novelty" should discuss novel aspects of the proposed idea, specifically highlighting how this advances over existing knowledge and technology. 368 | 369 | Ensure your scientific hypothesis is both innovative and grounded in logical reasoning, capable of advancing our understanding or application of the concepts provided. 370 | 371 | Here is an example structure for your response, in JSON format: 372 | 373 | {{ 374 | "hypothesis": "...", 375 | "outcome": "...", 376 | "mechanisms": "...", 377 | "design_principles": "...", 378 | "unexpected_properties": "...", 379 | "comparison": "...", 380 | "novelty": "..." 381 | }} 382 | 383 | Remember, the value of your response lies in its potential for scientific discovery, new avenues of scientific inquiry, and technological breakthroughs, with details and solid reasoning. 384 | 385 | Make sure to incorporate EACH of the concepts in the knowledge graph in your response.
386 | """ 387 | if verbatim: 388 | print ("##############################################") 389 | print (prompt) 390 | print ("##############################################") 391 | 392 | res=generate( system_prompt='You are a creative scientist who provides accurate, detailed and valuable responses, in JSON format.', 393 | prompt=prompt, max_tokens=2048, temperature=.2, ) 394 | 395 | res=convert_response_to_JSON(res) 396 | 397 | if verbatim: 398 | display (Markdown(res) ) 399 | 400 | res_dict=None 401 | try: 402 | res_dict = json.loads(res) 403 | res_dict['path_string'] = path_list_for_vis_string 404 | res_dict['expanded'] = expanded 405 | 406 | except: 407 | print ("Dict generation failed...") 408 | 409 | return res, res_dict, path_list_for_vis_string, json_to_formatted_text(res_dict), (keyword_1, keyword_2) 410 | 411 | def research_generation(G, embedding_tokenizer, 412 | embedding_model, node_embeddings, 413 | generate, 414 | generate_graph_expansion, 415 | randomness_factor, num_random_waypoints,shortest_path, 416 | second_hop, data_dir, save_files, verbatim, 417 | keyword_1 = None, keyword_2=None, 418 | ): 419 | 420 | df_total = pd.DataFrame() 421 | 422 | res, res_data, path_string, formatted_text, (start_node, end_node) = develop_qa_over_path (G=G, 423 | embedding_tokenizer=embedding_tokenizer, 424 | embedding_model=embedding_model, 425 | node_embeddings=node_embeddings, 426 | generate=generate, 427 | generate_graph_expansion=generate_graph_expansion, 428 | randomness_factor=randomness_factor, 429 | num_random_waypoints=num_random_waypoints, 430 | shortest_path=shortest_path, 431 | second_hop=second_hop, data_dir=data_dir, save_files=save_files, verbatim=verbatim, 432 | keyword_1 = keyword_1, keyword_2=keyword_2, 433 | ) 434 | 435 | 436 | print (start_node, "---->", end_node) 437 | 438 | #generate=generate_Anthropic 439 | 440 | expanded_text='' 441 | res_data_expanded={} 442 | #for i, field in tqdm(enumerate (res_data.keys())): 443 | for i, field in tqdm(enumerate (list (res_data.keys())[:7])): 444 | prompt=f'''You are given a new resaerch idea: 445 | 446 | {formatted_text} 447 | 448 | This research idea was developed based on a knowledge graph that describes relationships between two concepts, {start_node} and {end_node}: 449 | 450 | {path_string} 451 | 452 | Now, carefully expand on this particular aspect: ```{field}```. 453 | 454 | Critically assess the original content and improve on it. \ 455 | Add more specifics, quantitive scientific information, if possible, such as chemical formulas, numbers, protein sequences, processing conditions, microstructures, etc. \ 456 | Include a clear rationale and step-by-step reasoning. When possible, comment on specific modeling and simulation techniques and codes, experimental methods, or particular analyses. 457 | 458 | Start by carefully assessing this initial draft from the perspective of a peer-reviewer whose task it is to critically assess and improve the science: 459 | 460 | {res_data[field]} 461 | 462 | Do not add any introductory phrases. Your response begins with your response, with a heading: ### Expanded ... 
463 | ''' 464 | res=generate( system_prompt='You are a creative scientist who provides accurate, detailed and valuable responses.', 465 | prompt=prompt, max_tokens=2048, temperature=.2, ) 466 | 467 | display (Markdown(res [:256])) 468 | 469 | res_data_expanded[field]=res 470 | # expanded_text = expanded_text+f'\n\n## Expanded field {i+1}: {field}\n\n'+res 471 | expanded_text = expanded_text+f'\n\n'+res 472 | print ('---------------------------------------------') 473 | 474 | complete=f"# Research concept between '{start_node}' and '{end_node}'\n\n### KNOWLEDGE GRAPH:\n\n{res_data['path_string']}\n\n"+f"### EXPANDED GRAPH:\n\n{res_data['expanded']}"+f"### PROPOSED RESEARCH/MATERIAL:\n\n{formatted_text}"+f'\n\n### EXPANDED DESCRIPTIONS:\n\n'+expanded_text 475 | 476 | #display (complete) 477 | #generate=generate_Anthropic 478 | 479 | prompt=f'Read this document:\n\n{complete}\n\nProvide (1) a summary of the document (in one paragraph, but including sufficient detail such as mechanisms, \ 480 | related technologies, models and experiments, methods to be used, and so on), \ 481 | and (2) a thorough critical scientific review with strengths and weaknesses, and suggested improvements. Include logical reasoning and scientific approaches.' 482 | critiques=generate( system_prompt='You are a critical scientist who provides accurate, detailed and valuable responses.', 483 | prompt=prompt, max_tokens=2048, temperature=.1, ) 484 | 485 | res_data['critiques'] = critiques 486 | res_data['res_data_expanded'] = res_data_expanded 487 | 488 | #display(Markdown(critiques)) 489 | complete_doc=complete+ f'\n\n## SUMMARY, CRITICAL REVIEW AND IMPROVEMENTS:\n\n'+critiques 490 | 491 | 492 | #generate=generate_Anthropic 493 | prompt=f'Read this document:\n\n{complete_doc}\n\nFrom within this document, identify the single most impactful scientific question that can be tackled with molecular modeling. \ 494 | \n\nOutline key steps to set up and conduct such modeling and simulation, with details and include unique aspects of the planned work.' 495 | modeling_priority=generate( system_prompt='You are a scientist who provides accurate, detailed and valuable responses.', 496 | prompt=prompt, max_tokens=2048, temperature=.1, ) 497 | prompt=f'Read this document:\n\n{complete_doc}\n\nFrom within this document, identify the single most impactful scientific question that can be tackled with synthetic biology. \ 498 | \n\nOutline key steps to set up and conduct such experimental work, with details and include unique aspects of the planned work.' 499 | synbio_priority=generate( system_prompt='You are a scientist who provides accurate, detailed and valuable responses.', 500 | prompt=prompt, max_tokens=2048, temperature=.1, ) 501 | display (Markdown(modeling_priority)) 502 | display (Markdown(synbio_priority)) 503 | 504 | complete_doc=complete_doc+ f'\n\n## MODELING AND SIMULATION PRIORITIES:\n\n'+modeling_priority 505 | complete_doc=complete_doc+ f'\n\n## SYNTHETIC BIOLOGY EXPERIMENTAL PRIORITIES:\n\n'+synbio_priority 506 | 507 | res_data['modeling_priority'] = modeling_priority 508 | res_data['synbio_priority'] = synbio_priority 509 | 510 | output_pdf_path = f"{data_dir}/output_" 511 | fname=markdown_to_pdf(complete_doc, output_pdf_path) 512 | 513 | df = pd.DataFrame([res_data]) 514 | df_total = pd.concat([df_total, df], ignore_index=True) 515 | #df_total.to_csv(fname) 516 | df_total.to_csv(fname[:-4]+'.csv') 517 | 518 | return None --------------------------------------------------------------------------------
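For orientation, here is a minimal usage sketch of the pipeline above. It is illustrative only: it assumes the module-level G, embedding_tokenizer, embedding_model, and node_embeddings exported by ScienceDiscovery/graph.py, and a hypothetical my_generate wrapper matching the generate(system_prompt, prompt, max_tokens, temperature) signature used throughout utils.py:

from ScienceDiscovery.graph import G, embedding_tokenizer, embedding_model, node_embeddings
from ScienceDiscovery.utils import research_generation

def my_generate(system_prompt, prompt, max_tokens=1024, temperature=0.1):
    # hypothetical LLM wrapper: route the prompts to a model of your choice
    # and return its text completion as a plain string
    raise NotImplementedError

research_generation(G=G,
                    embedding_tokenizer=embedding_tokenizer,
                    embedding_model=embedding_model,
                    node_embeddings=node_embeddings,
                    generate=my_generate,
                    generate_graph_expansion=my_generate,
                    randomness_factor=0.2, num_random_waypoints=4,
                    shortest_path=False, second_hop=False,
                    data_dir='./', save_files=False, verbatim=True,
                    keyword_1=None, keyword_2=None)  # None: nodes are chosen at random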