├── .gitinore ├── LICENSE ├── README.md ├── agents ├── __init__.py ├── gen_citations.py ├── prompts.py ├── pubmed.py ├── search.py ├── states.py ├── tools.py └── utils.py ├── check_yaml ├── check_yaml_app.py ├── kiroku ├── kiroku_app.py ├── proj ├── example.yaml └── images │ └── multi-agent.jpeg └── requirements.txt /.gitinore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | ehthumbs.db 37 | Thumbs.db 38 | 39 | # directories 40 | ################## 41 | .idea 42 | .idea/ 43 | __pycache__/ 44 | agents/__pycache__/* 45 | *.pyc 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |  2 | 3 | # Introduction 4 | 5 | Kiroku is a multi-agent system that helps you organize and write documents. 6 | 7 | I started writing it because during my PhD at Stanford, I had to go through a formal process to write papers and eventually the thesis, and I tried to follow these steps. 
8 | The difference is that, at that time, I was the student and my advisor was reviewing my documents. That gave me the idea: what if the writer becomes the advisor, and the 9 | multi-agent system becomes the student? 10 | 11 | This flow has several advantages: 12 | - It helps you organize your ideas in a better way because you can generate a sequence of paragraphs very quickly. 13 | - It helps you refine your communication through iterative evaluation of the message. 14 | - Recently, [1] suggested that LLMs can help you understand complex topics by discussing them with the LLM. 15 | 16 |  17 | (c) PhDComics (www.phdcommics.com): the advisor and the student 18 | 19 | The original code was obtained from a short course from Harrison Chase and Rotem Weiss [2], 20 | but I believe that by now not even the prompts resemble the original ones. However, I do recognize and 21 | credit them for the original code that I used as a reference. 22 | 23 |  24 | 25 | # Before You Run 26 | 27 | To run Kiroku, you need an OPENAI_API_KEY and a TAVILY_API_KEY. 28 | 29 | To get an OPENAI_API_KEY, you can check https://platform.openai.com/docs/quickstart . 30 | 31 | To get a TAVILY_API_KEY, you can check the site https://app.tavily.com/sign-in and click "Sign in". 32 | 33 | You may want to use a tool like `direnv` to manage the environment variables `OPENAI_API_KEY` and `TAVILY_API_KEY` on a per-directory basis (an example `.envrc` is sketched near the end of this README). 34 | This will help you automatically load these variables when you are working within the Kiroku project directory. 35 | `direnv` supports Linux, macOS, and Windows through WSL. 36 | 37 | # Installation 38 | 39 | Kiroku supports Python versions 3.7 through 3.11. 40 | 41 | ### 1. Set up a virtual environment 42 | You can use Python’s `venv` module to create an isolated environment for dependencies. This ensures a clean environment and avoids conflicts with system packages. 43 | 44 | ```shell 45 | cd kiroku 46 | python3 -m venv venv 47 | source venv/bin/activate 48 | pip install -r requirements.txt 49 | ``` 50 | 51 | ### 2. Installation of PANDOC 52 | 53 | You need to install PANDOC. As pointed out by Charles Ferreira Gonçalves, on macOS you can do it by executing the following command. 54 | 55 | ```shell 56 | brew install pandoc 57 | ``` 58 | 59 | # The Initial Configuration 60 | 61 | The initial configuration is specified in a YAML file with the following fields: 62 | 63 | - `title` is a suggestion for the title or the final title to use (if `suggest_title` is false). 64 | - `suggest_title` turns on title recommendations based on your original title. 65 | - `generate_citations`: if true, it will generate citations and references. 66 | - `type_of_document`: helps Kiroku decide whether it should use more technical terms, or whether we are trying to write children's stories. 67 | - `area_of_paper`: together with `hypothesis`, it helps Kiroku understand what to write. 68 | - `section_names`: list of sections, as in the example below: 69 | ```markdown 70 | section_names: 71 | - Introduction 72 | - Related Work 73 | - Architecture of Kiroku 74 | - Results 75 | - Conclusions 76 | - References 77 | ``` 78 | - `number_of_paragraphs`: instructs Kiroku to write that many paragraphs per section.
79 | ```markdown 80 | number_of_paragraphs: 81 |   "Introduction": 4 82 |   "Related Work": 7 83 |   "Architecture of Kiroku": 4 84 |   "Results": 4 85 |   "Conclusions": 3 86 |   "References": 0 87 | ``` 88 | - `hypothesis` tells Kiroku whether you want to establish that something is good or bad, and it will define the message. 89 | - `instructions`: as you interact with the document, giving instructions like "First paragraph of Introduction should 90 | discuss the revolution that was created with the launch of ChatGPT", you may want to add some of them to `instructions` so that 91 | in the next iteration, Kiroku will observe your recommendations. In Kiroku, `instructions` are appended to the `hypothesis` at 92 | this time, so you will not see them. I usually put `\n\n` after each instruction to assist the underlying LLM. 93 | - `results`: I usually leave this empty at first, as I fill it in later on. 94 | - `references` are references you want Kiroku to use during its search phase for information. 95 | - `number_of_queries` tells Kiroku how many queries it will generate for Tavily to search for information. 96 | - `max_revisions` tells Kiroku how many times it performs reflection and document writing upon analyzing the reflection results 97 | (please note that setting this value to `1` means no revision). 98 | - `temperature` is the temperature of the LLM (usually I set it to a small number). 99 | 100 | The final YAML is given below: 101 | 102 | ```yaml 103 | title: "Writing Masterpieces when You Become the Advisor" 104 | suggest_title: True 105 | generate_citations: True 106 | type_of_document: "research seminal paper" 107 | area_of_paper: "AI and Computer Science" 108 | section_names: 109 | - Introduction 110 | - Related Work 111 | - Architecture 112 | - Results 113 | - Conclusions 114 | - References 115 | number_of_paragraphs: 116 |   "Introduction": 4 117 |   "Related Work": 7 118 |   "Architecture": 4 119 |   "Results": 4 120 |   "Conclusions": 3 121 |   "References": 0 122 | hypothesis: " 123 |   We want to show in this paper that we turn paper writers into 'advisors' 124 |   and a multi-agent system into an 'advisee' who will observe the instructions by 125 |   interactively turning a coarse draft of a paper into a publication-ready 126 |   document. 127 |   " 128 | instructions: " 129 |   For the following instructions, you should use your own words. 130 |   \n\n 131 |   In the section 'Introduction', you should focus on: 132 |   \n 133 |   - In the first paragraph, you should discuss that the world has changed 134 |   since the release of ChatGPT. 135 |   \n 136 |   In the section 'Architecture', you should show the picture 137 |   '/file=images/multi-agent.jpeg' to discuss how we write a paper by defining a 138 |   title and hypothesis, writing topic sentences, expanding topic sentences into 139 |   paragraphs, writing the paragraphs, and finally reviewing what you have written. 140 |   " 141 | results: " 142 |   This is an example of how you can put in a results table. 143 |   <table>
144 |     <tr> 145 |       <td></td> 146 |       <td>Normal Text Rate</td> 147 |       <td>Kiroku Rate</td> 148 |     </tr> 149 |     <tr> 150 |       <td>Experiment 1</td> 151 |       <td>3</td> 152 |       <td>9</td> 153 |     </tr> 154 |     <tr> 155 |       <td>Experiment 2</td> 156 |       <td>5</td> 157 |       <td>10</td> 158 |     </tr>
1. https://www.youtube.com/watch?v=om7VpIK90vE
200 | 201 |2. Harrison Chase, Rotem Weiss. AI Agents in LangGraph. https://www.deeplearning.ai/short-courses/ai-agents-in-langgraph
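The "Before You Run" section above suggests using `direnv` to load `OPENAI_API_KEY` and `TAVILY_API_KEY` automatically. A minimal `.envrc` sketch is shown below; the key values are placeholders, not real credentials, and must be replaced with your own keys.

```shell
# .envrc at the root of the Kiroku checkout (placeholder values shown)
export OPENAI_API_KEY="sk-your-openai-key"
export TAVILY_API_KEY="tvly-your-tavily-key"
```

After creating the file, run `direnv allow` once in the project directory so that `direnv` exports these variables whenever you enter it.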
202 | 203 | # Authors 204 | 205 | Claudionor N. Coelho Jr (https://www.linkedin.com/in/claudionor-coelho-jr-b156b01/) 206 | 207 | Fabricio Ceolin (https://br.linkedin.com/in/fabceolin) 208 | 209 | Luiza N. Coelho (https://www.linkedin.com/in/luiza-coelho-08499112a/) 210 | (looking for a summer internship for summer of 2025 in Business Development, Economics, Marketing) 211 | 212 | 213 | 214 | -------------------------------------------------------------------------------- /agents/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | from .gen_citations import * 4 | from .prompts import * 5 | from .pubmed import * 6 | from .search import * 7 | from .states import * 8 | from .tools import * 9 | from .utils import * 10 | 11 | -------------------------------------------------------------------------------- /agents/gen_citations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | from nltk import sent_tokenize 4 | import numpy as np 5 | import re 6 | from langchain_openai import OpenAIEmbeddings 7 | 8 | def get_sentences(paper): 9 | """ 10 | Get the list of sentences for the paper. 11 | :param paper: paper without conclusions and references. 12 | :return: list of sentences. 13 | """ 14 | search = re.search(r"## Abstract[^#]*", paper) 15 | if search: 16 | l, r = search.span() 17 | paper = paper[:l] + paper[r-1:] 18 | paragraphs = "\n".join( 19 | [p for p in paper.split("\n") if p and not (p[0] == '#' or p[:2] == '![')]) 20 | sentences = [s.split('\n')[-1] for s in sent_tokenize(paragraphs)] 21 | 22 | return sentences 23 | 24 | def get_references(references): 25 | """ 26 | Get the references and generate an unnumbered list. 27 | :param references: 28 | :return: list of references. 29 | """ 30 | ref_list = references.split('\n') 31 | ref_list = [('.'.join(r.split('.')[1:])).strip() 32 | for r in ref_list if r.strip()] 33 | return ref_list 34 | 35 | def reorder_references(reference_index, references): 36 | new_references = [] 37 | for key in sorted(reference_index.keys()): 38 | for i in range(len(reference_index[key])): 39 | j = len(new_references)+1 40 | reference_index[key][i], j = j, reference_index[key][i] 41 | new_references.append(references[j-1]) 42 | return new_references 43 | 44 | def insert_references(draft): 45 | """ 46 | Insert references into sentences by computing the best match for the embeddings. 47 | :param draft: paper draft 48 | :return: new paper version. 49 | """ 50 | draft = draft.strip() 51 | search = re.search(r"## References", draft) 52 | if search: 53 | # split paper into paper up to reference and references 54 | l, r = search.span() 55 | paper = draft[:l] 56 | references = draft[r:] 57 | 58 | # remove Conclusions if they exist as we do not want to put 59 | # references in conclusions. 
60 | search = re.search(r"## Conclusions\n\n", paper) 61 | if search: 62 | l, r = search.span() 63 | paper_no_conclusions = paper[:r] 64 | else: 65 | paper_no_conclusions = paper 66 | 67 | # get list of sentences and references 68 | sentences = get_sentences(paper_no_conclusions) 69 | references = get_references(references) 70 | 71 | # compute the embeddings for list of sentences and list of 72 | # references 73 | embeddings = OpenAIEmbeddings(model="text-embedding-3-large") 74 | emb_sents = np.array(embeddings.embed_documents(sentences)) 75 | emb_refs = np.array(embeddings.embed_documents(references)) 76 | 77 | # insertion point is the argmax for each sentence. This is an 78 | # approximation to the problem. 79 | similarities = np.dot(emb_sents, emb_refs.T) 80 | citation_inserts = np.argmax(similarities, axis=0) 81 | 82 | # merge multiple citations to the correct insertion 83 | # point. 84 | citations = {} 85 | for i in range(len(citation_inserts)): 86 | s = citation_inserts[i] 87 | if s in citations: 88 | citations[s].append(i+1) 89 | else: 90 | citations[s] = [i+1] 91 | 92 | references = reorder_references(citations, references) 93 | 94 | # generate citations to references 95 | paper = paper.strip() 96 | for s in citations: 97 | l = paper.find(sentences[s]) 98 | r = l + len(sentences[s]) 99 | cit = ",".join([ 100 | f'{i}' 101 | for i in citations[s]]) 102 | paper = paper[:r-1] + f" [{cit}]" + paper[r-1:] 103 | 104 | # generated clickable references 105 | references = [f'{i+1}. {r.strip()}
' 106 | for i, r in enumerate(references)] 107 | 108 | # create new draft of the paper. 109 | draft = ( 110 | paper.strip() + 111 | "\n\n## References\n\n" + 112 | "\n\n".join(references) 113 | ) 114 | 115 | return draft 116 | 117 | -------------------------------------------------------------------------------- /agents/prompts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | TITLE_PROMPT = ( 4 | "You are a helpful AI assistant expert in {area_of_paper}. " 5 | "The title of the paper is '{title}'. " 6 | "The hypothesis of the paper is '{hypothesis}'. " 7 | "The user will give you the title of the paper or document the user " 8 | "is writing, and you will recommend possible titles, following user's " 9 | "instructions. You will be truthful to the instructions explained and " 10 | "use your knowledge recommend a good title. You should only answer what " 11 | "you have been asked and nothing more. If user does not request a clarification " 12 | "comment, you shold not add it. You should not suggest follow up questions, unless " 13 | "they are requested by the user. " 14 | ) 15 | 16 | TOPIC_SENTENCE_PROMPT = ( 17 | "You are an expert writer tasked with writing a high-level outline of a technical " 18 | "report. Write such an outline for the user-provided topic and follow the " 19 | "user-provided instructions. Give an outline of the report along with any " 20 | "relevant notes or instructions for the sections. You must follow any instructions " 21 | "provided by the user. Each section of the report should contain 3 to 4 paragraphs, " 22 | "unless instructed by the user. At the planning stage, you should provide for each " 23 | "paragraph only the topic sentences to be filled later by the content. " 24 | "Each entry on the numbered list of topic sentences should only contain the sentence " 25 | "and nothing more. If there are pictures, you MUST add them. The added pictures MUST " 26 | "be between topic sentences, with '\n' between any topic sentences. You SHOULD NOT add " 27 | "pictures unless instructed by the user. The picture definition in Markdown should not be " 28 | "changed. \n If there are tables in the instructions, you MUST follow the instructions on how " 29 | "to add the tables. Tables should be spaced by '\n' before and after topic sentences. \n" 30 | "Do NOT add any references in the 'References section if a 'References' section exist. " 31 | "You MUST return the output in Markdown format without any other text. " 32 | ) 33 | 34 | TOPIC_SENTENCE_REVIEW_PROMPT = ( 35 | "You are an expert writer who receives an article plan and an instruction " 36 | "from the user, and changes the article plan accordingly according to the " 37 | "instructions. The output should only contain the revised plan according to the " 38 | "instruction, and nothing more. The review should return a new plan in plain " 39 | "Markdown format. You should return a new version following all previous " 40 | "instructions. " 41 | ) 42 | 43 | PAPER_WRITER_PROMPT = ( 44 | "You are an AI assistant tasked with writing excellent technical documents.\n" 45 | "Generate the best document possible for the user's request and the initial " 46 | "outline.\n" 47 | "If the user provides critique, respond by revising the technical document according to the " 48 | "suggestions. \n" 49 | "You should expand each topic sentence by a paragraph. 
\n" 50 | "Each paragraph MUST have at least {sentences_per_paragraph} sentences. \n" 51 | "You SHOULD NOT write a paragraph with less than {sentences_per_paragraph} sentences. \n" 52 | "Each paragraph MUST start with the corresponding topic sentence. \n" 53 | "You must write a document that is publication ready. \n" 54 | "You must be truthful to the information provided. \n" 55 | "Utilize all the information below as needed. \n\n" 56 | "If there are pictures, you MUST add them. The added pictures MUST " 57 | "be between paragraphs, with '\n' between any paragraphs. You SHOULD NOT add " 58 | "pictures unless instructed by the user. \n\n" 59 | "If there are tables in the instructions, you MUST follow the instructions on how " 60 | "to add the tables. Tables should be spaced by '\n' before and after paragraphs. \n\n" 61 | "You MUST NOT add references to the References section. \n" 62 | "\n" 63 | "Output should only contain a document in Markdown format. \n" 64 | "\n" 65 | "------\n" 66 | "\n" 67 | "Task:\n\n" 68 | "{task}\n" 69 | "------\n" 70 | "\n" 71 | "Content:\n\n" 72 | "{content}\n" 73 | "------\n" 74 | "Previous User's Instructions that Should be Obeyed:\n\n" 75 | "{review_instructions}\n" 76 | "------\n" 77 | "Previous Critiques:\n\n" 78 | "{critique}\n" 79 | ) 80 | 81 | REFERENCES_PROMPT = ( 82 | "You are an AI assistant tasked with writing references for technical documents.\n" 83 | "Generate the best references possible based on the instructions below.\n" 84 | "You MUST create reference entries based on all 'Content' " 85 | "entries provided. Entries SHOULD NOT be duplicated. \n" 86 | "Reference entries MUST be a numbered list of reference item. \n\n " 87 | "Each reference item MUST be in the format author list (if one exists), title, where " 88 | "the publication was made, publication date, and http link. Except for the " 89 | "title, all other information may be optional. Each author MUST be in the format: last " 90 | "name, comma, first letter of the remaining names followed by a '.'. For example, the " 91 | "following author name is a VALID author name: 'Hubert, K.F.'. \n" 92 | "Each reference item must be in a single line. \n" 93 | "An example of a valid reference is the following: 'Hubert, K.F., Awa, K.N., Zabelina, " 94 | "D.L. The current state of artificial intelligence generative language models is more " 95 | "creative than humans on divergent thinking tasks. Sci Rep 14, 3440 (2024). " 96 | "https://doi.org/10.1038/s41598-024-53303-w' \n" 97 | "In this example, 'The current state of artificial intelligence generative " 98 | "language models is more creative than humans on divergent thinking tasks' " 99 | "is the title of the reference.\n" 100 | "Another example of a valid reference the following: 'The Need For AI-Powered " 101 | "Cybersecurity to Tackle AI-Driven Cyberattacks, https://www.isaca.org/" 102 | "resources/news-and-trends/isaca-now-blog/2024/the-need-for-ai-powered-" 103 | "cybersecurity-to-tackle-ai-driven-cyberattacks' \n" 104 | "In this example, there are no authors to the paper, and the title is " 105 | "'The Need For AI-Powered Cybersecurity to Tackle AI-Driven \n\n" 106 | "Output must be in markdown format." 107 | ) 108 | 109 | WRITER_REVIEW_PROMPT = ( 110 | "You are an expert writer who receives an article and an instruction " 111 | "from the user, and changes the article plan accordingly according to the " 112 | "instructions. The output should only contain the revised article according " 113 | "to the instruction, and nothing more. 
The review should return a new " 114 | "article in plain Markdown format. " 115 | ) 116 | 117 | REFLECTION_REVIEWER_PROMPT = """ 118 | You are a PhD advisor evaluating a technical document submission. \ 119 | This paper was submitted to an academic journal. You are a critical \ 120 | reviewer offering detailed and helpful feedback to help the authors. \ 121 | First, read through the paper and list the major issues you see that \ 122 | need help. Then rate it on the following scale: clarity, conciseness, 123 | depth. \ 124 | Figures and tables should not be changed. \ 125 | Your review should not contradict the previous manual review steps. \ 126 | Your review should not contradict the hypothesis of the paper. \ 127 | You MUST NEVER remove references added previously. 128 | 129 | Hypothesis of the paper: 130 | 131 | {hypothesis} 132 | 133 | Previous User's Instructions that Should be Obeyed: 134 | 135 | {review_instructions} 136 | """ 137 | 138 | INTERNET_SEARCH_PROMPT = """ 139 | You are in the internet_search phase. 140 | You are an AI researcher charged with providing information that can be used \ 141 | when writing the following essay. Generate a list of search queries that will \ 142 | gather any relevant information. Only generate {number_of_queries} queries max.""" 143 | 144 | RESEARCH_CRITIQUE_PROMPT = """ 145 | You are a researcher charged with providing information that can \ 146 | be used when making any requested revisions (as outlined below). \ 147 | Generate a list of search queries that will gather any relevant information. \ 148 | You should use the critique to generate the queries, but focusing on the \ 149 | hypothesis and draft only. Only generate 3 queries max. 150 | """ 151 | 152 | ABSTRACT_WRITER_PROMPT = """ 153 | You are an AI assistant that analyzes a text and writes an 'Abstract' section \ 154 | with 200 words or less after the title and before the first section of the paper. \ 155 | The abstract section begins with the section 'Abstract'. You must only return the final \ 156 | document in Markdown format. 157 | """ 158 | 159 | TASK_TEMPLATE = """ 160 | You will write a {type_of_document} in the area of {area_of_paper}. 161 | The document will have the title '{title}'. 162 | Your report should contain the sections {sections}. 163 | {instruction} 164 | 165 | Hypothesis: 166 | 167 | {hypothesis} 168 | 169 | Results: 170 | 171 | {results} 172 | 173 | References: 174 | 175 | {references} 176 | 177 | Your output should be in Markdown format. 178 | """ 179 | 180 | 181 | -------------------------------------------------------------------------------- /agents/pubmed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | import json 4 | import logging 5 | import time 6 | import urllib.error 7 | import urllib.parse 8 | import urllib.request 9 | from typing import Any, Dict, Iterator, List 10 | 11 | from langchain_core.documents import Document 12 | from pydantic import BaseModel, model_validator 13 | 14 | class PubMedAPIWrapper(BaseModel): 15 | """ 16 | Wrapper around PubMed API. 17 | 18 | This wrapper will use the PubMed API to conduct searches and fetch 19 | document summaries. By default, it will return the document summaries 20 | of the top-k results of an input search. 21 | 22 | Parameters: 23 | top_k_results: number of the top-scored document used for the PubMed tool 24 | MAX_QUERY_LENGTH: maximum length of the query. 
25 | Default is 300 characters. 26 | doc_content_chars_max: maximum length of the document content. 27 | Content will be truncated if it exceeds this length. 28 | Default is 2000 characters. 29 | max_retry: maximum number of retries for a request. Default is 5. 30 | sleep_time: time to wait between retries. 31 | Default is 0.2 seconds. 32 | email: email address to be used for the PubMed API. 33 | """ 34 | 35 | parse: Any #: :meta private: 36 | 37 | base_url_esearch: str = ( 38 | "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?" 39 | ) 40 | base_url_efetch: str = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" 41 | max_retry: int = 5 42 | sleep_time: float = 0.2 43 | 44 | # Default values for the parameters 45 | top_k_results: int = 3 46 | MAX_QUERY_LENGTH: int = 300 47 | doc_content_chars_max: int = 2000 48 | email: str = "your_email@example.com" 49 | 50 | @model_validator(mode="before") 51 | @classmethod 52 | def validate_environment(cls, values: Dict) -> Dict: 53 | """Validate that the python package exists in environment.""" 54 | try: 55 | import xmltodict 56 | 57 | values["parse"] = xmltodict.parse 58 | except ImportError: 59 | raise ImportError( 60 | "Could not import xmltodict python package. " 61 | "Please install it with `pip install xmltodict`." 62 | ) 63 | return values 64 | 65 | def run(self, query: str) -> str: 66 | """ 67 | Run PubMed search and get the article meta information. 68 | See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch 69 | It uses only the most informative fields of article meta information. 70 | """ 71 | try: 72 | # Retrieve the top-k results for the query 73 | docs = [ 74 | ", ".join([ 75 | f"Published: {result['Published']}", 76 | f"Title: '{result['Title']}'", 77 | f"Authors: '" f"{', '.join(result['Authors'])}" "'", 78 | f"Summary: '{result['Summary']}'" 79 | ]) 80 | for result in self.load(query[: self.MAX_QUERY_LENGTH]) 81 | ] 82 | 83 | # Join the results and limit the character count 84 | return docs 85 | except Exception as ex: 86 | return "" 87 | 88 | def lazy_load(self, query: str) -> Iterator[dict]: 89 | """ 90 | Search PubMed for documents matching the query. 91 | Return an iterator of dictionaries containing the document metadata. 92 | """ 93 | 94 | url = ( 95 | self.base_url_esearch 96 | + "db=pubmed&term=" 97 | + str({urllib.parse.quote(query)}) 98 | + f"&retmode=json&retmax={self.top_k_results}&usehistory=y" 99 | ) 100 | result = urllib.request.urlopen(url) 101 | text = result.read().decode("utf-8") 102 | json_text = json.loads(text) 103 | 104 | webenv = json_text["esearchresult"]["webenv"] 105 | for uid in json_text["esearchresult"]["idlist"]: 106 | yield self.retrieve_article(uid, webenv) 107 | 108 | def load(self, query: str) -> List[dict]: 109 | """ 110 | Search PubMed for documents matching the query. 111 | Return a list of dictionaries containing the document metadata. 
112 | """ 113 | return list(self.lazy_load(query)) 114 | 115 | def _dict2document(self, doc: dict) -> Document: 116 | summary = doc.pop("Summary") 117 | return Document(page_content=summary, metadata=doc) 118 | 119 | def lazy_load_docs(self, query: str) -> Iterator[Document]: 120 | for d in self.lazy_load(query=query): 121 | yield self._dict2document(d) 122 | 123 | def load_docs(self, query: str) -> List[Document]: 124 | return list(self.lazy_load_docs(query=query)) 125 | 126 | def retrieve_article(self, uid: str, webenv: str) -> dict: 127 | url = ( 128 | self.base_url_efetch 129 | + "db=pubmed&retmode=xml&id=" 130 | + uid 131 | + "&webenv=" 132 | + webenv 133 | ) 134 | 135 | retry = 0 136 | while True: 137 | try: 138 | result = urllib.request.urlopen(url) 139 | break 140 | except urllib.error.HTTPError as e: 141 | if e.code == 429 and retry < self.max_retry: 142 | # Too Many Requests errors 143 | # wait for an exponentially increasing amount of time 144 | print( # noqa: T201 145 | f"Too Many Requests, " 146 | f"waiting for {self.sleep_time:.2f} seconds..." 147 | ) 148 | time.sleep(self.sleep_time) 149 | self.sleep_time *= 2 150 | retry += 1 151 | else: 152 | raise e 153 | 154 | xml_text = result.read().decode("utf-8") 155 | text_dict = self.parse(xml_text) 156 | return self._parse_article(uid, text_dict) 157 | 158 | def _parse_article(self, uid: str, text_dict: dict) -> dict: 159 | try: 160 | ar = text_dict["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"][ 161 | "Article" 162 | ] 163 | except KeyError: 164 | ar = text_dict["PubmedArticleSet"]["PubmedBookArticle"]["BookDocument"] 165 | authors = [ 166 | author.get("ForeName", "") + " " + author.get("LastName", "") 167 | for author in ar.get("AuthorList", {}).get("Author", []) 168 | ] 169 | abstract_text = ar.get("Abstract", {}).get("AbstractText", []) 170 | summaries = [ 171 | f"{txt['@Label']}: {txt['#text']}" 172 | for txt in abstract_text 173 | if "#text" in txt and "@Label" in txt 174 | ] 175 | summary = ( 176 | "\n".join(summaries) 177 | if summaries 178 | else ( 179 | abstract_text 180 | if isinstance(abstract_text, str) 181 | else ( 182 | "\n".join(str(value) for value in abstract_text.values()) 183 | if isinstance(abstract_text, dict) 184 | else "No abstract available" 185 | ) 186 | ) 187 | ) 188 | a_d = ar.get("ArticleDate", {}) 189 | pub_date = "-".join( 190 | [a_d.get("Year", ""), a_d.get("Month", ""), a_d.get("Day", "")] 191 | ) 192 | title = ar.get("ArticleTitle", {}) 193 | if isinstance(title, dict): 194 | title = title.get("#text", "") 195 | return { 196 | "uid": uid, 197 | "Title": title, 198 | "Published": pub_date, 199 | "Authors": authors, 200 | "Summary": summary, 201 | } 202 | -------------------------------------------------------------------------------- /agents/search.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | import logging 4 | 5 | from .tools import tools, tavily 6 | 7 | def get_additional_info(link): 8 | arxiv = tools["arxiv"] 9 | pubmed = tools["pub_med"] 10 | doc = "" 11 | if "https://arxiv.org/" in link: 12 | arxiv_entry = link.split("/")[-1].strip() 13 | doc = arxiv.run(arxiv_entry) 14 | elif "pubmed" in link: 15 | doc = pubmed.run(link)[0] 16 | if doc: 17 | doc = ", " + doc 18 | return doc 19 | 20 | def search_query_ideas(query_ideas, cache, max_results=3, search_engine="tavily"): 21 | """ 22 | Searches the web based on query ideas, and expand search in some 
cases. 23 | :param query_ideas: List of query ideas to search on the web. 24 | :param cache: Cached list of links so that we do not duplicate. 25 | :param max_results: Maximum number of results to search. 26 | :param search_engine: "tavily", others will be implemented later 27 | :return: List of searched information to be used. 28 | """ 29 | content = [] 30 | 31 | if search_engine == "tavily": 32 | search = tavily 33 | else: 34 | raise "Invalid search engine" 35 | 36 | for iter, q in enumerate(query_ideas["queries"]): 37 | if not q: continue 38 | logging.warning(f"search for query '{q}'") 39 | if search_engine == "tavily": 40 | response = search.search(q, max_results=max_results) 41 | for result in response["results"]: 42 | text = ( 43 | f"title: {result['title']}, " 44 | f"link: {result['url']}, " 45 | f"content: {result['content']}" 46 | ) 47 | link = result['url'] 48 | if link[-1] == '/': link = link[:-1] 49 | title = result['title'] 50 | if link in cache or title in cache: 51 | continue 52 | logging.warning(f" {link}") 53 | try: 54 | info = get_additional_info(link) 55 | except: 56 | # an error happened. ignore 57 | logging.warning(f" {link} error") 58 | info = "" 59 | text = text + info 60 | cache.add(link) 61 | cache.add(title) 62 | content.append(text) 63 | return content, cache 64 | -------------------------------------------------------------------------------- /agents/states.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Claudionor Coelho Jr, Fabrício José Vieira Ceolin, Luiza Nacif Coelho 2 | 3 | import json 4 | import re 5 | from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage, ChatMessage 6 | from pydantic import BaseModel, Field 7 | from typing import TypedDict, List, Set 8 | 9 | from .gen_citations import insert_references 10 | from .prompts import ( 11 | TITLE_PROMPT, 12 | TASK_TEMPLATE, 13 | TOPIC_SENTENCE_PROMPT, 14 | TOPIC_SENTENCE_REVIEW_PROMPT, 15 | INTERNET_SEARCH_PROMPT, 16 | PAPER_WRITER_PROMPT, 17 | WRITER_REVIEW_PROMPT, 18 | REFERENCES_PROMPT, 19 | REFLECTION_REVIEWER_PROMPT, 20 | RESEARCH_CRITIQUE_PROMPT, 21 | ABSTRACT_WRITER_PROMPT 22 | ) 23 | from .search import * 24 | 25 | class AgentState(TypedDict): 26 | state: str 27 | 28 | title: str 29 | messages: str 30 | hypothesis: str 31 | area_of_paper: str 32 | type_of_document: str 33 | section_names: str 34 | number_of_paragraphs: str 35 | results: str 36 | references: list[str] 37 | 38 | # these are instructions that we save for the topic sentences 39 | # and paper writing 40 | review_topic_sentences: list[str] 41 | review_instructions: list[str] 42 | 43 | task: str 44 | plan: str 45 | draft: str 46 | critique: str 47 | cache: set[str] 48 | content: list[str] 49 | revision_number: int 50 | number_of_queries: int 51 | max_revisions: int 52 | sentences_per_paragraph: int 53 | 54 | 55 | class State: 56 | def __init__(self, model, name): 57 | self.model = model 58 | self.name = name 59 | 60 | class SuggestTitle(State): 61 | def __init__(self, model): 62 | super().__init__(model, "suggest_title") 63 | 64 | def run(self, state: AgentState, config: dict) -> dict: 65 | ''' Node of graph that suggests a title for the paper. 66 | 67 | :param state: state of the agent. 68 | :return: fields 'title', 'draft' and 'messages' updated for the paper. 
69 | ''' 70 | messages = state["messages"] 71 | if not messages: 72 | title = state["title"] 73 | area_of_paper = state["area_of_paper"] 74 | hypothesis = state["hypothesis"] 75 | 76 | messages = [ 77 | SystemMessage(content=TITLE_PROMPT.format( 78 | area_of_paper=area_of_paper, 79 | title=title, 80 | hypothesis=hypothesis 81 | )), 82 | HumanMessage(content=( 83 | "Write the original title first. Then," 84 | "generate 10 thought provoking titles that " 85 | "instigates reader's curiosity based on the given information" 86 | )) 87 | ] 88 | response = self.model.invoke(messages) 89 | messages.append(response) 90 | return { 91 | "state": self.name, 92 | "title": response.content, 93 | "messages": messages, 94 | "draft": response.content 95 | } 96 | 97 | 98 | class SuggestTitleReview(State): 99 | def __init__(self, model): 100 | super().__init__(model, "suggest_title_review") 101 | 102 | def run(self, state: AgentState, config: dict) -> dict: 103 | ''' Node of graph that suggests a title for the paper. 104 | 105 | :param state: state of the agent. 106 | :return: fields 'title', 'draft' and 'messages' updated for the paper. 107 | ''' 108 | messages = state["messages"] 109 | instruction = config["configurable"]["instruction"] 110 | if not instruction: 111 | human_message = HumanMessage( 112 | content="Just return the final title without any additional information") 113 | else: 114 | human_message = HumanMessage(content=instruction) 115 | messages.append(human_message) 116 | response = self.model.invoke(messages) 117 | messages.append(response) 118 | if not instruction: 119 | messages = [] 120 | title = response.content 121 | return { 122 | "state": self.name, 123 | "title": title, 124 | "messages": messages, 125 | "draft": response.content 126 | } 127 | 128 | 129 | class InternetSearch(State): 130 | def __init__(self, model): 131 | super().__init__(model, "internet_search") 132 | 133 | def create_task( 134 | self, 135 | title, 136 | hypothesis, 137 | area_of_paper, 138 | type_of_document, 139 | section_names, 140 | number_of_paragraphs, 141 | results, 142 | references): 143 | ''' 144 | Creates a writing task that will be executed by the agent. 145 | 146 | :param title: Title of the paper. 147 | :param hypothesis: Hypothesis of the paper, like "want to check if sky is blue". 148 | :param area_of_paper: Describes general field of knowledge of the paper. 149 | :param type_of_document: If document is a research paper, technical briefing, etc. 150 | :param section_names: List of sections for paper. 151 | :param number_of_paragraphs: List or Dict of number of paragraphs for each section. 152 | :param results: Results to be presented, if any. 153 | :param references: List of references to be used, if any. 154 | :return: prompt containing task to be executed. 155 | ''' 156 | if not hypothesis: 157 | hypothesis = "No paper hypothesis given." 158 | if not results: 159 | results = "No paper results given." 160 | if not references: 161 | references = ( 162 | "No paper references given. " 163 | "Use 'research_plan' phase to get the references." 164 | ) 165 | 166 | # number of sections should be equal to number of paragraphs per section. 
167 | assert len(section_names) == len(number_of_paragraphs) 168 | if isinstance(number_of_paragraphs, dict): 169 | number_of_paragraphs = [ 170 | number_of_paragraphs[section] 171 | for section in section_names 172 | ] 173 | sections = ( 174 | ', '.join([f"'{section}'" for section in section_names[:-1]]) + 175 | f" and '{section_names[-1]}'" 176 | ) 177 | instruction = ' '.join([ 178 | f"Section '{section}' will have {no_of_sentences} paragraphs." 179 | if no_of_sentences > 0 else 180 | ( 181 | f"Section '{section}' will have no paragraphs, " 182 | "as it will be filled later." 183 | ) 184 | for (section, no_of_sentences) in zip( 185 | section_names, number_of_paragraphs) 186 | ]) 187 | task = TASK_TEMPLATE.format( 188 | title=title, 189 | type_of_document=type_of_document, 190 | area_of_paper=area_of_paper, 191 | sections=sections, 192 | instruction=instruction, 193 | hypothesis=hypothesis, 194 | results=results, 195 | references="\n".join(references)) 196 | return task 197 | 198 | def run(self, state: AgentState): 199 | ''' 200 | Performs a search on the internet on the topic of the paper. 201 | 202 | :param state: current state of the agent. 203 | :return: field 'content' added to state. 204 | ''' 205 | queries = { "queries": [] } 206 | task = self.create_task( 207 | title=state["title"], 208 | hypothesis=state["hypothesis"], 209 | area_of_paper=state["area_of_paper"], 210 | type_of_document=state["type_of_document"], 211 | section_names=state["section_names"], 212 | number_of_paragraphs=state["number_of_paragraphs"], 213 | results=state["results"], 214 | references=state["references"] 215 | ) 216 | for _ in range(3): # three attempts 217 | result = self.model.invoke([ 218 | SystemMessage( 219 | content=( 220 | INTERNET_SEARCH_PROMPT.format( 221 | number_of_queries=state['number_of_queries']) + 222 | " You must only output the response in a plain list of queries " 223 | "in the JSON format '{ \"queries\": list[str] }' and no other text. " 224 | "You MUST only cite references that are in the references " 225 | "section. " 226 | )), 227 | HumanMessage(content=task) 228 | ]).content 229 | # we need to add this because sometimes the LLM decides to put a header 230 | # in the json file. 231 | if result[:7] == "```json": 232 | result = result.split('\n') 233 | result = '\n'.join(result[1:-1]) 234 | content = state.get('content', []) 235 | try: 236 | queries = json.loads(result) 237 | break 238 | except: 239 | logging.warning(f"state {self.name}: could not extract query {result}.") 240 | # finally, add to the queries all references that have http 241 | for ref in state["references"]: 242 | search_match = re.search(r"http.*(\s|$)", ref) 243 | if search_match: 244 | l, r = search_match.span() 245 | http_ref = ref[l:r] 246 | queries["queries"].insert(0, http_ref) 247 | if queries["queries"]: 248 | search, cache = search_query_ideas( 249 | query_ideas=queries, cache=set()) 250 | content = content + search 251 | else: 252 | cache = set() 253 | return { 254 | "state": self.name, 255 | "content": content, 256 | "cache": cache, 257 | "task": task, 258 | "messages": [], 259 | "references": [] 260 | } 261 | 262 | 263 | class TopicSentenceWriter(State): 264 | def __init__(self, model): 265 | super().__init__(model, "topic_sentence_writer") 266 | 267 | def run(self, state: AgentState): 268 | ''' 269 | Creates a bullet list plan for the paper with topic sentences. 270 | 271 | :param state: current state of the agent. 272 | :return: field 'plan' added to the state. 
273 | ''' 274 | task = state["task"] 275 | content = '\n\n'.join(state["content"]) 276 | messages = state["messages"] 277 | if not messages: 278 | messages = [ 279 | SystemMessage(content=TOPIC_SENTENCE_PROMPT) 280 | ] 281 | messages.append( 282 | HumanMessage(content=( 283 | f"This is the content of a search on the internet for the paper:\n\n" 284 | f"{content}\n\n" 285 | f"{task}")) 286 | ) 287 | response = self.model.invoke(messages) 288 | plan = response.content.strip() 289 | if "```markdown" in plan: 290 | plan = "\n".join(plan.split("\n")[1:-1]) 291 | # sometimes, the LLM just decide it will not obey the instructions to not 292 | # add references. So, we will remove them here. 293 | plan = plan.strip() 294 | search = re.search(r"## References", plan) 295 | if search: 296 | _, r = search.span() 297 | plan = plan[:r] 298 | messages.append(AIMessage(content=plan)) 299 | return { 300 | "state": self.name, 301 | "plan": plan, 302 | "draft": plan, 303 | "messages": messages 304 | } 305 | 306 | class TopicSentenceManualReview(State): 307 | def __init__(self, model): 308 | super().__init__(model, "topic_sentence_manual_review") 309 | 310 | def run(self, state: AgentState, config: dict) -> dict: 311 | ''' 312 | Performs a manual review of the plan stage. 313 | 314 | :param state: current state of agent. 315 | :return: fields 'instruction' and 'plan' added to state. 316 | ''' 317 | review_topic_sentences = state.get("review_topic_sentences", []) 318 | messages = state["messages"] 319 | instruction = config["configurable"]["instruction"] 320 | plan = state["plan"] 321 | if instruction: 322 | review_topic_sentences.append(instruction) 323 | messages.extend([ 324 | HumanMessage( 325 | content=( 326 | TOPIC_SENTENCE_REVIEW_PROMPT + 327 | "\n\n" 328 | f"Here is my task:\n\n{state['task']}\n\n" 329 | f"Here is my plan:\n\n{state['plan']}\n\n" 330 | f"Here is my instruction:\n\n{instruction}\n\n" 331 | "Only return the Markdown for the new plan as output. " 332 | ) 333 | 334 | ) 335 | ]) 336 | response = self.model.invoke(messages) 337 | plan = response.content.strip() 338 | if "```markdown" in plan: 339 | plan = "\n".join(plan.split("\n")[1:-1]) 340 | # sometimes, the LLM just decide it will not obey the instructions to not 341 | # add references. So, we will remove them here. 342 | plan = plan.strip() 343 | search = re.search(r"## References", plan) 344 | if search: 345 | _, r = search.span() 346 | plan = plan[:r] 347 | messages.append(AIMessage(content=plan)) 348 | return { 349 | "state": self.name, 350 | "review_topic_sentences": review_topic_sentences, 351 | "plan": plan, 352 | "draft": plan, 353 | "messages": messages 354 | } 355 | 356 | 357 | class PaperWriter(State): 358 | def __init__(self, model): 359 | super().__init__(model, "paper_writer") 360 | 361 | def run(self, state: AgentState): 362 | ''' 363 | Generate the full draft of the paper based on the content, task and the plan. 364 | :param state: current state of the agent. 365 | :return: field 'draft' and 'revision_number' added to the paper. 
366 | ''' 367 | content = "\n\n".join(state.get('content', [])) 368 | critique = state.get('critique', '') 369 | review_instructions = state.get("review_instructions", []) 370 | task = state["task"] 371 | sentences_per_paragraph = state["sentences_per_paragraph"] 372 | # if previous state is internet_search, draft are in the form of topic senteces 373 | if state["state"] == "internet_search": 374 | additional_info = " in terms of topic sentences" 375 | else: 376 | additional_info = "" 377 | human_content = ( 378 | "Generate a new draft of the document based on the " 379 | "information I gave you.\n\n" 380 | f"Here is my current draft{additional_info}:\n\n" 381 | f"{state['draft']}\n\n" 382 | ) 383 | messages = [ 384 | SystemMessage( 385 | content=PAPER_WRITER_PROMPT.format( 386 | task=task, 387 | content=content, 388 | review_instructions=review_instructions, 389 | critique=critique, 390 | sentences_per_paragraph=sentences_per_paragraph 391 | )), 392 | HumanMessage(content=human_content) 393 | ] 394 | response = self.model.invoke(messages) 395 | draft = response.content.strip() 396 | if "```markdown" in draft: 397 | draft = "\n".join(draft.split("\n")[1:-1]) 398 | draft = draft.strip() 399 | search = re.search(r"## References", draft) 400 | if search: 401 | _, r = search.span() 402 | draft = draft[:r] 403 | return { 404 | "state": self.name, 405 | "draft": draft, 406 | "revision_number": state.get("revision_number", 1) + 1 407 | } 408 | 409 | class WriterManualReviewer(State): 410 | def __init__(self, model): 411 | super().__init__(model, "writer_manual_reviewer") 412 | 413 | def run(self, state: AgentState, config: dict) -> dict: 414 | ''' 415 | Performs manual review of the generated paper. 416 | :param state: current state of the paper. 417 | :return: Reviewed 'draft' and add to list of instructions. 418 | ''' 419 | review_instructions = state.get("review_instructions", []) 420 | instruction = config["configurable"]["instruction"] 421 | draft = state["draft"] 422 | if instruction: 423 | review_instructions.append(instruction) 424 | joined_instructions = '\n'.join(review_instructions) 425 | messages = [ 426 | SystemMessage(content=WRITER_REVIEW_PROMPT), 427 | HumanMessage( 428 | content=( 429 | "Here is my task:\n\n" f"{state['task']}" "\n\n" 430 | "Here is my draft:\n\n" f"{state['draft']}" "\n\n" 431 | "Here is my instruction:\n\n" f"{instruction}" "\n\n" 432 | "Here is my previous instructions that you must " 433 | "observe:\n\n" f"{joined_instructions}" "\n\n" 434 | "Only change in the draft what the user has requested by " 435 | "the instruction.\n" 436 | "Only return the Markdown for the new plan as output. " 437 | ) 438 | 439 | ) 440 | ] 441 | response = self.model.invoke(messages) 442 | draft = response.content.strip() 443 | if "```markdown" in draft: 444 | draft = "\n".join(draft.split("\n")[1:-1]) 445 | search = re.search(r"## References", draft) 446 | if search: 447 | _, r = search.span() 448 | draft = draft[:r] 449 | return { 450 | "state": self.name, 451 | "review_instructions": review_instructions, 452 | "draft": draft 453 | } 454 | 455 | class ReflectionReviewer(State): 456 | def __init__(self, model): 457 | super().__init__(model, "reflection_reviewer") 458 | 459 | def run(self, state: AgentState) -> dict: 460 | ''' 461 | Performs reflection of the paper. 462 | :param state: current state of the agent. 463 | :return: 'critique' of the paper. 
464 | ''' 465 | review_instructions = "\n".join(state.get("review_instructions", [])) 466 | messages = [ 467 | SystemMessage( 468 | content= 469 | REFLECTION_REVIEWER_PROMPT.format( 470 | hypothesis=state["hypothesis"], 471 | review_instructions=review_instructions)), 472 | HumanMessage(content=state['draft']) 473 | ] 474 | response = self.model.invoke(messages) 475 | return { 476 | "state": self.name, 477 | "critique": response.content 478 | } 479 | 480 | class ReflectionManualReview(State): 481 | def __init__(self, model): 482 | super().__init__(model, "additional_reflection_instructions") 483 | 484 | def run(self, state: AgentState, config: dict) -> dict: 485 | ''' 486 | adds additional manual reflection for the review of the paper. 487 | :param state: current state of the agent. 488 | :param config: configuration with instruction. 489 | :return: 'critique' of the paper. 490 | ''' 491 | additional_critique = config["configurable"]["instruction"] 492 | critique = state["critique"] 493 | if additional_critique: 494 | critique = ( 495 | critique + 496 | "\n\nAdditional User's feedback:\n" 497 | f"{additional_critique}\n" 498 | ) 499 | return { 500 | "state": self.name, 501 | "critique": critique 502 | } 503 | 504 | class ReflectionCritiqueReviewer(State): 505 | def __init__(self, model): 506 | super().__init__(model, "reflection_critique_reviewer") 507 | 508 | def run(self, state: AgentState): 509 | ''' 510 | Adds additional content to the reviewed paper. 511 | :param state: current state of the agent. 512 | :return: additional searched content to improve paper. 513 | ''' 514 | queries = { "queries": [] } 515 | result = self.model.invoke([ 516 | SystemMessage( 517 | content=( 518 | RESEARCH_CRITIQUE_PROMPT + 519 | " You must only output the response in the" + 520 | "JSON format '{ \"queries\": list[str] }' and no other text." 521 | )), 522 | HumanMessage(content=state['critique']) 523 | ]).content 524 | # we need to add this because sometimes the LLM decides to put a header 525 | # in the json file. 526 | if result[:7] == "```json": 527 | result = result.split('\n') 528 | result = '\n'.join(result[1:-1]) 529 | try: 530 | queries = json.loads(result) 531 | except: 532 | logging.warning(f"state {self.name}: could not extract query {result}.") 533 | content = state.get('content', []) 534 | if queries["queries"]: 535 | search, cache = search_query_ideas( 536 | query_ideas=queries, cache=state.get("cache", set())) 537 | content = content + search 538 | else: 539 | cache = state.get("cache", set()) 540 | return {"state": self.name, "cache": cache, "content": content} 541 | 542 | class WriteAbstract(State): 543 | def __init__(self, model): 544 | super().__init__(model, "write_abstract") 545 | 546 | def run(self, state: AgentState): 547 | ''' 548 | Generate the abstract of the paper based on the draft, task and the plan. 549 | :param state: current state of the agent. 550 | :return: updated field 'draft' of the paper. 
551 | ''' 552 | human_content = ( 553 | f"Here is my task:\n\n{state['task']}\n\n" 554 | f"Here is my plan:\n\n{state['plan']}\n\n" 555 | f"Here is my research content:\n\n{state['content']}" 556 | f"Here is my current draft:\n\n{state['draft']}\n\n" 557 | ) 558 | messages = [ 559 | SystemMessage(content=ABSTRACT_WRITER_PROMPT), 560 | HumanMessage(content=human_content) 561 | ] 562 | response = self.model.invoke(messages) 563 | draft = response.content.strip() 564 | if "```markdown" in draft: 565 | draft = "\n".join(draft.split("\n")[1:-1]) 566 | return { 567 | "state": self.name, 568 | "draft": draft 569 | } 570 | 571 | class GenerateFigureCaptions(State): 572 | def __init__(self, model): 573 | super().__init__(model, "generate_figure_captions") 574 | 575 | def run(self, state: AgentState): 576 | ''' 577 | Generate figure captions. 578 | :param state: current state of the agent. 579 | :return: field 'draft' reviewed to the paper. 580 | ''' 581 | draft = state["draft"] 582 | pattern = r'!\[([^\]]*)\]\(([^\)]*)\)' 583 | 584 | # find all  definition of figures in markdown 585 | result = list(reversed(list(re.finditer(pattern, draft)))) 586 | fig = len(result) 587 | 588 | # we process the figure list in reverse order because we change 589 | # the file 590 | for entry in result: 591 | left, right = entry.span() 592 | caption = ( 593 | f'\n\n44 | | Normal Text Rate | 45 |Kiroku Rate | 46 |
47 |     <tr> 48 |       <td>Experiment 1</td> 49 |       <td>3</td> 50 |       <td>9</td> 51 |     </tr> 52 |     <tr> 53 |       <td>Experiment 2</td> 54 |       <td>5</td> 55 |       <td>10</td> 56 |     </tr>