├── .env.example ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── babyagi.py └── requirements.txt /.env.example: -------------------------------------------------------------------------------- 1 | # cp .env.example .env 2 | # Edit your .env file with your own values 3 | # Don't commit your .env file to git/push to GitHub! 4 | # Don't modify/delete .env.example unless adding extensions to the project 5 | # which require new variable to be added to the .env file 6 | 7 | MODEL_PATH=models/gpt4all-lora-quantized-ggml.bin 8 | 9 | # Temperature between 0 .. 1, default = 0.2 10 | TEMPERATURE=0.2 11 | 12 | # STORE CONFIG 13 | # TABLE_NAME can be used instead 14 | RESULTS_STORE_NAME=baby-agi-test-table 15 | 16 | # COOPERATIVE MODE CONFIG 17 | # BABY_NAME can be used instead 18 | INSTANCE_NAME=BabyAGI 19 | COOPERATIVE_MODE=none # local 20 | 21 | # RUN CONFIG 22 | OBJECTIVE=The meaning of life, the universe, and everything 23 | #OBJECTIVE=Solve Artificial General Intelligence (AGI) 24 | #OBJECTIVE=How could I achieve cosmic consciousness? 25 | #OBJECTIVE=Restore world peace 26 | #OBJECTIVE=Improve life and happiness for everyone 27 | #OBJECTIVE=What are the main problems of mankind and how can they be solved? 28 | #OBJECTIVE=Plato and Socrates on dialectics 29 | #OBJECTIVE=Design a dialectics based AI agent system to understand reality. 30 | # For backwards compatibility 31 | INITIAL_TASK=Develop a task list. 32 | 33 | # Extensions 34 | # List additional extension .env files to load (except .env.example!) 
35 | DOTENV_EXTENSIONS= 36 | # Set to true to enable command line args support 37 | ENABLE_COMMAND_LINE_ARGS=false 38 | 39 | # Output extra messages for debugging 40 | VERBOSE=false -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py text eol=lf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | 5 | .env 6 | env/ 7 | .venv 8 | *venv/ 9 | 10 | .vscode/ 11 | .idea/ 12 | 13 | models 14 | llama/ 15 | 16 | # for node 17 | chroma/ 18 | node_modules/ 19 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 by Kroll Software-Entwicklung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BabyAGI4All 2 | 3 | A small autonomous AI agent based on [BabyAGI](https://github.com/yoheinakajima/babyagi) by Yohei Nakajima. 4 |
5 | 6 | Runs on CPU with the [GPT4All](https://github.com/nomic-ai/gpt4all) model by Nomic AI. 7 |
8 | 9 | 100% open source, 100% local, no API-keys needed. 10 |
11 | 12 | # Installation: 13 | 14 | 1. Clone this repository 15 | 2. Install the requirements: *pip install -r requirements.txt* 16 | 3. Download a model file (see below) 17 | 4. Copy the file *.env.example* to *.env* 18 | 5. Edit the model-path and other preferences in the file *.env* 19 | 20 | ## Model Downloads 21 | 22 | The following model files have been tested successfully: 23 | 24 | * *gpt4all-lora-quantized-ggml.bin* 25 | * *ggml-wizardLM-7B.q4_2.bin* 26 | * *ggml-vicuna-7b-1.1-q4_2.bin* 27 | 28 | Some of these model files can be downloaded from [here](https://github.com/nomic-ai/gpt4all-chat#manual-download-of-models). 29 |
30 |
31 | 32 | Then run *python babyagi.py* 33 |
34 | 35 | Have fun! 36 |
37 | -------------------------------------------------------------------------------- /babyagi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import logging 4 | from collections import deque 5 | from typing import Dict, List 6 | import importlib 7 | import chromadb 8 | from dotenv import load_dotenv 9 | from chromadb.api.types import Documents, EmbeddingFunction, Embeddings 10 | from chromadb.utils.embedding_functions import InstructorEmbeddingFunction 11 | from llama_cpp import Llama 12 | 13 | # Load default environment variables (.env) 14 | load_dotenv() 15 | 16 | # Engine configuration 17 | LLM_MODEL = "GPT4All" 18 | 19 | # Table config 20 | RESULTS_STORE_NAME = os.getenv("RESULTS_STORE_NAME", os.getenv("TABLE_NAME", "")) 21 | assert RESULTS_STORE_NAME, "\033[91m\033[1m" + "RESULTS_STORE_NAME environment variable is missing from .env" + "\033[0m\033[0m" 22 | 23 | # Run configuration 24 | INSTANCE_NAME = os.getenv("INSTANCE_NAME", os.getenv("BABY_NAME", "BabyAGI")) 25 | COOPERATIVE_MODE = "none" 26 | JOIN_EXISTING_OBJECTIVE = False 27 | 28 | # Goal configuation 29 | OBJECTIVE = os.getenv("OBJECTIVE", "") 30 | INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", "")) 31 | 32 | # Model configuration 33 | TEMPERATURE = float(os.getenv("TEMPERATURE", 0.2)) 34 | 35 | VERBOSE = (os.getenv("VERBOSE", "false").lower() == "true") 36 | 37 | # Extensions support begin 38 | 39 | def can_import(module_name): 40 | try: 41 | importlib.import_module(module_name) 42 | return True 43 | except ImportError: 44 | return False 45 | 46 | print("\033[95m\033[1m"+"\n*****CONFIGURATION*****\n"+"\033[0m\033[0m") 47 | print(f"Name : {INSTANCE_NAME}") 48 | print(f"Mode : {'alone' if COOPERATIVE_MODE in ['n', 'none'] else 'local' if COOPERATIVE_MODE in ['l', 'local'] else 'distributed' if COOPERATIVE_MODE in ['d', 'distributed'] else 'undefined'}") 49 | print(f"LLM : {LLM_MODEL}") 50 | 51 | # Check if we know 
what we are doing 52 | assert OBJECTIVE, "\033[91m\033[1m" + "OBJECTIVE environment variable is missing from .env" + "\033[0m\033[0m" 53 | assert INITIAL_TASK, "\033[91m\033[1m" + "INITIAL_TASK environment variable is missing from .env" + "\033[0m\033[0m" 54 | 55 | MODEL_PATH = os.getenv("MODEL_PATH", "models/gpt4all-lora-quantized-ggml.bin") 56 | 57 | print(f"GPT4All : {MODEL_PATH}" + "\n") 58 | assert os.path.exists(MODEL_PATH), "\033[91m\033[1m" + f"Model can't be found." + "\033[0m\033[0m" 59 | 60 | #CTX_MAX = 2048 61 | #CTX_MAX = 8192 62 | CTX_MAX = 16384 63 | #THREADS_NUM = 16 64 | THREADS_NUM = 4 65 | 66 | llm = Llama( 67 | model_path=MODEL_PATH, 68 | n_ctx=CTX_MAX, n_threads=THREADS_NUM, 69 | use_mlock=True, 70 | verbose=False, 71 | ) 72 | 73 | print("\033[94m\033[1m" + "\n*****OBJECTIVE*****\n" + "\033[0m\033[0m") 74 | print(f"{OBJECTIVE}") 75 | 76 | if not JOIN_EXISTING_OBJECTIVE: print("\033[93m\033[1m" + "\nInitial task:" + "\033[0m\033[0m" + f" {INITIAL_TASK}") 77 | else: print("\033[93m\033[1m" + f"\nJoining to help the objective" + "\033[0m\033[0m") 78 | 79 | # Results storage using local ChromaDB 80 | class DefaultResultsStorage: 81 | def __init__(self): 82 | logging.getLogger('chromadb').setLevel(logging.ERROR) 83 | # Create Chroma collection 84 | chroma_persist_dir = "chroma" 85 | chroma_client = chromadb.Client( 86 | settings=chromadb.config.Settings( 87 | chroma_db_impl="duckdb+parquet", 88 | persist_directory=chroma_persist_dir, 89 | ) 90 | ) 91 | 92 | metric = "cosine" 93 | embedding_function = InstructorEmbeddingFunction() 94 | self.collection = chroma_client.get_or_create_collection( 95 | name=RESULTS_STORE_NAME, 96 | metadata={"hnsw:space": metric}, 97 | embedding_function=embedding_function, 98 | ) 99 | 100 | def add(self, task: Dict, result: Dict, result_id: str, vector: List): 101 | embeddings = self.collection._embedding_function([vector]) 102 | 103 | if (len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0): # Check if the 
result already exists 104 | self.collection.update( 105 | ids=result_id, 106 | embeddings=embeddings, 107 | documents=vector, 108 | metadatas={"task": task["task_name"], "result": result}, 109 | ) 110 | else: 111 | self.collection.add( 112 | ids=result_id, 113 | embeddings=embeddings, 114 | documents=vector, 115 | metadatas={"task": task["task_name"], "result": result}, 116 | ) 117 | 118 | def query(self, query: str, top_results_num: int) -> List[dict]: 119 | count: int = self.collection.count() 120 | if count == 0: 121 | return [] 122 | results = self.collection.query( 123 | query_texts=query, 124 | n_results=min(top_results_num, count), 125 | include=["metadatas"] 126 | ) 127 | tasks = [] 128 | count = len(results["ids"][0]) 129 | for i in range(count): 130 | resultidstr = results["ids"][0][i] 131 | id = int(resultidstr[7:]) 132 | item = results["metadatas"][0][i] 133 | task = {'task_id': id, 'task_name': item["task"]} 134 | tasks.append(task) 135 | return tasks 136 | 137 | 138 | # Initialize results storage 139 | results_storage = DefaultResultsStorage() 140 | 141 | # Task storage supporting only a single instance of BabyAGI 142 | class SingleTaskListStorage: 143 | def __init__(self): 144 | self.tasks = deque([]) 145 | self.task_id_counter = 0 146 | 147 | def append(self, task: Dict): 148 | self.tasks.append(task) 149 | 150 | def replace(self, tasks: List[Dict]): 151 | self.tasks = deque(tasks) 152 | 153 | def popleft(self): 154 | return self.tasks.popleft() 155 | 156 | def is_empty(self): 157 | return False if self.tasks else True 158 | 159 | def next_task_id(self): 160 | self.task_id_counter += 1 161 | return self.task_id_counter 162 | 163 | def get_task_names(self): 164 | return [t["task_name"] for t in self.tasks] 165 | 166 | 167 | # Initialize tasks storage 168 | tasks_storage = SingleTaskListStorage() 169 | 170 | def gpt_call(prompt: str, temperature: float = TEMPERATURE, max_tokens: int = 256): 171 | result = llm(prompt[:CTX_MAX], echo=True, 
temperature=temperature, max_tokens=max_tokens) 172 | return result['choices'][0]['text'][len(prompt):].strip() 173 | 174 | def strip_numbered_list(nl: List[str]) -> List[str]: 175 | result_list = [] 176 | filter_chars = ['#', '(', ')', '[', ']', '.', ':', ' '] 177 | 178 | for line in nl: 179 | line = line.strip() 180 | if len(line) > 0: 181 | parts = line.split(" ", 1) 182 | if len(parts) == 2: 183 | left_part = ''.join(x for x in parts[0] if not x in filter_chars) 184 | if left_part.isnumeric(): 185 | result_list.append(parts[1].strip()) 186 | else: 187 | result_list.append(line) 188 | else: 189 | result_list.append(line) 190 | 191 | # filter result_list 192 | result_list = [line for line in result_list if len(line) > 3] 193 | 194 | # remove duplicates 195 | result_list = list(set(result_list)) 196 | return result_list 197 | 198 | def fix_prompt(prompt: str) -> str: 199 | lines = prompt.split("\n") if "\n" in prompt else [prompt] 200 | return "\n".join([line.strip() for line in lines]) 201 | 202 | def task_creation_agent( 203 | objective: str, result: Dict, task_description: str, task_list: List[str] 204 | ): 205 | prompt = f""" 206 | Your objective: {objective}\n 207 | Take into account these previously completed tasks but don't repeat them: {task_list}.\n 208 | The last completed task has the result: {result["data"]}.\n 209 | Develop a task list based on the result.\n 210 | Response:""" 211 | 212 | prompt = fix_prompt(prompt) 213 | 214 | response = gpt_call(prompt) 215 | pos = response.find("1") 216 | if (pos > 0): 217 | response = response[pos - 1:] 218 | 219 | if response == '': 220 | print("\n*** Empty Response from task_creation_agent***") 221 | new_tasks_list = result["data"].split("\n") if len(result) > 0 else [response] 222 | else: 223 | new_tasks = response.split("\n") if "\n" in response else [response] 224 | new_tasks_list = strip_numbered_list(new_tasks) 225 | 226 | return [{"task_name": task_name} for task_name in (t for t in new_tasks_list if not t 
== '')] 227 | 228 | 229 | def prioritization_agent(): 230 | task_names = tasks_storage.get_task_names() 231 | next_task_id = tasks_storage.next_task_id() 232 | 233 | prompt = f""" 234 | Please prioritize, summarize and consolidate the following tasks: {task_names}.\n 235 | Consider the ultimate objective: {OBJECTIVE}.\n 236 | Return the result as a numbered list. 237 | """ 238 | 239 | prompt = fix_prompt(prompt) 240 | 241 | response = gpt_call(prompt) 242 | pos = response.find("1") 243 | if (pos > 0): 244 | response = response[pos - 1:] 245 | 246 | new_tasks = response.split("\n") if "\n" in response else [response] 247 | new_tasks = strip_numbered_list(new_tasks) 248 | new_tasks_list = [] 249 | i = 0 250 | for task_string in new_tasks: 251 | new_tasks_list.append({"task_id": i + next_task_id, "task_name": task_string}) 252 | i += 1 253 | 254 | if len(new_tasks_list) > 0: 255 | tasks_storage.replace(new_tasks_list) 256 | 257 | 258 | # Execute a task based on the objective and five previous tasks 259 | def execution_agent(objective: str, task: str) -> str: 260 | """ 261 | Executes a task based on the given objective and previous context. 262 | 263 | Args: 264 | objective (str): The objective or goal for the AI to perform the task. 265 | task (str): The task to be executed by the AI. 266 | 267 | Returns: 268 | str: The response generated by the AI for the given task. 
269 | 270 | """ 271 | 272 | context = context_agent(query=objective, top_results_num=5) 273 | 274 | context_list = [t['task_name'] for t in context if t['task_name'] != INITIAL_TASK] 275 | #context_list = [t['task_name'] for t in context] 276 | 277 | # remove duplicates 278 | context_list = list(set(context_list)) 279 | 280 | if VERBOSE and len(context_list) > 0: 281 | print("\n*******RELEVANT CONTEXT******\n") 282 | print(context_list) 283 | 284 | if task == INITIAL_TASK: 285 | prompt = f""" 286 | You are an AI who performs one task based on the following objective: {objective}.\n 287 | Your task: {task}\nResponse:""" 288 | else: 289 | prompt = f""" 290 | Your objective: {objective}.\n 291 | Take into account these previously completed tasks but don't repeat them: {context_list}.\n 292 | Your task: {task}\n 293 | Response:""" 294 | 295 | #Give an advice how to achieve your task!\n 296 | 297 | prompt = fix_prompt(prompt) 298 | 299 | result = gpt_call(prompt) 300 | pos = result.find("1") 301 | if (pos > 0): 302 | result = result[pos - 1:] 303 | return result 304 | 305 | 306 | # Get the top n completed tasks for the objective 307 | def context_agent(query: str, top_results_num: int): 308 | """ 309 | Retrieves context for a given query from an index of tasks. 310 | 311 | Args: 312 | query (str): The query or objective for retrieving context. 313 | top_results_num (int): The number of top results to retrieve. 314 | 315 | Returns: 316 | list: A list of tasks as context for the given query, sorted by relevance. 
317 | 318 | """ 319 | results = results_storage.query(query=query, top_results_num=top_results_num) 320 | #print("\n***** RESULTS *****") 321 | #print(results) 322 | return results 323 | 324 | # Add the initial task if starting new objective 325 | if not JOIN_EXISTING_OBJECTIVE: 326 | initial_task = { 327 | "task_id": tasks_storage.next_task_id(), 328 | "task_name": INITIAL_TASK 329 | } 330 | tasks_storage.append(initial_task) 331 | 332 | def main (): 333 | while True: 334 | # As long as there are tasks in the storage... 335 | if not tasks_storage.is_empty(): 336 | # Print the task list 337 | print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") 338 | for t in tasks_storage.get_task_names(): 339 | print(" • "+t) 340 | 341 | # Step 1: Pull the first incomplete task 342 | task = tasks_storage.popleft() 343 | print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") 344 | print(task['task_name']) 345 | 346 | # Send to execution function to complete the task based on the context 347 | result = execution_agent(OBJECTIVE, task["task_name"]) 348 | 349 | print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") 350 | print(result) 351 | 352 | # Step 2: Enrich result and store in the results storage 353 | # This is where you should enrich the result if needed 354 | enriched_result = { 355 | "data": result 356 | } 357 | # extract the actual result from the dictionary 358 | # since we don't do enrichment currently 359 | vector = enriched_result["data"] 360 | 361 | result_id = f"result_{task['task_id']}" 362 | results_storage.add(task, result, result_id, vector) 363 | 364 | # Step 3: Create new tasks and reprioritize task list 365 | # only the main instance in cooperative mode does that 366 | new_tasks = task_creation_agent( 367 | OBJECTIVE, 368 | enriched_result, 369 | task["task_name"], 370 | tasks_storage.get_task_names(), 371 | ) 372 | 373 | for new_task in new_tasks: 374 | if not new_task['task_name'] == '': 375 | 
new_task.update({"task_id": tasks_storage.next_task_id()}) 376 | tasks_storage.append(new_task) 377 | 378 | if not JOIN_EXISTING_OBJECTIVE: prioritization_agent() 379 | 380 | # Sleep a bit before checking the task list again 381 | time.sleep(5) 382 | 383 | else: 384 | print ("Ready, no more tasks.") 385 | 386 | if __name__ == "__main__": 387 | main() 388 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.4.0 2 | chromadb==0.3.21 3 | pre-commit>=3.2.0 4 | python-dotenv==1.0.0 5 | InstructorEmbedding>=1.0.0 6 | llama-cpp-python==0.1.43 --------------------------------------------------------------------------------