├── .env.example ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── babyagi.py └── requirements.txt /.env.example: -------------------------------------------------------------------------------- 1 | # cp .env.example .env 2 | # Edit your .env file with your own values 3 | # Don't commit your .env file to git/push to GitHub! 4 | # Don't modify/delete .env.example unless adding extensions to the project 5 | # which require new variable to be added to the .env file 6 | 7 | MODEL_PATH=models/gpt4all-lora-quantized-ggml.bin 8 | 9 | # Temperature between 0 .. 1, default = 0.2 10 | TEMPERATURE=0.2 11 | 12 | # STORE CONFIG 13 | # TABLE_NAME can be used instead 14 | RESULTS_STORE_NAME=baby-agi-test-table 15 | 16 | # COOPERATIVE MODE CONFIG 17 | # BABY_NAME can be used instead 18 | INSTANCE_NAME=BabyAGI 19 | COOPERATIVE_MODE=none # local 20 | 21 | # RUN CONFIG 22 | OBJECTIVE=The meaning of life, the universe, and everything 23 | #OBJECTIVE=Solve Artificial General Intelligence (AGI) 24 | #OBJECTIVE=How could I achieve cosmic consciousness? 25 | #OBJECTIVE=Restore world peace 26 | #OBJECTIVE=Improve life and happiness for everyone 27 | #OBJECTIVE=What are the main problems of mankind and how can they be solved? 28 | #OBJECTIVE=Plato and Socrates on dialectics 29 | #OBJECTIVE=Design a dialectics based AI agent system to understand reality. 30 | # For backwards compatibility 31 | INITIAL_TASK=Develop a task list. 32 | 33 | # Extensions 34 | # List additional extension .env files to load (except .env.example!) 
35 | DOTENV_EXTENSIONS= 36 | # Set to true to enable command line args support 37 | ENABLE_COMMAND_LINE_ARGS=false 38 | 39 | # Output extra messages for debugging 40 | VERBOSE=false -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py text eol=lf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | 5 | .env 6 | env/ 7 | .venv 8 | *venv/ 9 | 10 | .vscode/ 11 | .idea/ 12 | 13 | models 14 | llama/ 15 | 16 | # for node 17 | chroma/ 18 | node_modules/ 19 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 by Kroll Software-Entwicklung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BabyAGI4All 2 | 3 | A small autonomous AI agent based on [BabyAGI](https://github.com/yoheinakajima/babyagi) by Yohei Nakajima. 4 |
5 | 6 | Runs on CPU with the [GPT4All](https://github.com/nomic-ai/gpt4all) model by Nomic AI. 7 |
8 | 9 | 100% open source, 100% local, no API-keys needed. 10 |
11 | 12 | # Installation: 13 | 14 | 1. Clone this repository 15 | 2. Install the requirements: *pip install -r requirements.txt* 16 | 3. Download a model file (see below) 17 | 4. Copy the file *.env.example* to *.env* 18 | 5. Edit the model-path and other preferences in the file *.env* 19 | 20 | ## Model Downloads 21 | 22 | The following model files have been tested successfully: 23 | 24 | * *gpt4all-lora-quantized-ggml.bin* 25 | * *ggml-wizardLM-7B.q4_2.bin* 26 | * *ggml-vicuna-7b-1.1-q4_2.bin* 27 | 28 | Some of these model files can be downloaded from [here](https://github.com/nomic-ai/gpt4all-chat#manual-download-of-models). 29 |
30 |
31 | 32 | Then run *python babyagi.py* 33 |
34 | 35 | Have fun! 36 |
37 | -------------------------------------------------------------------------------- /babyagi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import logging 4 | from collections import deque 5 | from typing import Dict, List 6 | import importlib 7 | import chromadb 8 | from dotenv import load_dotenv 9 | from chromadb.api.types import Documents, EmbeddingFunction, Embeddings 10 | from chromadb.utils.embedding_functions import InstructorEmbeddingFunction 11 | from llama_cpp import Llama 12 | 13 | # Load default environment variables (.env) 14 | load_dotenv() 15 | 16 | # Engine configuration 17 | LLM_MODEL = "GPT4All" 18 | 19 | # Table config 20 | RESULTS_STORE_NAME = os.getenv("RESULTS_STORE_NAME", os.getenv("TABLE_NAME", "")) 21 | assert RESULTS_STORE_NAME, "\033[91m\033[1m" + "RESULTS_STORE_NAME environment variable is missing from .env" + "\033[0m\033[0m" 22 | 23 | # Run configuration 24 | INSTANCE_NAME = os.getenv("INSTANCE_NAME", os.getenv("BABY_NAME", "BabyAGI")) 25 | COOPERATIVE_MODE = "none" 26 | JOIN_EXISTING_OBJECTIVE = False 27 | 28 | # Goal configuation 29 | OBJECTIVE = os.getenv("OBJECTIVE", "") 30 | INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", "")) 31 | 32 | # Model configuration 33 | TEMPERATURE = float(os.getenv("TEMPERATURE", 0.2)) 34 | 35 | VERBOSE = (os.getenv("VERBOSE", "false").lower() == "true") 36 | 37 | # Extensions support begin 38 | 39 | def can_import(module_name): 40 | try: 41 | importlib.import_module(module_name) 42 | return True 43 | except ImportError: 44 | return False 45 | 46 | print("\033[95m\033[1m"+"\n*****CONFIGURATION*****\n"+"\033[0m\033[0m") 47 | print(f"Name : {INSTANCE_NAME}") 48 | print(f"Mode : {'alone' if COOPERATIVE_MODE in ['n', 'none'] else 'local' if COOPERATIVE_MODE in ['l', 'local'] else 'distributed' if COOPERATIVE_MODE in ['d', 'distributed'] else 'undefined'}") 49 | print(f"LLM : {LLM_MODEL}") 50 | 51 | # Check if we know 
what we are doing 52 | assert OBJECTIVE, "\033[91m\033[1m" + "OBJECTIVE environment variable is missing from .env" + "\033[0m\033[0m" 53 | assert INITIAL_TASK, "\033[91m\033[1m" + "INITIAL_TASK environment variable is missing from .env" + "\033[0m\033[0m" 54 | 55 | MODEL_PATH = os.getenv("MODEL_PATH", "models/gpt4all-lora-quantized-ggml.bin") 56 | 57 | print(f"GPT4All : {MODEL_PATH}" + "\n") 58 | assert os.path.exists(MODEL_PATH), "\033[91m\033[1m" + f"Model can't be found." + "\033[0m\033[0m" 59 | 60 | #CTX_MAX = 2048 61 | #CTX_MAX = 8192 62 | CTX_MAX = 16384 63 | #THREADS_NUM = 16 64 | THREADS_NUM = 4 65 | 66 | llm = Llama( 67 | model_path=MODEL_PATH, 68 | n_ctx=CTX_MAX, n_threads=THREADS_NUM, 69 | use_mlock=True, 70 | verbose=False, 71 | ) 72 | 73 | print("\033[94m\033[1m" + "\n*****OBJECTIVE*****\n" + "\033[0m\033[0m") 74 | print(f"{OBJECTIVE}") 75 | 76 | if not JOIN_EXISTING_OBJECTIVE: print("\033[93m\033[1m" + "\nInitial task:" + "\033[0m\033[0m" + f" {INITIAL_TASK}") 77 | else: print("\033[93m\033[1m" + f"\nJoining to help the objective" + "\033[0m\033[0m") 78 | 79 | # Results storage using local ChromaDB 80 | class DefaultResultsStorage: 81 | def __init__(self): 82 | logging.getLogger('chromadb').setLevel(logging.ERROR) 83 | # Create Chroma collection 84 | chroma_persist_dir = "chroma" 85 | chroma_client = chromadb.Client( 86 | settings=chromadb.config.Settings( 87 | chroma_db_impl="duckdb+parquet", 88 | persist_directory=chroma_persist_dir, 89 | ) 90 | ) 91 | 92 | metric = "cosine" 93 | embedding_function = InstructorEmbeddingFunction() 94 | self.collection = chroma_client.get_or_create_collection( 95 | name=RESULTS_STORE_NAME, 96 | metadata={"hnsw:space": metric}, 97 | embedding_function=embedding_function, 98 | ) 99 | 100 | def add(self, task: Dict, result: Dict, result_id: str, vector: List): 101 | embeddings = self.collection._embedding_function([vector]) 102 | 103 | if (len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0): # Check if the 
result already exists 104 | self.collection.update( 105 | ids=result_id, 106 | embeddings=embeddings, 107 | documents=vector, 108 | metadatas={"task": task["task_name"], "result": result}, 109 | ) 110 | else: 111 | self.collection.add( 112 | ids=result_id, 113 | embeddings=embeddings, 114 | documents=vector, 115 | metadatas={"task": task["task_name"], "result": result}, 116 | ) 117 | 118 | def query(self, query: str, top_results_num: int) -> List[dict]: 119 | count: int = self.collection.count() 120 | if count == 0: 121 | return [] 122 | results = self.collection.query( 123 | query_texts=query, 124 | n_results=min(top_results_num, count), 125 | include=["metadatas"] 126 | ) 127 | tasks = [] 128 | count = len(results["ids"][0]) 129 | for i in range(count): 130 | resultidstr = results["ids"][0][i] 131 | id = int(resultidstr[7:]) 132 | item = results["metadatas"][0][i] 133 | task = {'task_id': id, 'task_name': item["task"]} 134 | tasks.append(task) 135 | return tasks 136 | 137 | 138 | # Initialize results storage 139 | results_storage = DefaultResultsStorage() 140 | 141 | # Task storage supporting only a single instance of BabyAGI 142 | class SingleTaskListStorage: 143 | def __init__(self): 144 | self.tasks = deque([]) 145 | self.task_id_counter = 0 146 | 147 | def append(self, task: Dict): 148 | self.tasks.append(task) 149 | 150 | def replace(self, tasks: List[Dict]): 151 | self.tasks = deque(tasks) 152 | 153 | def popleft(self): 154 | return self.tasks.popleft() 155 | 156 | def is_empty(self): 157 | return False if self.tasks else True 158 | 159 | def next_task_id(self): 160 | self.task_id_counter += 1 161 | return self.task_id_counter 162 | 163 | def get_task_names(self): 164 | return [t["task_name"] for t in self.tasks] 165 | 166 | 167 | # Initialize tasks storage 168 | tasks_storage = SingleTaskListStorage() 169 | 170 | def gpt_call(prompt: str, temperature: float = TEMPERATURE, max_tokens: int = 256): 171 | result = llm(prompt[:CTX_MAX], echo=True, 
temperature=temperature, max_tokens=max_tokens) 172 | return result['choices'][0]['text'][len(prompt):].strip() 173 | 174 | def strip_numbered_list(nl: List[str]) -> List[str]: 175 | result_list = [] 176 | filter_chars = ['#', '(', ')', '[', ']', '.', ':', ' '] 177 | 178 | for line in nl: 179 | line = line.strip() 180 | if len(line) > 0: 181 | parts = line.split(" ", 1) 182 | if len(parts) == 2: 183 | left_part = ''.join(x for x in parts[0] if not x in filter_chars) 184 | if left_part.isnumeric(): 185 | result_list.append(parts[1].strip()) 186 | else: 187 | result_list.append(line) 188 | else: 189 | result_list.append(line) 190 | 191 | # filter result_list 192 | result_list = [line for line in result_list if len(line) > 3] 193 | 194 | # remove duplicates 195 | result_list = list(set(result_list)) 196 | return result_list 197 | 198 | def fix_prompt(prompt: str) -> str: 199 | lines = prompt.split("\n") if "\n" in prompt else [prompt] 200 | return "\n".join([line.strip() for line in lines]) 201 | 202 | def task_creation_agent( 203 | objective: str, result: Dict, task_description: str, task_list: List[str] 204 | ): 205 | prompt = f""" 206 | Your objective: {objective}\n 207 | Take into account these previously completed tasks but don't repeat them: {task_list}.\n 208 | The last completed task has the result: {result["data"]}.\n 209 | Develop a task list based on the result.\n 210 | Response:""" 211 | 212 | prompt = fix_prompt(prompt) 213 | 214 | response = gpt_call(prompt) 215 | pos = response.find("1") 216 | if (pos > 0): 217 | response = response[pos - 1:] 218 | 219 | if response == '': 220 | print("\n*** Empty Response from task_creation_agent***") 221 | new_tasks_list = result["data"].split("\n") if len(result) > 0 else [response] 222 | else: 223 | new_tasks = response.split("\n") if "\n" in response else [response] 224 | new_tasks_list = strip_numbered_list(new_tasks) 225 | 226 | return [{"task_name": task_name} for task_name in (t for t in new_tasks_list if not t 
== '')] 227 | 228 | 229 | def prioritization_agent(): 230 | task_names = tasks_storage.get_task_names() 231 | next_task_id = tasks_storage.next_task_id() 232 | 233 | prompt = f""" 234 | Please prioritize, summarize and consolidate the following tasks: {task_names}.\n 235 | Consider the ultimate objective: {OBJECTIVE}.\n 236 | Return the result as a numbered list. 237 | """ 238 | 239 | prompt = fix_prompt(prompt) 240 | 241 | response = gpt_call(prompt) 242 | pos = response.find("1") 243 | if (pos > 0): 244 | response = response[pos - 1:] 245 | 246 | new_tasks = response.split("\n") if "\n" in response else [response] 247 | new_tasks = strip_numbered_list(new_tasks) 248 | new_tasks_list = [] 249 | i = 0 250 | for task_string in new_tasks: 251 | new_tasks_list.append({"task_id": i + next_task_id, "task_name": task_string}) 252 | i += 1 253 | 254 | if len(new_tasks_list) > 0: 255 | tasks_storage.replace(new_tasks_list) 256 | 257 | 258 | # Execute a task based on the objective and five previous tasks 259 | def execution_agent(objective: str, task: str) -> str: 260 | """ 261 | Executes a task based on the given objective and previous context. 262 | 263 | Args: 264 | objective (str): The objective or goal for the AI to perform the task. 265 | task (str): The task to be executed by the AI. 266 | 267 | Returns: 268 | str: The response generated by the AI for the given task. 
269 | 270 | """ 271 | 272 | context = context_agent(query=objective, top_results_num=5) 273 | 274 | context_list = [t['task_name'] for t in context if t['task_name'] != INITIAL_TASK] 275 | #context_list = [t['task_name'] for t in context] 276 | 277 | # remove duplicates 278 | context_list = list(set(context_list)) 279 | 280 | if VERBOSE and len(context_list) > 0: 281 | print("\n*******RELEVANT CONTEXT******\n") 282 | print(context_list) 283 | 284 | if task == INITIAL_TASK: 285 | prompt = f""" 286 | You are an AI who performs one task based on the following objective: {objective}.\n 287 | Your task: {task}\nResponse:""" 288 | else: 289 | prompt = f""" 290 | Your objective: {objective}.\n 291 | Take into account these previously completed tasks but don't repeat them: {context_list}.\n 292 | Your task: {task}\n 293 | Response:""" 294 | 295 | #Give an advice how to achieve your task!\n 296 | 297 | prompt = fix_prompt(prompt) 298 | 299 | result = gpt_call(prompt) 300 | pos = result.find("1") 301 | if (pos > 0): 302 | result = result[pos - 1:] 303 | return result 304 | 305 | 306 | # Get the top n completed tasks for the objective 307 | def context_agent(query: str, top_results_num: int): 308 | """ 309 | Retrieves context for a given query from an index of tasks. 310 | 311 | Args: 312 | query (str): The query or objective for retrieving context. 313 | top_results_num (int): The number of top results to retrieve. 314 | 315 | Returns: 316 | list: A list of tasks as context for the given query, sorted by relevance. 
317 | 318 | """ 319 | results = results_storage.query(query=query, top_results_num=top_results_num) 320 | #print("\n***** RESULTS *****") 321 | #print(results) 322 | return results 323 | 324 | # Add the initial task if starting new objective 325 | if not JOIN_EXISTING_OBJECTIVE: 326 | initial_task = { 327 | "task_id": tasks_storage.next_task_id(), 328 | "task_name": INITIAL_TASK 329 | } 330 | tasks_storage.append(initial_task) 331 | 332 | def main (): 333 | while True: 334 | # As long as there are tasks in the storage... 335 | if not tasks_storage.is_empty(): 336 | # Print the task list 337 | print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") 338 | for t in tasks_storage.get_task_names(): 339 | print(" • "+t) 340 | 341 | # Step 1: Pull the first incomplete task 342 | task = tasks_storage.popleft() 343 | print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") 344 | print(task['task_name']) 345 | 346 | # Send to execution function to complete the task based on the context 347 | result = execution_agent(OBJECTIVE, task["task_name"]) 348 | 349 | print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") 350 | print(result) 351 | 352 | # Step 2: Enrich result and store in the results storage 353 | # This is where you should enrich the result if needed 354 | enriched_result = { 355 | "data": result 356 | } 357 | # extract the actual result from the dictionary 358 | # since we don't do enrichment currently 359 | vector = enriched_result["data"] 360 | 361 | result_id = f"result_{task['task_id']}" 362 | results_storage.add(task, result, result_id, vector) 363 | 364 | # Step 3: Create new tasks and reprioritize task list 365 | # only the main instance in cooperative mode does that 366 | new_tasks = task_creation_agent( 367 | OBJECTIVE, 368 | enriched_result, 369 | task["task_name"], 370 | tasks_storage.get_task_names(), 371 | ) 372 | 373 | for new_task in new_tasks: 374 | if not new_task['task_name'] == '': 375 | 
new_task.update({"task_id": tasks_storage.next_task_id()}) 376 | tasks_storage.append(new_task) 377 | 378 | if not JOIN_EXISTING_OBJECTIVE: prioritization_agent() 379 | 380 | # Sleep a bit before checking the task list again 381 | time.sleep(5) 382 | 383 | else: 384 | print ("Ready, no more tasks.") 385 | 386 | if __name__ == "__main__": 387 | main() 388 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.4.0 2 | chromadb==0.3.21 3 | pre-commit>=3.2.0 4 | python-dotenv==1.0.0 5 | InstructorEmbedding>=1.0.0 6 | llama-cpp-python==0.1.43 --------------------------------------------------------------------------------