├── requirements.txt ├── config.yaml ├── architecture ├── architecture.drawio.png └── architecture.drawio ├── README.md ├── prompts.py ├── agent.py └── search.py /requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor==2.4.0 2 | PyYAML==6.0.1 3 | requests==2.31.0 4 | beautifulsoup4==4.12.3 -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY: "ENTER YOUR OPENAI KEY" 2 | SERPER_DEV_API_KEY: "ENTER YOUR SERPER DEV KEY" 3 | -------------------------------------------------------------------------------- /architecture/architecture.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-adeojo/custom_agent_tutorial/HEAD/architecture/architecture.drawio.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Setting Up and Running Custom Agent Script 3 | 4 | ### Prerequisites 5 | 1. **Install Anaconda:** 6 | Download Anaconda from [https://www.anaconda.com/](https://www.anaconda.com/). 7 | 8 | 2. **Create a Virtual Environment:** 9 | ```bash 10 | conda create -n agent_env python=3.10 pip 11 | ``` 12 | 13 | 3. **Activate the Virtual Environment:** 14 | ```bash 15 | conda activate agent_env 16 | ``` 17 | 18 | ### Clone and Navigate to the Repository 19 | 1. **Clone the Repo:** 20 | ```bash 21 | git clone https://github.com/john-adeojo/custom_agent_tutorial.git 22 | ``` 23 | 24 | 2. **Navigate to the Repo:** 25 | ```bash 26 | cd /path/to/your-repo/custom_agent_tutorial 27 | ``` 28 | 29 | 3. **Install Requirements:** 30 | ```bash 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | ### Configure API Keys 35 | 1. **Open the `config.yaml`:** 36 | Update your keys, see next step for links to get your own API keys. 37 | ```bash 38 | nano config.yaml 39 | ``` 40 | 41 | 3. **Enter API Keys:** 42 | - **Serper API Key:** Get it from [https://serper.dev/](https://serper.dev/) 43 | - **OpenAI API Key:** Get it from [https://openai.com/](https://openai.com/) 44 | 45 | ### Run Your Query 46 | ```bash 47 | python agent.py run 48 | ``` 49 | Then run your query 50 | -------------------------------------------------------------------------------- /prompts.py: -------------------------------------------------------------------------------- 1 | planning_agent_prompt = ("You are an AI planning agent working with an integration agent. You have access to specialised tools. When addressing queries, you should follow this two-step methodology:\n" 2 | "Step 1: Thought. Begin by contemplating the problem thoroughly and devising a plan of action." 3 | "Step 2: Action. Clearly state the inputs you will use with any tools necessary to address the problem. This preparation is essential for executing your plan effectively.\n" 4 | "You must ensure your plan takes into account any feedback (if available)\n\n." 5 | "here are the outputs from the tools you have used: {outputs}\n\n" 6 | "Here is your previous plan: {plan}\n\n" 7 | "Here's the feedback:{feedback} \n\n" 8 | "Here are the specifications of your tools:\n" 9 | "{tool_specs}\n" 10 | "Continue this process until you have gathered enough information to comprehensively answer the query." 11 | ) 12 | 13 | integration_agent_prompt = ("You are an AI Integration Agent working with a planning agent. Your job is to synthesise the outputs from the planning agent into a coherent response.\n" 14 | "You must do this by considering the plan, the outputs from tools, and the original query.\n" 15 | "If any of the information is not sufficient, you should provide feedback to the planning agent to refine the plan.\n" 16 | "If the information is sufficient, you should provide a comprehenisve response to the query with appropriate citations. \n" 17 | "Your response to the query must be based on the outputs from the tools\n" 18 | "The output of the tool is a dictionary where the \n" 19 | "key is the URL source of the info and the value is the content of the URL \n" 20 | "You should use the source in citation \n" 21 | "Here are the outputs from the tool: {outputs}\n\n" 22 | "Here is the plan from the planning agent: {plan}\n\n" 23 | ) -------------------------------------------------------------------------------- /architecture/architecture.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import json 4 | import requests 5 | from termcolor import colored 6 | from prompts import planning_agent_prompt, integration_agent_prompt 7 | from search import WebSearcher 8 | 9 | 10 | def load_config(file_path): 11 | with open(file_path, 'r') as file: 12 | config = yaml.safe_load(file) 13 | for key, value in config.items(): 14 | os.environ[key] = value 15 | 16 | class Agent: 17 | def __init__(self, model, tool, temperature=0, max_tokens=1000, planning_agent_prompt=None, integration_agent_prompt=None, verbose=False): 18 | load_config('config.yaml') 19 | self.api_key = os.getenv("OPENAI_API_KEY") 20 | self.url = 'https://api.openai.com/v1/chat/completions' 21 | self.headers = { 22 | 'Content-Type': 'application/json', 23 | 'Authorization': f'Bearer {self.api_key}' 24 | } 25 | self.temperature = temperature 26 | self.max_tokens = max_tokens 27 | self.tool = tool 28 | self.tool_specs = tool.__doc__ 29 | self.planning_agent_prompt = planning_agent_prompt 30 | self.integration_agent_prompt = integration_agent_prompt 31 | self.model = model 32 | self.verbose = verbose 33 | 34 | def run_planning_agent(self, query, plan=None, outputs=None, feedback=None): 35 | 36 | system_prompt = self.planning_agent_prompt.format( 37 | outputs=outputs, 38 | plan=plan, 39 | feedback=feedback, 40 | tool_specs=self.tool_specs 41 | ) 42 | 43 | data = { 44 | "model": self.model, 45 | "messages": [{"role": "user", "content": query}, 46 | {"role": "system", "content": system_prompt}], 47 | "temperature": self.temperature, 48 | "max_tokens": self.max_tokens 49 | } 50 | 51 | json_data = json.dumps(data) 52 | response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180) 53 | response_dict = response.json() 54 | content = response_dict['choices'][0]['message']['content'] 55 | print(colored(f"Planning Agent: {content}", 'green')) 56 | 57 | return content 58 | 59 | def run_integration_agent(self, query, plan, outputs): 60 | system_prompt = self.integration_agent_prompt.format( 61 | outputs=outputs, 62 | plan=plan 63 | ) 64 | 65 | data = { 66 | "model": self.model, 67 | "messages": [{"role": "user", "content": query}, 68 | {"role": "system", "content": system_prompt}], 69 | "temperature": self.temperature, 70 | "max_tokens": self.max_tokens 71 | } 72 | 73 | json_data = json.dumps(data) 74 | response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180) 75 | response_dict = response.json() 76 | content = response_dict['choices'][0]['message']['content'] 77 | print(colored(f"Integration Agent: {content}", 'blue')) 78 | # print("Integration Agent:", content) 79 | 80 | return content 81 | 82 | def check_response(self, response, query): 83 | 84 | tools = [ 85 | { 86 | "type": "function", 87 | "function": { 88 | "name": "respose_checker", 89 | "description": "Checck if the response meets the requirements", 90 | "parameters": { 91 | "type": "object", 92 | "properties": { 93 | "meets_requirements": { 94 | "type": "string", 95 | "description": """Check if the response meets the requirements of the query based on the following: 96 | 1. The response should be relevant to the query. 97 | 2. The response should be coherent and well-structured with citations. 98 | 3. The response should be comprehensive and address the query in its entirety. 99 | Return 'yes' if the response meets the requirements and 'no' otherwise. 100 | """ 101 | }, 102 | }, 103 | "required": ["meets_requirements"] 104 | } 105 | } 106 | } 107 | ] 108 | 109 | data = { 110 | "model": self.model, 111 | "messages": [{"role": "user", "content": f"Response: {response} \n Query: {query}"},], 112 | "temperature": 0, 113 | "tools": tools, 114 | "tool_choice": "required" 115 | } 116 | 117 | json_data = json.dumps(data) 118 | response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180) 119 | response_dict = response.json() 120 | 121 | tool_calls = response_dict['choices'][0]['message']['tool_calls'][0] 122 | arguments_json = json.loads(tool_calls['function']['arguments']) 123 | response = arguments_json['meets_requirements'] 124 | 125 | if response == 'yes': 126 | return True 127 | else: 128 | return False 129 | 130 | 131 | def execute(self, iterations=5): 132 | query = input("Enter your query: ") 133 | tool = self.tool(model=self.model, verbose=self.verbose) 134 | meets_requirements = False 135 | plan = None 136 | outputs = None 137 | response = None 138 | iterations = 0 139 | 140 | while not meets_requirements and iterations < 5: 141 | iterations += 1 142 | plan = self.run_planning_agent(query, plan=plan, outputs=outputs, feedback=response) 143 | outputs = tool.use_tool(plan=plan, query=query) 144 | response = self.run_integration_agent(query, plan, outputs) 145 | meets_requirements = self.check_response(response, query) 146 | 147 | print(colored(f"Final Response: {response}", 'cyan')) 148 | 149 | 150 | if __name__ == '__main__': 151 | agent = Agent(model="gpt-3.5-turbo", 152 | tool=WebSearcher, 153 | planning_agent_prompt=planning_agent_prompt, 154 | integration_agent_prompt=integration_agent_prompt, 155 | verbose=True 156 | ) 157 | agent.execute() 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /search.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import json 4 | import yaml 5 | from termcolor import colored 6 | import os 7 | 8 | def load_config(file_path): 9 | with open(file_path, 'r') as file: 10 | config = yaml.safe_load(file) 11 | for key, value in config.items(): 12 | os.environ[key] = value 13 | 14 | class WebSearcher: 15 | 16 | """ 17 | A class that encapsulates methods for generating search queries, fetching search results, 18 | determining the best search pages, and scraping web content using the OpenAI API and other web services. 19 | 20 | This class is designed to interact with the OpenAI API to leverage its capabilities for generating 21 | search queries based on a provided plan and query. It integrates with the serper.dev API to fetch 22 | search results and then uses a combination of these results and additional OpenAI API calls to determine 23 | the most relevant web pages. Finally, it scrapes the content of the determined best page. 24 | 25 | Methods: 26 | __init__(self): Initializes the WebSearcher instance, loads API keys from a configuration file, 27 | and sets up headers for HTTP requests. 28 | generate_searches(self, plan: str, query: str) -> str: Generates search queries based on provided plan and query. 29 | get_search_page(self, search_results: str, plan: str, query: str) -> str: Determines the best search page URLs 30 | based on the results and context. 31 | format_results(self, organic_results: list) -> str: Formats the search results to a more readable format. 32 | fetch_search_results(self, search_queries: str) -> str: Fetches detailed search results from serper.dev API. 33 | scrape_website_content(self, website_url: str) -> dict: Scrapes and returns the content of the given website URL. 34 | use_tool(self, verbose: bool = False, plan: str = None, query: str = None) -> dict: Orchestrates the use of other methods 35 | to perform a complete search-and-retrieve 36 | operation based on the specified plan and query. 37 | 38 | Usage Example: 39 | searcher = WebSearcher() 40 | results_dict = searcher.use_tool(verbose=True, plan="Research new AI techniques", query="Latest trends in AI") 41 | results_dict will contain the URL as a key and the scraped content from that URL as the value. 42 | """ 43 | def __init__(self, model, verbose=False): 44 | load_config('config.yaml') 45 | self.api_key = os.getenv("OPENAI_API_KEY") 46 | self.url = 'https://api.openai.com/v1/chat/completions' 47 | self.headers = { 48 | 'Content-Type': 'application/json', 49 | 'Authorization': f'Bearer {self.api_key}' 50 | } 51 | self.model = model 52 | self.verbose = verbose 53 | 54 | def generate_searches(self, plan, query): 55 | 56 | tools = [ 57 | { 58 | "type": "function", 59 | "function": { 60 | "name": "fetch_search_results", 61 | "description": "Fetch search results based on the search query", 62 | "parameters": { 63 | "type": "object", 64 | "properties": { 65 | "search_engine_queries": { 66 | "type": "string", 67 | "description": "The most suitable search query for the plan" 68 | }, 69 | }, 70 | "required": ["search_engine_queries"] 71 | } 72 | } 73 | } 74 | ] 75 | 76 | data = { 77 | "model": self.model, 78 | "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan}"}], 79 | "temperature": 0, 80 | "tools": tools, 81 | "tool_choice": "required" 82 | } 83 | 84 | json_data = json.dumps(data) 85 | response = requests.post(self.url, headers=self.headers, data=json_data) 86 | response_dict = response.json() 87 | 88 | tool_calls = response_dict['choices'][0]['message']['tool_calls'][0] 89 | arguments_json = json.loads(tool_calls['function']['arguments']) 90 | search_queries = arguments_json['search_engine_queries'] 91 | print(colored(f"Search Engine Queries:, {search_queries}", 'yellow')) 92 | 93 | return search_queries 94 | 95 | def get_search_page(self, search_results, plan, query): 96 | 97 | tools = [ 98 | { 99 | "type": "function", 100 | "function": { 101 | "name": "decide_best_pages", 102 | "description": "Decide the best pages to visit based on the search results", 103 | "parameters": { 104 | "type": "object", 105 | "properties": { 106 | "best_search_page": { 107 | "type": "string", 108 | "description": "The URL link of best search page based on the Search Results, Plan and Query. Do not select pdf files." 109 | }, 110 | }, 111 | "required": ["best_search_page"] 112 | } 113 | } 114 | } 115 | ] 116 | 117 | data = { 118 | "model": self.model, 119 | "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan} \n\n Search Results:{search_results}"}], 120 | "temperature": 0, 121 | "tools": tools, 122 | "tool_choice": "required" 123 | } 124 | 125 | json_data = json.dumps(data) 126 | response = requests.post(self.url, headers=self.headers, data=json_data) 127 | response_dict = response.json() 128 | 129 | tool_calls = response_dict['choices'][0]['message']['tool_calls'][0] 130 | arguments_json = json.loads(tool_calls['function']['arguments']) 131 | search_queries = arguments_json['best_search_page'] 132 | print(colored(f"Best Pages:, {search_queries}", 'yellow')) 133 | 134 | return search_queries 135 | 136 | def format_results(self, organic_results): 137 | 138 | result_strings = [] 139 | for result in organic_results: 140 | title = result.get('title', 'No Title') 141 | link = result.get('link', '#') 142 | snippet = result.get('snippet', 'No snippet available.') 143 | result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---") 144 | 145 | return '\n'.join(result_strings) 146 | 147 | def fetch_search_results(self, search_queries: str): 148 | 149 | search_url = "https://google.serper.dev/search" 150 | headers = { 151 | 'Content-Type': 'application/json', 152 | 'X-API-KEY': os.environ['SERPER_DEV_API_KEY'] # Ensure this environment variable is set with your API key 153 | } 154 | payload = json.dumps({"q": search_queries}) 155 | 156 | # Attempt to make the HTTP POST request 157 | try: 158 | response = requests.post(search_url, headers=headers, data=payload) 159 | response.raise_for_status() # Raise an HTTPError for bad responses (4XX, 5XX) 160 | results = response.json() 161 | 162 | # Check if 'organic' results are in the response 163 | if 'organic' in results: 164 | return self.format_results(results['organic']) 165 | else: 166 | return "No organic results found." 167 | 168 | except requests.exceptions.HTTPError as http_err: 169 | return f"HTTP error occurred: {http_err}" 170 | except requests.exceptions.RequestException as req_err: 171 | return f"Request exception occurred: {req_err}" 172 | except KeyError as key_err: 173 | return f"Key error in handling response: {key_err}" 174 | 175 | def scrape_website_content(self, website_url): 176 | headers = { 177 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36', 178 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 179 | 'Accept-Language': 'en-US,en;q=0.9', 180 | 'Referer': 'https://www.google.com/', 181 | 'Connection': 'keep-alive', 182 | 'Upgrade-Insecure-Requests': '1', 183 | 'Accept-Encoding': 'gzip, deflate, br' 184 | } 185 | 186 | try: 187 | # Making a GET request to the website 188 | response = requests.get(website_url, headers=headers, timeout=15) 189 | response.raise_for_status() # This will raise an exception for HTTP errors 190 | 191 | # Parsing the page content using BeautifulSoup 192 | soup = BeautifulSoup(response.content, 'html.parser') 193 | text = soup.get_text(separator='\n') 194 | # Cleaning up the text: removing excess whitespace 195 | clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()]) 196 | 197 | return {website_url: clean_text} 198 | 199 | except requests.exceptions.RequestException as e: 200 | print(f"Error retrieving content from {website_url}: {e}") 201 | return {website_url: f"Failed to retrieve content due to an error: {e}"} 202 | 203 | def use_tool(self, plan=None, query=None): 204 | 205 | search = WebSearcher(self.model) 206 | # plan = "Find the best way to cook a turkey" 207 | # query = "How long should I cook a turkey for?" 208 | 209 | search_queries = search.generate_searches(plan, query) 210 | search_results = search.fetch_search_results(search_queries) 211 | best_page = search.get_search_page(search_results, plan, query) 212 | results_dict = search.scrape_website_content(best_page) 213 | 214 | if self.verbose: 215 | print(colored(f"SEARCH RESULTS {search_results}", 'yellow')) 216 | print(colored(f"RESULTS DICT {results_dict}", 'yellow')) 217 | 218 | return results_dict 219 | 220 | 221 | if __name__ == '__main__': 222 | 223 | search = WebSearcher() 224 | search.use_tool() 225 | --------------------------------------------------------------------------------