├── requirements.txt
├── config.yaml
├── architecture
    ├── architecture.drawio.png
    └── architecture.drawio
├── README.md
├── prompts.py
├── agent.py
└── search.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | termcolor==2.4.0
2 | PyYAML==6.0.1
3 | requests==2.31.0
4 | beautifulsoup4==4.12.3


--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY: "ENTER YOUR OPENAI KEY"
2 | SERPER_DEV_API_KEY: "ENTER YOUR SERPER DEV KEY"
3 | 


--------------------------------------------------------------------------------
/architecture/architecture.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/john-adeojo/custom_agent_tutorial/HEAD/architecture/architecture.drawio.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Setting Up and Running Custom Agent Script
 3 | 
 4 | ### Prerequisites
 5 | 1. **Install Anaconda:**  
 6 |    Download Anaconda from [https://www.anaconda.com/](https://www.anaconda.com/).
 7 | 
 8 | 2. **Create a Virtual Environment:**
 9 |    ```bash
10 |    conda create -n agent_env python=3.10 pip
11 |    ```
12 |    
13 | 3. **Activate the Virtual Environment:**
14 |    ```bash
15 |    conda activate agent_env
16 |    ```
17 | 
18 | ### Clone and Navigate to the Repository
19 | 1. **Clone the Repo:**
20 |    ```bash
21 |    git clone https://github.com/john-adeojo/custom_agent_tutorial.git
22 |    ```
23 | 
24 | 2. **Navigate to the Repo:**
25 |    ```bash
26 |    cd /path/to/your-repo/custom_agent_tutorial
27 |    ```
28 | 
29 | 3. **Install Requirements:**
30 |    ```bash
31 |    pip install -r requirements.txt
32 |    ```
33 | 
34 | ### Configure API Keys
35 | 1. **Open the `config.yaml`:**
36 |   Update your keys, see next step for links to get your own API keys.
37 | ```bash
38 | nano config.yaml
39 | ```
40 | 
41 | 3. **Enter API Keys:**
42 |    - **Serper API Key:** Get it from [https://serper.dev/](https://serper.dev/)
43 |    - **OpenAI API Key:** Get it from [https://openai.com/](https://openai.com/)
44 | 
45 | ### Run Your Query
46 | ```bash
47 | python agent.py run
48 | ```
49 | Then run your query
50 | 


--------------------------------------------------------------------------------
/prompts.py:
--------------------------------------------------------------------------------
 1 | planning_agent_prompt = ("You are an AI planning agent working with an integration agent. You have access to specialised tools. When addressing queries, you should follow this two-step methodology:\n"
 2 |                 "Step 1: Thought. Begin by contemplating the problem thoroughly and devising a plan of action."
 3 |                 "Step 2: Action. Clearly state the inputs you will use with any tools necessary to address the problem. This preparation is essential for executing your plan effectively.\n"
 4 |                 "You must ensure your plan takes into account any feedback (if available)\n\n."
 5 |                 "here are the outputs from the tools you have used: {outputs}\n\n"
 6 |                 "Here is your previous plan: {plan}\n\n"
 7 |                 "Here's the feedback:{feedback} \n\n"
 8 |                 "Here are the specifications of your tools:\n"
 9 |                 "{tool_specs}\n"
10 |                 "Continue this process until you have gathered enough information to comprehensively answer the query."
11 |                 )
12 | 
13 | integration_agent_prompt = ("You are an AI Integration Agent working with a planning agent. Your job is to synthesise the outputs from the planning agent into a coherent response.\n"
14 |                      "You must do this by considering the plan, the outputs from tools, and the original query.\n"
15 |                      "If any of the information is not sufficient, you should provide feedback to the planning agent to refine the plan.\n"
16 |                      "If the information is sufficient, you should provide a comprehenisve response to the query with appropriate citations. \n"
17 |                      "Your response to the query must be based on the outputs from the tools\n"
18 |                      "The output of the tool is a dictionary where the \n"
19 |                      "key is the URL source of the info and the value is the content of the URL \n"
20 |                      "You should use the source in citation \n"
21 |                      "Here are the outputs from the tool: {outputs}\n\n"
22 |                      "Here is the plan from the planning agent: {plan}\n\n"
23 |                      )


--------------------------------------------------------------------------------
/architecture/architecture.drawio:
--------------------------------------------------------------------------------
 1 | <mxfile host="app.diagrams.net" modified="2024-05-14T11:14:08.035Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" etag="VuY0quERgOPRJ4QZz9Re" version="24.3.1" type="device">
 2 |   <diagram name="Page-1" id="rU8JHjLior_zJRwLBU05">
 3 |     <mxGraphModel dx="2072" dy="1084" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="1654" math="0" shadow="0">
 4 |       <root>
 5 |         <mxCell id="0" />
 6 |         <mxCell id="1" parent="0" />
 7 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-2" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Overall Workflow&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
 8 |           <mxGeometry x="60" y="130" width="530" height="340" as="geometry" />
 9 |         </mxCell>
10 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-4" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;&lt;div&gt;&lt;font style=&quot;font-size: 24px;&quot;&gt;powered by LLMs&lt;/font&gt;&lt;/div&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
11 |           <mxGeometry x="290" y="87.5" width="165" height="165" as="geometry" />
12 |         </mxCell>
13 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-23" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
14 |           <mxGeometry x="63.75" y="87.5" width="165" height="165" as="geometry" />
15 |         </mxCell>
16 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-5" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
17 |           <mxGeometry x="60" y="590" width="960" height="500" as="geometry" />
18 |         </mxCell>
19 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-6" target="Qcd6xKoqnelJGYbhtqcp-3">
20 |           <mxGeometry relative="1" as="geometry">
21 |             <mxPoint x="380" y="336" as="targetPoint" />
22 |           </mxGeometry>
23 |         </mxCell>
24 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-6" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Planning Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;align=center;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
25 |           <mxGeometry x="30" y="220" width="232.5" height="232.5" as="geometry" />
26 |         </mxCell>
27 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-7" target="Qcd6xKoqnelJGYbhtqcp-6">
28 |           <mxGeometry relative="1" as="geometry">
29 |             <Array as="points">
30 |               <mxPoint x="763" y="130" />
31 |               <mxPoint x="146" y="130" />
32 |             </Array>
33 |           </mxGeometry>
34 |         </mxCell>
35 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-11" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Feedback&lt;/font&gt;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="Qcd6xKoqnelJGYbhtqcp-10">
36 |           <mxGeometry x="0.0146" y="-3" relative="1" as="geometry">
37 |             <mxPoint x="-1" as="offset" />
38 |           </mxGeometry>
39 |         </mxCell>
40 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-7" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Integration Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
41 |           <mxGeometry x="640" y="213.75" width="245" height="245" as="geometry" />
42 |         </mxCell>
43 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-3" target="Qcd6xKoqnelJGYbhtqcp-7">
44 |           <mxGeometry relative="1" as="geometry" />
45 |         </mxCell>
46 |         <mxCell id="Qcd6xKoqnelJGYbhtqcp-3" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
47 |           <mxGeometry x="370" y="253.75" width="165" height="165" as="geometry" />
48 |         </mxCell>
49 |       </root>
50 |     </mxGraphModel>
51 |   </diagram>
52 | </mxfile>
53 | 


--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
  1 | import os 
  2 | import yaml
  3 | import json
  4 | import requests
  5 | from termcolor import colored
  6 | from prompts import planning_agent_prompt, integration_agent_prompt
  7 | from search import WebSearcher
  8 | 
  9 | 
 10 | def load_config(file_path):
 11 |     with open(file_path, 'r') as file:
 12 |         config = yaml.safe_load(file)
 13 |         for key, value in config.items():
 14 |             os.environ[key] = value
 15 | 
 16 | class Agent:
 17 |     def __init__(self, model, tool, temperature=0, max_tokens=1000, planning_agent_prompt=None, integration_agent_prompt=None, verbose=False):
 18 |         load_config('config.yaml')
 19 |         self.api_key = os.getenv("OPENAI_API_KEY")
 20 |         self.url = 'https://api.openai.com/v1/chat/completions'
 21 |         self.headers = {
 22 |             'Content-Type': 'application/json',
 23 |             'Authorization': f'Bearer {self.api_key}'
 24 |         }
 25 |         self.temperature = temperature
 26 |         self.max_tokens = max_tokens
 27 |         self.tool = tool
 28 |         self.tool_specs = tool.__doc__
 29 |         self.planning_agent_prompt = planning_agent_prompt
 30 |         self.integration_agent_prompt = integration_agent_prompt
 31 |         self.model = model
 32 |         self.verbose = verbose
 33 |     
 34 |     def run_planning_agent(self, query, plan=None, outputs=None, feedback=None):
 35 | 
 36 |         system_prompt = self.planning_agent_prompt.format(
 37 |             outputs=outputs,
 38 |             plan=plan,
 39 |             feedback=feedback,
 40 |             tool_specs=self.tool_specs
 41 |         )
 42 | 
 43 |         data = {
 44 |             "model": self.model,
 45 |             "messages": [{"role": "user", "content": query},
 46 |                          {"role": "system", "content": system_prompt}],
 47 |             "temperature": self.temperature,
 48 |             "max_tokens": self.max_tokens
 49 |         }
 50 | 
 51 |         json_data = json.dumps(data)
 52 |         response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
 53 |         response_dict = response.json()
 54 |         content = response_dict['choices'][0]['message']['content']
 55 |         print(colored(f"Planning Agent: {content}", 'green'))
 56 | 
 57 |         return content
 58 |     
 59 |     def run_integration_agent(self, query, plan, outputs):
 60 |         system_prompt = self.integration_agent_prompt.format(
 61 |             outputs=outputs,
 62 |             plan=plan
 63 |         )
 64 | 
 65 |         data = {
 66 |             "model": self.model,
 67 |             "messages": [{"role": "user", "content": query},
 68 |                          {"role": "system", "content": system_prompt}],
 69 |             "temperature": self.temperature,
 70 |             "max_tokens": self.max_tokens
 71 |         }
 72 | 
 73 |         json_data = json.dumps(data)
 74 |         response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
 75 |         response_dict = response.json()
 76 |         content = response_dict['choices'][0]['message']['content']
 77 |         print(colored(f"Integration Agent: {content}", 'blue'))
 78 |         # print("Integration Agent:", content)
 79 | 
 80 |         return content
 81 |     
 82 |     def check_response(self, response, query):
 83 |         
 84 |         tools = [
 85 |             {
 86 |                 "type": "function",
 87 |                 "function": {
 88 |                     "name": "respose_checker",
 89 |                     "description": "Checck if the response meets the requirements",
 90 |                     "parameters": {
 91 |                         "type": "object",
 92 |                         "properties": {
 93 |                             "meets_requirements": {
 94 |                                 "type": "string",
 95 |                                 "description": """Check if the response meets the requirements of the query based on the following:
 96 |                                 1. The response should be relevant to the query.
 97 |                                 2. The response should be coherent and well-structured with citations.
 98 |                                 3. The response should be comprehensive and address the query in its entirety.
 99 |                                 Return 'yes' if the response meets the requirements and 'no' otherwise.
100 |                                 """
101 |                             },
102 |                         },
103 |                         "required": ["meets_requirements"]
104 |                     }
105 |                 }
106 |             }
107 |         ]
108 | 
109 |         data = {
110 |             "model": self.model,
111 |             "messages": [{"role": "user", "content": f"Response: {response} \n Query: {query}"},],
112 |             "temperature": 0,
113 |             "tools": tools,
114 |             "tool_choice": "required"
115 |         }
116 | 
117 |         json_data = json.dumps(data)
118 |         response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
119 |         response_dict = response.json()
120 | 
121 |         tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
122 |         arguments_json = json.loads(tool_calls['function']['arguments'])
123 |         response = arguments_json['meets_requirements']
124 | 
125 |         if response == 'yes':
126 |             return True
127 |         else:
128 |             return False
129 | 
130 |          
131 |     def execute(self, iterations=5):
132 |         query = input("Enter your query: ")
133 |         tool =  self.tool(model=self.model, verbose=self.verbose)
134 |         meets_requirements = False
135 |         plan = None
136 |         outputs = None
137 |         response = None
138 |         iterations = 0
139 | 
140 |         while not meets_requirements and iterations < 5:
141 |             iterations += 1  
142 |             plan = self.run_planning_agent(query, plan=plan, outputs=outputs, feedback=response)
143 |             outputs = tool.use_tool(plan=plan, query=query)
144 |             response = self.run_integration_agent(query, plan, outputs)
145 |             meets_requirements = self.check_response(response, query)
146 | 
147 |         print(colored(f"Final Response: {response}", 'cyan'))
148 | 
149 |         
150 | if __name__ == '__main__':
151 |     agent = Agent(model="gpt-3.5-turbo",
152 |                   tool=WebSearcher, 
153 |                   planning_agent_prompt=planning_agent_prompt, 
154 |                   integration_agent_prompt=integration_agent_prompt,
155 |                   verbose=True
156 |                   )
157 |     agent.execute()
158 | 
159 | 
160 |     
161 | 
162 | 


--------------------------------------------------------------------------------
/search.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | from bs4 import BeautifulSoup
  3 | import json
  4 | import yaml
  5 | from termcolor import colored
  6 | import os
  7 | 
  8 | def load_config(file_path):
  9 |     with open(file_path, 'r') as file:
 10 |         config = yaml.safe_load(file)
 11 |         for key, value in config.items():
 12 |             os.environ[key] = value
 13 | 
 14 | class WebSearcher:
 15 | 
 16 |     """
 17 |     A class that encapsulates methods for generating search queries, fetching search results,
 18 |     determining the best search pages, and scraping web content using the OpenAI API and other web services.
 19 | 
 20 |     This class is designed to interact with the OpenAI API to leverage its capabilities for generating
 21 |     search queries based on a provided plan and query. It integrates with the serper.dev API to fetch
 22 |     search results and then uses a combination of these results and additional OpenAI API calls to determine
 23 |     the most relevant web pages. Finally, it scrapes the content of the determined best page.
 24 | 
 25 |     Methods:
 26 |         __init__(self): Initializes the WebSearcher instance, loads API keys from a configuration file,
 27 |                        and sets up headers for HTTP requests.
 28 |         generate_searches(self, plan: str, query: str) -> str: Generates search queries based on provided plan and query.
 29 |         get_search_page(self, search_results: str, plan: str, query: str) -> str: Determines the best search page URLs
 30 |                                                                                based on the results and context.
 31 |         format_results(self, organic_results: list) -> str: Formats the search results to a more readable format.
 32 |         fetch_search_results(self, search_queries: str) -> str: Fetches detailed search results from serper.dev API.
 33 |         scrape_website_content(self, website_url: str) -> dict: Scrapes and returns the content of the given website URL.
 34 |         use_tool(self, verbose: bool = False, plan: str = None, query: str = None) -> dict: Orchestrates the use of other methods
 35 |                                                                                           to perform a complete search-and-retrieve
 36 |                                                                                           operation based on the specified plan and query.
 37 | 
 38 |     Usage Example:
 39 |         searcher = WebSearcher()
 40 |         results_dict = searcher.use_tool(verbose=True, plan="Research new AI techniques", query="Latest trends in AI")
 41 |         results_dict will contain the URL as a key and the scraped content from that URL as the value.
 42 |     """
 43 |     def __init__(self, model, verbose=False):
 44 |         load_config('config.yaml')
 45 |         self.api_key = os.getenv("OPENAI_API_KEY")
 46 |         self.url = 'https://api.openai.com/v1/chat/completions'
 47 |         self.headers = {
 48 |             'Content-Type': 'application/json',
 49 |             'Authorization': f'Bearer {self.api_key}'
 50 |         }
 51 |         self.model = model
 52 |         self.verbose = verbose
 53 | 
 54 |     def generate_searches(self, plan, query):
 55 | 
 56 |         tools = [
 57 |             {
 58 |                 "type": "function",
 59 |                 "function": {
 60 |                     "name": "fetch_search_results",
 61 |                     "description": "Fetch search results based on the search query",
 62 |                     "parameters": {
 63 |                         "type": "object",
 64 |                         "properties": {
 65 |                             "search_engine_queries": {
 66 |                                 "type": "string",
 67 |                                 "description": "The most suitable search query for the plan"
 68 |                             },
 69 |                         },
 70 |                         "required": ["search_engine_queries"]
 71 |                     }
 72 |                 }
 73 |             }
 74 |         ]
 75 | 
 76 |         data = {
 77 |             "model": self.model,
 78 |             "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan}"}],
 79 |             "temperature": 0,
 80 |             "tools": tools,
 81 |             "tool_choice": "required"
 82 |         }
 83 | 
 84 |         json_data = json.dumps(data)
 85 |         response = requests.post(self.url, headers=self.headers, data=json_data)
 86 |         response_dict = response.json()
 87 | 
 88 |         tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
 89 |         arguments_json = json.loads(tool_calls['function']['arguments'])
 90 |         search_queries = arguments_json['search_engine_queries']
 91 |         print(colored(f"Search Engine Queries:, {search_queries}", 'yellow'))
 92 | 
 93 |         return search_queries
 94 |     
 95 |     def get_search_page(self, search_results, plan, query):
 96 | 
 97 |         tools = [
 98 |             {
 99 |                 "type": "function",
100 |                 "function": {
101 |                     "name": "decide_best_pages",
102 |                     "description": "Decide the best pages to visit based on the search results",
103 |                     "parameters": {
104 |                         "type": "object",
105 |                         "properties": {
106 |                             "best_search_page": {
107 |                                 "type": "string",
108 |                                 "description": "The URL link of best search page based on the Search Results, Plan and Query. Do not select pdf files."
109 |                             },
110 |                         },
111 |                         "required": ["best_search_page"]
112 |                     }
113 |                 }
114 |             }
115 |         ]
116 | 
117 |         data = {
118 |             "model": self.model,
119 |             "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan} \n\n Search Results:{search_results}"}],
120 |             "temperature": 0,
121 |             "tools": tools,
122 |             "tool_choice": "required"
123 |         }
124 | 
125 |         json_data = json.dumps(data)
126 |         response = requests.post(self.url, headers=self.headers, data=json_data)
127 |         response_dict = response.json()
128 | 
129 |         tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
130 |         arguments_json = json.loads(tool_calls['function']['arguments'])
131 |         search_queries = arguments_json['best_search_page']
132 |         print(colored(f"Best Pages:, {search_queries}", 'yellow'))
133 | 
134 |         return search_queries
135 |     
136 |     def format_results(self, organic_results):
137 | 
138 |         result_strings = []
139 |         for result in organic_results:
140 |             title = result.get('title', 'No Title')
141 |             link = result.get('link', '#')
142 |             snippet = result.get('snippet', 'No snippet available.')
143 |             result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---")
144 |         
145 |         return '\n'.join(result_strings)
146 |     
147 |     def fetch_search_results(self, search_queries: str):
148 | 
149 |         search_url = "https://google.serper.dev/search"
150 |         headers = {
151 |             'Content-Type': 'application/json',
152 |             'X-API-KEY': os.environ['SERPER_DEV_API_KEY']  # Ensure this environment variable is set with your API key
153 |         }
154 |         payload = json.dumps({"q": search_queries})
155 |         
156 |         # Attempt to make the HTTP POST request
157 |         try:
158 |             response = requests.post(search_url, headers=headers, data=payload)
159 |             response.raise_for_status()  # Raise an HTTPError for bad responses (4XX, 5XX)
160 |             results = response.json()
161 |             
162 |             # Check if 'organic' results are in the response
163 |             if 'organic' in results:
164 |                 return self.format_results(results['organic'])
165 |             else:
166 |                 return "No organic results found."
167 | 
168 |         except requests.exceptions.HTTPError as http_err:
169 |             return f"HTTP error occurred: {http_err}"
170 |         except requests.exceptions.RequestException as req_err:
171 |             return f"Request exception occurred: {req_err}"
172 |         except KeyError as key_err:
173 |             return f"Key error in handling response: {key_err}"
174 |         
175 |     def scrape_website_content(self, website_url):
176 |         headers = {
177 |             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
178 |             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
179 |             'Accept-Language': 'en-US,en;q=0.9',
180 |             'Referer': 'https://www.google.com/',
181 |             'Connection': 'keep-alive',
182 |             'Upgrade-Insecure-Requests': '1',
183 |             'Accept-Encoding': 'gzip, deflate, br'
184 |         }
185 |         
186 |         try:
187 |             # Making a GET request to the website
188 |             response = requests.get(website_url, headers=headers, timeout=15)
189 |             response.raise_for_status()  # This will raise an exception for HTTP errors
190 | 
191 |             # Parsing the page content using BeautifulSoup
192 |             soup = BeautifulSoup(response.content, 'html.parser')
193 |             text = soup.get_text(separator='\n')
194 |             # Cleaning up the text: removing excess whitespace
195 |             clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])
196 | 
197 |             return {website_url: clean_text}
198 | 
199 |         except requests.exceptions.RequestException as e:
200 |             print(f"Error retrieving content from {website_url}: {e}")
201 |             return {website_url: f"Failed to retrieve content due to an error: {e}"}
202 |     
203 |     def use_tool(self, plan=None, query=None):
204 | 
205 |         search = WebSearcher(self.model)
206 |         # plan = "Find the best way to cook a turkey"
207 |         # query = "How long should I cook a turkey for?"
208 | 
209 |         search_queries = search.generate_searches(plan, query)
210 |         search_results = search.fetch_search_results(search_queries)
211 |         best_page = search.get_search_page(search_results, plan, query)
212 |         results_dict = search.scrape_website_content(best_page)
213 | 
214 |         if self.verbose:
215 |             print(colored(f"SEARCH RESULTS {search_results}", 'yellow'))
216 |             print(colored(f"RESULTS DICT {results_dict}", 'yellow'))
217 | 
218 |         return results_dict
219 |         
220 | 
221 | if __name__ == '__main__':
222 | 
223 |     search = WebSearcher()
224 |     search.use_tool()
225 | 


--------------------------------------------------------------------------------