├── .DS_Store
├── .env_example
├── .gitignore
├── LICENSE
├── QA.py
├── README.md
├── __pycache__
│   ├── QA.cpython-311.pyc
│   ├── gpt3_functions.cpython-311.pyc
│   └── retry_decorator.cpython-311.pyc
├── geckodriver.log
├── humanWeb.py
├── interventionTest.py
├── requirements.txt
└── retry_decorator.py
/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nschlaepfer/humanWeb/78fc52e7abe9bf12dc3138206777435e396366c1/.DS_Store -------------------------------------------------------------------------------- /.env_example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | QA/ 3 | Searches/ 4 | Reports/ 5 | debug_log.txt 6 | dev.md 7 | multithreaded.py 8 | Resarcher.py 9 | gpt3_functions.py 10 | basicWeb.py 11 | AI.py 12 | Modular/ 13 | DevFolder/ 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Nicolas Wiliam Schlaepfer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /QA.py: -------------------------------------------------------------------------------- 1 | # QA.py 2 | import openai 3 | import os 4 | import time 5 | 6 | def create_qa(query, summary_filename): 7 | print("Creating QA...") 8 | try: 9 | with open(summary_filename, "r") as sf: # Open the summary file 10 | summaries = sf.read() 11 | messages = [{'role': 'system', 'content': summaries}, {'role': 'user', 'content': f"How well does this report answer the query '{query}' on a scale of 1-10? 
If the rating is less than 10, why?"}] 12 | response = openai.ChatCompletion.create( 13 | model="gpt-3.5-turbo-16k", 14 | messages=messages 15 | ) 16 | gpt_qa = response.choices[0].message['content'].strip() 17 | print(f"GPT-3 QA: {gpt_qa}") 18 | os.makedirs("QA", exist_ok=True) # Create the "QA" directory if it doesn't exist 19 | qa_filename = os.path.join("QA", f"QA_{query}_{time.time()}.txt") # Store the QA filename 20 | with open(qa_filename, "w") as qf: # Open the QA file 21 | qf.write(f"GPT-3 QA:\n{gpt_qa}\n") # Write the GPT-3 QA to the QA file 22 | except FileNotFoundError: 23 | print(f"Could not find file: {summary_filename}") 24 | return None 25 | return gpt_qa # Return the QA rating generated by GPT-3 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # humanWeb: Your Personal AI-Powered Research Assistant 🤖 3 | 4 | [![GitHub license](https://img.shields.io/github/license/nschlaepfer/humanWeb)](https://github.com/nschlaepfer/humanWeb/blob/main/LICENSE) 5 | [![GitHub stars](https://img.shields.io/github/stars/nschlaepfer/humanWeb)](https://github.com/nschlaepfer/humanWeb/stargazers) 6 | [![GitHub forks](https://img.shields.io/github/forks/nschlaepfer/humanWeb)](https://github.com/nschlaepfer/humanWeb/network) 7 | [![Twitter Follow](https://img.shields.io/twitter/follow/nos_ult?style=social)](https://twitter.com/nos_ult) 8 | 9 | Welcome to humanWeb, a Python-based research assistant that harnesses the power of OpenAI's GPT-3.5-Turbo-16K model and Selenium WebDriver. This tool autonomously conducts web searches, extracts and summarizes relevant data, generates comprehensive reports, and formulates additional queries based on your initial input. 10 | 11 |
12 | logogif (1) 13 | Screenshot 2023-07-10 at 2 06 03 PM 14 |
15 | 16 | ## Table of Contents 📑 17 | 18 | - [Features](#features-✨) 19 | - [Requirements](#requirements-📋) 20 | - [Installation](#installation-💻) 21 | - [Usage](#usage-🚀) 22 | - [Websmart mode](#websmart-mode) 23 | - [About](#about-🙋‍♂️) 24 | - [Future Developments](#future-developments-🚧) 25 | 26 | ## Features ✨ 27 | 28 |
29 | humanWeb progress bar 30 |
31 | 32 | humanWeb is equipped with the following features: 33 | 34 | - **Web Search & Information Extraction**: humanWeb autonomously performs web searches based on your queries using Selenium WebDriver. It then extracts and saves the search results for further analysis. 35 | 36 | - **Data Summarization with GPT-3.5-Turbo-16K**: humanWeb leverages the GPT-3.5-Turbo-16K model to analyze the extracted web page content and pinpoint unique, relevant information. This information is then summarized and stored for future use. 37 | 38 | - **Report Generation**: humanWeb generates detailed reports based on the summarized data using GPT-3.5-Turbo-16K. These reports provide synthesized insights on your initial query. 39 | 40 | - **Additional Query Formulation**: To help you gather more information or complete a task, humanWeb formulates additional queries related to your initial one using the GPT-3.5-Turbo-16K model. 41 | 42 | - **Debug Logging**: humanWeb maintains a debug log file that records the additional queries generated and any errors encountered during the process. 43 | 44 | - **Data Storage**: All search results, summaries, and reports are stored in separate files within the `Searches` and `Reports` directories for convenient access and future use. 45 | 46 | ## Requirements 📋 47 | 48 | To run humanWeb, you will need: 49 | 50 | - Python 3.6 or higher 51 | - An OpenAI API key 52 | - Selenium Python package 53 | - Dotenv Python package 54 | - Google Chrome browser 55 | - ChromeDriver 56 | 57 | ## Installation 💻 58 | 59 | Here's how you can install humanWeb: 60 | 61 | 1. Clone or download this repository to your local machine. 62 | 2. Create and activate a virtual environment. 63 | 3. Install the required packages by running `pip install -r requirements.txt`. 64 | 4. Obtain an OpenAI API key from https://beta.openai.com/ and save it as an environment variable in a `.env` file in the project directory. 
The file should look like this: 65 | 66 | ```text 67 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 68 | ``` 69 | 70 | 5. Download the ChromeDriver from https://chromedriver.chromium.org/downloads and save it in the project directory. Ensure that the driver version matches your Chrome browser version. 71 | 72 | ### macOS 73 | 74 | 1. Download the ChromeDriver from the [official website](https://chromedriver.chromium.org/downloads). Make sure to download the version that matches your installed version of Google Chrome. 75 | 76 | 2. Once downloaded, unzip the file. You'll get a file named `chromedriver`. 77 | 78 | 3. Open Terminal and navigate to the directory where `chromedriver` is located. For example, if it's in your Downloads folder, you'd use `cd ~/Downloads`. 79 | 80 | 4. Move the `chromedriver` to `/usr/local/bin` directory, which is in the PATH by default. Use the following command: `mv chromedriver /usr/local/bin`. 81 | 82 | 5. Now you should be able to use ChromeDriver from anywhere on your system. 83 | 84 | ### Linux 85 | 86 | 1. Download the ChromeDriver from the [official website](https://chromedriver.chromium.org/downloads). Make sure to download the version that matches your installed version of Google Chrome. 87 | 88 | 2. Once downloaded, unzip the file. You'll get a file named `chromedriver`. 89 | 90 | 3. Open Terminal and navigate to the directory where `chromedriver` is located. For example, if it's in your Downloads folder, you'd use `cd ~/Downloads`. 91 | 92 | 4. Move the `chromedriver` to `/usr/local/bin` directory, which is in the PATH by default. Use the following command: `sudo mv chromedriver /usr/local/bin`. 93 | 94 | 5. Now you should be able to use ChromeDriver from anywhere on your system. 95 | 96 | ### Windows 97 | 98 | 1. Download the ChromeDriver from the [official website](https://chromedriver.chromium.org/downloads). Make sure to download the version that matches your installed version of Google Chrome. 99 | 100 | 2. 
Once downloaded, unzip the file. You'll get a file named `chromedriver.exe`. 101 | 102 | 3. You need to add the directory containing `chromedriver.exe` to your system's PATH. Here's how: 103 | 104 | - Right-click on 'My Computer' or 'This PC' and go to Properties. 105 | 106 | - Click on 'Advanced system settings'. 107 | 108 | - Click on 'Environment Variables...'. 109 | 110 | - In the 'System variables' section, find the 'Path' variable, select it and click on 'Edit'. 111 | 112 | - In the 'Variable value' field, add the path to the directory containing `chromedriver.exe`. Make sure to separate it from existing paths with a semicolon (`;`). 113 | 114 | 4. Click OK on all windows to save the changes. 115 | 116 | 5. Now you should be able to use ChromeDriver from anywhere on your system. 117 | 118 | Please note that you need to have administrative privileges to perform some of these steps. Also, remember to replace the paths in the commands with the actual paths where your `chromedriver` file is located. 119 | 120 | ## Usage 🚀 121 | 122 | humanWeb operates in Websmart mode, which performs a comprehensive sequence of tasks: 123 | 124 | web search, information extraction, data summarization, report generation, and additional query formulation. 125 | 126 | To run humanWeb, use the command `python humanWeb.py` and follow the prompts. You will be asked to input: 127 | 128 | 1. The number of search results you want to process. 129 | 2. Your initial query. 130 | 3. The number of steps (or queries) you wish to perform. 131 | 132 | humanWeb will then autonomously perform a web search, extract and summarize results, generate additional queries, and formulate a comprehensive report based on the collected data. If the generated report doesn't meet a certain quality threshold, humanWeb restarts the search process to ensure satisfactory output. 133 | 134 |
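The loop described above can be condensed into a short sketch. This is an illustrative simplification only, not the actual `humanWeb.py` code: the search, report, and QA stages are passed in as plain callables, and the function names and the 1-10 quality threshold are assumptions chosen for readability.

```python
# Illustrative sketch of the Websmart loop (not the real humanWeb.py code).
# The search/report/QA stages are injected as callables so the control
# flow can be read and tested without a browser or an API key.

def websmart_loop(initial_query, num_queries, search_fn, report_fn, quality_fn,
                  threshold=5):
    """Run the query -> search -> summarize -> report cycle.

    quality_fn mimics the QA step (a 1-10 rating); a report scoring below
    `threshold` triggers one retry of the search for that query.
    """
    queries = [initial_query]          # follow-up queries would be appended here
    reports = []
    for query in queries[:num_queries]:
        summaries = search_fn(query)           # web search + per-page summaries
        report = report_fn(query, summaries)   # synthesized report
        if quality_fn(report) < threshold:     # low-quality report: retry once
            report = report_fn(query, search_fn(query))
        reports.append(report)
    return reports
```

With stub callables this runs instantly, which makes the retry logic easy to verify before wiring in Selenium and OpenAI.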
135 | Screenshot 2023-07-10 at 2 05 06 PM 136 | Screenshot 2023-07-10 at 2 05 22 PM 137 |
138 | 139 | ## About 🙋‍♂️ 140 | 141 | humanWeb is an open-source project developed by [Nico Schlaepfer](https://github.com/nschlaepfer). It was designed as a personal research assistant tool, utilizing [OpenAI](https://openai.com/) for natural language processing and [Selenium](https://www.selenium.dev/) for web automation. This project is not affiliated with or endorsed by any of these organizations. 142 | 143 | Please note that humanWeb is a work in progress and may contain bugs or errors. If you find any issues or have any suggestions, feel free to [open an issue](https://github.com/nschlaepfer/humanWeb/issues) or [submit a pull request](https://github.com/nschlaepfer/humanWeb/pulls). 144 | 145 | For updates and news about humanWeb, you can follow [Nico Schlaepfer](https://twitter.com/nos_ult) on Twitter. 146 | 147 |
148 | logo12 149 | logo11 150 | logo10 151 |
152 | logo8 153 | 7 154 | 6 155 |
156 | _17439a69-fa52-41b2-b2e4-a52da7955c33 157 | _4713be32-4b6b-41df-a163-11aa87a52c74 158 | _8dad5291-e223-40bf-ab61-1bdc18eeceb7 159 |
160 | 161 | ## Future Developments 🚧 162 | 163 | We're continuously working on improving humanWeb. Here are some potential enhancements for future versions: 164 | 165 | - Adding a user interface for intuitive interaction. 166 | - Enhancing the report generation process with more dynamic and user-tailored outputs. 167 | - Expanding the search functionality to include more sources of information. 168 | - Implementing more customization options to adjust functionality according to user needs. 169 | - Improving error handling and providing more detailed logs. 170 | - Improving the README layout and image arrangement. 171 | 172 |
173 | humanWeb progress bar 174 |

Progress towards future developments

175 |
176 | -------------------------------------------------------------------------------- /__pycache__/QA.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nschlaepfer/humanWeb/78fc52e7abe9bf12dc3138206777435e396366c1/__pycache__/QA.cpython-311.pyc -------------------------------------------------------------------------------- /__pycache__/gpt3_functions.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nschlaepfer/humanWeb/78fc52e7abe9bf12dc3138206777435e396366c1/__pycache__/gpt3_functions.cpython-311.pyc -------------------------------------------------------------------------------- /__pycache__/retry_decorator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nschlaepfer/humanWeb/78fc52e7abe9bf12dc3138206777435e396366c1/__pycache__/retry_decorator.cpython-311.pyc -------------------------------------------------------------------------------- /geckodriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nschlaepfer/humanWeb/78fc52e7abe9bf12dc3138206777435e396366c1/geckodriver.log -------------------------------------------------------------------------------- /humanWeb.py: -------------------------------------------------------------------------------- 1 | # Summarize articles using GPT-3 and the chain-of-thought approach from the smartGPT project.
2 | 3 | import os 4 | import json 5 | import time 6 | 7 | from dotenv import load_dotenv 8 | import openai 9 | from selenium import webdriver 10 | from selenium.webdriver.common.keys import Keys 11 | from selenium.webdriver.common.by import By 12 | from selenium.webdriver.support.ui import WebDriverWait 13 | from selenium.webdriver.support import expected_conditions as EC 14 | from QA import create_qa 15 | from retry_decorator import retry_on_service_unavailable 16 | 17 | 18 | load_dotenv() # take environment variables from .env. 19 | 20 | # Get OpenAI API key from environment variable 21 | openai.api_key = os.getenv('OPENAI_API_KEY') 22 | 23 | options = webdriver.ChromeOptions() 24 | options.add_argument('headless') 25 | 26 | driver = webdriver.Chrome(options=options) 27 | 28 | # Create a list to store all the summaries 29 | all_summaries = [] 30 | 31 | # Create a debug log file 32 | debug_log = open("debug_log.txt", "w") 33 | 34 | def generate_additional_queries(query, num_queries): 35 | print("Generating additional queries with GPT-3...") 36 | system_prompt = f"Given this query, come up with {num_queries} more queries that will help get the most information or complete a task in order. Come up with the most concise and clear queries for Google." 
37 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': query}] 38 | response = openai.ChatCompletion.create( 39 | model="gpt-3.5-turbo", #changed since it is smaller 40 | messages=messages 41 | ) 42 | additional_queries = response.choices[0].message['content'].strip().split('\n')[:num_queries] 43 | # Write to debug log 44 | debug_log.write(f"Generated additional queries: {additional_queries}\n") 45 | return additional_queries 46 | 47 | 48 | def perform_search(query): 49 | print(f"Performing search for '{query}'...") 50 | driver.get("https://www.google.com") # Open Google in the browser 51 | try: 52 | search_box = WebDriverWait(driver, 10).until( 53 | EC.presence_of_element_located((By.NAME, "q")) 54 | ) # Wait for the search box element to be located 55 | search_box.send_keys(query) # Enter the search query 56 | search_box.send_keys(Keys.RETURN) # Press Enter to perform the search 57 | print("Waiting for search results to load...") 58 | WebDriverWait(driver, 10).until( 59 | EC.presence_of_element_located((By.CSS_SELECTOR, "div.g")) 60 | ) # Wait for the search results to load 61 | except Exception as e: 62 | print(f"Error performing search: {e}") 63 | import traceback 64 | traceback.print_exc() # Add this line 65 | return None 66 | return driver.find_elements(By.CSS_SELECTOR, "div.g") 67 | 68 | 69 | 70 | def extract_search_results(query, num_results, filename, summary_filename): 71 | print("Extracting search results...") 72 | search_results = perform_search(query)[:num_results] # Limit to user-specified number of results 73 | if search_results is None: 74 | print("No search results found.") 75 | return 76 | os.makedirs("Searches", exist_ok=True) # Create the "Searches" directory if it doesn't exist 77 | links = [] 78 | with open(filename, "w") as f: # Open the output file 79 | for i, result in enumerate(search_results, start=1): 80 | try: 81 | title = result.find_element(By.CSS_SELECTOR, "h3").text # Extract the title 82 | link = 
result.find_element(By.CSS_SELECTOR, "a").get_attribute("href") # Extract the URL 83 | 84 | # Skip processing if the link points to a YouTube video 85 | if "youtube.com" in link: 86 | print(f"Skipping Result {i}: {title} ({link}) - YouTube videos are not supported") 87 | continue 88 | 89 | print(f"Result {i}: {title} ({link})") # Process the search result as desired 90 | f.write(f"Result {i}: {title} ({link})\n") # Write the result to the file 91 | links.append((title, link)) # Store the title and link together 92 | except Exception as e: 93 | print(f"Error extracting result {i}: {e}") 94 | for title, link in links: 95 | print("Extracting page content...") 96 | driver.set_page_load_timeout(20) # Set page load timeout 97 | try: 98 | driver.get(link) # Navigate to the page 99 | page_content = driver.find_element(By.TAG_NAME, "body").text # Extract the text from the body of the page 100 | print(page_content) # Print the page content 101 | f.write(f"Page Content:\n{page_content}\n") # Write the page content to the file 102 | print("\n---\n") # Print a separator 103 | f.write("\n---\n") # Write a separator to the file 104 | if "Sorry, you have been blocked" not in page_content: # Check if the page content indicates you've been blocked 105 | gpt_response = process_results_with_gpt3(title, link, page_content, summary_filename) # Process the page content with GPT-3 106 | if gpt_response is not None: 107 | print(f"GPT-3 Response: {gpt_response}") 108 | except Exception as e: 109 | print(f"Error loading page {link}: {e}") 110 | 111 | 112 | # Using the chain of thought from the smartGPT project to process the results takes a lot longer. 113 | def process_results_with_gpt3(title, link, content, summary_filename): 114 | print("Processing results with GPT-3...") 115 | try: 116 | system_prompt = f"Given the following information, extract unique and interesting facts and analytical information. Do not just summarize it. 
This will be used in an upcoming report about {initial_query}. If the information is already known in the content, please do not repeat it. Look at the context given. MUST have sources at bottom." 117 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': content}] 118 | 119 | response = openai.ChatCompletion.create( 120 | model="gpt-3.5-turbo-16k", 121 | messages=messages 122 | ) 123 | time.sleep(3) 124 | gpt_response = response.choices[0].message['content'].strip() 125 | 126 | # Use the GPT-3 response as the final summary 127 | summary = f"\n## {title}\n\nSource: [{link}]({link})\n\nGPT-3 Summary: {gpt_response}\n" 128 | all_summaries.append(summary) # Add the summary to the list 129 | with open(summary_filename, "a") as sf: # Open the summary file 130 | sf.write(summary) # Write the GPT-3 summary to the summary file 131 | except FileNotFoundError: 132 | print(f"Could not find file: {summary_filename}") 133 | return None 134 | return gpt_response 135 | 136 | 137 | 138 | # This is smartGPT 139 | def create_report(query, initial_query, num_results, all_summaries): 140 | #global all_summaries # Declare all_summaries as global so we can modify it 141 | print("Creating report...") 142 | summaries = "\n".join(all_summaries) # Combine all the summaries into a single string 143 | system_prompt = f"Given the following information, create a report with the information and be sure to cite sources inline. This is a professional analytical report. This is about: {query} and part of this: {initial_query}." 
144 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': summaries}] 145 | 146 | best_report = None 147 | best_score = -1 148 | 149 | # Generate 3 reports 150 | for _ in range(3): 151 | response = openai.ChatCompletion.create( 152 | model="gpt-3.5-turbo-16k", 153 | messages=messages 154 | ) 155 | gpt_report = response.choices[0].message['content'].strip() 156 | 157 | # Researcher step 158 | researcher_prompt = f"You are a researcher tasked with investigating the report. You are a peer-reviewer. List the flaws and faulty logic of the report. Here are all the summaries from each page of the search made: {all_summaries}. Make sure every response has sources and inline citations. Let's work this out in a step by step way to be sure we have all the errors:" 159 | researcher_messages = [{'role': 'system', 'content': researcher_prompt}, {'role': 'user', 'content': gpt_report}] 160 | researcher_response = openai.ChatCompletion.create( 161 | model="gpt-3.5-turbo-16k", 162 | messages=researcher_messages 163 | ) 164 | time.sleep(5) 165 | researcher_output = researcher_response.choices[0].message['content'].strip() 166 | 167 | # Resolver step 168 | resolver_prompt = f"You are a resolver tasked with improving the report. Print the improved report in full. 
Let's work this out in a step by step way to be sure we have the right report. Use the goal: {initial_query} and the researched data {all_summaries} to provide the best report possible.:" 169 | resolver_messages = [{'role': 'system', 'content': resolver_prompt}, {'role': 'user', 'content': researcher_output}] 170 | resolver_response = openai.ChatCompletion.create( 171 | model="gpt-3.5-turbo-16k", 172 | messages=resolver_messages 173 | ) 174 | time.sleep(5) 175 | resolver_output = resolver_response.choices[0].message['content'].strip() 176 | 177 | # Score the resolver output (you can replace this with your own scoring function) 178 | score = len(resolver_output) 179 | 180 | # If this output is better than the current best, update the best output and score 181 | if score > best_score: 182 | best_report = resolver_output 183 | best_score = score 184 | 185 | # If the best score is below a certain threshold, restart the entire search process 186 | THRESHOLD = 5000 # Set the threshold here 187 | if best_score < THRESHOLD: 188 | print("\n\nReport not satisfactory, restarting the search process...") 189 | all_summaries = [] # Clear the all_summaries list 190 | # Reset other variables as necessary here 191 | # Call your search function here to restart the search process 192 | # You might need to modify your search function to return the final report 193 | filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 194 | summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 195 | return extract_search_results(query, num_results, filename, summary_filename) 196 | 197 | print(f"\n\nGPT-3 Report: {best_report}") 198 | os.makedirs(f"Reports/{initial_query}", exist_ok=True) # Create the "Reports" directory if it doesn't exist 199 | report_filename = os.path.join("Reports", initial_query, f"Report_{query}_{str(time.time())}.md") # Store the report filename 200 | with 
open(report_filename, "w") as rf: # Open the report file 201 | rf.write(f"# GPT-3 Report:\n\n{best_report}\n\nReport generated by: Momo AI\n") 202 | rf.write(f"\n\nPrompt used to generate list: {initial_query}\nSearch made for this report: {query}") 203 | print(f"\n\nReport saved to: {report_filename}") 204 | 205 | return best_report 206 | 207 | print("\n\n\nWelcome to humanWeb! \nThis is a tool that uses GPT-3.5-16k to help you search the web and create reports.\n Results may vary. BUGS ARE EXPECTED. \n\n\n") 208 | 209 | num_results = int(input("Number of websites to visit (Default 2): ")) 210 | initial_query = input("Enter your request (not a Google search; GPT will decide what to google): ") 211 | 212 | # Create directories for the initial query 213 | os.makedirs(f"Searches/{initial_query}", exist_ok=True) 214 | os.makedirs(f"Reports/{initial_query}", exist_ok=True) 215 | #os.makedirs(f"Reports/{initial_query}", exist_ok=True) 216 | 217 | num_queries = int(input("Number of reports (Default 5): ")) 218 | additional_queries = generate_additional_queries(initial_query, num_queries) 219 | 220 | # Define all_queries here 221 | all_queries = [initial_query] + additional_queries 222 | 223 | # Set a limit for the number of additional queries 224 | MAX_ADDITIONAL_QUERIES = 0 225 | # Set a limit for the number of iterations 226 | MAX_ITERATIONS = num_queries # Set MAX_ITERATIONS to num_queries 227 | 228 | # Keep track of the number of additional queries 229 | num_additional_queries = 0 230 | # Keep track of the number of iterations 231 | num_iterations = 0 232 | 233 | for query in all_queries: 234 | 235 | # Debug: print the current iteration and query 236 | print(f"\n\n\nIteration {num_iterations + 1}, processing query: '{query}'") 237 | 238 | filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 239 | summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 
240 | 241 | extract_search_results(query, num_results, filename, summary_filename) 242 | create_report(query, initial_query, num_results,all_summaries) 243 | qa_query = create_qa(query, summary_filename) 244 | 245 | if qa_query != query and num_additional_queries < MAX_ADDITIONAL_QUERIES: 246 | # If the result of create_qa is a new query and we haven't reached the limit, you can add it to all_queries and process it 247 | all_queries.append(qa_query) 248 | num_additional_queries += 1 249 | 250 | # Debug: print the new query and the updated total number of queries 251 | print(f"\n\n\nNew query added: '{qa_query}', total queries: {len(all_queries)}") 252 | 253 | # Update the query variable 254 | query = qa_query 255 | 256 | num_iterations += 1 257 | if num_iterations >= MAX_ITERATIONS: 258 | # If the loop has run for more than MAX_ITERATIONS, break the loop 259 | print(f"\n\n\nReached the maximum number of iterations ({MAX_ITERATIONS}), breaking the loop.") 260 | break 261 | 262 | print("\nClosing browser...") 263 | driver.quit() 264 | print("\nDone.") 265 | 266 | # Close the debug log file at the end 267 | debug_log.close() 268 | 269 | 270 | 271 | 272 | # working old but big version 273 | 274 | # import os 275 | # import json 276 | # import time 277 | 278 | # from dotenv import load_dotenv 279 | # import openai 280 | # from selenium import webdriver 281 | # from selenium.webdriver.common.keys import Keys 282 | # from selenium.webdriver.common.by import By 283 | # from selenium.webdriver.support.ui import WebDriverWait 284 | # from selenium.webdriver.support import expected_conditions as EC 285 | # from QA import create_qa 286 | # from gpt3_functions import generate_additional_queries, process_results_with_gpt3, create_report 287 | 288 | 289 | # load_dotenv() # take environment variables from .env. 
290 | 291 | # # Get OpenAI API key from environment variable 292 | # openai.api_key = os.getenv('OPENAI_API_KEY') 293 | 294 | # options = webdriver.ChromeOptions() 295 | # options.add_argument('headless') 296 | 297 | # driver = webdriver.Chrome(options=options) 298 | 299 | # # Create a list to store all the summaries 300 | # all_summaries = [] 301 | 302 | # # Create a debug log file 303 | # debug_log = open("debug_log.txt", "w") 304 | 305 | # def generate_additional_queries(query, num_queries): 306 | # print("Generating additional queries with GPT-3...") 307 | # system_prompt = f"Given this query, come up with {num_queries} more queries that will help get the most information or complete a task in order." 308 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': query}] 309 | # response = openai.ChatCompletion.create( 310 | # model="gpt-3.5-turbo", #changed since it is smaller 311 | # messages=messages 312 | # ) 313 | # additional_queries = response.choices[0].message['content'].strip().split('\n')[:num_queries] 314 | # # Write to debug log 315 | # debug_log.write(f"Generated additional queries: {additional_queries}\n") 316 | # return additional_queries 317 | 318 | 319 | # def perform_search(query): 320 | # print(f"Performing search for '{query}'...") 321 | # driver.get("https://www.google.com") # Open Google in the browser 322 | # try: 323 | # search_box = WebDriverWait(driver, 10).until( 324 | # EC.presence_of_element_located((By.NAME, "q")) 325 | # ) # Wait for the search box element to be located 326 | # search_box.send_keys(query) # Enter the search query 327 | # search_box.send_keys(Keys.RETURN) # Press Enter to perform the search 328 | # print("Waiting for search results to load...") 329 | # WebDriverWait(driver, 10).until( 330 | # EC.presence_of_element_located((By.CSS_SELECTOR, "div.g")) 331 | # ) # Wait for the search results to load 332 | # except Exception as e: 333 | # print(f"Error performing search: {e}") 334 | # 
return None 335 | # return driver.find_elements(By.CSS_SELECTOR, "div.g") 336 | 337 | 338 | # def extract_search_results(query, num_results, filename, summary_filename): 339 | # print("Extracting search results...") 340 | # search_results = perform_search(query)[:num_results] # Limit to user-specified number of results 341 | # if search_results is None: 342 | # print("No search results found.") 343 | # return 344 | # os.makedirs("Searches", exist_ok=True) # Create the "Searches" directory if it doesn't exist 345 | # links = [] 346 | # with open(filename, "w") as f: # Open the output file 347 | # for i, result in enumerate(search_results, start=1): 348 | # try: 349 | # title = result.find_element(By.CSS_SELECTOR, "h3").text # Extract the title 350 | # link = result.find_element(By.CSS_SELECTOR, "a").get_attribute("href") # Extract the URL 351 | # print(f"Result {i}: {title} ({link})") # Process the search result as desired 352 | # f.write(f"Result {i}: {title} ({link})\n") # Write the result to the file 353 | # links.append((title, link)) # Store the title and link together 354 | # except Exception as e: 355 | # print(f"Error extracting result {i}: {e}") 356 | # for title, link in links: 357 | # print("Extracting page content...") 358 | # driver.set_page_load_timeout(20) # Set page load timeout 359 | # try: 360 | # driver.get(link) # Navigate to the page 361 | # page_content = driver.find_element(By.TAG_NAME, "body").text # Extract the text from the body of the page 362 | # print(page_content) # Print the page content 363 | # f.write(f"Page Content:\n{page_content}\n") # Write the page content to the file 364 | # print("\n---\n") # Print a separator 365 | # f.write("\n---\n") # Write a separator to the file 366 | # if "Sorry, you have been blocked" not in page_content: # Check if the page content indicates you've been blocked 367 | # gpt_response = process_results_with_gpt3(title, link, page_content, summary_filename) # Process the page content with GPT-3 368 | # if 
gpt_response is not None: 369 | # print(f"GPT-3 Response: {gpt_response}") 370 | # except Exception as e: 371 | # print(f"Error loading page {link}: {e}") 372 | 373 | # # Using the chain of thought from the smartGPT project to process the results takes a lot longer. 374 | # def process_results_with_gpt3(title, link, content, summary_filename): 375 | # print("Processing results with GPT-3...") 376 | # try: 377 | # system_prompt = f"Given the following information, extract unique and interesting facts and analytical information. Do not just summarize it. This will be used in an upcoming report about {initial_query}. If the information is already known in the content, please do not repeat it. Look at the context given. MUST have sources at the bottom." 378 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': content}] 379 | 380 | # response = openai.ChatCompletion.create( 381 | # model="gpt-3.5-turbo-16k", 382 | # messages=messages 383 | # ) 384 | # gpt_response = response.choices[0].message['content'].strip() 385 | 386 | # # Use the GPT-3 response as the final summary 387 | # summary = f"\n## {title}\n\nSource: [{link}]({link})\n\nGPT-3 Summary: {gpt_response}\n" 388 | # all_summaries.append(summary) # Add the summary to the list 389 | # with open(summary_filename, "a") as sf: # Open the summary file 390 | # sf.write(summary) # Write the GPT-3 summary to the summary file 391 | # except FileNotFoundError: 392 | # print(f"Could not find file: {summary_filename}") 393 | # return None 394 | # return gpt_response 395 | 396 | 397 | 398 | # # This is smartGPT 399 | # def create_report(query, initial_query, num_results): 400 | # global all_summaries # Declare all_summaries as global so we can modify it 401 | # print("Creating report...") 402 | # summaries = "\n".join(all_summaries) # Combine all the summaries into a single string 403 | # system_prompt = f"Given the following information, create a report with the information and be sure to
cite sources. This is a professional report. This is about: {initial_query}." 404 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': summaries}] 405 | 406 | # best_report = None 407 | # best_score = -1 408 | 409 | # # Generate 3 reports 410 | # for _ in range(3): 411 | # response = openai.ChatCompletion.create( 412 | # model="gpt-3.5-turbo-16k", 413 | # messages=messages 414 | # ) 415 | # gpt_report = response.choices[0].message['content'].strip() 416 | 417 | # # Researcher step 418 | # researcher_prompt = "You are a researcher tasked with investigating the report. List the flaws and faulty logic of the report. Make sure every response has sources and inline citations. Let's work this out in a step by step way to be sure we have all the errors:" 419 | # researcher_messages = [{'role': 'system', 'content': researcher_prompt}, {'role': 'user', 'content': gpt_report}] 420 | # researcher_response = openai.ChatCompletion.create( 421 | # model="gpt-3.5-turbo-16k", 422 | # messages=researcher_messages 423 | # ) 424 | # researcher_output = researcher_response.choices[0].message['content'].strip() 425 | 426 | # # Resolver step 427 | # resolver_prompt = f"You are a resolver tasked with improving the report. Print the improved report in full.
Let's work this out in a step by step way to be sure we have the right report. Use the goal: {initial_query} and the researched data {all_summaries} to provide the best report possible:" 428 | # resolver_messages = [{'role': 'system', 'content': resolver_prompt}, {'role': 'user', 'content': researcher_output}] 429 | # resolver_response = openai.ChatCompletion.create( 430 | # model="gpt-3.5-turbo-16k", 431 | # messages=resolver_messages 432 | # ) 433 | # resolver_output = resolver_response.choices[0].message['content'].strip() 434 | 435 | # # Score the resolver output (you can replace this with your own scoring function) 436 | # score = len(resolver_output) 437 | 438 | # # If this output is better than the current best, update the best output and score 439 | # if score > best_score: 440 | # best_report = resolver_output 441 | # best_score = score 442 | 443 | # # If the best score is below a certain threshold, restart the entire search process 444 | # THRESHOLD = 5000 # Set the threshold here 445 | # if best_score < THRESHOLD: 446 | # print("Report not satisfactory, restarting the search process...") 447 | # all_summaries = [] # Clear the all_summaries list 448 | # # Reset other variables as necessary here 449 | # # Call your search function here to restart the search process 450 | # # You might need to modify your search function to return the final report 451 | # filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 452 | # summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 453 | # return extract_search_results(query, num_results, filename, summary_filename) 454 | 455 | # print(f"GPT-3 Report: {best_report}") 456 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) # Create the "Reports" directory if it doesn't exist 457 | # report_filename = os.path.join("Reports", initial_query, f"Report_{query}_{time.time()}.md") # Store the report
filename 458 | # with open(report_filename, "w") as rf: # Open the report file 459 | # rf.write(f"# GPT-3 Report:\n\n{best_report}\n\nReport generated by: Momo AI\n") # Write the GPT-3 report to the report file 460 | # print(f"Report saved to: {report_filename}") 461 | 462 | # return best_report 463 | 464 | 465 | 466 | 467 | # num_results = int(input("Enter the number of search results you want to process (rec. 2): ")) 468 | # initial_query = input("Enter your request, not a Google query (GPT will decide what to google): ") 469 | 470 | # # Create directories for the initial query 471 | # os.makedirs(f"Searches/{initial_query}", exist_ok=True) 472 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) 473 | 474 | # num_queries = int(input("Enter the number of steps (number of queries) Default 5: ")) 475 | # additional_queries = generate_additional_queries(initial_query, num_queries) 476 | 477 | # # Define all_queries here 478 | # all_queries = [initial_query] + additional_queries 479 | 480 | # # Set a limit for the number of additional queries 481 | # MAX_ADDITIONAL_QUERIES = 0 482 | # # Set a limit for the number of iterations 483 | # MAX_ITERATIONS = num_queries # Set MAX_ITERATIONS to num_queries 484 | 485 | # # Keep track of the number of additional queries 486 | # num_additional_queries = 0 487 | # # Keep track of the number of iterations 488 | # num_iterations = 0 489 | 490 | # for query in all_queries: 491 | 492 | # # Debug: print the current iteration and query 493 | # print(f"Iteration {num_iterations + 1}, processing query: '{query}'") 494 | 495 | # filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 496 | # summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 497 | 498 | # extract_search_results(query, num_results, filename, summary_filename) 499 | # create_report(query, initial_query, num_results) 500 | # qa_query =
create_qa(query, summary_filename) 501 | 502 | # if qa_query != query and num_additional_queries < MAX_ADDITIONAL_QUERIES: 503 | # # If the result of create_qa is a new query and we haven't reached the limit, you can add it to all_queries and process it 504 | # all_queries.append(qa_query) 505 | # num_additional_queries += 1 506 | 507 | # # Debug: print the new query and the updated total number of queries 508 | # print(f"New query added: '{qa_query}', total queries: {len(all_queries)}") 509 | 510 | # # Update the query variable 511 | # query = qa_query 512 | 513 | # num_iterations += 1 514 | # if num_iterations >= MAX_ITERATIONS: 515 | # # If the loop has run for more than MAX_ITERATIONS, break the loop 516 | # print(f"Reached the maximum number of iterations ({MAX_ITERATIONS}), breaking the loop.") 517 | # break 518 | 519 | # print("Closing browser...") 520 | # driver.quit() 521 | # print("Done.") 522 | 523 | # # Close the debug log file at the end 524 | # debug_log.close() 525 | 526 | 527 | 528 | # Working, but slow and sometimes not showing sources. 529 | 530 | 531 | # import os 532 | # import json 533 | # import time 534 | 535 | # from dotenv import load_dotenv 536 | # import openai 537 | # from selenium import webdriver 538 | # from selenium.webdriver.common.keys import Keys 539 | # from selenium.webdriver.common.by import By 540 | # from selenium.webdriver.support.ui import WebDriverWait 541 | # from selenium.webdriver.support import expected_conditions as EC 542 | # from QA import create_qa 543 | 544 | # load_dotenv() # take environment variables from .env.
545 | 546 | # # Get OpenAI API key from environment variable 547 | # openai.api_key = os.getenv('OPENAI_API_KEY') 548 | 549 | # options = webdriver.ChromeOptions() 550 | # options.add_argument('headless') 551 | 552 | # driver = webdriver.Chrome(options=options) 553 | 554 | # # Create a list to store all the summaries 555 | # all_summaries = [] 556 | 557 | # # Create a debug log file 558 | # debug_log = open("debug_log.txt", "w") 559 | 560 | # def generate_additional_queries(query): 561 | # print("Generating additional queries with GPT-3...") 562 | # system_prompt = "Given this query, come up with 10 more queries that will help get the most information or complete a task in order." 563 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': query}] 564 | # response = openai.ChatCompletion.create( 565 | # model="gpt-3.5-turbo", #changed since it is smaller 566 | # messages=messages 567 | # ) 568 | # additional_queries = response.choices[0].message['content'].strip().split('\n') 569 | # # Write to debug log 570 | # debug_log.write(f"Generated additional queries: {additional_queries}\n") 571 | # return additional_queries 572 | 573 | # def perform_search(query): 574 | # print(f"Performing search for '{query}'...") 575 | # driver.get("https://www.google.com") # Open Google in the browser 576 | # try: 577 | # search_box = WebDriverWait(driver, 10).until( 578 | # EC.presence_of_element_located((By.NAME, "q")) 579 | # ) # Wait for the search box element to be located 580 | # search_box.send_keys(query) # Enter the search query 581 | # search_box.send_keys(Keys.RETURN) # Press Enter to perform the search 582 | # print("Waiting for search results to load...") 583 | # WebDriverWait(driver, 10).until( 584 | # EC.presence_of_element_located((By.CSS_SELECTOR, "div.g")) 585 | # ) # Wait for the search results to load 586 | # except Exception as e: 587 | # print(f"Error performing search: {e}") 588 | # return None 589 | # return 
driver.find_elements(By.CSS_SELECTOR, "div.g") 590 | 591 | 592 | # def extract_search_results(query, num_results, filename, summary_filename): 593 | # print("Extracting search results...") 594 | # search_results = perform_search(query) # May be None on error; check before slicing 595 | # if search_results is None: 596 | # print("No search results found.") 597 | # return 598 | # os.makedirs("Searches", exist_ok=True) # Create the "Searches" directory if it doesn't exist 599 | # links = [] 600 | # with open(filename, "w") as f: # Open the output file 601 | # for i, result in enumerate(search_results[:num_results], start=1): # Limit to user-specified number of results 602 | # try: 603 | # title = result.find_element(By.CSS_SELECTOR, "h3").text # Extract the title 604 | # link = result.find_element(By.CSS_SELECTOR, "a").get_attribute("href") # Extract the URL 605 | # print(f"Result {i}: {title} ({link})") # Process the search result as desired 606 | # f.write(f"Result {i}: {title} ({link})\n") # Write the result to the file 607 | # links.append((title, link)) # Store the title and link together 608 | # except Exception as e: 609 | # print(f"Error extracting result {i}: {e}") 610 | # for title, link in links: 611 | # print("Extracting page content...") 612 | # driver.set_page_load_timeout(20) # Set page load timeout 613 | # try: 614 | # driver.get(link) # Navigate to the page 615 | # page_content = driver.find_element(By.TAG_NAME, "body").text # Extract the text from the body of the page 616 | # print(page_content) # Print the page content 617 | # f.write(f"Page Content:\n{page_content}\n") # Write the page content to the file 618 | # print("\n---\n") # Print a separator 619 | # f.write("\n---\n") # Write a separator to the file 620 | # if "Sorry, you have been blocked" not in page_content: # Check if the page content indicates you've been blocked 621 | # gpt_response = process_results_with_gpt3(title, link, page_content, summary_filename) # Process the page content with GPT-3 622 | # if gpt_response is not None:
623 | # print(f"GPT-3 Response: {gpt_response}") 624 | # except Exception as e: 625 | # print(f"Error loading page {link}: {e}") 626 | 627 | # # Using the chain of thought from the smartGPT project to process the results takes a lot longer. 628 | # def process_results_with_gpt3(title, link, content, summary_filename): 629 | # print("Processing results with GPT-3...") 630 | # try: 631 | # system_prompt = "Given the following information, extract unique and interesting facts. If the information is already known in the content, please do not repeat it; look at the context given." 632 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': content}] 633 | 634 | # best_resolver_output = None 635 | # best_score = -1 636 | 637 | # # Generate 3 responses 638 | # for _ in range(3): 639 | # response = openai.ChatCompletion.create( 640 | # model="gpt-3.5-turbo-16k", 641 | # messages=messages 642 | # ) 643 | # gpt_response = response.choices[0].message['content'].strip() 644 | 645 | # # Researcher step 646 | # researcher_prompt = f"You are a researcher tasked with investigating the response. List the flaws and faulty logic of the answer. Let's work this out in a step by step way to be sure we have all the errors:" 647 | # researcher_messages = [{'role': 'system', 'content': researcher_prompt}, {'role': 'user', 'content': gpt_response}] 648 | # researcher_response = openai.ChatCompletion.create( 649 | # model="gpt-3.5-turbo-16k", 650 | # messages=researcher_messages 651 | # ) 652 | # researcher_output = researcher_response.choices[0].message['content'].strip() 653 | 654 | # # Resolver step 655 | # resolver_prompt = "You are a resolver tasked with improving the answer. Print the improved answer in full.
Let's work this out in a step by step way to be sure we have the right answer:" 656 | # resolver_messages = [{'role': 'system', 'content': resolver_prompt}, {'role': 'user', 'content': researcher_output}] 657 | # resolver_response = openai.ChatCompletion.create( 658 | # model="gpt-3.5-turbo-16k", 659 | # messages=resolver_messages 660 | # ) 661 | # resolver_output = resolver_response.choices[0].message['content'].strip() 662 | 663 | # # Score the resolver output (you can replace this with your own scoring function) 664 | # score = len(resolver_output) 665 | 666 | # # If this output is better than the current best, update the best output and score 667 | # if score > best_score: 668 | # best_resolver_output = resolver_output 669 | # best_score = score 670 | 671 | # # Use the best resolver output as the final summary 672 | # summary = f"\n## {title}\n\nSource: [{link}]({link})\n\nGPT-3 Summary: {best_resolver_output}\n" 673 | # all_summaries.append(summary) # Add the summary to the list 674 | # with open(summary_filename, "a") as sf: # Open the summary file 675 | # sf.write(summary) # Write the GPT-3 summary to the summary file 676 | # except FileNotFoundError: 677 | # print(f"Could not find file: {summary_filename}") 678 | # return None 679 | # return best_resolver_output 680 | 681 | 682 | 683 | # def create_report(query, initial_query): 684 | # print("Creating report...") 685 | # summaries = "\n".join(all_summaries) # Combine all the summaries into a single string 686 | # system_prompt = "Given the following information, create a report with the information and be sure to cite sources. This is a professional report."
687 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': summaries}] 688 | # response = openai.ChatCompletion.create( 689 | # model="gpt-3.5-turbo-16k", 690 | # messages=messages 691 | # ) 692 | # gpt_report = response.choices[0].message['content'].strip() 693 | # print(f"GPT-3 Report: {gpt_report}") 694 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) # Create the "Reports" directory if it doesn't exist 695 | # report_filename = os.path.join("Reports", initial_query, f"Report_{query}_{time.time()}.md") # Store the report filename 696 | # with open(report_filename, "w") as rf: # Open the report file 697 | # rf.write(f"# GPT-3 Report:\n\n{gpt_report}\n\nReport generated by: Momo AI\n") # Write the GPT-3 report to the report file 698 | # print(f"Report saved to: {report_filename}") 699 | 700 | # num_results = int(input("Enter the number of search results you want to process: ")) 701 | # initial_query = input("Enter your initial search query: ") 702 | 703 | # # Create directories for the initial query 704 | # os.makedirs(f"Searches/{initial_query}", exist_ok=True) 705 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) 706 | 707 | # additional_queries = generate_additional_queries(initial_query) 708 | # all_queries = [initial_query] + additional_queries 709 | 710 | 711 | # # Set a limit for the number of additional queries 712 | # MAX_ADDITIONAL_QUERIES = 0 713 | # # Set a limit for the number of iterations 714 | # MAX_ITERATIONS = 2 715 | 716 | 717 | # # Keep track of the number of additional queries 718 | # num_additional_queries = 0 719 | # # Keep track of the number of iterations 720 | # num_iterations = 0 721 | 722 | # for query in all_queries: 723 | # # Debug: print the current iteration and query 724 | # print(f"Iteration {num_iterations + 1}, processing query: '{query}'") 725 | 726 | # filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 727 | # 
summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 728 | 729 | # extract_search_results(query, num_results, filename, summary_filename) 730 | # create_report(query, initial_query) 731 | # qa_query = create_qa(query, summary_filename) 732 | 733 | # if qa_query != query and num_additional_queries < MAX_ADDITIONAL_QUERIES: 734 | # # If the result of create_qa is a new query and we haven't reached the limit, you can add it to all_queries and process it 735 | # all_queries.append(qa_query) 736 | # num_additional_queries += 1 737 | 738 | # # Debug: print the new query and the updated total number of queries 739 | # print(f"New query added: '{qa_query}', total queries: {len(all_queries)}") 740 | 741 | # # Update the query variable 742 | # query = qa_query 743 | 744 | # num_iterations += 1 745 | # if num_iterations >= MAX_ITERATIONS: 746 | # # If the loop has run for more than MAX_ITERATIONS, break the loop 747 | # print(f"Reached the maximum number of iterations ({MAX_ITERATIONS}), breaking the loop.") 748 | # break 749 | 750 | # print("Closing browser...") 751 | # driver.quit() 752 | # print("Done.") 753 | 754 | # # Close the debug log file at the end 755 | # debug_log.close() 756 | 757 | 758 | 759 | 760 | 761 | # EXPERIMENTAL>>> 762 | 763 | # import os 764 | # import json 765 | # import time 766 | 767 | # from dotenv import load_dotenv 768 | # import openai 769 | # from selenium import webdriver 770 | # from selenium.webdriver.common.keys import Keys 771 | # from selenium.webdriver.common.by import By 772 | # from selenium.webdriver.support.ui import WebDriverWait 773 | # from selenium.webdriver.support import expected_conditions as EC 774 | # from QA import create_qa 775 | 776 | # load_dotenv() # take environment variables from .env. 
777 | 778 | # # Get OpenAI API key from environment variable 779 | # openai.api_key = os.getenv('OPENAI_API_KEY') 780 | 781 | # options = webdriver.ChromeOptions() 782 | # options.add_argument('headless') 783 | 784 | # driver = webdriver.Chrome(options=options) 785 | 786 | # # Create a list to store all the summaries 787 | # all_summaries = [] 788 | 789 | # # Create a debug log file 790 | # debug_log = open("debug_log.txt", "w") 791 | 792 | # def generate_additional_queries(query): 793 | # print("Generating additional queries with GPT-3...") 794 | # system_prompt = "Given this query, come up with 10 more queries that will help get the most information or complete a task in order." 795 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': query}] 796 | # response = openai.ChatCompletion.create( 797 | # model="gpt-3.5-turbo-16k", 798 | # messages=messages 799 | # ) 800 | # additional_queries = response.choices[0].message['content'].strip().split('\n') 801 | # # Write to debug log 802 | # debug_log.write(f"Generated additional queries: {additional_queries}\n") 803 | # return additional_queries 804 | 805 | # def perform_search(query): 806 | # print(f"Performing search for '{query}'...") 807 | # driver.get("https://www.google.com") # Open Google in the browser 808 | # try: 809 | # search_box = WebDriverWait(driver, 10).until( 810 | # EC.presence_of_element_located((By.NAME, "q")) 811 | # ) # Wait for the search box element to be located 812 | # search_box.send_keys(query) # Enter the search query 813 | # search_box.send_keys(Keys.RETURN) # Press Enter to perform the search 814 | # print("Waiting for search results to load...") 815 | # WebDriverWait(driver, 10).until( 816 | # EC.presence_of_element_located((By.CSS_SELECTOR, "div.g")) 817 | # ) # Wait for the search results to load 818 | # except Exception as e: 819 | # print(f"Error performing search: {e}") 820 | # return None 821 | # return driver.find_elements(By.CSS_SELECTOR, 
"div.g") 822 | 823 | # def extract_search_results(query, num_results, filename, summary_filename): 824 | # print("Extracting search results...") 825 | # search_results = perform_search(query) # May be None on error; check before slicing 826 | # if search_results is None: 827 | # print("No search results found.") 828 | # return 829 | # os.makedirs("Searches", exist_ok=True) # Create the "Searches" directory if it doesn't exist 830 | # links = [] 831 | # with open(filename, "w") as f: # Open the output file 832 | # for i, result in enumerate(search_results[:num_results], start=1): # Limit to user-specified number of results 833 | # try: 834 | # title = result.find_element(By.CSS_SELECTOR, "h3").text # Extract the title 835 | # link = result.find_element(By.CSS_SELECTOR, "a").get_attribute("href") # Extract the URL 836 | # print(f"Result {i}: {title} ({link})") # Process the search result as desired 837 | # f.write(f"Result {i}: {title} ({link})\n") # Write the result to the file 838 | # links.append((title, link)) # Store the title and link together 839 | # except Exception as e: 840 | # print(f"Error extracting result {i}: {e}") 841 | # for title, link in links: 842 | # print("Extracting page content...") 843 | # driver.set_page_load_timeout(20) # Set page load timeout 844 | # try: 845 | # driver.get(link) # Navigate to the page 846 | # page_content = driver.find_element(By.TAG_NAME, "body").text # Extract the text from the body of the page 847 | # print(page_content) # Print the page content 848 | # f.write(f"Page Content:\n{page_content}\n") # Write the page content to the file 849 | # print("\n---\n") # Print a separator 850 | # f.write("\n---\n") # Write a separator to the file 851 | # gpt_response = process_results_with_gpt3(title, link, page_content) 852 | 853 | # if gpt_response is not None: 854 | # print(f"GPT-3 Response: {gpt_response}") 855 | # except Exception as e: 856 | # print(f"Error loading page {link}: {e}") 857 | 858 | 859 | 860 | # def process_results_with_gpt3(title, link, content): 861 |
print("Processing results with GPT-3...") 862 | # summary_filename = "SmartGPTResults.txt" # Output file in the project folder 863 | 864 | # # Maximum number of retries 865 | # max_retries = 2 866 | # # Current number of retries 867 | # retries = 0 868 | 869 | # while retries < max_retries: 870 | # print(f"Retry iteration: {retries + 1}") 871 | # # Generate 3 responses 872 | # for i in range(3): 873 | # print(f"Response iteration: {i + 1}") 874 | # system_prompt = "Given the following information, extract unique and interesting facts. If the information is already known in the content, please do not repeat it; look at the context given." 875 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': content}] 876 | # response = openai.ChatCompletion.create( 877 | # model="gpt-3.5-turbo-16k", 878 | # messages=messages 879 | # ) 880 | # gpt_response = response.choices[0].message['content'].strip() 881 | # print(f"GPT-3 response: {gpt_response}") 882 | 883 | # # Researcher step 884 | # researcher_prompt = f"You are a researcher tasked with investigating the response. List the flaws and faulty logic of the answer. Let's work this out in a step by step way to be sure we have all the errors:" 885 | # researcher_messages = [{'role': 'system', 'content': researcher_prompt}, {'role': 'user', 'content': gpt_response}] 886 | # researcher_response = openai.ChatCompletion.create( 887 | # model="gpt-3.5-turbo-16k", 888 | # messages=researcher_messages 889 | # ) 890 | # researcher_output = researcher_response.choices[0].message['content'].strip() 891 | # print(f"Researcher output: {researcher_output}") 892 | 893 | # # Resolver step 894 | # resolver_prompt = "You are a resolver tasked with improving the answer. Print the improved answer in full.
Let's work this out in a step by step way to be sure we have the right answer:" 895 | # resolver_messages = [{'role': 'system', 'content': resolver_prompt}, {'role': 'user', 'content': researcher_output}] 896 | # resolver_response = openai.ChatCompletion.create( 897 | # model="gpt-3.5-turbo-16k", 898 | # messages=resolver_messages 899 | # ) 900 | # resolver_output = resolver_response.choices[0].message['content'].strip() 901 | # print(f"Resolver output: {resolver_output}") 902 | 903 | # # Increment the retry counter 904 | # retries += 1 905 | 906 | # # Use the last resolver output as the final summary 907 | # summary = f"\n## {title}\n\nSource: [{link}]({link})\n\nGPT-3 Summary: {resolver_output}\n" 908 | # all_summaries.append(summary) # Add the summary to the list 909 | # try: 910 | # with open(summary_filename, "a") as sf: # Open the summary file 911 | # sf.write(summary) # Write the GPT-3 summary to the summary file 912 | # print(f"Summary written to: {summary_filename}") 913 | # except Exception as e: 914 | # print(f"Error writing to file: {e}") 915 | 916 | # return resolver_output 917 | 918 | 919 | 920 | 921 | 922 | # def create_report(query, initial_query): 923 | # print("Creating report...") 924 | # summaries = "\n".join(all_summaries) # Combine all the summaries into a single string 925 | # system_prompt = "Given the following information, create a report with the information and be sure to cite sources. This is a professional report."
926 | # messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': summaries}] 927 | # response = openai.ChatCompletion.create( 928 | # model="gpt-3.5-turbo-16k", 929 | # messages=messages 930 | # ) 931 | # gpt_report = response.choices[0].message['content'].strip() 932 | # print(f"GPT-3 Report: {gpt_report}") 933 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) # Create the "Reports" directory if it doesn't exist 934 | # report_filename = os.path.join("Reports", initial_query, f"Report_{query}_{time.time()}.md") # Store the report filename 935 | # with open(report_filename, "w") as rf: # Open the report file 936 | # rf.write(f"# GPT-3 Report:\n\n{gpt_report}\n\nReport generated by: Momo AI\n") # Write the GPT-3 report to the report file 937 | # print(f"Report saved to: {report_filename}") 938 | 939 | # num_results = int(input("Enter the number of search results you want to process: ")) 940 | # initial_query = input("Enter your initial search query: ") 941 | 942 | # # Create directories for the initial query 943 | # os.makedirs(f"Searches/{initial_query}", exist_ok=True) 944 | # os.makedirs(f"Reports/{initial_query}", exist_ok=True) 945 | 946 | # additional_queries = generate_additional_queries(initial_query) 947 | # all_queries = [initial_query] + additional_queries 948 | 949 | 950 | # # Set a limit for the number of additional queries 951 | # MAX_ADDITIONAL_QUERIES = 0 952 | # # Set a limit for the number of iterations 953 | # MAX_ITERATIONS = 2 954 | 955 | 956 | # # Keep track of the number of additional queries 957 | # num_additional_queries = 0 958 | # # Keep track of the number of iterations 959 | # num_iterations = 0 960 | 961 | # for query in all_queries: 962 | # # Debug: print the current iteration and query 963 | # print(f"Iteration {num_iterations + 1}, processing query: '{query}'") 964 | 965 | # filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 966 | # 
summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 967 | 968 | # extract_search_results(query, num_results, filename, summary_filename) 969 | # create_report(query, initial_query) 970 | # qa_query = create_qa(query, summary_filename) 971 | 972 | # if qa_query != query and num_additional_queries < MAX_ADDITIONAL_QUERIES: 973 | # # If the result of create_qa is a new query and we haven't reached the limit, you can add it to all_queries and process it 974 | # all_queries.append(qa_query) 975 | # num_additional_queries += 1 976 | 977 | # # Debug: print the new query and the updated total number of queries 978 | # print(f"New query added: '{qa_query}', total queries: {len(all_queries)}") 979 | 980 | # # Update the query variable 981 | # query = qa_query 982 | 983 | # num_iterations += 1 984 | # if num_iterations >= MAX_ITERATIONS: 985 | # # If the loop has run for more than MAX_ITERATIONS, break the loop 986 | # print(f"Reached the maximum number of iterations ({MAX_ITERATIONS}), breaking the loop.") 987 | # break 988 | 989 | # print("Closing browser...") 990 | # driver.quit() 991 | # print("Done.") 992 | 993 | # # Close the debug log file at the end 994 | # debug_log.close() 995 | 996 | 997 | 998 | 999 | 1000 | 1001 | -------------------------------------------------------------------------------- /interventionTest.py: -------------------------------------------------------------------------------- 1 | # Summarizing the article using GPT-3 and chain of thought from the smartGPT project.
2 | 3 | import os 4 | import json 5 | import time 6 | 7 | from dotenv import load_dotenv 8 | import openai 9 | from selenium import webdriver 10 | from selenium.webdriver.common.keys import Keys 11 | from selenium.webdriver.common.by import By 12 | from selenium.webdriver.support.ui import WebDriverWait 13 | from selenium.webdriver.support import expected_conditions as EC 14 | from QA import create_qa 15 | from retry_decorator import retry_on_service_unavailable 16 | 17 | 18 | load_dotenv() # take environment variables from .env. 19 | 20 | # Get OpenAI API key from environment variable 21 | openai.api_key = os.getenv('OPENAI_API_KEY') 22 | 23 | options = webdriver.ChromeOptions() 24 | options.add_argument('headless') 25 | 26 | driver = webdriver.Chrome(options=options) 27 | 28 | # Create a list to store all the summaries 29 | all_summaries = [] 30 | 31 | # Create a debug log file 32 | debug_log = open("debug_log.txt", "w") 33 | 34 | def ask_for_user_input(prompt): 35 | """Ask the user for input and return their response.""" 36 | return input(prompt) 37 | 38 | def ask_for_user_confirmation(prompt): 39 | """Ask the user for confirmation to continue. Return True if they confirm, False otherwise.""" 40 | response = ask_for_user_input(prompt + " (yes/no): ") 41 | return response.lower() in ["yes", "y"] 42 | 43 | 44 | def generate_additional_queries(query, num_queries): 45 | print("Generating additional queries with GPT-3...") 46 | system_prompt = f"Given this query, come up with {num_queries} more queries that will help get the most information or complete a task in order. Come up with the most concise and clear queries for Google."
47 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': query}] 48 | response = openai.ChatCompletion.create( 49 | model="gpt-3.5-turbo", # the smaller model is sufficient for query generation 50 | messages=messages 51 | ) 52 | additional_queries = response.choices[0].message['content'].strip().split('\n')[:num_queries] 53 | # Write to debug log 54 | debug_log.write(f"Generated additional queries: {additional_queries}\n") 55 | return additional_queries 56 | 57 | 58 | def perform_search(query): 59 | print(f"Performing search for '{query}'...") 60 | driver.get("https://www.google.com") # Open Google in the browser 61 | try: 62 | search_box = WebDriverWait(driver, 10).until( 63 | EC.presence_of_element_located((By.NAME, "q")) 64 | ) # Wait for the search box element to be located 65 | search_box.send_keys(query) # Enter the search query 66 | search_box.send_keys(Keys.RETURN) # Press Enter to perform the search 67 | print("Waiting for search results to load...") 68 | WebDriverWait(driver, 10).until( 69 | EC.presence_of_element_located((By.CSS_SELECTOR, "div.g")) 70 | ) # Wait for the search results to load 71 | except Exception as e: 72 | print(f"Error performing search: {e}") 73 | import traceback 74 | traceback.print_exc() 75 | return None 76 | return driver.find_elements(By.CSS_SELECTOR, "div.g") 77 | 78 | 79 | 80 | def extract_search_results(query, num_results, filename, summary_filename): 81 | print("Extracting search results...") 82 | search_results = (perform_search(query) or [])[:num_results] # Limit to user-specified number of results; a failed search returns None 83 | if not search_results: 84 | print("No search results found.") 85 | return 86 | os.makedirs("Searches", exist_ok=True) # Create the "Searches" directory if it doesn't exist 87 | links = [] 88 | with open(filename, "w") as f: # Open the output file 89 | for i, result in enumerate(search_results, start=1): 90 | try: 91 | title = result.find_element(By.CSS_SELECTOR, "h3").text # Extract the title 92 | link =
result.find_element(By.CSS_SELECTOR, "a").get_attribute("href") # Extract the URL 93 | 94 | # Skip processing if the link points to a YouTube video 95 | if "youtube.com" in link: 96 | print(f"Skipping Result {i}: {title} ({link}) - YouTube videos are not supported") 97 | continue 98 | 99 | print(f"Result {i}: {title} ({link})") # Process the search result as desired 100 | f.write(f"Result {i}: {title} ({link})\n") # Write the result to the file 101 | links.append((title, link)) # Store the title and link together 102 | except Exception as e: 103 | print(f"Error extracting result {i}: {e}") 104 | for title, link in links: 105 | print("Extracting page content...") 106 | driver.set_page_load_timeout(20) # Set page load timeout 107 | try: 108 | driver.get(link) # Navigate to the page 109 | page_content = driver.find_element(By.TAG_NAME, "body").text # Extract the text from the body of the page 110 | print(page_content) # Print the page content 111 | f.write(f"Page Content:\n{page_content}\n") # Write the page content to the file 112 | print("\n---\n") # Print a separator 113 | f.write("\n---\n") # Write a separator to the file 114 | if "Sorry, you have been blocked" not in page_content: # Check if the page content indicates you've been blocked 115 | gpt_response = process_results_with_gpt3(title, link, page_content, summary_filename) # Process the page content with GPT-3 116 | if gpt_response is not None: 117 | print(f"GPT-3 Response: {gpt_response}") 118 | except Exception as e: 119 | print(f"Error loading page {link}: {e}") 120 | 121 | 122 | # Using the chain of thought from the smartGPT project to process the results takes a lot longer. 123 | def process_results_with_gpt3(title, link, content, summary_filename): 124 | print("Processing results with GPT-3...") 125 | try: 126 | system_prompt = f"Given the following information, extract unique and interesting facts and analytical information. Do not just summarize it.
This will be used in an upcoming report about {initial_query}. If the information is already known in the content, please do not repeat it. Look at the context given. MUST have sources at bottom." 127 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': content}] 128 | 129 | response = openai.ChatCompletion.create( 130 | model="gpt-3.5-turbo-16k", 131 | messages=messages 132 | ) 133 | time.sleep(3) 134 | gpt_response = response.choices[0].message['content'].strip() 135 | 136 | # Use the GPT-3 response as the final summary 137 | summary = f"\n## {title}\n\nSource: [{link}]({link})\n\nGPT-3 Summary: {gpt_response}\n" 138 | all_summaries.append(summary) # Add the summary to the list 139 | with open(summary_filename, "a") as sf: # Open the summary file 140 | sf.write(summary) # Write the GPT-3 summary to the summary file 141 | except FileNotFoundError: 142 | print(f"Could not find file: {summary_filename}") 143 | return None 144 | return gpt_response 145 | 146 | 147 | 148 | # This implements the smartGPT generate/research/resolve approach 149 | def create_report(query, initial_query, num_results, all_summaries): 150 | #global all_summaries # Declare all_summaries as global so we can modify it 151 | print("Creating report...") 152 | summaries = "\n".join(all_summaries) # Combine all the summaries into a single string 153 | system_prompt = f"Given the following information, create a report with the information and be sure to cite sources inline. This is a professional analytical report. This is about: {query} and part of this: {initial_query}."
154 | messages = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': summaries}] 155 | 156 | best_report = None 157 | best_score = -1 158 | 159 | # Generate 3 reports 160 | for _ in range(3): 161 | response = openai.ChatCompletion.create( 162 | model="gpt-3.5-turbo-16k", 163 | messages=messages 164 | ) 165 | gpt_report = response.choices[0].message['content'].strip() 166 | 167 | # Researcher step 168 | researcher_prompt = f"You are a researcher tasked with investigating the report. You are a peer-reviewer. List the flaws and faulty logic of the report. Here are all the summaries from each page of the search made: {all_summaries}. Make sure every response has sources and inline citations. Let's work this out in a step by step way to be sure we have all the errors:" 169 | researcher_messages = [{'role': 'system', 'content': researcher_prompt}, {'role': 'user', 'content': gpt_report}] 170 | researcher_response = openai.ChatCompletion.create( 171 | model="gpt-3.5-turbo-16k", 172 | messages=researcher_messages 173 | ) 174 | time.sleep(5) 175 | researcher_output = researcher_response.choices[0].message['content'].strip() 176 | 177 | # Resolver step 178 | resolver_prompt = f"You are a resolver tasked with improving the report. Print the improved report in full. 
Let's work this out in a step by step way to be sure we have the right report. Use the goal: {initial_query} and the researched data {all_summaries} to provide the best report possible:" 179 | resolver_messages = [{'role': 'system', 'content': resolver_prompt}, {'role': 'user', 'content': researcher_output}] 180 | resolver_response = openai.ChatCompletion.create( 181 | model="gpt-3.5-turbo-16k", 182 | messages=resolver_messages 183 | ) 184 | time.sleep(5) 185 | resolver_output = resolver_response.choices[0].message['content'].strip() 186 | 187 | # Score the resolver output (you can replace this with your own scoring function) 188 | score = len(resolver_output) 189 | 190 | # If this output is better than the current best, update the best output and score 191 | if score > best_score: 192 | best_report = resolver_output 193 | best_score = score 194 | 195 | # If the best score is below a certain threshold, restart the entire search process 196 | THRESHOLD = 5000 # Set the threshold here 197 | if best_score < THRESHOLD: 198 | print("\n\nReport not satisfactory, restarting the search process...") 199 | all_summaries = [] # Clear the all_summaries list 200 | # Reset other variables as necessary here 201 | # Call your search function here to restart the search process 202 | # You might need to modify your search function to return the final report 203 | filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 204 | summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename 205 | return extract_search_results(query, num_results, filename, summary_filename) 206 | 207 | print(f"\n\nGPT-3 Report: {best_report}") 208 | os.makedirs(f"Reports/{initial_query}", exist_ok=True) # Create the "Reports" directory if it doesn't exist 209 | report_filename = os.path.join("Reports", initial_query, f"Report_{query}_{str(time.time())}.md") # Store the report filename 210 | with
open(report_filename, "w") as rf: # Open the report file 211 | rf.write(f"# GPT-3 Report:\n\n{best_report}\n\nReport generated by: Momo AI\n") 212 | rf.write(f"\n\nPrompt used to generate list: {initial_query}\nSearch made for this report: {query}") 213 | print(f"\n\nReport saved to: {report_filename}") 214 | 215 | return best_report 216 | 217 | print("\n\n\nWelcome to humanWeb! \nThis is a tool that uses GPT-3.5-16k to help you search the web and create reports.\nResults may vary. BUGS ARE EXPECTED. \n\n\n") 218 | 219 | num_results = int(input("Number of websites to visit (default 2): ") or 2) 220 | initial_query = input("Enter your request (not a Google query; GPT will decide what to google): ") 221 | 222 | # Create directories for the initial query 223 | os.makedirs(f"Searches/{initial_query}", exist_ok=True) 224 | os.makedirs(f"Reports/{initial_query}", exist_ok=True) 225 | 226 | 227 | num_queries = int(input("Number of reports (default 5): ") or 5) 228 | additional_queries = generate_additional_queries(initial_query, num_queries) 229 | 230 | # Define all_queries here 231 | all_queries = [initial_query] + additional_queries 232 | 233 | # Set a limit for the number of additional queries 234 | MAX_ADDITIONAL_QUERIES = 0 235 | # Set a limit for the number of iterations 236 | MAX_ITERATIONS = num_queries # Set MAX_ITERATIONS to num_queries 237 | 238 | # Keep track of the number of additional queries 239 | num_additional_queries = 0 240 | # Keep track of the number of iterations 241 | num_iterations = 0 242 | 243 | for query in all_queries: 244 | 245 | # Debug: print the current iteration and query 246 | print(f"\n\n\nIteration {num_iterations + 1}, processing query: '{query}'") 247 | 248 | filename = os.path.join(f"Searches/{initial_query}", f"{query}_{time.time()}.txt") # Store the filename 249 | summary_filename = os.path.join(f"Searches/{initial_query}", f"Summary_{query}_{time.time()}.txt") # Store the summary filename
250 | 251 | # Ask the user if they want to continue with the current query 252 | if not ask_for_user_confirmation(f"Do you want to continue with the query '{query}'?"): 253 | print("You have chosen not to continue with this query.") 254 | # Ask the user if they want to edit the query 255 | if ask_for_user_confirmation("Do you want to edit this query?"): 256 | query = ask_for_user_input("Please enter your new query: ") 257 | print(f"Your new query is: '{query}'") 258 | else: 259 | print("Skipping this query.") 260 | continue 261 | 262 | extract_search_results(query, num_results, filename, summary_filename) 263 | create_report(query, initial_query, num_results, all_summaries) 264 | qa_query = create_qa(query, summary_filename) 265 | 266 | if qa_query != query and num_additional_queries < MAX_ADDITIONAL_QUERIES: 267 | # If the result of create_qa is a new query and we haven't reached the limit, add it to all_queries and process it 268 | all_queries.append(qa_query) 269 | num_additional_queries += 1 270 | 271 | # Debug: print the new query and the updated total number of queries 272 | print(f"\n\n\nNew query added: '{qa_query}', total queries: {len(all_queries)}") 273 | 274 | # Ask the user if they want to continue with the new query 275 | if not ask_for_user_confirmation(f"Do you want to continue with the new query '{qa_query}'?"): 276 | print("You have chosen not to continue with this new query.") 277 | # Ask the user if they want to edit the new query 278 | if ask_for_user_confirmation("Do you want to edit this new query?"): 279 | qa_query = ask_for_user_input("Please enter your new query: ") 280 | print(f"Your new query is: '{qa_query}'") 281 | else: 282 | print("Skipping this new query.") 283 | continue 284 | 285 | # Update the query variable 286 | query = qa_query 287 | 288 | num_iterations += 1 289 | if num_iterations >= MAX_ITERATIONS: 290 | # If the loop has run for more than MAX_ITERATIONS, break the loop 291 | print(f"\n\n\nReached the maximum number
of iterations ({MAX_ITERATIONS}), breaking the loop.") 292 | break 293 | 294 | 295 | print("\nClosing browser...") 296 | driver.quit() 297 | print("\nDone.") 298 | 299 | # Close the debug log file at the end 300 | debug_log.close() 301 | 302 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | selenium 3 | python-dotenv 4 | -------------------------------------------------------------------------------- /retry_decorator.py: -------------------------------------------------------------------------------- 1 | # retry_decorator.py 2 | 3 | import time 4 | import openai 5 | from functools import wraps 6 | 7 | def retry_on_service_unavailable(max_retries=5, backoff_factor=0.5): 8 | """A decorator for retrying a function call with exponential backoff. 9 | 10 | Args: 11 | max_retries (int): Maximum number of retries before giving up. Default is 5. 12 | backoff_factor (float): Multiplier for the delay between retries. Default is 0.5. 13 | 14 | Returns: 15 | Callable: Decorated function that will be retried on `openai.error.ServiceUnavailableError`. 16 | """ 17 | def decorator(func): 18 | @wraps(func) 19 | def wrapper(*args, **kwargs): 20 | retries = 0 21 | while retries < max_retries: 22 | try: 23 | return func(*args, **kwargs) 24 | except openai.error.ServiceUnavailableError: 25 | sleep_time = backoff_factor * (2 ** retries) 26 | time.sleep(sleep_time) 27 | retries += 1 28 | return func(*args, **kwargs) # Final attempt, let exception propagate if this fails 29 | return wrapper 30 | return decorator 31 | --------------------------------------------------------------------------------
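The exponential-backoff pattern in `retry_decorator.py` can be sketched in a self-contained way (standard library only; `retry_with_backoff`, `flaky`, and the use of a generic `RuntimeError` in place of `openai.error.ServiceUnavailableError` are illustrative assumptions, not part of this repo):

```python
import time
from functools import wraps

def retry_with_backoff(max_retries=5, backoff_factor=0.5, retry_on=(RuntimeError,)):
    """Retry the wrapped function with exponential backoff on the given exceptions."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except retry_on:
                    # Sleep backoff_factor * 2^attempt seconds before retrying
                    time.sleep(backoff_factor * (2 ** attempt))
            return func(*args, **kwargs)  # final attempt; exceptions propagate
        return wrapper
    return decorator

calls = []

@retry_with_backoff(max_retries=5, backoff_factor=0.0)  # zero delay for the demo
def flaky():
    """Fails twice with a transient error, then succeeds."""
    calls.append(1)
    if len(calls) < 3:
        raise RuntimeError("transient failure")
    return "ok"

print(flaky())  # succeeds on the third attempt
```

`retry_on_service_unavailable` in the repo has the same shape, with `openai.error.ServiceUnavailableError` (pre-1.0 `openai` package) as the retried exception.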