├── .gitignore ├── Compare_Test_Runs.ipynb ├── Dockerfile ├── LICENSE ├── README.md ├── examples ├── example_wikipedia.py ├── selenium_ai_app.py └── testup_mywebshop.py ├── requirements.txt ├── scripts ├── data │ └── input │ │ ├── test_data.json │ │ └── training_data.json ├── fine_tune.py ├── prepare_training_data.py └── prepare_unittest_data.py ├── src ├── __init__.py ├── action_processor.py ├── app.py ├── gpt_api_spec.py ├── gpt_client.py ├── md_converter.py ├── rate_limiter.py ├── selenium_ai_utils.py ├── test_steps.py ├── test_utils │ ├── __init__.py │ └── custom_assertions.py └── user_exceptions.py └── tests ├── data ├── offline │ ├── bogner_damen_neuheiten.html │ ├── empty_links_with_aria.html │ ├── empty_links_with_aria_extended.html │ ├── li_a_aria.html │ ├── li_a_aria_extended.html │ └── search_field.html └── online │ ├── bogner_damen_neuheiten.html │ ├── bogner_damen_product_page.html │ ├── bogner_herren.html │ ├── myWebshop_checkout.html │ ├── myWebshop_product_page_beanie_with_logo.html │ ├── myWebshop_searched_beanie_with_logo.html │ ├── myWebshop_start.html │ ├── playground_page2.html │ ├── playground_page3.html │ ├── playground_start.html │ └── training_data.jsonl ├── offline ├── conftest.py ├── test_HtmlToMarkdownConversion.py ├── test_InteractiveElementsConversion.py ├── test_action_processor.py ├── test_bogner_damen_neuheiten_test.py ├── test_markdown_empty_a.py ├── test_markdown_li_a.py └── test_markdown_search_field_button.py └── online ├── bogner_damen_neuheiten_test.py ├── bogner_damen_product_page_test.py ├── bogner_herren_test.py ├── common_test_functions.py ├── conftest.py ├── logs ├── claude_haiku_16k.csv ├── test_results_2024-04-11_16-11-59.csv ├── test_results_2024-04-11_17-11-59.csv ├── test_results_2024-04-12_14-26-21.csv ├── test_results_2024-04-12_14-36-39.csv ├── test_results_2024-04-12_15-25-34.csv ├── test_results_2024-04-15_14-05-26.csv ├── test_results_2024-04-15_15-41-37.csv ├── 
test_results_CLAUDE_HAIKU_16k_2024-04-15_14-55-38.csv ├── test_results_CLAUDE_HAIKU_16k_2024-04-17_12-18-01.csv ├── test_results_CLAUDE_HAIKU_16k_2024-04-17_12-44-48.csv ├── test_results_CLAUDE_SONNET_16k_2024-04-15_15-43-25.csv ├── test_results_chatGPT_trained_16k_2024-04-17_12-01-21.csv ├── test_results_chatGPT_untrained_16K_2024-04-15_15-08-42.csv └── test_results_chatGPT_untrained_16k_2024-04-15_16-03-40.csv ├── myWebshop_checkout_test.py ├── myWebshop_product_page_beanie_with_logo_test.py ├── myWebshop_searched_beanie_with_logo_test.py ├── myWebshop_start_test.py ├── playground_page2_test.py ├── playground_page3_test.py ├── playground_start_test.py └── training_data_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | *.py[cod] 4 | *.log 5 | .env 6 | # Package files 7 | *.jar 8 | 9 | # Maven 10 | target/ 11 | dist/ 12 | 13 | #output 14 | scripts/data/output/* 15 | tests/online/logs/ 16 | 17 | # Deployment 18 | deployment.yaml 19 | 20 | # JetBrains IDE 21 | .idea/ 22 | 23 | .DS_Store 24 | Thumbs.db 25 | 26 | test_summaries 27 | 28 | .ipynb_checkpoints/ 29 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Runtime image 2 | FROM python:3 3 | WORKDIR /opt/source 4 | 5 | COPY ./requirements.txt . 
6 | RUN pip install -r requirements.txt 7 | 8 | COPY ./src src 9 | 10 | EXPOSE 5000 11 | CMD ["python","src/app.py"] 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 P3TERX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Testup AI driver 3 | 4 | This project introduces an AI-driven tool to automate website testing through Selenium. By harnessing the power of natural language instructions and Selenium's comprehensive browser automation capabilities, it simplifies executing complex test scenarios on web applications. 
From adding products to shopping carts and navigating through pages to verifying webpage elements, this tool makes web testing accessible and efficient. 5 | 6 | This tool is brought to you by [testup.io](https://testup.io), 7 | the easist no-code test automation tool on the web. 8 | 9 | 10 | [![Explanation video](https://img.youtube.com/vi/YlGgri1IILM/0.jpg)](https://www.youtube.com/watch?v=YlGgri1IILM) 11 | 12 | ## Features 13 | 14 | - **DOM Tree compression** To limit the token count sent to the AI model the first step is to reduce the DOM to relevant information. 15 | - **Automatic ID generation** Ids are added to all DOM elements to ensure back and forth communication with the AI model. 16 | - **Conversation context** The model may use repeated requests to inform the AI about the result of its actions to make sure that the entire task can be completed. 17 | - **Selenium wrapper**: The `SeleniumAIUtils` class wraps the Selenium WebDriver and offers the additional AI functions. 18 | - **Training data**: By running parameter fine tuning against our sample data you can significantly increase the accuracy of the results. 19 | 20 | ## Usage 21 | 22 | Setup the selenium driver as usual then use our wrapper to execute prompts: 23 | 24 | ```python 25 | selenium_utils = SeleniumAiUtils() 26 | selenium_utils.set_local_driver(driver, url) 27 | 28 | selenium_utils.execute_prompt("put any product in shopping cart") 29 | ``` 30 | 31 | # Setup 32 | 33 | ## Setting up python and selenium 34 | 35 | 1. Make sure you have python installed and add the requireds from the `requirements.txt` file: 36 | ```bash 37 | pip install -r requirements.txt 38 | ``` 39 | 40 | 2. Make sure you have ChromeDriver installed and added to your system's PATH. Instructions for installing ChromeDriver can be found at [ChromeDriver - WebDriver for Chrome](https://chromedriver.chromium.org/). 41 | 42 | 3. 
If you are using an ide you can then add `src` and `examples` to your source root paths and add 43 | `test` to your test root path. Alternatively you can set it up in the shell: 44 | 45 | ```bash 46 | export PYTHONPATH=$(pwd)/src 47 | ``` 48 | 49 | ## Setting up the OpenAI connection 50 | 51 | Before running the tool, you must set up the following environment variable: 52 | 53 | - `OPENAI_API_KEY`: Your OpenAI API key for processing natural language commands. 54 | 55 | To limit the amount of traffic to the API you can optionally set the following environment variables: 56 | 57 | - `MAX_REQUESTS`: The maximum number of API requests per minute (default is 20). 58 | - `MAX_TOKENS`: The maximum number of tokens (input characters) per minute (default is 160000). 59 | 60 | You may want to consider setting up these variables in your environment or through a `.env` file in the project's root directory for security reasons. 61 | 62 | 63 | # Usage 64 | 65 | ## Interactive Mode 66 | 67 | This mode allows real-time natural language commands for web application interaction. Users can change URLs, navigate, click, input data, and exit as needed. 68 | To start using the tool: 69 | 70 | 1. Run the app: 71 | ```bash 72 | python examples/selenium_ai_app.py 73 | ``` 74 | 75 | ## Example Test Script 76 | 77 | If you want to write your own standalone test, please refer to our reference file 78 | `testup_mywebshop.py`, for an automation script example. It illustrates the use of the api 79 | using a simple selneium wrapper. It opens a demo web shop, adds a product to a shopping 80 | cart and checks out. Each step is tested with selenium core functions, demonstrating the mix between 81 | AI and traditional testing methods. 82 | 83 | 1. Open and customize `examples/testup_mywebshop.py` to fit your test scenario. 84 | 2. 
Execute the script: 85 | ```bash 86 | python examples/testup_mywebshop.py 87 | ``` 88 | 89 | The script initiates a browser session, performs the defined actions, and automatically closes the browser upon completion or error. 90 | 91 | You may Adapt `examples/testup_mywebshop.py` or create new scripts using the `SeleniumUtils` class 92 | for AI-driven Selenium interactions and the `execute_prompt` method for AI-driven navigation 93 | and actions. 94 | 95 | 96 | # Model Fine-Tuning 97 | Fine-tuning a model will help ensuring getting more accurate results from the gpt model, the process requires training data in a specific format, this section guides you through the process of preparing your training data and executing the fine-tuning 98 | by converting HTML content to Markdown, which is a preferred format for text-based machine learning tasks due to its simplicity, readability and reduced size. 99 | 100 | 101 | ## HTML to Markdown Conversion 102 | The first step of training is to reduce the HTML content from `scripts/data/input` directory. Execute the following command from your project's root directory: 103 | ```bash 104 | cd scripts 105 | python prepare_training_data.py 106 | ``` 107 | ## Fine-Tuning Your Model 108 | Once your data is prepared and converted to Markdown, you can fine-tune your model using the provided script. Execute the fine-tuning script with the following command, which will use the converted Markdown data for training: 109 | ```bash 110 | python3 fine_tune.py 111 | ``` 112 | The script will process the training data and initiate the fine-tuning job. It will continue to run until the fine-tuning is complete, at which point it will display the new model ID. 
113 | 114 | ## Setting the New Trained Model 115 | After the fine-tuning process is finished and you have your new model ID, you can set your environment to use this trained model for future tasks: 116 | ```bash 117 | export GPT_MODEL= 118 | ``` 119 | 120 | Replace `` with the actual model ID provided after the fine-tuning process. 121 | 122 | # Contributing 123 | Contributions are welcome! Fork the repository, make your changes, and submit a pull request. 124 | -------------------------------------------------------------------------------- /examples/example_wikipedia.py: -------------------------------------------------------------------------------- 1 | import time 2 | from selenium import webdriver 3 | from selenium.common.exceptions import NoSuchElementException 4 | from selenium.webdriver.common.by import By 5 | from selenium.webdriver.chrome.options import Options 6 | from selenium_ai_utils import SeleniumAiUtils # Assuming this is a custom module 7 | 8 | 9 | def setup_driver(): 10 | """Initializes and returns a Chrome WebDriver with specified options.""" 11 | chrome_options = Options() 12 | # chrome_options.add_argument("--headless") # Uncomment for headless mode 13 | # chrome_options.add_argument("window-size=1920x1080") # Set specific resolution 14 | driver = webdriver.Chrome(options=chrome_options) 15 | driver.set_window_size(1920, 1080) # Explicitly set the window size 16 | 17 | return driver 18 | 19 | 20 | def check_wikipedia_page_title(driver, expected_title): 21 | """Checks if the current page's title matches the expected title.""" 22 | try: 23 | title_element = driver.find_element(By.ID, "firstHeading") 24 | page_title = title_element.text 25 | assert page_title == expected_title, f"Title does not match. Expected '{expected_title}', got '{page_title}'." 26 | print(f"Title '{page_title}' matches.") 27 | except NoSuchElementException: 28 | print(f"Test failed, didn't navigate to {expected_title}. 
The element does not exist.") 29 | raise 30 | except AssertionError as e: 31 | print(e) 32 | raise 33 | 34 | 35 | def main(): 36 | driver = setup_driver() 37 | url = "https://wikipedia.com/" 38 | mySelenium = SeleniumAiUtils() 39 | 40 | try: 41 | mySelenium.set_local_driver(driver, url) 42 | 43 | # Test Steps 44 | mySelenium.execute_prompt("go to the English section and then search for 'Trudering-Riem'") 45 | 46 | mySelenium.execute_prompt("click on 'Trudering-Riem' and then navigate from there to 'messestadt-riem'") 47 | 48 | check_wikipedia_page_title(driver, "Messestadt Riem") 49 | 50 | mySelenium.execute_prompt("go to 'Munich-Ubahn'. The link to this site might be at the bottom of the page") 51 | 52 | check_wikipedia_page_title(driver, "Munich U-Bahn") 53 | 54 | print("Test finished successfully.") 55 | 56 | finally: 57 | # Cleanup 58 | driver.quit() 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /examples/selenium_ai_app.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.chrome.options import Options 3 | from selenium_ai_utils import SeleniumAiUtils # Make sure this is correctly imported 4 | import re 5 | import argparse 6 | from dotenv import load_dotenv 7 | 8 | 9 | 10 | def display_start_screen(): 11 | # ASCII art for "testup.io" 12 | ascii_art_testup = r""" 13 | 14 | .----------------. .----------------. .----------------. .----------------. .----------------. .----------------. .----------------. .----------------. .----------------. 15 | | .--------------. || .--------------. || .--------------. || .--------------. || .--------------. || .--------------. || .--------------. || .--------------. || .--------------. 
| 16 | | | _________ | || | _________ | || | _______ | || | _________ | || | _____ _____ | || | ______ | || | | || | _____ | || | ____ | | 17 | | | | _ _ | | || | |_ ___ | | || | / ___ | | || | | _ _ | | || ||_ _||_ _|| || | |_ __ \ | || | | || | |_ _| | || | .' `. | | 18 | | | |_/ | | \_| | || | | |_ \_| | || | | (__ \_| | || | |_/ | | \_| | || | | | | | | || | | |__) | | || | | || | | | | || | / .--. \ | | 19 | | | | | | || | | _| _ | || | '.___`-. | || | | | | || | | ' ' | | || | | ___/ | || | | || | | | | || | | | | | | | 20 | | | _| |_ | || | _| |___/ | | || | |`\____) | | || | _| |_ | || | \ `--' / | || | _| |_ | || | _ | || | _| |_ | || | \ `--' / | | 21 | | | |_____| | || | |_________| | || | |_______.' | || | |_____| | || | `.__.' | || | |_____| | || | (_) | || | |_____| | || | `.____.' | | 22 | | | | || | | || | | || | | || | | || | | || | | || | | || | | | 23 | | '--------------' || '--------------' || '--------------' || '--------------' || '--------------' || '--------------' || '--------------' || '--------------' || '--------------' | 24 | '----------------' '----------------' '----------------' '----------------' '----------------' '----------------' '----------------' '----------------' '----------------' 25 | 26 | """ 27 | 28 | # Welcoming message 29 | welcome_message = """ 30 | Welcome to AI Selenium Driver, brought to you by testup.io! 31 | 32 | This tool enhances your web interaction automation, allowing you to control web browsers through natural language. 33 | Whether you're executing predefined scripts or engaging in interactive commands, your approach to web testing and automation is simplified. 34 | 35 | Quick Start Guide: 36 | 37 | 1. **Initialization Parameters**: 38 | - Browser Resolution: Use '--resolution WIDTHxHEIGHT' to set the window size (default: 1920x1080). 39 | - Starting Website: Use '--url YOUR_WEBSITE_URL' to begin on a specific site. Without this, we'll start on https://https://mywebsite.testup.io/ as an example. 
40 | 41 | 2. **Runtime Interaction**: 42 | - Direct Command: Simply type natural language commands to interact with the web page, such as 'buy warm clothing' or 'complete this form with my details: Max Mustermann, Munich, Germany, phone 089/12345678'. 43 | - Change Website: Use the '/URL' command to switch sites. You'll be prompted to enter the new website address. 44 | - Type '/exit' '/q' or '/quit' to quit:" 45 | 46 | Start your streamlined web automation journey with us! For further assistance, refer to our documentation or contact support. 47 | """ 48 | 49 | # print(ascii_art_testup) 50 | print(welcome_message) 51 | 52 | 53 | def display_help_message(): 54 | help_message = """ 55 | Available Commands: 56 | - /URL [website] : Switch to a specific website. You can directly use '/URL website.com' or just '/URL' and then input the URL when prompted. 57 | - /exit, /q, /quit : Quit the interactive mode. 58 | - /help : Display this help message. 59 | 60 | How to Use: 61 | When you input commands, the AI processes your text and decides on an action: click, enter text, scroll, press enter, or finish. These actions are automatically executed on the webpage via the Selenium WebDriver, and you will see the changes directly in the browser. 62 | 63 | Example Commands: 64 | - "Click on the login button": The AI will attempt to find and click a login button on the current page. 65 | - "Enter 'John Doe' in the name field": The AI will look for a name field and enter the text 'John Doe'. 66 | - "Scroll down": The AI will scroll the webpage down. 67 | - "Press enter in the search box": The AI will simulate pressing the Enter key in a search box. 68 | - "Complete the form and submit": The AI will try to fill out a form with previously provided details and submit it. 69 | 70 | Command Tips: 71 | - Be Clear and Specific: Describe exactly what you want to do. Include any necessary details. 
72 | - Use Natural Language: Phrase your commands in a straightforward manner, as if you were asking another person to perform the task. 73 | - Provide Context: If your command depends on previous actions or certain conditions, make sure to include that context in your request. 74 | 75 | """ 76 | print(help_message) 77 | 78 | 79 | def run_interactive_mode(selenium_utils): 80 | print("Interactive mode. Type '/URL' to switch the website, or ('/exit','/q' or '/quit') to quit:") 81 | 82 | while True: 83 | # Prompt the user for input 84 | user_input = input("> ").strip() 85 | 86 | # Check for help command 87 | if user_input.lower() == '/help': 88 | display_help_message() 89 | continue 90 | 91 | # Check if the user wants to exit the interactive mode 92 | if user_input.lower() in ['/exit', '/quit', '/q']: 93 | print("Exiting interactive mode. Goodbye!") 94 | break 95 | 96 | split_input = user_input.split(' ') 97 | # Handle URL change request 98 | if split_input[0].lower() == '/url': 99 | if len(split_input) == 2: 100 | new_url = split_input[1] 101 | else: 102 | new_url = input("Enter the new URL: ").strip() 103 | original_new_url = new_url # Store the original input for user feedback 104 | # Automatically prepend 'http://' if necessary 105 | if not new_url.startswith(('http://', 'https://')): 106 | new_url = 'http://' + new_url 107 | while not validate_url(new_url): 108 | print("Invalid URL. 
Please enter a valid URL.") 109 | new_url = input("Enter the new URL: ").strip() 110 | original_new_url = new_url # Update the original input for user feedback 111 | if not new_url.startswith(('http://', 'https://')): 112 | new_url = 'http://' + new_url 113 | selenium_utils.go_to_url(new_url) 114 | print(f"Switched to {original_new_url}") # Show the URL as the user entered it 115 | continue 116 | 117 | # Handle empty input gracefully 118 | if not user_input: 119 | print("Please enter a command, or type 'exit' to quit.") 120 | continue 121 | 122 | try: 123 | # Execute the command through SeleniumUtils 124 | selenium_utils.execute_prompt(user_input) 125 | except Exception as e: 126 | # Catch and display any errors that occur during command execution 127 | print(f"An error occurred: {e}") 128 | continue 129 | 130 | # Clean up before exiting the interactive mode 131 | selenium_utils.driver.quit() 132 | print("The browser has been closed.") 133 | 134 | 135 | def validate_url(url): 136 | # Prepend 'http://' if no scheme is specified 137 | if not url.startswith(('http://', 'https://')): 138 | url = 'http://' + url 139 | 140 | # A simple regex to check for valid URL format 141 | regex = re.compile( 142 | r'^(?:http|ftp)s?://' # http:// or https:// 143 | r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... 144 | r'localhost|' # localhost... 145 | r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip 146 | r'(?::\d+)?' # optional port 147 | r'(?:/?|[/?]\S+)$', re.IGNORECASE) 148 | return re.match(regex, url) is not None 149 | 150 | 151 | def setup_arg_parser(): 152 | parser = argparse.ArgumentParser(description="Control the browser with your words. .") 153 | 154 | # Argument for setting browser resolution 155 | parser.add_argument('--resolution', default='1920x1080', help='Browser window resolution, default is 1920x1080.') 156 | parser.add_argument('--url', default='https://mywebsite.testup.io/', 157 | help='Website URL to start with. 
If not specified, will use an example website.') 158 | 159 | return parser 160 | 161 | 162 | def main(): 163 | 164 | 165 | # Load environment variables from .env file 166 | load_dotenv() 167 | display_start_screen() 168 | 169 | parser = setup_arg_parser() 170 | args = parser.parse_args() 171 | 172 | # Setup Chrome options 173 | chrome_options = Options() 174 | chrome_options.add_argument(f"window-size={args.resolution}") 175 | 176 | driver = webdriver.Chrome(options=chrome_options) 177 | driver.set_window_size(1920, 1080) # Explicitly set the window size 178 | 179 | url = "https://mywebsite.testup.io/" 180 | selenium_utils = SeleniumAiUtils() 181 | selenium_utils.set_local_driver(driver, url) 182 | 183 | run_interactive_mode(selenium_utils) 184 | 185 | 186 | if __name__ == "__main__": 187 | main() 188 | -------------------------------------------------------------------------------- /examples/testup_mywebshop.py: -------------------------------------------------------------------------------- 1 | import time 2 | from selenium.common.exceptions import NoSuchElementException 3 | from selenium.webdriver.common.by import By 4 | from selenium.webdriver.chrome.options import Options 5 | from selenium import webdriver 6 | from selenium_ai_utils import SeleniumAiUtils # Assuming this is a custom module 7 | 8 | 9 | # Setup 10 | def setup_driver(): 11 | """Initialize and configure the Chrome WebDriver.""" 12 | chrome_options = Options() 13 | # chrome_options.add_argument("--headless") # Uncomment to enable headless mode 14 | #chrome_options.add_argument("window-size=1920x1080") # Set specific resolution --- Doesn't seem to work 15 | driver = webdriver.Chrome(options=chrome_options) 16 | driver.set_window_size(1920, 1080) # Explicitly set the window size 17 | return driver 18 | 19 | 20 | def main(): 21 | url = "https://mywebsite.testup.io/" 22 | driver = setup_driver() 23 | 24 | try: 25 | selenium_utils = SeleniumAiUtils() 26 | selenium_utils.set_local_driver(driver, url) 27 
| 28 | # Test steps 29 | selenium_utils.execute_prompt("search for album") 30 | time.sleep(1) # Wait for the page to update 31 | selenium_utils.execute_prompt("add album to the cart ") 32 | time.sleep(1) # Wait for the page to update 33 | selenium_utils.execute_prompt("go to the shopping cart") 34 | 35 | try: 36 | input_element = selenium_utils.driver.find_element(By.CLASS_NAME, 'input-text.qty.text') 37 | quantity_value = int(input_element.get_attribute('value')) 38 | assert quantity_value == 1, "Product not added to cart." 39 | print("Product has been added to cart.") 40 | except NoSuchElementException: 41 | print("Did not navigate to shopping cart.") 42 | 43 | selenium_utils.execute_prompt( 44 | "go to the checkout page, the button to do so is at the bottom of the shopping cart page") 45 | 46 | # Validate navigation to the checkout page 47 | title_element = driver.find_element(By.CSS_SELECTOR, 'header.entry-header h1.entry-title') 48 | assert title_element.text == "Checkout", "Did not navigate to checkout, test failed." 
49 | print("Successfully navigated to checkout.") 50 | time.sleep(10) 51 | finally: 52 | driver.quit() 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | selenium 2 | webdriver-manager==3.4.2 3 | flask 4 | requests 5 | beautifulsoup4 6 | markdownify 7 | cachetools 8 | openai 9 | tiktoken 10 | python-dotenv -------------------------------------------------------------------------------- /scripts/fine_tune.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | from openai import OpenAI 4 | import os 5 | import argparse 6 | 7 | file_path = "data/output/training_data_md.jsonl" 8 | 9 | 10 | client = OpenAI( 11 | # This is the default and can be omitted 12 | api_key=os.environ.get("OPENAI_API_KEY"), 13 | ) 14 | 15 | response = client.files.create( 16 | file=Path(file_path), 17 | purpose="fine-tune", 18 | ) 19 | 20 | print(response) 21 | file_id = response.id 22 | 23 | 24 | try: 25 | fine_tune_response = client.fine_tuning.jobs.create( 26 | training_file=file_id, 27 | model="gpt-3.5-turbo-1106", 28 | ) 29 | except openai.APIConnectionError as e: 30 | print("The server could not be reached") 31 | print(e.__cause__) # an underlying Exception, likely raised within httpx. 
32 | except openai.RateLimitError as e: 33 | print("A 429 status code was received; we should back off a bit.") 34 | except openai.APIStatusError as e: 35 | print("Another non-200-range status code was received") 36 | print(e.status_code) 37 | print(e.response) 38 | 39 | print(fine_tune_response) 40 | print("Going to wait until the fine tuning job starts") 41 | time.sleep(60) 42 | specific_job_running = False 43 | count = 60 44 | while True: 45 | first_page = client.fine_tuning.jobs.list( 46 | limit=20, 47 | ) 48 | 49 | # Flag to check if the specific job is found and running 50 | job_found = False 51 | for job in first_page.data: 52 | # Check if the current job's training_file matches the specific file ID 53 | if job.training_file == file_id: 54 | job_found = True 55 | if job.status == "running": 56 | print(f"Job is still running. {count} seconds elapsed") 57 | specific_job_running = True 58 | elif job.status == "succeeded": 59 | specific_job_running = False 60 | print("Job has finished successfully") 61 | print(f"Fined Tuned Model={job.fine_tuned_model}") 62 | print(f"To use fined tune model, run export GPT_MODEL={job.fine_tuned_model}") 63 | else: 64 | specific_job_running = False 65 | print(f"Found the specific job, but its status is {job.status}.") 66 | break 67 | 68 | if not job_found: 69 | print("Unable to find job, please check openai cp") 70 | break 71 | 72 | if not specific_job_running: 73 | print("Going to stop since job is not running, please check openai cp") 74 | break 75 | 76 | count = count+1 77 | time.sleep(1) 78 | 79 | -------------------------------------------------------------------------------- /scripts/prepare_training_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import sys 4 | import os 5 | sys.path.append('src') 6 | from md_converter import convert_to_md 7 | 8 | 9 | input_dir_path = 'data/input' 10 | 11 | output_dir_path = 'data/output' 12 | 13 | 
os.makedirs(output_dir_path, exist_ok=True) 14 | 15 | for filename in os.listdir(input_dir_path): 16 | input_file_path = os.path.join(input_dir_path, filename) 17 | 18 | # Check if it's a file and not a directory 19 | if os.path.isfile(input_file_path): 20 | with open(input_file_path, 'r') as input_file: 21 | data = json.load(input_file) 22 | 23 | modified_json_line = [] 24 | 25 | for item in data: 26 | # Check each message for the relevant content 27 | for message in item["messages"]: 28 | # Look for the specific phrases in the content 29 | if message["role"] == "assistant": 30 | assistant_content = json.loads(message["content"]) 31 | try: 32 | message["content"] = assistant_content[0] 33 | except Exception as e: 34 | logging.error(e) 35 | else: 36 | if "Here is the Markdown" in message["content"] or "Here is the new markdown" in message["content"]: 37 | # Extract the HTML content 38 | html_content = message["content"] 39 | # Convert the HTML to Markdown 40 | markdown_content = convert_to_md(html_content) 41 | # Replace the original content with the new Markdown content 42 | message["content"] = markdown_content 43 | 44 | modified_json_line.append(json.dumps(item, ensure_ascii=False)) 45 | 46 | # Construct the output file path 47 | output_file_path = os.path.join(output_dir_path, os.path.splitext(filename)[0] + '_md.jsonl') 48 | 49 | # Write the modified JSON string to the output file 50 | with open(output_file_path, 'w', encoding='utf-8') as output_file: 51 | output_file.write('\n'.join(modified_json_line)) 52 | 53 | print(f"The modified content has been written to '{output_file_path}'") -------------------------------------------------------------------------------- /scripts/prepare_unittest_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import os 4 | sys.path.append('src') 5 | from md_converter import convert_to_md 6 | 7 | 8 | input_dir_path = 'data/input' 9 | 10 | output_dir_path = 
'data/output' 11 | 12 | os.makedirs(output_dir_path, exist_ok=True) 13 | 14 | for filename in os.listdir(input_dir_path): 15 | input_file_path = os.path.join(input_dir_path, filename) 16 | 17 | # Check if it's a file and not a directory 18 | if os.path.isfile(input_file_path): 19 | with open(input_file_path, 'r') as input_file: 20 | data = json.load(input_file) 21 | 22 | modified_json_line = [] 23 | 24 | for item in data: 25 | # Check each message for the relevant content 26 | for message in item["messages"]: 27 | # Look for the specific phrases in the content 28 | if "Here is the Markdown" in message["content"] or "Here is the new markdown" in message["content"]: 29 | # Extract the HTML content 30 | html_content = message["content"] 31 | # Convert the HTML to Markdown 32 | markdown_content = convert_to_md(html_content) 33 | # Replace the original content with the new Markdown content 34 | message["content"] = markdown_content 35 | 36 | modified_json_line.append(json.dumps(item, ensure_ascii=False)) 37 | 38 | # Construct the output file path 39 | output_file_path = os.path.join(output_dir_path, os.path.splitext(filename)[0] + '_unittest.jsonl') 40 | 41 | # Write the modified JSON string to the output file 42 | with open(output_file_path, 'w', encoding='utf-8') as output_file: 43 | output_file.write('\n'.join(modified_json_line)) 44 | 45 | print(f"The modified content has been written to '{output_file_path}'") -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thetaris/testup-ai-driver/3ad6dd64253b605e773453e2f56d895360d8ed08/src/__init__.py -------------------------------------------------------------------------------- /src/action_processor.py: -------------------------------------------------------------------------------- 1 | from md_converter import convert_to_md 2 | from cachetools import TTLCache 3 
| from gpt_client import GptClient, TokenLimitExceededError, RateLimitExceededError 4 | import re 5 | import logging 6 | import time 7 | import json 8 | 9 | # Configure logging 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 11 | 12 | 13 | def convert_keys_to_lowercase(data): 14 | if isinstance(data, dict): 15 | return {k.lower(): convert_keys_to_lowercase(v) for k, v in data.items()} 16 | elif isinstance(data, list): 17 | return [convert_keys_to_lowercase(item) for item in data] 18 | else: 19 | return data 20 | 21 | 22 | class DomAnalyzer: 23 | system_input_default = """ 24 | You are a testautomation system. Your job is to analyze the already executed actions and determine the next actions needed to complete the provided task. 25 | 26 | The actions that you can take are: 27 | 1. click (if you need to click something on the screen) 28 | 2. enter_text (if you believe you need to write something) 29 | 3. key_enter ( after enter_text action in search to apply the search) 30 | 4. scroll (this will trigger a function, that scrolls down in the current webpage. Use this if you can't find the element but expect it to be there) 31 | 5. finish (at the end to know that we are done or if all actions have been executed) 32 | 6. error ( the given task cannot be accomplished) 33 | 34 | Each entry is an object of 5 fields, the fields are the following: 35 | 1. action: can be one of: click, enter_text, wait, error, or finish. 36 | 2. css_selector: (only needed for click or enter-text), this is the css id of the html element('li', 'button', 'input', 'textarea', 'a'), example #id. 37 | 3. text: this is optional and contains the text you want to input in case of an enter-text action. 38 | 4. explanation: this is why you chose this action. 39 | 5. 
description: detailed description of the action 40 | The output format must be {"steps":[{ "action":..,"css_selector":...., "text":..., "explanation":..., "description":...}]} 41 | """ 42 | 43 | user_input_default = """ 44 | \n\nPerform the task delimited by triple quotes: \"\"\"@@@task@@@\"\"\" 45 | \n @@@variables@@@ 46 | """ 47 | markdown_input_default = """ 48 | \n Here is the Markdown representation of the currently visible section of the page on which you will execute the actions. Please note that you can scroll if you unable to proceed with the task using the available elements: \n @@@markdown@@@""" 49 | 50 | def __init__(self, cache_ttl=3600, cache_maxsize=1000): 51 | self.cache = TTLCache(maxsize=1000, ttl=3600) 52 | self.log_cache = TTLCache(maxsize=1000, ttl=3600) 53 | self.md_cache = TTLCache(maxsize=1000, ttl=3600) 54 | self.gpt_client = GptClient() 55 | 56 | def get_actions(self, session_id, user_prompt, html_doc, actions_executed, variables_string="", duplicate=False, valid=True, last_action=None, user_input=user_input_default, system_input=system_input_default, return_history=False): 57 | 58 | markdown = convert_to_md(html_doc) 59 | 60 | user_input = user_input.replace("@@@markdown@@@", markdown) 61 | system_input = system_input.replace("@@@markdown@@@", markdown) 62 | 63 | user_input = user_input.replace("@@@task@@@", user_prompt) 64 | system_input = system_input.replace("@@@task@@@", user_prompt) 65 | 66 | user_input = user_input.replace("@@@variables@@@", variables_string) 67 | system_input = system_input.replace("@@@variables@@@", variables_string) 68 | 69 | markdown_input = self.markdown_input_default.replace("@@@markdown@@@", markdown) 70 | 71 | max_retries = 5 72 | attempts = 0 73 | formatted = True 74 | id_used = True 75 | 76 | while attempts < max_retries: 77 | if session_id not in self.cache: 78 | system_content = {'role': 'system', 'message': system_input, 'removable': False} 79 | markdown_content = {'role': 'user', 'message': 
markdown_input, 'removable': False} 80 | user_content = {'role': 'user', 'message': user_input, 'removable': False} 81 | 82 | try: 83 | response = self.gpt_client.make_request([system_content, markdown_content, user_content]) 84 | self.cache[session_id] = [system_content, markdown_content, user_content] 85 | self.log_cache[session_id] = [system_content, {'role': 'user', 'message': html_doc, 'removable': False}, user_content] 86 | self.md_cache[session_id] = markdown 87 | extracted_response = self.extract_steps(response) 88 | if not extracted_response or extracted_response == {}: # Check if the response is empty 89 | raise ValueError("Empty or invalid response") 90 | 91 | first_step = extracted_response.get('steps', [{}])[0] # Safely get the first step 92 | if first_step.get('css_selector', '').find('#') == -1 and first_step.get('action') not in ['finish', 'error', 'scroll']: 93 | raise ValueError("Condition not met: cssSelector does not use ID or action is not 'finish'") 94 | 95 | if return_history is True: 96 | extracted_response['history'] = self.md_cache[session_id] 97 | return extracted_response 98 | 99 | except ValueError as e: 100 | logging.warn(f"Failed with value error: {e}") 101 | attempts += 1 102 | 103 | # Check the specific error message to set formatted and id_used accordingly 104 | if str(e) == "Condition not met: cssSelector does not use ID or action is not 'finish'": 105 | formatted = True 106 | id_used = False 107 | last_action = first_step 108 | else: 109 | last_action = response 110 | formatted = False 111 | id_used = True # Assuming the default state is that IDs are used 112 | duplicate = False 113 | # logging.info(f"Failed to get response, next attempt#{attempts}: {e}") 114 | time.sleep(1) 115 | continue # Retry the loop 116 | except TokenLimitExceededError as e: 117 | logging.error(f"Failed: {e} ") 118 | if self.clean_prompt(self.cache[session_id]): 119 | continue 120 | break 121 | except RateLimitExceededError as e: 122 | 
logging.error(f"Failed with rate limit exceeded: {e} " 123 | f"\n going to sleep for 10 seconds and try again") 124 | formatted = True 125 | attempts += 1 126 | time.sleep(10) 127 | continue 128 | except Exception as e: 129 | formatted = True 130 | attempts += 1 131 | logging.warn(f"Failed to get response, next attempt#{attempts}: {e} ") 132 | time.sleep(1) 133 | continue 134 | else: 135 | executed_actions_str = '\n'.join([f"{idx+1}.{self.format_action(action)}" for idx, action in enumerate(actions_executed)]) 136 | follow_up = self.resolve_follow_up(duplicate, valid, formatted, id_used, self.format_action(last_action), executed_actions_str, user_prompt, variables_string) 137 | if markdown == self.md_cache[session_id]: 138 | prefix_message = f"Again, Here is the markdown representation of the currently visible section of the page on which you will execute the actions: {markdown}\n\n" if attempts == max_retries-1 else "" 139 | prefix_message_log = f"Again, Here is the markdown representation of the currently visible section of the page on which you will execute the actions: {html_doc}\n\n" if attempts == max_retries-1 else "" 140 | if not id_used or not formatted: 141 | follow_up_content = [{'role': 'user', 'message': f"{prefix_message}{follow_up}", 'removable': True}] 142 | assistant_content = {'role': 'assistant', 'message': self.format_action(last_action), 'removable': True} 143 | follow_up_content_log = [{'role': 'user', 'message': f"{prefix_message_log}{follow_up}", 'removable': True}] 144 | else: 145 | follow_up_content = [{'role': 'user', 'message': f"{prefix_message}{follow_up}", 'removable': False}] 146 | assistant_content = {'role': 'assistant', 'message': self.format_action(last_action), 'removable': False} 147 | follow_up_content_log = [{'role': 'user', 'message': f"{prefix_message_log}{follow_up}", 'removable': False}] 148 | else: 149 | follow_up_content = [{'role': 'user', 'message': f"Here is the new markdown " 150 | f"representation of the currently 
visible section of the page on which you will execute the actions: " 151 | f"{markdown}\n\n{follow_up}", 'removable': False}] 152 | follow_up_content_log = [{'role': 'user', 'message': f"Here is the new markdown: {html_doc}\n\n{follow_up}"}] 153 | assistant_content = {'role': 'assistant', 'message': self.format_action(last_action), 'removable': False} 154 | self.md_cache[session_id] = markdown 155 | 156 | # add assistant_content, follow_up_content to the cache 157 | 158 | try: 159 | response = self.gpt_client.make_request([*self.cache[session_id], assistant_content, *follow_up_content]) 160 | self.cache[session_id].append(assistant_content) 161 | self.cache[session_id].extend(follow_up_content) 162 | 163 | self.log_cache[session_id].append(assistant_content) 164 | self.log_cache[session_id].extend(follow_up_content_log) 165 | 166 | extracted_response = self.extract_steps(response) 167 | 168 | if not extracted_response or extracted_response == {}: 169 | raise ValueError("Empty or invalid response") 170 | 171 | first_step = extracted_response.get('steps', [{}])[0] # Safely get the first step 172 | if first_step.get('css_selector', '').find('#') == -1 and first_step.get('action') not in ['finish', 'error', 'scroll']: 173 | raise ValueError("Condition not met: cssSelector does not use ID or action is not 'finish'") 174 | if return_history is True: 175 | extracted_response['history'] = self.md_cache[session_id] 176 | 177 | return extracted_response 178 | 179 | except ValueError as e: 180 | logging.warn(f"Failed with value error: {e}") 181 | attempts += 1 182 | last_action = response 183 | # Check the specific error message to set formatted and id_used accordingly 184 | if str(e) == "Condition not met: cssSelector does not use ID or action is not 'finish'": 185 | formatted = True 186 | id_used = False 187 | else: 188 | formatted = False 189 | id_used = True # Assuming the default state is that IDs are used 190 | duplicate = False 191 | # logging.info(f"Failed to get 
response, next attempt#{attempts}: {e}") 192 | time.sleep(1) 193 | continue # Retry the loop 194 | except TokenLimitExceededError as e: 195 | logging.error(f"Failed: {e} ") 196 | if self.clean_prompt(self.cache[session_id]): 197 | continue 198 | break 199 | except RateLimitExceededError as e: 200 | logging.error(f"Failed with rate limit exceeded: {e} " 201 | f"\n going to sleep for 10 seconds and try again") 202 | formatted = True 203 | attempts += 1 204 | time.sleep(10) 205 | continue 206 | 207 | except Exception as e: 208 | attempts += 1 209 | logging.info(f"Failed to get response, next attempt#{attempts}: {e} ") 210 | time.sleep(1) 211 | continue 212 | if return_history is True: 213 | extracted_response['history'] = self.md_cache[session_id] 214 | return {"steps": [{"action": "Error", "text": "Failed to get action"}]} 215 | 216 | def format_action(self, action): 217 | if action is None: 218 | return "" 219 | 220 | if isinstance(action, str): 221 | return action 222 | 223 | if isinstance(action, dict): 224 | return f"{{\"action\": \"{action['action']}\", \"css_selector\": \"{action['css_selector']}\", \"Text\": \"{action['text']}\", \"explanation\": \"{action['explanation']}\", \"description\": \"{action['description']}\"}}" 225 | 226 | return str(action) 227 | 228 | 229 | def variableMap_to_string(self, input_map): 230 | if not input_map: 231 | return "" 232 | 233 | # Initialize an empty string 234 | output_string = "\n\nYou can use the information given by this set of variables to complete your task:\n" 235 | # Iterate through the map to format the string 236 | for index, (key, value) in enumerate(input_map.items(), start=1): 237 | output_string += f"-{key} = {value}\n" 238 | # Remove the last newline character for clean output 239 | return output_string.rstrip() 240 | 241 | def resolve_follow_up(self, duplicate, valid, formatted, id_used, last_action, executed_actions_str, task, variables_string): 242 | if id_used is False: 243 | return f"Please note that 
action {last_action} you provided does not use css id, the needed element has an id," \ 244 | f" can you try again and provide the id as css_selector instead" 245 | if formatted is False: 246 | return f"Please note that the last action you provided is not in the required json format," \ 247 | f" The output format should be {{\"steps\":[{{ \"action\":..,\"css_selector\":...., \"text\":..., \"explanation\":..., \"description\":...}}]}}, if task is achieved return finish action" 248 | 249 | if valid is False: 250 | return f"Please note that the last action you provided is invalid or not interactable in selenium," \ 251 | f" so i need another way to perform the task" 252 | 253 | if duplicate is True: 254 | return f"Please note that the last action you provided is duplicate," \ 255 | f" I need the next action to perform the task" 256 | 257 | return f"Actions Executed so far are \n {executed_actions_str}\n " \ 258 | f"please provide the next action to achieve the task delimited by triple quotes:" \ 259 | f" \"\"\"{task} or return finish action if the task is completed\"\"\"\n {variables_string}" 260 | 261 | def extract_steps(self, json_str): 262 | try: 263 | data = json.loads(json_str) 264 | if 'steps' in data: 265 | return convert_keys_to_lowercase(data) 266 | except json.JSONDecodeError: 267 | pass 268 | pattern = r'(\{.*"steps".*\})' 269 | matches = re.findall(pattern, json_str, re.DOTALL) 270 | 271 | for match in matches: 272 | try: 273 | potential_json = match 274 | parsed_json = json.loads(potential_json) 275 | if 'steps' in parsed_json: 276 | return convert_keys_to_lowercase(parsed_json) 277 | except json.JSONDecodeError as e: 278 | continue 279 | 280 | pattern = r'"steps":\s*\[(.*?\})\s*\]' 281 | matches = re.findall(pattern, json_str, re.DOTALL) 282 | 283 | # If matches are found, try to parse each one 284 | if matches: 285 | # Build a proper JSON string by enclosing the matched content in an array 286 | for match in matches: 287 | potential_json = '[' + match + 
']' 288 | try: 289 | # Attempt to parse the JSON 290 | parsed_json = json.loads(potential_json) 291 | # If successful, return the converted data 292 | return convert_keys_to_lowercase({'steps': parsed_json}) 293 | except json.JSONDecodeError as e: 294 | logging.debug(f"Failed to parse JSON for matched steps: {e}") 295 | continue 296 | 297 | logging.debug("No valid 'steps' array found or all parsing attempts failed.") 298 | return {} 299 | 300 | def print_prompt(self, session_id): 301 | logging.info("###########################################" 302 | "###########################################") 303 | # logging.info(f"history: {self.log_cache[session_id]}") 304 | logging.info("###########################################" 305 | "###########################################") 306 | 307 | def clean_prompt(self, prompt_history): 308 | # going to delete the first removable assistant/user prompt 309 | logging.info("Going to clean prompt history") 310 | if len(prompt_history) < 2: 311 | logging.info("History is less than 2 objects, will not attempt to clear") 312 | 313 | for i in range(len(prompt_history) - 1): 314 | if (prompt_history[i]['role'] == 'assistant' and prompt_history[i + 1]['role'] == 'user' 315 | and prompt_history[i]['removable'] is True and prompt_history[i + 1]['removable'] is True): 316 | logging.info(f"Going to delete [{prompt_history[i]},\n{prompt_history[i + 1]}]") 317 | del prompt_history[i:i+2] 318 | return True 319 | logging.info("Was not able to find removable items") 320 | return False 321 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request, Response, redirect 2 | import logging 3 | import os 4 | from action_processor import DomAnalyzer 5 | 6 | app = Flask(__name__) 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | logger = logging.getLogger("Server") 10 | 11 | 12 | 
dom_analyzer = DomAnalyzer() 13 | @app.route('/api/v1/prompt/', methods=['POST']) 14 | def process_prompt(session_id): 15 | data = request.get_json() 16 | 17 | # Extract 'html_doc' and 'user_prompt' from the JSON payload 18 | html_doc = data.get('html_doc') 19 | user_prompt = data.get('user_prompt') 20 | actions_executed = data.get("actions_executed") 21 | variable_map = data.get("variables_map") 22 | duplicate = data.get("duplicate", False) # Defaulting to False if not provided 23 | valid = data.get("valid", False) # Defaulting to False if not provided 24 | last_action = data.get("last_action") 25 | variable_map_str = dom_analyzer.variableMap_to_string(variable_map) 26 | analysis_result = dom_analyzer.get_actions(session_id, user_prompt, html_doc, actions_executed, variable_map_str, duplicate, valid, last_action) 27 | if check_action_status(analysis_result): 28 | dom_analyzer.print_prompt(session_id) 29 | logging.info(f"Returning: {analysis_result}") 30 | 31 | return jsonify(analysis_result) 32 | 33 | def check_action_status(data): 34 | # Check if 'steps' key exists and is a list 35 | if 'steps' in data and isinstance(data['steps'], list): 36 | # Iterate through each step in the 'steps' list 37 | for step in data['steps']: 38 | # Use .get() to safely get 'action', defaulting to None if not found 39 | action_value = step.get('action', None) 40 | 41 | # Check if action_value is "error" or "finish" 42 | if action_value in ['error', 'finish']: 43 | return True 44 | # Return False if no action is "error" or "finish", or if 'steps' is not as expected 45 | return False 46 | 47 | 48 | if __name__ == '__main__': 49 | port = int(os.getenv("PORT", "5000")) 50 | app.run(host='0.0.0.0', port=port) 51 | -------------------------------------------------------------------------------- /src/gpt_api_spec.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def payload_chat_completions_json(model, contents): 4 | messages = [] 5 | 6 | if 
contents: 7 | for content in contents: 8 | messages.append({"role": content['role'], "content": content['message']}) 9 | 10 | return { 11 | "model": model, 12 | "messages": messages, 13 | "temperature": 0 14 | } 15 | 16 | def payload_chat_completions_claude_json(model, contents): 17 | messages = [] 18 | is_first = True 19 | if contents: 20 | for content in contents: 21 | if content['role'] == "system": 22 | system = content['message'] 23 | else: 24 | messages.append({"role": content['role'], "content": content['message']}) 25 | if is_first: 26 | messages.append({"role": "assistant", "content": "Ok, what is your task"}) 27 | is_first= False 28 | 29 | 30 | return { 31 | "system": system, 32 | "model": model, 33 | "messages": messages, 34 | "temperature": 0, 35 | "max_tokens": 2000 36 | } 37 | 38 | api_map_json = { 39 | "gpt-3.5-turbo-1106": { 40 | "endpoint": "https://api.openai.com/v1/chat/completions", 41 | "payload": payload_chat_completions_json 42 | }, 43 | "gpt-3.5-turbo": { 44 | "endpoint": "https://api.openai.com/v1/chat/completions", 45 | "payload": payload_chat_completions_json 46 | }, 47 | "claude-3-opus-20240229": { 48 | "endpoint": "https://api.anthropic.com/v1/messages", 49 | "payload": payload_chat_completions_claude_json 50 | }, 51 | "claude-3-haiku-20240307": { 52 | "endpoint": "https://api.anthropic.com/v1/messages", 53 | "payload": payload_chat_completions_claude_json 54 | }, 55 | "claude-3-sonnet-20240229": { 56 | "endpoint": "https://api.anthropic.com/v1/messages", 57 | "payload": payload_chat_completions_claude_json 58 | } 59 | 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/gpt_client.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import os 3 | import logging 4 | import tiktoken 5 | from pathlib import Path 6 | 7 | import requests 8 | import json 9 | 10 | from rate_limiter import RateLimiter 11 | from gpt_api_spec import 
api_map_json 12 | from dotenv import load_dotenv 13 | # Configure logging 14 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 15 | 16 | 17 | class GptClient: 18 | # Load environment variables from .env file 19 | path_to_env_file = Path(__file__).parent.parent / '.env' 20 | load_dotenv(dotenv_path=path_to_env_file, verbose = True) 21 | 22 | gpt_api_key = os.getenv("OPENAI_API_KEY") 23 | x_api_key = os.getenv('X_API_KEY', "") 24 | max_requests_per_minute = os.getenv("MAX_REQUESTS", 10) 25 | max_tokens_per_minute = os.getenv("MAX_TOKENS", 160000) 26 | gpt_model = os.getenv("GPT_MODEL", "gpt-3.5-turbo-1106") 27 | 28 | def __init__(self): 29 | logging.info("initiating GPT client") 30 | self.operation_lock = threading.Lock() 31 | if "gpt" in self.gpt_model: 32 | self.rate_limiter = RateLimiter(max_requests_per_minute=20, max_tokens_per_minute=160000) 33 | elif "claude-3-opus" in self.gpt_model: 34 | self.rate_limiter = RateLimiter(max_requests_per_minute=10, max_tokens_per_minute=20000) 35 | elif "claude-3-haiku" in self.gpt_model: 36 | self.rate_limiter = RateLimiter(max_requests_per_minute=10, max_tokens_per_minute=50000) 37 | elif "claude-3-sonnet" in self.gpt_model: 38 | self.rate_limiter = RateLimiter(max_requests_per_minute=10, max_tokens_per_minute=40000) 39 | else: 40 | raise Exception("No Rate Limiter for this model configured") 41 | 42 | def num_tokens_from_messages(self, messages, model="gpt-3.5-turbo-1106"): 43 | """Return the number of tokens used by a list of messages.""" 44 | try: 45 | encoding = tiktoken.encoding_for_model(model) 46 | except KeyError: 47 | print("Warning: model not found. 
Using cl100k_base encoding.") 48 | encoding = tiktoken.get_encoding("cl100k_base") 49 | if model in { 50 | "gpt-3.5-turbo-1106", 51 | "gpt-3.5-turbo-0613", 52 | "gpt-3.5-turbo-16k-0613", 53 | "gpt-4-0314", 54 | "gpt-4-32k-0314", 55 | "gpt-4-0613", 56 | "gpt-4-32k-0613", 57 | }: 58 | tokens_per_message = 3 59 | tokens_per_name = 1 60 | elif model == "gpt-3.5-turbo-0301": 61 | tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n 62 | tokens_per_name = -1 # if there's a name, the role is omitted 63 | elif "gpt-3.5-turbo" in model: 64 | print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.") 65 | return messages.num_tokens_from_messages(messages, model="gpt-3.5-turbo-1106") 66 | elif "gpt-4" in model: 67 | print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") 68 | return messages.num_tokens_from_messages(messages, model="gpt-4-0613") 69 | else: 70 | raise NotImplementedError( 71 | f"""num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" 72 | ) 73 | num_tokens = 0 74 | for message in messages: 75 | num_tokens += tokens_per_message 76 | for key, value in message.items(): 77 | num_tokens += len(encoding.encode(value)) 78 | if key == "name": 79 | num_tokens += tokens_per_name 80 | num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> 81 | return num_tokens 82 | 83 | def make_request(self, contents): 84 | 85 | if "gpt-3.5-turbo-1106" in self.gpt_model: 86 | api_info = api_map_json["gpt-3.5-turbo-1106"] 87 | elif "gpt-3.5-turbo" in self.gpt_model: 88 | api_info = api_map_json["gpt-3.5-turbo"] 89 | elif "claude-3-opus-20240229" in self.gpt_model: 90 | api_info = api_map_json["claude-3-opus-20240229"] 91 | elif "claude-3-haiku-20240307" in self.gpt_model: 92 | api_info = api_map_json["claude-3-haiku-20240307"] 93 | elif "claude-3-sonnet-20240229" in self.gpt_model: 94 | api_info = api_map_json["claude-3-sonnet-20240229"] 95 | 96 | 97 | 98 | payload = api_info['payload'](self.gpt_model, contents) 99 | num_token = self.num_tokens_from_messages(payload["messages"]) 100 | self.rate_limiter.wait_and_check(num_token) 101 | 102 | if self.gpt_model in ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"]: 103 | headers = { 104 | "Content-Type": "application/json", 105 | "anthropic-version": "2023-06-01", 106 | "x-api-key": f"{self.x_api_key}" 107 | } 108 | 109 | response = requests.post(api_info['endpoint'], headers=headers, json=payload) 110 | return self.extract_response_claude(response) 111 | 112 | else: 113 | headers = { 114 | "Content-Type": "application/json", 115 | "Authorization": f"Bearer {self.gpt_api_key}" 116 | } 117 | 118 | logging.info("##############################################################################################################") 119 | logging.info(f"sending: {contents}") 120 | 
            logging.info("##############################################################################################################")

            response = requests.post(api_info['endpoint'], headers=headers, json=payload)
            return self.extract_response_gpt(response)



    def extract_response_gpt(self, response):
        # Parse an OpenAI chat/text completion HTTP response and return the
        # assistant's text (with any ```json fences stripped). Maps the API
        # error codes 'context_length_exceeded' / 'rate_limit_exceeded' to the
        # dedicated exception types below.
        response_data = response.json()

        logging.info(f"Response from openai {response_data}")

        response_object_type = response_data.get('object', '')

        if "choices" in response_data and len(response_data["choices"]) > 0:
            if response_object_type == 'chat.completion':
                # Handling response for 'chat.completion'
                assistant_message_json_str = response_data["choices"][0].get("message", {}).get("content", "")
            elif response_object_type == 'text_completion':
                # Handling response for 'text_completion'
                assistant_message_json_str = response_data["choices"][0].get("text", "")
            else:
                raise Exception("Unknown response object type.")

            # Feed the reported usage back into the rate limiter.
            total_tokens = response_data["usage"].get("total_tokens", 0)
            self.rate_limiter.add_token_consumed(total_tokens)

            try:
                # Strip markdown code fences; the content is NOT parsed as
                # JSON here despite the comment/except below.
                # NOTE(review): replace()/strip() cannot raise JSONDecodeError,
                # so this except clause is dead code — confirm intent.
                assistant_message_json_str = assistant_message_json_str.replace("```json", "").replace("```", "").strip()
                assistant_message = assistant_message_json_str
            except json.JSONDecodeError:
                raise Exception("Error decoding the extracted content as JSON.")

            return assistant_message
        elif "error" in response_data and response_data["error"].get("code", "") == 'context_length_exceeded':
            raise TokenLimitExceededError(response_data["error"].get("message", "Token limit exceeded"))
        elif "error" in response_data and response_data["error"].get("code", "") == 'rate_limit_exceeded':
            raise RateLimitExceededError(response_data["error"].get("message", "Rate limit exceeded"))
        else:
            raise Exception(f"No content found in response or invalid response format:{response_data}")

    def extract_response_claude(self, response):
        # Anthropic counterpart of extract_response_gpt: errors are keyed by
        # 'type' instead of 'code'.
        response_data = response.json()

        logging.info(f"Response from anthropic {response_data}")

        response_object_type = response_data.get('type', '')

        if "content" in response_data and len(response_data["content"]) > 0:
            if response_object_type == 'message':
                # Handling response for 'chat.completion'
                assistant_message_json_str = response_data["content"][0].get("text", {})
            # elif response_object_type == 'text_completion':
            #     # Handling response for 'text_completion'
            #     assistant_message_json_str = response_data["choices"][0].get("text", "")
            else:
                raise Exception("Unknown response object type.")

            # NOTE(review): only input_tokens are counted against the limiter
            # here (the GPT path counts total tokens) — confirm if intended.
            total_tokens = response_data["usage"].get("input_tokens", 0)
            self.rate_limiter.add_token_consumed(total_tokens)

            try:
                # Strip markdown code fences; see the note in
                # extract_response_gpt — this except clause is unreachable.
                assistant_message_json_str = assistant_message_json_str.replace("```json", "").replace("```", "").strip()
                assistant_message = assistant_message_json_str
            except json.JSONDecodeError:
                raise Exception("Error decoding the extracted content as JSON.")

            return assistant_message
        elif "error" in response_data and response_data["error"].get("type", "") == 'context_length_exceeded':
            raise TokenLimitExceededError(response_data["error"].get("message", "Token limit exceeded"))
        elif "error" in response_data and response_data["error"].get("type", "") == 'rate_limit_error':
            raise RateLimitExceededError(response_data["error"].get("message", "Rate limit exceeded"))
        else:
            raise Exception(f"No content found in response or invalid response format:{response_data}")


class TokenLimitExceededError(Exception):
    """GPT token limit exceeded"""
    pass


class RateLimitExceededError(Exception):
    """GPT rate limit exceeded"""
    pass
-------------------------------------------------------------------------------- /src/md_converter.py: --------------------------------------------------------------------------------
from bs4 import BeautifulSoup, Comment
from markdownify import markdownify as md
import re
import sys
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def clean_markdown(markdown):
    # Remove base64 encoded images
    cleaned_markdown = re.sub(r'!\[[^\]]*\]\(data:image\/[a-zA-Z]+;base64,[^\)]+\)', '', markdown)

    # Remove CSS styles - targeting patterns that start with a period or within style tags
    cleaned_markdown = re.sub(r'