├── README.md ├── downloads └── output.mp3 ├── mistapikey.txt ├── news.txt ├── openaiapikey2.txt ├── requirements.txt ├── screenshot.js └── trendz.py /README.md: -------------------------------------------------------------------------------- 1 | # ai-vision-scraping 2 | Scrape Webpages with AI Vision 3 | 4 | 1. Install node.js 5 | 2. npm init (leave all spaces empty) 6 | 3. npm install puppeteer 7 | 4. npm install puppeteer-extra 8 | 5. npm install puppeteer-extra-plugin-stealth 9 | 6. pip install -r requirements.txt (check versions) 10 | 7. Set all needed API KEYS 11 | 8. Set Your URLs to screenshot 12 | 9. Run trendz.py 13 | 14 | Adjust prompts / llm models / etc to your needs 15 | 16 | Kris 17 | -------------------------------------------------------------------------------- /downloads/output.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllAboutAI-YT/ai-vision-scraping/ed5ed4a35fa121f6c91eebbe510b1798a3488e6f/downloads/output.mp3 -------------------------------------------------------------------------------- /mistapikey.txt: -------------------------------------------------------------------------------- 1 | YOUR LLM API KEY 2 | -------------------------------------------------------------------------------- /news.txt: -------------------------------------------------------------------------------- 1 | <> 2 | 3 | From all the information above, write a SHORT REPORT on the Sports Games you are tracking suitable for a VOICEOVER: -------------------------------------------------------------------------------- /openaiapikey2.txt: -------------------------------------------------------------------------------- 1 | YOUR OPENAI API KEY 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai>=1.0.0 2 | python-dotenv==0.20.0 3 | requests==2.28.0 4 | 
pydub==0.25.1 5 | simpleaudio==1.0.4 6 | -------------------------------------------------------------------------------- /screenshot.js: -------------------------------------------------------------------------------- 1 | const puppeteer = require('puppeteer-extra'); 2 | const StealthPlugin = require('puppeteer-extra-plugin-stealth'); 3 | 4 | // Apply the stealth plugin to avoid detection 5 | puppeteer.use(StealthPlugin()); 6 | 7 | // Get the URL from the command line arguments 8 | const url = process.argv[2]; 9 | const timeout = 8000; 10 | 11 | (async () => { 12 | // Launch the browser 13 | const browser = await puppeteer.launch({ 14 | headless: true, // Assuming you want to run headless 15 | }); 16 | 17 | // Open a new page 18 | const page = await browser.newPage(); 19 | 20 | // Set the viewport for the page 21 | await page.setViewport({ 22 | width: 1200, 23 | height: 2000, 24 | deviceScaleFactor: 1, 25 | }); 26 | 27 | // Navigate to the URL 28 | await page.goto(url, { 29 | waitUntil: "networkidle0", // Wait for the network to be idle 30 | timeout: timeout, 31 | }); 32 | 33 | 34 | // Take a screenshot after the page loads 35 | await page.screenshot({ 36 | path: "screenshot.jpg", 37 | fullPage: false, 38 | }); 39 | 40 | // Close the browser 41 | await browser.close(); 42 | })(); 43 | -------------------------------------------------------------------------------- /trendz.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import subprocess 3 | import base64 4 | import os 5 | from dotenv import load_dotenv 6 | from mistralai.client import MistralClient 7 | from mistralai.models.chat_completion import ChatMessage 8 | import os 9 | import time 10 | import requests 11 | from pydub import AudioSegment 12 | import simpleaudio as sa 13 | 14 | # Function to open a file and return its contents as a string 15 | def open_file(filepath): 16 | with open(filepath, 'r', encoding='utf-8') as infile: 17 | return 
infile.read() 18 | 19 | def save_file(filepath, content): 20 | with open(filepath, 'a', encoding='utf-8') as outfile: 21 | outfile.write(content) 22 | 23 | # ANSI escape code for colors 24 | PINK = '\033[95m' 25 | CYAN = '\033[96m' 26 | YELLOW = '\033[93m' 27 | RESET_COLOR = '\033[0m' 28 | 29 | # Set the OpenAI API key 30 | api_key = open_file('openaiapikey2.txt') 31 | 32 | # Initialize the OpenAI client with the API key 33 | client = OpenAI(api_key=api_key) 34 | 35 | def get_mistral_response(user_content): 36 | """ 37 | Interact with the Mistral API using a streaming approach. Print and return the response. 38 | """ 39 | # Initialize the Mistral client with your API key 40 | api_key = open_file("mistapikey.txt") 41 | model = "mistral-medium" 42 | client = MistralClient(api_key=api_key) 43 | 44 | # Prepare a list of ChatMessage objects with the user's content 45 | messages = [ChatMessage(role="user", content=user_content)] 46 | 47 | # Initialize an empty string to accumulate responses 48 | accumulated_response = "" 49 | 50 | # Streaming approach 51 | try: 52 | for chunk in client.chat_stream(model=model, messages=messages): 53 | if chunk.choices: 54 | for choice in chunk.choices: 55 | if choice.delta and choice.delta.content: 56 | print(f"{CYAN}{choice.delta.content}{RESET_COLOR}", end='') 57 | accumulated_response += choice.delta.content 58 | except Exception as e: 59 | print(f"An error occurred during streaming: {e}") 60 | 61 | return accumulated_response 62 | 63 | def image_b64(image): 64 | with open(image, "rb") as f: 65 | return base64.b64encode(f.read()).decode() 66 | 67 | def url2screenshot(url): 68 | print(f"{CYAN}Crawling {url}{RESET_COLOR}") 69 | 70 | if os.path.exists("screenshot.jpg"): 71 | os.remove("screenshot.jpg") 72 | 73 | result = subprocess.run( 74 | ["node", "screenshot.js", url], 75 | capture_output=True, 76 | text=True 77 | ) 78 | 79 | if not os.path.exists("screenshot.jpg"): 80 | print("ERROR") 81 | return "Failed to scrape the website" 82 | 
83 | # New Code: Open the screenshot.jpg file 84 | try: 85 | os_command = f'start screenshot.jpg' 86 | os.system(os_command) 87 | except Exception as e: 88 | print(f"An error occurred while opening the image: {e}") 89 | 90 | b64_image = image_b64("screenshot.jpg") 91 | return b64_image 92 | 93 | def visionExtract(b64_image, prompt): 94 | response = client.chat.completions.create( 95 | model="gpt-4-vision-preview", 96 | messages=[ 97 | { 98 | "role": "system", 99 | "content": "You a web scraper, your job is to extract the following information about Sports Game the image: 1. Score 2. Basic Statistics 3. The Best Performing Player (If applicable). Use a structured format:", 100 | } 101 | ] + [ 102 | { 103 | "role": "user", 104 | "content": [ 105 | { 106 | "type": "image_url", 107 | "image_url": f"data:image/jpeg;base64,{b64_image}", 108 | }, 109 | { 110 | "type": "text", 111 | "text": prompt, 112 | } 113 | ] 114 | } 115 | ], 116 | max_tokens=1024, 117 | ) 118 | 119 | message = response.choices[0].message 120 | message_text = message.content 121 | 122 | if "ANSWER_NOT_FOUND" in message_text: 123 | print("ERROR: Answer not found") 124 | return "I was unable to find the answer on that website. Please pick another one" 125 | else: 126 | print(f"{YELLOW}GPT: {message_text}{RESET_COLOR}") 127 | return message_text 128 | 129 | def visionCrawl(url, prompt): 130 | b64_image = url2screenshot(url) 131 | 132 | print(f"{PINK}Image captured{RESET_COLOR}") 133 | 134 | if b64_image == "Failed to scrape the website": 135 | return "I was unable to crawl that site. Please pick a different one." 
136 | else: 137 | return visionExtract(b64_image, prompt) 138 | 139 | # Function for processing multiple URLs 140 | def process_urls(urls, prompt): 141 | all_responses = [] # List to store all responses 142 | # Iterate through each URL 143 | for url in urls: 144 | print(f"{CYAN}Processing {url}...{RESET_COLOR}") 145 | response = visionCrawl(url, prompt) 146 | all_responses.append(response) 147 | return all_responses 148 | 149 | def text_to_speech_and_download(text, download_path): 150 | CHUNK_SIZE = 1024 151 | url = "https://api.elevenlabs.io/v1/text-to-speech/(voice_id)" 152 | 153 | headers = { 154 | "Accept": "audio/mpeg", 155 | "Content-Type": "application/json", 156 | "xi-api-key": "YOUR ELEVEN LABS API KEY" 157 | } 158 | 159 | data = { 160 | "text": text, 161 | "model_id": "eleven_monolingual_v1", 162 | "voice_settings": { 163 | "stability": 0.5, 164 | "similarity_boost": 0.5 165 | } 166 | } 167 | 168 | response = requests.post(url, json=data, headers=headers) 169 | 170 | # Ensure the response is valid 171 | if response.status_code != 200: 172 | print("Error: Unable to generate speech.") 173 | return 174 | 175 | file_path = os.path.join(download_path, 'output.mp3') 176 | with open(file_path, 'wb') as f: 177 | for chunk in response.iter_content(chunk_size=CHUNK_SIZE): 178 | if chunk: 179 | f.write(chunk) 180 | 181 | print(f"Downloaded MP3 file to {file_path}") 182 | 183 | 184 | 185 | # List of URLs to process 186 | urls_to_process = [ 187 | "INSERT URLS TO SCREENSHOT" 188 | # Add more URLs here... 189 | ] 190 | 191 | # Prompt for the visionCrawl function 192 | prompt = "Extract the following information about the Sports Game image: 1. Score 2. Basic Statistics 3. The Best Performing Player (If applicable). 
Use a structured format:" 193 | 194 | # Process the URLs 195 | responses = process_urls(urls_to_process, prompt) 196 | 197 | # Convert responses to a single string 198 | responses_str = '\n'.join([resp for resp in responses if resp]) 199 | 200 | # Read the file and replace placeholder with the responses string 201 | news_content = open_file("news.txt").replace("<>", responses_str) 202 | 203 | # Get the response from Mistral 204 | code = get_mistral_response(news_content) 205 | 206 | download_folder = 'downloads' # Ensure this folder exists or create it 207 | text_to_speech_and_download(code, download_folder) 208 | 209 | 210 | --------------------------------------------------------------------------------