├── README.md ├── downloads └── output.mp3 ├── mistapikey.txt ├── news.txt ├── openaiapikey2.txt ├── requirements.txt ├── screenshot.js └── trendz.py /README.md: -------------------------------------------------------------------------------- 1 | # ai-vision-scraping 2 | Scrape Webpages with AI Vision 3 | 4 | 1. Install node.js 5 | 2. npm init (leave all spaces empty) 6 | 3. npm install puppeteer 7 | 4. npm install puppeteer-extra 8 | 5. npm install puppeteer-extra-plugin-stealth 9 | 6. pip install -r requirements.txt (check versions) 10 | 7. Set all needed API KEYS 11 | 8. Set Your URLs to screenshot 12 | 9. Run trendz.py 13 | 14 | Adjust prompts / llm models / etc to your needs 15 | 16 | Kris 17 | -------------------------------------------------------------------------------- /downloads/output.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllAboutAI-YT/ai-vision-scraping/ed5ed4a35fa121f6c91eebbe510b1798a3488e6f/downloads/output.mp3 -------------------------------------------------------------------------------- /mistapikey.txt: -------------------------------------------------------------------------------- 1 | YOUR LLM API KEY 2 | -------------------------------------------------------------------------------- /news.txt: -------------------------------------------------------------------------------- 1 | <> 2 | 3 | From all the information above, write a SHORT REPORT on the Sports Games you are tracking suitable for a VOICEOVER: -------------------------------------------------------------------------------- /openaiapikey2.txt: -------------------------------------------------------------------------------- 1 | YOUR OPENAI API KEY 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai>=1.0.0 2 | python-dotenv==0.20.0 3 | requests==2.28.0 4 | 
pydub==0.25.1 5 | simpleaudio==1.0.4 6 | -------------------------------------------------------------------------------- /screenshot.js: -------------------------------------------------------------------------------- 1 | const puppeteer = require('puppeteer-extra'); 2 | const StealthPlugin = require('puppeteer-extra-plugin-stealth'); 3 | 4 | // Apply the stealth plugin to avoid detection 5 | puppeteer.use(StealthPlugin()); 6 | 7 | // Get the URL from the command line arguments 8 | const url = process.argv[2]; 9 | const timeout = 8000; 10 | 11 | (async () => { 12 | // Launch the browser 13 | const browser = await puppeteer.launch({ 14 | headless: true, // Assuming you want to run headless 15 | }); 16 | 17 | // Open a new page 18 | const page = await browser.newPage(); 19 | 20 | // Set the viewport for the page 21 | await page.setViewport({ 22 | width: 1200, 23 | height: 2000, 24 | deviceScaleFactor: 1, 25 | }); 26 | 27 | // Navigate to the URL 28 | await page.goto(url, { 29 | waitUntil: "networkidle0", // Wait for the network to be idle 30 | timeout: timeout, 31 | }); 32 | 33 | 34 | // Take a screenshot after the page loads 35 | await page.screenshot({ 36 | path: "screenshot.jpg", 37 | fullPage: false, 38 | }); 39 | 40 | // Close the browser 41 | await browser.close(); 42 | })(); 43 | -------------------------------------------------------------------------------- /trendz.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import subprocess 3 | import base64 4 | import os 5 | from dotenv import load_dotenv 6 | from mistralai.client import MistralClient 7 | from mistralai.models.chat_completion import ChatMessage 8 | import os 9 | import time 10 | import requests 11 | from pydub import AudioSegment 12 | import simpleaudio as sa 13 | 14 | # Function to open a file and return its contents as a string 15 | def open_file(filepath): 16 | with open(filepath, 'r', encoding='utf-8') as infile: 17 | return 
infile.read() 18 | 19 | def save_file(filepath, content): 20 | with open(filepath, 'a', encoding='utf-8') as outfile: 21 | outfile.write(content) 22 | 23 | # ANSI escape code for colors 24 | PINK = '\033[95m' 25 | CYAN = '\033[96m' 26 | YELLOW = '\033[93m' 27 | RESET_COLOR = '\033[0m' 28 | 29 | # Set the OpenAI API key 30 | api_key = open_file('openaiapikey2.txt') 31 | 32 | # Initialize the OpenAI client with the API key 33 | client = OpenAI(api_key=api_key) 34 | 35 | def get_mistral_response(user_content): 36 | """ 37 | Interact with the Mistral API using a streaming approach. Print and return the response. 38 | """ 39 | # Initialize the Mistral client with your API key 40 | api_key = open_file("mistapikey.txt") 41 | model = "mistral-medium" 42 | client = MistralClient(api_key=api_key) 43 | 44 | # Prepare a list of ChatMessage objects with the user's content 45 | messages = [ChatMessage(role="user", content=user_content)] 46 | 47 | # Initialize an empty string to accumulate responses 48 | accumulated_response = "" 49 | 50 | # Streaming approach 51 | try: 52 | for chunk in client.chat_stream(model=model, messages=messages): 53 | if chunk.choices: 54 | for choice in chunk.choices: 55 | if choice.delta and choice.delta.content: 56 | print(f"{CYAN}{choice.delta.content}{RESET_COLOR}", end='') 57 | accumulated_response += choice.delta.content 58 | except Exception as e: 59 | print(f"An error occurred during streaming: {e}") 60 | 61 | return accumulated_response 62 | 63 | def image_b64(image): 64 | with open(image, "rb") as f: 65 | return base64.b64encode(f.read()).decode() 66 | 67 | def url2screenshot(url): 68 | print(f"{CYAN}Crawling {url}{RESET_COLOR}") 69 | 70 | if os.path.exists("screenshot.jpg"): 71 | os.remove("screenshot.jpg") 72 | 73 | result = subprocess.run( 74 | ["node", "screenshot.js", url], 75 | capture_output=True, 76 | text=True 77 | ) 78 | 79 | if not os.path.exists("screenshot.jpg"): 80 | print("ERROR") 81 | return "Failed to scrape the website" 82 | 
83 | # New Code: Open the screenshot.jpg file 84 | try: 85 | os_command = f'start screenshot.jpg' 86 | os.system(os_command) 87 | except Exception as e: 88 | print(f"An error occurred while opening the image: {e}") 89 | 90 | b64_image = image_b64("screenshot.jpg") 91 | return b64_image 92 | 93 | def visionExtract(b64_image, prompt): 94 | response = client.chat.completions.create( 95 | model="gpt-4-vision-preview", 96 | messages=[ 97 | { 98 | "role": "system", 99 | "content": "You a web scraper, your job is to extract the following information about Sports Game the image: 1. Score 2. Basic Statistics 3. The Best Performing Player (If applicable). Use a structured format:", 100 | } 101 | ] + [ 102 | { 103 | "role": "user", 104 | "content": [ 105 | { 106 | "type": "image_url", 107 | "image_url": f"data:image/jpeg;base64,{b64_image}", 108 | }, 109 | { 110 | "type": "text", 111 | "text": prompt, 112 | } 113 | ] 114 | } 115 | ], 116 | max_tokens=1024, 117 | ) 118 | 119 | message = response.choices[0].message 120 | message_text = message.content 121 | 122 | if "ANSWER_NOT_FOUND" in message_text: 123 | print("ERROR: Answer not found") 124 | return "I was unable to find the answer on that website. Please pick another one" 125 | else: 126 | print(f"{YELLOW}GPT: {message_text}{RESET_COLOR}") 127 | return message_text 128 | 129 | def visionCrawl(url, prompt): 130 | b64_image = url2screenshot(url) 131 | 132 | print(f"{PINK}Image captured{RESET_COLOR}") 133 | 134 | if b64_image == "Failed to scrape the website": 135 | return "I was unable to crawl that site. Please pick a different one." 
136 | else: 137 | return visionExtract(b64_image, prompt) 138 | 139 | # Function for processing multiple URLs 140 | def process_urls(urls, prompt): 141 | all_responses = [] # List to store all responses 142 | # Iterate through each URL 143 | for url in urls: 144 | print(f"{CYAN}Processing {url}...{RESET_COLOR}") 145 | response = visionCrawl(url, prompt) 146 | all_responses.append(response) 147 | return all_responses 148 | 149 | def text_to_speech_and_download(text, download_path): 150 | CHUNK_SIZE = 1024 151 | url = "https://api.elevenlabs.io/v1/text-to-speech/(voice_id)" 152 | 153 | headers = { 154 | "Accept": "audio/mpeg", 155 | "Content-Type": "application/json", 156 | "xi-api-key": "YOUR ELEVEN LABS API KEY" 157 | } 158 | 159 | data = { 160 | "text": text, 161 | "model_id": "eleven_monolingual_v1", 162 | "voice_settings": { 163 | "stability": 0.5, 164 | "similarity_boost": 0.5 165 | } 166 | } 167 | 168 | response = requests.post(url, json=data, headers=headers) 169 | 170 | # Ensure the response is valid 171 | if response.status_code != 200: 172 | print("Error: Unable to generate speech.") 173 | return 174 | 175 | file_path = os.path.join(download_path, 'output.mp3') 176 | with open(file_path, 'wb') as f: 177 | for chunk in response.iter_content(chunk_size=CHUNK_SIZE): 178 | if chunk: 179 | f.write(chunk) 180 | 181 | print(f"Downloaded MP3 file to {file_path}") 182 | 183 | 184 | 185 | # List of URLs to process 186 | urls_to_process = [ 187 | "INSERT URLS TO SCREENSHOT" 188 | # Add more URLs here... 189 | ] 190 | 191 | # Prompt for the visionCrawl function 192 | prompt = "Extract the following information about the Sports Game image: 1. Score 2. Basic Statistics 3. The Best Performing Player (If applicable). 
Use a structured format:" 193 | 194 | # Process the URLs 195 | responses = process_urls(urls_to_process, prompt) 196 | 197 | # Convert responses to a single string 198 | responses_str = '\n'.join([resp for resp in responses if resp]) 199 | 200 | # Read the file and replace placeholder with the responses string 201 | news_content = open_file("news.txt").replace("<>", responses_str) 202 | 203 | # Get the response from Mistral 204 | code = get_mistral_response(news_content) 205 | 206 | download_folder = 'downloads' # Ensure this folder exists or create it 207 | text_to_speech_and_download(code, download_folder) 208 | 209 | 210 | --------------------------------------------------------------------------------