├── Marketplace_Discord_Comments_Only.ipynb ├── Marketplace_Discord_Tutorial2.ipynb └── README.md /Marketplace_Discord_Comments_Only.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "5be1b06a", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#Import Dependencies" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "7f8a41a7", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "#Configure Chromedriver" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "id": "eb428574", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Initialize Chrome WebDriver" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "id": "c7798c54", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "#Setup search parameters" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 5, 46 | "id": "11aa27b7", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Set up base URL\n", 51 | "\n", 52 | "# Visit the website\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 6, 58 | "id": "9ad04618", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# Locate the button with aria-label=\"Decline optional cookies\" (Europe)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 7, 68 | "id": "aef2be3d", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Locate the button for the login pop-up with aria-label=\"Close\"" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "5c5d1435", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "#Scroll down to load all results\n", 83 | "\n", 84 | " # Get the initial scroll position\n", 85 | "\n", 86 | " \n", 87 | " # Scroll down to the bottom of the page using 
JavaScript\n", 88 | "\n", 89 | " # Get the new scroll position\n", 90 | "\n", 91 | " # Check if we've reached the bottom\n", 92 | " \n", 93 | " # Update the scroll position\n", 94 | " " 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "id": "d3bd7641", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Retrieve the HTML\n", 105 | "\n", 106 | "# Use BeautifulSoup to parse the HTML\n", 107 | "\n", 108 | "#Close the browser\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 9, 114 | "id": "c08984be", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Find all link elements\n", 119 | "\n", 120 | "# Only keep items where the text matches your search terms and desired location\n", 121 | "\n", 122 | "# Create empty list to store product data\n", 123 | "\n", 124 | "# Store the items url and text into a list of dictionaries\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "id": "39a1421f", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# Create an empty list to store product data\n", 135 | "\n", 136 | " # Regular expression to find numeric values\n", 137 | " \n", 138 | " \n", 139 | " # Extracting prices\n", 140 | " # Iterate through lines to find the first line with numbers\n", 141 | "\n", 142 | " # Extract title\n", 143 | "\n", 144 | " # Extract location\n", 145 | "\n", 146 | " # Add extracted data to a list of dictionaries\n" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "id": "6372b1d4", 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# Convert extracted data into a Pandas Dataframe\n", 157 | "\n", 158 | "# Sort the DataFrame by the \"price\" column in ascending order\n", 159 | "\n", 160 | "# Get the 10 cheapest entries\n" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 11, 166 | "id": "b46b8e32", 167 | "metadata": {}, 168 
| "outputs": [], 169 | "source": [ 170 | "# Create an empty message\n", 171 | "\n", 172 | "# Iterate over each row in the DataFrame containing the 10 cheapest items\n", 173 | "\n", 174 | " # Append the title, price, and URL of each item to the message string\n", 175 | "\n", 176 | "\n", 177 | "# URL of the Discord channel where the message will be posted\n", 178 | "\n", 179 | "\n", 180 | "# Payload containing the message to be sent\n", 181 | "\n", 182 | "\n", 183 | "# Headers including the authorization token for the Discord API\n", 184 | "\n", 185 | "\n", 186 | "# Send a POST request to the Discord API with the payload and headers\n" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3 (ipykernel)", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.10.9" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } 212 | -------------------------------------------------------------------------------- /Marketplace_Discord_Tutorial2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "69c86e9e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#Import Dependencies\n", 11 | "from selenium import webdriver\n", 12 | "from selenium.webdriver.chrome.options import Options\n", 13 | "from selenium.webdriver.chrome.service import Service\n", 14 | "from webdriver_manager.chrome import ChromeDriverManager\n", 15 | "from selenium.webdriver.common.by import By\n", 16 | "import os\n", 17 | "import time\n", 18 | "from bs4 import BeautifulSoup\n", 19 | "import re\n", 20 | "import pandas as pd\n", 
import requests

# Extra names needed by the fixes below. Everything else (os, re, time, pd,
# BeautifulSoup, webdriver, Service, ChromeDriverManager, By) comes from the
# import cell at the top of the notebook.
from urllib.parse import quote, urlencode, urljoin

from selenium.common.exceptions import WebDriverException


#Configure Chromedriver

chrome_install = ChromeDriverManager().install()

# The driver binary is looked up in the folder of whatever install() returned.
# Only Windows uses the ".exe" suffix, so pick the file name per platform
# instead of hard-coding "chromedriver.exe".
folder = os.path.dirname(chrome_install)
driver_filename = "chromedriver.exe" if os.name == "nt" else "chromedriver"
chromedriver_path = os.path.join(folder, driver_filename)


# Initialize Chrome WebDriver
browser = webdriver.Chrome(
    service=Service(chromedriver_path),
)


#Setup search parameters
city = "toronto"
product = "Iphone 13"
min_price = 300
max_price = 600
days_listed = 1


def build_search_url(city, product, min_price, max_price, days_listed):
    """Return the Marketplace search URL for the given filters.

    Every value is percent-encoded, so search terms containing spaces or
    characters such as ``&`` or ``#`` cannot corrupt the query string.
    """
    query = urlencode(
        {
            "query": product,
            "minPrice": min_price,
            "maxPrice": max_price,
            "daysSinceListed": days_listed,
            "exact": "false",
        },
        quote_via=quote,  # encode spaces as %20 rather than "+"
    )
    return f"https://www.facebook.com/marketplace/{quote(city, safe='')}/search?{query}"


# Set up base URL
url = build_search_url(city, product, min_price, max_price, days_listed)

# Visit the website
browser.get(url)


def click_if_present(browser, xpath, success_message, failure_message):
    """Click the first element matching ``xpath``; return True if it was clicked.

    This is best-effort: a missing or unclickable element is reported with
    ``failure_message`` and does not stop the notebook. Only Selenium errors
    are caught, so KeyboardInterrupt and programming errors still surface.
    """
    try:
        browser.find_element(By.XPATH, xpath).click()
    except WebDriverException:
        print(failure_message)
        return False
    print(success_message)
    return True


# Locate the button with aria-label="Decline optional cookies" (Europe)
# The original reused the login pop-up's aria-label="Close" XPath here, so the
# cookie banner itself was never targeted.
# NOTE(review): the element tag/role of the cookie button is not visible from
# this notebook, so the XPath matches on aria-label only - confirm on the live page.
click_if_present(
    browser,
    '//*[@aria-label="Decline optional cookies"]',
    "Decline optional cookies button clicked!",
    "Could not find or click the optional cookies button!",
)

# Locate the button for the login pop-up with aria-label="Close"
click_if_present(
    browser,
    '//div[@aria-label="Close" and @role="button"]',
    "Close button clicked!",
    "Could not find or click the close button!",
)


#Scroll down to load all results
SCROLL_PAUSE_SECONDS = 4   # time given to the page to load more results after each scroll
MAX_SCROLLS = 100          # safety cap so a page that keeps growing cannot loop forever

try:
    # Get the initial scroll position
    last_height = browser.execute_script("return document.body.scrollHeight")

    for _ in range(MAX_SCROLLS):

        # Scroll down to the bottom of the page using JavaScript
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_SECONDS)

        # Get the new scroll position
        new_height = browser.execute_script("return document.body.scrollHeight")

        # Check if we've reached the bottom
        if new_height == last_height:
            break

        # Update the scroll position
        last_height = new_height

        print("scrolled")
    else:
        print(f"Stopped after {MAX_SCROLLS} scrolls; more results may exist.")

except Exception as e:
    print(f"An error occurred: {e}")


# Retrieve the HTML
html = browser.page_source

# Use BeautifulSoup to parse the HTML
soup = BeautifulSoup(html, 'html.parser')

#Close the browser
# quit() ends the whole WebDriver session; close() only closes the current window.
browser.quit()


# Find all link elements
links = soup.find_all('a')

# Only keep items where the text matches your search terms and desired location
product_lower = product.lower()
city_lower = city.lower()
iphone_links = [
    link for link in links
    if product_lower in link.text.lower() and city_lower in link.text.lower()
]

# Create empty list to store product data
iphone_data = []

# Store the items url and text into a list of dictionaries
for iphone_link in iphone_links:
    href = iphone_link.get('href')
    if not href:
        # An anchor without an href cannot be linked to, and would break URL cleanup later.
        continue
    text = '\n'.join(iphone_link.stripped_strings)
    iphone_data.append({'text': text, 'url': href})


# Regular expression to find numeric values (compiled once, raw string)
NUMERIC_PATTERN = re.compile(r'\d[\d,.]*')
FACEBOOK_BASE_URL = "https://www.facebook.com"


def extract_price(lines):
    """Return the first number found in ``lines`` as a float, or None.

    Commas are treated as thousands separators ("1,200" -> 1200.0). A line whose
    numeric text cannot be converted (for example "1.2.3") is skipped instead of
    raising.
    """
    for line in lines:
        match = NUMERIC_PATTERN.search(line)
        if match:
            try:
                return float(match.group().replace(',', ''))
            except ValueError:
                continue
    return None


def parse_listing(item):
    """Turn one ``{'text', 'url'}`` record into a listing dictionary.

    The last two text lines are taken as title and location, as in the original
    notebook. Returns None when the link text has fewer than two lines. The URL
    loses its query string and is resolved against the Facebook base URL so that
    a relative href becomes a complete link.
    """
    lines = item['text'].split('\n')
    if len(lines) < 2:
        return None
    return {
        'title': lines[-2],
        'price': extract_price(lines),
        'location': lines[-1],
        'url': urljoin(FACEBOOK_BASE_URL, item['url'].split('?', 1)[0]),
    }


# Create an empty list to store product data
extracted_data = []

for item in iphone_data:
    listing = parse_listing(item)
    if listing is None:
        print("listing skipped: title/location lines not found")
        continue

    # The price is computed fresh for every listing, so a miss can no longer
    # inherit the previous listing's price; 0 is a valid price, hence "is not None".
    if listing['price'] is not None:
        print(f"Price extracted: {listing['price']}")
    else:
        print("price not found")

    # Add extracted data to a list of dictionaries
    extracted_data.append(listing)


# Convert extracted data into a Pandas Dataframe
# Explicit columns keep the sort below valid even when nothing was scraped.
items_df = pd.DataFrame(extracted_data, columns=['title', 'price', 'location', 'url'])

# Sort the DataFrame by the "price" column in ascending order (missing prices sort last)
sorted_df = items_df.sort_values(by='price')

# Get the 10 cheapest entries
cheapest_10 = sorted_df.head(10)


DISCORD_MAX_CHARS = 2000  # Discord's limit for the "content" field of one message


def chunk_entries(entries, limit=DISCORD_MAX_CHARS):
    """Group ``entries`` into strings of at most ``limit`` characters each.

    Entries are kept whole where possible; a single entry longer than ``limit``
    is truncated to fit.
    """
    chunks = []
    current = ""
    for entry in entries:
        entry = entry[:limit]
        if current and len(current) + len(entry) > limit:
            chunks.append(current)
            current = ""
        current += entry
    if current:
        chunks.append(current)
    return chunks


# Build one text entry per row in the DataFrame containing the 10 cheapest items
entries = []
for index, row in cheapest_10.iterrows():
    price_text = "n/a" if pd.isna(row['price']) else row['price']

    # Title, price, and URL of each item
    entries.append(f"Title: {row['title']}\nPrice: {price_text}\nURL: {row['url']}\n\n")

# Full message text (kept for inspection; it is posted in chunks below)
message = "".join(entries)


# URL of the Discord channel where the message will be posted.
# Read from the environment so the value is not saved in the notebook; the
# placeholder is used when the variable is not set.
discord_url = os.environ.get("DISCORD_REQUEST_URL", 'PASTE REQUEST URL HERE')


# Headers including the authorization token for the Discord API
# NOTE(review): if this is a personal account token rather than a bot token,
# automating it may be against Discord's terms - confirm, or use a webhook/bot.
headers = {"Authorization": os.environ.get("DISCORD_AUTH_TOKEN", "PASTE AUTHORIZATION TOKEN HERE")}


# Send a POST request to the Discord API with the payload and headers
response = None
if not entries:
    print("No listings matched; nothing was posted to Discord.")

for chunk in chunk_entries(entries):
    # Payload containing the message to be sent
    payload = {"content": chunk}

    response = requests.post(discord_url, json=payload, headers=headers, timeout=30)
    if not response.ok:
        print(f"Discord rejected the message: {response.status_code} {response.text}")
        break

5 | Get instant Facebook Marketplace notifications with 6 | Swoopa using the code 7 | "TTC15" to get a free 7-day trial and 15% off any plan. 8 |

9 | --------------------------------------------------------------------------------