├── Marketplace_Discord_Comments_Only.ipynb
├── Marketplace_Discord_Tutorial2.ipynb
└── README.md


/Marketplace_Discord_Comments_Only.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "5be1b06a",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "#Import Dependencies"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "id": "7f8a41a7",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "#Configure Chromedriver"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "id": "eb428574",
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# Initialize Chrome WebDriver"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 4,
 36 |    "id": "c7798c54",
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "#Setup search parameters"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 5,
 46 |    "id": "11aa27b7",
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# Set up base URL\n",
 51 |     "\n",
 52 |     "# Visit the website\n"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 6,
 58 |    "id": "9ad04618",
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "# Locate the button with aria-label=\"Decline optional cookies\" (Europe)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 7,
 68 |    "id": "aef2be3d",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "# Locate the button for the login pop-up with aria-label=\"Close\""
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "id": "5c5d1435",
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "#Scroll down to load all results\n",
 83 |     "\n",
 84 |     "    # Get the initial scroll position\n",
 85 |     "\n",
 86 |     "    \n",
 87 |     "        # Scroll down to the bottom of the page using JavaScript\n",
 88 |     "\n",
 89 |     "        # Get the new scroll position\n",
 90 |     "\n",
 91 |     "        # Check if we've reached the bottom\n",
 92 |     "        \n",
 93 |     "        # Update the scroll position\n",
 94 |     "        "
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 8,
100 |    "id": "d3bd7641",
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# Retrieve the HTML\n",
105 |     "\n",
106 |     "# Use BeautifulSoup to parse the HTML\n",
107 |     "\n",
108 |     "#Close the browser\n"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 9,
114 |    "id": "c08984be",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "# Find all link elements\n",
119 |     "\n",
120 |     "# Only keep items where the text matches your search terms and desired location\n",
121 |     "\n",
122 |     "# Create empty list to store product data\n",
123 |     "\n",
124 |     "# Store the items url and text into a list of dictionaries\n"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 10,
130 |    "id": "39a1421f",
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "# Create an empty list to store product data\n",
135 |     "\n",
136 |     "    # Regular expression to find numeric values\n",
137 |     "    \n",
138 |     "    \n",
139 |     "    # Extracting prices\n",
140 |     "    # Iterate through lines to find the first line with numbers\n",
141 |     "\n",
142 |     "    # Extract title\n",
143 |     "\n",
144 |     "    # Extract location\n",
145 |     "\n",
146 |     "    # Add extracted data to a list of dictionaries\n"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "id": "6372b1d4",
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "# Convert extracted data into a Pandas Dataframe\n",
157 |     "\n",
158 |     "# Sort the DataFrame by the \"price\" column in ascending order\n",
159 |     "\n",
160 |     "# Get the 10 cheapest entries\n"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 11,
166 |    "id": "b46b8e32",
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "# Create an empty message\n",
171 |     "\n",
172 |     "# Iterate over each row in the DataFrame containing the 10 cheapest items\n",
173 |     "\n",
174 |     "    # Append the title, price, and URL of each item to the message string\n",
175 |     "\n",
176 |     "\n",
177 |     "# URL of the Discord channel where the message will be posted\n",
178 |     "\n",
179 |     "\n",
180 |     "# Payload containing the message to be sent\n",
181 |     "\n",
182 |     "\n",
183 |     "# Headers including the authorization token for the Discord API\n",
184 |     "\n",
185 |     "\n",
186 |     "# Send a POST request to the Discord API with the payload and headers\n"
187 |    ]
188 |   }
189 |  ],
190 |  "metadata": {
191 |   "kernelspec": {
192 |    "display_name": "Python 3 (ipykernel)",
193 |    "language": "python",
194 |    "name": "python3"
195 |   },
196 |   "language_info": {
197 |    "codemirror_mode": {
198 |     "name": "ipython",
199 |     "version": 3
200 |    },
201 |    "file_extension": ".py",
202 |    "mimetype": "text/x-python",
203 |    "name": "python",
204 |    "nbconvert_exporter": "python",
205 |    "pygments_lexer": "ipython3",
206 |    "version": "3.10.9"
207 |   }
208 |  },
209 |  "nbformat": 4,
210 |  "nbformat_minor": 5
211 | }
212 | 


--------------------------------------------------------------------------------
/Marketplace_Discord_Tutorial2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "69c86e9e",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "#Import Dependencies\n",
 11 |     "import os\n",
 12 |     "import re\n",
 13 |     "import time\n",
 14 |     "from urllib.parse import quote, urlencode, urljoin\n",
 15 |     "import pandas as pd\n",
 16 |     "import requests\n",
 17 |     "from bs4 import BeautifulSoup\n",
 18 |     "from selenium import webdriver\n",
 19 |     "from selenium.webdriver.chrome.options import Options\n",
 20 |     "from selenium.webdriver.chrome.service import Service\n",
 21 |     "from selenium.webdriver.common.by import By\n",
 22 |     "from webdriver_manager.chrome import ChromeDriverManager"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "id": "b987a106",
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "#Configure Chromedriver\n",
 32 |     "# NOTE(review): the path is rebuilt from the install folder, presumably because install() can return a non-binary file from that folder - confirm\n",
 33 |     "chrome_install = ChromeDriverManager().install()\n",
 34 |     "folder = os.path.dirname(chrome_install)\n",
 35 |     "# The driver binary only carries the .exe suffix on Windows\n",
 36 |     "chromedriver_path = os.path.join(folder, \"chromedriver.exe\" if os.name == \"nt\" else \"chromedriver\")"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "id": "f3280df5",
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# Initialize Chrome WebDriver\n",
 47 |     "# Wrap the driver path in a Service object and start Chrome with it\n",
 48 |     "chrome_service = Service(chromedriver_path)\n",
 49 |     "browser = webdriver.Chrome(service=chrome_service)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "id": "e425e484",
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "#Setup search parameters\n",
 60 |     "product = \"Iphone 13\"  # search terms; also matched against the listing text later\n",
 61 |     "city = \"toronto\"  # city part of the search URL; also matched against the listing text later\n",
 62 |     "days_listed = 1  # sent as daysSinceListed\n",
 63 |     "max_price = 600  # sent as maxPrice\n",
 64 |     "min_price = 300  # sent as minPrice"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "id": "1939ce5b",
 71 |    "metadata": {},
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "# Set up base URL (parameters are percent-encoded so spaces or symbols like & cannot break the query)\n",
 75 |     "search_query = urlencode({'query': product, 'minPrice': min_price, 'maxPrice': max_price, 'daysSinceListed': days_listed, 'exact': 'false'}, quote_via=quote)\n",
 76 |     "url = f'https://www.facebook.com/marketplace/{quote(city)}/search?{search_query}'\n",
 77 |     "# Visit the website\n",
 78 |     "browser.get(url)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "id": "222c79ad",
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "# Locate the button with aria-label=\"Decline optional cookies\" (Europe)\n",
 89 |     "try:\n",
 90 |     "    decline_button = browser.find_element(By.XPATH, '//div[@aria-label=\"Decline optional cookies\" and @role=\"button\"]')\n",
 91 |     "    decline_button.click()\n",
 92 |     "    print(\"Decline optional cookies button clicked!\")\n",
 93 |     "\n",
 94 |     "except Exception:\n",
 95 |     "    # Best effort: a missing banner must not stop the notebook; Exception (not a bare except) keeps Ctrl+C working\n",
 96 |     "    print(\"Could not find or click the optional cookies button!\")"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "id": "a625f6b7",
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "# Locate the button for the login pop-up with aria-label=\"Close\"\n",
107 |     "try:\n",
108 |     "    close_button = browser.find_element(By.XPATH, '//div[@aria-label=\"Close\" and @role=\"button\"]')\n",
109 |     "    close_button.click()\n",
110 |     "    print(\"Close button clicked!\")\n",
111 |     "\n",
112 |     "except Exception:\n",
113 |     "    # Best effort: a missing pop-up must not stop the notebook; Exception (not a bare except) keeps Ctrl+C working\n",
114 |     "    print(\"Could not find or click the close button!\")"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": null,
120 |    "id": "97025871",
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "#Scroll down to load all results\n",
125 |     "try:\n",
126 |     "    # Page height before the first scroll\n",
127 |     "    last_height = browser.execute_script(\"return document.body.scrollHeight\")\n",
128 |     "\n",
129 |     "    reached_bottom = False\n",
130 |     "    while not reached_bottom:\n",
131 |     "\n",
132 |     "        # Jump to the bottom of the page with JavaScript, then pause before measuring\n",
133 |     "        browser.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
134 |     "        time.sleep(4)\n",
135 |     "\n",
136 |     "        # Page height after the scroll\n",
137 |     "        new_height = browser.execute_script(\"return document.body.scrollHeight\")\n",
138 |     "\n",
139 |     "        # An unchanged height means the bottom has been reached\n",
140 |     "        reached_bottom = new_height == last_height\n",
141 |     "\n",
142 |     "        if not reached_bottom:\n",
143 |     "            # Remember the new height for the next comparison\n",
144 |     "            last_height = new_height\n",
145 |     "\n",
146 |     "            print(\"scrolled\")\n",
147 |     "\n",
148 |     "except Exception as e:\n",
149 |     "    print(f\"An error occurred: {e}\")"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "id": "957ca617",
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "# Retrieve the HTML\n",
160 |     "html = browser.page_source\n",
161 |     "\n",
162 |     "# Use BeautifulSoup to parse the HTML\n",
163 |     "soup = BeautifulSoup(html, 'html.parser')\n",
164 |     "\n",
165 |     "#Close the browser (quit() also ends the driver session, close() only closes the current window)\n",
166 |     "browser.quit()"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "id": "56729a86",
173 |    "metadata": {},
174 |    "outputs": [],
175 |    "source": [
176 |     "# Find all link elements\n",
177 |     "links = soup.find_all('a')\n",
178 |     "\n",
179 |     "# Only keep items where the text matches your search terms and desired location\n",
180 |     "product_lc, city_lc = product.lower(), city.lower()\n",
181 |     "iphone_links = [link for link in links if product_lc in link.text.lower() and city_lc in link.text.lower()]\n",
182 |     "\n",
183 |     "# Store the items url and text into a list of dictionaries\n",
184 |     "iphone_data = []\n",
185 |     "for iphone_link in iphone_links:\n",
186 |     "    href = iphone_link.get('href')\n",
187 |     "    if href:  # skip anchors without a target, they cannot be linked to\n",
188 |     "        # urljoin turns a site-relative href into a full link and leaves absolute ones unchanged\n",
189 |     "        iphone_data.append({'text': '\\n'.join(iphone_link.stripped_strings), 'url': urljoin('https://www.facebook.com', href)})"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "id": "7587f52c",
196 |    "metadata": {},
197 |    "outputs": [],
198 |    "source": [
199 |     "# Create an empty list to store product data\n",
200 |     "extracted_data = []\n",
201 |     "\n",
202 |     "# Regular expression to find numeric values (raw string, compiled once outside the loop)\n",
203 |     "numeric_pattern = re.compile(r'\\d[\\d,.]*')\n",
204 |     "\n",
205 |     "for item in iphone_data:\n",
206 |     "    lines = item['text'].split('\\n')\n",
207 |     "    # Title and location are read from the last two lines, so shorter entries are skipped\n",
208 |     "    if len(lines) < 2:\n",
209 |     "        continue\n",
210 |     "\n",
211 |     "    # Extracting prices\n",
212 |     "    # Reset per item so a listing without a number never reuses the previous item's price\n",
213 |     "    price = None\n",
214 |     "    # Iterate through lines to find the first line with numbers\n",
215 |     "    for line in lines:\n",
216 |     "        match = numeric_pattern.search(line)\n",
217 |     "        if match:\n",
218 |     "            try:\n",
219 |     "                # Convert the first numeric value found to float (handle commas)\n",
220 |     "                price = float(match.group().replace(',', ''))\n",
221 |     "            except ValueError:\n",
222 |     "                # The pattern also matches text such as \"1.2.3\"; try the next line instead\n",
223 |     "                continue\n",
224 |     "            break\n",
225 |     "\n",
226 |     "    # Compare with None so that a price of 0 still counts as found\n",
227 |     "    if price is not None:\n",
228 |     "        print(f\"Price extracted: {price}\")\n",
229 |     "    else:\n",
230 |     "        print(\"price not found\")\n",
231 |     "\n",
232 |     "    # Add extracted data to a list of dictionaries (title = second-to-last line, location = last line)\n",
233 |     "    extracted_data.append({\n",
234 |     "        'title': lines[-2],\n",
235 |     "        'price': price,\n",
236 |     "        'location': lines[-1],\n",
237 |     "        'url': re.sub(r'\\?.*', '', item['url'] or '')  # drop the query string; tolerate a missing href\n",
238 |     "    })"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "id": "e0628307",
245 |    "metadata": {},
246 |    "outputs": [],
247 |    "source": [
248 |     "# Convert extracted data into a Pandas Dataframe\n",
249 |     "# Explicit columns keep the sort below working even when nothing was scraped\n",
250 |     "items_df = pd.DataFrame(extracted_data, columns=['title', 'price', 'location', 'url'])\n",
251 |     "\n",
252 |     "# Sort the DataFrame by the \"price\" column in ascending order\n",
253 |     "sorted_df = items_df.sort_values(by='price')\n",
254 |     "# Get the 10 cheapest entries\n",
255 |     "cheapest_10 = sorted_df.head(10)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "id": "6d07c236",
262 |    "metadata": {},
263 |    "outputs": [],
264 |    "source": [
265 |     "# Build the message: title, price, and URL of each of the 10 cheapest items\n",
266 |     "message = \"\".join(\n",
267 |     "    f\"Title: {row['title']}\\nPrice: {row['price']}\\nURL: {row['url']}\\n\\n\"\n",
268 |     "    for _, row in cheapest_10.iterrows()\n",
269 |     ")\n",
270 |     "\n",
271 |     "# URL of the Discord channel where the message will be posted\n",
272 |     "discord_url = 'PASTE REQUEST URL HERE'\n",
273 |     "\n",
274 |     "# Payload containing the message to be sent (Discord limits message content to 2000 characters)\n",
275 |     "payload = {\"content\": message[:2000]}\n",
276 |     "\n",
277 |     "# Headers including the authorization token for the Discord API\n",
278 |     "# Prefer the DISCORD_AUTH_TOKEN environment variable so the token is not saved in the notebook\n",
279 |     "headers = {\"Authorization\" : os.environ.get(\"DISCORD_AUTH_TOKEN\", \"PASTE AUTHORIZATION TOKEN HERE\")}\n",
280 |     "\n",
281 |     "# Send a POST request to the Discord API with the payload and headers\n",
282 |     "response = None\n",
283 |     "if message:\n",
284 |     "    response = requests.post(discord_url, data=payload, headers=headers, timeout=10)\n",
285 |     "    # Surface HTTP errors (bad token, wrong URL, rate limit) instead of failing silently\n",
286 |     "    response.raise_for_status()\n",
287 |     "else:\n",
288 |     "    print(\"No listings found, nothing to send\")\n"
289 |    ]
290 |   }
291 |  ],
292 |  "metadata": {
293 |   "kernelspec": {
294 |    "display_name": "Python 3 (ipykernel)",
295 |    "language": "python",
296 |    "name": "python3"
297 |   },
298 |   "language_info": {
299 |    "codemirror_mode": {
300 |     "name": "ipython",
301 |     "version": 3
302 |    },
303 |    "file_extension": ".py",
304 |    "mimetype": "text/x-python",
305 |    "name": "python",
306 |    "nbconvert_exporter": "python",
307 |    "pygments_lexer": "ipython3",
308 |    "version": "3.10.9"
309 |   }
310 |  },
311 |  "nbformat": 4,
312 |  "nbformat_minor": 5
313 | }
314 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # marketplace-discord-webscraping
2 | 📱💸 Discord-integrated Facebook Marketplace scraper using Python, Chromedriver, Selenium, BeautifulSoup, Pandas...
3 | 
4 | <h3>
5 |   Get instant Facebook Marketplace notifications with 
6 |   <a href="https://shorturl.at/sMFDs">Swoopa</a> using the code 
7 |   <strong>"TTC15"</strong> to get a free 7-day trial and 15% off any plan.
8 | </h3>
9 | 


--------------------------------------------------------------------------------