├── README.md
├── Instagram Scraper - Comments Only.ipynb
└── Insta_scraper_V2.ipynb

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# instagram-photo-reel-webscraping
📷 💾 Python bulk instagram scraper for photos and videos using Selenium and BS4.
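
## Setup

Both notebooks log in with credentials read from a local `config.py` (imported as `import config`). A minimal sketch of that file, assuming only `username` and `password` are needed:

```python
# config.py -- hypothetical example; keep real credentials out of version control
username = "your_instagram_username"
password = "your_instagram_password"
```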
--------------------------------------------------------------------------------
/Instagram Scraper - Comments Only.ipynb:
--------------------------------------------------------------------------------
"    # Find the <pre> tag containing the JSON data\n",
"    \n",
"    # Extract the JSON data from the <pre> tag\n",
"\n",
"    # Parse the JSON data\n",
"    \n",
"    # Add json to the list\n",
"    \n",
"    # Error handling\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "395bd4f1",
"metadata": {},
"outputs": [],
"source": [
"# Lists to store URLs and corresponding dates\n",
"\n",
"# Iterate through each JSON payload in the list\n",
"    \n",
"    # Extract the list from the 'items' key\n",
"    \n",
"    \n",
"    # Iterate through each item in the 'items' list\n",
"    \n",
"        # Extract the date the item was taken\n",
"\n",
"        # Check if 'carousel_media' is present\n",
"        \n",
"        # Iterate through each media in the 'carousel_media' list\n",
"        \n",
"            # Extract the image URL from the media\n",
"            \n",
"            # Check if the image_url field is found inside the 'carousel_media' list\n",
"\n",
"                # Add the image URL and corresponding date to the lists\n",
"                \n",
"            # Extract the video URL from the media\n",
"            \n",
"                # Add the video URL and corresponding date to the lists\n",
"\n",
"        # Handle cases of a single image, instead of a carousel\n",
"        \n",
"            # Add the image URL and corresponding date to the lists\n",
"\n",
"        # Check if 'video_versions' key exists\n",
"        "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70173379",
"metadata": {},
"outputs": [],
"source": [
"# Create a directory to store downloaded files\n",
"\n",
"# Create subfolders for images and videos\n",
"\n",
"# Initialize counters for images and videos\n",
"\n",
"\n",
"# Iterate through URLs in the all_urls list and download media\n",
"\n",
"    # Extract file extension from the URL\n",
"\n",
"    # Determine the file name based on the URL\n",
"    \n",
"        # Default to the main download directory for other file types\n",
"\n",
"    # Save the file to the appropriate folder\n",
"\n",
"    \n",
"    # Write the content of the response to the file\n",
"    \n",
"\n",
"# Print a message indicating the number of downloaded files and the download directory"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
--------------------------------------------------------------------------------
/Insta_scraper_V2.ipynb:
--------------------------------------------------------------------------------
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "729a2f1e",
"metadata": {},
"outputs": [],
"source": [
"# Import dependencies\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.wait import WebDriverWait\n",
"from selenium.webdriver.chrome.options import Options\n",
"from selenium.common.exceptions import NoSuchElementException, TimeoutException\n",
"import time\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re\n",
"import config\n",
"import json\n",
"import os\n",
"from urllib.parse import urlparse\n",
"import csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "833b157f",
"metadata": {},
"outputs": [],
"source": [
"# Start a Chrome session (on Selenium 4.6+, Selenium Manager fetches a matching\n",
"# chromedriver automatically; on older versions, pass a Service with an explicit\n",
"# chromedriver path)\n",
"driver = webdriver.Chrome()\n",
"\n",
"# Open Instagram\n",
"driver.get(\"https://www.instagram.com/\")"
]
},
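{
"cell_type": "markdown",
"id": "3f8a2b9c",
"metadata": {},
"source": [
"`Options` is imported above but never used. A hedged sketch of how it could be wired in, e.g. to fix the window size or run headless (Instagram's login flow may behave differently headless). If used, this replaces the plain `webdriver.Chrome()` call in the previous cell:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d4e0f21",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch (assumes Selenium 4+): configure Chrome via Options\n",
"# instead of the bare webdriver.Chrome() call above.\n",
"options = Options()\n",
"options.add_argument(\"--window-size=1280,1000\")\n",
"# options.add_argument(\"--headless=new\")  # uncomment to run without a visible window\n",
"\n",
"# driver = webdriver.Chrome(options=options)  # uncomment to use these options"
]
},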
{
"cell_type": "code",
"execution_count": null,
"id": "fbe88437",
"metadata": {},
"outputs": [],
"source": [
"# Target the username and password fields\n",
"username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, \"input[name='username']\")))\n",
"password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, \"input[name='password']\")))\n",
"\n",
"# Enter the username and password\n",
"username.clear()\n",
"username.send_keys(config.username)\n",
"password.clear()\n",
"password.send_keys(config.password)\n",
"\n",
"# Target the login button and click it\n",
"WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, \"button[type='submit']\"))).click()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4813345",
"metadata": {},
"outputs": [],
"source": [
"# Dismiss the post-login prompt by clicking its \"Not Now\" button\n",
"not_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), \"Not Now\")]')))\n",
"\n",
"not_button.click()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df9ee85d",
"metadata": {},
"outputs": [],
"source": [
"# Wait up to 10 seconds for the search button to be clickable on the web page\n",
"search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'svg[aria-label=\"Search\"]')))\n",
"\n",
"# Click the search button once it becomes clickable\n",
"search_button.click()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aed26dce",
"metadata": {},
"outputs": [],
"source": [
"# Target the search input field\n",
"searchbox = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, \"//input[@placeholder='Search']\")))\n",
"searchbox.clear()\n",
"\n",
"# Search for the @handle or keyword\n",
"keyword = \"@sample-handle\"\n",
"searchbox.send_keys(keyword)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f06c2e34",
"metadata": {},
"outputs": [],
"source": [
"# Strip the leading \"@\" so the text lookup matches the displayed handle\n",
"if keyword.startswith(\"@\"):\n",
"    keyword = keyword[1:]\n",
"\n",
"# Wait for the first result whose text matches the keyword\n",
"first_result = WebDriverWait(driver, 10).until(\n",
"    EC.element_to_be_clickable((By.XPATH, f'//span[text()=\"{keyword}\"]'))\n",
")\n",
"\n",
"# Click the found element (assuming it is the desired search result)\n",
"first_result.click()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c79c06c",
"metadata": {},
"outputs": [],
"source": [
"# Get the initial page height\n",
"initial_height = driver.execute_script(\"return document.body.scrollHeight\")\n",
"\n",
"# Create a list to store a parsed snapshot of the page after each scroll\n",
"soups = []\n",
"\n",
"while True:\n",
"    # Scroll down to the bottom of the page\n",
"    driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
"\n",
"    # Wait for a moment to allow new content to load (adjust as needed)\n",
"    time.sleep(5)\n",
"\n",
"    # Grab the current page HTML\n",
"    html = driver.page_source\n",
"\n",
"    # Create a BeautifulSoup object from the scraped HTML\n",
"    soups.append(BeautifulSoup(html, 'html.parser'))\n",
"\n",
"    # Get the current page height\n",
"    current_height = driver.execute_script(\"return document.body.scrollHeight\")\n",
"\n",
"    if current_height == initial_height:\n",
"        break  # Exit the loop when the page can't scroll any further\n",
"\n",
"    initial_height = current_height  # Update the height for the next iteration"
]
},
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "id": "d7fa7a7b",
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# List to store the post image URLs\n",
168 | "post_urls = []\n",
169 | "\n",
170 | "for soup in soups:\n",
171 | " # Find all anchor elements with href attributes\n",
172 | " anchors = soup.find_all('a', href=True)\n",
173 | " \n",
174 | " # Filter URLs that start with \"/p/\" or \"/reel/\"\n",
175 | " post_urls.extend([anchor['href'] for anchor in anchors if anchor['href'].startswith((\"/p/\", \"/reel/\"))])\n",
176 | "\n",
177 | "# Convert the list to a set to remove duplicates\n",
178 | "unique_post_urls = list(set(post_urls))\n",
179 | "\n",
180 | "print(f\"before: {len(post_urls)}, after: {len(unique_post_urls)}\")"
181 | ]
182 | },
{
"cell_type": "code",
"execution_count": null,
"id": "801f4c86",
"metadata": {},
"outputs": [],
"source": [
"json_list = []\n",
"\n",
"# Query parameters that (unofficially, as of writing) make Instagram return\n",
"# the post as raw JSON instead of HTML; this trick may stop working at any time\n",
"query_parameters = \"__a=1&__d=dis\"\n",
"\n",
"# Go through all unique post URLs\n",
"for url in unique_post_urls:\n",
"    try:\n",
"        # Build the post URL with the JSON query parameters appended\n",
"        # (url already starts with \"/p/\" or \"/reel/\")\n",
"        modified_url = \"https://www.instagram.com\" + url + \"?\" + query_parameters\n",
"\n",
"        # Load the URL\n",
"        driver.get(modified_url)\n",
"\n",
"        # Wait for a moment to allow the content to load (adjust as needed)\n",
"        time.sleep(1)\n",
"\n",
"        # Wait for the <pre> tag containing the JSON data\n",
"        WebDriverWait(driver, 10).until(\n",
"            EC.presence_of_element_located((By.XPATH, '//pre'))\n",
"        )\n",
"        pre_tag = driver.find_element(By.XPATH, '//pre')\n",
"\n",
"        # Extract the JSON data from the <pre> tag\n",
"        json_script = pre_tag.text\n",
"\n",
"        # Parse the JSON data\n",
"        json_parsed = json.loads(json_script)\n",
"\n",
"        # Add the parsed JSON to the list\n",
"        json_list.append(json_parsed)\n",
"    except (NoSuchElementException, TimeoutException, json.JSONDecodeError) as e:\n",
"        print(f\"Error processing URL {url}: {e}\")\n"
]
},
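{
"cell_type": "markdown",
"id": "9b7c5a3e",
"metadata": {},
"source": [
"Fetching is slow (one page load per post), so it can help to cache the raw payloads. A minimal sketch, assuming a scratch file named `json_cache.json` (the name is arbitrary):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e8d6c4a",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: persist the fetched JSON payloads so the parsing step\n",
"# below can be re-run without hitting Instagram again (filename is arbitrary).\n",
"with open(\"json_cache.json\", \"w\", encoding=\"utf-8\") as f:\n",
"    json.dump(json_list, f)\n",
"\n",
"# To reload later:\n",
"# with open(\"json_cache.json\", encoding=\"utf-8\") as f:\n",
"#     json_list = json.load(f)"
]
},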
{
"cell_type": "code",
"execution_count": null,
"id": "4c032fe0",
"metadata": {},
"outputs": [],
"source": [
"# Lists to store URLs and corresponding dates\n",
"all_urls = []\n",
"all_dates = []\n",
"\n",
"# Iterate through each JSON payload in the list\n",
"for json_data in json_list:\n",
"\n",
"    # Extract the list from the 'items' key\n",
"    item_list = json_data.get('items', [])\n",
"\n",
"    # Iterate through each item in the 'items' list\n",
"    for item in item_list:\n",
"\n",
"        # Extract the date the item was taken\n",
"        date_taken = item.get('taken_at')\n",
"\n",
"        # Check if 'carousel_media' is present\n",
"        carousel_media = item.get('carousel_media', [])\n",
"\n",
"        # Iterate through each media entry in the 'carousel_media' list\n",
"        for media in carousel_media:\n",
"\n",
"            # Extract the image URL from the media entry\n",
"            image_url = media.get('image_versions2', {}).get('candidates', [{}])[0].get('url')\n",
"\n",
"            # Check that an image URL was found inside the media entry\n",
"            if image_url:\n",
"                # Add the image URL and corresponding date to the lists\n",
"                all_urls.append(image_url)\n",
"                all_dates.append(date_taken)\n",
"                print(\"carousel image added\")\n",
"\n",
"            # Extract the video URL from the media entry\n",
"            video_versions = media.get('video_versions', [])\n",
"            if video_versions:\n",
"                video_url = video_versions[0].get('url')\n",
"                if video_url:\n",
"                    # Add the video URL and corresponding date to the lists\n",
"                    all_urls.append(video_url)\n",
"                    all_dates.append(date_taken)\n",
"                    print(\"carousel video added\")\n",
"\n",
"        # Handle single-media posts, instead of carousels (guarded so carousel\n",
"        # posts that also carry top-level keys aren't counted twice)\n",
"        if not carousel_media:\n",
"\n",
"            image_url = item.get('image_versions2', {}).get('candidates', [{}])[0].get('url')\n",
"            if image_url:\n",
"                # Add the image URL and corresponding date to the lists\n",
"                all_urls.append(image_url)\n",
"                all_dates.append(date_taken)\n",
"                print(\"single image added\")\n",
"\n",
"            # Check if the 'video_versions' key exists\n",
"            video_versions = item.get('video_versions', [])\n",
"            if video_versions:\n",
"                video_url = video_versions[0].get('url')\n",
"                if video_url:\n",
"                    all_urls.append(video_url)\n",
"                    all_dates.append(date_taken)\n",
"                    print(\"video added\")\n",
"\n",
"# Print or use all collected URLs as needed\n",
"print(len(all_urls))"
]
},
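{
"cell_type": "markdown",
"id": "c5b3a1f9",
"metadata": {},
"source": [
"`taken_at` appears to be a Unix epoch timestamp, so the filenames below embed raw integers. An optional sketch for converting the dates to a readable form first:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a9e4d2b",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: convert the timestamps in all_dates to readable dates\n",
"# (assumption: taken_at is seconds since the epoch, UTC).\n",
"from datetime import datetime, timezone\n",
"\n",
"readable_dates = [\n",
"    datetime.fromtimestamp(ts, tz=timezone.utc).strftime(\"%Y-%m-%d\") if ts else \"unknown\"\n",
"    for ts in all_dates\n",
"]\n",
"print(readable_dates[:5])"
]
},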
{
"cell_type": "code",
"execution_count": null,
"id": "df201782",
"metadata": {},
"outputs": [],
"source": [
"# Create a directory to store downloaded files\n",
"download_dir = keyword\n",
"os.makedirs(download_dir, exist_ok=True)\n",
"\n",
"# Create subfolders for images and videos\n",
"image_dir = os.path.join(download_dir, \"images\")\n",
"video_dir = os.path.join(download_dir, \"videos\")\n",
"os.makedirs(image_dir, exist_ok=True)\n",
"os.makedirs(video_dir, exist_ok=True)\n",
"\n",
"# Initialize counters for images and videos\n",
"image_counter = 1\n",
"video_counter = 1\n",
"\n",
"# Iterate through the URLs in all_urls and download each media file\n",
"for index, url in enumerate(all_urls):\n",
"    response = requests.get(url, stream=True)\n",
"    response.raise_for_status()\n",
"\n",
"    # Extract the file extension from the URL path\n",
"    url_path = urlparse(url).path\n",
"    file_extension = os.path.splitext(url_path)[1]\n",
"\n",
"    # Pick the destination folder and file name from the extension,\n",
"    # keeping the original extension rather than forcing one\n",
"    if file_extension.lower() in {'.jpg', '.jpeg', '.png', '.gif'}:\n",
"        file_name = f\"{all_dates[index]}-img-{image_counter}{file_extension}\"\n",
"        destination_folder = image_dir\n",
"        image_counter += 1\n",
"    elif file_extension.lower() in {'.mp4', '.avi', '.mkv', '.mov'}:\n",
"        file_name = f\"{all_dates[index]}-vid-{video_counter}{file_extension}\"\n",
"        destination_folder = video_dir\n",
"        video_counter += 1\n",
"    else:\n",
"        # Default to the main download directory for other file types\n",
"        file_name = f\"{all_dates[index]}{file_extension}\"\n",
"        destination_folder = download_dir\n",
"\n",
"    # Save the file to the appropriate folder\n",
"    file_path = os.path.join(destination_folder, file_name)\n",
"\n",
"    # Stream the response content to the file in chunks\n",
"    with open(file_path, 'wb') as file:\n",
"        for chunk in response.iter_content(chunk_size=8192):\n",
"            if chunk:\n",
"                file.write(chunk)\n",
"\n",
"    print(f\"Downloaded: {file_path}\")\n",
"\n",
"# Report the number of downloaded files and the download directory\n",
"print(f\"Downloaded {len(all_urls)} files to {download_dir}\")"
]
},
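{
"cell_type": "markdown",
"id": "d8f1c6e3",
"metadata": {},
"source": [
"`csv` is imported at the top but never used in this version. One plausible use, sketched here, is exporting the collected URL/date pairs alongside the media (the filename is arbitrary):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b2a9c7d",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: export the collected URL/date pairs to a CSV file\n",
"# using the csv import above (filename is arbitrary).\n",
"csv_path = os.path.join(download_dir, \"media_urls.csv\")\n",
"with open(csv_path, \"w\", newline=\"\", encoding=\"utf-8\") as f:\n",
"    writer = csv.writer(f)\n",
"    writer.writerow([\"url\", \"taken_at\"])\n",
"    writer.writerows(zip(all_urls, all_dates))\n",
"print(f\"Wrote {len(all_urls)} rows to {csv_path}\")"
]
}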
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
--------------------------------------------------------------------------------