├── README.md ├── Scraping Country Names using Selenium.ipynb ├── Scraping IMDB Top Movies using Beautiful Soup.ipynb ├── Scraping Multimedia Files using Beautiful Soup.ipynb ├── Scraping Products from Amazon using Selenium-Dynamic Website.ipynb ├── Scraping XML Data using Beautiful Soup.ipynb ├── Scraping data using Regular Expression.ipynb └── Taking Screenshot of Webpage using Selenium.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Web-Scraping Tutorials 2 | This repository contains all the Python tutorial code files, with the basic concepts explained through simple examples. 3 | 4 | ## Web-Scraping tutorial videos playlist 5 | http://bit.ly/webscrapingplaylist 6 | -------------------------------------------------------------------------------- /Scraping Country Names using Selenium.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from selenium import webdriver\n", 17 | "from selenium.webdriver.common.by import By\n", 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# import time -- needed when running this notebook as a standalone Python script\n", 28 | "# time.sleep(seconds) -- call this to wait while the page loads" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Set path for Chrome Driver" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# path = 'C:\\\\Chromedriver.exe'" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 6, 50 | "metadata": {}, 51 | "outputs": [], 52 | 
"source": [ 53 | "# set path for the driver\n", 54 | "# browser = webdriver.Chrome(executable_path = path)\n", 55 | "browser = webdriver.Chrome()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 7, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "url = \"https://scrapethissite.com/pages/simple/\"" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 8, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# open the page URL in Chrome\n", 74 | "browser.get(url)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Scrape the data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 9, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# get country names\n", 91 | "country_list = browser.find_elements(By.XPATH, \"//h3[@class='country-name']\")\n", 92 | "# country_list" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 12, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# parse the data\n", 102 | "countries = []\n", 103 | "for country in country_list:\n", 104 | " # get the text data\n", 105 | " temp = country.text\n", 106 | " countries.append(temp)\n", 107 | "# countries" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 13, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# get the population of each country\n", 117 | "population_list = browser.find_elements(By.CLASS_NAME,'country-population')\n", 118 | "# population_list" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 14, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# parse the data\n", 128 | "populations = []\n", 129 | "for population in population_list:\n", 130 | " # get the text data\n", 131 | " temp = population.text\n", 132 | " populations.append(temp)\n", 133 | "# populations" 134 | ] 135 | }, 136 | { 137 | "cell_type": 
"markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Store the Scraped Data" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 15, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/html": [ 151 | "
\n", 152 | "\n", 165 | "\n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | "
Country NamesPopulation
0Andorra84000
1United Arab Emirates4975593
2Afghanistan29121286
3Antigua and Barbuda86754
4Anguilla13254
\n", 201 | "
" 202 | ], 203 | "text/plain": [ 204 | " Country Names Population\n", 205 | "0 Andorra 84000\n", 206 | "1 United Arab Emirates 4975593\n", 207 | "2 Afghanistan 29121286\n", 208 | "3 Antigua and Barbuda 86754\n", 209 | "4 Anguilla 13254" 210 | ] 211 | }, 212 | "execution_count": 15, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "data = pd.DataFrame()\n", 219 | "data['Country Names'] = countries\n", 220 | "data['Population'] = populations\n", 221 | "data.head()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 13, 227 | "metadata": { 228 | "scrolled": true 229 | }, 230 | "outputs": [ 231 | { 232 | "name": "stderr", 233 | "output_type": "stream", 234 | "text": [ 235 | ":2: FutureWarning: As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead. You can set the option io.excel.xls.writer to 'xlwt' to silence this warning. 
While this option is deprecated and will also raise a warning, it can be globally set and the warning suppressed.\n", 236 | " data.to_excel('countries.xls', index=False)\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "# save the data as .xlsx (needs openpyxl); the xlwt-based .xls writer was removed in pandas 2.0\n", 242 | "data.to_excel('countries.xlsx', index=False)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## Close the Driver" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 16, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "browser.quit()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [] 267 | } 268 | ], 269 | "metadata": { 270 | "kernelspec": { 271 | "display_name": "Python 3 (ipykernel)", 272 | "language": "python", 273 | "name": "python3" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 3 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython3", 285 | "version": "3.8.3" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 4 290 | } 291 | -------------------------------------------------------------------------------- /Scraping IMDB Top Movies using Beautiful Soup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "import requests\n", 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Request page source from URL" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 
30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "url = \"https://www.imdb.com/chart/top/\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 5, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "HEADERS = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 6, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "" 55 | ] 56 | }, 57 | "execution_count": 6, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "page = requests.get(url, headers=HEADERS, timeout=30)  # requests has no default timeout; fail instead of hanging if the server stalls\n", 64 | "page" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 7, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "## display the page source code\n", 74 | "# page.content" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 8, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "soup = BeautifulSoup(page.content, \"html.parser\")\n", 84 | "# print(soup.prettify())" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 9, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "[\n", 96 | " 1.\n", 97 | " The Shawshank Redemption\n", 98 | " (1994)\n", 99 | " ,\n", 100 | " \n", 101 | " 2.\n", 102 | " The Godfather\n", 103 | " (1972)\n", 104 | " ,\n", 105 | " \n", 106 | " 3.\n", 107 | " The Dark Knight\n", 108 | " (2008)\n", 109 | " ,\n", 110 | " \n", 111 | " 4.\n", 112 | " The Godfather Part II\n", 113 | " (1974)\n", 114 | " ,\n", 115 | " \n", 116 | " 5.\n", 117 | " 12 Angry Men\n", 118 | " (1957)\n", 119 | " ,\n", 120 | " \n", 121 | " 6.\n", 122 | " Schindler's List\n", 123 | " (1993)\n", 124 | " ,\n", 125 | " \n", 126 | " 7.\n", 127 | " The Lord of the Rings: The Return of the King\n", 128 | " (2003)\n", 129 | " 
,\n", 130 | " \n", 131 | " 8.\n", 132 | " Pulp Fiction\n", 133 | " (1994)\n", 134 | " ,\n", 135 | " \n", 136 | " 9.\n", 137 | " The Lord of the Rings: The Fellowship of the Ring\n", 138 | " (2001)\n", 139 | " ,\n", 140 | " \n", 141 | " 10.\n", 142 | " Il buono, il brutto, il cattivo\n", 143 | " (1966)\n", 144 | " ,\n", 145 | " \n", 146 | " 11.\n", 147 | " Forrest Gump\n", 148 | " (1994)\n", 149 | " ,\n", 150 | " \n", 151 | " 12.\n", 152 | " Fight Club\n", 153 | " (1999)\n", 154 | " ,\n", 155 | " \n", 156 | " 13.\n", 157 | " Spider-Man: Across the Spider-Verse\n", 158 | " (2023)\n", 159 | " ,\n", 160 | " \n", 161 | " 14.\n", 162 | " The Lord of the Rings: The Two Towers\n", 163 | " (2002)\n", 164 | " ,\n", 165 | " \n", 166 | " 15.\n", 167 | " Inception\n", 168 | " (2010)\n", 169 | " ,\n", 170 | " \n", 171 | " 16.\n", 172 | " Star Wars: Episode V - The Empire Strikes Back\n", 173 | " (1980)\n", 174 | " ,\n", 175 | " \n", 176 | " 17.\n", 177 | " The Matrix\n", 178 | " (1999)\n", 179 | " ,\n", 180 | " \n", 181 | " 18.\n", 182 | " GoodFellas\n", 183 | " (1990)\n", 184 | " ,\n", 185 | " \n", 186 | " 19.\n", 187 | " One Flew Over the Cuckoo's Nest\n", 188 | " (1975)\n", 189 | " ,\n", 190 | " \n", 191 | " 20.\n", 192 | " Se7en\n", 193 | " (1995)\n", 194 | " ,\n", 195 | " \n", 196 | " 21.\n", 197 | " It's a Wonderful Life\n", 198 | " (1946)\n", 199 | " ,\n", 200 | " \n", 201 | " 22.\n", 202 | " Shichinin no samurai\n", 203 | " (1954)\n", 204 | " ,\n", 205 | " \n", 206 | " 23.\n", 207 | " The Silence of the Lambs\n", 208 | " (1991)\n", 209 | " ,\n", 210 | " \n", 211 | " 24.\n", 212 | " Saving Private Ryan\n", 213 | " (1998)\n", 214 | " ,\n", 215 | " \n", 216 | " 25.\n", 217 | " Cidade de Deus\n", 218 | " (2002)\n", 219 | " ,\n", 220 | " \n", 221 | " 26.\n", 222 | " Interstellar\n", 223 | " (2014)\n", 224 | " ,\n", 225 | " \n", 226 | " 27.\n", 227 | " La vita è bella\n", 228 | " (1997)\n", 229 | " ,\n", 230 | " \n", 231 | " 28.\n", 232 | " The Green Mile\n", 233 | " 
(1999)\n", 234 | " ,\n", 235 | " \n", 236 | " 29.\n", 237 | " Star Wars\n", 238 | " (1977)\n", 239 | " ,\n", 240 | " \n", 241 | " 30.\n", 242 | " Terminator 2: Judgment Day\n", 243 | " (1991)\n", 244 | " ,\n", 245 | " \n", 246 | " 31.\n", 247 | " Back to the Future\n", 248 | " (1985)\n", 249 | " ,\n", 250 | " \n", 251 | " 32.\n", 252 | " Sen to Chihiro no kamikakushi\n", 253 | " (2001)\n", 254 | " ,\n", 255 | " \n", 256 | " 33.\n", 257 | " The Pianist\n", 258 | " (2002)\n", 259 | " ,\n", 260 | " \n", 261 | " 34.\n", 262 | " Psycho\n", 263 | " (1960)\n", 264 | " ,\n", 265 | " \n", 266 | " 35.\n", 267 | " Gisaengchung\n", 268 | " (2019)\n", 269 | " ,\n", 270 | " \n", 271 | " 36.\n", 272 | " Léon\n", 273 | " (1994)\n", 274 | " ,\n", 275 | " \n", 276 | " 37.\n", 277 | " The Lion King\n", 278 | " (1994)\n", 279 | " ,\n", 280 | " \n", 281 | " 38.\n", 282 | " Gladiator\n", 283 | " (2000)\n", 284 | " ,\n", 285 | " \n", 286 | " 39.\n", 287 | " American History X\n", 288 | " (1998)\n", 289 | " ,\n", 290 | " \n", 291 | " 40.\n", 292 | " The Departed\n", 293 | " (2006)\n", 294 | " ,\n", 295 | " \n", 296 | " 41.\n", 297 | " Whiplash\n", 298 | " (2014)\n", 299 | " ,\n", 300 | " \n", 301 | " 42.\n", 302 | " The Prestige\n", 303 | " (2006)\n", 304 | " ,\n", 305 | " \n", 306 | " 43.\n", 307 | " The Usual Suspects\n", 308 | " (1995)\n", 309 | " ,\n", 310 | " \n", 311 | " 44.\n", 312 | " Casablanca\n", 313 | " (1942)\n", 314 | " ,\n", 315 | " \n", 316 | " 45.\n", 317 | " Hotaru no haka\n", 318 | " (1988)\n", 319 | " ,\n", 320 | " \n", 321 | " 46.\n", 322 | " Seppuku\n", 323 | " (1962)\n", 324 | " ,\n", 325 | " \n", 326 | " 47.\n", 327 | " The Intouchables\n", 328 | " (2011)\n", 329 | " ,\n", 330 | " \n", 331 | " 48.\n", 332 | " Modern Times\n", 333 | " (1936)\n", 334 | " ,\n", 335 | " \n", 336 | " 49.\n", 337 | " Once Upon a Time in the West\n", 338 | " (1968)\n", 339 | " ,\n", 340 | " \n", 341 | " 50.\n", 342 | " Nuovo Cinema Paradiso\n", 343 | " (1988)\n", 344 | " ,\n", 345 | " 
\n", 346 | " 51.\n", 347 | " Rear Window\n", 348 | " (1954)\n", 349 | " ,\n", 350 | " \n", 351 | " 52.\n", 352 | " Alien\n", 353 | " (1979)\n", 354 | " ,\n", 355 | " \n", 356 | " 53.\n", 357 | " City Lights\n", 358 | " (1931)\n", 359 | " ,\n", 360 | " \n", 361 | " 54.\n", 362 | " Apocalypse Now\n", 363 | " (1979)\n", 364 | " ,\n", 365 | " \n", 366 | " 55.\n", 367 | " Memento\n", 368 | " (2000)\n", 369 | " ,\n", 370 | " \n", 371 | " 56.\n", 372 | " Django Unchained\n", 373 | " (2012)\n", 374 | " ,\n", 375 | " \n", 376 | " 57.\n", 377 | " Raiders of the Lost Ark\n", 378 | " (1981)\n", 379 | " ,\n", 380 | " \n", 381 | " 58.\n", 382 | " WALL·E\n", 383 | " (2008)\n", 384 | " ,\n", 385 | " \n", 386 | " 59.\n", 387 | " The Lives of Others\n", 388 | " (2006)\n", 389 | " ,\n", 390 | " \n", 391 | " 60.\n", 392 | " Sunset Blvd.\n", 393 | " (1950)\n", 394 | " ,\n", 395 | " \n", 396 | " 61.\n", 397 | " Paths of Glory\n", 398 | " (1957)\n", 399 | " ,\n", 400 | " \n", 401 | " 62.\n", 402 | " Avengers: Infinity War\n", 403 | " (2018)\n", 404 | " ,\n", 405 | " \n", 406 | " 63.\n", 407 | " The Shining\n", 408 | " (1980)\n", 409 | " ,\n", 410 | " \n", 411 | " 64.\n", 412 | " The Great Dictator\n", 413 | " (1940)\n", 414 | " ,\n", 415 | " \n", 416 | " 65.\n", 417 | " Witness for the Prosecution\n", 418 | " (1957)\n", 419 | " ,\n", 420 | " \n", 421 | " 66.\n", 422 | " Spider-Man: Into the Spider-Verse\n", 423 | " (2018)\n", 424 | " ,\n", 425 | " \n", 426 | " 67.\n", 427 | " Aliens\n", 428 | " (1986)\n", 429 | " ,\n", 430 | " \n", 431 | " 68.\n", 432 | " American Beauty\n", 433 | " (1999)\n", 434 | " ,\n", 435 | " \n", 436 | " 69.\n", 437 | " The Dark Knight Rises\n", 438 | " (2012)\n", 439 | " ,\n", 440 | " \n", 441 | " 70.\n", 442 | " Dr. 
Strangelove or: How I Learned to Stop Worrying and Love the Bomb\n", 443 | " (1964)\n", 444 | " ,\n", 445 | " \n", 446 | " 71.\n", 447 | " Inglourious Basterds\n", 448 | " (2009)\n", 449 | " ,\n", 450 | " \n", 451 | " 72.\n", 452 | " Oldeuboi\n", 453 | " (2003)\n", 454 | " ,\n", 455 | " \n", 456 | " 73.\n", 457 | " Coco\n", 458 | " (2017)\n", 459 | " ,\n", 460 | " \n", 461 | " 74.\n", 462 | " Amadeus\n", 463 | " (1984)\n", 464 | " ,\n", 465 | " \n", 466 | " 75.\n", 467 | " Toy Story\n", 468 | " (1995)\n", 469 | " ,\n", 470 | " \n", 471 | " 76.\n", 472 | " Braveheart\n", 473 | " (1995)\n", 474 | " ,\n", 475 | " \n", 476 | " 77.\n", 477 | " Das Boot\n", 478 | " (1981)\n", 479 | " ,\n", 480 | " \n", 481 | " 78.\n", 482 | " Joker\n", 483 | " (2019)\n", 484 | " ,\n", 485 | " \n", 486 | " 79.\n", 487 | " Avengers: Endgame\n", 488 | " (2019)\n", 489 | " ,\n", 490 | " \n", 491 | " 80.\n", 492 | " Mononoke-hime\n", 493 | " (1997)\n", 494 | " ,\n", 495 | " \n", 496 | " 81.\n", 497 | " Good Will Hunting\n", 498 | " (1997)\n", 499 | " ,\n", 500 | " \n", 501 | " 82.\n", 502 | " Once Upon a Time in America\n", 503 | " (1984)\n", 504 | " ,\n", 505 | " \n", 506 | " 83.\n", 507 | " Kimi no na wa.\n", 508 | " (2016)\n", 509 | " ,\n", 510 | " \n", 511 | " 84.\n", 512 | " 3 Idiots\n", 513 | " (2009)\n", 514 | " ,\n", 515 | " \n", 516 | " 85.\n", 517 | " Singin' in the Rain\n", 518 | " (1952)\n", 519 | " ,\n", 520 | " \n", 521 | " 86.\n", 522 | " Tengoku to jigoku\n", 523 | " (1963)\n", 524 | " ,\n", 525 | " \n", 526 | " 87.\n", 527 | " Requiem for a Dream\n", 528 | " (2000)\n", 529 | " ,\n", 530 | " \n", 531 | " 88.\n", 532 | " Capharnaüm\n", 533 | " (2018)\n", 534 | " ,\n", 535 | " \n", 536 | " 89.\n", 537 | " Toy Story 3\n", 538 | " (2010)\n", 539 | " ,\n", 540 | " \n", 541 | " 90.\n", 542 | " Star Wars: Episode VI - Return of the Jedi\n", 543 | " (1983)\n", 544 | " ,\n", 545 | " \n", 546 | " 91.\n", 547 | " Idi i smotri\n", 548 | " (1985)\n", 549 | " ,\n", 550 | " \n", 551 | " 
92.\n", 552 | " Eternal Sunshine of the Spotless Mind\n", 553 | " (2004)\n", 554 | " ,\n", 555 | " \n", 556 | " 93.\n", 557 | " 2001: A Space Odyssey\n", 558 | " (1968)\n", 559 | " ,\n", 560 | " \n", 561 | " 94.\n", 562 | " Jagten\n", 563 | " (2012)\n", 564 | " ,\n", 565 | " \n", 566 | " 95.\n", 567 | " Reservoir Dogs\n", 568 | " (1992)\n", 569 | " ,\n", 570 | " \n", 571 | " 96.\n", 572 | " Lawrence of Arabia\n", 573 | " (1962)\n", 574 | " ,\n", 575 | " \n", 576 | " 97.\n", 577 | " Citizen Kane\n", 578 | " (1941)\n", 579 | " ,\n", 580 | " \n", 581 | " 98.\n", 582 | " Ikiru\n", 583 | " (1952)\n", 584 | " ,\n", 585 | " \n", 586 | " 99.\n", 587 | " M - Eine Stadt sucht einen Mörder\n", 588 | " (1931)\n", 589 | " ,\n", 590 | " \n", 591 | " 100.\n", 592 | " North by Northwest\n", 593 | " (1959)\n", 594 | " ,\n", 595 | " \n", 596 | " 101.\n", 597 | " The Apartment\n", 598 | " (1960)\n", 599 | " ,\n", 600 | " \n", 601 | " 102.\n", 602 | " Vertigo\n", 603 | " (1958)\n", 604 | " ,\n", 605 | " \n", 606 | " 103.\n", 607 | " Le fabuleux destin d'Amélie Poulain\n", 608 | " (2001)\n", 609 | " ,\n", 610 | " \n", 611 | " 104.\n", 612 | " Double Indemnity\n", 613 | " (1944)\n", 614 | " ,\n", 615 | " \n", 616 | " 105.\n", 617 | " A Clockwork Orange\n", 618 | " (1971)\n", 619 | " ,\n", 620 | " \n", 621 | " 106.\n", 622 | " Full Metal Jacket\n", 623 | " (1987)\n", 624 | " ,\n", 625 | " \n", 626 | " 107.\n", 627 | " Scarface\n", 628 | " (1983)\n", 629 | " ,\n", 630 | " \n", 631 | " 108.\n", 632 | " Hamilton\n", 633 | " (2020)\n", 634 | " ,\n", 635 | " \n", 636 | " 109.\n", 637 | " Incendies\n", 638 | " (2010)\n", 639 | " ,\n", 640 | " \n", 641 | " 110.\n", 642 | " Heat\n", 643 | " (1995)\n", 644 | " ,\n", 645 | " \n", 646 | " 111.\n", 647 | " Up\n", 648 | " (2009)\n", 649 | " ,\n", 650 | " \n", 651 | " 112.\n", 652 | " To Kill a Mockingbird\n", 653 | " (1962)\n", 654 | " ,\n", 655 | " \n", 656 | " 113.\n", 657 | " The Sting\n", 658 | " (1973)\n", 659 | " ,\n", 660 | " \n", 661 | " 
114.\n", 662 | " Jodaeiye Nader az Simin\n", 663 | " (2011)\n", 664 | " ,\n", 665 | " \n", 666 | " 115.\n", 667 | " Metropolis\n", 668 | " (1927)\n", 669 | " ,\n", 670 | " \n", 671 | " 116.\n", 672 | " Indiana Jones and the Last Crusade\n", 673 | " (1989)\n", 674 | " ,\n", 675 | " \n", 676 | " 117.\n", 677 | " Die Hard\n", 678 | " (1988)\n", 679 | " ,\n", 680 | " \n", 681 | " 118.\n", 682 | " L.A. Confidential\n", 683 | " (1997)\n", 684 | " ,\n", 685 | " \n", 686 | " 119.\n", 687 | " Snatch\n", 688 | " (2000)\n", 689 | " ,\n", 690 | " \n", 691 | " 120.\n", 692 | " Ladri di biciclette\n", 693 | " (1948)\n", 694 | " ,\n", 695 | " \n", 696 | " 121.\n", 697 | " Taare Zameen Par\n", 698 | " (2007)\n", 699 | " ,\n", 700 | " \n", 701 | " 122.\n", 702 | " Taxi Driver\n", 703 | " (1976)\n", 704 | " ,\n", 705 | " \n", 706 | " 123.\n", 707 | " 1917\n", 708 | " (2019)\n", 709 | " ,\n", 710 | " \n", 711 | " 124.\n", 712 | " Dangal\n", 713 | " (2016)\n", 714 | " ,\n", 715 | " \n", 716 | " 125.\n", 717 | " Der Untergang\n", 718 | " (2004)\n", 719 | " ,\n", 720 | " \n", 721 | " 126.\n", 722 | " Top Gun: Maverick\n", 723 | " (2022)\n", 724 | " ,\n", 725 | " \n", 726 | " 127.\n", 727 | " Per qualche dollaro in più\n", 728 | " (1965)\n", 729 | " ,\n", 730 | " \n", 731 | " 128.\n", 732 | " Batman Begins\n", 733 | " (2005)\n", 734 | " ,\n", 735 | " \n", 736 | " 129.\n", 737 | " The Kid\n", 738 | " (1921)\n", 739 | " ,\n", 740 | " \n", 741 | " 130.\n", 742 | " Some Like It Hot\n", 743 | " (1959)\n", 744 | " ,\n", 745 | " \n", 746 | " 131.\n", 747 | " The Father\n", 748 | " (2020)\n", 749 | " ,\n", 750 | " \n", 751 | " 132.\n", 752 | " The Wolf of Wall Street\n", 753 | " (2013)\n", 754 | " ,\n", 755 | " \n", 756 | " 133.\n", 757 | " All About Eve\n", 758 | " (1950)\n", 759 | " ,\n", 760 | " \n", 761 | " 134.\n", 762 | " Green Book\n", 763 | " (2018)\n", 764 | " ,\n", 765 | " \n", 766 | " 135.\n", 767 | " Judgment at Nuremberg\n", 768 | " (1961)\n", 769 | " ,\n", 770 | " \n", 771 | " 
136.\n", 772 | " The Truman Show\n", 773 | " (1998)\n", 774 | " ,\n", 775 | " \n", 776 | " 137.\n", 777 | " Ran\n", 778 | " (1985)\n", 779 | " ,\n", 780 | " \n", 781 | " 138.\n", 782 | " Casino\n", 783 | " (1995)\n", 784 | " ,\n", 785 | " \n", 786 | " 139.\n", 787 | " There Will Be Blood\n", 788 | " (2007)\n", 789 | " ,\n", 790 | " \n", 791 | " 140.\n", 792 | " Pan's Labyrinth\n", 793 | " (2006)\n", 794 | " ,\n", 795 | " \n", 796 | " 141.\n", 797 | " Shutter Island\n", 798 | " (2010)\n", 799 | " ,\n", 800 | " \n", 801 | " 142.\n", 802 | " Unforgiven\n", 803 | " (1992)\n", 804 | " ,\n", 805 | " \n", 806 | " 143.\n", 807 | " The Sixth Sense\n", 808 | " (1999)\n", 809 | " ,\n", 810 | " \n", 811 | " 144.\n", 812 | " Jurassic Park\n", 813 | " (1993)\n", 814 | " ,\n", 815 | " \n", 816 | " 145.\n", 817 | " A Beautiful Mind\n", 818 | " (2001)\n", 819 | " ,\n", 820 | " \n", 821 | " 146.\n", 822 | " The Treasure of the Sierra Madre\n", 823 | " (1948)\n", 824 | " ,\n", 825 | " \n", 826 | " 147.\n", 827 | " Yôjinbô\n", 828 | " (1961)\n", 829 | " ,\n", 830 | " \n", 831 | " 148.\n", 832 | " No Country for Old Men\n", 833 | " (2007)\n", 834 | " ,\n", 835 | " \n", 836 | " 149.\n", 837 | " Monty Python and the Holy Grail\n", 838 | " (1975)\n", 839 | " ,\n", 840 | " \n", 841 | " 150.\n", 842 | " Kill Bill: Vol. 
1\n", 843 | " (2003)\n", 844 | " ,\n", 845 | " \n", 846 | " 151.\n", 847 | " The Great Escape\n", 848 | " (1963)\n", 849 | " ,\n", 850 | " \n", 851 | " 152.\n", 852 | " The Thing\n", 853 | " (1982)\n", 854 | " ,\n", 855 | " \n", 856 | " 153.\n", 857 | " Rashômon\n", 858 | " (1950)\n", 859 | " ,\n", 860 | " \n", 861 | " 154.\n", 862 | " Finding Nemo\n", 863 | " (2003)\n", 864 | " ,\n", 865 | " \n", 866 | " 155.\n", 867 | " Spider-Man: No Way Home\n", 868 | " (2021)\n", 869 | " ,\n", 870 | " \n", 871 | " 156.\n", 872 | " The Elephant Man\n", 873 | " (1980)\n", 874 | " ,\n", 875 | " \n", 876 | " 157.\n", 877 | " Chinatown\n", 878 | " (1974)\n", 879 | " ,\n", 880 | " \n", 881 | " 158.\n", 882 | " V for Vendetta\n", 883 | " (2005)\n", 884 | " ,\n", 885 | " \n", 886 | " 159.\n", 887 | " Raging Bull\n", 888 | " (1980)\n", 889 | " ,\n", 890 | " \n", 891 | " 160.\n", 892 | " Gone with the Wind\n", 893 | " (1939)\n", 894 | " ,\n", 895 | " \n", 896 | " 161.\n", 897 | " Dial M for Murder\n", 898 | " (1954)\n", 899 | " ,\n", 900 | " \n", 901 | " 162.\n", 902 | " Hauru no ugoku shiro\n", 903 | " (2004)\n", 904 | " ,\n", 905 | " \n", 906 | " 163.\n", 907 | " Lock, Stock and Two Smoking Barrels\n", 908 | " (1998)\n", 909 | " ,\n", 910 | " \n", 911 | " 164.\n", 912 | " Inside Out\n", 913 | " (2015)\n", 914 | " ,\n", 915 | " \n", 916 | " 165.\n", 917 | " El secreto de sus ojos\n", 918 | " (2009)\n", 919 | " ,\n", 920 | " \n", 921 | " 166.\n", 922 | " Three Billboards Outside Ebbing, Missouri\n", 923 | " (2017)\n", 924 | " ,\n", 925 | " \n", 926 | " 167.\n", 927 | " Prisoners\n", 928 | " (2013)\n", 929 | " ,\n", 930 | " \n", 931 | " 168.\n", 932 | " The Bridge on the River Kwai\n", 933 | " (1957)\n", 934 | " ,\n", 935 | " \n", 936 | " 169.\n", 937 | " Trainspotting\n", 938 | " (1996)\n", 939 | " ,\n", 940 | " \n", 941 | " 170.\n", 942 | " Fargo\n", 943 | " (1996)\n", 944 | " ,\n", 945 | " \n", 946 | " 171.\n", 947 | " Warrior\n", 948 | " (2011)\n", 949 | " ,\n", 950 | " \n", 951 | " 
172.\n", 952 | " Gran Torino\n", 953 | " (2008)\n", 954 | " ,\n", 955 | " \n", 956 | " 173.\n", 957 | " Catch Me If You Can\n", 958 | " (2002)\n", 959 | " ,\n", 960 | " \n", 961 | " 174.\n", 962 | " Tonari no Totoro\n", 963 | " (1988)\n", 964 | " ,\n", 965 | " \n", 966 | " 175.\n", 967 | " Million Dollar Baby\n", 968 | " (2004)\n", 969 | " ,\n", 970 | " \n", 971 | " 176.\n", 972 | " Bacheha-Ye aseman\n", 973 | " (1997)\n", 974 | " ,\n", 975 | " \n", 976 | " 177.\n", 977 | " Klaus\n", 978 | " (2019)\n", 979 | " ,\n", 980 | " \n", 981 | " 178.\n", 982 | " Harry Potter and the Deathly Hallows: Part 2\n", 983 | " (2011)\n", 984 | " ,\n", 985 | " \n", 986 | " 179.\n", 987 | " Blade Runner\n", 988 | " (1982)\n", 989 | " ,\n", 990 | " \n", 991 | " 180.\n", 992 | " The Gold Rush\n", 993 | " (1925)\n", 994 | " ,\n", 995 | " \n", 996 | " 181.\n", 997 | " 12 Years a Slave\n", 998 | " (2013)\n", 999 | " ,\n", 1000 | " \n", 1001 | " 182.\n", 1002 | " Before Sunrise\n", 1003 | " (1995)\n", 1004 | " ,\n", 1005 | " \n", 1006 | " 183.\n", 1007 | " The Grand Budapest Hotel\n", 1008 | " (2014)\n", 1009 | " ,\n", 1010 | " \n", 1011 | " 184.\n", 1012 | " Ben-Hur\n", 1013 | " (1959)\n", 1014 | " ,\n", 1015 | " \n", 1016 | " 185.\n", 1017 | " Gone Girl\n", 1018 | " (2014)\n", 1019 | " ,\n", 1020 | " \n", 1021 | " 186.\n", 1022 | " On the Waterfront\n", 1023 | " (1954)\n", 1024 | " ,\n", 1025 | " \n", 1026 | " 187.\n", 1027 | " Barry Lyndon\n", 1028 | " (1975)\n", 1029 | " ,\n", 1030 | " \n", 1031 | " 188.\n", 1032 | " The General\n", 1033 | " (1926)\n", 1034 | " ,\n", 1035 | " \n", 1036 | " 189.\n", 1037 | " Smultronstället\n", 1038 | " (1957)\n", 1039 | " ,\n", 1040 | " \n", 1041 | " 190.\n", 1042 | " In the Name of the Father\n", 1043 | " (1993)\n", 1044 | " ,\n", 1045 | " \n", 1046 | " 191.\n", 1047 | " The Deer Hunter\n", 1048 | " (1978)\n", 1049 | " ,\n", 1050 | " \n", 1051 | " 192.\n", 1052 | " Hacksaw Ridge\n", 1053 | " (2016)\n", 1054 | " ,\n", 1055 | " \n", 1056 | " 193.\n", 
1057 | " The Third Man\n", 1058 | " (1949)\n", 1059 | " ,\n", 1060 | " \n", 1061 | " 194.\n", 1062 | " Salinui chueok\n", 1063 | " (2003)\n", 1064 | " ,\n", 1065 | " \n", 1066 | " 195.\n", 1067 | " Le salaire de la peur\n", 1068 | " (1953)\n", 1069 | " ,\n", 1070 | " \n", 1071 | " 196.\n", 1072 | " Sherlock Jr.\n", 1073 | " (1924)\n", 1074 | " ,\n", 1075 | " \n", 1076 | " 197.\n", 1077 | " Relatos salvajes\n", 1078 | " (2014)\n", 1079 | " ,\n", 1080 | " \n", 1081 | " 198.\n", 1082 | " Mr. Smith Goes to Washington\n", 1083 | " (1939)\n", 1084 | " ,\n", 1085 | " \n", 1086 | " 199.\n", 1087 | " Mad Max: Fury Road\n", 1088 | " (2015)\n", 1089 | " ,\n", 1090 | " \n", 1091 | " 200.\n", 1092 | " Dead Poets Society\n", 1093 | " (1989)\n", 1094 | " ,\n", 1095 | " \n", 1096 | " 201.\n", 1097 | " Mary and Max.\n", 1098 | " (2009)\n", 1099 | " ,\n", 1100 | " \n", 1101 | " 202.\n", 1102 | " Monsters, Inc.\n", 1103 | " (2001)\n", 1104 | " ,\n", 1105 | " \n", 1106 | " 203.\n", 1107 | " How to Train Your Dragon\n", 1108 | " (2010)\n", 1109 | " ,\n", 1110 | " \n", 1111 | " 204.\n", 1112 | " Jaws\n", 1113 | " (1975)\n", 1114 | " ,\n", 1115 | " \n", 1116 | " 205.\n", 1117 | " Guardians of the Galaxy Vol. 
3\n", 1118 | " (2023)\n", 1119 | " ,\n", 1120 | " \n", 1121 | " 206.\n", 1122 | " Det sjunde inseglet\n", 1123 | " (1957)\n", 1124 | " ,\n", 1125 | " \n", 1126 | " 207.\n", 1127 | " Room\n", 1128 | " (2015)\n", 1129 | " ,\n", 1130 | " \n", 1131 | " 208.\n", 1132 | " Tôkyô monogatari\n", 1133 | " (1953)\n", 1134 | " ,\n", 1135 | " \n", 1136 | " 209.\n", 1137 | " The Big Lebowski\n", 1138 | " (1998)\n", 1139 | " ,\n", 1140 | " \n", 1141 | " 210.\n", 1142 | " Ford v Ferrari\n", 1143 | " (2019)\n", 1144 | " ,\n", 1145 | " \n", 1146 | " 211.\n", 1147 | " La passion de Jeanne d'Arc\n", 1148 | " (1928)\n", 1149 | " ,\n", 1150 | " \n", 1151 | " 212.\n", 1152 | " Hotel Rwanda\n", 1153 | " (2004)\n", 1154 | " ,\n", 1155 | " \n", 1156 | " 213.\n", 1157 | " Ratatouille\n", 1158 | " (2007)\n", 1159 | " ,\n", 1160 | " \n", 1161 | " 214.\n", 1162 | " Rocky\n", 1163 | " (1976)\n", 1164 | " ,\n", 1165 | " \n", 1166 | " 215.\n", 1167 | " Platoon\n", 1168 | " (1986)\n", 1169 | " ,\n", 1170 | " \n", 1171 | " 216.\n", 1172 | " Logan\n", 1173 | " (2017)\n", 1174 | " ,\n", 1175 | " \n", 1176 | " 217.\n", 1177 | " Spotlight\n", 1178 | " (2015)\n", 1179 | " ,\n", 1180 | " \n", 1181 | " 218.\n", 1182 | " The Terminator\n", 1183 | " (1984)\n", 1184 | " ,\n", 1185 | " \n", 1186 | " 219.\n", 1187 | " Jai Bhim\n", 1188 | " (2021)\n", 1189 | " ,\n", 1190 | " \n", 1191 | " 220.\n", 1192 | " Before Sunset\n", 1193 | " (2004)\n", 1194 | " ,\n", 1195 | " \n", 1196 | " 221.\n", 1197 | " Rush\n", 1198 | " (2013)\n", 1199 | " ,\n", 1200 | " \n", 1201 | " 222.\n", 1202 | " Network\n", 1203 | " (1976)\n", 1204 | " ,\n", 1205 | " \n", 1206 | " 223.\n", 1207 | " Stand by Me\n", 1208 | " (1986)\n", 1209 | " ,\n", 1210 | " \n", 1211 | " 224.\n", 1212 | " The Best Years of Our Lives\n", 1213 | " (1946)\n", 1214 | " ,\n", 1215 | " \n", 1216 | " 225.\n", 1217 | " Into the Wild\n", 1218 | " (2007)\n", 1219 | " ,\n", 1220 | " \n", 1221 | " 226.\n", 1222 | " The Wizard of Oz\n", 1223 | " (1939)\n", 1224 | " ,\n", 
1225 | " \n", 1226 | " 227.\n", 1227 | " La haine\n", 1228 | " (1995)\n", 1229 | " ,\n", 1230 | " \n", 1231 | " 228.\n", 1232 | " The Incredibles\n", 1233 | " (2004)\n", 1234 | " ,\n", 1235 | " \n", 1236 | " 229.\n", 1237 | " The Exorcist\n", 1238 | " (1973)\n", 1239 | " ,\n", 1240 | " \n", 1241 | " 230.\n", 1242 | " Pirates of the Caribbean: The Curse of the Black Pearl\n", 1243 | " (2003)\n", 1244 | " ,\n", 1245 | " \n", 1246 | " 231.\n", 1247 | " To Be or Not to Be\n", 1248 | " (1942)\n", 1249 | " ,\n", 1250 | " \n", 1251 | " 232.\n", 1252 | " Groundhog Day\n", 1253 | " (1993)\n", 1254 | " ,\n", 1255 | " \n", 1256 | " 233.\n", 1257 | " Babam ve Oglum\n", 1258 | " (2005)\n", 1259 | " ,\n", 1260 | " \n", 1261 | " 234.\n", 1262 | " La battaglia di Algeri\n", 1263 | " (1966)\n", 1264 | " ,\n", 1265 | " \n", 1266 | " 235.\n", 1267 | " The Grapes of Wrath\n", 1268 | " (1940)\n", 1269 | " ,\n", 1270 | " \n", 1271 | " 236.\n", 1272 | " Hachi: A Dog's Tale\n", 1273 | " (2009)\n", 1274 | " ,\n", 1275 | " \n", 1276 | " 237.\n", 1277 | " Ah-ga-ssi\n", 1278 | " (2016)\n", 1279 | " ,\n", 1280 | " \n", 1281 | " 238.\n", 1282 | " Pather Panchali\n", 1283 | " (1955)\n", 1284 | " ,\n", 1285 | " \n", 1286 | " 239.\n", 1287 | " Amores perros\n", 1288 | " (2000)\n", 1289 | " ,\n", 1290 | " \n", 1291 | " 240.\n", 1292 | " Rebecca\n", 1293 | " (1940)\n", 1294 | " ,\n", 1295 | " \n", 1296 | " 241.\n", 1297 | " Cool Hand Luke\n", 1298 | " (1967)\n", 1299 | " ,\n", 1300 | " \n", 1301 | " 242.\n", 1302 | " The Sound of Music\n", 1303 | " (1965)\n", 1304 | " ,\n", 1305 | " \n", 1306 | " 243.\n", 1307 | " It Happened One Night\n", 1308 | " (1934)\n", 1309 | " ,\n", 1310 | " \n", 1311 | " 244.\n", 1312 | " Les quatre cents coups\n", 1313 | " (1959)\n", 1314 | " ,\n", 1315 | " \n", 1316 | " 245.\n", 1317 | " Persona\n", 1318 | " (1966)\n", 1319 | " ,\n", 1320 | " \n", 1321 | " 246.\n", 1322 | " The Help\n", 1323 | " (2011)\n", 1324 | " ,\n", 1325 | " \n", 1326 | " 247.\n", 1327 | " The Iron 
Giant\n", 1328 | " (1999)\n", 1329 | " ,\n", 1330 | " \n", 1331 | " 248.\n", 1332 | " Life of Brian\n", 1333 | " (1979)\n", 1334 | " ,\n", 1335 | " \n", 1336 | " 249.\n", 1337 | " Aladdin\n", 1338 | " (1992)\n", 1339 | " ,\n", 1340 | " \n", 1341 | " 250.\n", 1342 | " Dances with Wolves\n", 1343 | " (1990)\n", 1344 | " ]" 1345 | ] 1346 | }, 1347 | "execution_count": 9, 1348 | "metadata": {}, 1349 | "output_type": "execute_result" 1350 | } 1351 | ], 1352 | "source": [ 1353 | "# scrap movie names\n", 1354 | "scraped_movies = soup.find_all('td', class_='titleColumn')\n", 1355 | "scraped_movies" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": 10, 1361 | "metadata": {}, 1362 | "outputs": [ 1363 | { 1364 | "data": { 1365 | "text/plain": [ 1366 | "['1. The Shawshank Redemption(1994)',\n", 1367 | " '2. The Godfather(1972)',\n", 1368 | " '3. The Dark Knight(2008)',\n", 1369 | " '4. The Godfather Part II(1974)',\n", 1370 | " '5. 12 Angry Men(1957)',\n", 1371 | " \"6. Schindler's List(1993)\",\n", 1372 | " '7. The Lord of the Rings: The Return of the King(2003)',\n", 1373 | " '8. Pulp Fiction(1994)',\n", 1374 | " '9. The Lord of the Rings: The Fellowship of the Ring(2001)',\n", 1375 | " '10. Il buono, il brutto, il cattivo(1966)',\n", 1376 | " '11. Forrest Gump(1994)',\n", 1377 | " '12. Fight Club(1999)',\n", 1378 | " '13. Spider-Man: Across the Spider-Verse(2023)',\n", 1379 | " '14. The Lord of the Rings: The Two Towers(2002)',\n", 1380 | " '15. Inception(2010)',\n", 1381 | " '16. Star Wars: Episode V - The Empire Strikes Back(1980)',\n", 1382 | " '17. The Matrix(1999)',\n", 1383 | " '18. GoodFellas(1990)',\n", 1384 | " \"19. One Flew Over the Cuckoo's Nest(1975)\",\n", 1385 | " '20. Se7en(1995)',\n", 1386 | " \"21. It's a Wonderful Life(1946)\",\n", 1387 | " '22. Shichinin no samurai(1954)',\n", 1388 | " '23. The Silence of the Lambs(1991)',\n", 1389 | " '24. Saving Private Ryan(1998)',\n", 1390 | " '25. 
Cidade de Deus(2002)',\n", 1391 | " '26. Interstellar(2014)',\n", 1392 | " '27. La vita è bella(1997)',\n", 1393 | " '28. The Green Mile(1999)',\n", 1394 | " '29. Star Wars(1977)',\n", 1395 | " '30. Terminator 2: Judgment Day(1991)',\n", 1396 | " '31. Back to the Future(1985)',\n", 1397 | " '32. Sen to Chihiro no kamikakushi(2001)',\n", 1398 | " '33. The Pianist(2002)',\n", 1399 | " '34. Psycho(1960)',\n", 1400 | " '35. Gisaengchung(2019)',\n", 1401 | " '36. Léon(1994)',\n", 1402 | " '37. The Lion King(1994)',\n", 1403 | " '38. Gladiator(2000)',\n", 1404 | " '39. American History X(1998)',\n", 1405 | " '40. The Departed(2006)',\n", 1406 | " '41. Whiplash(2014)',\n", 1407 | " '42. The Prestige(2006)',\n", 1408 | " '43. The Usual Suspects(1995)',\n", 1409 | " '44. Casablanca(1942)',\n", 1410 | " '45. Hotaru no haka(1988)',\n", 1411 | " '46. Seppuku(1962)',\n", 1412 | " '47. The Intouchables(2011)',\n", 1413 | " '48. Modern Times(1936)',\n", 1414 | " '49. Once Upon a Time in the West(1968)',\n", 1415 | " '50. Nuovo Cinema Paradiso(1988)',\n", 1416 | " '51. Rear Window(1954)',\n", 1417 | " '52. Alien(1979)',\n", 1418 | " '53. City Lights(1931)',\n", 1419 | " '54. Apocalypse Now(1979)',\n", 1420 | " '55. Memento(2000)',\n", 1421 | " '56. Django Unchained(2012)',\n", 1422 | " '57. Raiders of the Lost Ark(1981)',\n", 1423 | " '58. WALL·E(2008)',\n", 1424 | " '59. The Lives of Others(2006)',\n", 1425 | " '60. Sunset Blvd.(1950)',\n", 1426 | " '61. Paths of Glory(1957)',\n", 1427 | " '62. Avengers: Infinity War(2018)',\n", 1428 | " '63. The Shining(1980)',\n", 1429 | " '64. The Great Dictator(1940)',\n", 1430 | " '65. Witness for the Prosecution(1957)',\n", 1431 | " '66. Spider-Man: Into the Spider-Verse(2018)',\n", 1432 | " '67. Aliens(1986)',\n", 1433 | " '68. American Beauty(1999)',\n", 1434 | " '69. The Dark Knight Rises(2012)',\n", 1435 | " '70. Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb(1964)',\n", 1436 | " '71. 
Inglourious Basterds(2009)',\n", 1437 | " '72. Oldeuboi(2003)',\n", 1438 | " '73. Coco(2017)',\n", 1439 | " '74. Amadeus(1984)',\n", 1440 | " '75. Toy Story(1995)',\n", 1441 | " '76. Braveheart(1995)',\n", 1442 | " '77. Das Boot(1981)',\n", 1443 | " '78. Joker(2019)',\n", 1444 | " '79. Avengers: Endgame(2019)',\n", 1445 | " '80. Mononoke-hime(1997)',\n", 1446 | " '81. Good Will Hunting(1997)',\n", 1447 | " '82. Once Upon a Time in America(1984)',\n", 1448 | " '83. Kimi no na wa.(2016)',\n", 1449 | " '84. 3 Idiots(2009)',\n", 1450 | " \"85. Singin' in the Rain(1952)\",\n", 1451 | " '86. Tengoku to jigoku(1963)',\n", 1452 | " '87. Requiem for a Dream(2000)',\n", 1453 | " '88. Capharnaüm(2018)',\n", 1454 | " '89. Toy Story 3(2010)',\n", 1455 | " '90. Star Wars: Episode VI - Return of the Jedi(1983)',\n", 1456 | " '91. Idi i smotri(1985)',\n", 1457 | " '92. Eternal Sunshine of the Spotless Mind(2004)',\n", 1458 | " '93. 2001: A Space Odyssey(1968)',\n", 1459 | " '94. Jagten(2012)',\n", 1460 | " '95. Reservoir Dogs(1992)',\n", 1461 | " '96. Lawrence of Arabia(1962)',\n", 1462 | " '97. Citizen Kane(1941)',\n", 1463 | " '98. Ikiru(1952)',\n", 1464 | " '99. M - Eine Stadt sucht einen Mörder(1931)',\n", 1465 | " '100. North by Northwest(1959)',\n", 1466 | " '101. The Apartment(1960)',\n", 1467 | " '102. Vertigo(1958)',\n", 1468 | " \"103. Le fabuleux destin d'Amélie Poulain(2001)\",\n", 1469 | " '104. Double Indemnity(1944)',\n", 1470 | " '105. A Clockwork Orange(1971)',\n", 1471 | " '106. Full Metal Jacket(1987)',\n", 1472 | " '107. Scarface(1983)',\n", 1473 | " '108. Hamilton(2020)',\n", 1474 | " '109. Incendies(2010)',\n", 1475 | " '110. Heat(1995)',\n", 1476 | " '111. Up(2009)',\n", 1477 | " '112. To Kill a Mockingbird(1962)',\n", 1478 | " '113. The Sting(1973)',\n", 1479 | " '114. Jodaeiye Nader az Simin(2011)',\n", 1480 | " '115. Metropolis(1927)',\n", 1481 | " '116. Indiana Jones and the Last Crusade(1989)',\n", 1482 | " '117. Die Hard(1988)',\n", 1483 | " '118. L.A. 
Confidential(1997)',\n", 1484 | " '119. Snatch(2000)',\n", 1485 | " '120. Ladri di biciclette(1948)',\n", 1486 | " '121. Taare Zameen Par(2007)',\n", 1487 | " '122. Taxi Driver(1976)',\n", 1488 | " '123. 1917(2019)',\n", 1489 | " '124. Dangal(2016)',\n", 1490 | " '125. Der Untergang(2004)',\n", 1491 | " '126. Top Gun: Maverick(2022)',\n", 1492 | " '127. Per qualche dollaro in più(1965)',\n", 1493 | " '128. Batman Begins(2005)',\n", 1494 | " '129. The Kid(1921)',\n", 1495 | " '130. Some Like It Hot(1959)',\n", 1496 | " '131. The Father(2020)',\n", 1497 | " '132. The Wolf of Wall Street(2013)',\n", 1498 | " '133. All About Eve(1950)',\n", 1499 | " '134. Green Book(2018)',\n", 1500 | " '135. Judgment at Nuremberg(1961)',\n", 1501 | " '136. The Truman Show(1998)',\n", 1502 | " '137. Ran(1985)',\n", 1503 | " '138. Casino(1995)',\n", 1504 | " '139. There Will Be Blood(2007)',\n", 1505 | " \"140. Pan's Labyrinth(2006)\",\n", 1506 | " '141. Shutter Island(2010)',\n", 1507 | " '142. Unforgiven(1992)',\n", 1508 | " '143. The Sixth Sense(1999)',\n", 1509 | " '144. Jurassic Park(1993)',\n", 1510 | " '145. A Beautiful Mind(2001)',\n", 1511 | " '146. The Treasure of the Sierra Madre(1948)',\n", 1512 | " '147. Yôjinbô(1961)',\n", 1513 | " '148. No Country for Old Men(2007)',\n", 1514 | " '149. Monty Python and the Holy Grail(1975)',\n", 1515 | " '150. Kill Bill: Vol. 1(2003)',\n", 1516 | " '151. The Great Escape(1963)',\n", 1517 | " '152. The Thing(1982)',\n", 1518 | " '153. Rashômon(1950)',\n", 1519 | " '154. Finding Nemo(2003)',\n", 1520 | " '155. Spider-Man: No Way Home(2021)',\n", 1521 | " '156. The Elephant Man(1980)',\n", 1522 | " '157. Chinatown(1974)',\n", 1523 | " '158. V for Vendetta(2005)',\n", 1524 | " '159. Raging Bull(1980)',\n", 1525 | " '160. Gone with the Wind(1939)',\n", 1526 | " '161. Dial M for Murder(1954)',\n", 1527 | " '162. Hauru no ugoku shiro(2004)',\n", 1528 | " '163. Lock, Stock and Two Smoking Barrels(1998)',\n", 1529 | " '164. 
Inside Out(2015)',\n", 1530 | " '165. El secreto de sus ojos(2009)',\n", 1531 | " '166. Three Billboards Outside Ebbing, Missouri(2017)',\n", 1532 | " '167. Prisoners(2013)',\n", 1533 | " '168. The Bridge on the River Kwai(1957)',\n", 1534 | " '169. Trainspotting(1996)',\n", 1535 | " '170. Fargo(1996)',\n", 1536 | " '171. Warrior(2011)',\n", 1537 | " '172. Gran Torino(2008)',\n", 1538 | " '173. Catch Me If You Can(2002)',\n", 1539 | " '174. Tonari no Totoro(1988)',\n", 1540 | " '175. Million Dollar Baby(2004)',\n", 1541 | " '176. Bacheha-Ye aseman(1997)',\n", 1542 | " '177. Klaus(2019)',\n", 1543 | " '178. Harry Potter and the Deathly Hallows: Part 2(2011)',\n", 1544 | " '179. Blade Runner(1982)',\n", 1545 | " '180. The Gold Rush(1925)',\n", 1546 | " '181. 12 Years a Slave(2013)',\n", 1547 | " '182. Before Sunrise(1995)',\n", 1548 | " '183. The Grand Budapest Hotel(2014)',\n", 1549 | " '184. Ben-Hur(1959)',\n", 1550 | " '185. Gone Girl(2014)',\n", 1551 | " '186. On the Waterfront(1954)',\n", 1552 | " '187. Barry Lyndon(1975)',\n", 1553 | " '188. The General(1926)',\n", 1554 | " '189. Smultronstället(1957)',\n", 1555 | " '190. In the Name of the Father(1993)',\n", 1556 | " '191. The Deer Hunter(1978)',\n", 1557 | " '192. Hacksaw Ridge(2016)',\n", 1558 | " '193. The Third Man(1949)',\n", 1559 | " '194. Salinui chueok(2003)',\n", 1560 | " '195. Le salaire de la peur(1953)',\n", 1561 | " '196. Sherlock Jr.(1924)',\n", 1562 | " '197. Relatos salvajes(2014)',\n", 1563 | " '198. Mr. Smith Goes to Washington(1939)',\n", 1564 | " '199. Mad Max: Fury Road(2015)',\n", 1565 | " '200. Dead Poets Society(1989)',\n", 1566 | " '201. Mary and Max.(2009)',\n", 1567 | " '202. Monsters, Inc.(2001)',\n", 1568 | " '203. How to Train Your Dragon(2010)',\n", 1569 | " '204. Jaws(1975)',\n", 1570 | " '205. Guardians of the Galaxy Vol. 3(2023)',\n", 1571 | " '206. Det sjunde inseglet(1957)',\n", 1572 | " '207. Room(2015)',\n", 1573 | " '208. Tôkyô monogatari(1953)',\n", 1574 | " '209. 
The Big Lebowski(1998)',\n", 1575 | " '210. Ford v Ferrari(2019)',\n", 1576 | " \"211. La passion de Jeanne d'Arc(1928)\",\n", 1577 | " '212. Hotel Rwanda(2004)',\n", 1578 | " '213. Ratatouille(2007)',\n", 1579 | " '214. Rocky(1976)',\n", 1580 | " '215. Platoon(1986)',\n", 1581 | " '216. Logan(2017)',\n", 1582 | " '217. Spotlight(2015)',\n", 1583 | " '218. The Terminator(1984)',\n", 1584 | " '219. Jai Bhim(2021)',\n", 1585 | " '220. Before Sunset(2004)',\n", 1586 | " '221. Rush(2013)',\n", 1587 | " '222. Network(1976)',\n", 1588 | " '223. Stand by Me(1986)',\n", 1589 | " '224. The Best Years of Our Lives(1946)',\n", 1590 | " '225. Into the Wild(2007)',\n", 1591 | " '226. The Wizard of Oz(1939)',\n", 1592 | " '227. La haine(1995)',\n", 1593 | " '228. The Incredibles(2004)',\n", 1594 | " '229. The Exorcist(1973)',\n", 1595 | " '230. Pirates of the Caribbean: The Curse of the Black Pearl(2003)',\n", 1596 | " '231. To Be or Not to Be(1942)',\n", 1597 | " '232. Groundhog Day(1993)',\n", 1598 | " '233. Babam ve Oglum(2005)',\n", 1599 | " '234. La battaglia di Algeri(1966)',\n", 1600 | " '235. The Grapes of Wrath(1940)',\n", 1601 | " \"236. Hachi: A Dog's Tale(2009)\",\n", 1602 | " '237. Ah-ga-ssi(2016)',\n", 1603 | " '238. Pather Panchali(1955)',\n", 1604 | " '239. Amores perros(2000)',\n", 1605 | " '240. Rebecca(1940)',\n", 1606 | " '241. Cool Hand Luke(1967)',\n", 1607 | " '242. The Sound of Music(1965)',\n", 1608 | " '243. It Happened One Night(1934)',\n", 1609 | " '244. Les quatre cents coups(1959)',\n", 1610 | " '245. Persona(1966)',\n", 1611 | " '246. The Help(2011)',\n", 1612 | " '247. The Iron Giant(1999)',\n", 1613 | " '248. Life of Brian(1979)',\n", 1614 | " '249. Aladdin(1992)',\n", 1615 | " '250. 
Dances with Wolves(1990)']" 1616 | ] 1617 | }, 1618 | "execution_count": 10, 1619 | "metadata": {}, 1620 | "output_type": "execute_result" 1621 | } 1622 | ], 1623 | "source": [ 1624 | "# parse movie names\n", 1625 | "movies = []\n", 1626 | "for movie in scraped_movies:\n", 1627 | " movie = movie.get_text().replace('\\n', \"\")\n", 1628 | " movie = movie.strip(\" \")\n", 1629 | " movies.append(movie)\n", 1630 | "movies" 1631 | ] 1632 | }, 1633 | { 1634 | "cell_type": "code", 1635 | "execution_count": 11, 1636 | "metadata": {}, 1637 | "outputs": [ 1638 | { 1639 | "data": { 1640 | "text/plain": [ 1641 | "[\n", 1642 | " 9.2\n", 1643 | " ,\n", 1644 | " \n", 1645 | " 9.2\n", 1646 | " ,\n", 1647 | " \n", 1648 | " 9.0\n", 1649 | " ,\n", 1650 | " \n", 1651 | " 9.0\n", 1652 | " ,\n", 1653 | " \n", 1654 | " 9.0\n", 1655 | " ,\n", 1656 | " \n", 1657 | " 8.9\n", 1658 | " ,\n", 1659 | " \n", 1660 | " 8.9\n", 1661 | " ,\n", 1662 | " \n", 1663 | " 8.8\n", 1664 | " ,\n", 1665 | " \n", 1666 | " 8.8\n", 1667 | " ,\n", 1668 | " \n", 1669 | " 8.8\n", 1670 | " ,\n", 1671 | " \n", 1672 | " 8.8\n", 1673 | " ,\n", 1674 | " \n", 1675 | " 8.7\n", 1676 | " ,\n", 1677 | " \n", 1678 | " 8.7\n", 1679 | " ,\n", 1680 | " \n", 1681 | " 8.7\n", 1682 | " ,\n", 1683 | " \n", 1684 | " 8.7\n", 1685 | " ,\n", 1686 | " \n", 1687 | " 8.7\n", 1688 | " ,\n", 1689 | " \n", 1690 | " 8.7\n", 1691 | " ,\n", 1692 | " \n", 1693 | " 8.7\n", 1694 | " ,\n", 1695 | " \n", 1696 | " 8.6\n", 1697 | " ,\n", 1698 | " \n", 1699 | " 8.6\n", 1700 | " ,\n", 1701 | " \n", 1702 | " 8.6\n", 1703 | " ,\n", 1704 | " \n", 1705 | " 8.6\n", 1706 | " ,\n", 1707 | " \n", 1708 | " 8.6\n", 1709 | " ,\n", 1710 | " \n", 1711 | " 8.6\n", 1712 | " ,\n", 1713 | " \n", 1714 | " 8.6\n", 1715 | " ,\n", 1716 | " \n", 1717 | " 8.6\n", 1718 | " ,\n", 1719 | " \n", 1720 | " 8.6\n", 1721 | " ,\n", 1722 | " \n", 1723 | " 8.6\n", 1724 | " ,\n", 1725 | " \n", 1726 | " 8.5\n", 1727 | " ,\n", 1728 | " \n", 1729 | " 8.5\n", 1730 | " ,\n", 1731 | " \n", 1732 
| " 8.5\n", 1733 | " ,\n", 1734 | " \n", 1735 | " 8.5\n", 1736 | " ,\n", 1737 | " \n", 1738 | " 8.5\n", 1739 | " ,\n", 1740 | " \n", 1741 | " 8.5\n", 1742 | " ,\n", 1743 | " \n", 1744 | " 8.5\n", 1745 | " ,\n", 1746 | " \n", 1747 | " 8.5\n", 1748 | " ,\n", 1749 | " \n", 1750 | " 8.5\n", 1751 | " ,\n", 1752 | " \n", 1753 | " 8.5\n", 1754 | " ,\n", 1755 | " \n", 1756 | " 8.5\n", 1757 | " ,\n", 1758 | " \n", 1759 | " 8.5\n", 1760 | " ,\n", 1761 | " \n", 1762 | " 8.5\n", 1763 | " ,\n", 1764 | " \n", 1765 | " 8.5\n", 1766 | " ,\n", 1767 | " \n", 1768 | " 8.5\n", 1769 | " ,\n", 1770 | " \n", 1771 | " 8.5\n", 1772 | " ,\n", 1773 | " \n", 1774 | " 8.5\n", 1775 | " ,\n", 1776 | " \n", 1777 | " 8.5\n", 1778 | " ,\n", 1779 | " \n", 1780 | " 8.5\n", 1781 | " ,\n", 1782 | " \n", 1783 | " 8.4\n", 1784 | " ,\n", 1785 | " \n", 1786 | " 8.4\n", 1787 | " ,\n", 1788 | " \n", 1789 | " 8.4\n", 1790 | " ,\n", 1791 | " \n", 1792 | " 8.4\n", 1793 | " ,\n", 1794 | " \n", 1795 | " 8.4\n", 1796 | " ,\n", 1797 | " \n", 1798 | " 8.4\n", 1799 | " ,\n", 1800 | " \n", 1801 | " 8.4\n", 1802 | " ,\n", 1803 | " \n", 1804 | " 8.4\n", 1805 | " ,\n", 1806 | " \n", 1807 | " 8.4\n", 1808 | " ,\n", 1809 | " \n", 1810 | " 8.4\n", 1811 | " ,\n", 1812 | " \n", 1813 | " 8.4\n", 1814 | " ,\n", 1815 | " \n", 1816 | " 8.4\n", 1817 | " ,\n", 1818 | " \n", 1819 | " 8.4\n", 1820 | " ,\n", 1821 | " \n", 1822 | " 8.4\n", 1823 | " ,\n", 1824 | " \n", 1825 | " 8.4\n", 1826 | " ,\n", 1827 | " \n", 1828 | " 8.4\n", 1829 | " ,\n", 1830 | " \n", 1831 | " 8.4\n", 1832 | " ,\n", 1833 | " \n", 1834 | " 8.4\n", 1835 | " ,\n", 1836 | " \n", 1837 | " 8.4\n", 1838 | " ,\n", 1839 | " \n", 1840 | " 8.3\n", 1841 | " ,\n", 1842 | " \n", 1843 | " 8.3\n", 1844 | " ,\n", 1845 | " \n", 1846 | " 8.3\n", 1847 | " ,\n", 1848 | " \n", 1849 | " 8.3\n", 1850 | " ,\n", 1851 | " \n", 1852 | " 8.3\n", 1853 | " ,\n", 1854 | " \n", 1855 | " 8.3\n", 1856 | " ,\n", 1857 | " \n", 1858 | " 8.3\n", 1859 | " ,\n", 1860 | " \n", 1861 | " 8.3\n", 1862 | " 
,\n", 1863 | " \n", 1864 | " 8.3\n", 1865 | " ,\n", 1866 | " \n", 1867 | " 8.3\n", 1868 | " ,\n", 1869 | " \n", 1870 | " 8.3\n", 1871 | " ,\n", 1872 | " \n", 1873 | " 8.3\n", 1874 | " ,\n", 1875 | " \n", 1876 | " 8.3\n", 1877 | " ,\n", 1878 | " \n", 1879 | " 8.3\n", 1880 | " ,\n", 1881 | " \n", 1882 | " 8.3\n", 1883 | " ,\n", 1884 | " \n", 1885 | " 8.3\n", 1886 | " ,\n", 1887 | " \n", 1888 | " 8.3\n", 1889 | " ,\n", 1890 | " \n", 1891 | " 8.3\n", 1892 | " ,\n", 1893 | " \n", 1894 | " 8.3\n", 1895 | " ,\n", 1896 | " \n", 1897 | " 8.3\n", 1898 | " ,\n", 1899 | " \n", 1900 | " 8.3\n", 1901 | " ,\n", 1902 | " \n", 1903 | " 8.3\n", 1904 | " ,\n", 1905 | " \n", 1906 | " 8.3\n", 1907 | " ,\n", 1908 | " \n", 1909 | " 8.3\n", 1910 | " ,\n", 1911 | " \n", 1912 | " 8.3\n", 1913 | " ,\n", 1914 | " \n", 1915 | " 8.3\n", 1916 | " ,\n", 1917 | " \n", 1918 | " 8.3\n", 1919 | " ,\n", 1920 | " \n", 1921 | " 8.3\n", 1922 | " ,\n", 1923 | " \n", 1924 | " 8.3\n", 1925 | " ,\n", 1926 | " \n", 1927 | " 8.2\n", 1928 | " ,\n", 1929 | " \n", 1930 | " 8.2\n", 1931 | " ,\n", 1932 | " \n", 1933 | " 8.2\n", 1934 | " ,\n", 1935 | " \n", 1936 | " 8.2\n", 1937 | " ,\n", 1938 | " \n", 1939 | " 8.2\n", 1940 | " ,\n", 1941 | " \n", 1942 | " 8.2\n", 1943 | " ,\n", 1944 | " \n", 1945 | " 8.2\n", 1946 | " ,\n", 1947 | " \n", 1948 | " 8.2\n", 1949 | " ,\n", 1950 | " \n", 1951 | " 8.2\n", 1952 | " ,\n", 1953 | " \n", 1954 | " 8.2\n", 1955 | " ,\n", 1956 | " \n", 1957 | " 8.2\n", 1958 | " ,\n", 1959 | " \n", 1960 | " 8.2\n", 1961 | " ,\n", 1962 | " \n", 1963 | " 8.2\n", 1964 | " ,\n", 1965 | " \n", 1966 | " 8.2\n", 1967 | " ,\n", 1968 | " \n", 1969 | " 8.2\n", 1970 | " ,\n", 1971 | " \n", 1972 | " 8.2\n", 1973 | " ,\n", 1974 | " \n", 1975 | " 8.2\n", 1976 | " ,\n", 1977 | " \n", 1978 | " 8.2\n", 1979 | " ,\n", 1980 | " \n", 1981 | " 8.2\n", 1982 | " ,\n", 1983 | " \n", 1984 | " 8.2\n", 1985 | " ,\n", 1986 | " \n", 1987 | " 8.2\n", 1988 | " ,\n", 1989 | " \n", 1990 | " 8.2\n", 1991 | " ,\n", 1992 | " \n", 
1993 | " 8.2\n", 1994 | " ,\n", 1995 | " \n", 1996 | " 8.2\n", 1997 | " ,\n", 1998 | " \n", 1999 | " 8.2\n", 2000 | " ,\n", 2001 | " \n", 2002 | " 8.2\n", 2003 | " ,\n", 2004 | " \n", 2005 | " 8.2\n", 2006 | " ,\n", 2007 | " \n", 2008 | " 8.2\n", 2009 | " ,\n", 2010 | " \n", 2011 | " 8.2\n", 2012 | " ,\n", 2013 | " \n", 2014 | " 8.2\n", 2015 | " ,\n", 2016 | " \n", 2017 | " 8.2\n", 2018 | " ,\n", 2019 | " \n", 2020 | " 8.2\n", 2021 | " ,\n", 2022 | " \n", 2023 | " 8.2\n", 2024 | " ,\n", 2025 | " \n", 2026 | " 8.2\n", 2027 | " ,\n", 2028 | " \n", 2029 | " 8.2\n", 2030 | " ,\n", 2031 | " \n", 2032 | " 8.2\n", 2033 | " ,\n", 2034 | " \n", 2035 | " 8.2\n", 2036 | " ,\n", 2037 | " \n", 2038 | " 8.2\n", 2039 | " ,\n", 2040 | " \n", 2041 | " 8.2\n", 2042 | " ,\n", 2043 | " \n", 2044 | " 8.2\n", 2045 | " ,\n", 2046 | " \n", 2047 | " 8.2\n", 2048 | " ,\n", 2049 | " \n", 2050 | " 8.2\n", 2051 | " ,\n", 2052 | " \n", 2053 | " 8.2\n", 2054 | " ,\n", 2055 | " \n", 2056 | " 8.2\n", 2057 | " ,\n", 2058 | " \n", 2059 | " 8.2\n", 2060 | " ,\n", 2061 | " \n", 2062 | " 8.2\n", 2063 | " ,\n", 2064 | " \n", 2065 | " 8.2\n", 2066 | " ,\n", 2067 | " \n", 2068 | " 8.2\n", 2069 | " ,\n", 2070 | " \n", 2071 | " 8.2\n", 2072 | " ,\n", 2073 | " \n", 2074 | " 8.2\n", 2075 | " ,\n", 2076 | " \n", 2077 | " 8.1\n", 2078 | " ,\n", 2079 | " \n", 2080 | " 8.1\n", 2081 | " ,\n", 2082 | " \n", 2083 | " 8.1\n", 2084 | " ,\n", 2085 | " \n", 2086 | " 8.1\n", 2087 | " ,\n", 2088 | " \n", 2089 | " 8.1\n", 2090 | " ,\n", 2091 | " \n", 2092 | " 8.1\n", 2093 | " ,\n", 2094 | " \n", 2095 | " 8.1\n", 2096 | " ,\n", 2097 | " \n", 2098 | " 8.1\n", 2099 | " ,\n", 2100 | " \n", 2101 | " 8.1\n", 2102 | " ,\n", 2103 | " \n", 2104 | " 8.1\n", 2105 | " ,\n", 2106 | " \n", 2107 | " 8.1\n", 2108 | " ,\n", 2109 | " \n", 2110 | " 8.1\n", 2111 | " ,\n", 2112 | " \n", 2113 | " 8.1\n", 2114 | " ,\n", 2115 | " \n", 2116 | " 8.1\n", 2117 | " ,\n", 2118 | " \n", 2119 | " 8.1\n", 2120 | " ,\n", 2121 | " \n", 2122 | " 8.1\n", 2123 
| " ,\n", 2124 | " \n", 2125 | " 8.1\n", 2126 | " ,\n", 2127 | " \n", 2128 | " 8.1\n", 2129 | " ,\n", 2130 | " \n", 2131 | " 8.1\n", 2132 | " ,\n", 2133 | " \n", 2134 | " 8.1\n", 2135 | " ,\n", 2136 | " \n", 2137 | " 8.1\n", 2138 | " ,\n", 2139 | " \n", 2140 | " 8.1\n", 2141 | " ,\n", 2142 | " \n", 2143 | " 8.1\n", 2144 | " ,\n", 2145 | " \n", 2146 | " 8.1\n", 2147 | " ,\n", 2148 | " \n", 2149 | " 8.1\n", 2150 | " ,\n", 2151 | " \n", 2152 | " 8.1\n", 2153 | " ,\n", 2154 | " \n", 2155 | " 8.1\n", 2156 | " ,\n", 2157 | " \n", 2158 | " 8.1\n", 2159 | " ,\n", 2160 | " \n", 2161 | " 8.1\n", 2162 | " ,\n", 2163 | " \n", 2164 | " 8.1\n", 2165 | " ,\n", 2166 | " \n", 2167 | " 8.1\n", 2168 | " ,\n", 2169 | " \n", 2170 | " 8.1\n", 2171 | " ,\n", 2172 | " \n", 2173 | " 8.1\n", 2174 | " ,\n", 2175 | " \n", 2176 | " 8.1\n", 2177 | " ,\n", 2178 | " \n", 2179 | " 8.1\n", 2180 | " ,\n", 2181 | " \n", 2182 | " 8.1\n", 2183 | " ,\n", 2184 | " \n", 2185 | " 8.1\n", 2186 | " ,\n", 2187 | " \n", 2188 | " 8.1\n", 2189 | " ,\n", 2190 | " \n", 2191 | " 8.1\n", 2192 | " ,\n", 2193 | " \n", 2194 | " 8.1\n", 2195 | " ,\n", 2196 | " \n", 2197 | " 8.1\n", 2198 | " ,\n", 2199 | " \n", 2200 | " 8.1\n", 2201 | " ,\n", 2202 | " \n", 2203 | " 8.1\n", 2204 | " ,\n", 2205 | " \n", 2206 | " 8.1\n", 2207 | " ,\n", 2208 | " \n", 2209 | " 8.1\n", 2210 | " ,\n", 2211 | " \n", 2212 | " 8.1\n", 2213 | " ,\n", 2214 | " \n", 2215 | " 8.1\n", 2216 | " ,\n", 2217 | " \n", 2218 | " 8.1\n", 2219 | " ,\n", 2220 | " \n", 2221 | " 8.1\n", 2222 | " ,\n", 2223 | " \n", 2224 | " 8.1\n", 2225 | " ,\n", 2226 | " \n", 2227 | " 8.1\n", 2228 | " ,\n", 2229 | " \n", 2230 | " 8.1\n", 2231 | " ,\n", 2232 | " \n", 2233 | " 8.1\n", 2234 | " ,\n", 2235 | " \n", 2236 | " 8.1\n", 2237 | " ,\n", 2238 | " \n", 2239 | " 8.1\n", 2240 | " ,\n", 2241 | " \n", 2242 | " 8.1\n", 2243 | " ,\n", 2244 | " \n", 2245 | " 8.1\n", 2246 | " ,\n", 2247 | " \n", 2248 | " 8.1\n", 2249 | " ,\n", 2250 | " \n", 2251 | " 8.1\n", 2252 | " ,\n", 2253 | " 
\n", 2254 | " 8.1\n", 2255 | " ,\n", 2256 | " \n", 2257 | " 8.1\n", 2258 | " ,\n", 2259 | " \n", 2260 | " 8.1\n", 2261 | " ,\n", 2262 | " \n", 2263 | " 8.1\n", 2264 | " ,\n", 2265 | " \n", 2266 | " 8.1\n", 2267 | " ,\n", 2268 | " \n", 2269 | " 8.1\n", 2270 | " ,\n", 2271 | " \n", 2272 | " 8.0\n", 2273 | " ,\n", 2274 | " \n", 2275 | " 8.0\n", 2276 | " ,\n", 2277 | " \n", 2278 | " 8.0\n", 2279 | " ,\n", 2280 | " \n", 2281 | " 8.0\n", 2282 | " ,\n", 2283 | " \n", 2284 | " 8.0\n", 2285 | " ,\n", 2286 | " \n", 2287 | " 8.0\n", 2288 | " ,\n", 2289 | " \n", 2290 | " 8.0\n", 2291 | " ,\n", 2292 | " \n", 2293 | " 8.0\n", 2294 | " ,\n", 2295 | " \n", 2296 | " 8.0\n", 2297 | " ,\n", 2298 | " \n", 2299 | " 8.0\n", 2300 | " ,\n", 2301 | " \n", 2302 | " 8.0\n", 2303 | " ,\n", 2304 | " \n", 2305 | " 8.0\n", 2306 | " ,\n", 2307 | " \n", 2308 | " 8.0\n", 2309 | " ,\n", 2310 | " \n", 2311 | " 8.0\n", 2312 | " ,\n", 2313 | " \n", 2314 | " 8.0\n", 2315 | " ,\n", 2316 | " \n", 2317 | " 8.0\n", 2318 | " ,\n", 2319 | " \n", 2320 | " 8.0\n", 2321 | " ,\n", 2322 | " \n", 2323 | " 8.0\n", 2324 | " ,\n", 2325 | " \n", 2326 | " 8.0\n", 2327 | " ,\n", 2328 | " \n", 2329 | " 8.0\n", 2330 | " ,\n", 2331 | " \n", 2332 | " 8.0\n", 2333 | " ,\n", 2334 | " \n", 2335 | " 8.0\n", 2336 | " ,\n", 2337 | " \n", 2338 | " 8.0\n", 2339 | " ,\n", 2340 | " \n", 2341 | " 8.0\n", 2342 | " ,\n", 2343 | " \n", 2344 | " 8.0\n", 2345 | " ,\n", 2346 | " \n", 2347 | " 8.0\n", 2348 | " ,\n", 2349 | " \n", 2350 | " 8.0\n", 2351 | " ,\n", 2352 | " \n", 2353 | " 8.0\n", 2354 | " ,\n", 2355 | " \n", 2356 | " 8.0\n", 2357 | " ,\n", 2358 | " \n", 2359 | " 8.0\n", 2360 | " ,\n", 2361 | " \n", 2362 | " 8.0\n", 2363 | " ,\n", 2364 | " \n", 2365 | " 8.0\n", 2366 | " ,\n", 2367 | " \n", 2368 | " 8.0\n", 2369 | " ,\n", 2370 | " \n", 2371 | " 8.0\n", 2372 | " ,\n", 2373 | " \n", 2374 | " 8.0\n", 2375 | " ,\n", 2376 | " \n", 2377 | " 8.0\n", 2378 | " ,\n", 2379 | " \n", 2380 | " 8.0\n", 2381 | " ,\n", 2382 | " \n", 2383 | " 8.0\n", 
2384 | " ,\n", 2385 | " \n", 2386 | " 8.0\n", 2387 | " ,\n", 2388 | " \n", 2389 | " 8.0\n", 2390 | " ]" 2391 | ] 2392 | }, 2393 | "execution_count": 11, 2394 | "metadata": {}, 2395 | "output_type": "execute_result" 2396 | } 2397 | ], 2398 | "source": [ 2399 | "# scrap rating for movies\n", 2400 | "scraped_ratings = soup.find_all('td', class_='ratingColumn imdbRating')\n", 2401 | "scraped_ratings" 2402 | ] 2403 | }, 2404 | { 2405 | "cell_type": "code", 2406 | "execution_count": 12, 2407 | "metadata": {}, 2408 | "outputs": [ 2409 | { 2410 | "data": { 2411 | "text/plain": [ 2412 | "['9.2',\n", 2413 | " '9.2',\n", 2414 | " '9.0',\n", 2415 | " '9.0',\n", 2416 | " '9.0',\n", 2417 | " '8.9',\n", 2418 | " '8.9',\n", 2419 | " '8.8',\n", 2420 | " '8.8',\n", 2421 | " '8.8',\n", 2422 | " '8.8',\n", 2423 | " '8.7',\n", 2424 | " '8.7',\n", 2425 | " '8.7',\n", 2426 | " '8.7',\n", 2427 | " '8.7',\n", 2428 | " '8.7',\n", 2429 | " '8.7',\n", 2430 | " '8.6',\n", 2431 | " '8.6',\n", 2432 | " '8.6',\n", 2433 | " '8.6',\n", 2434 | " '8.6',\n", 2435 | " '8.6',\n", 2436 | " '8.6',\n", 2437 | " '8.6',\n", 2438 | " '8.6',\n", 2439 | " '8.6',\n", 2440 | " '8.5',\n", 2441 | " '8.5',\n", 2442 | " '8.5',\n", 2443 | " '8.5',\n", 2444 | " '8.5',\n", 2445 | " '8.5',\n", 2446 | " '8.5',\n", 2447 | " '8.5',\n", 2448 | " '8.5',\n", 2449 | " '8.5',\n", 2450 | " '8.5',\n", 2451 | " '8.5',\n", 2452 | " '8.5',\n", 2453 | " '8.5',\n", 2454 | " '8.5',\n", 2455 | " '8.5',\n", 2456 | " '8.5',\n", 2457 | " '8.5',\n", 2458 | " '8.5',\n", 2459 | " '8.4',\n", 2460 | " '8.4',\n", 2461 | " '8.4',\n", 2462 | " '8.4',\n", 2463 | " '8.4',\n", 2464 | " '8.4',\n", 2465 | " '8.4',\n", 2466 | " '8.4',\n", 2467 | " '8.4',\n", 2468 | " '8.4',\n", 2469 | " '8.4',\n", 2470 | " '8.4',\n", 2471 | " '8.4',\n", 2472 | " '8.4',\n", 2473 | " '8.4',\n", 2474 | " '8.4',\n", 2475 | " '8.4',\n", 2476 | " '8.4',\n", 2477 | " '8.4',\n", 2478 | " '8.3',\n", 2479 | " '8.3',\n", 2480 | " '8.3',\n", 2481 | " '8.3',\n", 2482 | " '8.3',\n", 
2483 | " '8.3',\n", 2484 | " '8.3',\n", 2485 | " '8.3',\n", 2486 | " '8.3',\n", 2487 | " '8.3',\n", 2488 | " '8.3',\n", 2489 | " '8.3',\n", 2490 | " '8.3',\n", 2491 | " '8.3',\n", 2492 | " '8.3',\n", 2493 | " '8.3',\n", 2494 | " '8.3',\n", 2495 | " '8.3',\n", 2496 | " '8.3',\n", 2497 | " '8.3',\n", 2498 | " '8.3',\n", 2499 | " '8.3',\n", 2500 | " '8.3',\n", 2501 | " '8.3',\n", 2502 | " '8.3',\n", 2503 | " '8.3',\n", 2504 | " '8.3',\n", 2505 | " '8.3',\n", 2506 | " '8.3',\n", 2507 | " '8.2',\n", 2508 | " '8.2',\n", 2509 | " '8.2',\n", 2510 | " '8.2',\n", 2511 | " '8.2',\n", 2512 | " '8.2',\n", 2513 | " '8.2',\n", 2514 | " '8.2',\n", 2515 | " '8.2',\n", 2516 | " '8.2',\n", 2517 | " '8.2',\n", 2518 | " '8.2',\n", 2519 | " '8.2',\n", 2520 | " '8.2',\n", 2521 | " '8.2',\n", 2522 | " '8.2',\n", 2523 | " '8.2',\n", 2524 | " '8.2',\n", 2525 | " '8.2',\n", 2526 | " '8.2',\n", 2527 | " '8.2',\n", 2528 | " '8.2',\n", 2529 | " '8.2',\n", 2530 | " '8.2',\n", 2531 | " '8.2',\n", 2532 | " '8.2',\n", 2533 | " '8.2',\n", 2534 | " '8.2',\n", 2535 | " '8.2',\n", 2536 | " '8.2',\n", 2537 | " '8.2',\n", 2538 | " '8.2',\n", 2539 | " '8.2',\n", 2540 | " '8.2',\n", 2541 | " '8.2',\n", 2542 | " '8.2',\n", 2543 | " '8.2',\n", 2544 | " '8.2',\n", 2545 | " '8.2',\n", 2546 | " '8.2',\n", 2547 | " '8.2',\n", 2548 | " '8.2',\n", 2549 | " '8.2',\n", 2550 | " '8.2',\n", 2551 | " '8.2',\n", 2552 | " '8.2',\n", 2553 | " '8.2',\n", 2554 | " '8.2',\n", 2555 | " '8.2',\n", 2556 | " '8.2',\n", 2557 | " '8.1',\n", 2558 | " '8.1',\n", 2559 | " '8.1',\n", 2560 | " '8.1',\n", 2561 | " '8.1',\n", 2562 | " '8.1',\n", 2563 | " '8.1',\n", 2564 | " '8.1',\n", 2565 | " '8.1',\n", 2566 | " '8.1',\n", 2567 | " '8.1',\n", 2568 | " '8.1',\n", 2569 | " '8.1',\n", 2570 | " '8.1',\n", 2571 | " '8.1',\n", 2572 | " '8.1',\n", 2573 | " '8.1',\n", 2574 | " '8.1',\n", 2575 | " '8.1',\n", 2576 | " '8.1',\n", 2577 | " '8.1',\n", 2578 | " '8.1',\n", 2579 | " '8.1',\n", 2580 | " '8.1',\n", 2581 | " '8.1',\n", 2582 | " '8.1',\n", 
2583 | " '8.1',\n", 2584 | " '8.1',\n", 2585 | " '8.1',\n", 2586 | " '8.1',\n", 2587 | " '8.1',\n", 2588 | " '8.1',\n", 2589 | " '8.1',\n", 2590 | " '8.1',\n", 2591 | " '8.1',\n", 2592 | " '8.1',\n", 2593 | " '8.1',\n", 2594 | " '8.1',\n", 2595 | " '8.1',\n", 2596 | " '8.1',\n", 2597 | " '8.1',\n", 2598 | " '8.1',\n", 2599 | " '8.1',\n", 2600 | " '8.1',\n", 2601 | " '8.1',\n", 2602 | " '8.1',\n", 2603 | " '8.1',\n", 2604 | " '8.1',\n", 2605 | " '8.1',\n", 2606 | " '8.1',\n", 2607 | " '8.1',\n", 2608 | " '8.1',\n", 2609 | " '8.1',\n", 2610 | " '8.1',\n", 2611 | " '8.1',\n", 2612 | " '8.1',\n", 2613 | " '8.1',\n", 2614 | " '8.1',\n", 2615 | " '8.1',\n", 2616 | " '8.1',\n", 2617 | " '8.1',\n", 2618 | " '8.1',\n", 2619 | " '8.1',\n", 2620 | " '8.1',\n", 2621 | " '8.1',\n", 2622 | " '8.0',\n", 2623 | " '8.0',\n", 2624 | " '8.0',\n", 2625 | " '8.0',\n", 2626 | " '8.0',\n", 2627 | " '8.0',\n", 2628 | " '8.0',\n", 2629 | " '8.0',\n", 2630 | " '8.0',\n", 2631 | " '8.0',\n", 2632 | " '8.0',\n", 2633 | " '8.0',\n", 2634 | " '8.0',\n", 2635 | " '8.0',\n", 2636 | " '8.0',\n", 2637 | " '8.0',\n", 2638 | " '8.0',\n", 2639 | " '8.0',\n", 2640 | " '8.0',\n", 2641 | " '8.0',\n", 2642 | " '8.0',\n", 2643 | " '8.0',\n", 2644 | " '8.0',\n", 2645 | " '8.0',\n", 2646 | " '8.0',\n", 2647 | " '8.0',\n", 2648 | " '8.0',\n", 2649 | " '8.0',\n", 2650 | " '8.0',\n", 2651 | " '8.0',\n", 2652 | " '8.0',\n", 2653 | " '8.0',\n", 2654 | " '8.0',\n", 2655 | " '8.0',\n", 2656 | " '8.0',\n", 2657 | " '8.0',\n", 2658 | " '8.0',\n", 2659 | " '8.0',\n", 2660 | " '8.0',\n", 2661 | " '8.0']" 2662 | ] 2663 | }, 2664 | "execution_count": 12, 2665 | "metadata": {}, 2666 | "output_type": "execute_result" 2667 | } 2668 | ], 2669 | "source": [ 2670 | "# parse ratings\n", 2671 | "ratings = []\n", 2672 | "for rating in scraped_ratings:\n", 2673 | " rating = rating.get_text().replace('\\n', '')\n", 2674 | " ratings.append(rating)\n", 2675 | "ratings" 2676 | ] 2677 | }, 2678 | { 2679 | "cell_type": "markdown", 2680 
| "metadata": {}, 2681 | "source": [ 2682 | "## Store the Scraped Data" 2683 | ] 2684 | }, 2685 | { 2686 | "cell_type": "code", 2687 | "execution_count": 13, 2688 | "metadata": {}, 2689 | "outputs": [ 2690 | { 2691 | "data": { 2692 | "text/html": [ 2693 | "
\n", 2694 | "\n", 2707 | "\n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | " \n", 2719 | " \n", 2720 | " \n", 2721 | " \n", 2722 | " \n", 2723 | " \n", 2724 | " \n", 2725 | " \n", 2726 | " \n", 2727 | " \n", 2728 | " \n", 2729 | " \n", 2730 | " \n", 2731 | " \n", 2732 | " \n", 2733 | " \n", 2734 | " \n", 2735 | " \n", 2736 | " \n", 2737 | " \n", 2738 | " \n", 2739 | " \n", 2740 | " \n", 2741 | " \n", 2742 | "
Movie NamesRatings
01. The Shawshank Redemption(1994)9.2
12. The Godfather(1972)9.2
23. The Dark Knight(2008)9.0
34. The Godfather Part II(1974)9.0
45. 12 Angry Men(1957)9.0
\n", 2743 | "
" 2744 | ], 2745 | "text/plain": [ 2746 | " Movie Names Ratings\n", 2747 | "0 1. The Shawshank Redemption(1994) 9.2\n", 2748 | "1 2. The Godfather(1972) 9.2\n", 2749 | "2 3. The Dark Knight(2008) 9.0\n", 2750 | "3 4. The Godfather Part II(1974) 9.0\n", 2751 | "4 5. 12 Angry Men(1957) 9.0" 2752 | ] 2753 | }, 2754 | "execution_count": 13, 2755 | "metadata": {}, 2756 | "output_type": "execute_result" 2757 | } 2758 | ], 2759 | "source": [ 2760 | "data = pd.DataFrame()\n", 2761 | "data['Movie Names'] = movies\n", 2762 | "data['Ratings'] = ratings\n", 2763 | "data.head()" 2764 | ] 2765 | }, 2766 | { 2767 | "cell_type": "code", 2768 | "execution_count": 13, 2769 | "metadata": {}, 2770 | "outputs": [], 2771 | "source": [ 2772 | "data.to_csv('IMDB Top Movies.csv', index=False)" 2773 | ] 2774 | }, 2775 | { 2776 | "cell_type": "code", 2777 | "execution_count": null, 2778 | "metadata": {}, 2779 | "outputs": [], 2780 | "source": [] 2781 | }, 2782 | { 2783 | "cell_type": "code", 2784 | "execution_count": null, 2785 | "metadata": {}, 2786 | "outputs": [], 2787 | "source": [] 2788 | }, 2789 | { 2790 | "cell_type": "code", 2791 | "execution_count": null, 2792 | "metadata": {}, 2793 | "outputs": [], 2794 | "source": [] 2795 | } 2796 | ], 2797 | "metadata": { 2798 | "kernelspec": { 2799 | "display_name": "Python 3 (ipykernel)", 2800 | "language": "python", 2801 | "name": "python3" 2802 | }, 2803 | "language_info": { 2804 | "codemirror_mode": { 2805 | "name": "ipython", 2806 | "version": 3 2807 | }, 2808 | "file_extension": ".py", 2809 | "mimetype": "text/x-python", 2810 | "name": "python", 2811 | "nbconvert_exporter": "python", 2812 | "pygments_lexer": "ipython3", 2813 | "version": "3.8.3" 2814 | } 2815 | }, 2816 | "nbformat": 4, 2817 | "nbformat_minor": 4 2818 | } 2819 | -------------------------------------------------------------------------------- /Scraping Multimedia Files using Beautiful Soup.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "import requests" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Get Page Content from URL" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "url = 'https://www.thehindu.com/news/national/coronavirus-live-updates-may-29-2021/article34672944.ece?homepage=true'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "page = requests.get(url)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "" 54 | ] 55 | }, 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "page" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# display page content\n", 72 | "# page.content" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# parse the data\n", 82 | "soup = BeautifulSoup(page.content, 'html.parser')\n", 83 | "# print(soup.prettify())" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "" 95 | ] 96 | }, 97 | "execution_count": 7, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "# 
find the image src link\n", 104 | "img_tag = soup.find('source')\n", 105 | "img_tag" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 8, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "'https://th-i.thgim.com/public/news/national/2g2qwq/article53557510.ece/alternates/LANDSCAPE_1200/Migrants2jpg'" 117 | ] 118 | }, 119 | "execution_count": 8, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "img_tag['srcset']" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 9, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "img_url = img_tag['srcset']" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Download the Image from URL" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 10, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "image = requests.get(img_url, stream=True)  # stream=True: the body is fetched chunk by chunk by iter_content" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 11, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# store the image in file\n", 160 | "with open('image.jpg', 'wb') as file:\n", 161 | "    for chunk in image.iter_content(chunk_size=1024):\n", 162 | "        file.write(chunk)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Download PPT from URL" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 12, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "ppt = requests.get('http://www.howtowebscrape.com/examples/media/images/SampleSlides.pptx', stream=True)  # stream instead of buffering the whole file" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 13, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "with open('sample.pptx', 'wb') as file:\n", 188 | "    for chunk in ppt.iter_content(chunk_size=1024):\n", 189 | "        file.write(chunk)" 
190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Download Video from URL" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 14, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "video = requests.get('http://www.howtowebscrape.com/examples/media/images/BigRabbit.mp4', stream=True)  # stream instead of buffering the whole video" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 15, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "with open('BigRabbit.mp4', 'wb') as file:\n", 215 | "    for chunk in video.iter_content(chunk_size=1024):\n", 216 | "        file.write(chunk)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "Python 3 (ipykernel)", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.8.3" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 4 248 | } 249 | -------------------------------------------------------------------------------- /Scraping Products from Amazon using Selenium-Dynamic Website.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from selenium import webdriver\n", 17 | "from selenium.webdriver.common.by import By\n", 18 | "from time import sleep" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | 
"metadata": {}, 24 | "source": [ 25 | "## Set path for Webdriver" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# path = 'C://chromedriver.exe'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# open the browser\n", 44 | "# browser = webdriver.Chrome(executable_path = path)\n", 45 | "browser = webdriver.Chrome()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# load the webpage\n", 55 | "browser.get('https://www.amazon.in')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "browser.maximize_window()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# get the input elements\n", 74 | "input_search = browser.find_element(By.ID, 'twotabsearchtextbox')\n", 75 | "search_button = browser.find_element(By.XPATH, \"(//input[@type='submit'])[1]\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 7, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# send the input to the webpage\n", 85 | "input_search.send_keys(\"Smartphones under 10000\")\n", 86 | "sleep(1)\n", 87 | "search_button.click()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Scrape Products from Amazon" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Scraping page 1\n", 107 | "Scraping page 2\n", 108 | "Scraping page 3\n", 109 | "Scraping page 4\n", 110 | "Scraping page 5\n", 111 | "Scraping page 6\n", 112 | "Scraping page 7\n", 
113 | "Scraping page 8\n", 114 | "Scraping page 9\n", 115 | "Scraping page 10\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "products = []\n", 121 | "for i in range(10):\n", 122 | "    print('Scraping page', i+1)\n", 123 | "    products.extend(p.text for p in browser.find_elements(By.XPATH, \"//span[@class='a-size-medium a-color-base a-text-normal']\"))\n", 124 | "    next_buttons = browser.find_elements(By.XPATH, \"//a[text()='Next']\")  # [] rather than an exception when no Next link exists\n", 125 | "    if not next_buttons:\n", 126 | "        break\n", 127 | "    next_buttons[0].click()\n", 128 | "    sleep(2)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 9, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "186" 140 | ] 141 | }, 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "len(products)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 10, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "['Lava Blaze 2 (6GB RAM, 128GB Storage) - Glass Blue | 18W Fast Charging | 6.5 inch 90Hz Punch Hole Display | Side Fingerprint Sensor | Upto 11GB Expandable RAM | 5000 mAh Battery',\n", 160 | " 'Lava Yuva 2 Pro (Glass Lavender, 4GB RAM, 64GB Storage)| 2.3 Ghz Octa Core Helio G37| 13 MP AI Triple Camera |Fingerprint Sensor| 5000 mAh Battery| Upto 7GB Expandable RAM',\n", 161 | " 'realme narzo N53 (Feather Black, 4GB+64GB) 33W Segment Fastest Charging | Slimmest Phone in Segment | 90 Hz Smooth Display',\n", 162 | " 'realme narzo 50i Prime (Dark Blue 4GB RAM+64GB Storage) Octa-core Processor | 5000 mAh Battery',\n", 163 | " 'Redmi A2 (Aqua Blue, 2GB RAM, 32GB Storage) | Powerful Octa Core G36 Processor | Upto 7GB RAM | Large 16.5 cm HD+ Display with Massive 5000mAh Battery | 2 Years Warranty [Limited time Offer]']" 164 | ] 165 | }, 166 | "execution_count": 10, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 
171 | "source": [ 172 | "products[:5]" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 11, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "browser.quit()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3 (ipykernel)", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.8.3" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 4 213 | } 214 | -------------------------------------------------------------------------------- /Scraping XML Data using Beautiful Soup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "import requests" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Get Data from URL" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "url = \"https://www.w3schools.com/xml/note.xml\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# get the data\n", 43 | "xml = requests.get(url)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 
50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "b'\\n\\n Tove\\n Jani\\n Reminder\\n Don\\'t forget me this weekend!\\n'" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "xml.content" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "\n", 76 | "\n", 77 | "Tove\n", 78 | "Jani\n", 79 | "Reminder\n", 80 | "Don't forget me this weekend!\n", 81 | "\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "# parse the data\n", 87 | "soup = BeautifulSoup(xml.content, 'xml')\n", 88 | "print(soup)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "Reminder" 100 | ] 101 | }, 102 | "execution_count": 7, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "xml_tag = soup.find('heading')\n", 109 | "xml_tag" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "'Reminder'" 121 | ] 122 | }, 123 | "execution_count": 8, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "xml_tag.text" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "\n", 141 | "Tove\n", 142 | "Jani\n", 143 | "Reminder\n", 144 | "Don't forget me this weekend!\n", 145 | "" 146 | ] 147 | }, 148 | "execution_count": 9, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "xml_tag = soup.find('note')\n", 155 | "xml_tag" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | 
"execution_count": 10, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "\n", 168 | "Tove\n", 169 | "Jani\n", 170 | "Reminder\n", 171 | "Don't forget me this weekend!\n", 172 | "\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "print(xml_tag.text)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "Python 3 (ipykernel)", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.8.3" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 4 209 | } 210 | -------------------------------------------------------------------------------- /Scraping data using Regular Expression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Part 1: Using Beautiful Soup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Import Modules" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from bs4 import BeautifulSoup\n", 24 | "import requests\n", 25 | "import re" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Get the Data using URL" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "url = \"https://www.imdb.com/chart/top/\"" 42 | ] 43 | 
}, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "HEADERS = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "# get page data\n", 71 | "page = requests.get(url, headers=HEADERS)\n", 72 | "page" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 15, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# page.content" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 20, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# parse the data\n", 91 | "soup = BeautifulSoup(page.content, 'html.parser')\n", 92 | "# print(soup.prettify())" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Regex to find particular class" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 24, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "
IMDb Top 250 as rated by regular IMDb voters
" 111 | ] 112 | }, 113 | "execution_count": 24, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "tag = soup.find('div', string=re.compile(r'by'))  # first <div> whose text contains 'by'\n", 120 | "tag" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 25, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "'IMDb Top 250 as rated by regular IMDb voters'" 132 | ] 133 | }, 134 | "execution_count": 25, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "tag.text" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Part 2: Using Regular Expression" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 26, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# page.text" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 27, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "['IMDb Top 250 Movies', 'IMDb, an Amazon company']" 168 | ] 169 | }, 170 | "execution_count": 27, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "re.findall(r'<title>(.*?)</title>', page.text)  # one non-greedy group: the inner text of every <title> element" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 28, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "[('<title>IMDb Top 250 Movies</title>', 'IMDb Top 250 Movies'),\n", 188 | " ('<title>IMDb, an Amazon company</title>', 'IMDb, an Amazon company')]" 189 | ] 190 | }, 191 | "execution_count": 28, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "re.findall(r'(<title>(.*?)</title>)', page.text)  # two groups: the whole element, then its inner text" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | 
"display_name": "Python 3 (ipykernel)", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.8.3" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 4 229 | } 230 | -------------------------------------------------------------------------------- /Taking Screenshot of Webpage using Selenium.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import Modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from selenium import webdriver" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Set path for Driver" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# path = 'C://chromedriver.exe'" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# browser = webdriver.Chrome(executable_path=path)\n", 42 | "browser = webdriver.Chrome()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "browser.get(\"https://www.google.com\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "True" 63 | ] 64 | }, 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | 
"browser.save_screenshot('screenshot.png')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "browser.quit()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python 3 (ipykernel)", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.8.3" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 4 112 | } 113 | --------------------------------------------------------------------------------