"
55 | ]
56 | },
57 | "execution_count": 6,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "page = requests.get(url, headers=HEADERS, timeout=10)  # fetch the page; without a timeout requests can wait forever on a stalled server\n",
64 | "page"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 7,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "## display the page source code\n",
74 | "# page.content"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 8,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "soup = BeautifulSoup(markup=page.content, features='html.parser')\n",
84 | "# parse the raw response bytes with the 'html.parser' backend; to inspect the tree, run: print(soup.prettify())"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 9,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "data": {
94 | "text/plain": [
95 | "[\n",
96 | " 1.\n",
97 | " The Shawshank Redemption\n",
98 | " (1994)\n",
99 | " | ,\n",
100 | " \n",
101 | " 2.\n",
102 | " The Godfather\n",
103 | " (1972)\n",
104 | " | ,\n",
105 | " \n",
106 | " 3.\n",
107 | " The Dark Knight\n",
108 | " (2008)\n",
109 | " | ,\n",
110 | " \n",
111 | " 4.\n",
112 | " The Godfather Part II\n",
113 | " (1974)\n",
114 | " | ,\n",
115 | " \n",
116 | " 5.\n",
117 | " 12 Angry Men\n",
118 | " (1957)\n",
119 | " | ,\n",
120 | " \n",
121 | " 6.\n",
122 | " Schindler's List\n",
123 | " (1993)\n",
124 | " | ,\n",
125 | " \n",
126 | " 7.\n",
127 | " The Lord of the Rings: The Return of the King\n",
128 | " (2003)\n",
129 | " | ,\n",
130 | " \n",
131 | " 8.\n",
132 | " Pulp Fiction\n",
133 | " (1994)\n",
134 | " | ,\n",
135 | " \n",
136 | " 9.\n",
137 | " The Lord of the Rings: The Fellowship of the Ring\n",
138 | " (2001)\n",
139 | " | ,\n",
140 | " \n",
141 | " 10.\n",
142 | " Il buono, il brutto, il cattivo\n",
143 | " (1966)\n",
144 | " | ,\n",
145 | " \n",
146 | " 11.\n",
147 | " Forrest Gump\n",
148 | " (1994)\n",
149 | " | ,\n",
150 | " \n",
151 | " 12.\n",
152 | " Fight Club\n",
153 | " (1999)\n",
154 | " | ,\n",
155 | " \n",
156 | " 13.\n",
157 | " Spider-Man: Across the Spider-Verse\n",
158 | " (2023)\n",
159 | " | ,\n",
160 | " \n",
161 | " 14.\n",
162 | " The Lord of the Rings: The Two Towers\n",
163 | " (2002)\n",
164 | " | ,\n",
165 | " \n",
166 | " 15.\n",
167 | " Inception\n",
168 | " (2010)\n",
169 | " | ,\n",
170 | " \n",
171 | " 16.\n",
172 | " Star Wars: Episode V - The Empire Strikes Back\n",
173 | " (1980)\n",
174 | " | ,\n",
175 | " \n",
176 | " 17.\n",
177 | " The Matrix\n",
178 | " (1999)\n",
179 | " | ,\n",
180 | " \n",
181 | " 18.\n",
182 | " GoodFellas\n",
183 | " (1990)\n",
184 | " | ,\n",
185 | " \n",
186 | " 19.\n",
187 | " One Flew Over the Cuckoo's Nest\n",
188 | " (1975)\n",
189 | " | ,\n",
190 | " \n",
191 | " 20.\n",
192 | " Se7en\n",
193 | " (1995)\n",
194 | " | ,\n",
195 | " \n",
196 | " 21.\n",
197 | " It's a Wonderful Life\n",
198 | " (1946)\n",
199 | " | ,\n",
200 | " \n",
201 | " 22.\n",
202 | " Shichinin no samurai\n",
203 | " (1954)\n",
204 | " | ,\n",
205 | " \n",
206 | " 23.\n",
207 | " The Silence of the Lambs\n",
208 | " (1991)\n",
209 | " | ,\n",
210 | " \n",
211 | " 24.\n",
212 | " Saving Private Ryan\n",
213 | " (1998)\n",
214 | " | ,\n",
215 | " \n",
216 | " 25.\n",
217 | " Cidade de Deus\n",
218 | " (2002)\n",
219 | " | ,\n",
220 | " \n",
221 | " 26.\n",
222 | " Interstellar\n",
223 | " (2014)\n",
224 | " | ,\n",
225 | " \n",
226 | " 27.\n",
227 | " La vita è bella\n",
228 | " (1997)\n",
229 | " | ,\n",
230 | " \n",
231 | " 28.\n",
232 | " The Green Mile\n",
233 | " (1999)\n",
234 | " | ,\n",
235 | " \n",
236 | " 29.\n",
237 | " Star Wars\n",
238 | " (1977)\n",
239 | " | ,\n",
240 | " \n",
241 | " 30.\n",
242 | " Terminator 2: Judgment Day\n",
243 | " (1991)\n",
244 | " | ,\n",
245 | " \n",
246 | " 31.\n",
247 | " Back to the Future\n",
248 | " (1985)\n",
249 | " | ,\n",
250 | " \n",
251 | " 32.\n",
252 | " Sen to Chihiro no kamikakushi\n",
253 | " (2001)\n",
254 | " | ,\n",
255 | " \n",
256 | " 33.\n",
257 | " The Pianist\n",
258 | " (2002)\n",
259 | " | ,\n",
260 | " \n",
261 | " 34.\n",
262 | " Psycho\n",
263 | " (1960)\n",
264 | " | ,\n",
265 | " \n",
266 | " 35.\n",
267 | " Gisaengchung\n",
268 | " (2019)\n",
269 | " | ,\n",
270 | " \n",
271 | " 36.\n",
272 | " Léon\n",
273 | " (1994)\n",
274 | " | ,\n",
275 | " \n",
276 | " 37.\n",
277 | " The Lion King\n",
278 | " (1994)\n",
279 | " | ,\n",
280 | " \n",
281 | " 38.\n",
282 | " Gladiator\n",
283 | " (2000)\n",
284 | " | ,\n",
285 | " \n",
286 | " 39.\n",
287 | " American History X\n",
288 | " (1998)\n",
289 | " | ,\n",
290 | " \n",
291 | " 40.\n",
292 | " The Departed\n",
293 | " (2006)\n",
294 | " | ,\n",
295 | " \n",
296 | " 41.\n",
297 | " Whiplash\n",
298 | " (2014)\n",
299 | " | ,\n",
300 | " \n",
301 | " 42.\n",
302 | " The Prestige\n",
303 | " (2006)\n",
304 | " | ,\n",
305 | " \n",
306 | " 43.\n",
307 | " The Usual Suspects\n",
308 | " (1995)\n",
309 | " | ,\n",
310 | " \n",
311 | " 44.\n",
312 | " Casablanca\n",
313 | " (1942)\n",
314 | " | ,\n",
315 | " \n",
316 | " 45.\n",
317 | " Hotaru no haka\n",
318 | " (1988)\n",
319 | " | ,\n",
320 | " \n",
321 | " 46.\n",
322 | " Seppuku\n",
323 | " (1962)\n",
324 | " | ,\n",
325 | " \n",
326 | " 47.\n",
327 | " The Intouchables\n",
328 | " (2011)\n",
329 | " | ,\n",
330 | " \n",
331 | " 48.\n",
332 | " Modern Times\n",
333 | " (1936)\n",
334 | " | ,\n",
335 | " \n",
336 | " 49.\n",
337 | " Once Upon a Time in the West\n",
338 | " (1968)\n",
339 | " | ,\n",
340 | " \n",
341 | " 50.\n",
342 | " Nuovo Cinema Paradiso\n",
343 | " (1988)\n",
344 | " | ,\n",
345 | " \n",
346 | " 51.\n",
347 | " Rear Window\n",
348 | " (1954)\n",
349 | " | ,\n",
350 | " \n",
351 | " 52.\n",
352 | " Alien\n",
353 | " (1979)\n",
354 | " | ,\n",
355 | " \n",
356 | " 53.\n",
357 | " City Lights\n",
358 | " (1931)\n",
359 | " | ,\n",
360 | " \n",
361 | " 54.\n",
362 | " Apocalypse Now\n",
363 | " (1979)\n",
364 | " | ,\n",
365 | " \n",
366 | " 55.\n",
367 | " Memento\n",
368 | " (2000)\n",
369 | " | ,\n",
370 | " \n",
371 | " 56.\n",
372 | " Django Unchained\n",
373 | " (2012)\n",
374 | " | ,\n",
375 | " \n",
376 | " 57.\n",
377 | " Raiders of the Lost Ark\n",
378 | " (1981)\n",
379 | " | ,\n",
380 | " \n",
381 | " 58.\n",
382 | " WALL·E\n",
383 | " (2008)\n",
384 | " | ,\n",
385 | " \n",
386 | " 59.\n",
387 | " The Lives of Others\n",
388 | " (2006)\n",
389 | " | ,\n",
390 | " \n",
391 | " 60.\n",
392 | " Sunset Blvd.\n",
393 | " (1950)\n",
394 | " | ,\n",
395 | " \n",
396 | " 61.\n",
397 | " Paths of Glory\n",
398 | " (1957)\n",
399 | " | ,\n",
400 | " \n",
401 | " 62.\n",
402 | " Avengers: Infinity War\n",
403 | " (2018)\n",
404 | " | ,\n",
405 | " \n",
406 | " 63.\n",
407 | " The Shining\n",
408 | " (1980)\n",
409 | " | ,\n",
410 | " \n",
411 | " 64.\n",
412 | " The Great Dictator\n",
413 | " (1940)\n",
414 | " | ,\n",
415 | " \n",
416 | " 65.\n",
417 | " Witness for the Prosecution\n",
418 | " (1957)\n",
419 | " | ,\n",
420 | " \n",
421 | " 66.\n",
422 | " Spider-Man: Into the Spider-Verse\n",
423 | " (2018)\n",
424 | " | ,\n",
425 | " \n",
426 | " 67.\n",
427 | " Aliens\n",
428 | " (1986)\n",
429 | " | ,\n",
430 | " \n",
431 | " 68.\n",
432 | " American Beauty\n",
433 | " (1999)\n",
434 | " | ,\n",
435 | " \n",
436 | " 69.\n",
437 | " The Dark Knight Rises\n",
438 | " (2012)\n",
439 | " | ,\n",
440 | " \n",
441 | " 70.\n",
442 | " Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb\n",
443 | " (1964)\n",
444 | " | ,\n",
445 | " \n",
446 | " 71.\n",
447 | " Inglourious Basterds\n",
448 | " (2009)\n",
449 | " | ,\n",
450 | " \n",
451 | " 72.\n",
452 | " Oldeuboi\n",
453 | " (2003)\n",
454 | " | ,\n",
455 | " \n",
456 | " 73.\n",
457 | " Coco\n",
458 | " (2017)\n",
459 | " | ,\n",
460 | " \n",
461 | " 74.\n",
462 | " Amadeus\n",
463 | " (1984)\n",
464 | " | ,\n",
465 | " \n",
466 | " 75.\n",
467 | " Toy Story\n",
468 | " (1995)\n",
469 | " | ,\n",
470 | " \n",
471 | " 76.\n",
472 | " Braveheart\n",
473 | " (1995)\n",
474 | " | ,\n",
475 | " \n",
476 | " 77.\n",
477 | " Das Boot\n",
478 | " (1981)\n",
479 | " | ,\n",
480 | " \n",
481 | " 78.\n",
482 | " Joker\n",
483 | " (2019)\n",
484 | " | ,\n",
485 | " \n",
486 | " 79.\n",
487 | " Avengers: Endgame\n",
488 | " (2019)\n",
489 | " | ,\n",
490 | " \n",
491 | " 80.\n",
492 | " Mononoke-hime\n",
493 | " (1997)\n",
494 | " | ,\n",
495 | " \n",
496 | " 81.\n",
497 | " Good Will Hunting\n",
498 | " (1997)\n",
499 | " | ,\n",
500 | " \n",
501 | " 82.\n",
502 | " Once Upon a Time in America\n",
503 | " (1984)\n",
504 | " | ,\n",
505 | " \n",
506 | " 83.\n",
507 | " Kimi no na wa.\n",
508 | " (2016)\n",
509 | " | ,\n",
510 | " \n",
511 | " 84.\n",
512 | " 3 Idiots\n",
513 | " (2009)\n",
514 | " | ,\n",
515 | " \n",
516 | " 85.\n",
517 | " Singin' in the Rain\n",
518 | " (1952)\n",
519 | " | ,\n",
520 | " \n",
521 | " 86.\n",
522 | " Tengoku to jigoku\n",
523 | " (1963)\n",
524 | " | ,\n",
525 | " \n",
526 | " 87.\n",
527 | " Requiem for a Dream\n",
528 | " (2000)\n",
529 | " | ,\n",
530 | " \n",
531 | " 88.\n",
532 | " Capharnaüm\n",
533 | " (2018)\n",
534 | " | ,\n",
535 | " \n",
536 | " 89.\n",
537 | " Toy Story 3\n",
538 | " (2010)\n",
539 | " | ,\n",
540 | " \n",
541 | " 90.\n",
542 | " Star Wars: Episode VI - Return of the Jedi\n",
543 | " (1983)\n",
544 | " | ,\n",
545 | " \n",
546 | " 91.\n",
547 | " Idi i smotri\n",
548 | " (1985)\n",
549 | " | ,\n",
550 | " \n",
551 | " 92.\n",
552 | " Eternal Sunshine of the Spotless Mind\n",
553 | " (2004)\n",
554 | " | ,\n",
555 | " \n",
556 | " 93.\n",
557 | " 2001: A Space Odyssey\n",
558 | " (1968)\n",
559 | " | ,\n",
560 | " \n",
561 | " 94.\n",
562 | " Jagten\n",
563 | " (2012)\n",
564 | " | ,\n",
565 | " \n",
566 | " 95.\n",
567 | " Reservoir Dogs\n",
568 | " (1992)\n",
569 | " | ,\n",
570 | " \n",
571 | " 96.\n",
572 | " Lawrence of Arabia\n",
573 | " (1962)\n",
574 | " | ,\n",
575 | " \n",
576 | " 97.\n",
577 | " Citizen Kane\n",
578 | " (1941)\n",
579 | " | ,\n",
580 | " \n",
581 | " 98.\n",
582 | " Ikiru\n",
583 | " (1952)\n",
584 | " | ,\n",
585 | " \n",
586 | " 99.\n",
587 | " M - Eine Stadt sucht einen Mörder\n",
588 | " (1931)\n",
589 | " | ,\n",
590 | " \n",
591 | " 100.\n",
592 | " North by Northwest\n",
593 | " (1959)\n",
594 | " | ,\n",
595 | " \n",
596 | " 101.\n",
597 | " The Apartment\n",
598 | " (1960)\n",
599 | " | ,\n",
600 | " \n",
601 | " 102.\n",
602 | " Vertigo\n",
603 | " (1958)\n",
604 | " | ,\n",
605 | " \n",
606 | " 103.\n",
607 | " Le fabuleux destin d'Amélie Poulain\n",
608 | " (2001)\n",
609 | " | ,\n",
610 | " \n",
611 | " 104.\n",
612 | " Double Indemnity\n",
613 | " (1944)\n",
614 | " | ,\n",
615 | " \n",
616 | " 105.\n",
617 | " A Clockwork Orange\n",
618 | " (1971)\n",
619 | " | ,\n",
620 | " \n",
621 | " 106.\n",
622 | " Full Metal Jacket\n",
623 | " (1987)\n",
624 | " | ,\n",
625 | " \n",
626 | " 107.\n",
627 | " Scarface\n",
628 | " (1983)\n",
629 | " | ,\n",
630 | " \n",
631 | " 108.\n",
632 | " Hamilton\n",
633 | " (2020)\n",
634 | " | ,\n",
635 | " \n",
636 | " 109.\n",
637 | " Incendies\n",
638 | " (2010)\n",
639 | " | ,\n",
640 | " \n",
641 | " 110.\n",
642 | " Heat\n",
643 | " (1995)\n",
644 | " | ,\n",
645 | " \n",
646 | " 111.\n",
647 | " Up\n",
648 | " (2009)\n",
649 | " | ,\n",
650 | " \n",
651 | " 112.\n",
652 | " To Kill a Mockingbird\n",
653 | " (1962)\n",
654 | " | ,\n",
655 | " \n",
656 | " 113.\n",
657 | " The Sting\n",
658 | " (1973)\n",
659 | " | ,\n",
660 | " \n",
661 | " 114.\n",
662 | " Jodaeiye Nader az Simin\n",
663 | " (2011)\n",
664 | " | ,\n",
665 | " \n",
666 | " 115.\n",
667 | " Metropolis\n",
668 | " (1927)\n",
669 | " | ,\n",
670 | " \n",
671 | " 116.\n",
672 | " Indiana Jones and the Last Crusade\n",
673 | " (1989)\n",
674 | " | ,\n",
675 | " \n",
676 | " 117.\n",
677 | " Die Hard\n",
678 | " (1988)\n",
679 | " | ,\n",
680 | " \n",
681 | " 118.\n",
682 | " L.A. Confidential\n",
683 | " (1997)\n",
684 | " | ,\n",
685 | " \n",
686 | " 119.\n",
687 | " Snatch\n",
688 | " (2000)\n",
689 | " | ,\n",
690 | " \n",
691 | " 120.\n",
692 | " Ladri di biciclette\n",
693 | " (1948)\n",
694 | " | ,\n",
695 | " \n",
696 | " 121.\n",
697 | " Taare Zameen Par\n",
698 | " (2007)\n",
699 | " | ,\n",
700 | " \n",
701 | " 122.\n",
702 | " Taxi Driver\n",
703 | " (1976)\n",
704 | " | ,\n",
705 | " \n",
706 | " 123.\n",
707 | " 1917\n",
708 | " (2019)\n",
709 | " | ,\n",
710 | " \n",
711 | " 124.\n",
712 | " Dangal\n",
713 | " (2016)\n",
714 | " | ,\n",
715 | " \n",
716 | " 125.\n",
717 | " Der Untergang\n",
718 | " (2004)\n",
719 | " | ,\n",
720 | " \n",
721 | " 126.\n",
722 | " Top Gun: Maverick\n",
723 | " (2022)\n",
724 | " | ,\n",
725 | " \n",
726 | " 127.\n",
727 | " Per qualche dollaro in più\n",
728 | " (1965)\n",
729 | " | ,\n",
730 | " \n",
731 | " 128.\n",
732 | " Batman Begins\n",
733 | " (2005)\n",
734 | " | ,\n",
735 | " \n",
736 | " 129.\n",
737 | " The Kid\n",
738 | " (1921)\n",
739 | " | ,\n",
740 | " \n",
741 | " 130.\n",
742 | " Some Like It Hot\n",
743 | " (1959)\n",
744 | " | ,\n",
745 | " \n",
746 | " 131.\n",
747 | " The Father\n",
748 | " (2020)\n",
749 | " | ,\n",
750 | " \n",
751 | " 132.\n",
752 | " The Wolf of Wall Street\n",
753 | " (2013)\n",
754 | " | ,\n",
755 | " \n",
756 | " 133.\n",
757 | " All About Eve\n",
758 | " (1950)\n",
759 | " | ,\n",
760 | " \n",
761 | " 134.\n",
762 | " Green Book\n",
763 | " (2018)\n",
764 | " | ,\n",
765 | " \n",
766 | " 135.\n",
767 | " Judgment at Nuremberg\n",
768 | " (1961)\n",
769 | " | ,\n",
770 | " \n",
771 | " 136.\n",
772 | " The Truman Show\n",
773 | " (1998)\n",
774 | " | ,\n",
775 | " \n",
776 | " 137.\n",
777 | " Ran\n",
778 | " (1985)\n",
779 | " | ,\n",
780 | " \n",
781 | " 138.\n",
782 | " Casino\n",
783 | " (1995)\n",
784 | " | ,\n",
785 | " \n",
786 | " 139.\n",
787 | " There Will Be Blood\n",
788 | " (2007)\n",
789 | " | ,\n",
790 | " \n",
791 | " 140.\n",
792 | " Pan's Labyrinth\n",
793 | " (2006)\n",
794 | " | ,\n",
795 | " \n",
796 | " 141.\n",
797 | " Shutter Island\n",
798 | " (2010)\n",
799 | " | ,\n",
800 | " \n",
801 | " 142.\n",
802 | " Unforgiven\n",
803 | " (1992)\n",
804 | " | ,\n",
805 | " \n",
806 | " 143.\n",
807 | " The Sixth Sense\n",
808 | " (1999)\n",
809 | " | ,\n",
810 | " \n",
811 | " 144.\n",
812 | " Jurassic Park\n",
813 | " (1993)\n",
814 | " | ,\n",
815 | " \n",
816 | " 145.\n",
817 | " A Beautiful Mind\n",
818 | " (2001)\n",
819 | " | ,\n",
820 | " \n",
821 | " 146.\n",
822 | " The Treasure of the Sierra Madre\n",
823 | " (1948)\n",
824 | " | ,\n",
825 | " \n",
826 | " 147.\n",
827 | " Yôjinbô\n",
828 | " (1961)\n",
829 | " | ,\n",
830 | " \n",
831 | " 148.\n",
832 | " No Country for Old Men\n",
833 | " (2007)\n",
834 | " | ,\n",
835 | " \n",
836 | " 149.\n",
837 | " Monty Python and the Holy Grail\n",
838 | " (1975)\n",
839 | " | ,\n",
840 | " \n",
841 | " 150.\n",
842 | " Kill Bill: Vol. 1\n",
843 | " (2003)\n",
844 | " | ,\n",
845 | " \n",
846 | " 151.\n",
847 | " The Great Escape\n",
848 | " (1963)\n",
849 | " | ,\n",
850 | " \n",
851 | " 152.\n",
852 | " The Thing\n",
853 | " (1982)\n",
854 | " | ,\n",
855 | " \n",
856 | " 153.\n",
857 | " Rashômon\n",
858 | " (1950)\n",
859 | " | ,\n",
860 | " \n",
861 | " 154.\n",
862 | " Finding Nemo\n",
863 | " (2003)\n",
864 | " | ,\n",
865 | " \n",
866 | " 155.\n",
867 | " Spider-Man: No Way Home\n",
868 | " (2021)\n",
869 | " | ,\n",
870 | " \n",
871 | " 156.\n",
872 | " The Elephant Man\n",
873 | " (1980)\n",
874 | " | ,\n",
875 | " \n",
876 | " 157.\n",
877 | " Chinatown\n",
878 | " (1974)\n",
879 | " | ,\n",
880 | " \n",
881 | " 158.\n",
882 | " V for Vendetta\n",
883 | " (2005)\n",
884 | " | ,\n",
885 | " \n",
886 | " 159.\n",
887 | " Raging Bull\n",
888 | " (1980)\n",
889 | " | ,\n",
890 | " \n",
891 | " 160.\n",
892 | " Gone with the Wind\n",
893 | " (1939)\n",
894 | " | ,\n",
895 | " \n",
896 | " 161.\n",
897 | " Dial M for Murder\n",
898 | " (1954)\n",
899 | " | ,\n",
900 | " \n",
901 | " 162.\n",
902 | " Hauru no ugoku shiro\n",
903 | " (2004)\n",
904 | " | ,\n",
905 | " \n",
906 | " 163.\n",
907 | " Lock, Stock and Two Smoking Barrels\n",
908 | " (1998)\n",
909 | " | ,\n",
910 | " \n",
911 | " 164.\n",
912 | " Inside Out\n",
913 | " (2015)\n",
914 | " | ,\n",
915 | " \n",
916 | " 165.\n",
917 | " El secreto de sus ojos\n",
918 | " (2009)\n",
919 | " | ,\n",
920 | " \n",
921 | " 166.\n",
922 | " Three Billboards Outside Ebbing, Missouri\n",
923 | " (2017)\n",
924 | " | ,\n",
925 | " \n",
926 | " 167.\n",
927 | " Prisoners\n",
928 | " (2013)\n",
929 | " | ,\n",
930 | " \n",
931 | " 168.\n",
932 | " The Bridge on the River Kwai\n",
933 | " (1957)\n",
934 | " | ,\n",
935 | " \n",
936 | " 169.\n",
937 | " Trainspotting\n",
938 | " (1996)\n",
939 | " | ,\n",
940 | " \n",
941 | " 170.\n",
942 | " Fargo\n",
943 | " (1996)\n",
944 | " | ,\n",
945 | " \n",
946 | " 171.\n",
947 | " Warrior\n",
948 | " (2011)\n",
949 | " | ,\n",
950 | " \n",
951 | " 172.\n",
952 | " Gran Torino\n",
953 | " (2008)\n",
954 | " | ,\n",
955 | " \n",
956 | " 173.\n",
957 | " Catch Me If You Can\n",
958 | " (2002)\n",
959 | " | ,\n",
960 | " \n",
961 | " 174.\n",
962 | " Tonari no Totoro\n",
963 | " (1988)\n",
964 | " | ,\n",
965 | " \n",
966 | " 175.\n",
967 | " Million Dollar Baby\n",
968 | " (2004)\n",
969 | " | ,\n",
970 | " \n",
971 | " 176.\n",
972 | " Bacheha-Ye aseman\n",
973 | " (1997)\n",
974 | " | ,\n",
975 | " \n",
976 | " 177.\n",
977 | " Klaus\n",
978 | " (2019)\n",
979 | " | ,\n",
980 | " \n",
981 | " 178.\n",
982 | " Harry Potter and the Deathly Hallows: Part 2\n",
983 | " (2011)\n",
984 | " | ,\n",
985 | " \n",
986 | " 179.\n",
987 | " Blade Runner\n",
988 | " (1982)\n",
989 | " | ,\n",
990 | " \n",
991 | " 180.\n",
992 | " The Gold Rush\n",
993 | " (1925)\n",
994 | " | ,\n",
995 | " \n",
996 | " 181.\n",
997 | " 12 Years a Slave\n",
998 | " (2013)\n",
999 | " | ,\n",
1000 | " \n",
1001 | " 182.\n",
1002 | " Before Sunrise\n",
1003 | " (1995)\n",
1004 | " | ,\n",
1005 | " \n",
1006 | " 183.\n",
1007 | " The Grand Budapest Hotel\n",
1008 | " (2014)\n",
1009 | " | ,\n",
1010 | " \n",
1011 | " 184.\n",
1012 | " Ben-Hur\n",
1013 | " (1959)\n",
1014 | " | ,\n",
1015 | " \n",
1016 | " 185.\n",
1017 | " Gone Girl\n",
1018 | " (2014)\n",
1019 | " | ,\n",
1020 | " \n",
1021 | " 186.\n",
1022 | " On the Waterfront\n",
1023 | " (1954)\n",
1024 | " | ,\n",
1025 | " \n",
1026 | " 187.\n",
1027 | " Barry Lyndon\n",
1028 | " (1975)\n",
1029 | " | ,\n",
1030 | " \n",
1031 | " 188.\n",
1032 | " The General\n",
1033 | " (1926)\n",
1034 | " | ,\n",
1035 | " \n",
1036 | " 189.\n",
1037 | " Smultronstället\n",
1038 | " (1957)\n",
1039 | " | ,\n",
1040 | " \n",
1041 | " 190.\n",
1042 | " In the Name of the Father\n",
1043 | " (1993)\n",
1044 | " | ,\n",
1045 | " \n",
1046 | " 191.\n",
1047 | " The Deer Hunter\n",
1048 | " (1978)\n",
1049 | " | ,\n",
1050 | " \n",
1051 | " 192.\n",
1052 | " Hacksaw Ridge\n",
1053 | " (2016)\n",
1054 | " | ,\n",
1055 | " \n",
1056 | " 193.\n",
1057 | " The Third Man\n",
1058 | " (1949)\n",
1059 | " | ,\n",
1060 | " \n",
1061 | " 194.\n",
1062 | " Salinui chueok\n",
1063 | " (2003)\n",
1064 | " | ,\n",
1065 | " \n",
1066 | " 195.\n",
1067 | " Le salaire de la peur\n",
1068 | " (1953)\n",
1069 | " | ,\n",
1070 | " \n",
1071 | " 196.\n",
1072 | " Sherlock Jr.\n",
1073 | " (1924)\n",
1074 | " | ,\n",
1075 | " \n",
1076 | " 197.\n",
1077 | " Relatos salvajes\n",
1078 | " (2014)\n",
1079 | " | ,\n",
1080 | " \n",
1081 | " 198.\n",
1082 | " Mr. Smith Goes to Washington\n",
1083 | " (1939)\n",
1084 | " | ,\n",
1085 | " \n",
1086 | " 199.\n",
1087 | " Mad Max: Fury Road\n",
1088 | " (2015)\n",
1089 | " | ,\n",
1090 | " \n",
1091 | " 200.\n",
1092 | " Dead Poets Society\n",
1093 | " (1989)\n",
1094 | " | ,\n",
1095 | " \n",
1096 | " 201.\n",
1097 | " Mary and Max.\n",
1098 | " (2009)\n",
1099 | " | ,\n",
1100 | " \n",
1101 | " 202.\n",
1102 | " Monsters, Inc.\n",
1103 | " (2001)\n",
1104 | " | ,\n",
1105 | " \n",
1106 | " 203.\n",
1107 | " How to Train Your Dragon\n",
1108 | " (2010)\n",
1109 | " | ,\n",
1110 | " \n",
1111 | " 204.\n",
1112 | " Jaws\n",
1113 | " (1975)\n",
1114 | " | ,\n",
1115 | " \n",
1116 | " 205.\n",
1117 | " Guardians of the Galaxy Vol. 3\n",
1118 | " (2023)\n",
1119 | " | ,\n",
1120 | " \n",
1121 | " 206.\n",
1122 | " Det sjunde inseglet\n",
1123 | " (1957)\n",
1124 | " | ,\n",
1125 | " \n",
1126 | " 207.\n",
1127 | " Room\n",
1128 | " (2015)\n",
1129 | " | ,\n",
1130 | " \n",
1131 | " 208.\n",
1132 | " Tôkyô monogatari\n",
1133 | " (1953)\n",
1134 | " | ,\n",
1135 | " \n",
1136 | " 209.\n",
1137 | " The Big Lebowski\n",
1138 | " (1998)\n",
1139 | " | ,\n",
1140 | " \n",
1141 | " 210.\n",
1142 | " Ford v Ferrari\n",
1143 | " (2019)\n",
1144 | " | ,\n",
1145 | " \n",
1146 | " 211.\n",
1147 | " La passion de Jeanne d'Arc\n",
1148 | " (1928)\n",
1149 | " | ,\n",
1150 | " \n",
1151 | " 212.\n",
1152 | " Hotel Rwanda\n",
1153 | " (2004)\n",
1154 | " | ,\n",
1155 | " \n",
1156 | " 213.\n",
1157 | " Ratatouille\n",
1158 | " (2007)\n",
1159 | " | ,\n",
1160 | " \n",
1161 | " 214.\n",
1162 | " Rocky\n",
1163 | " (1976)\n",
1164 | " | ,\n",
1165 | " \n",
1166 | " 215.\n",
1167 | " Platoon\n",
1168 | " (1986)\n",
1169 | " | ,\n",
1170 | " \n",
1171 | " 216.\n",
1172 | " Logan\n",
1173 | " (2017)\n",
1174 | " | ,\n",
1175 | " \n",
1176 | " 217.\n",
1177 | " Spotlight\n",
1178 | " (2015)\n",
1179 | " | ,\n",
1180 | " \n",
1181 | " 218.\n",
1182 | " The Terminator\n",
1183 | " (1984)\n",
1184 | " | ,\n",
1185 | " \n",
1186 | " 219.\n",
1187 | " Jai Bhim\n",
1188 | " (2021)\n",
1189 | " | ,\n",
1190 | " \n",
1191 | " 220.\n",
1192 | " Before Sunset\n",
1193 | " (2004)\n",
1194 | " | ,\n",
1195 | " \n",
1196 | " 221.\n",
1197 | " Rush\n",
1198 | " (2013)\n",
1199 | " | ,\n",
1200 | " \n",
1201 | " 222.\n",
1202 | " Network\n",
1203 | " (1976)\n",
1204 | " | ,\n",
1205 | " \n",
1206 | " 223.\n",
1207 | " Stand by Me\n",
1208 | " (1986)\n",
1209 | " | ,\n",
1210 | " \n",
1211 | " 224.\n",
1212 | " The Best Years of Our Lives\n",
1213 | " (1946)\n",
1214 | " | ,\n",
1215 | " \n",
1216 | " 225.\n",
1217 | " Into the Wild\n",
1218 | " (2007)\n",
1219 | " | ,\n",
1220 | " \n",
1221 | " 226.\n",
1222 | " The Wizard of Oz\n",
1223 | " (1939)\n",
1224 | " | ,\n",
1225 | " \n",
1226 | " 227.\n",
1227 | " La haine\n",
1228 | " (1995)\n",
1229 | " | ,\n",
1230 | " \n",
1231 | " 228.\n",
1232 | " The Incredibles\n",
1233 | " (2004)\n",
1234 | " | ,\n",
1235 | " \n",
1236 | " 229.\n",
1237 | " The Exorcist\n",
1238 | " (1973)\n",
1239 | " | ,\n",
1240 | " \n",
1241 | " 230.\n",
1242 | " Pirates of the Caribbean: The Curse of the Black Pearl\n",
1243 | " (2003)\n",
1244 | " | ,\n",
1245 | " \n",
1246 | " 231.\n",
1247 | " To Be or Not to Be\n",
1248 | " (1942)\n",
1249 | " | ,\n",
1250 | " \n",
1251 | " 232.\n",
1252 | " Groundhog Day\n",
1253 | " (1993)\n",
1254 | " | ,\n",
1255 | " \n",
1256 | " 233.\n",
1257 | " Babam ve Oglum\n",
1258 | " (2005)\n",
1259 | " | ,\n",
1260 | " \n",
1261 | " 234.\n",
1262 | " La battaglia di Algeri\n",
1263 | " (1966)\n",
1264 | " | ,\n",
1265 | " \n",
1266 | " 235.\n",
1267 | " The Grapes of Wrath\n",
1268 | " (1940)\n",
1269 | " | ,\n",
1270 | " \n",
1271 | " 236.\n",
1272 | " Hachi: A Dog's Tale\n",
1273 | " (2009)\n",
1274 | " | ,\n",
1275 | " \n",
1276 | " 237.\n",
1277 | " Ah-ga-ssi\n",
1278 | " (2016)\n",
1279 | " | ,\n",
1280 | " \n",
1281 | " 238.\n",
1282 | " Pather Panchali\n",
1283 | " (1955)\n",
1284 | " | ,\n",
1285 | " \n",
1286 | " 239.\n",
1287 | " Amores perros\n",
1288 | " (2000)\n",
1289 | " | ,\n",
1290 | " \n",
1291 | " 240.\n",
1292 | " Rebecca\n",
1293 | " (1940)\n",
1294 | " | ,\n",
1295 | " \n",
1296 | " 241.\n",
1297 | " Cool Hand Luke\n",
1298 | " (1967)\n",
1299 | " | ,\n",
1300 | " \n",
1301 | " 242.\n",
1302 | " The Sound of Music\n",
1303 | " (1965)\n",
1304 | " | ,\n",
1305 | " \n",
1306 | " 243.\n",
1307 | " It Happened One Night\n",
1308 | " (1934)\n",
1309 | " | ,\n",
1310 | " \n",
1311 | " 244.\n",
1312 | " Les quatre cents coups\n",
1313 | " (1959)\n",
1314 | " | ,\n",
1315 | " \n",
1316 | " 245.\n",
1317 | " Persona\n",
1318 | " (1966)\n",
1319 | " | ,\n",
1320 | " \n",
1321 | " 246.\n",
1322 | " The Help\n",
1323 | " (2011)\n",
1324 | " | ,\n",
1325 | " \n",
1326 | " 247.\n",
1327 | " The Iron Giant\n",
1328 | " (1999)\n",
1329 | " | ,\n",
1330 | " \n",
1331 | " 248.\n",
1332 | " Life of Brian\n",
1333 | " (1979)\n",
1334 | " | ,\n",
1335 | " \n",
1336 | " 249.\n",
1337 | " Aladdin\n",
1338 | " (1992)\n",
1339 | " | ,\n",
1340 | " \n",
1341 | " 250.\n",
1342 | " Dances with Wolves\n",
1343 | " (1990)\n",
1344 | " | ]"
1345 | ]
1346 | },
1347 | "execution_count": 9,
1348 | "metadata": {},
1349 | "output_type": "execute_result"
1350 | }
1351 | ],
1352 | "source": [
1353 | "# scrape the table cells that hold each movie's rank, title and year\n",
1354 | "scraped_movies = soup.find_all(name='td', attrs={'class': 'titleColumn'})\n",
1355 | "scraped_movies"
1356 | ]
1357 | },
1358 | {
1359 | "cell_type": "code",
1360 | "execution_count": 10,
1361 | "metadata": {},
1362 | "outputs": [
1363 | {
1364 | "data": {
1365 | "text/plain": [
1366 | "['1. The Shawshank Redemption(1994)',\n",
1367 | " '2. The Godfather(1972)',\n",
1368 | " '3. The Dark Knight(2008)',\n",
1369 | " '4. The Godfather Part II(1974)',\n",
1370 | " '5. 12 Angry Men(1957)',\n",
1371 | " \"6. Schindler's List(1993)\",\n",
1372 | " '7. The Lord of the Rings: The Return of the King(2003)',\n",
1373 | " '8. Pulp Fiction(1994)',\n",
1374 | " '9. The Lord of the Rings: The Fellowship of the Ring(2001)',\n",
1375 | " '10. Il buono, il brutto, il cattivo(1966)',\n",
1376 | " '11. Forrest Gump(1994)',\n",
1377 | " '12. Fight Club(1999)',\n",
1378 | " '13. Spider-Man: Across the Spider-Verse(2023)',\n",
1379 | " '14. The Lord of the Rings: The Two Towers(2002)',\n",
1380 | " '15. Inception(2010)',\n",
1381 | " '16. Star Wars: Episode V - The Empire Strikes Back(1980)',\n",
1382 | " '17. The Matrix(1999)',\n",
1383 | " '18. GoodFellas(1990)',\n",
1384 | " \"19. One Flew Over the Cuckoo's Nest(1975)\",\n",
1385 | " '20. Se7en(1995)',\n",
1386 | " \"21. It's a Wonderful Life(1946)\",\n",
1387 | " '22. Shichinin no samurai(1954)',\n",
1388 | " '23. The Silence of the Lambs(1991)',\n",
1389 | " '24. Saving Private Ryan(1998)',\n",
1390 | " '25. Cidade de Deus(2002)',\n",
1391 | " '26. Interstellar(2014)',\n",
1392 | " '27. La vita è bella(1997)',\n",
1393 | " '28. The Green Mile(1999)',\n",
1394 | " '29. Star Wars(1977)',\n",
1395 | " '30. Terminator 2: Judgment Day(1991)',\n",
1396 | " '31. Back to the Future(1985)',\n",
1397 | " '32. Sen to Chihiro no kamikakushi(2001)',\n",
1398 | " '33. The Pianist(2002)',\n",
1399 | " '34. Psycho(1960)',\n",
1400 | " '35. Gisaengchung(2019)',\n",
1401 | " '36. Léon(1994)',\n",
1402 | " '37. The Lion King(1994)',\n",
1403 | " '38. Gladiator(2000)',\n",
1404 | " '39. American History X(1998)',\n",
1405 | " '40. The Departed(2006)',\n",
1406 | " '41. Whiplash(2014)',\n",
1407 | " '42. The Prestige(2006)',\n",
1408 | " '43. The Usual Suspects(1995)',\n",
1409 | " '44. Casablanca(1942)',\n",
1410 | " '45. Hotaru no haka(1988)',\n",
1411 | " '46. Seppuku(1962)',\n",
1412 | " '47. The Intouchables(2011)',\n",
1413 | " '48. Modern Times(1936)',\n",
1414 | " '49. Once Upon a Time in the West(1968)',\n",
1415 | " '50. Nuovo Cinema Paradiso(1988)',\n",
1416 | " '51. Rear Window(1954)',\n",
1417 | " '52. Alien(1979)',\n",
1418 | " '53. City Lights(1931)',\n",
1419 | " '54. Apocalypse Now(1979)',\n",
1420 | " '55. Memento(2000)',\n",
1421 | " '56. Django Unchained(2012)',\n",
1422 | " '57. Raiders of the Lost Ark(1981)',\n",
1423 | " '58. WALL·E(2008)',\n",
1424 | " '59. The Lives of Others(2006)',\n",
1425 | " '60. Sunset Blvd.(1950)',\n",
1426 | " '61. Paths of Glory(1957)',\n",
1427 | " '62. Avengers: Infinity War(2018)',\n",
1428 | " '63. The Shining(1980)',\n",
1429 | " '64. The Great Dictator(1940)',\n",
1430 | " '65. Witness for the Prosecution(1957)',\n",
1431 | " '66. Spider-Man: Into the Spider-Verse(2018)',\n",
1432 | " '67. Aliens(1986)',\n",
1433 | " '68. American Beauty(1999)',\n",
1434 | " '69. The Dark Knight Rises(2012)',\n",
1435 | " '70. Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb(1964)',\n",
1436 | " '71. Inglourious Basterds(2009)',\n",
1437 | " '72. Oldeuboi(2003)',\n",
1438 | " '73. Coco(2017)',\n",
1439 | " '74. Amadeus(1984)',\n",
1440 | " '75. Toy Story(1995)',\n",
1441 | " '76. Braveheart(1995)',\n",
1442 | " '77. Das Boot(1981)',\n",
1443 | " '78. Joker(2019)',\n",
1444 | " '79. Avengers: Endgame(2019)',\n",
1445 | " '80. Mononoke-hime(1997)',\n",
1446 | " '81. Good Will Hunting(1997)',\n",
1447 | " '82. Once Upon a Time in America(1984)',\n",
1448 | " '83. Kimi no na wa.(2016)',\n",
1449 | " '84. 3 Idiots(2009)',\n",
1450 | " \"85. Singin' in the Rain(1952)\",\n",
1451 | " '86. Tengoku to jigoku(1963)',\n",
1452 | " '87. Requiem for a Dream(2000)',\n",
1453 | " '88. Capharnaüm(2018)',\n",
1454 | " '89. Toy Story 3(2010)',\n",
1455 | " '90. Star Wars: Episode VI - Return of the Jedi(1983)',\n",
1456 | " '91. Idi i smotri(1985)',\n",
1457 | " '92. Eternal Sunshine of the Spotless Mind(2004)',\n",
1458 | " '93. 2001: A Space Odyssey(1968)',\n",
1459 | " '94. Jagten(2012)',\n",
1460 | " '95. Reservoir Dogs(1992)',\n",
1461 | " '96. Lawrence of Arabia(1962)',\n",
1462 | " '97. Citizen Kane(1941)',\n",
1463 | " '98. Ikiru(1952)',\n",
1464 | " '99. M - Eine Stadt sucht einen Mörder(1931)',\n",
1465 | " '100. North by Northwest(1959)',\n",
1466 | " '101. The Apartment(1960)',\n",
1467 | " '102. Vertigo(1958)',\n",
1468 | " \"103. Le fabuleux destin d'Amélie Poulain(2001)\",\n",
1469 | " '104. Double Indemnity(1944)',\n",
1470 | " '105. A Clockwork Orange(1971)',\n",
1471 | " '106. Full Metal Jacket(1987)',\n",
1472 | " '107. Scarface(1983)',\n",
1473 | " '108. Hamilton(2020)',\n",
1474 | " '109. Incendies(2010)',\n",
1475 | " '110. Heat(1995)',\n",
1476 | " '111. Up(2009)',\n",
1477 | " '112. To Kill a Mockingbird(1962)',\n",
1478 | " '113. The Sting(1973)',\n",
1479 | " '114. Jodaeiye Nader az Simin(2011)',\n",
1480 | " '115. Metropolis(1927)',\n",
1481 | " '116. Indiana Jones and the Last Crusade(1989)',\n",
1482 | " '117. Die Hard(1988)',\n",
1483 | " '118. L.A. Confidential(1997)',\n",
1484 | " '119. Snatch(2000)',\n",
1485 | " '120. Ladri di biciclette(1948)',\n",
1486 | " '121. Taare Zameen Par(2007)',\n",
1487 | " '122. Taxi Driver(1976)',\n",
1488 | " '123. 1917(2019)',\n",
1489 | " '124. Dangal(2016)',\n",
1490 | " '125. Der Untergang(2004)',\n",
1491 | " '126. Top Gun: Maverick(2022)',\n",
1492 | " '127. Per qualche dollaro in più(1965)',\n",
1493 | " '128. Batman Begins(2005)',\n",
1494 | " '129. The Kid(1921)',\n",
1495 | " '130. Some Like It Hot(1959)',\n",
1496 | " '131. The Father(2020)',\n",
1497 | " '132. The Wolf of Wall Street(2013)',\n",
1498 | " '133. All About Eve(1950)',\n",
1499 | " '134. Green Book(2018)',\n",
1500 | " '135. Judgment at Nuremberg(1961)',\n",
1501 | " '136. The Truman Show(1998)',\n",
1502 | " '137. Ran(1985)',\n",
1503 | " '138. Casino(1995)',\n",
1504 | " '139. There Will Be Blood(2007)',\n",
1505 | " \"140. Pan's Labyrinth(2006)\",\n",
1506 | " '141. Shutter Island(2010)',\n",
1507 | " '142. Unforgiven(1992)',\n",
1508 | " '143. The Sixth Sense(1999)',\n",
1509 | " '144. Jurassic Park(1993)',\n",
1510 | " '145. A Beautiful Mind(2001)',\n",
1511 | " '146. The Treasure of the Sierra Madre(1948)',\n",
1512 | " '147. Yôjinbô(1961)',\n",
1513 | " '148. No Country for Old Men(2007)',\n",
1514 | " '149. Monty Python and the Holy Grail(1975)',\n",
1515 | " '150. Kill Bill: Vol. 1(2003)',\n",
1516 | " '151. The Great Escape(1963)',\n",
1517 | " '152. The Thing(1982)',\n",
1518 | " '153. Rashômon(1950)',\n",
1519 | " '154. Finding Nemo(2003)',\n",
1520 | " '155. Spider-Man: No Way Home(2021)',\n",
1521 | " '156. The Elephant Man(1980)',\n",
1522 | " '157. Chinatown(1974)',\n",
1523 | " '158. V for Vendetta(2005)',\n",
1524 | " '159. Raging Bull(1980)',\n",
1525 | " '160. Gone with the Wind(1939)',\n",
1526 | " '161. Dial M for Murder(1954)',\n",
1527 | " '162. Hauru no ugoku shiro(2004)',\n",
1528 | " '163. Lock, Stock and Two Smoking Barrels(1998)',\n",
1529 | " '164. Inside Out(2015)',\n",
1530 | " '165. El secreto de sus ojos(2009)',\n",
1531 | " '166. Three Billboards Outside Ebbing, Missouri(2017)',\n",
1532 | " '167. Prisoners(2013)',\n",
1533 | " '168. The Bridge on the River Kwai(1957)',\n",
1534 | " '169. Trainspotting(1996)',\n",
1535 | " '170. Fargo(1996)',\n",
1536 | " '171. Warrior(2011)',\n",
1537 | " '172. Gran Torino(2008)',\n",
1538 | " '173. Catch Me If You Can(2002)',\n",
1539 | " '174. Tonari no Totoro(1988)',\n",
1540 | " '175. Million Dollar Baby(2004)',\n",
1541 | " '176. Bacheha-Ye aseman(1997)',\n",
1542 | " '177. Klaus(2019)',\n",
1543 | " '178. Harry Potter and the Deathly Hallows: Part 2(2011)',\n",
1544 | " '179. Blade Runner(1982)',\n",
1545 | " '180. The Gold Rush(1925)',\n",
1546 | " '181. 12 Years a Slave(2013)',\n",
1547 | " '182. Before Sunrise(1995)',\n",
1548 | " '183. The Grand Budapest Hotel(2014)',\n",
1549 | " '184. Ben-Hur(1959)',\n",
1550 | " '185. Gone Girl(2014)',\n",
1551 | " '186. On the Waterfront(1954)',\n",
1552 | " '187. Barry Lyndon(1975)',\n",
1553 | " '188. The General(1926)',\n",
1554 | " '189. Smultronstället(1957)',\n",
1555 | " '190. In the Name of the Father(1993)',\n",
1556 | " '191. The Deer Hunter(1978)',\n",
1557 | " '192. Hacksaw Ridge(2016)',\n",
1558 | " '193. The Third Man(1949)',\n",
1559 | " '194. Salinui chueok(2003)',\n",
1560 | " '195. Le salaire de la peur(1953)',\n",
1561 | " '196. Sherlock Jr.(1924)',\n",
1562 | " '197. Relatos salvajes(2014)',\n",
1563 | " '198. Mr. Smith Goes to Washington(1939)',\n",
1564 | " '199. Mad Max: Fury Road(2015)',\n",
1565 | " '200. Dead Poets Society(1989)',\n",
1566 | " '201. Mary and Max.(2009)',\n",
1567 | " '202. Monsters, Inc.(2001)',\n",
1568 | " '203. How to Train Your Dragon(2010)',\n",
1569 | " '204. Jaws(1975)',\n",
1570 | " '205. Guardians of the Galaxy Vol. 3(2023)',\n",
1571 | " '206. Det sjunde inseglet(1957)',\n",
1572 | " '207. Room(2015)',\n",
1573 | " '208. Tôkyô monogatari(1953)',\n",
1574 | " '209. The Big Lebowski(1998)',\n",
1575 | " '210. Ford v Ferrari(2019)',\n",
1576 | " \"211. La passion de Jeanne d'Arc(1928)\",\n",
1577 | " '212. Hotel Rwanda(2004)',\n",
1578 | " '213. Ratatouille(2007)',\n",
1579 | " '214. Rocky(1976)',\n",
1580 | " '215. Platoon(1986)',\n",
1581 | " '216. Logan(2017)',\n",
1582 | " '217. Spotlight(2015)',\n",
1583 | " '218. The Terminator(1984)',\n",
1584 | " '219. Jai Bhim(2021)',\n",
1585 | " '220. Before Sunset(2004)',\n",
1586 | " '221. Rush(2013)',\n",
1587 | " '222. Network(1976)',\n",
1588 | " '223. Stand by Me(1986)',\n",
1589 | " '224. The Best Years of Our Lives(1946)',\n",
1590 | " '225. Into the Wild(2007)',\n",
1591 | " '226. The Wizard of Oz(1939)',\n",
1592 | " '227. La haine(1995)',\n",
1593 | " '228. The Incredibles(2004)',\n",
1594 | " '229. The Exorcist(1973)',\n",
1595 | " '230. Pirates of the Caribbean: The Curse of the Black Pearl(2003)',\n",
1596 | " '231. To Be or Not to Be(1942)',\n",
1597 | " '232. Groundhog Day(1993)',\n",
1598 | " '233. Babam ve Oglum(2005)',\n",
1599 | " '234. La battaglia di Algeri(1966)',\n",
1600 | " '235. The Grapes of Wrath(1940)',\n",
1601 | " \"236. Hachi: A Dog's Tale(2009)\",\n",
1602 | " '237. Ah-ga-ssi(2016)',\n",
1603 | " '238. Pather Panchali(1955)',\n",
1604 | " '239. Amores perros(2000)',\n",
1605 | " '240. Rebecca(1940)',\n",
1606 | " '241. Cool Hand Luke(1967)',\n",
1607 | " '242. The Sound of Music(1965)',\n",
1608 | " '243. It Happened One Night(1934)',\n",
1609 | " '244. Les quatre cents coups(1959)',\n",
1610 | " '245. Persona(1966)',\n",
1611 | " '246. The Help(2011)',\n",
1612 | " '247. The Iron Giant(1999)',\n",
1613 | " '248. Life of Brian(1979)',\n",
1614 | " '249. Aladdin(1992)',\n",
1615 | " '250. Dances with Wolves(1990)']"
1616 | ]
1617 | },
1618 | "execution_count": 10,
1619 | "metadata": {},
1620 | "output_type": "execute_result"
1621 | }
1622 | ],
1623 | "source": [
1624 | "# Parse movie names: flatten each scraped cell's text onto one line\n",
1625 | "# and trim the space padding left around it.\n",
1626 | "movies = [\n",
1627 | "    cell.get_text().replace('\\n', \"\").strip(\" \")\n",
1628 | "    for cell in scraped_movies\n",
1629 | "]\n",
1630 | "movies"
1631 | ]
1632 | },
1633 | {
1634 | "cell_type": "code",
1635 | "execution_count": 11,
1636 | "metadata": {},
1637 | "outputs": [
1638 | {
1639 | "data": {
1640 | "text/plain": [
1641 | "[\n",
1642 | " 9.2\n",
1643 | " | ,\n",
1644 | " \n",
1645 | " 9.2\n",
1646 | " | ,\n",
1647 | " \n",
1648 | " 9.0\n",
1649 | " | ,\n",
1650 | " \n",
1651 | " 9.0\n",
1652 | " | ,\n",
1653 | " \n",
1654 | " 9.0\n",
1655 | " | ,\n",
1656 | " \n",
1657 | " 8.9\n",
1658 | " | ,\n",
1659 | " \n",
1660 | " 8.9\n",
1661 | " | ,\n",
1662 | " \n",
1663 | " 8.8\n",
1664 | " | ,\n",
1665 | " \n",
1666 | " 8.8\n",
1667 | " | ,\n",
1668 | " \n",
1669 | " 8.8\n",
1670 | " | ,\n",
1671 | " \n",
1672 | " 8.8\n",
1673 | " | ,\n",
1674 | " \n",
1675 | " 8.7\n",
1676 | " | ,\n",
1677 | " \n",
1678 | " 8.7\n",
1679 | " | ,\n",
1680 | " \n",
1681 | " 8.7\n",
1682 | " | ,\n",
1683 | " \n",
1684 | " 8.7\n",
1685 | " | ,\n",
1686 | " \n",
1687 | " 8.7\n",
1688 | " | ,\n",
1689 | " \n",
1690 | " 8.7\n",
1691 | " | ,\n",
1692 | " \n",
1693 | " 8.7\n",
1694 | " | ,\n",
1695 | " \n",
1696 | " 8.6\n",
1697 | " | ,\n",
1698 | " \n",
1699 | " 8.6\n",
1700 | " | ,\n",
1701 | " \n",
1702 | " 8.6\n",
1703 | " | ,\n",
1704 | " \n",
1705 | " 8.6\n",
1706 | " | ,\n",
1707 | " \n",
1708 | " 8.6\n",
1709 | " | ,\n",
1710 | " \n",
1711 | " 8.6\n",
1712 | " | ,\n",
1713 | " \n",
1714 | " 8.6\n",
1715 | " | ,\n",
1716 | " \n",
1717 | " 8.6\n",
1718 | " | ,\n",
1719 | " \n",
1720 | " 8.6\n",
1721 | " | ,\n",
1722 | " \n",
1723 | " 8.6\n",
1724 | " | ,\n",
1725 | " \n",
1726 | " 8.5\n",
1727 | " | ,\n",
1728 | " \n",
1729 | " 8.5\n",
1730 | " | ,\n",
1731 | " \n",
1732 | " 8.5\n",
1733 | " | ,\n",
1734 | " \n",
1735 | " 8.5\n",
1736 | " | ,\n",
1737 | " \n",
1738 | " 8.5\n",
1739 | " | ,\n",
1740 | " \n",
1741 | " 8.5\n",
1742 | " | ,\n",
1743 | " \n",
1744 | " 8.5\n",
1745 | " | ,\n",
1746 | " \n",
1747 | " 8.5\n",
1748 | " | ,\n",
1749 | " \n",
1750 | " 8.5\n",
1751 | " | ,\n",
1752 | " \n",
1753 | " 8.5\n",
1754 | " | ,\n",
1755 | " \n",
1756 | " 8.5\n",
1757 | " | ,\n",
1758 | " \n",
1759 | " 8.5\n",
1760 | " | ,\n",
1761 | " \n",
1762 | " 8.5\n",
1763 | " | ,\n",
1764 | " \n",
1765 | " 8.5\n",
1766 | " | ,\n",
1767 | " \n",
1768 | " 8.5\n",
1769 | " | ,\n",
1770 | " \n",
1771 | " 8.5\n",
1772 | " | ,\n",
1773 | " \n",
1774 | " 8.5\n",
1775 | " | ,\n",
1776 | " \n",
1777 | " 8.5\n",
1778 | " | ,\n",
1779 | " \n",
1780 | " 8.5\n",
1781 | " | ,\n",
1782 | " \n",
1783 | " 8.4\n",
1784 | " | ,\n",
1785 | " \n",
1786 | " 8.4\n",
1787 | " | ,\n",
1788 | " \n",
1789 | " 8.4\n",
1790 | " | ,\n",
1791 | " \n",
1792 | " 8.4\n",
1793 | " | ,\n",
1794 | " \n",
1795 | " 8.4\n",
1796 | " | ,\n",
1797 | " \n",
1798 | " 8.4\n",
1799 | " | ,\n",
1800 | " \n",
1801 | " 8.4\n",
1802 | " | ,\n",
1803 | " \n",
1804 | " 8.4\n",
1805 | " | ,\n",
1806 | " \n",
1807 | " 8.4\n",
1808 | " | ,\n",
1809 | " \n",
1810 | " 8.4\n",
1811 | " | ,\n",
1812 | " \n",
1813 | " 8.4\n",
1814 | " | ,\n",
1815 | " \n",
1816 | " 8.4\n",
1817 | " | ,\n",
1818 | " \n",
1819 | " 8.4\n",
1820 | " | ,\n",
1821 | " \n",
1822 | " 8.4\n",
1823 | " | ,\n",
1824 | " \n",
1825 | " 8.4\n",
1826 | " | ,\n",
1827 | " \n",
1828 | " 8.4\n",
1829 | " | ,\n",
1830 | " \n",
1831 | " 8.4\n",
1832 | " | ,\n",
1833 | " \n",
1834 | " 8.4\n",
1835 | " | ,\n",
1836 | " \n",
1837 | " 8.4\n",
1838 | " | ,\n",
1839 | " \n",
1840 | " 8.3\n",
1841 | " | ,\n",
1842 | " \n",
1843 | " 8.3\n",
1844 | " | ,\n",
1845 | " \n",
1846 | " 8.3\n",
1847 | " | ,\n",
1848 | " \n",
1849 | " 8.3\n",
1850 | " | ,\n",
1851 | " \n",
1852 | " 8.3\n",
1853 | " | ,\n",
1854 | " \n",
1855 | " 8.3\n",
1856 | " | ,\n",
1857 | " \n",
1858 | " 8.3\n",
1859 | " | ,\n",
1860 | " \n",
1861 | " 8.3\n",
1862 | " | ,\n",
1863 | " \n",
1864 | " 8.3\n",
1865 | " | ,\n",
1866 | " \n",
1867 | " 8.3\n",
1868 | " | ,\n",
1869 | " \n",
1870 | " 8.3\n",
1871 | " | ,\n",
1872 | " \n",
1873 | " 8.3\n",
1874 | " | ,\n",
1875 | " \n",
1876 | " 8.3\n",
1877 | " | ,\n",
1878 | " \n",
1879 | " 8.3\n",
1880 | " | ,\n",
1881 | " \n",
1882 | " 8.3\n",
1883 | " | ,\n",
1884 | " \n",
1885 | " 8.3\n",
1886 | " | ,\n",
1887 | " \n",
1888 | " 8.3\n",
1889 | " | ,\n",
1890 | " \n",
1891 | " 8.3\n",
1892 | " | ,\n",
1893 | " \n",
1894 | " 8.3\n",
1895 | " | ,\n",
1896 | " \n",
1897 | " 8.3\n",
1898 | " | ,\n",
1899 | " \n",
1900 | " 8.3\n",
1901 | " | ,\n",
1902 | " \n",
1903 | " 8.3\n",
1904 | " | ,\n",
1905 | " \n",
1906 | " 8.3\n",
1907 | " | ,\n",
1908 | " \n",
1909 | " 8.3\n",
1910 | " | ,\n",
1911 | " \n",
1912 | " 8.3\n",
1913 | " | ,\n",
1914 | " \n",
1915 | " 8.3\n",
1916 | " | ,\n",
1917 | " \n",
1918 | " 8.3\n",
1919 | " | ,\n",
1920 | " \n",
1921 | " 8.3\n",
1922 | " | ,\n",
1923 | " \n",
1924 | " 8.3\n",
1925 | " | ,\n",
1926 | " \n",
1927 | " 8.2\n",
1928 | " | ,\n",
1929 | " \n",
1930 | " 8.2\n",
1931 | " | ,\n",
1932 | " \n",
1933 | " 8.2\n",
1934 | " | ,\n",
1935 | " \n",
1936 | " 8.2\n",
1937 | " | ,\n",
1938 | " \n",
1939 | " 8.2\n",
1940 | " | ,\n",
1941 | " \n",
1942 | " 8.2\n",
1943 | " | ,\n",
1944 | " \n",
1945 | " 8.2\n",
1946 | " | ,\n",
1947 | " \n",
1948 | " 8.2\n",
1949 | " | ,\n",
1950 | " \n",
1951 | " 8.2\n",
1952 | " | ,\n",
1953 | " \n",
1954 | " 8.2\n",
1955 | " | ,\n",
1956 | " \n",
1957 | " 8.2\n",
1958 | " | ,\n",
1959 | " \n",
1960 | " 8.2\n",
1961 | " | ,\n",
1962 | " \n",
1963 | " 8.2\n",
1964 | " | ,\n",
1965 | " \n",
1966 | " 8.2\n",
1967 | " | ,\n",
1968 | " \n",
1969 | " 8.2\n",
1970 | " | ,\n",
1971 | " \n",
1972 | " 8.2\n",
1973 | " | ,\n",
1974 | " \n",
1975 | " 8.2\n",
1976 | " | ,\n",
1977 | " \n",
1978 | " 8.2\n",
1979 | " | ,\n",
1980 | " \n",
1981 | " 8.2\n",
1982 | " | ,\n",
1983 | " \n",
1984 | " 8.2\n",
1985 | " | ,\n",
1986 | " \n",
1987 | " 8.2\n",
1988 | " | ,\n",
1989 | " \n",
1990 | " 8.2\n",
1991 | " | ,\n",
1992 | " \n",
1993 | " 8.2\n",
1994 | " | ,\n",
1995 | " \n",
1996 | " 8.2\n",
1997 | " | ,\n",
1998 | " \n",
1999 | " 8.2\n",
2000 | " | ,\n",
2001 | " \n",
2002 | " 8.2\n",
2003 | " | ,\n",
2004 | " \n",
2005 | " 8.2\n",
2006 | " | ,\n",
2007 | " \n",
2008 | " 8.2\n",
2009 | " | ,\n",
2010 | " \n",
2011 | " 8.2\n",
2012 | " | ,\n",
2013 | " \n",
2014 | " 8.2\n",
2015 | " | ,\n",
2016 | " \n",
2017 | " 8.2\n",
2018 | " | ,\n",
2019 | " \n",
2020 | " 8.2\n",
2021 | " | ,\n",
2022 | " \n",
2023 | " 8.2\n",
2024 | " | ,\n",
2025 | " \n",
2026 | " 8.2\n",
2027 | " | ,\n",
2028 | " \n",
2029 | " 8.2\n",
2030 | " | ,\n",
2031 | " \n",
2032 | " 8.2\n",
2033 | " | ,\n",
2034 | " \n",
2035 | " 8.2\n",
2036 | " | ,\n",
2037 | " \n",
2038 | " 8.2\n",
2039 | " | ,\n",
2040 | " \n",
2041 | " 8.2\n",
2042 | " | ,\n",
2043 | " \n",
2044 | " 8.2\n",
2045 | " | ,\n",
2046 | " \n",
2047 | " 8.2\n",
2048 | " | ,\n",
2049 | " \n",
2050 | " 8.2\n",
2051 | " | ,\n",
2052 | " \n",
2053 | " 8.2\n",
2054 | " | ,\n",
2055 | " \n",
2056 | " 8.2\n",
2057 | " | ,\n",
2058 | " \n",
2059 | " 8.2\n",
2060 | " | ,\n",
2061 | " \n",
2062 | " 8.2\n",
2063 | " | ,\n",
2064 | " \n",
2065 | " 8.2\n",
2066 | " | ,\n",
2067 | " \n",
2068 | " 8.2\n",
2069 | " | ,\n",
2070 | " \n",
2071 | " 8.2\n",
2072 | " | ,\n",
2073 | " \n",
2074 | " 8.2\n",
2075 | " | ,\n",
2076 | " \n",
2077 | " 8.1\n",
2078 | " | ,\n",
2079 | " \n",
2080 | " 8.1\n",
2081 | " | ,\n",
2082 | " \n",
2083 | " 8.1\n",
2084 | " | ,\n",
2085 | " \n",
2086 | " 8.1\n",
2087 | " | ,\n",
2088 | " \n",
2089 | " 8.1\n",
2090 | " | ,\n",
2091 | " \n",
2092 | " 8.1\n",
2093 | " | ,\n",
2094 | " \n",
2095 | " 8.1\n",
2096 | " | ,\n",
2097 | " \n",
2098 | " 8.1\n",
2099 | " | ,\n",
2100 | " \n",
2101 | " 8.1\n",
2102 | " | ,\n",
2103 | " \n",
2104 | " 8.1\n",
2105 | " | ,\n",
2106 | " \n",
2107 | " 8.1\n",
2108 | " | ,\n",
2109 | " \n",
2110 | " 8.1\n",
2111 | " | ,\n",
2112 | " \n",
2113 | " 8.1\n",
2114 | " | ,\n",
2115 | " \n",
2116 | " 8.1\n",
2117 | " | ,\n",
2118 | " \n",
2119 | " 8.1\n",
2120 | " | ,\n",
2121 | " \n",
2122 | " 8.1\n",
2123 | " | ,\n",
2124 | " \n",
2125 | " 8.1\n",
2126 | " | ,\n",
2127 | " \n",
2128 | " 8.1\n",
2129 | " | ,\n",
2130 | " \n",
2131 | " 8.1\n",
2132 | " | ,\n",
2133 | " \n",
2134 | " 8.1\n",
2135 | " | ,\n",
2136 | " \n",
2137 | " 8.1\n",
2138 | " | ,\n",
2139 | " \n",
2140 | " 8.1\n",
2141 | " | ,\n",
2142 | " \n",
2143 | " 8.1\n",
2144 | " | ,\n",
2145 | " \n",
2146 | " 8.1\n",
2147 | " | ,\n",
2148 | " \n",
2149 | " 8.1\n",
2150 | " | ,\n",
2151 | " \n",
2152 | " 8.1\n",
2153 | " | ,\n",
2154 | " \n",
2155 | " 8.1\n",
2156 | " | ,\n",
2157 | " \n",
2158 | " 8.1\n",
2159 | " | ,\n",
2160 | " \n",
2161 | " 8.1\n",
2162 | " | ,\n",
2163 | " \n",
2164 | " 8.1\n",
2165 | " | ,\n",
2166 | " \n",
2167 | " 8.1\n",
2168 | " | ,\n",
2169 | " \n",
2170 | " 8.1\n",
2171 | " | ,\n",
2172 | " \n",
2173 | " 8.1\n",
2174 | " | ,\n",
2175 | " \n",
2176 | " 8.1\n",
2177 | " | ,\n",
2178 | " \n",
2179 | " 8.1\n",
2180 | " | ,\n",
2181 | " \n",
2182 | " 8.1\n",
2183 | " | ,\n",
2184 | " \n",
2185 | " 8.1\n",
2186 | " | ,\n",
2187 | " \n",
2188 | " 8.1\n",
2189 | " | ,\n",
2190 | " \n",
2191 | " 8.1\n",
2192 | " | ,\n",
2193 | " \n",
2194 | " 8.1\n",
2195 | " | ,\n",
2196 | " \n",
2197 | " 8.1\n",
2198 | " | ,\n",
2199 | " \n",
2200 | " 8.1\n",
2201 | " | ,\n",
2202 | " \n",
2203 | " 8.1\n",
2204 | " | ,\n",
2205 | " \n",
2206 | " 8.1\n",
2207 | " | ,\n",
2208 | " \n",
2209 | " 8.1\n",
2210 | " | ,\n",
2211 | " \n",
2212 | " 8.1\n",
2213 | " | ,\n",
2214 | " \n",
2215 | " 8.1\n",
2216 | " | ,\n",
2217 | " \n",
2218 | " 8.1\n",
2219 | " | ,\n",
2220 | " \n",
2221 | " 8.1\n",
2222 | " | ,\n",
2223 | " \n",
2224 | " 8.1\n",
2225 | " | ,\n",
2226 | " \n",
2227 | " 8.1\n",
2228 | " | ,\n",
2229 | " \n",
2230 | " 8.1\n",
2231 | " | ,\n",
2232 | " \n",
2233 | " 8.1\n",
2234 | " | ,\n",
2235 | " \n",
2236 | " 8.1\n",
2237 | " | ,\n",
2238 | " \n",
2239 | " 8.1\n",
2240 | " | ,\n",
2241 | " \n",
2242 | " 8.1\n",
2243 | " | ,\n",
2244 | " \n",
2245 | " 8.1\n",
2246 | " | ,\n",
2247 | " \n",
2248 | " 8.1\n",
2249 | " | ,\n",
2250 | " \n",
2251 | " 8.1\n",
2252 | " | ,\n",
2253 | " \n",
2254 | " 8.1\n",
2255 | " | ,\n",
2256 | " \n",
2257 | " 8.1\n",
2258 | " | ,\n",
2259 | " \n",
2260 | " 8.1\n",
2261 | " | ,\n",
2262 | " \n",
2263 | " 8.1\n",
2264 | " | ,\n",
2265 | " \n",
2266 | " 8.1\n",
2267 | " | ,\n",
2268 | " \n",
2269 | " 8.1\n",
2270 | " | ,\n",
2271 | " \n",
2272 | " 8.0\n",
2273 | " | ,\n",
2274 | " \n",
2275 | " 8.0\n",
2276 | " | ,\n",
2277 | " \n",
2278 | " 8.0\n",
2279 | " | ,\n",
2280 | " \n",
2281 | " 8.0\n",
2282 | " | ,\n",
2283 | " \n",
2284 | " 8.0\n",
2285 | " | ,\n",
2286 | " \n",
2287 | " 8.0\n",
2288 | " | ,\n",
2289 | " \n",
2290 | " 8.0\n",
2291 | " | ,\n",
2292 | " \n",
2293 | " 8.0\n",
2294 | " | ,\n",
2295 | " \n",
2296 | " 8.0\n",
2297 | " | ,\n",
2298 | " \n",
2299 | " 8.0\n",
2300 | " | ,\n",
2301 | " \n",
2302 | " 8.0\n",
2303 | " | ,\n",
2304 | " \n",
2305 | " 8.0\n",
2306 | " | ,\n",
2307 | " \n",
2308 | " 8.0\n",
2309 | " | ,\n",
2310 | " \n",
2311 | " 8.0\n",
2312 | " | ,\n",
2313 | " \n",
2314 | " 8.0\n",
2315 | " | ,\n",
2316 | " \n",
2317 | " 8.0\n",
2318 | " | ,\n",
2319 | " \n",
2320 | " 8.0\n",
2321 | " | ,\n",
2322 | " \n",
2323 | " 8.0\n",
2324 | " | ,\n",
2325 | " \n",
2326 | " 8.0\n",
2327 | " | ,\n",
2328 | " \n",
2329 | " 8.0\n",
2330 | " | ,\n",
2331 | " \n",
2332 | " 8.0\n",
2333 | " | ,\n",
2334 | " \n",
2335 | " 8.0\n",
2336 | " | ,\n",
2337 | " \n",
2338 | " 8.0\n",
2339 | " | ,\n",
2340 | " \n",
2341 | " 8.0\n",
2342 | " | ,\n",
2343 | " \n",
2344 | " 8.0\n",
2345 | " | ,\n",
2346 | " \n",
2347 | " 8.0\n",
2348 | " | ,\n",
2349 | " \n",
2350 | " 8.0\n",
2351 | " | ,\n",
2352 | " \n",
2353 | " 8.0\n",
2354 | " | ,\n",
2355 | " \n",
2356 | " 8.0\n",
2357 | " | ,\n",
2358 | " \n",
2359 | " 8.0\n",
2360 | " | ,\n",
2361 | " \n",
2362 | " 8.0\n",
2363 | " | ,\n",
2364 | " \n",
2365 | " 8.0\n",
2366 | " | ,\n",
2367 | " \n",
2368 | " 8.0\n",
2369 | " | ,\n",
2370 | " \n",
2371 | " 8.0\n",
2372 | " | ,\n",
2373 | " \n",
2374 | " 8.0\n",
2375 | " | ,\n",
2376 | " \n",
2377 | " 8.0\n",
2378 | " | ,\n",
2379 | " \n",
2380 | " 8.0\n",
2381 | " | ,\n",
2382 | " \n",
2383 | " 8.0\n",
2384 | " | ,\n",
2385 | " \n",
2386 | " 8.0\n",
2387 | " | ,\n",
2388 | " \n",
2389 | " 8.0\n",
2390 | " | ]"
2391 | ]
2392 | },
2393 | "execution_count": 11,
2394 | "metadata": {},
2395 | "output_type": "execute_result"
2396 | }
2397 | ],
2398 | "source": [
2399 | "# scrape ratings for the movies\n",
2400 | "scraped_ratings = soup.find_all('td', class_='ratingColumn imdbRating')\n",
2401 | "scraped_ratings"
2402 | ]
2403 | },
2404 | {
2405 | "cell_type": "code",
2406 | "execution_count": 12,
2407 | "metadata": {},
2408 | "outputs": [
2409 | {
2410 | "data": {
2411 | "text/plain": [
2412 | "['9.2',\n",
2413 | " '9.2',\n",
2414 | " '9.0',\n",
2415 | " '9.0',\n",
2416 | " '9.0',\n",
2417 | " '8.9',\n",
2418 | " '8.9',\n",
2419 | " '8.8',\n",
2420 | " '8.8',\n",
2421 | " '8.8',\n",
2422 | " '8.8',\n",
2423 | " '8.7',\n",
2424 | " '8.7',\n",
2425 | " '8.7',\n",
2426 | " '8.7',\n",
2427 | " '8.7',\n",
2428 | " '8.7',\n",
2429 | " '8.7',\n",
2430 | " '8.6',\n",
2431 | " '8.6',\n",
2432 | " '8.6',\n",
2433 | " '8.6',\n",
2434 | " '8.6',\n",
2435 | " '8.6',\n",
2436 | " '8.6',\n",
2437 | " '8.6',\n",
2438 | " '8.6',\n",
2439 | " '8.6',\n",
2440 | " '8.5',\n",
2441 | " '8.5',\n",
2442 | " '8.5',\n",
2443 | " '8.5',\n",
2444 | " '8.5',\n",
2445 | " '8.5',\n",
2446 | " '8.5',\n",
2447 | " '8.5',\n",
2448 | " '8.5',\n",
2449 | " '8.5',\n",
2450 | " '8.5',\n",
2451 | " '8.5',\n",
2452 | " '8.5',\n",
2453 | " '8.5',\n",
2454 | " '8.5',\n",
2455 | " '8.5',\n",
2456 | " '8.5',\n",
2457 | " '8.5',\n",
2458 | " '8.5',\n",
2459 | " '8.4',\n",
2460 | " '8.4',\n",
2461 | " '8.4',\n",
2462 | " '8.4',\n",
2463 | " '8.4',\n",
2464 | " '8.4',\n",
2465 | " '8.4',\n",
2466 | " '8.4',\n",
2467 | " '8.4',\n",
2468 | " '8.4',\n",
2469 | " '8.4',\n",
2470 | " '8.4',\n",
2471 | " '8.4',\n",
2472 | " '8.4',\n",
2473 | " '8.4',\n",
2474 | " '8.4',\n",
2475 | " '8.4',\n",
2476 | " '8.4',\n",
2477 | " '8.4',\n",
2478 | " '8.3',\n",
2479 | " '8.3',\n",
2480 | " '8.3',\n",
2481 | " '8.3',\n",
2482 | " '8.3',\n",
2483 | " '8.3',\n",
2484 | " '8.3',\n",
2485 | " '8.3',\n",
2486 | " '8.3',\n",
2487 | " '8.3',\n",
2488 | " '8.3',\n",
2489 | " '8.3',\n",
2490 | " '8.3',\n",
2491 | " '8.3',\n",
2492 | " '8.3',\n",
2493 | " '8.3',\n",
2494 | " '8.3',\n",
2495 | " '8.3',\n",
2496 | " '8.3',\n",
2497 | " '8.3',\n",
2498 | " '8.3',\n",
2499 | " '8.3',\n",
2500 | " '8.3',\n",
2501 | " '8.3',\n",
2502 | " '8.3',\n",
2503 | " '8.3',\n",
2504 | " '8.3',\n",
2505 | " '8.3',\n",
2506 | " '8.3',\n",
2507 | " '8.2',\n",
2508 | " '8.2',\n",
2509 | " '8.2',\n",
2510 | " '8.2',\n",
2511 | " '8.2',\n",
2512 | " '8.2',\n",
2513 | " '8.2',\n",
2514 | " '8.2',\n",
2515 | " '8.2',\n",
2516 | " '8.2',\n",
2517 | " '8.2',\n",
2518 | " '8.2',\n",
2519 | " '8.2',\n",
2520 | " '8.2',\n",
2521 | " '8.2',\n",
2522 | " '8.2',\n",
2523 | " '8.2',\n",
2524 | " '8.2',\n",
2525 | " '8.2',\n",
2526 | " '8.2',\n",
2527 | " '8.2',\n",
2528 | " '8.2',\n",
2529 | " '8.2',\n",
2530 | " '8.2',\n",
2531 | " '8.2',\n",
2532 | " '8.2',\n",
2533 | " '8.2',\n",
2534 | " '8.2',\n",
2535 | " '8.2',\n",
2536 | " '8.2',\n",
2537 | " '8.2',\n",
2538 | " '8.2',\n",
2539 | " '8.2',\n",
2540 | " '8.2',\n",
2541 | " '8.2',\n",
2542 | " '8.2',\n",
2543 | " '8.2',\n",
2544 | " '8.2',\n",
2545 | " '8.2',\n",
2546 | " '8.2',\n",
2547 | " '8.2',\n",
2548 | " '8.2',\n",
2549 | " '8.2',\n",
2550 | " '8.2',\n",
2551 | " '8.2',\n",
2552 | " '8.2',\n",
2553 | " '8.2',\n",
2554 | " '8.2',\n",
2555 | " '8.2',\n",
2556 | " '8.2',\n",
2557 | " '8.1',\n",
2558 | " '8.1',\n",
2559 | " '8.1',\n",
2560 | " '8.1',\n",
2561 | " '8.1',\n",
2562 | " '8.1',\n",
2563 | " '8.1',\n",
2564 | " '8.1',\n",
2565 | " '8.1',\n",
2566 | " '8.1',\n",
2567 | " '8.1',\n",
2568 | " '8.1',\n",
2569 | " '8.1',\n",
2570 | " '8.1',\n",
2571 | " '8.1',\n",
2572 | " '8.1',\n",
2573 | " '8.1',\n",
2574 | " '8.1',\n",
2575 | " '8.1',\n",
2576 | " '8.1',\n",
2577 | " '8.1',\n",
2578 | " '8.1',\n",
2579 | " '8.1',\n",
2580 | " '8.1',\n",
2581 | " '8.1',\n",
2582 | " '8.1',\n",
2583 | " '8.1',\n",
2584 | " '8.1',\n",
2585 | " '8.1',\n",
2586 | " '8.1',\n",
2587 | " '8.1',\n",
2588 | " '8.1',\n",
2589 | " '8.1',\n",
2590 | " '8.1',\n",
2591 | " '8.1',\n",
2592 | " '8.1',\n",
2593 | " '8.1',\n",
2594 | " '8.1',\n",
2595 | " '8.1',\n",
2596 | " '8.1',\n",
2597 | " '8.1',\n",
2598 | " '8.1',\n",
2599 | " '8.1',\n",
2600 | " '8.1',\n",
2601 | " '8.1',\n",
2602 | " '8.1',\n",
2603 | " '8.1',\n",
2604 | " '8.1',\n",
2605 | " '8.1',\n",
2606 | " '8.1',\n",
2607 | " '8.1',\n",
2608 | " '8.1',\n",
2609 | " '8.1',\n",
2610 | " '8.1',\n",
2611 | " '8.1',\n",
2612 | " '8.1',\n",
2613 | " '8.1',\n",
2614 | " '8.1',\n",
2615 | " '8.1',\n",
2616 | " '8.1',\n",
2617 | " '8.1',\n",
2618 | " '8.1',\n",
2619 | " '8.1',\n",
2620 | " '8.1',\n",
2621 | " '8.1',\n",
2622 | " '8.0',\n",
2623 | " '8.0',\n",
2624 | " '8.0',\n",
2625 | " '8.0',\n",
2626 | " '8.0',\n",
2627 | " '8.0',\n",
2628 | " '8.0',\n",
2629 | " '8.0',\n",
2630 | " '8.0',\n",
2631 | " '8.0',\n",
2632 | " '8.0',\n",
2633 | " '8.0',\n",
2634 | " '8.0',\n",
2635 | " '8.0',\n",
2636 | " '8.0',\n",
2637 | " '8.0',\n",
2638 | " '8.0',\n",
2639 | " '8.0',\n",
2640 | " '8.0',\n",
2641 | " '8.0',\n",
2642 | " '8.0',\n",
2643 | " '8.0',\n",
2644 | " '8.0',\n",
2645 | " '8.0',\n",
2646 | " '8.0',\n",
2647 | " '8.0',\n",
2648 | " '8.0',\n",
2649 | " '8.0',\n",
2650 | " '8.0',\n",
2651 | " '8.0',\n",
2652 | " '8.0',\n",
2653 | " '8.0',\n",
2654 | " '8.0',\n",
2655 | " '8.0',\n",
2656 | " '8.0',\n",
2657 | " '8.0',\n",
2658 | " '8.0',\n",
2659 | " '8.0',\n",
2660 | " '8.0',\n",
2661 | " '8.0']"
2662 | ]
2663 | },
2664 | "execution_count": 12,
2665 | "metadata": {},
2666 | "output_type": "execute_result"
2667 | }
2668 | ],
2669 | "source": [
2670 | "# Parse ratings: take the text of each rating cell and remove the\n",
2671 | "# newline characters from it; the values are kept as strings.\n",
2672 | "ratings = [\n",
2673 | "    cell.get_text().replace('\\n', '') for cell in scraped_ratings\n",
2674 | "]\n",
2675 | "ratings"
2676 | ]
2677 | },
2678 | {
2679 | "cell_type": "markdown",
2680 | "metadata": {},
2681 | "source": [
2682 | "## Store the Scraped Data"
2683 | ]
2684 | },
2685 | {
2686 | "cell_type": "code",
2687 | "execution_count": 13,
2688 | "metadata": {},
2689 | "outputs": [
2690 | {
2691 | "data": {
2692 | "text/html": [
2693 | "\n",
2694 | "\n",
2707 | "
\n",
2708 | " \n",
2709 | " \n",
2710 | " | \n",
2711 | " Movie Names | \n",
2712 | " Ratings | \n",
2713 | "
\n",
2714 | " \n",
2715 | " \n",
2716 | " \n",
2717 | " 0 | \n",
2718 | " 1. The Shawshank Redemption(1994) | \n",
2719 | " 9.2 | \n",
2720 | "
\n",
2721 | " \n",
2722 | " 1 | \n",
2723 | " 2. The Godfather(1972) | \n",
2724 | " 9.2 | \n",
2725 | "
\n",
2726 | " \n",
2727 | " 2 | \n",
2728 | " 3. The Dark Knight(2008) | \n",
2729 | " 9.0 | \n",
2730 | "
\n",
2731 | " \n",
2732 | " 3 | \n",
2733 | " 4. The Godfather Part II(1974) | \n",
2734 | " 9.0 | \n",
2735 | "
\n",
2736 | " \n",
2737 | " 4 | \n",
2738 | " 5. 12 Angry Men(1957) | \n",
2739 | " 9.0 | \n",
2740 | "
\n",
2741 | " \n",
2742 | "
\n",
2743 | "
"
2744 | ],
2745 | "text/plain": [
2746 | " Movie Names Ratings\n",
2747 | "0 1. The Shawshank Redemption(1994) 9.2\n",
2748 | "1 2. The Godfather(1972) 9.2\n",
2749 | "2 3. The Dark Knight(2008) 9.0\n",
2750 | "3 4. The Godfather Part II(1974) 9.0\n",
2751 | "4 5. 12 Angry Men(1957) 9.0"
2752 | ]
2753 | },
2754 | "execution_count": 13,
2755 | "metadata": {},
2756 | "output_type": "execute_result"
2757 | }
2758 | ],
2759 | "source": [
2760 | "# Build the table in one step from the two parsed lists (one column\n",
2761 | "# each), then preview the first few rows.\n",
2762 | "data = pd.DataFrame({'Movie Names': movies, 'Ratings': ratings})\n",
2763 | "data.head()"
2764 | ]
2765 | },
2766 | {
2767 | "cell_type": "code",
2768 | "execution_count": 13,
2769 | "metadata": {},
2770 | "outputs": [],
2771 | "source": [
2772 | "data.to_csv('IMDB Top Movies.csv', index=False)"
2773 | ]
2774 | },
2775 | {
2776 | "cell_type": "code",
2777 | "execution_count": null,
2778 | "metadata": {},
2779 | "outputs": [],
2780 | "source": []
2781 | },
2782 | {
2783 | "cell_type": "code",
2784 | "execution_count": null,
2785 | "metadata": {},
2786 | "outputs": [],
2787 | "source": []
2788 | },
2789 | {
2790 | "cell_type": "code",
2791 | "execution_count": null,
2792 | "metadata": {},
2793 | "outputs": [],
2794 | "source": []
2795 | }
2796 | ],
2797 | "metadata": {
2798 | "kernelspec": {
2799 | "display_name": "Python 3 (ipykernel)",
2800 | "language": "python",
2801 | "name": "python3"
2802 | },
2803 | "language_info": {
2804 | "codemirror_mode": {
2805 | "name": "ipython",
2806 | "version": 3
2807 | },
2808 | "file_extension": ".py",
2809 | "mimetype": "text/x-python",
2810 | "name": "python",
2811 | "nbconvert_exporter": "python",
2812 | "pygments_lexer": "ipython3",
2813 | "version": "3.8.3"
2814 | }
2815 | },
2816 | "nbformat": 4,
2817 | "nbformat_minor": 4
2818 | }
2819 |
--------------------------------------------------------------------------------
/Scraping Multimedia Files using Beautiful Soup.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import Modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from bs4 import BeautifulSoup\n",
17 | "import requests"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Get Page Content from URL"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "url = 'https://www.thehindu.com/news/national/coronavirus-live-updates-may-29-2021/article34672944.ece?homepage=true'"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "page = requests.get(url)"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 4,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/plain": [
53 | ""
54 | ]
55 | },
56 | "execution_count": 4,
57 | "metadata": {},
58 | "output_type": "execute_result"
59 | }
60 | ],
61 | "source": [
62 | "page"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 5,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# display page content\n",
72 | "# page.content"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 6,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "# parse the data\n",
82 | "soup = BeautifulSoup(page.content, 'html.parser')\n",
83 | "# print(soup.prettify())"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 7,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/plain": [
94 | ""
95 | ]
96 | },
97 | "execution_count": 7,
98 | "metadata": {},
99 | "output_type": "execute_result"
100 | }
101 | ],
102 | "source": [
103 | "# find the image src link\n",
104 | "img_tag = soup.find('source')\n",
105 | "img_tag"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 8,
111 | "metadata": {},
112 | "outputs": [
113 | {
114 | "data": {
115 | "text/plain": [
116 | "'https://th-i.thgim.com/public/news/national/2g2qwq/article53557510.ece/alternates/LANDSCAPE_1200/Migrants2jpg'"
117 | ]
118 | },
119 | "execution_count": 8,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "img_tag['srcset']"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 9,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "img_url = img_tag['srcset']"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "## Download the Image from URL"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 10,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "image = requests.get(img_url)"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 11,
156 | "metadata": {},
157 | "outputs": [],
158 | "source": [
159 | "# store the image in file; raise on an HTTP error so an error page is never saved as image.jpg\n",
160 | "image.raise_for_status()\n",
161 | "with open('image.jpg', 'wb') as file:\n",
162 | "    file.write(image.content)  # body is already fully downloaded, so write it in one call"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "## Download PPT from URL"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 12,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "ppt = requests.get('http://www.howtowebscrape.com/examples/media/images/SampleSlides.pptx')"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 13,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "ppt.raise_for_status()  # stop on an HTTP error rather than saving an error page as sample.pptx\n",
188 | "with open('sample.pptx', 'wb') as file:\n",
189 | "    file.write(ppt.content)  # body is already fully downloaded, so write it in one call"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
196 | "## Download Video from URL"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 14,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "video = requests.get('http://www.howtowebscrape.com/examples/media/images/BigRabbit.mp4')"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 15,
211 | "metadata": {},
212 | "outputs": [],
213 | "source": [
214 | "video.raise_for_status()  # stop on an HTTP error rather than saving an error page as BigRabbit.mp4\n",
215 | "with open('BigRabbit.mp4', 'wb') as file:\n",
216 | "    file.write(video.content)  # body is already fully downloaded, so write it in one call"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": []
225 | }
226 | ],
227 | "metadata": {
228 | "kernelspec": {
229 | "display_name": "Python 3 (ipykernel)",
230 | "language": "python",
231 | "name": "python3"
232 | },
233 | "language_info": {
234 | "codemirror_mode": {
235 | "name": "ipython",
236 | "version": 3
237 | },
238 | "file_extension": ".py",
239 | "mimetype": "text/x-python",
240 | "name": "python",
241 | "nbconvert_exporter": "python",
242 | "pygments_lexer": "ipython3",
243 | "version": "3.8.3"
244 | }
245 | },
246 | "nbformat": 4,
247 | "nbformat_minor": 4
248 | }
249 |
--------------------------------------------------------------------------------
/Scraping Products from Amazon using Selenium-Dynamic Website.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import Modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from selenium import webdriver\n",
17 | "from selenium.webdriver.common.by import By\n",
18 | "from time import sleep"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "## Set path for Webdriver"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "# path = 'C://chromedriver.exe'"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# open the browser\n",
44 | "# browser = webdriver.Chrome(executable_path = path)\n",
45 | "browser = webdriver.Chrome()"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 4,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "# load the webpage\n",
55 | "browser.get('https://www.amazon.in')"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 5,
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "browser.maximize_window()"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 6,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "# get the input elements\n",
74 | "input_search = browser.find_element(By.ID, 'twotabsearchtextbox')\n",
75 | "search_button = browser.find_element(By.XPATH, \"(//input[@type='submit'])[1]\")"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 7,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "# send the input to the webpage\n",
85 | "input_search.send_keys(\"Smartphones under 10000\")\n",
86 | "sleep(1)\n",
87 | "search_button.click()"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "## Scrape Products from Amazon"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 8,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "Scraping page 1\n",
107 | "Scraping page 2\n",
108 | "Scraping page 3\n",
109 | "Scraping page 4\n",
110 | "Scraping page 5\n",
111 | "Scraping page 6\n",
112 | "Scraping page 7\n",
113 | "Scraping page 8\n",
114 | "Scraping page 9\n",
115 | "Scraping page 10\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "products = []  # product titles gathered across every results page visited\n",
121 | "for i in range(10):  # visit at most 10 result pages\n",
122 | "    print('Scraping page', i+1)\n",
123 | "    products.extend(p.text for p in browser.find_elements(By.XPATH, \"//span[@class='a-size-medium a-color-base a-text-normal']\"))\n",
124 | "    next_button = browser.find_elements(By.XPATH, \"//a[text()='Next']\")  # empty list when no 'Next' link exists\n",
125 | "    if not next_button:\n",
126 | "        break  # last results page reached: stop instead of raising NoSuchElementException\n",
127 | "    next_button[0].click()\n",
128 | "    sleep(2)  # give the next results page time to load before reading it"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 9,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "data": {
138 | "text/plain": [
139 | "186"
140 | ]
141 | },
142 | "execution_count": 9,
143 | "metadata": {},
144 | "output_type": "execute_result"
145 | }
146 | ],
147 | "source": [
148 | "len(products)"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 10,
154 | "metadata": {},
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/plain": [
159 | "['Lava Blaze 2 (6GB RAM, 128GB Storage) - Glass Blue | 18W Fast Charging | 6.5 inch 90Hz Punch Hole Display | Side Fingerprint Sensor | Upto 11GB Expandable RAM | 5000 mAh Battery',\n",
160 | " 'Lava Yuva 2 Pro (Glass Lavender, 4GB RAM, 64GB Storage)| 2.3 Ghz Octa Core Helio G37| 13 MP AI Triple Camera |Fingerprint Sensor| 5000 mAh Battery| Upto 7GB Expandable RAM',\n",
161 | " 'realme narzo N53 (Feather Black, 4GB+64GB) 33W Segment Fastest Charging | Slimmest Phone in Segment | 90 Hz Smooth Display',\n",
162 | " 'realme narzo 50i Prime (Dark Blue 4GB RAM+64GB Storage) Octa-core Processor | 5000 mAh Battery',\n",
163 | " 'Redmi A2 (Aqua Blue, 2GB RAM, 32GB Storage) | Powerful Octa Core G36 Processor | Upto 7GB RAM | Large 16.5 cm HD+ Display with Massive 5000mAh Battery | 2 Years Warranty [Limited time Offer]']"
164 | ]
165 | },
166 | "execution_count": 10,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "products[:5]"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 11,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "browser.quit()"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": []
190 | }
191 | ],
192 | "metadata": {
193 | "kernelspec": {
194 | "display_name": "Python 3 (ipykernel)",
195 | "language": "python",
196 | "name": "python3"
197 | },
198 | "language_info": {
199 | "codemirror_mode": {
200 | "name": "ipython",
201 | "version": 3
202 | },
203 | "file_extension": ".py",
204 | "mimetype": "text/x-python",
205 | "name": "python",
206 | "nbconvert_exporter": "python",
207 | "pygments_lexer": "ipython3",
208 | "version": "3.8.3"
209 | }
210 | },
211 | "nbformat": 4,
212 | "nbformat_minor": 4
213 | }
214 |
--------------------------------------------------------------------------------
/Scraping XML Data using Beautiful Soup.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import Modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from bs4 import BeautifulSoup\n",
17 | "import requests"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "## Get Data from URL"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "url = \"https://www.w3schools.com/xml/note.xml\""
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "# get the data; the timeout stops the cell from hanging forever if the server never answers\n",
43 | "xml = requests.get(url, timeout=10)"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 4,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/plain": [
54 | "b'\\n\\n Tove\\n Jani\\n Reminder\\n Don\\'t forget me this weekend!\\n'"
55 | ]
56 | },
57 | "execution_count": 4,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "xml.content"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 6,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "name": "stdout",
73 | "output_type": "stream",
74 | "text": [
75 | "\n",
76 | "\n",
77 | "Tove\n",
78 | "Jani\n",
79 | "Reminder\n",
80 | "Don't forget me this weekend!\n",
81 | "\n"
82 | ]
83 | }
84 | ],
85 | "source": [
86 | "# parse the data\n",
87 | "soup = BeautifulSoup(xml.content, 'xml')\n",
88 | "print(soup)"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 7,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/plain": [
99 | "Reminder"
100 | ]
101 | },
102 | "execution_count": 7,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "xml_tag = soup.find('heading')\n",
109 | "xml_tag"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 8,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "data": {
119 | "text/plain": [
120 | "'Reminder'"
121 | ]
122 | },
123 | "execution_count": 8,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "xml_tag.text"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 9,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "\n",
141 | "Tove\n",
142 | "Jani\n",
143 | "Reminder\n",
144 | "Don't forget me this weekend!\n",
145 | ""
146 | ]
147 | },
148 | "execution_count": 9,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "xml_tag = soup.find('note')\n",
155 | "xml_tag"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 10,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "\n",
168 | "Tove\n",
169 | "Jani\n",
170 | "Reminder\n",
171 | "Don't forget me this weekend!\n",
172 | "\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "print(xml_tag.text)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": []
186 | }
187 | ],
188 | "metadata": {
189 | "kernelspec": {
190 | "display_name": "Python 3 (ipykernel)",
191 | "language": "python",
192 | "name": "python3"
193 | },
194 | "language_info": {
195 | "codemirror_mode": {
196 | "name": "ipython",
197 | "version": 3
198 | },
199 | "file_extension": ".py",
200 | "mimetype": "text/x-python",
201 | "name": "python",
202 | "nbconvert_exporter": "python",
203 | "pygments_lexer": "ipython3",
204 | "version": "3.8.3"
205 | }
206 | },
207 | "nbformat": 4,
208 | "nbformat_minor": 4
209 | }
210 |
--------------------------------------------------------------------------------
/Scraping data using Regular Expression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Part 1: Using Beautiful Soup"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Import Modules"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from bs4 import BeautifulSoup\n",
24 | "import requests\n",
25 | "import re"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "## Get the Data using URL"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "url = \"https://www.imdb.com/chart/top/\""
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "HEADERS = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 4,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "data": {
60 | "text/plain": [
61 | ""
62 | ]
63 | },
64 | "execution_count": 4,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "# get page data; the timeout stops the cell from hanging forever if the server never answers\n",
71 | "page = requests.get(url, headers=HEADERS, timeout=10)\n",
72 | "page"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 15,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "# page.content"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 20,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "# parse the data\n",
91 | "soup = BeautifulSoup(page.content, 'html.parser')\n",
92 | "# print(soup.prettify())"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "## Regex to find a tag by its text"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 24,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "data": {
109 | "text/plain": [
110 | "IMDb Top 250 as rated by regular IMDb voters
"
111 | ]
112 | },
113 | "execution_count": 24,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "tag = soup.find('div', string=re.compile(r'by'))  # first div whose string contains 'by'; the old trailing '+' only repeated 'y' and changed nothing\n",
120 | "tag"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 25,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/plain": [
131 | "'IMDb Top 250 as rated by regular IMDb voters'"
132 | ]
133 | },
134 | "execution_count": 25,
135 | "metadata": {},
136 | "output_type": "execute_result"
137 | }
138 | ],
139 | "source": [
140 | "tag.text"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "## Part 2: Using Regular Expression"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 26,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
156 | "# page.text"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 27,
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "data": {
166 | "text/plain": [
167 | "['IMDb Top 250 Movies', 'IMDb, an Amazon company']"
168 | ]
169 | },
170 | "execution_count": 27,
171 | "metadata": {},
172 | "output_type": "execute_result"
173 | }
174 | ],
175 | "source": [
176 | "re.findall(r'<title>(.*?)</title>', page.text)  # text of every title element in the raw HTML (non-greedy, one match per element)"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 28,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
186 | "text/plain": [
187 | "[('IMDb Top 250 Movies', 'IMDb Top 250 Movies'),\n",
188 | " ('IMDb, an Amazon company', 'IMDb, an Amazon company')]"
189 | ]
190 | },
191 | "execution_count": 28,
192 | "metadata": {},
193 | "output_type": "execute_result"
194 | }
195 | ],
196 | "source": [
197 | "re.findall(r'(<title>(.*?)</title>)', page.text)  # two groups per match: the whole title element, then just its text"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": []
206 | }
207 | ],
208 | "metadata": {
209 | "kernelspec": {
210 | "display_name": "Python 3 (ipykernel)",
211 | "language": "python",
212 | "name": "python3"
213 | },
214 | "language_info": {
215 | "codemirror_mode": {
216 | "name": "ipython",
217 | "version": 3
218 | },
219 | "file_extension": ".py",
220 | "mimetype": "text/x-python",
221 | "name": "python",
222 | "nbconvert_exporter": "python",
223 | "pygments_lexer": "ipython3",
224 | "version": "3.8.3"
225 | }
226 | },
227 | "nbformat": 4,
228 | "nbformat_minor": 4
229 | }
230 |
--------------------------------------------------------------------------------
/Taking Screenshot of Webpage using Selenium.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Import Modules"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from selenium import webdriver"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "## Set path for Driver"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# path = 'C://chromedriver.exe'"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 3,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "# browser = webdriver.Chrome(executable_path=path)\n",
42 | "browser = webdriver.Chrome()"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 4,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "browser.get(\"https://www.google.com\")"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 5,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": [
62 | "True"
63 | ]
64 | },
65 | "execution_count": 5,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "browser.save_screenshot('screenshot.png')"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 6,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "browser.quit()"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": []
89 | }
90 | ],
91 | "metadata": {
92 | "kernelspec": {
93 | "display_name": "Python 3 (ipykernel)",
94 | "language": "python",
95 | "name": "python3"
96 | },
97 | "language_info": {
98 | "codemirror_mode": {
99 | "name": "ipython",
100 | "version": 3
101 | },
102 | "file_extension": ".py",
103 | "mimetype": "text/x-python",
104 | "name": "python",
105 | "nbconvert_exporter": "python",
106 | "pygments_lexer": "ipython3",
107 | "version": "3.8.3"
108 | }
109 | },
110 | "nbformat": 4,
111 | "nbformat_minor": 4
112 | }
113 |
--------------------------------------------------------------------------------