├── BS4
├── Lec 11-17.py
├── Lec 18-21.py
├── Lec 4-6.py
└── Lec 9-10.py
├── LICENSE
├── Quizzes
├── BS4
│ ├── Quiz 2 (07-Quiz(Extracting Author Names)).py
│ ├── Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py
│ ├── quiz2solution.csv
│ └── quiz3solution.csv
├── CSS Selectors
│ ├── Quiz 1 (03-Quiz(Tags)).html
│ ├── Quiz 10 (28-Quiz(Last Child)).html
│ ├── Quiz 11 (31-Quiz(Negation)).html
│ ├── Quiz 12 (34-Quiz(Attributes Values)).html
│ ├── Quiz 2 (06-Quiz(Descendants)).html
│ ├── Quiz 3 (08-Quiz(ID)).html
│ ├── Quiz 4 (13-Quiz(Class with Tag)).html
│ ├── Quiz 5 (16-Quiz(Combining Two Selectors)).html
│ ├── Quiz 6 (19-Quiz(Adjacent Sibling)).html
│ ├── Quiz 7 (21-Quiz(General Sibling)).html
│ ├── Quiz 8 (24-Quiz(First Child)).html
│ └── Quiz 9 (26-Quiz(Only Child)).html
├── Requests
│ ├── Quiz 1 (04-Quiz(Extracting Authors)).py
│ ├── Quiz 2 (07-Quiz(Extracting Author and Quotes)).py
│ ├── Quiz 3 (13-Quiz(Extracting Top Stats from Cricinfo)).py
│ ├── quiz1solution.txt
│ ├── quiz2solution.csv
│ └── quiz3solution.csv
├── Scrapy
│ ├── Quiz 1 (24-Quiz(Get The Tags)).py
│ ├── Quiz 2 (33-Quiz(Extracting the Year)).py
│ ├── quiz1solution.csv
│ └── quiz2solution.csv
└── Selenium
│ ├── Quiz 1 (06-Quiz(Extracting Quotes)).py
│ └── Quiz 2 (12-Quiz(Log in and Extract Quote)).py
├── README.md
├── Requests
├── Lec 10-12.py
└── Lec 2-6.py
├── SCRAPY.zip
├── Selenium
├── DeepL Script.py
├── Selenium Different Lectures Scripts
│ ├── lec 11.py
│ ├── lec 4.py
│ ├── lec 5.py
│ └── lec 9-10.py
├── chromedriver.exe
├── chromedriver_win32.zip
└── input.txt
└── Slides.pptx
/BS4/Lec 11-17.py:
--------------------------------------------------------------------------------
"""Print details for every movie listed on the IMDb Top 250 chart.

For each chart row the linked movie page is downloaded and four values are
printed: the title, the text of the ``<time>`` element, and the text of the
first two links found in the page's ``subtext`` block.
"""
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://www.imdb.com/'
# Without a timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download *url* and return it parsed with the built-in HTML parser.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer so that a blocked or
    missing page fails here instead of as an ``AttributeError`` further down.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


chart = fetch_soup(urljoin(BASE_URL, 'chart/top/'))
tbody = chart.find('tbody', {'class': 'lister-list'})
for tr in tbody.find_all('tr'):
    td = tr.find('td', {'class': 'titleColumn'})
    # urljoin copes with relative and absolute hrefs alike and avoids the
    # doubled slash that plain string concatenation produced.
    movie_url = urljoin(BASE_URL, td.a['href'])

    info = fetch_soup(movie_url).find('div', {'class': 'subtext'})
    if info is None:
        # Skip this movie rather than abort the whole run.
        print(f'{td.a.string}: no "subtext" block found, skipping',
              file=sys.stderr)
        continue

    links = info.find_all('a')
    print(td.a.string)
    print(info.time.string.strip())
    print(links[0].string.strip())
    print(links[1].string.strip())
24 |
25 |
--------------------------------------------------------------------------------
/BS4/Lec 18-21.py:
--------------------------------------------------------------------------------
"""Look up a movie on the IMDb Top 250 chart and list related titles.

The user types a movie name; when a chart row with that name is found, the
script prints the name behind the first ``credit_summary_item`` link of the
movie page (labelled "Dir Name") and then every title found in the
``knownfor`` block of the page that link points to.
"""
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://www.imdb.com/'
# Without a timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download *url* and return it parsed with the built-in HTML parser.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer so that a blocked or
    missing page fails here instead of as an ``AttributeError`` further down.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


# Comparison is case-insensitive; surrounding whitespace typed by the user
# is ignored so that "Inception " still matches.
movie_name = input('Enter Movie Name: ').strip().lower()

chart = fetch_soup(urljoin(BASE_URL, 'chart/top/'))
tbody = chart.find('tbody', {'class': 'lister-list'})
for tr in tbody.find_all('tr'):
    td = tr.find('td', {'class': 'titleColumn'})
    if td.a.string.strip().lower() != movie_name:
        continue

    # urljoin avoids the doubled slash produced by string concatenation.
    movie_page = fetch_soup(urljoin(BASE_URL, td.a['href']))
    summary = movie_page.find('div', {'class': 'credit_summary_item'})
    print("Dir Name: ", summary.a.string)

    person_page = fetch_soup(urljoin(BASE_URL, summary.a['href']))
    known_for = person_page.find('div', {'id': 'knownfor'})
    for title_div in known_for.find_all('div', {'class': 'knownfor-title'}):
        role_div = title_div.find('div', {'class': 'knownfor-title-role'})
        print(role_div.a.string)

    # Only the first matching chart row is processed.
    break
else:
    # The loop ended without a match; say so instead of exiting silently.
    print(f'"{movie_name}" was not found on the Top 250 chart.')
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/BS4/Lec 4-6.py:
--------------------------------------------------------------------------------
"""Save the text of every quote on the quotes.toscrape.com front page.

Each ``<span class="text">`` element is written on its own line of
``bs4quotes.txt``.
"""
from bs4 import BeautifulSoup
import requests

# A timeout keeps a stalled connection from hanging the script; the status
# check makes an error page fail loudly instead of producing an empty file.
response = requests.get('https://quotes.toscrape.com/', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# An explicit encoding keeps the write independent of the platform's
# default code page (the quotes contain typographic characters).
with open('bs4quotes.txt', 'w', encoding='utf-8') as f:
    for tag in soup.find_all('span', {'class': 'text'}):
        # get_text() also works when the span contains nested markup, a case
        # in which ``.string`` is None and write() would raise TypeError.
        f.write(tag.get_text())
        f.write('\n')
--------------------------------------------------------------------------------
/BS4/Lec 9-10.py:
--------------------------------------------------------------------------------
"""Demonstrate reading and changing the attributes of a BeautifulSoup tag."""
from bs4 import BeautifulSoup

# NOTE(review): the literal previously contained only the text
# ' Hello World' with no markup, so ``soup.b`` was None and the first
# attribute lookup raised TypeError.  The element is restored here; the
# attribute VALUES are illustrative placeholders - confirm them against the
# lecture material.
html = '<b id="greeting" class="bold large">Hello World</b>'

# multi_valued_attributes=None makes BeautifulSoup keep ``class`` as one
# plain string instead of splitting it into a list of class names.
soup = BeautifulSoup(html, 'html.parser', multi_valued_attributes=None)
tag = soup.b

# Read access: individual attributes, then the whole attribute dict.
print(tag['id'])
print(tag['class'])
print(tag.attrs)
print(tag)

# Write access: assigning to an attribute changes the tag in place.
tag['id'] = 'HELLO'
tag['class'] = 'World'
print(tag)
print(tag['class'])
14 |
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Quizzes/BS4/Quiz 2 (07-Quiz(Extracting Author Names)).py:
--------------------------------------------------------------------------------
"""Write the author of every quote on quotes.toscrape.com to AuthorNames.csv.

One author name per line, taken from the ``<small class="author">`` elements
of the front page.
"""
import requests
from bs4 import BeautifulSoup

# A timeout keeps a stalled connection from hanging the script; the status
# check makes an error page fail loudly instead of producing an empty file.
response = requests.get('https://quotes.toscrape.com/', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Explicit UTF-8 so names with accented letters are written identically on
# every platform instead of depending on the default code page.
with open('AuthorNames.csv', 'w', encoding='utf-8') as f:
    for tag in soup.find_all('small', {'class': 'author'}):
        # get_text() is safe even if the element has nested children,
        # where ``.string`` would be None.
        f.write(tag.get_text())
        f.write('\n')
10 |
--------------------------------------------------------------------------------
/Quizzes/BS4/Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py:
--------------------------------------------------------------------------------
"""Export name, year and rating of the IMDb Top 250 movies to a CSV file.

For every chart row the link text and ``<span>`` text of the ``titleColumn``
cell and the ``<strong>`` text of the ``ratingColumn`` cell are written as
one row of ``imdbMoviesNameRating.csv``.
"""
import csv

import requests
from bs4 import BeautifulSoup

# A timeout keeps a stalled connection from hanging the script; the status
# check makes an error page fail loudly instead of as an AttributeError.
response = requests.get('https://www.imdb.com/chart/top/', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
tbody = soup.find('tbody', {'class': 'lister-list'})

# newline='' is what the csv module expects for files it writes to.
with open('imdbMoviesNameRating.csv', 'w', encoding='utf-8', newline='') as f:
    # csv.writer quotes fields that contain commas (for example
    # "The Good, the Bad and the Ugly"), which joining the values with ","
    # by hand did not do.
    writer = csv.writer(f)
    for tr in tbody.find_all('tr'):
        title_td = tr.find('td', {'class': 'titleColumn'})
        rating_td = tr.find('td', {'class': 'ratingColumn'})
        writer.writerow([
            title_td.a.string,
            title_td.span.string,
            rating_td.strong.string,
        ])
14 |
15 |
--------------------------------------------------------------------------------
/Quizzes/BS4/quiz2solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz2solution.csv
--------------------------------------------------------------------------------
/Quizzes/BS4/quiz3solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz3solution.csv
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 1 (03-Quiz(Tags)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
Hello
10 |
World
11 |
How are you?
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 10 (28-Quiz(Last Child)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
a
10 |
b
11 |
c
12 |
d
13 |
e
14 |
c
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 11 (31-Quiz(Negation)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | a
10 | b
11 |
12 |
13 | c
14 |
15 |
16 | d
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 12 (34-Quiz(Attributes Values)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | a
9 |
10 |
11 | b
12 |
13 |
14 | c
15 |
16 |
17 | d
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 2 (06-Quiz(Descendants)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
13 |
14 |
15 | d
16 | e
17 | f
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 3 (08-Quiz(ID)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
13 |
14 | d
15 | e
16 | f
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 4 (13-Quiz(Class with Tag)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
13 |
14 | d
15 | e
16 | b
17 | ball
18 | bat
19 | f
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 5 (16-Quiz(Combining Two Selectors)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
a
10 |
ball
11 |
c
12 |
13 |
14 | d
15 | e
16 | bat
17 | f
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 6 (19-Quiz(Adjacent Sibling)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
a
10 |
ball
11 |
c
12 |
13 |
14 | d
15 | e
16 | bat
17 | f
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 7 (21-Quiz(General Sibling)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
a
10 |
ball
11 |
c
12 |
13 |
14 | d
15 | e
16 | bat
17 | f
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 8 (24-Quiz(First Child)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
a
10 |
ball
11 |
c
12 |
cc
13 |
ccc
14 |
d
15 |
cccc
16 |
17 |
18 | d
19 | e
20 | bat
21 | f
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 9 (26-Quiz(Only Child)).html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
13 |
14 | b
15 | c
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 1 (04-Quiz(Extracting Authors)).py:
--------------------------------------------------------------------------------
"""Extract the quote authors from quotes.toscrape.com without an HTML parser.

The page is scanned line by line and every author name found is written on
its own line of ``Authors.txt``.
"""
import html
import re

import requests

# NOTE(review): the marker strings in the previous version were empty
# (``if '' in line``), a test that is true for every line, so the whole page
# was written out.  This pattern assumes the author is rendered as
# ``<small class="author" ...>Name</small>`` - confirm against the live page.
AUTHOR_PATTERN = re.compile(r'<small class="author"[^>]*>(.*?)</small>')

# A timeout keeps a stalled connection from hanging the script; the status
# check makes an error page fail loudly instead of producing an empty file.
response = requests.get('https://quotes.toscrape.com/', timeout=30)
response.raise_for_status()

# Explicit UTF-8 so names with accented letters do not depend on the
# platform's default code page.
with open('Authors.txt', 'w', encoding='utf-8') as f:
    for line in response.text.split('\n'):
        match = AUTHOR_PATTERN.search(line)
        if match is None:
            continue
        # unescape() turns entities such as &#39; back into characters.
        author = html.unescape(match.group(1)).strip()
        f.write(author)
        f.write('\n')
--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 2 (07-Quiz(Extracting Author and Quotes)).py:
--------------------------------------------------------------------------------
"""Extract author/quote pairs from quotes.toscrape.com without an HTML parser.

The page is scanned line by line; each quote is remembered until the author
line that follows it is seen, then the pair is written to ``quotes.csv`` as
``author,quote``.
"""
import csv
import html
import re

import requests

# NOTE(review): the marker strings in the previous version were empty
# (``if '' in line``), a test that is true for every line.  These patterns
# assume the markup ``<span class="text" ...>quote</span>`` and
# ``<small class="author" ...>Name</small>`` - confirm against the live page.
QUOTE_PATTERN = re.compile(r'<span class="text"[^>]*>(.*?)</span>')
AUTHOR_PATTERN = re.compile(r'<small class="author"[^>]*>(.*?)</small>')

# A timeout keeps a stalled connection from hanging the script; the status
# check makes an error page fail loudly instead of producing an empty file.
response = requests.get('https://quotes.toscrape.com/', timeout=30)
response.raise_for_status()

# newline='' is what the csv module expects for files it writes to.
with open('quotes.csv', 'w', encoding='utf-8', newline='') as f:
    # csv.writer quotes fields containing commas; concatenating
    # author + "," + quote split such quotes across several columns.
    writer = csv.writer(f)
    quote = None  # most recent quote that has not been paired with an author
    for line in response.text.split('\n'):
        quote_match = QUOTE_PATTERN.search(line)
        if quote_match is not None:
            text = html.unescape(quote_match.group(1))
            # Drop the typographic quotation marks, as before.
            quote = text.replace('“', '').replace('”', '').strip()
            continue

        author_match = AUTHOR_PATTERN.search(line)
        if author_match is not None and quote is not None:
            author = html.unescape(author_match.group(1)).strip()
            writer.writerow([author, quote])
            quote = None
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 3 (13-Quiz(Extracting Top Stats from Cricinfo)).py:
--------------------------------------------------------------------------------
"""Download story headlines from the ESPNcricinfo JSON feed into a file.

Pages 1 and 2 of the feed are requested; the ``headline`` field of every
entry is written on its own line of ``superstats.csv``.
"""
import json

import requests

# Explicit UTF-8 so headlines with non-ASCII characters do not depend on the
# platform's default code page.
with open('superstats.csv', 'w', encoding='utf-8') as f:
    for page in range(1, 3):
        url = f'https://www.espncricinfo.com/ci/content/story/data/index.json?genre=706;;page={page}'
        # A timeout keeps a stalled connection from hanging the script; the
        # status check turns an error page into a clear HTTPError instead of
        # a JSON decoding failure.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        stories = json.loads(response.text)
        for story in stories:
            # Each headline is a single field per line, so commas inside it
            # are replaced to keep them from being read as CSV separators.
            headline = story['headline'].replace(',', '|')
            f.write(headline)
            f.write('\n')
--------------------------------------------------------------------------------
/Quizzes/Requests/quiz1solution.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/Requests/quiz1solution.txt
--------------------------------------------------------------------------------
/Quizzes/Requests/quiz2solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/Requests/quiz2solution.csv
--------------------------------------------------------------------------------
/Quizzes/Requests/quiz3solution.csv:
--------------------------------------------------------------------------------
1 | Smart Stats: Rashid Khan BBL's all-time MVP| D'arcy Short the best batsman
2 | Overseas players' impact in IPL 2020 - England on top| West Indies smash it
3 | Jofra Archer - IPL 2020's MVP by a distance
4 | Why Shikhar Dhawan and not KL Rahul tops the Smart Runs tally
5 | Luck Index - Umpire's call adds 20 runs to Mumbai's total
6 | The dropped catch that gave Kings XI Punjab a fighting chance
7 | Luck Index: Did Vijay Shankar dropping Ben Stokes hurt Sunrisers or Royals?
8 | Why Mohammed Shami trumps Purple Cap holder Kagiso Rabada in Smart Wickets tally
9 | Super Kings pay for letting Dhawan live a charmed life
10 | Smart Stats: Krunal Pandya's 2 for 26 more impactful than Quinton de Kock and Suryakumar Yadav's fifties
11 | Smart Stats: Axar Patel most impactful player Delhi Capitals' big win
12 | Archer| Agarwal and Samson take early Smart Stats honours
13 | Why Rahul Chahar is the Smart Stats player of the match
14 | Introducing Smart Stats| where context trumps raw numbers
15 | Why Pat Cummins| and not Shubman Gill| is the Smart Stats player of the match
16 | KL Rahul makes the most of Virat Kohli's lapses
17 | Why Jasprit Bumrah is the Smart Stats Player of the Match
18 | Smart Stats: Lungi Ngidi's super spell for CSK| and Chris Gayle's 175 not out
19 | Smart Stats: Why David Warner topped Virat Kohli in IPL 2016
20 | Andre Russell| the IPL's most destructive| impactful and valuable player
21 |
--------------------------------------------------------------------------------
/Quizzes/Scrapy/Quiz 1 (24-Quiz(Get The Tags)).py:
--------------------------------------------------------------------------------
1 | import scrapy
class Quiz(scrapy.Spider):
    """Spider that yields text, author and tags of each quote on the start page."""

    name = 'quiz1'
    start_urls = ['https://quotes.toscrape.com/']

    def parse(self, response):
        """Yield one dict per ``.quote`` element found in *response*."""
        for quote_block in response.css('.quote'):
            item = {
                'Quote': quote_block.css('.text::text').get(),
                'Author': quote_block.css('.author::text').get(),
            }
            # All tag labels of the quote are collapsed into one string.
            tag_labels = quote_block.css('.tag::text').getall()
            item['Tags'] = ' , '.join(tag_labels)
            yield item
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Quizzes/Scrapy/Quiz 2 (33-Quiz(Extracting the Year)).py:
--------------------------------------------------------------------------------
1 | import scrapy
class Quiz(scrapy.Spider):
    """Spider that collects name, year and duration of the IMDb Top 250 movies."""

    name = 'quiz2'
    start_urls = ['https://www.imdb.com/chart/top/']

    def parse(self, response):
        """Follow every title link on the chart page.

        The name and year read from the chart row travel with the request
        in its ``meta`` so that ``parseMovie`` can emit them together with
        the duration.
        """
        for td in response.css('.titleColumn'):
            # Named ``movie_meta`` so the builtin ``dict`` is not shadowed.
            movie_meta = {
                'Movie': td.css('a::text').get(),
                'Year': td.css('span::text').get(),
            }
            url = td.css('a::attr(href)').get()
            if url is None:
                # A cell without a link cannot be followed.
                continue
            yield response.follow(url, callback=self.parseMovie, meta=movie_meta)

    def parseMovie(self, response):
        """Yield the final item for one movie page."""
        # default='' keeps a page without the element from raising
        # AttributeError on ``None.strip()``.
        duration = response.css('.subtext time::text').get(default='').strip()

        yield {
            'Movie': response.meta['Movie'],
            'Year': response.meta['Year'],
            'Duration': duration,
        }
25 |
--------------------------------------------------------------------------------
/Quizzes/Scrapy/quiz1solution.csv:
--------------------------------------------------------------------------------
1 | Quote,Author,Tags
2 | “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”,Albert Einstein,"change , deep-thoughts , thinking , world"
3 | "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",J.K. Rowling,"abilities , choices"
4 | “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”,Albert Einstein,"inspirational , life , live , miracle , miracles"
5 | "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",Jane Austen,"aliteracy , books , classic , humor"
6 | "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",Marilyn Monroe,"be-yourself , inspirational"
7 | “Try not to become a man of success. Rather become a man of value.”,Albert Einstein,"adulthood , success , value"
8 | “It is better to be hated for what you are than to be loved for what you are not.”,André Gide,"life , love"
9 | "“I have not failed. I've just found 10,000 ways that won't work.”",Thomas A. Edison,"edison , failure , inspirational , paraphrased"
10 | “A woman is like a tea bag; you never know how strong it is until it's in hot water.”,Eleanor Roosevelt,misattributed-eleanor-roosevelt
11 | "“A day without sunshine is like, you know, night.”",Steve Martin,"humor , obvious , simile"
12 |
--------------------------------------------------------------------------------
/Quizzes/Scrapy/quiz2solution.csv:
--------------------------------------------------------------------------------
1 | Movie,Year,Duration
2 | The Shawshank Redemption,(1994),2h 22min
3 | 12 Angry Men,(1957),1h 36min
4 | Inception,(2010),2h 28min
5 | Schindler's List,(1993),3h 15min
6 | The Dark Knight,(2008),2h 32min
7 | The Godfather,(1972),2h 55min
8 | The Godfather: Part II,(1974),3h 22min
9 | The Lord of the Rings: The Return of the King,(2003),3h 21min
10 | Forrest Gump,(1994),2h 22min
11 | The Lord of the Rings: The Fellowship of the Ring,(2001),2h 58min
12 | "The Good, the Bad and the Ugly",(1966),2h 58min
13 | Fight Club,(1999),2h 19min
14 | Pulp Fiction,(1994),2h 34min
15 | Star Wars: Episode V - The Empire Strikes Back,(1980),2h 4min
16 | The Lord of the Rings: The Two Towers,(2002),2h 59min
17 | Swades,(2004),3h 9min
18 | Saving Private Ryan,(1998),2h 49min
19 | Tangerines,(2013),1h 27min
20 | The Battle of Algiers,(1966),2h 1min
21 | Three Colors: Red,(1994),1h 39min
22 | The Help,(2011),2h 26min
23 | A Silent Voice: The Movie,(2016),2h 10min
24 | Drishyam,(2015),2h 43min
25 | The Invisible Guest,(2016),1h 46min
26 | "Paris, Texas",(1984),2h 25min
27 | It Happened One Night,(1934),1h 45min
28 | Portrait of a Lady on Fire,(2019),2h 2min
29 | In the Mood for Love,(2000),1h 38min
30 | Rang De Basanti,(2006),2h 47min
31 | Rififi,(1955),1h 58min
32 | Rebecca,(1940),2h 10min
33 | Before Sunset,(2004),1h 20min
34 | Nausicaä of the Valley of the Wind,(1984),1h 57min
35 | Andrei Rublev,(1966),3h 25min
36 | "Monsters, Inc.",(2001),1h 32min
37 | Rocky,(1976),2h
38 | Gangs of Wasseypur,(2012),5h 21min
39 | Amores Perros,(2000),2h 34min
40 | Time of the Gypsies,(1988),2h 22min
41 | Hotel Rwanda,(2004),2h 1min
42 | The Passion of Joan of Arc,(1928),1h 54min
43 | Spotlight,(2015),2h 9min
44 | The 400 Blows,(1959),1h 39min
45 | The Wages of Fear,(1953),2h 11min
46 | La Haine,(1995),1h 38min
47 | Rush,(2013),2h 3min
48 | Into the Wild,(2007),2h 28min
49 | Logan,(2017),2h 17min
50 | Monty Python's Life of Brian,(1979),1h 34min
51 | The Handmaiden,(2016),2h 25min
52 | Hachi: A Dog's Tale,(2009),1h 33min
53 | Platoon,(1986),2h
54 | Stand by Me,(1986),1h 29min
55 | Network,(1976),2h 1min
56 | Cool Hand Luke,(1967),2h 7min
57 | Ben-Hur,(1959),3h 32min
58 | Harry Potter and the Deathly Hallows: Part 2,(2011),2h 10min
59 | Million Dollar Baby,(2004),2h 12min
60 | Mr. Smith Goes to Washington,(1939),2h 9min
61 | The Bandit,(1996),2h 8min
62 | Dead Poets Society,(1989),2h 8min
63 | Barry Lyndon,(1975),3h 5min
64 | Soul,(2020),1h 40min
65 | Mad Max: Fury Road,(2015),2h
66 | 12 Years a Slave,(2013),2h 14min
67 | Sherlock Jr.,(1924),45min
68 | To Be or Not to Be,(1942),1h 39min
69 | Autumn Sonata,(1978),1h 39min
70 | How to Train Your Dragon,(2010),1h 38min
71 | The General,(1926),1h 7min
72 | Ford v Ferrari,(2019),2h 32min
73 | The Big Lebowski,(1998),1h 57min
74 | Andhadhun,(2018),2h 19min
75 | Prisoners,(2013),2h 33min
76 | Persona,(1966),1h 23min
77 | Hacksaw Ridge,(2016),2h 19min
78 | Catch Me If You Can,(2002),2h 21min
79 | Before Sunrise,(1995),1h 41min
80 | Gone Girl,(2014),2h 29min
81 | Mary and Max,(2009),1h 32min
82 | Anand,(1971),2h 2min
83 | In the Name of the Father,(1993),2h 13min
84 | The Grand Budapest Hotel,(2014),1h 39min
85 | The Deer Hunter,(1978),3h 3min
86 | On the Waterfront,(1954),1h 48min
87 | Tokyo Story,(1953),2h 16min
88 | The Third Man,(1949),1h 33min
89 | Wild Tales,(2014),2h 2min
90 | Gran Torino,(2008),1h 56min
91 | Room,(2015),1h 58min
92 | The Bridge on the River Kwai,(1957),2h 41min
93 | Fargo,(1996),1h 38min
94 | Memories of Murder,(2003),2h 12min
95 | Kill Bill: Vol. 1,(2003),1h 51min
96 | Blade Runner,(1982),1h 57min
97 | Wild Strawberries,(1957),1h 31min
98 | Stalker,(1979),2h 42min
99 | Finding Nemo,(2003),1h 40min
100 | Gone with the Wind,(1939),3h 58min
101 | The Truman Show,(1998),1h 43min
102 | Jurassic Park,(1993),2h 7min
103 | Trainspotting,(1996),1h 33min
104 | The Thing,(1982),1h 49min
105 | The Sixth Sense,(1999),1h 47min
106 | The Seventh Seal,(1957),1h 36min
107 | The Elephant Man,(1980),2h 4min
108 | Klaus,(2019),1h 36min
109 | Warrior,(2011),2h 20min
110 | My Father and My Son,(2005),1h 52min
111 | Inside Out,(2015),1h 35min
112 | V for Vendetta,(2005),2h 12min
113 | No Country for Old Men,(2007),2h 2min
114 | Chinatown,(1974),2h 10min
115 | Shutter Island,(2010),2h 18min
116 | "Three Billboards Outside Ebbing, Missouri",(2017),1h 55min
117 | The Gold Rush,(1925),1h 35min
118 | Dial M for Murder,(1954),1h 45min
119 | The Treasure of the Sierra Madre,(1948),2h 6min
120 | Raging Bull,(1980),2h 9min
121 | My Neighbor Totoro,(1988),1h 26min
122 | Judgment at Nuremberg,(1961),2h 59min
123 | The Secret in Their Eyes,(2009),2h 9min
124 | Pan's Labyrinth,(2006),1h 58min
125 | "Lock, Stock and Two Smoking Barrels",(1998),1h 47min
126 | The Great Escape,(1963),2h 52min
127 | There Will Be Blood,(2007),2h 38min
128 | Casino,(1995),2h 58min
129 | All About Eve,(1950),2h 18min
130 | A Beautiful Mind,(2001),2h 15min
131 | The Wolf of Wall Street,(2013),3h
132 | Ran,(1985),2h 42min
133 | Unforgiven,(1992),2h 10min
134 | Howl's Moving Castle,(2004),1h 59min
135 | Some Like It Hot,(1959),2h 1min
136 | Children of Heaven,(1997),1h 29min
137 | Downfall,(2004),2h 36min
138 | Rashomon,(1950),1h 28min
139 | Yojimbo,(1961),1h 50min
140 | Batman Begins,(2005),2h 20min
141 | Die Hard,(1988),2h 12min
142 | Monty Python and the Holy Grail,(1975),1h 31min
143 | Green Book,(2018),2h 10min
144 | L.A. Confidential,(1997),2h 18min
145 | Heat,(1995),2h 50min
146 | Indiana Jones and the Last Crusade,(1989),2h 7min
147 | For a Few Dollars More,(1965),2h 12min
148 | Up,(2009),1h 36min
149 | Double Indemnity,(1944),1h 47min
150 | To Kill a Mockingbird,(1962),2h 9min
151 | Metropolis,(1927),2h 33min
152 | The Apartment,(1960),2h 5min
153 | The Sting,(1973),2h 9min
154 | Amélie,(2001),2h 2min
155 | A Separation,(2011),2h 3min
156 | Incendies,(2010),2h 11min
157 | Lawrence of Arabia,(1962),3h 48min
158 | Taxi Driver,(1976),1h 54min
159 | Ikiru,(1952),2h 23min
160 | Come and See,(1985),2h 22min
161 | Toy Story 3,(2010),1h 43min
162 | 1917,(2019),1h 59min
163 | A Clockwork Orange,(1971),2h 16min
164 | North by Northwest,(1959),2h 16min
165 | Singin' in the Rain,(1952),1h 43min
166 | The Kid,(1921),1h 8min
167 | Scarface,(1983),2h 50min
168 | Bicycle Thieves,(1948),1h 29min
169 | Snatch,(2000),1h 42min
170 | Full Metal Jacket,(1987),1h 56min
171 | Dangal,(2016),2h 41min
172 | Citizen Kane,(1941),1h 59min
173 | Vertigo,(1958),2h 8min
174 | Requiem for a Dream,(2000),1h 42min
175 | Eternal Sunshine of the Spotless Mind,(2004),1h 48min
176 | M,(1931),1h 39min
177 | Reservoir Dogs,(1992),1h 39min
178 | The Hunt,(2012),1h 55min
179 | 2001: A Space Odyssey,(1968),2h 29min
180 | Like Stars on Earth,(2007),2h 45min
181 | Good Will Hunting,(1997),2h 6min
182 | 3 Idiots,(2009),2h 50min
183 | Amadeus,(1984),2h 40min
184 | Inglourious Basterds,(2009),2h 33min
185 | Star Wars: Episode VI - Return of the Jedi,(1983),2h 11min
186 | High and Low,(1963),2h 23min
187 | Das Boot,(1981),2h 29min
188 | Capernaum,(2018),2h 6min
189 | Braveheart,(1995),2h 58min
190 | Aliens,(1986),2h 17min
191 | Toy Story,(1995),1h 21min
192 | American Beauty,(1999),2h 2min
193 | Your Name.,(2016),1h 46min
194 | Coco,(2017),1h 45min
195 | Once Upon a Time in America,(1984),3h 49min
196 | Avengers: Endgame,(2019),3h 1min
197 | The Dark Knight Rises,(2012),2h 44min
198 | Princess Mononoke,(1997),2h 14min
199 | Sunset Blvd.,(1950),1h 50min
200 | Witness for the Prosecution,(1957),1h 56min
201 | Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb,(1964),1h 35min
202 | Oldboy,(2003),2h
203 | The Shining,(1980),2h 26min
204 | WALL·E,(2008),1h 38min
205 | Avengers: Infinity War,(2018),2h 29min
206 | Paths of Glory,(1957),1h 28min
207 | The Lives of Others,(2006),2h 17min
208 | Joker,(2019),2h 2min
209 | Indiana Jones and the Raiders of the Lost Ark,(1981),1h 55min
210 | Hamilton,(2020),2h 40min
211 | The Great Dictator,(1940),2h 5min
212 | Memento,(2000),1h 53min
213 | Apocalypse Now,(1979),2h 27min
214 | Spider-Man: Into the Spider-Verse,(2018),1h 57min
215 | Django Unchained,(2012),2h 45min
216 | Alien,(1979),1h 57min
217 | Cinema Paradiso,(1988),2h 35min
218 | Rear Window,(1954),1h 52min
219 | Once Upon a Time in the West,(1968),2h 45min
220 | The Prestige,(2006),2h 10min
221 | Casablanca,(1942),1h 42min
222 | Grave of the Fireflies,(1988),1h 29min
223 | Whiplash,(2014),1h 46min
224 | The Intouchables,(2011),1h 52min
225 | City Lights,(1931),1h 27min
226 | The Departed,(2006),2h 31min
227 | Modern Times,(1936),1h 27min
228 | Psycho,(1960),1h 49min
229 | Back to the Future,(1985),1h 56min
230 | The Pianist,(2002),2h 30min
231 | The Usual Suspects,(1995),1h 46min
232 | Hara-Kiri,(1962),2h 13min
233 | American History X,(1998),1h 59min
234 | Terminator 2: Judgment Day,(1991),2h 17min
235 | The Lion King,(1994),1h 28min
236 | Gladiator,(2000),2h 35min
237 | Léon: The Professional,(1994),1h 50min
238 | Parasite,(2019),2h 12min
239 | Interstellar,(2014),2h 49min
240 | It's a Wonderful Life,(1946),2h 10min
241 | Spirited Away,(2001),2h 5min
242 | The Green Mile,(1999),3h 9min
243 | City of God,(2002),2h 10min
244 | Star Wars: Episode IV - A New Hope,(1977),2h 1min
245 | Life Is Beautiful,(1997),1h 56min
246 | The Silence of the Lambs,(1991),1h 58min
247 | One Flew Over the Cuckoo's Nest,(1975),2h 13min
248 | The Matrix,(1999),2h 16min
249 | Goodfellas,(1990),2h 26min
250 | Seven Samurai,(1954),3h 27min
251 | Se7en,(1995),2h 7min
252 |
--------------------------------------------------------------------------------
/Quizzes/Selenium/Quiz 1 (06-Quiz(Extracting Quotes)).py:
--------------------------------------------------------------------------------
"""Print text, author and tags of every quote on quotes.toscrape.com."""
from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): recent Selenium 4 releases no longer accept
# ``executable_path``; there the driver is created with
# ``webdriver.Chrome(service=Service('chromedriver.exe'))`` instead.
driver = webdriver.Chrome(executable_path='chromedriver.exe')

# try/finally guarantees the browser is closed even when a lookup fails.
try:
    driver.get('https://quotes.toscrape.com/')
    # find_element(s)(By.CSS_SELECTOR, ...) replaces the
    # find_element(s)_by_css_selector helpers that Selenium 4 removed.
    for div in driver.find_elements(By.CSS_SELECTOR, '.quote'):
        print(div.find_element(By.CSS_SELECTOR, '.text').text)
        print(div.find_element(By.CSS_SELECTOR, '.author').text)
        for tag in div.find_elements(By.CSS_SELECTOR, '.tag'):
            print(tag.text)
        print('--------------------')
finally:
    driver.quit()
--------------------------------------------------------------------------------
/Quizzes/Selenium/Quiz 2 (12-Quiz(Log in and Extract Quote)).py:
--------------------------------------------------------------------------------
"""Log in to quotes.toscrape.com and print the quote texts shown afterwards."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): recent Selenium 4 releases no longer accept
# ``executable_path``; there the driver is created with
# ``webdriver.Chrome(service=Service('chromedriver.exe'))`` instead.
driver = webdriver.Chrome(executable_path='chromedriver.exe')

driver.get('https://quotes.toscrape.com/')

# find_element(s)(By.CSS_SELECTOR, ...) replaces the
# find_element(s)_by_css_selector helpers that Selenium 4 removed.
driver.find_element(By.CSS_SELECTOR, '.header-box p a').click()

user_name = driver.find_element(By.CSS_SELECTOR, '#username')
user_name.send_keys('XZY')
time.sleep(3)

password = driver.find_element(By.CSS_SELECTOR, '#password')
password.send_keys('12345')
time.sleep(3)

driver.find_element(By.CSS_SELECTOR, '[value="Login"]').click()
for div in driver.find_elements(By.CSS_SELECTOR, '.text'):
    print(div.text)

# The browser is left open on purpose, as in the previous version, where
# the closing call was disabled (presumably so the page can be inspected).
# driver.quit()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python
5 | Data Scraping and Data Mining from Beginner to Pro with Python, by Packt publishing
6 |
--------------------------------------------------------------------------------
/Requests/Lec 10-12.py:
--------------------------------------------------------------------------------
"""Download author and summary of news stories from the ESPNcricinfo feed.

Pages 1 to 5 of the JSON feed are requested; every entry is written to
``News.txt`` as ``author | summary`` on its own line.
"""
import json

import requests

# Explicit UTF-8 so summaries with non-ASCII characters do not depend on the
# platform's default code page.
with open('News.txt', 'w', encoding='utf-8') as f:
    for page in range(1, 6):
        url = f'https://www.espncricinfo.com/ci/content/story/data/index.json?;type=7;page={page}'
        # A timeout keeps a stalled connection from hanging the script; the
        # status check turns an error page into a clear HTTPError instead of
        # a JSON decoding failure.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        stories = json.loads(response.text)
        for news in stories:
            f.write(news['author'] + ' | ' + news['summary'])
            f.write('\n')
11 |
--------------------------------------------------------------------------------
/Requests/Lec 2-6.py:
--------------------------------------------------------------------------------
1 | import requests
2 | for i in range(1, 12):
3 | print("Page: ", i)
4 | url = f'https://quotes.toscrape.com/page/{i}/'
5 | r = requests.get(url)
6 | html = r.text
7 | with open('quotes.txt', 'a', encoding='utf-8') as f:
8 | for line in html.split('\n'):
9 | if '' in line:
10 | line = line.replace('“', '').replace('”', '')
11 | line = line.strip()
12 | f.write(line)
13 | f.write("\n")
14 |
--------------------------------------------------------------------------------
/SCRAPY.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/SCRAPY.zip
--------------------------------------------------------------------------------
/Selenium/DeepL Script.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
4 |
5 | with open('input.txt','r') as f:
6 | text = f.read().strip()
7 |
8 | driver.get('https://www.deepl.com/translator')
9 | driver.find_element_by_css_selector('.dl_cookieBanner--buttonClose').click()
10 | time.sleep(3)
11 | driver.find_element_by_css_selector('.lmt__language_container_prim .lmt__language_select__opener').click()
12 | time.sleep(3)
13 | driver.find_element_by_css_selector('[dl-test="translator-lang-option-ru-RU"]').click()
14 | time.sleep(3)
15 | inputTextArea = driver.find_element_by_css_selector('.lmt__textarea')
16 | inputTextArea.send_keys(text)
17 | time.sleep(5)
18 | driver.find_element_by_css_selector('.lmt__target_toolbar__save button').click()
19 |
20 | # driver.quit()
21 |
22 |
--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 11.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
4 |
5 | driver.get('https://quotes.toscrape.com/')
6 |
7 | driver.find_element_by_css_selector('.header-box p a').click()
8 | username = driver.find_element_by_css_selector('#username')
9 | username.send_keys('ABC')
10 | time.sleep(3)
11 | password = driver.find_element_by_css_selector('#password')
12 | password.send_keys('12345')
13 | time.sleep(3)
14 | driver.find_element_by_css_selector('[value="Login"]').click()
15 |
16 | driver.quit()
--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 4.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
3 |
4 |
5 | driver.get('https://quotes.toscrape.com/')
6 | print(type(driver.find_element_by_css_selector('.text')))
7 | print(driver.find_element_by_css_selector('.text').text)
8 |
9 | print('--------------------')
10 | print(type(driver.find_elements_by_css_selector('.text')))
11 | for tag in driver.find_elements_by_css_selector('.text'):
12 | print(tag.text)
13 |
14 | driver.quit()
--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 5.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
3 |
4 |
5 | driver.get('https://quotes.toscrape.com/')
6 | for div in driver.find_elements_by_css_selector('.quote'):
7 | print(div.find_element_by_css_selector('.text').text)
8 | print(div.find_element_by_css_selector('.author').text)
9 | print('----------------')
10 |
11 |
12 |
13 |
14 | driver.quit()
--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 9-10.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
3 |
4 | driver.get('https://quotes.toscrape.com/page/9/')
5 |
6 | while True:
7 | for div in driver.find_elements_by_css_selector('.quote'):
8 | print(div.find_element_by_css_selector('.text').text)
9 | print(div.find_element_by_css_selector('.author').text)
10 |
11 | try:
12 | driver.find_element_by_css_selector('.next a').click()
13 | except:
14 | break
15 |
16 |
17 |
18 |
19 | driver.quit()
--------------------------------------------------------------------------------
/Selenium/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Selenium/chromedriver.exe
--------------------------------------------------------------------------------
/Selenium/chromedriver_win32.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Selenium/chromedriver_win32.zip
--------------------------------------------------------------------------------
/Selenium/input.txt:
--------------------------------------------------------------------------------
1 | Happy data scraping, Hope this helps you.
--------------------------------------------------------------------------------
/Slides.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Slides.pptx
--------------------------------------------------------------------------------