├── BS4
    ├── Lec 11-17.py
    ├── Lec 18-21.py
    ├── Lec 4-6.py
    └── Lec 9-10.py
├── LICENSE
├── Quizzes
    ├── BS4
    │   ├── Quiz 2 (07-Quiz(Extracting Author Names)).py
    │   ├── Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py
    │   ├── quiz2solution.csv
    │   └── quiz3solution.csv
    ├── CSS Selectors
    │   ├── Quiz 1 (03-Quiz(Tags)).html
    │   ├── Quiz 10 (28-Quiz(Last Child)).html
    │   ├── Quiz 11 (31-Quiz(Negation)).html
    │   ├── Quiz 12 (34-Quiz(Attributes Values)).html
    │   ├── Quiz 2 (06-Quiz(Descendants)).html
    │   ├── Quiz 3 (08-Quiz(ID)).html
    │   ├── Quiz 4 (13-Quiz(Class with Tag)).html
    │   ├── Quiz 5 (16-Quiz(Combining Two Selectors)).html
    │   ├── Quiz 6 (19-Quiz(Adjacent Sibling)).html
    │   ├── Quiz 7 (21-Quiz(General Sibling)).html
    │   ├── Quiz 8 (24-Quiz(First Child)).html
    │   └── Quiz 9 (26-Quiz(Only Child)).html
    ├── Requests
    │   ├── Quiz 1 (04-Quiz(Extracting Authors)).py
    │   ├── Quiz 2 (07-Quiz(Extracting Author and Quotes)).py
    │   ├── Quiz 3 (13-Quiz(Extracting Top Stats from Cricinfo)).py
    │   ├── quiz1solution.txt
    │   ├── quiz2solution.csv
    │   └── quiz3solution.csv
    ├── Scrapy
    │   ├── Quiz 1 (24-Quiz(Get The Tags)).py
    │   ├── Quiz 2 (33-Quiz(Extracting the Year)).py
    │   ├── quiz1solution.csv
    │   └── quiz2solution.csv
    └── Selenium
    │   ├── Quiz 1 (06-Quiz(Extracting Quotes)).py
    │   └── Quiz 2 (12-Quiz(Log in and Extract Quote)).py
├── README.md
├── Requests
    ├── Lec 10-12.py
    └── Lec 2-6.py
├── SCRAPY.zip
├── Selenium
    ├── DeepL Script.py
    ├── Selenium Different Lectures Scripts
    │   ├── lec 11.py
    │   ├── lec 4.py
    │   ├── lec 5.py
    │   └── lec 9-10.py
    ├── chromedriver.exe
    ├── chromedriver_win32.zip
    └── input.txt
└── Slides.pptx


/BS4/Lec 11-17.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | res = requests.get('https://www.imdb.com/chart/top/')
 4 | html = res.text
 5 | 
 6 | soup = BeautifulSoup(html, 'html.parser')
 7 | tbody = soup.find('tbody', {'class': 'lister-list'})
 8 | trs = tbody.findAll('tr')
 9 | for tr in trs:
10 |     td = tr.find('td', {'class': 'titleColumn'})
11 |     movieId = td.a['href']
12 |     movieUrl = f'https://www.imdb.com/{movieId}'
13 | 
14 |     res2 = requests.get(movieUrl)
15 |     html = res2.text
16 |     soup2 = BeautifulSoup(html, 'html.parser')
17 |     info = soup2.find('div', {'class': 'subtext'})
18 | 
19 |     a = info.findAll('a')
20 |     print(td.a.string)
21 |     print(info.time.string.strip())
22 |     print(a[0].string.strip())
23 |     print(a[1].string.strip())
24 | 
25 | 


--------------------------------------------------------------------------------
/BS4/Lec 18-21.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | movieName = input('Enter Movie Name: ')
 5 | movieName = movieName.lower()
 6 | 
 7 | res = requests.get('https://www.imdb.com/chart/top/')
 8 | html = res.text
 9 | 
10 | soup = BeautifulSoup(html, 'html.parser')
11 | tbody = soup.find('tbody', {'class': 'lister-list'})
12 | trs = tbody.findAll('tr')
13 | for tr in trs:
14 |     td = tr.find('td', {'class': 'titleColumn'})
15 |     imdbMovieName = td.a.string.strip().lower()
16 |     if imdbMovieName == movieName:
17 |         movieId = td.a['href']
18 |         movieUrl = f'https://www.imdb.com/{movieId}'
19 |         res2 = requests.get(movieUrl)
20 |         html = res2.text
21 |         soup2 = BeautifulSoup(html, 'html.parser')
22 |         summary = soup2.find('div', {'class': 'credit_summary_item'})
23 |         dirID = summary.a['href']
24 |         dirUrl = f'https://www.imdb.com/{dirID}'
25 |         print("Dir Name: ",summary.a.string)
26 |         res3 = requests.get(dirUrl)
27 |         html = res3.text
28 |         soup3 = BeautifulSoup(html, 'html.parser')
29 |         knownfor = soup3.find('div', {'id': 'knownfor'})
30 |         movieDivs = knownfor.findAll('div', {'class': 'knownfor-title'})
31 |         for div in movieDivs:
32 |             moviediv = div.find('div', {'class': 'knownfor-title-role'})
33 |             print(moviediv.a.string)
34 | 
35 |         break
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/BS4/Lec 4-6.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import requests
3 | r = requests.get('https://quotes.toscrape.com/')
4 | html = r.text
5 | soup = BeautifulSoup(html, 'html.parser')
6 | with open('bs4quotes.txt','w') as f:
7 |     for tag in soup.findAll('span', {'class': 'text'}):
8 |         f.write(tag.string)
9 |         f.write('\n')


--------------------------------------------------------------------------------
/BS4/Lec 9-10.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | html = '<b id="xyz" class="abc 123"> Hello World</b><span></span><span></span>'
 3 | soup = BeautifulSoup(html, 'html.parser', multi_valued_attributes=None)
 4 | tag = soup.b
 5 | 
 6 | print(tag['id'])
 7 | print(tag['class'])
 8 | print(tag.attrs)
 9 | print(tag)
10 | tag['id'] = 'HELLO'
11 | tag['class'] = 'World'
12 | print(tag)
13 | print(tag['class'])
14 | 
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Packt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Quizzes/BS4/Quiz 2 (07-Quiz(Extracting Author Names)).py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | res = requests.get('https://quotes.toscrape.com/')
 4 | html = res.text
 5 | soup = BeautifulSoup(html, 'html.parser')
 6 | with open('AuthorNames.csv', 'w') as f:
 7 |     for tag in soup.findAll('small',{'class':'author'}):
 8 |         f.write(tag.string)
 9 |         f.write('\n')
10 | 


--------------------------------------------------------------------------------
/Quizzes/BS4/Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | res = requests.get('https://www.imdb.com/chart/top/')
 4 | html = res.text
 5 | soup = BeautifulSoup(html, 'html.parser')
 6 | tbody = soup.find('tbody', {'class':'lister-list'})
 7 | trs = tbody.findAll('tr')
 8 | with open('imdbMoviesNameRating.csv', 'w') as f:
 9 |     for tr in trs:
10 |         movieNametd = tr.find('td',{'class':'titleColumn'})
11 |         ratingtd = tr.find('td',{'class':'ratingColumn'})
12 |         f.write(movieNametd.a.string+ "," + movieNametd.span.string + "," +ratingtd.strong.string)
13 |         f.write('\n')
14 | 
15 | 


--------------------------------------------------------------------------------
/Quizzes/BS4/quiz2solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz2solution.csv


--------------------------------------------------------------------------------
/Quizzes/BS4/quiz3solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz3solution.csv


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 1 (03-Quiz(Tags)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>Hello</span>
10 | 		<span>World</span>
11 | 		<div>How are you?</div>
12 | 	</div>
13 | 	
14 | </body>
15 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 10 (28-Quiz(Last Child)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<title></title>
 5 | 	</head>
 6 | 	<body>
 7 | 
 8 | 		<div id="x">
 9 | 			<div>a</div>
10 | 			<div>b</div>
11 | 			<div>c</div>
12 | 			<div>d</div>
13 | 			<div>e</div>
14 | 			<div>c</div>
15 | 		</div>
16 | 
17 | 	</body>
18 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 11 (31-Quiz(Negation)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<title></title>
 5 | 	</head>
 6 | 	<body>
 7 | 
 8 | 		<span randomAttribure="aa bb cc">
 9 | 			<div>a</div>
10 | 			<div>b</div>
11 | 		</div>
12 | 		<span randomAttribure="aa bbb cc">
13 | 			c
14 | 		</span>
15 | 		<span randomAttribure="aa bbbb cc">
16 | 			d
17 | 		</span>
18 | 
19 | 	</body>
20 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 12 (34-Quiz(Attributes Values)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<title></title>
 5 | 	</head>
 6 | 	<body>
 7 | 		<div randomAttribure="aa bb cc">
 8 | 			a
 9 | 		</div>
10 | 		<span randomAttribure="aa bb cc">
11 | 			b
12 | 		</span>
13 | 		<span randomAttribure="aa bbb cc">
14 | 			c
15 | 		</span>
16 | 		<span randomAttribure="aa bbbb cc">
17 | 			d
18 | 		</span>
19 | 
20 | 	</body>
21 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 2 (06-Quiz(Descendants)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<span>b</span>
11 | 		<div>c</div>
12 | 	</div>
13 | 
14 | 	<span>
15 | 		<span>d</span>
16 | 		<span>e</span>
17 | 		<div>f</div>
18 | 	</span>
19 | 
20 | </body>
21 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 3 (08-Quiz(ID)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<span id="b">b</span>
11 | 		<div>c</div>
12 | 	</div>
13 | 	<span>
14 | 		<span>d</span>
15 | 		<span>e</span>
16 | 		<div>f</div>
17 | 	</span>
18 | 
19 | </body>
20 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 4 (13-Quiz(Class with Tag)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<span class="b">b</span>
11 | 		<div>c</div>
12 | 	</div>
13 | 	<span>
14 | 		<span>d</span>
15 | 		<span>e</span>
16 | 		<span class="b">b</span>
17 | 		<span class="b">ball</span>
18 | 		<span class="b">bat</span>
19 | 		<div>f</div>
20 | 	</span>
21 | 
22 | </body>
23 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 5 (16-Quiz(Combining Two Selectors)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<span class="b">ball</span>
11 | 		<div>c</div>
12 | 	</div>
13 | 	<span>
14 | 		<span>d</span>
15 | 		<span>e</span>
16 | 		<div class="b">bat</div>
17 | 		<div>f</div>
18 | 	</span>
19 | 
20 | </body>
21 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 6 (19-Quiz(Adjacent Sibling)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<div class="b">ball</div>
11 | 		<div>c</div>
12 | 	</div>
13 | 	<span>
14 | 		<span>d</span>
15 | 		<span>e</span>
16 | 		<div class="bb">bat</div>
17 | 		<div>f</div>
18 | 	</span>
19 | 
20 | </body>
21 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 7 (21-Quiz(General Sibling)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<div class="b">ball</div>
11 | 		<div>c</div>
12 | 	</div>
13 | 	<span>
14 | 		<span>d</span>
15 | 		<span>e</span>
16 | 		<div class="bb">bat</div>
17 | 		<div>f</div>
18 | 	</span>
19 | 
20 | </body>
21 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 8 (24-Quiz(First Child)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 | </head>
 6 | <body>
 7 | 
 8 | 	<div>
 9 | 		<span>a</span>
10 | 		<div class="b">ball</div>
11 | 		<div>c</div>
12 | 		<div>cc</div>
13 | 		<div>ccc</div>
14 | 		<span>d</span>
15 | 		<div>cccc</div>
16 | 	</div>
17 | 	<span>
18 | 		<span>d</span>
19 | 		<span>e</span>
20 | 		<div class="bb">bat</div>
21 | 		<div>f</div>
22 | 	</span>
23 | 
24 | </body>
25 | </html>


--------------------------------------------------------------------------------
/Quizzes/CSS Selectors/Quiz 9 (26-Quiz(Only Child)).html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<title></title>
 5 | 	</head>
 6 | 	<body>
 7 | 
 8 | 		<div id="x">
 9 | 			<div>a</div>
10 | 			<div>b</div>
11 | 			<span>c</span>
12 | 		</div>
13 | 		<span>
14 | 			<span>b</span>
15 | 			<span>c</span>
16 | 		</span>
17 | 	</body>
18 | </html>


--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 1 (04-Quiz(Extracting Authors)).py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | res = requests.get('https://quotes.toscrape.com/')
 3 | html = res.text
 4 | with open('Authors.txt','w') as f:
 5 |     for line in html.split('\n'):
 6 |         if '<small class="author" itemprop="author">' in line:
 7 |             line = line.replace('<span>by <small class="author" itemprop="author">','')
 8 |             line = line.replace('</small>','')
 9 |             author = line.strip()
10 |             f.write(author)
11 |             f.write('\n')


--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 2 (07-Quiz(Extracting Author and Quotes)).py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | res = requests.get('https://quotes.toscrape.com/')
 3 | html = res.text
 4 | 
 5 | with open('quotes.csv','w') as f:
 6 |     for line in html.split('\n'):
 7 |         if '<span class="text" itemprop="text">' in line:
 8 |             line = line.replace('<span class="text" itemprop="text">“','')
 9 |             line = line.replace('”</span>', '')
10 |             quote = line.strip()
11 | 
12 |         if '<small class="author" itemprop="author">' in line:
13 |             line = line.replace(' <span>by <small class="author" itemprop="author">','')
14 |             line = line.replace('</small>', '')
15 |             author = line.strip()
16 | 
17 |             f.write(author+","+quote)
18 |             f.write('\n')
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/Quizzes/Requests/Quiz 3 (13-Quiz(Extracting Top Stats from Cricinfo)).py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import json
 3 | with open('superstats.csv','w') as f:
 4 |     for i in range(1,3):
 5 |         url = f'https://www.espncricinfo.com/ci/content/story/data/index.json?genre=706;;page={i}'
 6 |         res = requests.get(url)
 7 |         data = res.text
 8 |         data = json.loads(data)
 9 |         for headline in data:
10 |             _headline = headline['headline']
11 |             _headline = _headline.replace(',', '|')
12 |             f.write(_headline)
13 |             f.write('\n')


--------------------------------------------------------------------------------
/Quizzes/Requests/quiz1solution.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/Requests/quiz1solution.txt


--------------------------------------------------------------------------------
/Quizzes/Requests/quiz2solution.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/Requests/quiz2solution.csv


--------------------------------------------------------------------------------
/Quizzes/Requests/quiz3solution.csv:
--------------------------------------------------------------------------------
 1 | Smart Stats: Rashid Khan BBL's all-time MVP| D'arcy Short the best batsman
 2 | Overseas players' impact in IPL 2020 - England on top| West Indies smash it
 3 | Jofra Archer - IPL 2020's MVP by a distance
 4 | Why Shikhar Dhawan and not KL Rahul tops the Smart Runs tally
 5 | Luck Index - Umpire's call adds 20 runs to Mumbai's total
 6 | The dropped catch that gave Kings XI Punjab a fighting chance
 7 | Luck Index: Did Vijay Shankar dropping Ben Stokes hurt Sunrisers or Royals?
 8 | Why Mohammed Shami trumps Purple Cap holder Kagiso Rabada in Smart Wickets tally
 9 | Super Kings pay for letting Dhawan live a charmed life
10 | Smart Stats: Krunal Pandya's 2 for 26 more impactful than Quinton de Kock and Suryakumar Yadav's fifties
11 | Smart Stats: Axar Patel most impactful player Delhi Capitals' big win
12 | Archer| Agarwal and Samson take early Smart Stats honours
13 | Why Rahul Chahar is the Smart Stats player of the match
14 | Introducing Smart Stats| where context trumps raw numbers
15 | Why Pat Cummins| and not Shubman Gill| is the Smart Stats player of the match
16 | KL Rahul makes the most of Virat Kohli's lapses
17 | Why Jasprit Bumrah is the Smart Stats Player of the Match
18 | Smart Stats: Lungi Ngidi's super spell for CSK| and Chris Gayle's 175 not out
19 | Smart Stats: Why David Warner topped Virat Kohli in IPL 2016
20 | Andre Russell| the IPL's most destructive| impactful and valuable player
21 | 


--------------------------------------------------------------------------------
/Quizzes/Scrapy/Quiz 1 (24-Quiz(Get The Tags)).py:
--------------------------------------------------------------------------------
 1 | import scrapy
 2 | class Quiz(scrapy.Spider):
 3 |     name='quiz1'
 4 |     start_urls = ['https://quotes.toscrape.com/']
 5 | 
 6 |     def parse(self, response):
 7 |         for div in response.css('.quote'):
 8 |             tags = div.css('.tag::text').getall()
 9 |             tags = ' , '.join(tags)
10 |             yield {
11 |                 'Quote': div.css('.text::text').get(),
12 |                 'Author': div.css('.author::text').get(),
13 |                 'Tags' : tags
14 |             }
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Quizzes/Scrapy/Quiz 2 (33-Quiz(Extracting the Year)).py:
--------------------------------------------------------------------------------
 1 | import scrapy
 2 | class Quiz(scrapy.Spider):
 3 |     name='quiz2'
 4 |     start_urls = ['https://www.imdb.com/chart/top/']
 5 | 
 6 |     def parse(self, response):
 7 |         for td in response.css('.titleColumn'):
 8 |             dict = {
 9 |                 'Movie' : td.css('a::text').get(),
10 |                 'Year': td.css('span::text').get()
11 |             }
12 |             url = td.css('a::attr(href)').get()
13 |             yield response.follow(url, callback=self.parseMovie, meta=dict)
14 | 
15 |     def parseMovie(self, response):
16 |         duration = response.css('.subtext time::text').get().strip()
17 |         movie = response.meta['Movie']
18 |         year = response.meta['Year']
19 | 
20 |         yield {
21 |             'Movie': movie,
22 |             'Year': year,
23 |             'Duration': duration
24 |         }
25 | 


--------------------------------------------------------------------------------
/Quizzes/Scrapy/quiz1solution.csv:
--------------------------------------------------------------------------------
 1 | Quote,Author,Tags
 2 | “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”,Albert Einstein,"change , deep-thoughts , thinking , world"
 3 | "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",J.K. Rowling,"abilities , choices"
 4 | “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”,Albert Einstein,"inspirational , life , live , miracle , miracles"
 5 | "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",Jane Austen,"aliteracy , books , classic , humor"
 6 | "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",Marilyn Monroe,"be-yourself , inspirational"
 7 | “Try not to become a man of success. Rather become a man of value.”,Albert Einstein,"adulthood , success , value"
 8 | “It is better to be hated for what you are than to be loved for what you are not.”,André Gide,"life , love"
 9 | "“I have not failed. I've just found 10,000 ways that won't work.”",Thomas A. Edison,"edison , failure , inspirational , paraphrased"
10 | “A woman is like a tea bag; you never know how strong it is until it's in hot water.”,Eleanor Roosevelt,misattributed-eleanor-roosevelt
11 | "“A day without sunshine is like, you know, night.”",Steve Martin,"humor , obvious , simile"
12 | 


--------------------------------------------------------------------------------
/Quizzes/Scrapy/quiz2solution.csv:
--------------------------------------------------------------------------------
  1 | Movie,Year,Duration
  2 | The Shawshank Redemption,(1994),2h 22min
  3 | 12 Angry Men,(1957),1h 36min
  4 | Inception,(2010),2h 28min
  5 | Schindler's List,(1993),3h 15min
  6 | The Dark Knight,(2008),2h 32min
  7 | The Godfather,(1972),2h 55min
  8 | The Godfather: Part II,(1974),3h 22min
  9 | The Lord of the Rings: The Return of the King,(2003),3h 21min
 10 | Forrest Gump,(1994),2h 22min
 11 | The Lord of the Rings: The Fellowship of the Ring,(2001),2h 58min
 12 | "The Good, the Bad and the Ugly",(1966),2h 58min
 13 | Fight Club,(1999),2h 19min
 14 | Pulp Fiction,(1994),2h 34min
 15 | Star Wars: Episode V - The Empire Strikes Back,(1980),2h 4min
 16 | The Lord of the Rings: The Two Towers,(2002),2h 59min
 17 | Swades,(2004),3h 9min
 18 | Saving Private Ryan,(1998),2h 49min
 19 | Tangerines,(2013),1h 27min
 20 | The Battle of Algiers,(1966),2h 1min
 21 | Three Colors: Red,(1994),1h 39min
 22 | The Help,(2011),2h 26min
 23 | A Silent Voice: The Movie,(2016),2h 10min
 24 | Drishyam,(2015),2h 43min
 25 | The Invisible Guest,(2016),1h 46min
 26 | "Paris, Texas",(1984),2h 25min
 27 | It Happened One Night,(1934),1h 45min
 28 | Portrait of a Lady on Fire,(2019),2h 2min
 29 | In the Mood for Love,(2000),1h 38min
 30 | Rang De Basanti,(2006),2h 47min
 31 | Rififi,(1955),1h 58min
 32 | Rebecca,(1940),2h 10min
 33 | Before Sunset,(2004),1h 20min
 34 | Nausicaä of the Valley of the Wind,(1984),1h 57min
 35 | Andrei Rublev,(1966),3h 25min
 36 | "Monsters, Inc.",(2001),1h 32min
 37 | Rocky,(1976),2h
 38 | Gangs of Wasseypur,(2012),5h 21min
 39 | Amores Perros,(2000),2h 34min
 40 | Time of the Gypsies,(1988),2h 22min
 41 | Hotel Rwanda,(2004),2h 1min
 42 | The Passion of Joan of Arc,(1928),1h 54min
 43 | Spotlight,(2015),2h 9min
 44 | The 400 Blows,(1959),1h 39min
 45 | The Wages of Fear,(1953),2h 11min
 46 | La Haine,(1995),1h 38min
 47 | Rush,(2013),2h 3min
 48 | Into the Wild,(2007),2h 28min
 49 | Logan,(2017),2h 17min
 50 | Monty Python's Life of Brian,(1979),1h 34min
 51 | The Handmaiden,(2016),2h 25min
 52 | Hachi: A Dog's Tale,(2009),1h 33min
 53 | Platoon,(1986),2h
 54 | Stand by Me,(1986),1h 29min
 55 | Network,(1976),2h 1min
 56 | Cool Hand Luke,(1967),2h 7min
 57 | Ben-Hur,(1959),3h 32min
 58 | Harry Potter and the Deathly Hallows: Part 2,(2011),2h 10min
 59 | Million Dollar Baby,(2004),2h 12min
 60 | Mr. Smith Goes to Washington,(1939),2h 9min
 61 | The Bandit,(1996),2h 8min
 62 | Dead Poets Society,(1989),2h 8min
 63 | Barry Lyndon,(1975),3h 5min
 64 | Soul,(2020),1h 40min
 65 | Mad Max: Fury Road,(2015),2h
 66 | 12 Years a Slave,(2013),2h 14min
 67 | Sherlock Jr.,(1924),45min
 68 | To Be or Not to Be,(1942),1h 39min
 69 | Autumn Sonata,(1978),1h 39min
 70 | How to Train Your Dragon,(2010),1h 38min
 71 | The General,(1926),1h 7min
 72 | Ford v Ferrari,(2019),2h 32min
 73 | The Big Lebowski,(1998),1h 57min
 74 | Andhadhun,(2018),2h 19min
 75 | Prisoners,(2013),2h 33min
 76 | Persona,(1966),1h 23min
 77 | Hacksaw Ridge,(2016),2h 19min
 78 | Catch Me If You Can,(2002),2h 21min
 79 | Before Sunrise,(1995),1h 41min
 80 | Gone Girl,(2014),2h 29min
 81 | Mary and Max,(2009),1h 32min
 82 | Anand,(1971),2h 2min
 83 | In the Name of the Father,(1993),2h 13min
 84 | The Grand Budapest Hotel,(2014),1h 39min
 85 | The Deer Hunter,(1978),3h 3min
 86 | On the Waterfront,(1954),1h 48min
 87 | Tokyo Story,(1953),2h 16min
 88 | The Third Man,(1949),1h 33min
 89 | Wild Tales,(2014),2h 2min
 90 | Gran Torino,(2008),1h 56min
 91 | Room,(2015),1h 58min
 92 | The Bridge on the River Kwai,(1957),2h 41min
 93 | Fargo,(1996),1h 38min
 94 | Memories of Murder,(2003),2h 12min
 95 | Kill Bill: Vol. 1,(2003),1h 51min
 96 | Blade Runner,(1982),1h 57min
 97 | Wild Strawberries,(1957),1h 31min
 98 | Stalker,(1979),2h 42min
 99 | Finding Nemo,(2003),1h 40min
100 | Gone with the Wind,(1939),3h 58min
101 | The Truman Show,(1998),1h 43min
102 | Jurassic Park,(1993),2h 7min
103 | Trainspotting,(1996),1h 33min
104 | The Thing,(1982),1h 49min
105 | The Sixth Sense,(1999),1h 47min
106 | The Seventh Seal,(1957),1h 36min
107 | The Elephant Man,(1980),2h 4min
108 | Klaus,(2019),1h 36min
109 | Warrior,(2011),2h 20min
110 | My Father and My Son,(2005),1h 52min
111 | Inside Out,(2015),1h 35min
112 | V for Vendetta,(2005),2h 12min
113 | No Country for Old Men,(2007),2h 2min
114 | Chinatown,(1974),2h 10min
115 | Shutter Island,(2010),2h 18min
116 | "Three Billboards Outside Ebbing, Missouri",(2017),1h 55min
117 | The Gold Rush,(1925),1h 35min
118 | Dial M for Murder,(1954),1h 45min
119 | The Treasure of the Sierra Madre,(1948),2h 6min
120 | Raging Bull,(1980),2h 9min
121 | My Neighbor Totoro,(1988),1h 26min
122 | Judgment at Nuremberg,(1961),2h 59min
123 | The Secret in Their Eyes,(2009),2h 9min
124 | Pan's Labyrinth,(2006),1h 58min
125 | "Lock, Stock and Two Smoking Barrels",(1998),1h 47min
126 | The Great Escape,(1963),2h 52min
127 | There Will Be Blood,(2007),2h 38min
128 | Casino,(1995),2h 58min
129 | All About Eve,(1950),2h 18min
130 | A Beautiful Mind,(2001),2h 15min
131 | The Wolf of Wall Street,(2013),3h
132 | Ran,(1985),2h 42min
133 | Unforgiven,(1992),2h 10min
134 | Howl's Moving Castle,(2004),1h 59min
135 | Some Like It Hot,(1959),2h 1min
136 | Children of Heaven,(1997),1h 29min
137 | Downfall,(2004),2h 36min
138 | Rashomon,(1950),1h 28min
139 | Yojimbo,(1961),1h 50min
140 | Batman Begins,(2005),2h 20min
141 | Die Hard,(1988),2h 12min
142 | Monty Python and the Holy Grail,(1975),1h 31min
143 | Green Book,(2018),2h 10min
144 | L.A. Confidential,(1997),2h 18min
145 | Heat,(1995),2h 50min
146 | Indiana Jones and the Last Crusade,(1989),2h 7min
147 | For a Few Dollars More,(1965),2h 12min
148 | Up,(2009),1h 36min
149 | Double Indemnity,(1944),1h 47min
150 | To Kill a Mockingbird,(1962),2h 9min
151 | Metropolis,(1927),2h 33min
152 | The Apartment,(1960),2h 5min
153 | The Sting,(1973),2h 9min
154 | Amélie,(2001),2h 2min
155 | A Separation,(2011),2h 3min
156 | Incendies,(2010),2h 11min
157 | Lawrence of Arabia,(1962),3h 48min
158 | Taxi Driver,(1976),1h 54min
159 | Ikiru,(1952),2h 23min
160 | Come and See,(1985),2h 22min
161 | Toy Story 3,(2010),1h 43min
162 | 1917,(2019),1h 59min
163 | A Clockwork Orange,(1971),2h 16min
164 | North by Northwest,(1959),2h 16min
165 | Singin' in the Rain,(1952),1h 43min
166 | The Kid,(1921),1h 8min
167 | Scarface,(1983),2h 50min
168 | Bicycle Thieves,(1948),1h 29min
169 | Snatch,(2000),1h 42min
170 | Full Metal Jacket,(1987),1h 56min
171 | Dangal,(2016),2h 41min
172 | Citizen Kane,(1941),1h 59min
173 | Vertigo,(1958),2h 8min
174 | Requiem for a Dream,(2000),1h 42min
175 | Eternal Sunshine of the Spotless Mind,(2004),1h 48min
176 | M,(1931),1h 39min
177 | Reservoir Dogs,(1992),1h 39min
178 | The Hunt,(2012),1h 55min
179 | 2001: A Space Odyssey,(1968),2h 29min
180 | Like Stars on Earth,(2007),2h 45min
181 | Good Will Hunting,(1997),2h 6min
182 | 3 Idiots,(2009),2h 50min
183 | Amadeus,(1984),2h 40min
184 | Inglourious Basterds,(2009),2h 33min
185 | Star Wars: Episode VI - Return of the Jedi,(1983),2h 11min
186 | High and Low,(1963),2h 23min
187 | Das Boot,(1981),2h 29min
188 | Capernaum,(2018),2h 6min
189 | Braveheart,(1995),2h 58min
190 | Aliens,(1986),2h 17min
191 | Toy Story,(1995),1h 21min
192 | American Beauty,(1999),2h 2min
193 | Your Name.,(2016),1h 46min
194 | Coco,(2017),1h 45min
195 | Once Upon a Time in America,(1984),3h 49min
196 | Avengers: Endgame,(2019),3h 1min
197 | The Dark Knight Rises,(2012),2h 44min
198 | Princess Mononoke,(1997),2h 14min
199 | Sunset Blvd.,(1950),1h 50min
200 | Witness for the Prosecution,(1957),1h 56min
201 | Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb,(1964),1h 35min
202 | Oldboy,(2003),2h
203 | The Shining,(1980),2h 26min
204 | WALL·E,(2008),1h 38min
205 | Avengers: Infinity War,(2018),2h 29min
206 | Paths of Glory,(1957),1h 28min
207 | The Lives of Others,(2006),2h 17min
208 | Joker,(2019),2h 2min
209 | Indiana Jones and the Raiders of the Lost Ark,(1981),1h 55min
210 | Hamilton,(2020),2h 40min
211 | The Great Dictator,(1940),2h 5min
212 | Memento,(2000),1h 53min
213 | Apocalypse Now,(1979),2h 27min
214 | Spider-Man: Into the Spider-Verse,(2018),1h 57min
215 | Django Unchained,(2012),2h 45min
216 | Alien,(1979),1h 57min
217 | Cinema Paradiso,(1988),2h 35min
218 | Rear Window,(1954),1h 52min
219 | Once Upon a Time in the West,(1968),2h 45min
220 | The Prestige,(2006),2h 10min
221 | Casablanca,(1942),1h 42min
222 | Grave of the Fireflies,(1988),1h 29min
223 | Whiplash,(2014),1h 46min
224 | The Intouchables,(2011),1h 52min
225 | City Lights,(1931),1h 27min
226 | The Departed,(2006),2h 31min
227 | Modern Times,(1936),1h 27min
228 | Psycho,(1960),1h 49min
229 | Back to the Future,(1985),1h 56min
230 | The Pianist,(2002),2h 30min
231 | The Usual Suspects,(1995),1h 46min
232 | Hara-Kiri,(1962),2h 13min
233 | American History X,(1998),1h 59min
234 | Terminator 2: Judgment Day,(1991),2h 17min
235 | The Lion King,(1994),1h 28min
236 | Gladiator,(2000),2h 35min
237 | Léon: The Professional,(1994),1h 50min
238 | Parasite,(2019),2h 12min
239 | Interstellar,(2014),2h 49min
240 | It's a Wonderful Life,(1946),2h 10min
241 | Spirited Away,(2001),2h 5min
242 | The Green Mile,(1999),3h 9min
243 | City of God,(2002),2h 10min
244 | Star Wars: Episode IV - A New Hope,(1977),2h 1min
245 | Life Is Beautiful,(1997),1h 56min
246 | The Silence of the Lambs,(1991),1h 58min
247 | One Flew Over the Cuckoo's Nest,(1975),2h 13min
248 | The Matrix,(1999),2h 16min
249 | Goodfellas,(1990),2h 26min
250 | Seven Samurai,(1954),3h 27min
251 | Se7en,(1995),2h 7min
252 | 


--------------------------------------------------------------------------------
/Quizzes/Selenium/Quiz 1 (06-Quiz(Extracting Quotes)).py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 3 | 
 4 | driver.get('https://quotes.toscrape.com/')
 5 | for div in driver.find_elements_by_css_selector('.quote'):
 6 |     print(div.find_element_by_css_selector('.text').text)
 7 |     print(div.find_element_by_css_selector('.author').text)
 8 |     for tag in div.find_elements_by_css_selector('.tag'):
 9 |         print(tag.text)
10 |     print('--------------------')
11 | 
12 | 
13 | driver.quit()


--------------------------------------------------------------------------------
/Quizzes/Selenium/Quiz 2 (12-Quiz(Log in and Extract Quote)).py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import time
 3 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 4 | 
 5 | driver.get('https://quotes.toscrape.com/')
 6 | 
 7 | driver.find_element_by_css_selector('.header-box p a').click()
 8 | userName = driver.find_element_by_css_selector('#username')
 9 | userName.send_keys('XZY')
10 | time.sleep(3)
11 | password = driver.find_element_by_css_selector('#password')
12 | password.send_keys('12345')
13 | time.sleep(3)
14 | driver.find_element_by_css_selector('[value="Login"]').click()
15 | for div in driver.find_elements_by_css_selector('.text'):
16 |     print(div.text)
17 | 
18 | 
19 | 
20 | 
21 | # driver.quit()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | # Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python
5 | Data Scraping and Data Mining from Beginner to Pro with Python, published by Packt Publishing
6 | 


--------------------------------------------------------------------------------
/Requests/Lec 10-12.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import json
 3 | with open('News.txt','w') as f:
 4 |     for i in range(1,6):
 5 |         url = f'https://www.espncricinfo.com/ci/content/story/data/index.json?;type=7;page={i}'
 6 |         res = requests.get(url)
 7 |         data = json.loads(res.text)
 8 |         for news in data:
 9 |             f.write(news['author']+' | '+news['summary'])
10 |             f.write('\n')
11 | 


--------------------------------------------------------------------------------
/Requests/Lec 2-6.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | for i in range(1, 12):
 3 |     print("Page: ", i)
 4 |     url = f'https://quotes.toscrape.com/page/{i}/'
 5 |     r = requests.get(url)
 6 |     html = r.text
 7 |     with open('quotes.txt', 'a', encoding='utf-8') as f:
 8 |         for line in html.split('\n'):
 9 |             if '<span class="text" itemprop="text">' in line:
10 |                 line = line.replace('<span class="text" itemprop="text">“', '').replace('”</span>', '')
11 |                 line = line.strip()
12 |                 f.write(line)
13 |                 f.write("\n")
14 | 


--------------------------------------------------------------------------------
/SCRAPY.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/SCRAPY.zip


--------------------------------------------------------------------------------
/Selenium/DeepL Script.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import time
 3 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 4 | 
 5 | with open('input.txt','r') as f:
 6 |     text = f.read().strip()
 7 | 
 8 | driver.get('https://www.deepl.com/translator')
 9 | driver.find_element_by_css_selector('.dl_cookieBanner--buttonClose').click()
10 | time.sleep(3)
11 | driver.find_element_by_css_selector('.lmt__language_container_prim .lmt__language_select__opener').click()
12 | time.sleep(3)
13 | driver.find_element_by_css_selector('[dl-test="translator-lang-option-ru-RU"]').click()
14 | time.sleep(3)
15 | inputTextArea = driver.find_element_by_css_selector('.lmt__textarea')
16 | inputTextArea.send_keys(text)
17 | time.sleep(5)
18 | driver.find_element_by_css_selector('.lmt__target_toolbar__save button').click()
19 | 
20 | # driver.quit()
21 | 
22 | 


--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 11.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import time
 3 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 4 | 
 5 | driver.get('https://quotes.toscrape.com/')
 6 | 
 7 | driver.find_element_by_css_selector('.header-box p a').click()
 8 | username = driver.find_element_by_css_selector('#username')
 9 | username.send_keys('ABC')
10 | time.sleep(3)
11 | password = driver.find_element_by_css_selector('#password')
12 | password.send_keys('12345')
13 | time.sleep(3)
14 | driver.find_element_by_css_selector('[value="Login"]').click()
15 | 
16 | driver.quit()


--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 4.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 3 | 
 4 | 
 5 | driver.get('https://quotes.toscrape.com/')
 6 | print(type(driver.find_element_by_css_selector('.text')))
 7 | print(driver.find_element_by_css_selector('.text').text)
 8 | 
 9 | print('--------------------')
10 | print(type(driver.find_elements_by_css_selector('.text')))
11 | for tag in driver.find_elements_by_css_selector('.text'):
12 |     print(tag.text)
13 | 
14 | driver.quit()


--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 5.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 3 | 
 4 | 
 5 | driver.get('https://quotes.toscrape.com/')
 6 | for div in driver.find_elements_by_css_selector('.quote'):
 7 |     print(div.find_element_by_css_selector('.text').text)
 8 |     print(div.find_element_by_css_selector('.author').text)
 9 |     print('----------------')
10 | 
11 | 
12 | 
13 | 
14 | driver.quit()


--------------------------------------------------------------------------------
/Selenium/Selenium Different Lectures Scripts/lec 9-10.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | driver = webdriver.Chrome(executable_path='chromedriver.exe')
 3 | 
 4 | driver.get('https://quotes.toscrape.com/page/9/')
 5 | 
 6 | while True:
 7 |     for div in driver.find_elements_by_css_selector('.quote'):
 8 |         print(div.find_element_by_css_selector('.text').text)
 9 |         print(div.find_element_by_css_selector('.author').text)
10 | 
11 |     try:
12 |         driver.find_element_by_css_selector('.next a').click()
13 |     except:
14 |         break
15 | 
16 | 
17 | 
18 | 
19 | driver.quit()


--------------------------------------------------------------------------------
/Selenium/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Selenium/chromedriver.exe


--------------------------------------------------------------------------------
/Selenium/chromedriver_win32.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Selenium/chromedriver_win32.zip


--------------------------------------------------------------------------------
/Selenium/input.txt:
--------------------------------------------------------------------------------
1 | Happy data scraping, Hope this helps you.


--------------------------------------------------------------------------------
/Slides.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Slides.pptx


--------------------------------------------------------------------------------