├── BS4 ├── Lec 11-17.py ├── Lec 18-21.py ├── Lec 4-6.py └── Lec 9-10.py ├── LICENSE ├── Quizzes ├── BS4 │ ├── Quiz 2 (07-Quiz(Extracting Author Names)).py │ ├── Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py │ ├── quiz2solution.csv │ └── quiz3solution.csv ├── CSS Selectors │ ├── Quiz 1 (03-Quiz(Tags)).html │ ├── Quiz 10 (28-Quiz(Last Child)).html │ ├── Quiz 11 (31-Quiz(Negation)).html │ ├── Quiz 12 (34-Quiz(Attributes Values)).html │ ├── Quiz 2 (06-Quiz(Descendants)).html │ ├── Quiz 3 (08-Quiz(ID)).html │ ├── Quiz 4 (13-Quiz(Class with Tag)).html │ ├── Quiz 5 (16-Quiz(Combining Two Selectors)).html │ ├── Quiz 6 (19-Quiz(Adjacent Sibling)).html │ ├── Quiz 7 (21-Quiz(General Sibling)).html │ ├── Quiz 8 (24-Quiz(First Child)).html │ └── Quiz 9 (26-Quiz(Only Child)).html ├── Requests │ ├── Quiz 1 (04-Quiz(Extracting Authors)).py │ ├── Quiz 2 (07-Quiz(Extracting Author and Quotes)).py │ ├── Quiz 3 (13-Quiz(Extracting Top Stats from Cricinfo)).py │ ├── quiz1solution.txt │ ├── quiz2solution.csv │ └── quiz3solution.csv ├── Scrapy │ ├── Quiz 1 (24-Quiz(Get The Tags)).py │ ├── Quiz 2 (33-Quiz(Extracting the Year)).py │ ├── quiz1solution.csv │ └── quiz2solution.csv └── Selenium │ ├── Quiz 1 (06-Quiz(Extracting Quotes)).py │ └── Quiz 2 (12-Quiz(Log in and Extract Quote)).py ├── README.md ├── Requests ├── Lec 10-12.py └── Lec 2-6.py ├── SCRAPY.zip ├── Selenium ├── DeepL Script.py ├── Selenium Different Lectures Scripts │ ├── lec 11.py │ ├── lec 4.py │ ├── lec 5.py │ └── lec 9-10.py ├── chromedriver.exe ├── chromedriver_win32.zip └── input.txt └── Slides.pptx /BS4/Lec 11-17.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | res = requests.get('https://www.imdb.com/chart/top/') 4 | html = res.text 5 | 6 | soup = BeautifulSoup(html, 'html.parser') 7 | tbody = soup.find('tbody', {'class': 'lister-list'}) 8 | trs = tbody.findAll('tr') 9 | for tr in trs: 10 | td = tr.find('td', {'class': 'titleColumn'}) 11 | movieId = td.a['href'] 12 | movieUrl = f'https://www.imdb.com/{movieId}' 13 | 14 | res2 = requests.get(movieUrl) 15 | html = res2.text 16 | soup2 = BeautifulSoup(html, 'html.parser') 17 | info = soup2.find('div', {'class': 'subtext'}) 18 | 19 | a = info.findAll('a') 20 | print(td.a.string) 21 | print(info.time.string.strip()) 22 | print(a[0].string.strip()) 23 | print(a[1].string.strip()) 24 | 25 | -------------------------------------------------------------------------------- /BS4/Lec 18-21.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | 4 | movieName = input('Enter Movie Name: ') 5 | movieName = movieName.lower() 6 | 7 | res = requests.get('https://www.imdb.com/chart/top/') 8 | html = res.text 9 | 10 | soup = BeautifulSoup(html, 'html.parser') 11 | tbody = soup.find('tbody', {'class': 'lister-list'}) 12 | trs = tbody.findAll('tr') 13 | for tr in trs: 14 | td = tr.find('td', {'class': 'titleColumn'}) 15 | imdbMovieName = td.a.string.strip().lower() 16 | if imdbMovieName == movieName: 17 | movieId = td.a['href'] 18 | movieUrl = f'https://www.imdb.com/{movieId}' 19 | res2 = requests.get(movieUrl) 20 | html = res2.text 21 | soup2 = BeautifulSoup(html, 'html.parser') 22 | summary = soup2.find('div', {'class': 'credit_summary_item'}) 23 | dirID = summary.a['href'] 24 | dirUrl = f'https://www.imdb.com/{dirID}' 25 | print("Dir Name: ",summary.a.string) 26 | res3 = requests.get(dirUrl) 27 | html = res3.text 28 | soup3 = BeautifulSoup(html, 'html.parser') 29 | knownfor = soup3.find('div', {'id': 'knownfor'}) 30 | movieDivs = knownfor.findAll('div', {'class': 'knownfor-title'}) 31 | for div in movieDivs: 32 | moviediv = div.find('div', {'class': 'knownfor-title-role'}) 33 | print(moviediv.a.string) 34 | 35 | break 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /BS4/Lec 4-6.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import requests 3 | r = requests.get('https://quotes.toscrape.com/') 4 | html = r.text 5 | soup = BeautifulSoup(html, 'html.parser') 6 | with open('bs4quotes.txt','w') as f: 7 | for tag in soup.findAll('span', {'class': 'text'}): 8 | f.write(tag.string) 9 | f.write('\n') -------------------------------------------------------------------------------- /BS4/Lec 9-10.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | html = ' Hello World' 3 | soup = BeautifulSoup(html, 'html.parser', multi_valued_attributes=None) 4 | tag = soup.b 5 | 6 | print(tag['id']) 7 | print(tag['class']) 8 | print(tag.attrs) 9 | print(tag) 10 | tag['id'] = 'HELLO' 11 | tag['class'] = 'World' 12 | print(tag) 13 | print(tag['class']) 14 | 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Quizzes/BS4/Quiz 2 (07-Quiz(Extracting Author Names)).py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | res = requests.get('https://quotes.toscrape.com/') 4 | html = res.text 5 | soup = BeautifulSoup(html, 'html.parser') 6 | with open('AuthorNames.csv', 'w') as f: 7 | for tag in soup.findAll('small',{'class':'author'}): 8 | f.write(tag.string) 9 | f.write('\n') 10 | -------------------------------------------------------------------------------- /Quizzes/BS4/Quiz 3 (12-Quiz(Getting the Rattings,Year,Name of the Movie)).py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | res = requests.get('https://www.imdb.com/chart/top/') 4 | html = res.text 5 | soup = BeautifulSoup(html, 'html.parser') 6 | tbody = soup.find('tbody', {'class':'lister-list'}) 7 | trs = tbody.findAll('tr') 8 | with open('imdbMoviesNameRating.csv', 'w') as f: 9 | for tr in trs: 10 | movieNametd = tr.find('td',{'class':'titleColumn'}) 11 | ratingtd = tr.find('td',{'class':'ratingColumn'}) 12 | f.write(movieNametd.a.string+ "," + movieNametd.span.string + "," +ratingtd.strong.string) 13 | f.write('\n') 14 | 15 | -------------------------------------------------------------------------------- /Quizzes/BS4/quiz2solution.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz2solution.csv -------------------------------------------------------------------------------- /Quizzes/BS4/quiz3solution.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Data-Scraping-and-Data-Mining-from-Beginner-to-Pro-with-Python/f59155b38464be004d88e239ac1e610d2500434b/Quizzes/BS4/quiz3solution.csv -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 1 (03-Quiz(Tags)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | Hello 10 | World 11 |
How are you?
12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 10 (28-Quiz(Last Child)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
a
10 |
b
11 |
c
12 |
d
13 |
e
14 |
c
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 11 (31-Quiz(Negation)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
a
10 |
b
11 | 12 | 13 | c 14 | 15 | 16 | d 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 12 (34-Quiz(Attributes Values)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | a 9 |
10 | 11 | b 12 | 13 | 14 | c 15 | 16 | 17 | d 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 2 (06-Quiz(Descendants)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 | b 11 |
c
12 |
13 | 14 | 15 | d 16 | e 17 |
f
18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 3 (08-Quiz(ID)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 | b 11 |
c
12 |
13 | 14 | d 15 | e 16 |
f
17 |
18 | 19 | 20 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 4 (13-Quiz(Class with Tag)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 | b 11 |
c
12 |
13 | 14 | d 15 | e 16 | b 17 | ball 18 | bat 19 |
f
20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 5 (16-Quiz(Combining Two Selectors)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 | ball 11 |
c
12 |
13 | 14 | d 15 | e 16 |
bat
17 |
f
18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 6 (19-Quiz(Adjacent Sibling)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 |
ball
11 |
c
12 |
13 | 14 | d 15 | e 16 |
bat
17 |
f
18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 7 (21-Quiz(General Sibling)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 |
ball
11 |
c
12 |
13 | 14 | d 15 | e 16 |
bat
17 |
f
18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 8 (24-Quiz(First Child)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | a 10 |
ball
11 |
c
12 |
cc
13 |
ccc
14 | d 15 |
cccc
16 |
17 | 18 | d 19 | e 20 |
bat
21 |
f
22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /Quizzes/CSS Selectors/Quiz 9 (26-Quiz(Only Child)).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
a
10 |
b
11 | c 12 |
13 | 14 | b 15 | c 16 | 17 | 18 | -------------------------------------------------------------------------------- /Quizzes/Requests/Quiz 1 (04-Quiz(Extracting Authors)).py: -------------------------------------------------------------------------------- 1 | import requests 2 | res = requests.get('https://quotes.toscrape.com/') 3 | html = res.text 4 | with open('Authors.txt','w') as f: 5 | for line in html.split('\n'): 6 | if '