├── images
    ├── loss.png
    └── accuracy.png
├── imdb
    ├── idmain.png
    ├── imdbData.png
    ├── imdbmain.png
    ├── imdbweb.png
    ├── first-line.png
    ├── highlight-elements.png
    ├── highlight-firstmovie.png
    ├── right-click-inspect.png
    ├── highlight-movie-frame.png
    └── hightlight-target-text.png
├── imdbData.py
├── python-code
    └── imdbwebscraping.py
├── jupyter-notebook
    ├── IMDBwebscraping.ipynb
    └── IMDb-Web-Scraping-Full-Code.ipynb
└── README.md


/images/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/images/loss.png


--------------------------------------------------------------------------------
/imdb/idmain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/idmain.png


--------------------------------------------------------------------------------
/imdb/imdbData.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/imdbData.png


--------------------------------------------------------------------------------
/imdb/imdbmain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/imdbmain.png


--------------------------------------------------------------------------------
/imdb/imdbweb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/imdbweb.png


--------------------------------------------------------------------------------
/images/accuracy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/images/accuracy.png


--------------------------------------------------------------------------------
/imdb/first-line.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/first-line.png


--------------------------------------------------------------------------------
/imdb/highlight-elements.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/highlight-elements.png


--------------------------------------------------------------------------------
/imdb/highlight-firstmovie.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/highlight-firstmovie.png


--------------------------------------------------------------------------------
/imdb/right-click-inspect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/right-click-inspect.png


--------------------------------------------------------------------------------
/imdb/highlight-movie-frame.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/highlight-movie-frame.png


--------------------------------------------------------------------------------
/imdb/hightlight-target-text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Reljod/Python-Data-Scraping-IMDb-Movie-site-using-BeautifulSoup-Series-1-/HEAD/imdb/hightlight-target-text.png


--------------------------------------------------------------------------------
/imdbData.py:
--------------------------------------------------------------------------------
 1 | import lxml
 2 | import re
 3 | import numpy as np
 4 | import pandas as pd
 5 | 
 6 | from bs4 import BeautifulSoup
 7 | from requests import get
 8 | 
 9 | url1 = "https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2"
10 | 
11 | class IMDB(object):
12 | 	"""docstring for IMDB"""
13 | 	def __init__(self, url):
14 | 		super(IMDB, self).__init__()
15 | 		page = get(url)
16 | 
17 | 		self.soup = BeautifulSoup(page.content, 'lxml')
18 | 
19 | 	def articleTitle(self):
20 | 		return self.soup.find("h1", class_="header").text.replace("\n","")
21 | 
22 | 	def bodyContent(self):
23 | 		content = self.soup.find(id="main")
24 | 		return content.find_all("div", class_="lister-item mode-advanced")
25 | 
26 | 	def movieData(self):
27 | 		movieFrame = self.bodyContent()
28 | 		movieTitle = []
29 | 		movieDate = []
30 | 		movieRunTime = []
31 | 		movieGenre = []
32 | 		movieRating = []
33 | 		movieScore = []
34 | 		movieDescription = []
35 | 		movieDirector = []
36 | 		movieStars = []
37 | 		movieVotes = []
38 | 		movieGross = []
39 | 		for movie in movieFrame:
40 | 			movieFirstLine = movie.find("h3", class_="lister-item-header")
41 | 			movieTitle.append(movieFirstLine.find("a").text)
42 | 			movieDate.append(re.sub(r"[()]","", movieFirstLine.find_all("span")[-1].text))
43 | 			try:
44 | 				movieRunTime.append(movie.find("span", class_="runtime").text[:-4])
45 | 			except:
46 | 				movieRunTime.append(np.nan)
47 | 			movieGenre.append(movie.find("span", class_="genre").text.rstrip().replace("\n","").split(","))
48 | 			try:
49 | 				movieRating.append(movie.find("strong").text)
50 | 			except:
51 | 				movieRating.append(np.nan)
52 | 			try:
53 | 				movieScore.append(movie.find("span", class_="metascore unfavorable").text.rstrip())
54 | 			except:
55 | 				movieScore.append(np.nan)
56 | 			movieDescription.append(movie.find_all("p", class_="text-muted")[-1].text.lstrip())
57 | 			movieCast = movie.find("p", class_="")
58 | 
59 | 			try:
60 | 				casts = movieCast.text.replace("\n","").split('|')
61 | 				casts = [x.strip() for x in casts]
62 | 				casts = [casts[i].replace(j, "") for i,j in enumerate(["Director:", "Stars:"])]
63 | 				movieDirector.append(casts[0])
64 | 				movieStars.append([x.strip() for x in casts[1].split(",")])
65 | 			except:
66 | 				casts = movieCast.text.replace("\n","").strip()
67 | 				movieDirector.append(np.nan)
68 | 				movieStars.append([x.strip() for x in casts.split(",")])
69 | 
70 | 			movieNumbers = movie.find_all("span", attrs={"name": "nv"})
71 | 
72 | 			if len(movieNumbers) == 2:
73 | 				movieVotes.append(movieNumbers[0].text)
74 | 				movieGross.append(movieNumbers[1].text)
75 | 			elif len(movieNumbers) == 1:
76 | 				movieVotes.append(movieNumbers[0].text)
77 | 				movieGross.append(np.nan)
78 | 			else:
79 | 				movieVotes.append(np.nan)
80 | 				movieGross.append(np.nan)
81 | 
82 | 		movieData = [movieTitle, movieDate, movieRunTime, movieGenre, movieRating, movieScore, movieDescription,
83 | 							movieDirector, movieStars, movieVotes, movieGross]
84 | 		return movieData


--------------------------------------------------------------------------------
/python-code/imdbwebscraping.py:
--------------------------------------------------------------------------------
 1 | import lxml
 2 | import re
 3 | import numpy as np
 4 | import pandas as pd
 5 | 
 6 | from bs4 import BeautifulSoup
 7 | from requests import get
 8 | 
 9 | url1 = "https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2"
10 | 
11 | class IMDB(object):
12 | 	"""docstring for IMDB"""
13 | 	def __init__(self, url):
14 | 		super(IMDB, self).__init__()
15 | 		page = get(url)
16 | 
17 | 		self.soup = BeautifulSoup(page.content, 'lxml')
18 | 
19 | 	def articleTitle(self):
20 | 		return self.soup.find("h1", class_="header").text.replace("\n","")
21 | 
22 | 	def bodyContent(self):
23 | 		content = self.soup.find(id="main")
24 | 		return content.find_all("div", class_="lister-item mode-advanced")
25 | 
26 | 	def movieData(self):
27 | 		movieFrame = self.bodyContent()
28 | 		movieTitle = []
29 | 		movieDate = []
30 | 		movieRunTime = []
31 | 		movieGenre = []
32 | 		movieRating = []
33 | 		movieScore = []
34 | 		movieDescription = []
35 | 		movieDirector = []
36 | 		movieStars = []
37 | 		movieVotes = []
38 | 		movieGross = []
39 | 		for movie in movieFrame:
40 | 			movieFirstLine = movie.find("h3", class_="lister-item-header")
41 | 			movieTitle.append(movieFirstLine.find("a").text)
42 | 			movieDate.append(re.sub(r"[()]","", movieFirstLine.find_all("span")[-1].text))
43 | 			try:
44 | 				movieRunTime.append(movie.find("span", class_="runtime").text[:-4])
45 | 			except:
46 | 				movieRunTime.append(np.nan)
47 | 			movieGenre.append(movie.find("span", class_="genre").text.rstrip().replace("\n","").split(","))
48 | 			try:
49 | 				movieRating.append(movie.find("strong").text)
50 | 			except:
51 | 				movieRating.append(np.nan)
52 | 			try:
53 | 				movieScore.append(movie.find("span", class_="metascore unfavorable").text.rstrip())
54 | 			except:
55 | 				movieScore.append(np.nan)
56 | 			movieDescription.append(movie.find_all("p", class_="text-muted")[-1].text.lstrip())
57 | 			movieCast = movie.find("p", class_="")
58 | 
59 | 			try:
60 | 				casts = movieCast.text.replace("\n","").split('|')
61 | 				casts = [x.strip() for x in casts]
62 | 				casts = [casts[i].replace(j, "") for i,j in enumerate(["Director:", "Stars:"])]
63 | 				movieDirector.append(casts[0])
64 | 				movieStars.append([x.strip() for x in casts[1].split(",")])
65 | 			except:
66 | 				casts = movieCast.text.replace("\n","").strip()
67 | 				movieDirector.append(np.nan)
68 | 				movieStars.append([x.strip() for x in casts.split(",")])
69 | 
70 | 			movieNumbers = movie.find_all("span", attrs={"name": "nv"})
71 | 
72 | 			if len(movieNumbers) == 2:
73 | 				movieVotes.append(movieNumbers[0].text)
74 | 				movieGross.append(movieNumbers[1].text)
75 | 			elif len(movieNumbers) == 1:
76 | 				movieVotes.append(movieNumbers[0].text)
77 | 				movieGross.append(np.nan)
78 | 			else:
79 | 				movieVotes.append(np.nan)
80 | 				movieGross.append(np.nan)
81 | 
82 | 		movieData = [movieTitle, movieDate, movieRunTime, movieGenre, movieRating, movieScore, movieDescription,
83 | 							movieDirector, movieStars, movieVotes, movieGross]
84 | 		return movieData
85 | 
if __name__ == '__main__':
    # Scrape the default search page, then dump each extracted column in turn.
    scraper = IMDB(url1)
    print("Subject: ", scraper.articleTitle())
    for column in scraper.movieData():
        print(column)
92 | 
93 | 


--------------------------------------------------------------------------------
/jupyter-notebook/IMDBwebscraping.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Data Scraping IMDB Movie Site using Python"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Import Modules: "
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 36,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import pandas as pd\n",
 24 |     "import numpy as np\n",
 25 |     "import re\n",
 26 |     "import lxml\n",
 27 |     "\n",
 28 |     "from bs4 import BeautifulSoup\n",
 29 |     "from requests import get\n",
 30 |     "%matplotlib inline"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "##### Get the link of the page for the top 100 most popular movies:"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "url= \"https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2\""
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "##### Get the html of the page using requests.get(url) framework"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 4,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "page = get(url)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "##### Using Beautifulsoup and lxml to parse the html data"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 13,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "soup = BeautifulSoup(page.content, 'lxml')"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "## Get the element or tag that holds the movie contents\n",
 86 |     "![id-main-image](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/idmain.png)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 16,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "content = soup.find(id=\"main\")"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "## Get Article Title\n",
103 |     "#**soup.find(\"h1\", class_=\"header\")** finds the first line that has **h1** tag and has a **class** header.<br>\n",
104 |     "#**.text** gets the text of that line or that element.<br>\n",
105 |     "#**.replace(\"\\n\",\"\")** just erases **\\n**."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 25,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "articleTitle = soup.find(\"h1\", class_=\"header\").text.replace(\"\\n\",\"\")"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 27,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "movieFrame = content.find_all(\"div\", class_=\"lister-item mode-advanced\")"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 31,
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "movieFirstLine = movieFrame[0].find(\"h3\", class_=\"lister-item-header\")"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 34,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "movieTitle = movieFirstLine.find(\"a\").text"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 45,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "movieDate = re.sub(r\"[()]\",\"\", movieFirstLine.find_all(\"span\")[-1].text)"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 52,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "movieRunTime = movieFrame[0].find(\"span\", class_=\"runtime\").text[:-4]"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 71,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "movieGenre = movieFrame[0].find(\"span\", class_=\"genre\").text.rstrip().replace(\"\\n\",\"\").split(\",\")"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 74,
174 |    "metadata": {},
175 |    "outputs": [],
176 |    "source": [
177 |     "movieRating = movieFrame[0].find(\"strong\").text"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 78,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": [
186 |     "movieScore = movieFrame[0].find(\"span\", class_=\"metascore unfavorable\").text.rstrip()"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 105,
192 |    "metadata": {},
193 |    "outputs": [],
194 |    "source": [
195 |     "movieDesc = movieFrame[0].find_all(\"p\", class_=\"text-muted\")[-1].text.lstrip()"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 118,
201 |    "metadata": {},
202 |    "outputs": [],
203 |    "source": [
204 |     "movieCast = movieFrame[0].find(\"p\", class_=\"\")"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 165,
210 |    "metadata": {},
211 |    "outputs": [],
212 |    "source": [
213 |     "#Movie Director and Movie Stars\n",
214 |     "try:\n",
215 |     "    casts = movieCast.text.replace(\"\\n\",\"\").split('|')\n",
216 |     "    casts = [x.strip() for x in casts]\n",
217 |     "    casts = [casts[i].replace(j, \"\") for i,j in enumerate([\"Director:\", \"Stars:\"])]\n",
218 |     "    movieDirector = casts[0]\n",
219 |     "    movieStars = [x.strip() for x in casts[1].split(\",\")]\n",
220 |     "except:\n",
221 |     "    casts = movieCast.text.replace(\"\\n\",\"\").strip()\n",
222 |     "    movieDirector = np.nan\n",
223 |     "    movieStars = [x.strip() for x in casts.split(\",\")]"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 212,
229 |    "metadata": {},
230 |    "outputs": [],
231 |    "source": [
232 |     "movieNumbers = movieFrame[0].find_all(\"span\", attrs={\"name\": \"nv\"})"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": 214,
238 |    "metadata": {},
239 |    "outputs": [],
240 |    "source": [
241 |     "if len(movieNumbers) == 2:\n",
242 |     "    movieVotes = movieNumbers[0].text\n",
243 |     "    movieGross = movieNumbers[1].text\n",
244 |     "else:\n",
245 |     "    movieVotes = movieNumbers[0].text\n",
246 |     "    movieGross = np.nan"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": []
255 |   }
256 |  ],
257 |  "metadata": {
258 |   "kernelspec": {
259 |    "display_name": "Python 3",
260 |    "language": "python",
261 |    "name": "python3"
262 |   },
263 |   "language_info": {
264 |    "codemirror_mode": {
265 |     "name": "ipython",
266 |     "version": 3
267 |    },
268 |    "file_extension": ".py",
269 |    "mimetype": "text/x-python",
270 |    "name": "python",
271 |    "nbconvert_exporter": "python",
272 |    "pygments_lexer": "ipython3",
273 |    "version": "3.6.7"
274 |   }
275 |  },
276 |  "nbformat": 4,
277 |  "nbformat_minor": 2
278 | }
279 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python Data Scraping IMDb Movie site using BeautifulSoup
  2 | Data Scraping and Data Wrangling using Python BeautifulSoup
  3 | 
  4 | ## Data Scraping the TOP 100 most popular videos in IMDb in 2019
  5 | <b>Data scraping</b> from a website is one of the ways to get <b>valuable data</b> about current trends, because much of today's data comes from websites such as <i>YouTube, Facebook, Twitter</i> and other <i>social media sites</i>.<br>
  6 | Now, if you want to analyze which movies are currently trending and use that data for personal or business purposes, scraping a popular movie website like <b>IMDb</b> is the way to go.
  7 | 
  8 | # Data Scraping using PYTHON
  9 | We use <b>Python</b> because it is one of the most used languages in data science and also because it is the language that I am most <b>familiar</b> with.
 10 | ## Install Important Packages 
 11 | (If you're already done in this part, just <b>skip this</b>)
 12 | <br>
 13 | ### On Windows
 14 | **Note:** Make sure Python is already installed before pip-installing the following packages.<br>
 15 | 
 16 | Open <i>Command Prompt</i> or cmd<br>
 17 | Type the following:
 18 | ```
 19 | pip install lxml
 20 | pip install numpy
 21 | pip install pandas
 22 | pip install bs4
 23 | pip install requests
 24 | ```
 25 | 
 26 | ### On Linux
 27 | **Note:** Make sure Python is already installed before installing the following packages with apt-get.<br>
 28 | 
 29 | Open the <i>Terminal</i><br>
 30 | Type the following:
 31 | ```
 32 | sudo apt-get install python3-lxml
 33 | sudo apt-get install python3-numpy
 34 | sudo apt-get install python3-pandas
 35 | sudo apt-get install python3-bs4
 36 | sudo apt-get install python3-requests
 37 | ```
 38 | 
 39 | # Going to IMDb Website
 40 | ![image](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/imdbmain.png)
 41 | 1. Go to [IMDb](https://www.imdb.com/?ref_=nv_home) movie website
 42 | 2. Hover your mouse to the <b>Watchlist</b>
 43 | 3. Click the <b>Popular Movies</b> section
 44 | 
 45 | If you want to use my Jupyter notebook, use this link:
 46 | [IMDb-most-popular-2019](https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2)
 47 | ![image](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/imdbweb.png)
 48 | ## Use Google Chrome Developer Tools
 49 | Use Google Chrome Developer Tools to inspect <b>elements</b> or the data of the website.
 50 | Right Click mouse then Inspect
 51 | ![right-click](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/right-click-inspect.png)
 52 | Find the Elements that correspond to the data we're getting
 53 | ![hover-mouse-content](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/highlight-elements.png)
 54 | ![hover-mouse-movie](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/highlight-firstmovie.png)
 55 | ![hover-mouse-title](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/hightlight-target-text.png)
 56 | <b>Keep in mind</b> that you need to find where the data lives in the page before you can extract it. On the IMDb website, the markup of the first movie has the same structure as that of the remaining 99 movies. We can take advantage of that later.
 57 | #### Take note of the TAGS as well as the Attributes like class, id, etc. We'll use that later.
 58 | 
 59 | ## CODE (Click the jupyter notebook link to continue)
 60 | [Step-by-Step-Jupyter-Notebook-Code](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/jupyter-notebook/IMDBwebscraping.ipynb)
 61 | ### IMDb class jupyter notebook code (full):
 62 | [Full-code](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/jupyter-notebook/IMDb-Web-Scraping-Full-Code.ipynb)
 63 | ## Step-by-Step python code
 64 | **Import modules**
 65 | ```
 66 | import pandas as pd
 67 | import numpy as np
 68 | import re
 69 | import lxml
 70 | 
 71 | from bs4 import BeautifulSoup
 72 | from requests import get
 73 | %matplotlib inline
 74 | ```
 75 | **Get the page link**
 76 | ```
 77 | url= "https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2"
 78 | ```
 79 | <dl>
 80 |   <dt><b>Get page data</b></dt>
 81 |   <dd>- Get page using requests.get<br></dd>
 82 |   <dd>- Parse page using BeautifulSoup and lxml</dd>
 83 | </dl>
 84 | 
 85 | ```
 86 | page = get(url)
 87 | soup = BeautifulSoup(page.content, 'lxml') 
 88 | ```
 89 | 
 90 | <b>Get the Element or tag that holds the <i>movie</i> contents</b><br>
 91 | ![id-main-image](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/idmain.png)
 92 | ```
 93 | content = soup.find(id="main")
 94 | ```
 95 | ### Get Article Title
 96 | **soup.find("h1", class_="header")** finds the first element that has an **h1** tag and the **class** header.<br>
 97 | **.text** gets the text of that element.<br>
 98 | **.replace("\n","")** just removes every **\n** (newline).
 99 | ```
100 | articleTitle = soup.find("h1", class_="header").text.replace("\n","")
101 | ```
102 | ### Get the contents of one movie content
103 | ![movie-frame](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/highlight-movie-frame.png)
104 | find_all returns a <b>list</b> of all elements that match the specified <b>tag</b> and attributes (e.g. "div" with a given class)<br>
105 | To get the first movie only, use movieFrame[0]
106 | ```
107 | movieFrame = content.find_all("div", class_="lister-item mode-advanced")
108 | ```
109 | ### Getting the Movie Title and Movie Date
110 | ![first-line](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/first-line.png)<br>
111 | We first need to get the header line that contains both the title and the date, because the tags that hold those values are too <b>common</b>, and calling find on the whole movie block might not <b>return</b> the ones we want.<br>
112 | <b>.find("a")</b> returns the first element that has the <b>tag "a"</b><br>
113 | <b>.find_all("span")</b> returns all elements that have the tag "span". Because we only want the date, we take the last one, denoted by ("span")[-1]<br>
114 | <b> .text</b> returns the <b>text value</b> of that element.<br>
115 | ```
116 | movieFirstLine = movieFrame[0].find("h3", class_="lister-item-header")
117 | movieTitle = movieFirstLine.find("a").text
118 | movieDate = re.sub(r"[()]","", movieFirstLine.find_all("span")[-1].text)
119 | ```
120 | ### Getting the Runtime, genre, rating, score and movie description
121 | Finding the other data works the same way as for the first ones. Just remember to <b>be more specific</b> when describing the <b>attributes</b> (i.e. class, id, etc.) so that the search directly returns the element that we want to get. 
122 | ```
123 | movieRunTime = movieFrame[0].find("span", class_="runtime").text[:-4]
124 | movieGenre = movieFrame[0].find("span", class_="genre").text.rstrip().replace("\n","").split(",")
125 | movieRating = movieFrame[0].find("strong").text
126 | movieScore = movieFrame[0].find("span", class_="metascore unfavorable").text.rstrip()
127 | movieDesc = movieFrame[0].find_all("p", class_="text-muted")[-1].text.lstrip()
128 | ```
129 | ### Getting the movie casts and directors
130 | Movies that do not list a director are troublesome, and we need to anticipate that by turning the missing value into NaN using np.nan.
131 | Getting the movie cast is a bit tricky because the number of cast members listed varies from movie to movie: sometimes none, sometimes a few. That is the reason why we need to anticipate these scenarios.<br>
132 | Take a look at the <b>code</b>:
133 | ```
# Movie director and movie stars
# The cast paragraph is the <p> with an empty class; its text looks like
# "Director: ... | Stars: ..." and either part may be missing.
movieCast = movieFrame[0].find("p", class_="")
movieDirector = np.nan
movieStars = []
for part in movieCast.text.replace("\n", "").split("|"):
    text = part.strip()
    label, sep, names = text.partition(":")
    if sep and label in ("Director", "Directors"):
        movieDirector = names.strip()
    elif text:
        # A "Stars:" part, or an unlabelled comma-separated list of names.
        if sep and label in ("Star", "Stars"):
            text = names
        movieStars = [x.strip() for x in text.split(",")]
145 | ```
146 | ##### Same scenario with the votes and gross
147 | We can search by any attribute by putting it in the attrs dictionary as a key, with the value we want to match.
148 | ```
movieNumbers = movieFrame[0].find_all("span", attrs={"name": "nv"})
# The first "nv" span holds the votes and the second (if any) the gross;
# a movie may have both, only the votes, or neither.
if len(movieNumbers) == 2:
    movieVotes = movieNumbers[0].text
    movieGross = movieNumbers[1].text
elif movieNumbers:
    movieVotes = movieNumbers[0].text
    movieGross = np.nan
else:
    movieVotes = np.nan
    movieGross = np.nan
156 | ```
157 | ### Full code
158 | ```
159 | '''
160 | Author: Reljod T. Oreta PUP-Manila
161 | BSECE 5th year
162 | '''
163 | import lxml
164 | import re
165 | import numpy as np
166 | import pandas as pd
167 | 
168 | from bs4 import BeautifulSoup
169 | from requests import get
170 | 
# Default page to scrape: an IMDb title search asking for 100 results per
# page, limited to feature films and TV series (see the query string).
url1 = "https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2"


class IMDB(object):
    """Scraper for an IMDb search-results page.

    The page is downloaded and parsed once in the constructor; the other
    methods only query the parsed document stored in ``self.soup``.
    """

    def __init__(self, url):
        """Download ``url`` and parse the response body with the lxml parser."""
        super(IMDB, self).__init__()
        page = get(url)

        self.soup = BeautifulSoup(page.content, 'lxml')

    def articleTitle(self):
        """Return the text of the ``<h1 class="header">`` element, newlines removed."""
        return self.soup.find("h1", class_="header").text.replace("\n", "")

    def bodyContent(self):
        """Return the list of movie blocks found inside the ``id="main"`` element."""
        content = self.soup.find(id="main")
        return content.find_all("div", class_="lister-item mode-advanced")

    @staticmethod
    def _split_names(text):
        """Split a comma-separated list of names and strip each one."""
        return [name.strip() for name in text.split(",")]

    @staticmethod
    def _parse_cast(cast_tag):
        """Return ``(director, stars)`` extracted from the cast paragraph.

        The paragraph text is a ``|``-separated sequence of labelled parts
        such as ``Director: ...`` / ``Directors: ...`` and ``Stars: ...``.
        A part without a recognised label is treated as a plain list of
        stars. ``director`` is ``np.nan`` when no director part exists and
        ``stars`` is an empty list when no names are present (including when
        ``cast_tag`` is ``None``).
        """
        director = np.nan
        stars = []
        if cast_tag is None:
            return director, stars
        for part in cast_tag.text.replace("\n", "").split("|"):
            text = part.strip()
            if not text:
                continue
            label, sep, names = text.partition(":")
            label = label.strip()
            if sep and label in ("Director", "Directors"):
                director = names.strip()
            elif sep and label in ("Star", "Stars"):
                stars = IMDB._split_names(names)
            else:
                stars = IMDB._split_names(text)
        return director, stars

    def movieData(self):
        """Extract the fields of every movie block on the page.

        Returns:
            A list of eleven parallel lists, one entry per movie, in this
            order: title, date, runtime, genre, rating, metascore,
            description, director, stars, votes, gross. Values are the raw
            page strings (genre and stars are lists of strings); a field
            that is absent for a movie is stored as ``np.nan`` (``[]`` for
            stars).
        """
        movieTitle = []
        movieDate = []
        movieRunTime = []
        movieGenre = []
        movieRating = []
        movieScore = []
        movieDescription = []
        movieDirector = []
        movieStars = []
        movieVotes = []
        movieGross = []

        for movie in self.bodyContent():
            header = movie.find("h3", class_="lister-item-header")
            movieTitle.append(header.find("a").text)
            # The last <span> of the header holds the date wrapped in parentheses.
            movieDate.append(re.sub(r"[()]", "", header.find_all("span")[-1].text))

            runtime = movie.find("span", class_="runtime")
            # Drop the trailing 4 characters (the " min" unit).
            movieRunTime.append(np.nan if runtime is None else runtime.text[:-4])

            genre = movie.find("span", class_="genre")
            if genre is None:
                movieGenre.append(np.nan)
            else:
                movieGenre.append(self._split_names(genre.text.replace("\n", "")))

            rating = movie.find("strong")
            movieRating.append(np.nan if rating is None else rating.text)

            # Match on the "metascore" class alone: searching for the exact
            # string "metascore unfavorable" skipped every score that was
            # not flagged unfavorable.
            score = movie.find("span", class_="metascore")
            movieScore.append(np.nan if score is None else score.text.rstrip())

            muted = movie.find_all("p", class_="text-muted")
            movieDescription.append(muted[-1].text.lstrip() if muted else np.nan)

            director, stars = self._parse_cast(movie.find("p", class_=""))
            movieDirector.append(director)
            movieStars.append(stars)

            movieNumbers = movie.find_all("span", attrs={"name": "nv"})
            if len(movieNumbers) == 2:
                movieVotes.append(movieNumbers[0].text)
                movieGross.append(movieNumbers[1].text)
            elif len(movieNumbers) == 1:
                movieVotes.append(movieNumbers[0].text)
                movieGross.append(np.nan)
            else:
                movieVotes.append(np.nan)
                movieGross.append(np.nan)

        movieData = [movieTitle, movieDate, movieRunTime, movieGenre, movieRating, movieScore,
                     movieDescription, movieDirector, movieStars, movieVotes, movieGross]
        return movieData
247 | ```
248 | ### Check the result using the IMDB class
249 | ```
id1 = IMDB(url1)
# Get Article Title
print(id1.articleTitle())
# Scrape the page once; movieData() returns one list per field
movieData = id1.movieData()
# Show the first 5 entries of each of the first 5 fields
for column in movieData[:5]:
    print(column[:5])
256 | ```
257 | Result should be:
258 | ![imdbData](https://github.com/Reljod/Data-Scraping-IMDB-Movie-Site-using-Python/blob/master/imdb/imdbData.png)
259 | 
260 | ## What's next?
The data we extracted from the website should be <b>cleaned</b> first before it is used for <b>data analysis</b> or <b>machine learning</b>; that will be done in my next project, using exactly the data we have extracted so far.
262 | 
263 | ## Thank You!!
264 | 
265 | ## Author
266 | 
* **Reljod T. Oreta** - [**Reljod**](https://github.com/Reljod)
268 | 
269 | 


--------------------------------------------------------------------------------
/jupyter-notebook/IMDb-Web-Scraping-Full-Code.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import lxml\n",
 10 |     "import re\n",
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "\n",
 14 |     "from bs4 import BeautifulSoup\n",
 15 |     "from requests import get"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "url1 = \"https://www.imdb.com/search/title?count=100&title_type=feature,tv_series&ref_=nv_wl_img_2\""
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 3,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "class IMDB(object):\n",
 34 |     "\t\"\"\"docstring for IMDB\"\"\"\n",
 35 |     "\tdef __init__(self, url):\n",
 36 |     "\t\tsuper(IMDB, self).__init__()\n",
 37 |     "\t\tpage = get(url)\n",
 38 |     "\n",
 39 |     "\t\tself.soup = BeautifulSoup(page.content, 'lxml')\n",
 40 |     "\n",
 41 |     "\tdef articleTitle(self):\n",
 42 |     "\t\treturn self.soup.find(\"h1\", class_=\"header\").text.replace(\"\\n\",\"\")\n",
 43 |     "\n",
 44 |     "\tdef bodyContent(self):\n",
 45 |     "\t\tcontent = self.soup.find(id=\"main\")\n",
 46 |     "\t\treturn content.find_all(\"div\", class_=\"lister-item mode-advanced\")\n",
 47 |     "\n",
 48 |     "\tdef movieData(self):\n",
 49 |     "\t\tmovieFrame = self.bodyContent()\n",
 50 |     "\t\tmovieTitle = []\n",
 51 |     "\t\tmovieDate = []\n",
 52 |     "\t\tmovieRunTime = []\n",
 53 |     "\t\tmovieGenre = []\n",
 54 |     "\t\tmovieRating = []\n",
 55 |     "\t\tmovieScore = []\n",
 56 |     "\t\tmovieDescription = []\n",
 57 |     "\t\tmovieDirector = []\n",
 58 |     "\t\tmovieStars = []\n",
 59 |     "\t\tmovieVotes = []\n",
 60 |     "\t\tmovieGross = []\n",
 61 |     "\t\tfor movie in movieFrame:\n",
 62 |     "\t\t\tmovieFirstLine = movie.find(\"h3\", class_=\"lister-item-header\")\n",
 63 |     "\t\t\tmovieTitle.append(movieFirstLine.find(\"a\").text)\n",
 64 |     "\t\t\tmovieDate.append(re.sub(r\"[()]\",\"\", movieFirstLine.find_all(\"span\")[-1].text))\n",
 65 |     "\t\t\ttry:\n",
 66 |     "\t\t\t\tmovieRunTime.append(movie.find(\"span\", class_=\"runtime\").text[:-4])\n",
 67 |     "\t\t\texcept:\n",
 68 |     "\t\t\t\tmovieRunTime.append(np.nan)\n",
 69 |     "\t\t\tmovieGenre.append(movie.find(\"span\", class_=\"genre\").text.rstrip().replace(\"\\n\",\"\").split(\",\"))\n",
 70 |     "\t\t\ttry:\n",
 71 |     "\t\t\t\tmovieRating.append(movie.find(\"strong\").text)\n",
 72 |     "\t\t\texcept:\n",
 73 |     "\t\t\t\tmovieRating.append(np.nan)\n",
 74 |     "\t\t\ttry:\n",
 75 |     "\t\t\t\tmovieScore.append(movie.find(\"span\", class_=\"metascore unfavorable\").text.rstrip())\n",
 76 |     "\t\t\texcept:\n",
 77 |     "\t\t\t\tmovieScore.append(np.nan)\n",
 78 |     "\t\t\tmovieDescription.append(movie.find_all(\"p\", class_=\"text-muted\")[-1].text.lstrip())\n",
 79 |     "\t\t\tmovieCast = movie.find(\"p\", class_=\"\")\n",
 80 |     "\n",
 81 |     "\t\t\ttry:\n",
 82 |     "\t\t\t\tcasts = movieCast.text.replace(\"\\n\",\"\").split('|')\n",
 83 |     "\t\t\t\tcasts = [x.strip() for x in casts]\n",
 84 |     "\t\t\t\tcasts = [casts[i].replace(j, \"\") for i,j in enumerate([\"Director:\", \"Stars:\"])]\n",
 85 |     "\t\t\t\tmovieDirector.append(casts[0])\n",
 86 |     "\t\t\t\tmovieStars.append([x.strip() for x in casts[1].split(\",\")])\n",
 87 |     "\t\t\texcept:\n",
 88 |     "\t\t\t\tcasts = movieCast.text.replace(\"\\n\",\"\").strip()\n",
 89 |     "\t\t\t\tmovieDirector.append(np.nan)\n",
 90 |     "\t\t\t\tmovieStars.append([x.strip() for x in casts.split(\",\")])\n",
 91 |     "\n",
 92 |     "\t\t\tmovieNumbers = movie.find_all(\"span\", attrs={\"name\": \"nv\"})\n",
 93 |     "\n",
 94 |     "\t\t\tif len(movieNumbers) == 2:\n",
 95 |     "\t\t\t\tmovieVotes.append(movieNumbers[0].text)\n",
 96 |     "\t\t\t\tmovieGross.append(movieNumbers[1].text)\n",
 97 |     "\t\t\telif len(movieNumbers) == 1:\n",
 98 |     "\t\t\t\tmovieVotes.append(movieNumbers[0].text)\n",
 99 |     "\t\t\t\tmovieGross.append(np.nan)\n",
100 |     "\t\t\telse:\n",
101 |     "\t\t\t\tmovieVotes.append(np.nan)\n",
102 |     "\t\t\t\tmovieGross.append(np.nan)\n",
103 |     "\n",
104 |     "\t\tmovieData = [movieTitle, movieDate, movieRunTime, movieGenre, movieRating, movieScore, movieDescription,\n",
105 |     "\t\t\t\t\t\t\tmovieDirector, movieStars, movieVotes, movieGross]\n",
106 |     "\t\treturn movieData"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "Subject:  Feature Film/TV Series(Sorted by Popularity Ascending) \n",
121 |       "['Polar', 'Extremely Wicked, Shockingly Evil, and Vile', 'The Punisher', 'Glass', 'Sex Education', 'Game of Thrones', 'True Detective', 'Bohemian Rhapsody', 'Aquaman', 'Vikings', 'You', 'Green Book', 'The Favourite', 'A Star Is Born', 'Once Upon a Time in Hollywood', 'Fast & Furious Presents: Hobbs & Shaw', 'Serenity', 'Roma', 'Birds of Prey (And the Fantabulous Emancipation of One Harley Quinn)', 'The Orville', 'Titans', 'Velvet Buzzsaw', 'Alita: Battle Angel', 'The Walking Dead', 'Outlander', 'Vice', 'Kingdom', 'Mortal Engines', 'Widows', 'Avengers: Endgame', 'Grace and Frankie', 'IO', 'Joker', 'Spider-Man: Into the Spider-Verse', 'Black Mirror', \"Grey's Anatomy\", 'Shameless', 'Russian Doll', 'Supernatural', 'Suspiria', 'How to Train Your Dragon: The Hidden World', 'Bird Box', 'Star Trek: Discovery', 'American Crime Story', 'Brooklyn Nine-Nine', 'Doragon bôru chô: Burorî', 'BlacKkKlansman', 'Riverdale', 'Peaky Blinders', 'Split', 'Gotham', 'Close', 'The Big Bang Theory', 'The Office', 'The Good Place', 'The Upside', 'The Mule', 'The Passage', 'Conversations with a Killer: The Ted Bundy Tapes', 'Unbreakable Kimmy Schmidt', 'First Man', 'Friends', 'Suits', 'A Quiet Place', 'The Flash', 'Criminal Minds', 'Black Panther', 'Lucifer', 'Bumblebee', 'The Marvelous Mrs. Maisel', 'Arctic', 'Captain Marvel', 'Avengers: Infinity War', 'Tag', 'Fighting with My Family', 'Solo: A Star Wars Story', 'Mary Queen of Scots', 'The Blacklist', 'This Is Us', 'Arrow', 'Escape Room', 'Unbreakable', 'Rent', 'Hunter Killer', 'Stranger Things', 'The Kid Who Would Be King', 'Can You Ever Forgive Me?', 'A Private War', \"The Girl in the Spider's Web\", 'The Ballad of Buster Scruggs', 'Luther', 'Spider-Man: Far From Home', 'The Greatest Showman', 'Breaking Bad', 'Roswell, New Mexico', 'Friends from College', 'The Lego Movie 2: The Second Part', 'The Boys', 'Law & Order: Special Victims Unit', 'The Last Kingdom']\n",
122 |       "['2019', '2019', '2017– ', '2019', '2019– ', '2011– ', '2014– ', '2018', '2018', '2013– ', '2018– ', '2018', '2018', '2018', '2019', '2019', '2019', '2018', '2020', '2017– ', 'I 2018– ', '2019', '2019', '2010– ', '2014– ', 'I 2018', '2019– ', '2018', '2018', '2019', '2015– ', '2019', '2019', '2018', '2011– ', '2005– ', '2011– ', '2019– ', '2005– ', 'I 2018', '2019', '2018', '2017– ', '2016– ', '2013– ', '2018', '2018', '2016– ', '2013– ', 'IX 2016', '2014– ', 'I 2019', '2007– ', '2005–2013', '2016– ', '2017', '2018', '2019– ', '2019– ', '2015–2019', '2018', '1994–2004', '2011– ', '2018', '2014– ', '2005– ', '2018', '2015– ', '2018', '2017– ', '2018', '2019', '2018', 'I 2018', '2019', '2018', '2018', '2013– ', '2016– ', '2012– ', '2019', '2000', '2005', '2018', '2016– ', '2019', '2018', '2018', '2018', '2018', '2010– ', '2019', '2017', '2008–2013', '2019– ', '2017– ', '2019', '2019– ', '1999– ', '2015– ']\n",
123 |       "['118', '108', '53', '129', '45', '57', '55', '134', '143', '44', '60', '130', '119', '136', nan, nan, '106', '135', nan, '44', '45', '113', '122', '44', '64', '132', '45', '128', '129', nan, '30', '96', nan, '117', '60', '41', '46', '30', '44', '152', '104', '124', '60', '42', '22', '100', '135', '45', '60', '117', '42', '94', '22', '22', '22', '126', '116', '60', '60', '30', '141', '22', '44', '90', '43', '42', '134', '42', '114', '57', '97', '128', '149', '100', '108', '135', '124', '43', '45', '42', '99', '106', '138', '122', '51', '120', '106', '110', '117', '133', '60', nan, '105', '49', '60', '30', '106', '60', '60', '60']\n",
124 |       "[['Action', ' Crime'], ['Biography', ' Crime', ' Thriller'], ['Action', ' Adventure', ' Crime'], ['Drama', ' Sci-Fi', ' Thriller'], ['Comedy', ' Drama'], ['Action', ' Adventure', ' Drama'], ['Crime', ' Drama', ' Mystery'], ['Biography', ' Drama', ' Music'], ['Action', ' Adventure', ' Fantasy'], ['Action', ' Adventure', ' Drama'], ['Crime', ' Drama', ' Romance'], ['Biography', ' Comedy', ' Drama'], ['Biography', ' Comedy', ' Drama'], ['Drama', ' Music', ' Romance'], ['Comedy', ' Drama'], ['Action', ' Adventure'], ['Drama', ' Thriller'], ['Drama'], ['Action', ' Adventure', ' Crime'], ['Adventure', ' Comedy', ' Drama'], ['Action', ' Adventure', ' Drama'], ['Horror', ' Mystery', ' Thriller'], ['Action', ' Adventure', ' Romance'], ['Drama', ' Horror', ' Sci-Fi'], ['Drama', ' Fantasy', ' Romance'], ['Biography', ' Comedy', ' Drama'], ['Action', ' Thriller'], ['Action', ' Adventure', ' Fantasy'], ['Crime', ' Drama', ' Thriller'], ['Action', ' Adventure', ' Fantasy'], ['Comedy'], ['Adventure', ' Drama', ' Romance'], ['Crime', ' Drama', ' Thriller'], ['Animation', ' Action', ' Adventure'], ['Drama', ' Sci-Fi', ' Thriller'], ['Drama', ' Romance'], ['Comedy', ' Drama'], ['Comedy', ' Drama', ' Mystery'], ['Drama', ' Fantasy', ' Horror'], ['Fantasy', ' Horror', ' Mystery'], ['Animation', ' Action', ' Adventure'], ['Drama', ' Horror', ' Sci-Fi'], ['Action', ' Adventure', ' Drama'], ['Biography', ' Crime', ' Drama'], ['Comedy', ' Crime'], ['Animation', ' Action', ' Fantasy'], ['Biography', ' Crime', ' Drama'], ['Crime', ' Drama', ' Mystery'], ['Crime', ' Drama'], ['Horror', ' Thriller'], ['Action', ' Crime', ' Drama'], ['Action', ' Thriller'], ['Comedy', ' Romance'], ['Comedy'], ['Comedy', ' Drama', ' Fantasy'], ['Comedy', ' Drama'], ['Crime', ' Drama', ' Thriller'], ['Action', ' Adventure', ' Drama'], ['Documentary', ' Crime'], ['Comedy', ' Drama'], ['Biography', ' Drama', ' History'], ['Comedy', ' Romance'], ['Comedy', ' Drama'], ['Drama', ' Horror', ' Mystery'], 
['Action', ' Adventure', ' Drama'], ['Crime', ' Drama', ' Mystery'], ['Action', ' Adventure', ' Sci-Fi'], ['Crime', ' Drama', ' Fantasy'], ['Action', ' Adventure', ' Sci-Fi'], ['Comedy', ' Drama'], ['Drama'], ['Action', ' Adventure', ' Sci-Fi'], ['Action', ' Adventure', ' Fantasy'], ['Comedy'], ['Biography', ' Comedy', ' Drama'], ['Action', ' Adventure', ' Fantasy'], ['Biography', ' Drama', ' History'], ['Crime', ' Drama', ' Mystery'], ['Comedy', ' Drama', ' Romance'], ['Action', ' Adventure', ' Crime'], ['Action', ' Adventure', ' Thriller'], ['Drama', ' Mystery', ' Sci-Fi'], ['Drama', ' Musical', ' Romance'], ['Action', ' Thriller'], ['Drama', ' Fantasy', ' Horror'], ['Adventure', ' Family', ' Fantasy'], ['Biography', ' Comedy', ' Crime'], ['Biography', ' Drama', ' War'], ['Action', ' Crime', ' Drama'], ['Comedy', ' Drama', ' Musical'], ['Crime', ' Drama', ' Mystery'], ['Action', ' Adventure', ' Comedy'], ['Biography', ' Drama', ' Musical'], ['Crime', ' Drama', ' Thriller'], ['Drama', ' Romance', ' Sci-Fi'], ['Comedy', ' Drama'], ['Animation', ' Action', ' Adventure'], ['Action', ' Drama', ' Sci-Fi'], ['Crime', ' Drama', ' Mystery'], ['Action', ' Drama', ' History']]\n",
125 |       "['6.3', '7.8', '8.6', '7.0', '8.5', '9.5', '9.0', '8.2', '7.4', '8.6', '7.9', '8.3', '7.9', '7.9', nan, nan, '5.1', '8.0', nan, '7.9', '8.2', '5.8', '7.6', '8.3', '8.5', '7.1', '8.4', '6.2', '7.2', nan, '8.3', '4.7', nan, '8.7', '8.9', '7.6', '8.7', '8.2', '8.5', '7.0', '8.0', '6.7', '7.4', '8.5', '8.4', '8.3', '7.5', '7.5', '8.8', '7.3', '7.9', '5.6', '8.2', '8.8', '8.2', '6.3', '7.2', '7.4', '8.0', '7.7', '7.4', '8.9', '8.6', '7.6', '7.9', '8.1', '7.4', '8.2', '7.1', '8.7', '7.3', nan, '8.5', '6.6', '8.1', '7.0', '6.5', '8.1', '8.7', '7.7', '6.4', '7.3', '7.0', '6.6', '8.9', '6.3', '7.3', '6.7', '6.1', '7.3', '8.5', nan, '7.6', '9.5', '5.4', '6.8', '7.5', nan, '8.0', '8.3']\n",
126 |       "['19', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, '38', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]\n",
127 |       "[\"The world's top assassin, Duncan Vizla, is settling into retirement when his former employer marks him as a liability to the firm. Against his will, he finds himself back in the game going head to head with an army of younger killers.\", 'A chronicle of the crimes of Ted Bundy, from the perspective of his longtime girlfriend, Elizabeth Kloepfer, who refused to believe the truth about him for years.', 'After the murder of his family, Marine veteran Frank Castle becomes the vigilante known as \"The Punisher,\" with only one goal in mind: to avenge them.', 'Security guard David Dunn uses his supernatural abilities to track Kevin Wendell Crumb, a disturbed man who has twenty-four personalities.', 'A teenage boy with a sex therapist mother teams up with a high school classmate to set up an underground sex therapy clinic at school.', 'Nine noble families fight for control over the mythical lands of Westeros, while an ancient enemy returns after being dormant for thousands of years.', 'Seasonal anthology series in which police investigations unearth the personal and professional secrets of those involved, both within and outside the law.', 'The story of the legendary rock band Queen and lead singer Freddie Mercury, leading up to their famous performance at Live Aid (1985).', 'Arthur Curry, the human-born heir to the underwater kingdom of Atlantis, goes on a quest to prevent a war between the worlds of ocean and land.', 'Vikings transports us to the brutal and mysterious world of Ragnar Lothbrok, a Viking warrior and farmer who yearns to explore - and raid - the distant shores across the ocean.', 'A clever bookstore manager relies on his savvy Internet know-how to make the woman of his dreams fall in love with him.', 'A working-class Italian-American bouncer becomes the driver of an African-American classical pianist on a tour of venues through the 1960s American South.', 'In early 18th century England, a frail Queen Anne occupies the throne and her close 
friend, Lady Sarah, governs the country in her stead. When a new servant, Abigail, arrives, her charm endears her to Sarah.', 'A musician helps a young singer find fame, even as age and alcoholism send his own career into a downward spiral.', 'A faded TV actor and his stunt double strive to achieve fame and success in the film industry during the final years of the Hollywood Golden Age in 1969 Los Angeles.', 'Lawman Luke Hobbs and outcast Deckard Shaw form an unlikely alliance when a cyber-genetically enhanced villain threatens the future of humanity.', 'A fishing boat captain juggles facing his mysterious past and finding himself ensnared in a reality where nothing is what it seems.', \"A year in the life of a middle-class family's maid in Mexico City in the early 1970s.\", 'After splitting up with the Joker, Harley Quinn joins three female superheroes - Black Canary, Huntress and Renee Montoya - to save the life of a little girl (Cassandra Cain) from an evil crime lord.', 'An exploratory ship from Earth faces intergalactic challenges 400 years in the future.', \"A team of young superheroes led by Nightwing (formerly Batman's first Robin) form to combat evil and other perils.\", 'After a series of paintings by an unknown artist are discovered, a supernatural force enacts revenge on those who have allowed their greed to get in the way of art.', \"An action-packed story of one young woman's journey to discover the truth of who she is and her fight to change the world.\", 'Sheriff Deputy Rick Grimes wakes up from a coma to learn the world is in ruins, and must lead a group of survivors to stay alive.', 'An English combat nurse from 1945 is mysteriously swept back in time to 1743.', 'The story of Dick Cheney, an unassuming bureaucratic Washington insider, who quietly wielded immense power as Vice President to George W. 
Bush, reshaping the country and the globe in ways that we still feel today.', 'While strange rumors about their ill king grip a kingdom, the crown prince becomes their only hope against a mysterious plague overtaking the land.', 'In a post-apocalyptic world where cities ride on wheels and consume each other to survive, two people meet in London and try to stop a conspiracy.', \"Set in contemporary Chicago, amid a time of turmoil, four women with nothing in common except a debt left behind by their dead husbands' criminal activities, take fate into their own hands, and conspire to forge a future on their own terms.\", \"After the devastating events of Avengers: Infinity War (2018), the universe is in ruins. With the help of remaining allies, the Avengers assemble once more in order to undo Thanos' actions and restore order to the universe.\", 'Finding out that their husbands are not just work partners, but have also been romantically involved for the last twenty years, two women with an already strained relationship try to cope with the circumstances together.', \"As a young scientist searches for a way to save a dying Earth, she finds a connection with a man who's racing to catch the last shuttle off the planet.\", 'A failed stand-up comedian is driven insane and becomes a psychopathic murderer.', 'Teen Miles Morales becomes Spider-Man of his reality, crossing his path with five counterparts from other dimensions to stop a threat for all realities.', \"An anthology series exploring a twisted, high-tech world where humanity's greatest innovations and darkest instincts collide.\", 'A drama centered on the personal and professional lives of five surgical interns and their supervisors.', 'A scrappy, fiercely loyal Chicago family makes no apologies.', \"A cynical young woman in New York City keeps dying and returning to the party that's being thrown in her honor on that same evening. 
She tries to find a way out of this strange time loop.\", 'Two brothers follow their father\\'s footsteps as \"hunters\", fighting evil supernatural beings of many kinds, including monsters, demons, and gods that roam the earth.', 'A darkness swirls at the center of a world-renowned dance company, one that will engulf the artistic director, an ambitious young dancer, and a grieving psychotherapist. Some will succumb to the nightmare. Others will finally wake up.', 'When Hiccup discovers Toothless isn\\'t the only Night Fury, he must seek \"The Hidden World\", a secret Dragon Utopia before a hired tyrant named Grimmel finds it first.', 'Five years after an ominous unseen presence drives most of society to suicide, a mother and her two children make a desperate bid to reach safety.', 'Ten years before Kirk, Spock, and the Enterprise, the USS Discovery discovers new worlds and lifeforms as one Starfleet officer learns to understand all things alien.', \"An anthology series centered around America's most notorious crimes and criminals.\", \"Jake Peralta, an immature, but talented N.Y.P.D. detective in Brooklyn's 99th Precinct, comes into immediate conflict with his new commanding officer, the serious and stern Captain Ray Holt.\", \"Goku and Vegeta encounter Broly, a Saiyan warrior unlike any fighter they've faced before.\", 'Ron Stallworth, an African American police officer from Colorado Springs, CO, successfully manages to infiltrate the local Ku Klux Klan branch with the help of a Jewish surrogate who eventually becomes its leader. Based on actual events.', 'While navigating the troubled waters of romance, school and family, Archie and his gang become entangled in dark Riverdale mysteries.', 'A gangster family epic set in 1919 Birmingham, England; centered on a gang who sew razor blades in the peaks of their caps, and their fierce boss Tommy Shelby.', 'Three girls are kidnapped by a man with a diagnosed 23 distinct personalities. 
They must try to escape before the apparent emergence of a frightful new 24th.', \"The story behind Detective James Gordon's rise to prominence in Gotham City in the years before Batman's arrival.\", 'Sam, a bodyguard and counter-terrorism expert, takes a job protecting a rich young heiress named Zoe. Neither party is keen on the arrangement until a violent kidnapping forces them to go on the run.', 'A woman who moves into an apartment across the hall from two brilliant but socially awkward physicists shows them how little they know about life outside of the laboratory.', 'A mockumentary on a group of typical office workers, where the workday consists of ego clashes, inappropriate behavior, and tedium.', 'Four people and their otherworldly frienemy struggle in the afterlife to define what it means to be good.', \"A comedic look at the relationship between a wealthy man with quadriplegia and an unemployed man with a criminal record who's hired to help him.\", 'A 90-year-old horticulturist and Korean War veteran is caught transporting $3 million worth of cocaine through Illinois for a Mexican drug cartel.', 'When a botched U.S. 
government experiment turns a group of death row inmates into highly infectious vampires, an orphan girl might be the only person able to stop the ensuing crisis.', 'A look inside the mind of serial killer Ted Bundy, featuring interviews with him on death row.', 'A woman is rescued from a doomsday cult and starts life over again in New York City.', 'A look at the life of the astronaut, Neil Armstrong, and the legendary space mission that led him to become the first man to walk on the Moon on July 20, 1969.', 'Follows the personal and professional lives of six twenty to thirty-something-year-old friends living in Manhattan.', \"On the run from a drug deal gone bad, Mike Ross, a brilliant college dropout, finds himself a job working with Harvey Specter, one of New York City's best lawyers.\", 'In a post-apocalyptic world, a family is forced to live in silence while hiding from monsters with ultra-sensitive hearing.', \"After being struck by lightning, Barry Allen wakes up from his coma to discover he's been given the power of super speed, becoming the Flash, fighting crime in Central City.\", \"The cases of the F.B.I. Behavioral Analysis Unit (B.A.U.), an elite group of profilers who analyze the nation's most dangerous serial killers and individual heinous crimes in an effort to anticipate their next moves before they strike again.\", \"T'Challa, heir to the hidden but advanced kingdom of Wakanda, must step forward to lead his people into a new future and must confront a challenger from his country's past.\", \"Lucifer Morningstar has decided he's had enough of being the dutiful servant in Hell and decides to spend some time on Earth to better understand humanity. He settles in Los Angeles - the City of Angels.\", 'On the run in the year of 1987, Bumblebee finds refuge in a junkyard in a small Californian beach town. 
Charlie, on the cusp of turning 18 and trying to find her place in the world, discovers Bumblebee, battle-scarred and broken.', 'A housewife in the 1950s decides to become a stand-up comic.', 'A man stranded in the Arctic after an airplane crash must decide whether to remain in the relative safety of his makeshift camp or to embark on a deadly trek through the unknown.', \"Carol Danvers becomes one of the universe's most powerful heroes when Earth is caught in the middle of a galactic war between two alien races.\", 'The Avengers and their allies must be willing to sacrifice all in an attempt to defeat the powerful Thanos before his blitz of devastation and ruin puts an end to the universe.', 'A small group of former classmates organize an elaborate, annual game of tag that requires some to travel all over the country.', 'A former wrestler and his family make a living performing at small venues around the country while his kids dream of joining World Wrestling Entertainment.', 'During an adventure into the criminal underworld, Han Solo meets his future co-pilot Chewbacca and encounters Lando Calrissian years before joining the Rebellion.', \"Mary Stuart's attempt to overthrow her cousin Elizabeth I, Queen of England, finds her condemned to years of imprisonment before facing execution.\", 'A new FBI profiler, Elizabeth Keen, has her entire life uprooted when a mysterious criminal, Raymond Reddington, who has eluded capture for decades, turns himself in and insists on speaking only to her.', 'A heartwarming and emotional story about a unique set of triplets, their struggles, and their wonderful parents.', 'Spoiled billionaire playboy Oliver Queen is missing and presumed dead when his yacht is lost at sea. 
He returns five years later a changed man, determined to clean up the city as a hooded vigilante armed with a bow.', 'Six strangers find themselves in a maze of deadly mystery rooms, and must use their wits to survive.', 'A man learns something extraordinary about himself after a devastating accident.', 'This is the film version of the Pulitzer and Tony Award winning musical about Bohemians in the East Village of New York City struggling with life, love and AIDS, and the impacts they have on America.', 'An untested American submarine captain teams with U.S. Navy Seals to rescue the Russian president, who has been kidnapped by a rogue general.', 'When a young boy disappears, his mother, a police chief, and his friends must confront terrifying forces in order to get him back.', 'A band of kids embark on an epic quest to thwart a medieval menace.', 'When Lee Israel falls out of step with current tastes, she turns her art form to deception.', 'One of the most celebrated war correspondents of our time, Marie Colvin is an utterly fearless and rebellious spirit, driven to the frontline of conflicts across the globe to give voice to the voiceless.', 'Young computer hacker Lisbeth Salander and journalist Mikael Blomkvist find themselves caught in a web of spies, cybercriminals and corrupt government officials.', 'Six tales of life and violence in the Old West, following a singing gunslinger, a bank robber, a traveling impresario, an elderly prospector, a wagon train, and a perverse pair of bounty hunters.', \"DCI John Luther is a near-genius murder detective whose brilliant mind can't always save him from the dangerous violence of his passions.\", 'Peter Parker and his friends go on summer vacation to Europe, where Peter finds himself trying to save his friends against a villain known as Mysterio.', 'Celebrates the birth of show business and tells of a visionary who rose from nothing to create a spectacle that became a worldwide sensation.', \"A high school chemistry 
teacher diagnosed with inoperable lung cancer turns to manufacturing and selling methamphetamine in order to secure his family's future.\", 'A reimagining of Roswell, which centers on the residents of New Mexico, where aliens live undercover among humans.', 'A group of friends from Harvard are facing down their forties. With interwoven and oftentimes complicated relationships with one another. \"Friends from College\" is a comedic exploration of ...                See full summary\\xa0»\\n', \"It's been five years since everything was awesome and the citizens are facing a huge new threat: Lego Duplo invaders from outer space, wrecking everything faster than they can rebuild.\", 'An action story centered on a CIA squad tasked with keeping superheroes in line, by any means necessary.', 'The Special Victims Unit, a specially trained squad of detectives in the NYPD, investigate sexually related crimes.', 'The year is 872, and many of the separate kingdoms of what we now know as England have fallen to the invading Danes, leaving the great kingdom of Wessex standing alone and defiant under the...                See full summary\\xa0»\\n']\n",
128 |       "['Jonas Åkerlund', 'Joe Berlinger', nan, 'M. Night Shyamalan', nan, nan, nan, 'Bryan Singer', 'James Wan', nan, nan, 'Peter Farrelly', 'Yorgos Lanthimos', 'Bradley Cooper', 'Quentin Tarantino', 'David Leitch', 'Steven Knight', 'Alfonso Cuarón', 'Cathy Yan', nan, nan, 'Dan Gilroy', 'Robert Rodriguez', nan, nan, 'Adam McKay', nan, 'Christian Rivers', 'Steve McQueen', 'Directors:Anthony Russo, Joe Russo', nan, 'Jonathan Helpert', 'Todd Phillips', 'Directors:Bob Persichetti, Peter Ramsey, Rodney Rothman', nan, nan, nan, nan, nan, 'Luca Guadagnino', 'Dean DeBlois', 'Susanne Bier', nan, nan, nan, 'Tatsuya Nagamine', 'Spike Lee', nan, nan, 'M. Night Shyamalan', nan, 'Vicky Jewson', nan, nan, nan, 'Neil Burger', 'Clint Eastwood', nan, nan, nan, 'Damien Chazelle', nan, nan, 'John Krasinski', nan, nan, 'Ryan Coogler', nan, 'Travis Knight', nan, 'Joe Penna', 'Directors:Anna Boden, Ryan Fleck', 'Directors:Anthony Russo, Joe Russo', 'Jeff Tomsic', 'Stephen Merchant', 'Ron Howard', 'Josie Rourke', nan, nan, nan, 'Adam Robitel', 'M. Night Shyamalan', 'Chris Columbus', 'Donovan Marsh', nan, 'Joe Cornish', 'Marielle Heller', 'Matthew Heineman', 'Fede Alvarez', 'Directors:Ethan Coen, Joel Coen', nan, 'Jon Watts', 'Michael Gracey', nan, nan, nan, 'Mike Mitchell', nan, nan, nan]\n",
129 |       "[['Mads Mikkelsen', 'Vanessa Hudgens', 'Katheryn Winnick', 'Fei Ren'], ['Lily Collins', 'Zac Efron', 'Angela Sarafyan', 'Sydney Vollmer'], ['Stars:Jon Bernthal', 'Amber Rose Revah', 'Ben Barnes', 'Jason R. Moore'], ['James McAvoy', 'Bruce Willis', 'Samuel L. Jackson', 'Anya Taylor-Joy'], ['Stars:Asa Butterfield', 'Gillian Anderson', 'Emma Mackey', 'Alistair Petrie'], ['Stars:Emilia Clarke', 'Peter Dinklage', 'Kit Harington', 'Lena Headey'], ['Stars:Vince Vaughn', 'Colin Farrell', 'Rachel McAdams', 'Taylor Kitsch'], ['Rami Malek', 'Lucy Boynton', 'Gwilym Lee', 'Ben Hardy'], ['Jason Momoa', 'Amber Heard', 'Willem Dafoe', 'Patrick Wilson'], ['Stars:Gustaf Skarsgård', 'Katheryn Winnick', 'Alexander Ludwig', 'Travis Fimmel'], ['Stars:Penn Badgley', 'Ambyr Childers', 'Elizabeth Lail', 'Luca Padovan'], ['Viggo Mortensen', 'Mahershala Ali', 'Linda Cardellini', 'Sebastian Maniscalco'], ['Olivia Colman', 'Emma Stone', 'Rachel Weisz', 'Nicholas Hoult'], ['Lady Gaga', 'Bradley Cooper', 'Sam Elliott', 'Greg Grunberg'], ['Margot Robbie', 'Margaret Qualley', 'Leonardo DiCaprio', 'Brad Pitt'], ['Eiza González', 'Vanessa Kirby', 'Dwayne Johnson', 'Idris Elba'], ['Matthew McConaughey', 'Anne Hathaway', 'Diane Lane', 'Jason Clarke'], ['Yalitza Aparicio', 'Marina de Tavira', 'Diego Cortina Autrey', 'Carlos Peralta'], ['Margot Robbie', 'Jurnee Smollett-Bell', 'Mary Elizabeth Winstead', 'Ewan McGregor'], ['Stars:Seth MacFarlane', 'Adrianne Palicki', 'Penny Johnson Jerald', 'Scott Grimes'], ['Stars:Teagan Croft', 'Brenton Thwaites', 'Anna Diop', 'Ryan Potter'], ['Jake Gyllenhaal', 'Rene Russo', 'Zawe Ashton', 'Tom Sturridge'], ['Rosa Salazar', 'Christoph Waltz', 'Jennifer Connelly', 'Mahershala Ali'], ['Stars:Andrew Lincoln', 'Norman Reedus', 'Melissa McBride', 'Lauren Cohan'], ['Stars:Caitriona Balfe', 'Sam Heughan', 'Duncan Lacroix', 'Tobias Menzies'], ['Christian Bale', 'Amy Adams', 'Steve Carell', 'Sam Rockwell'], ['Stars:Doona Bae', 'Greg Chun', 'Jun-ho Heo', 'Ji-Hoon 
Ju'], ['Hera Hilmar', 'Robert Sheehan', 'Hugo Weaving', 'Jihae'], ['Viola Davis', 'Michelle Rodriguez', 'Elizabeth Debicki'], ['Bradley Cooper', 'Brie Larson', 'Scarlett Johansson', 'Evangeline Lilly'], ['Stars:Jane Fonda', 'Lily Tomlin', 'Sam Waterston', 'Martin Sheen'], ['Margaret Qualley', 'Anthony Mackie', 'Danny Huston', 'Tom Payne'], ['Joaquin Phoenix', 'Robert De Niro', 'Zazie Beetz', 'Shea Whigham'], ['Shameik Moore', 'Jake Johnson', 'Hailee Steinfeld', 'Mahershala Ali'], ['Stars:Daniel Lapaine', 'Hannah John-Kamen', 'Michaela Coel', 'Beatrice Robertson-Jones'], ['Stars:Ellen Pompeo', 'Justin Chambers', 'Chandra Wilson', 'James Pickens Jr.'], ['Stars:Emmy Rossum', 'William H. Macy', 'Ethan Cutkosky', 'Jeremy Allen White'], ['Stars:Natasha Lyonne', 'Charlie Barnett', 'Greta Lee', 'Elizabeth Ashley'], ['Stars:Jared Padalecki', 'Jensen Ackles', 'Jim Beaver', 'Misha Collins'], ['Dakota Johnson', 'Tilda Swinton', 'Doris Hick', 'Malgorzata Bela'], ['Jay Baruchel', 'America Ferrera', 'F. Murray Abraham', 'Cate Blanchett'], ['Sandra Bullock', 'Trevante Rhodes', 'John Malkovich', 'Sarah Paulson'], ['Stars:Sonequa Martin-Green', 'Doug Jones', 'Anthony Rapp', 'Mary Wiseman'], ['Stars:Sarah Paulson', 'Cuba Gooding Jr.', 'Courtney B. Vance', 'Sterling K. Brown'], ['Stars:Andy Samberg', 'Stephanie Beatriz', 'Terry Crews', 'Melissa Fumero'], ['Masako Nozawa', 'Aya Hisakawa', 'Ryô Horikawa', 'Toshio Furukawa'], ['John David Washington', 'Adam Driver', 'Laura Harrier', 'Topher Grace'], ['Stars:K.J. 
Apa', 'Lili Reinhart', 'Camila Mendes', 'Cole Sprouse'], ['Stars:Cillian Murphy', 'Helen McCrory', 'Paul Anderson', 'Sophie Rundle'], ['James McAvoy', 'Anya Taylor-Joy', 'Haley Lu Richardson', 'Jessica Sula'], ['Stars:Ben McKenzie', 'Jada Pinkett Smith', 'Donal Logue', 'Camren Bicondova'], ['Noomi Rapace', 'Olivia Jewson', 'Abdellatif Chaouqi', 'Sophie Nélisse'], ['Stars:Kaley Cuoco', 'Johnny Galecki', 'Jim Parsons', 'Simon Helberg'], ['Stars:Steve Carell', 'Jenna Fischer', 'John Krasinski', 'Rainn Wilson'], ['Stars:Kristen Bell', 'William Jackson Harper', 'Jameela Jamil', \"D'Arcy Carden\"], ['Kevin Hart', 'Bryan Cranston', 'Nicole Kidman', 'Aja Naomi King'], ['Bradley Cooper', 'Clint Eastwood', 'Michael Peña', 'Manny Montana'], ['Stars:Mark-Paul Gosselaar', 'Saniyya Sidney', 'Jamie McShane', 'Caroline Chikezie'], ['Stars:Hugh Aynesworth', 'Bob Keppel', 'Stephen Michaud', 'Ted Bundy'], ['Stars:Ellie Kemper', 'Jane Krakowski', 'Tituss Burgess', 'Carol Kane'], ['Ryan Gosling', 'Claire Foy', 'Jason Clarke', 'Kyle Chandler'], ['Stars:Jennifer Aniston', 'Courteney Cox', 'Lisa Kudrow', 'Matt LeBlanc'], ['Stars:Gabriel Macht', 'Patrick J. Adams', 'Meghan Markle', 'Sarah Rafferty'], ['Emily Blunt', 'John Krasinski', 'Millicent Simmonds', 'Noah Jupe'], ['Stars:Grant Gustin', 'Candice Patton', 'Danielle Panabaker', 'Carlos Valdes'], ['Stars:Matthew Gray Gubler', 'Kirsten Vangsness', 'A.J. Cook', 'Joe Mantegna'], ['Chadwick Boseman', 'Michael B. Jordan', \"Lupita Nyong'o\", 'Danai Gurira'], ['Stars:Lauren German', 'Tom Ellis', 'Kevin Alejandro', 'D.B. Woodside'], ['Hailee Steinfeld', 'Jorge Lendeborg Jr.', 'John Cena', 'Jason Drucker'], ['Stars:Rachel Brosnahan', 'Michael Zegen', 'Marin Hinkle', 'Tony Shalhoub'], ['Mads Mikkelsen', 'Maria Thelma Smáradóttir'], ['Brie Larson', 'Gemma Chan', 'Samuel L. 
Jackson', 'Mckenna Grace'], ['Robert Downey Jr.', 'Chris Hemsworth', 'Mark Ruffalo', 'Chris Evans'], ['Jeremy Renner', 'Ed Helms', 'Jake Johnson', 'Jon Hamm'], ['Dwayne Johnson', 'Florence Pugh', 'Lena Headey', 'Saraya-Jade Bevis'], ['Alden Ehrenreich', 'Woody Harrelson', 'Emilia Clarke', 'Donald Glover'], ['Saoirse Ronan', 'Margot Robbie', 'Jack Lowden', 'Joe Alwyn'], ['Stars:James Spader', 'Megan Boone', 'Diego Klattenhoff', 'Ryan Eggold'], ['Stars:Milo Ventimiglia', 'Mandy Moore', 'Sterling K. Brown', 'Chrissy Metz'], ['Stars:Stephen Amell', 'Katie Cassidy', 'David Ramsey', 'Susanna Thompson'], ['Taylor Russell', 'Logan Miller', 'Jay Ellis', 'Tyler Labine'], ['Bruce Willis', 'Samuel L. Jackson', 'Robin Wright', 'Spencer Treat Clark'], ['Taye Diggs', 'Wilson Jermaine Heredia', 'Rosario Dawson', 'Anthony Rapp'], ['Gerard Butler', 'Gary Oldman', 'Common', 'Linda Cardellini'], ['Stars:Millie Bobby Brown', 'Finn Wolfhard', 'Winona Ryder', 'David Harbour'], ['Louis Ashbourne Serkis', 'Denise Gough', 'Dean Chaumoo', 'Tom Taylor'], ['Melissa McCarthy', 'Richard E. Grant', 'Dolly Wells', 'Ben Falcone'], ['Rosamund Pike', 'Greg Wise', 'Alexandra Moen', 'Tom Hollander'], ['Claire Foy', 'Beau Gadsdon', 'Sverrir Gudnason', 'Lakeith Stanfield'], ['Tim Blake Nelson', 'Willie Watson', 'Clancy Brown', 'Danny McCarthy'], ['Stars:Idris Elba', 'Dermot Crowley', 'Michael Smiley', 'Warren Brown'], ['Zendaya', 'Jake Gyllenhaal', 'Cobie Smulders', 'Samuel L. 
Jackson'], ['Hugh Jackman', 'Michelle Williams', 'Zac Efron', 'Zendaya'], ['Stars:Bryan Cranston', 'Aaron Paul', 'Anna Gunn', 'Betsy Brandt'], ['Stars:Jeanine Mason', 'Nathan Parsons', 'Michael Vlamis', 'Lily Cowles'], ['Stars:Keegan-Michael Key', 'Cobie Smulders', 'Annie Parisse', 'Nat Faxon'], ['Chris Pratt', 'Elizabeth Banks', 'Will Arnett', 'Tiffany Haddish'], ['Stars:Karl Urban', 'Tomer Capon', 'Jennifer Esposito', 'Erin Moriarty'], ['Stars:Mariska Hargitay', 'Christopher Meloni', 'Ice-T', 'Dann Florek'], ['Stars:Alexander Dreymon', 'Ian Hart', 'David Dawson', 'Eliza Butterworth']]\n",
130 |       "['29,815', '404', '133,239', '58,153', '33,544', '1,407,282', '424,648', '234,545', '159,280', '324,111', '41,555', '53,118', '44,602', '170,127', nan, nan, '2,975', '69,264', nan, '41,222', '27,867', '16,872', '3,107', '770,334', '88,468', '28,061', '4,441', '34,200', '38,269', nan, '26,862', '17,368', nan, '92,639', '288,545', '206,210', '164,295', '9,438', '345,218', '22,280', '9,164', '176,778', '59,810', '62,952', '132,284', '10,550', '100,900', '78,452', '175,477', '328,942', '187,999', '8,575', '630,471', '281,728', '52,668', '9,986', '17,143', '2,809', '6,341', '52,990', '91,433', '646,066', '318,053', '271,263', '268,071', '135,280', '457,138', '131,247', '48,433', '38,015', '1,008', nan, '581,342', '70,351', '454', '201,929', '8,966', '151,431', '71,289', '372,518', '8,374', '333,024', '46,087', '22,304', '557,424', '1,706', '9,052', '4,075', '16,928', '67,974', '101,060', nan, '187,614', '1,156,819', '2,621', '13,219', '806', nan, '66,557', '49,267']\n",
131 |       "[nan, nan, nan, '$91.43M', nan, nan, nan, '$209.12M', '$324.94M', nan, nan, '$57.51M', '$29.25M', '$208.11M', nan, nan, '$8.20M', nan, nan, nan, nan, nan, nan, nan, nan, '$44.46M', nan, '$15.95M', '$42.38M', nan, nan, nan, nan, '$176.53M', nan, nan, nan, nan, nan, '$2.47M', nan, nan, nan, nan, nan, '$30.24M', '$48.69M', nan, nan, '$138.29M', nan, nan, nan, nan, nan, '$77.94M', '$102.07M', nan, nan, nan, '$44.94M', nan, nan, '$188.02M', nan, nan, '$700.06M', nan, '$124.81M', nan, '$0.06M', nan, '$678.82M', '$54.55M', nan, '$213.77M', '$16.47M', nan, nan, nan, '$52.87M', '$95.01M', '$29.08M', '$15.77M', nan, '$14.06M', '$8.42M', '$1.63M', '$14.84M', nan, nan, nan, '$174.34M', nan, nan, nan, nan, nan, nan, nan]\n"
132 |      ]
133 |     }
134 |    ],
135 |    "source": [
136 |     "if __name__ == '__main__':\n",
137 |     "\tsite1 = IMDB(url1)\n",
138 |     "\tprint(\"Subject: \", site1.articleTitle())\n",
139 |     "\tdata = site1.movieData()\n",
140 |     "\tfor i in range(len(data)):\n",
141 |     "\t\tprint(data[i][:]) #Print the data"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": []
150 |   }
151 |  ],
152 |  "metadata": {
153 |   "kernelspec": {
154 |    "display_name": "Python 3",
155 |    "language": "python",
156 |    "name": "python3"
157 |   },
158 |   "language_info": {
159 |    "codemirror_mode": {
160 |     "name": "ipython",
161 |     "version": 3
162 |    },
163 |    "file_extension": ".py",
164 |    "mimetype": "text/x-python",
165 |    "name": "python",
166 |    "nbconvert_exporter": "python",
167 |    "pygments_lexer": "ipython3",
168 |    "version": "3.6.7"
169 |   }
170 |  },
171 |  "nbformat": 4,
172 |  "nbformat_minor": 2
173 | }
174 | 


--------------------------------------------------------------------------------