├── src
├── __init__.py
└── utils
│ └── __init__.py
├── Procfile
├── artifacts
├── .gitignore
└── movie_dict.pkl
├── data
└── .gitignore
├── requirements.txt
├── demo
├── 1.png
├── 2.png
├── 3.png
└── 6.jpeg
├── setup.sh
├── setup.py
├── LICENSE
├── .gitignore
├── app.py
├── README.md
└── Movie Recommender System Data Analysis.ipynb
/src/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: sh setup.sh && streamlit run app.py
--------------------------------------------------------------------------------
/artifacts/.gitignore:
--------------------------------------------------------------------------------
1 | movie_list.pkl
2 | similarity.pkl
3 |
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | tmdb_5000_credits.csv
2 | tmdb_5000_movies.csv
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ### dependency
2 | streamlit
3 |
4 | ### local packages -
5 | -e .
6 |
--------------------------------------------------------------------------------
/demo/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/Movie-Recommender-System-Using-Machine-Learning/HEAD/demo/1.png
--------------------------------------------------------------------------------
/demo/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/Movie-Recommender-System-Using-Machine-Learning/HEAD/demo/2.png
--------------------------------------------------------------------------------
/demo/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/Movie-Recommender-System-Using-Machine-Learning/HEAD/demo/3.png
--------------------------------------------------------------------------------
/demo/6.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/Movie-Recommender-System-Using-Machine-Learning/HEAD/demo/6.jpeg
--------------------------------------------------------------------------------
/artifacts/movie_dict.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/Movie-Recommender-System-Using-Machine-Learning/HEAD/artifacts/movie_dict.pkl
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
# Write the Streamlit server configuration before the app starts.
# The Procfile runs this script and then `streamlit run app.py`.
# NOTE(review): $PORT is presumably injected by the hosting platform; if it
# is unset the generated "port =" line will be empty -- confirm for local use.
mkdir -p ~/.streamlit/

# A here-document is used instead of `echo "...\n..."`: whether echo expands
# "\n" is implementation-defined, so the echo form writes literal backslash-n
# sequences under shells whose echo does not interpret escapes.
cat > ~/.streamlit/config.toml <<EOF
[server]
port = $PORT
enableCORS = false
headless = true

EOF
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup

# The README is reused verbatim as the package's long description.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

## edit below variables as per your requirements -
REPO_NAME = "Movie-Recommender-System-Using-Machine-Learning"
AUTHOR_USER_NAME = "entbappy"
SRC_REPO = "src"
LIST_OF_REQUIREMENTS = ['streamlit']


setup(
    name=SRC_REPO,
    version="0.0.1",
    author=AUTHOR_USER_NAME,
    description="A small package for Movie Recommender System",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}",
    author_email="entbappy73@gmail.com",
    # Discover SRC_REPO and every sub-package beneath it (e.g. src.utils);
    # listing only [SRC_REPO] would leave sub-packages out of built
    # distributions.
    packages=find_packages(include=[SRC_REPO, f"{SRC_REPO}.*"]),
    license="MIT",
    python_requires=">=3.7",
    install_requires=LIST_OF_REQUIREMENTS,
)
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Bappy Ahmed
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # if using pycharm
132 | .idea
133 |
134 | # if using VScode
135 | .vscode
136 |
137 | # add secret keys or API keys here
138 | configs/secrets.yaml
139 |
140 | # add your env folder here if its there
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | '''
2 | Author: Bappy Ahmed
3 | Email: entbappy73@gmail.com
4 | Date: 2021-Nov-15
5 | Updated by: Malhar Nikam
6 | '''
7 |
8 | import pickle
9 | import streamlit as st
10 | import requests
11 | import pandas as pd
12 |
def fetch_poster(movie_id):
    """Return a poster image URL for a movie, looked up through the TMDB API.

    Args:
        movie_id: TMDB movie identifier; it is interpolated into the
            request URL.

    Returns:
        The ``image.tmdb.org`` poster URL when the API response contains a
        ``poster_path``; otherwise a placeholder image URL. Request and
        decoding failures are reported with ``st.error`` instead of being
        raised, and also yield the placeholder.
    """
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration / an environment variable and rotating it.
    url = "https://api.themoviedb.org/3/movie/{}?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US".format(movie_id)
    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        poster_path = response.json().get('poster_path')
        if poster_path:
            # Strip any leading slash so the joined URL has no "//" segment.
            return "https://image.tmdb.org/t/p/w500/" + poster_path.lstrip("/")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        st.error(f"Error fetching poster: {e}")
    # Return a placeholder if the poster is not found or an error occurs
    return "https://placehold.co/500x750/333/FFFFFF?text=No+Poster"
28 |
29 |
def recommend(movie):
    """Recommend the five movies most similar to ``movie``.

    Reads the module-level ``movies`` DataFrame and ``similarity`` matrix.

    Args:
        movie: Title to look up in ``movies['title']``.

    Returns:
        A 4-tuple of parallel lists ``(names, posters, years, ratings)``.
        All four lists are empty when the title is not present, in which
        case an error is also shown with ``st.error``.
    """
    # Locate the movie by row *position*, not index label: the results below
    # are read with ``iloc``, so the similarity matrix is addressed
    # positionally and the row lookup must use the same convention.
    title_matches = (movies['title'] == movie).to_numpy().nonzero()[0]
    if len(title_matches) == 0:
        st.error("Movie not found in the dataset. Please select another one.")
        return [], [], [], []
    row_position = title_matches[0]

    # Pair every row position with its similarity score, highest first.
    ranked = sorted(enumerate(similarity[row_position]), reverse=True, key=lambda x: x[1])

    recommended_movie_names = []
    recommended_movie_posters = []
    recommended_movie_years = []
    recommended_movie_ratings = []

    # Skip the top-ranked entry (expected to be the selected movie itself)
    # and keep the next five.
    for position, _score in ranked[1:6]:
        record = movies.iloc[position]

        recommended_movie_posters.append(fetch_poster(record['movie_id']))
        recommended_movie_names.append(record['title'])
        recommended_movie_years.append(record['year'])
        recommended_movie_ratings.append(record['vote_average'])

    return recommended_movie_names, recommended_movie_posters, recommended_movie_years, recommended_movie_ratings
55 |
56 |
# Page setup; this is the first Streamlit call in the script.
st.set_page_config(layout="wide")
st.header('Movie Recommender System Using Machine Learning')

# Load the data files
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts produced by a trusted run of the notebook.
try:
    # Context managers close the file handles even if unpickling fails.
    with open('artifacts/movie_dict.pkl', 'rb') as movie_file:
        movies_dict = pickle.load(movie_file)
    with open('artifacts/similarity.pkl', 'rb') as similarity_file:
        similarity = pickle.load(similarity_file)
except FileNotFoundError:
    st.error("Model files not found. Please run the data processing notebook first.")
    st.stop()

movies = pd.DataFrame(movies_dict)

movie_list = movies['title'].values
selected_movie = st.selectbox(
    "Type or select a movie from the dropdown",
    movie_list
)

if st.button('Show Recommendation'):
    with st.spinner('Finding recommendations...'):
        recommended_movie_names, recommended_movie_posters, recommended_movie_years, recommended_movie_ratings = recommend(selected_movie)

    if recommended_movie_names:
        cols = st.columns(5)
        # zip stops at the shortest input, so fewer than five results are
        # rendered without indexing past the end of the lists.
        results = zip(
            cols,
            recommended_movie_names,
            recommended_movie_posters,
            recommended_movie_years,
            recommended_movie_ratings,
        )
        for col, name, poster, year, rating in results:
            with col:
                st.text(name)
                st.image(poster)
                # Ensure year is an integer before displaying
                if pd.notna(year):
                    st.caption(f"Year: {int(year)}")
                else:
                    st.caption("Year: N/A")

                st.caption(f"Rating: {rating:.1f} ⭐")
95 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Project: Movie Recommender System Using Machine Learning!
2 |
3 |
4 |
5 | Recommendation systems are becoming increasingly important in today’s extremely busy world. People are always short on time with the myriad tasks they need to accomplish in the limited 24 hours. Therefore, the recommendation systems are important as they help them make the right choices, without having to expend their cognitive resources.
6 |
7 | The purpose of a recommendation system basically is to search for content that would be interesting to an individual. Moreover, it involves a number of factors to create personalised lists of useful and interesting content specific to each user/individual. Recommendation systems are Artificial Intelligence based algorithms that skim through all possible options and create a customized list of items that are interesting and relevant to an individual. These results are based on their profile, search/browsing history, what other people with similar traits/demographics are watching, and how likely are you to watch those movies. This is achieved through predictive modeling and heuristics with the data available.
8 |
9 | # Types of Recommendation System :
10 |
11 | ### 1 ) Content Based :
12 |
13 | - Content-based systems use characteristic information and take item attributes into consideration.
14 |
15 | - Twitter , Youtube .
16 |
17 | - Which music you are listening , what singer are you watching . Form embeddings for the features .
18 |
19 | - User-specific actions or similar-item recommendations.
20 |
21 | - It will create a vector of it .
22 |
23 | - These systems make recommendations using a user's item and profile features. They hypothesize that if a user was interested in an item in the past, they will once again be interested in it in the future
24 |
25 | - One issue that arises is making obvious recommendations because of excessive specialization (user A is only interested in categories B, C, and D, and the system is not able to recommend items outside those categories, even though they could be interesting to them).
26 |
27 | ### 2 ) Collaborative Based :
28 |
29 | - Collaborative filtering systems, which are based on user-item interactions.
30 |
31 | - Clusters of users with same ratings , similar users .
32 |
33 | - Book recommendation , so use cluster mechanism .
34 |
35 | - We take only one parameter , ratings or comments .
36 |
37 | - In short, collaborative filtering systems are based on the assumption that if a user likes item A and another user likes the same item A as well as another item, item B, the first user could also be interested in the second item .
38 |
39 | - Issues are :
40 |
41 | - User-Item nXn matrix , so computationally expensive .
42 |
43 | - Only famous items will get recommended.
44 |
45 | - New items might not get recommended at all.
46 |
47 | ### 3 ) Hybrid Based :
48 |
49 | - Hybrid systems, which combine both types of information with the aim of avoiding problems that are generated when working with just one kind.
50 |
51 | - Combination of both and used now a days .
52 |
53 | - Uses : word2vec , embedding .
54 |
55 | # About this project:
56 |
57 | This is a Streamlit web application that recommends similar movies based on a user's interest.
58 | Here is a demo:
59 |
60 | * [Click here to run it live on server](https://movie-recommeder-system.herokuapp.com/)
61 |
62 |
63 | # Demo:
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 | # Dataset used:
73 |
74 | * [Dataset link](https://www.kaggle.com/tmdb/tmdb-movie-metadata?select=tmdb_5000_movies.csv)
75 |
76 | # Concept used to build the similarity.pkl file: cosine_similarity
77 |
78 | 1 . Cosine Similarity is a metric that allows you to measure the similarity of the documents.
79 |
80 | 2 . To compute cosine similarity we need vectors; here the vectors are NumPy arrays.
81 |
82 | 3 . Once we have the vectors, we can call cosine_similarity() and pass both of them; it calculates the cosine similarity between the two.
83 |
84 | 4 . For non-negative vectors (such as word counts) the result is a value in the range [0, 1]. A value of 0 means the two vectors are completely different; a value of 1 means they are completely similar.
85 |
86 | 5 . For more details , check URL : https://www.learndatasci.com/glossary/cosine-similarity/
87 |
88 | # How to run?
89 | ### STEPS:
90 |
91 | Clone the repository
92 |
93 | ```bash
94 | git clone https://github.com/entbappy/Movie-Recommender-System-Using-Machine-Learning.git
95 | ```
96 | ### STEP 01- Create a conda environment after opening the repository
97 |
98 | ```bash
99 | conda create -n movie python=3.7.10 -y
100 | ```
101 |
102 | ```bash
103 | conda activate movie
104 | ```
105 |
106 |
107 | ### STEP 02- install the requirements
108 | ```bash
109 | pip install -r requirements.txt
110 | ```
111 |
112 |
113 | ```bash
114 | #run this file to generate the models
115 |
116 | Movie Recommender System Data Analysis.ipynb
117 | ```
118 |
119 | Now run,
120 | ```bash
121 | streamlit run app.py
122 | ```
123 |
124 |
125 | ```bash
126 | Author: Bappy Ahmed
127 | Data Scientist
128 | Email: entbappy73@gmail.com
129 |
130 | ```
131 |
--------------------------------------------------------------------------------
/Movie Recommender System Data Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Content Based Recommender System"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np \n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "movies = pd.read_csv('data/tmdb_5000_movies.csv')\n",
27 | "credits = pd.read_csv('data/tmdb_5000_credits.csv')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
\n",
39 | "\n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " | \n",
56 | " budget | \n",
57 | " genres | \n",
58 | " homepage | \n",
59 | " id | \n",
60 | " keywords | \n",
61 | " original_language | \n",
62 | " original_title | \n",
63 | " overview | \n",
64 | " popularity | \n",
65 | " production_companies | \n",
66 | " production_countries | \n",
67 | " release_date | \n",
68 | " revenue | \n",
69 | " runtime | \n",
70 | " spoken_languages | \n",
71 | " status | \n",
72 | " tagline | \n",
73 | " title | \n",
74 | " vote_average | \n",
75 | " vote_count | \n",
76 | "
\n",
77 | " \n",
78 | " \n",
79 | " \n",
80 | " | 0 | \n",
81 | " 237000000 | \n",
82 | " [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... | \n",
83 | " http://www.avatarmovie.com/ | \n",
84 | " 19995 | \n",
85 | " [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... | \n",
86 | " en | \n",
87 | " Avatar | \n",
88 | " In the 22nd century, a paraplegic Marine is di... | \n",
89 | " 150.437577 | \n",
90 | " [{\"name\": \"Ingenious Film Partners\", \"id\": 289... | \n",
91 | " [{\"iso_3166_1\": \"US\", \"name\": \"United States o... | \n",
92 | " 2009-12-10 | \n",
93 | " 2787965087 | \n",
94 | " 162.0 | \n",
95 | " [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... | \n",
96 | " Released | \n",
97 | " Enter the World of Pandora. | \n",
98 | " Avatar | \n",
99 | " 7.2 | \n",
100 | " 11800 | \n",
101 | "
\n",
102 | " \n",
103 | " | 1 | \n",
104 | " 300000000 | \n",
105 | " [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... | \n",
106 | " http://disney.go.com/disneypictures/pirates/ | \n",
107 | " 285 | \n",
108 | " [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... | \n",
109 | " en | \n",
110 | " Pirates of the Caribbean: At World's End | \n",
111 | " Captain Barbossa, long believed to be dead, ha... | \n",
112 | " 139.082615 | \n",
113 | " [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... | \n",
114 | " [{\"iso_3166_1\": \"US\", \"name\": \"United States o... | \n",
115 | " 2007-05-19 | \n",
116 | " 961000000 | \n",
117 | " 169.0 | \n",
118 | " [{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n",
119 | " Released | \n",
120 | " At the end of the world, the adventure begins. | \n",
121 | " Pirates of the Caribbean: At World's End | \n",
122 | " 6.9 | \n",
123 | " 4500 | \n",
124 | "
\n",
125 | " \n",
126 | "
\n",
127 | "
"
128 | ],
129 | "text/plain": [
130 | " budget genres \\\n",
131 | "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
132 | "1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n",
133 | "\n",
134 | " homepage id \\\n",
135 | "0 http://www.avatarmovie.com/ 19995 \n",
136 | "1 http://disney.go.com/disneypictures/pirates/ 285 \n",
137 | "\n",
138 | " keywords original_language \\\n",
139 | "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n",
140 | "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n",
141 | "\n",
142 | " original_title \\\n",
143 | "0 Avatar \n",
144 | "1 Pirates of the Caribbean: At World's End \n",
145 | "\n",
146 | " overview popularity \\\n",
147 | "0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n",
148 | "1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n",
149 | "\n",
150 | " production_companies \\\n",
151 | "0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n",
152 | "1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... \n",
153 | "\n",
154 | " production_countries release_date revenue \\\n",
155 | "0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n",
156 | "1 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2007-05-19 961000000 \n",
157 | "\n",
158 | " runtime spoken_languages status \\\n",
159 | "0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n",
160 | "1 169.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n",
161 | "\n",
162 | " tagline \\\n",
163 | "0 Enter the World of Pandora. \n",
164 | "1 At the end of the world, the adventure begins. \n",
165 | "\n",
166 | " title vote_average vote_count \n",
167 | "0 Avatar 7.2 11800 \n",
168 | "1 Pirates of the Caribbean: At World's End 6.9 4500 "
169 | ]
170 | },
171 | "execution_count": 3,
172 | "metadata": {},
173 | "output_type": "execute_result"
174 | }
175 | ],
176 | "source": [
177 | "movies.head(2)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 4,
183 | "metadata": {},
184 | "outputs": [
185 | {
186 | "data": {
187 | "text/plain": [
188 | "(4803, 20)"
189 | ]
190 | },
191 | "execution_count": 4,
192 | "metadata": {},
193 | "output_type": "execute_result"
194 | }
195 | ],
196 | "source": [
197 | "movies.shape"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 5,
203 | "metadata": {},
204 | "outputs": [
205 | {
206 | "data": {
207 | "text/html": [
208 | "\n",
209 | "\n",
222 | "
\n",
223 | " \n",
224 | " \n",
225 | " | \n",
226 | " movie_id | \n",
227 | " title | \n",
228 | " cast | \n",
229 | " crew | \n",
230 | "
\n",
231 | " \n",
232 | " \n",
233 | " \n",
234 | " | 0 | \n",
235 | " 19995 | \n",
236 | " Avatar | \n",
237 | " [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... | \n",
238 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
239 | "
\n",
240 | " \n",
241 | " | 1 | \n",
242 | " 285 | \n",
243 | " Pirates of the Caribbean: At World's End | \n",
244 | " [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... | \n",
245 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
246 | "
\n",
247 | " \n",
248 | " | 2 | \n",
249 | " 206647 | \n",
250 | " Spectre | \n",
251 | " [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... | \n",
252 | " [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... | \n",
253 | "
\n",
254 | " \n",
255 | " | 3 | \n",
256 | " 49026 | \n",
257 | " The Dark Knight Rises | \n",
258 | " [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... | \n",
259 | " [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... | \n",
260 | "
\n",
261 | " \n",
262 | " | 4 | \n",
263 | " 49529 | \n",
264 | " John Carter | \n",
265 | " [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... | \n",
266 | " [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... | \n",
267 | "
\n",
268 | " \n",
269 | "
\n",
270 | "
"
271 | ],
272 | "text/plain": [
273 | " movie_id title \\\n",
274 | "0 19995 Avatar \n",
275 | "1 285 Pirates of the Caribbean: At World's End \n",
276 | "2 206647 Spectre \n",
277 | "3 49026 The Dark Knight Rises \n",
278 | "4 49529 John Carter \n",
279 | "\n",
280 | " cast \\\n",
281 | "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
282 | "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
283 | "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n",
284 | "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n",
285 | "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n",
286 | "\n",
287 | " crew \n",
288 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n",
289 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n",
290 | "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n",
291 | "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n",
292 | "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... "
293 | ]
294 | },
295 | "execution_count": 5,
296 | "metadata": {},
297 | "output_type": "execute_result"
298 | }
299 | ],
300 | "source": [
301 | "credits.head()"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 6,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "(4803, 4)"
313 | ]
314 | },
315 | "execution_count": 6,
316 | "metadata": {},
317 | "output_type": "execute_result"
318 | }
319 | ],
320 | "source": [
321 | "credits.shape"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 7,
327 | "metadata": {},
328 | "outputs": [],
329 | "source": [
330 | "movies = movies.merge(credits,on='title')"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 8,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "data": {
340 | "text/html": [
341 | "\n",
342 | "\n",
355 | "
\n",
356 | " \n",
357 | " \n",
358 | " | \n",
359 | " budget | \n",
360 | " genres | \n",
361 | " homepage | \n",
362 | " id | \n",
363 | " keywords | \n",
364 | " original_language | \n",
365 | " original_title | \n",
366 | " overview | \n",
367 | " popularity | \n",
368 | " production_companies | \n",
369 | " ... | \n",
370 | " runtime | \n",
371 | " spoken_languages | \n",
372 | " status | \n",
373 | " tagline | \n",
374 | " title | \n",
375 | " vote_average | \n",
376 | " vote_count | \n",
377 | " movie_id | \n",
378 | " cast | \n",
379 | " crew | \n",
380 | "
\n",
381 | " \n",
382 | " \n",
383 | " \n",
384 | " | 0 | \n",
385 | " 237000000 | \n",
386 | " [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... | \n",
387 | " http://www.avatarmovie.com/ | \n",
388 | " 19995 | \n",
389 | " [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... | \n",
390 | " en | \n",
391 | " Avatar | \n",
392 | " In the 22nd century, a paraplegic Marine is di... | \n",
393 | " 150.437577 | \n",
394 | " [{\"name\": \"Ingenious Film Partners\", \"id\": 289... | \n",
395 | " ... | \n",
396 | " 162.0 | \n",
397 | " [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... | \n",
398 | " Released | \n",
399 | " Enter the World of Pandora. | \n",
400 | " Avatar | \n",
401 | " 7.2 | \n",
402 | " 11800 | \n",
403 | " 19995 | \n",
404 | " [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... | \n",
405 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
406 | "
\n",
407 | " \n",
408 | " | 1 | \n",
409 | " 300000000 | \n",
410 | " [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... | \n",
411 | " http://disney.go.com/disneypictures/pirates/ | \n",
412 | " 285 | \n",
413 | " [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... | \n",
414 | " en | \n",
415 | " Pirates of the Caribbean: At World's End | \n",
416 | " Captain Barbossa, long believed to be dead, ha... | \n",
417 | " 139.082615 | \n",
418 | " [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... | \n",
419 | " ... | \n",
420 | " 169.0 | \n",
421 | " [{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n",
422 | " Released | \n",
423 | " At the end of the world, the adventure begins. | \n",
424 | " Pirates of the Caribbean: At World's End | \n",
425 | " 6.9 | \n",
426 | " 4500 | \n",
427 | " 285 | \n",
428 | " [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... | \n",
429 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
430 | "
\n",
431 | " \n",
432 | "
\n",
433 | "
2 rows × 23 columns
\n",
434 | "
"
435 | ],
436 | "text/plain": [
437 | " budget genres \\\n",
438 | "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
439 | "1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n",
440 | "\n",
441 | " homepage id \\\n",
442 | "0 http://www.avatarmovie.com/ 19995 \n",
443 | "1 http://disney.go.com/disneypictures/pirates/ 285 \n",
444 | "\n",
445 | " keywords original_language \\\n",
446 | "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n",
447 | "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n",
448 | "\n",
449 | " original_title \\\n",
450 | "0 Avatar \n",
451 | "1 Pirates of the Caribbean: At World's End \n",
452 | "\n",
453 | " overview popularity \\\n",
454 | "0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n",
455 | "1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n",
456 | "\n",
457 | " production_companies ... runtime \\\n",
458 | "0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... ... 162.0 \n",
459 | "1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... ... 169.0 \n",
460 | "\n",
461 | " spoken_languages status \\\n",
462 | "0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n",
463 | "1 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n",
464 | "\n",
465 | " tagline \\\n",
466 | "0 Enter the World of Pandora. \n",
467 | "1 At the end of the world, the adventure begins. \n",
468 | "\n",
469 | " title vote_average vote_count movie_id \\\n",
470 | "0 Avatar 7.2 11800 19995 \n",
471 | "1 Pirates of the Caribbean: At World's End 6.9 4500 285 \n",
472 | "\n",
473 | " cast \\\n",
474 | "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
475 | "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
476 | "\n",
477 | " crew \n",
478 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n",
479 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n",
480 | "\n",
481 | "[2 rows x 23 columns]"
482 | ]
483 | },
484 | "execution_count": 8,
485 | "metadata": {},
486 | "output_type": "execute_result"
487 | }
488 | ],
489 | "source": [
490 | "movies.head(2)"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": 9,
496 | "metadata": {},
497 | "outputs": [
498 | {
499 | "data": {
500 | "text/plain": [
501 | "(4809, 23)"
502 | ]
503 | },
504 | "execution_count": 9,
505 | "metadata": {},
506 | "output_type": "execute_result"
507 | }
508 | ],
509 | "source": [
510 | "movies.shape"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 10,
516 | "metadata": {},
517 | "outputs": [],
518 | "source": [
519 | "# Keeping important columns for recommendation\n",
520 | "movies = movies[['movie_id','title','overview','genres','keywords','cast','crew', 'release_date', 'vote_average']]"
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 11,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "# Drop rows with any missing values, then derive the 'year' column from 'release_date'\n",
530 | "movies.dropna(inplace=True)\n",
531 | "movies['year'] = pd.to_datetime(movies['release_date'], errors='coerce').dt.year"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 12,
537 | "metadata": {},
538 | "outputs": [
539 | {
540 | "data": {
541 | "text/html": [
542 | "\n",
543 | "\n",
556 | "
\n",
557 | " \n",
558 | " \n",
559 | " | \n",
560 | " movie_id | \n",
561 | " title | \n",
562 | " overview | \n",
563 | " genres | \n",
564 | " keywords | \n",
565 | " cast | \n",
566 | " crew | \n",
567 | " release_date | \n",
568 | " vote_average | \n",
569 | " year | \n",
570 | "
\n",
571 | " \n",
572 | " \n",
573 | " \n",
574 | " | 0 | \n",
575 | " 19995 | \n",
576 | " Avatar | \n",
577 | " In the 22nd century, a paraplegic Marine is di... | \n",
578 | " [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... | \n",
579 | " [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... | \n",
580 | " [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... | \n",
581 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
582 | " 2009-12-10 | \n",
583 | " 7.2 | \n",
584 | " 2009 | \n",
585 | "
\n",
586 | " \n",
587 | " | 1 | \n",
588 | " 285 | \n",
589 | " Pirates of the Caribbean: At World's End | \n",
590 | " Captain Barbossa, long believed to be dead, ha... | \n",
591 | " [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... | \n",
592 | " [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... | \n",
593 | " [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... | \n",
594 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
595 | " 2007-05-19 | \n",
596 | " 6.9 | \n",
597 | " 2007 | \n",
598 | "
\n",
599 | " \n",
600 | "
\n",
601 | "
"
602 | ],
603 | "text/plain": [
604 | " movie_id title \\\n",
605 | "0 19995 Avatar \n",
606 | "1 285 Pirates of the Caribbean: At World's End \n",
607 | "\n",
608 | " overview \\\n",
609 | "0 In the 22nd century, a paraplegic Marine is di... \n",
610 | "1 Captain Barbossa, long believed to be dead, ha... \n",
611 | "\n",
612 | " genres \\\n",
613 | "0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
614 | "1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n",
615 | "\n",
616 | " keywords \\\n",
617 | "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n",
618 | "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n",
619 | "\n",
620 | " cast \\\n",
621 | "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
622 | "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
623 | "\n",
624 | " crew release_date \\\n",
625 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 2009-12-10 \n",
626 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... 2007-05-19 \n",
627 | "\n",
628 | " vote_average year \n",
629 | "0 7.2 2009 \n",
630 | "1 6.9 2007 "
631 | ]
632 | },
633 | "execution_count": 12,
634 | "metadata": {},
635 | "output_type": "execute_result"
636 | }
637 | ],
638 | "source": [
639 | "movies.head(2)"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": 13,
645 | "metadata": {},
646 | "outputs": [
647 | {
648 | "data": {
649 | "text/plain": [
650 | "(4805, 10)"
651 | ]
652 | },
653 | "execution_count": 13,
654 | "metadata": {},
655 | "output_type": "execute_result"
656 | }
657 | ],
658 | "source": [
659 | "movies.shape"
660 | ]
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": 14,
665 | "metadata": {},
666 | "outputs": [
667 | {
668 | "data": {
669 | "text/plain": [
670 | "movie_id 0\n",
671 | "title 0\n",
672 | "overview 0\n",
673 | "genres 0\n",
674 | "keywords 0\n",
675 | "cast 0\n",
676 | "crew 0\n",
677 | "release_date 0\n",
678 | "vote_average 0\n",
679 | "year 0\n",
680 | "dtype: int64"
681 | ]
682 | },
683 | "execution_count": 14,
684 | "metadata": {},
685 | "output_type": "execute_result"
686 | }
687 | ],
688 | "source": [
689 | "movies.isnull().sum()"
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": 15,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "movies.dropna(inplace=True)"
699 | ]
700 | },
701 | {
702 | "cell_type": "code",
703 | "execution_count": 16,
704 | "metadata": {},
705 | "outputs": [
706 | {
707 | "data": {
708 | "text/plain": [
709 | "movie_id 0\n",
710 | "title 0\n",
711 | "overview 0\n",
712 | "genres 0\n",
713 | "keywords 0\n",
714 | "cast 0\n",
715 | "crew 0\n",
716 | "release_date 0\n",
717 | "vote_average 0\n",
718 | "year 0\n",
719 | "dtype: int64"
720 | ]
721 | },
722 | "execution_count": 16,
723 | "metadata": {},
724 | "output_type": "execute_result"
725 | }
726 | ],
727 | "source": [
728 | "movies.isnull().sum()"
729 | ]
730 | },
731 | {
732 | "cell_type": "code",
733 | "execution_count": 17,
734 | "metadata": {},
735 | "outputs": [
736 | {
737 | "data": {
738 | "text/plain": [
739 | "(4805, 10)"
740 | ]
741 | },
742 | "execution_count": 17,
743 | "metadata": {},
744 | "output_type": "execute_result"
745 | }
746 | ],
747 | "source": [
748 | "movies.shape"
749 | ]
750 | },
751 | {
752 | "cell_type": "code",
753 | "execution_count": 18,
754 | "metadata": {},
755 | "outputs": [
756 | {
757 | "data": {
758 | "text/plain": [
759 | "np.int64(0)"
760 | ]
761 | },
762 | "execution_count": 18,
763 | "metadata": {},
764 | "output_type": "execute_result"
765 | }
766 | ],
767 | "source": [
768 | "movies.duplicated().sum()"
769 | ]
770 | },
771 | {
772 | "cell_type": "code",
773 | "execution_count": 19,
774 | "metadata": {},
775 | "outputs": [
776 | {
777 | "data": {
778 | "text/plain": [
779 | "'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'"
780 | ]
781 | },
782 | "execution_count": 19,
783 | "metadata": {},
784 | "output_type": "execute_result"
785 | }
786 | ],
787 | "source": [
788 | "# handle genres\n",
789 | "\n",
790 | "movies.iloc[0]['genres']"
791 | ]
792 | },
793 | {
794 | "cell_type": "code",
795 | "execution_count": 20,
796 | "metadata": {},
797 | "outputs": [],
798 | "source": [
799 | "import ast #for converting str to list\n",
800 | "\n",
801 | "def convert(text):\n",
802 | " L = []\n",
803 | " for i in ast.literal_eval(text):\n",
804 | " L.append(i['name']) \n",
805 | " return L"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": 21,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "movies['genres'] = movies['genres'].apply(convert)"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "execution_count": 22,
820 | "metadata": {},
821 | "outputs": [
822 | {
823 | "data": {
824 | "text/html": [
825 | "\n",
826 | "\n",
839 | "
\n",
840 | " \n",
841 | " \n",
842 | " | \n",
843 | " movie_id | \n",
844 | " title | \n",
845 | " overview | \n",
846 | " genres | \n",
847 | " keywords | \n",
848 | " cast | \n",
849 | " crew | \n",
850 | " release_date | \n",
851 | " vote_average | \n",
852 | " year | \n",
853 | "
\n",
854 | " \n",
855 | " \n",
856 | " \n",
857 | " | 0 | \n",
858 | " 19995 | \n",
859 | " Avatar | \n",
860 | " In the 22nd century, a paraplegic Marine is di... | \n",
861 | " [Action, Adventure, Fantasy, Science Fiction] | \n",
862 | " [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... | \n",
863 | " [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... | \n",
864 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
865 | " 2009-12-10 | \n",
866 | " 7.2 | \n",
867 | " 2009 | \n",
868 | "
\n",
869 | " \n",
870 | " | 1 | \n",
871 | " 285 | \n",
872 | " Pirates of the Caribbean: At World's End | \n",
873 | " Captain Barbossa, long believed to be dead, ha... | \n",
874 | " [Adventure, Fantasy, Action] | \n",
875 | " [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... | \n",
876 | " [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... | \n",
877 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
878 | " 2007-05-19 | \n",
879 | " 6.9 | \n",
880 | " 2007 | \n",
881 | "
\n",
882 | " \n",
883 | " | 2 | \n",
884 | " 206647 | \n",
885 | " Spectre | \n",
886 | " A cryptic message from Bond’s past sends him o... | \n",
887 | " [Action, Adventure, Crime] | \n",
888 | " [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... | \n",
889 | " [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... | \n",
890 | " [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... | \n",
891 | " 2015-10-26 | \n",
892 | " 6.3 | \n",
893 | " 2015 | \n",
894 | "
\n",
895 | " \n",
896 | " | 3 | \n",
897 | " 49026 | \n",
898 | " The Dark Knight Rises | \n",
899 | " Following the death of District Attorney Harve... | \n",
900 | " [Action, Crime, Drama, Thriller] | \n",
901 | " [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... | \n",
902 | " [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... | \n",
903 | " [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... | \n",
904 | " 2012-07-16 | \n",
905 | " 7.6 | \n",
906 | " 2012 | \n",
907 | "
\n",
908 | " \n",
909 | " | 4 | \n",
910 | " 49529 | \n",
911 | " John Carter | \n",
912 | " John Carter is a war-weary, former military ca... | \n",
913 | " [Action, Adventure, Science Fiction] | \n",
914 | " [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... | \n",
915 | " [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... | \n",
916 | " [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... | \n",
917 | " 2012-03-07 | \n",
918 | " 6.1 | \n",
919 | " 2012 | \n",
920 | "
\n",
921 | " \n",
922 | "
\n",
923 | "
"
924 | ],
925 | "text/plain": [
926 | " movie_id title \\\n",
927 | "0 19995 Avatar \n",
928 | "1 285 Pirates of the Caribbean: At World's End \n",
929 | "2 206647 Spectre \n",
930 | "3 49026 The Dark Knight Rises \n",
931 | "4 49529 John Carter \n",
932 | "\n",
933 | " overview \\\n",
934 | "0 In the 22nd century, a paraplegic Marine is di... \n",
935 | "1 Captain Barbossa, long believed to be dead, ha... \n",
936 | "2 A cryptic message from Bond’s past sends him o... \n",
937 | "3 Following the death of District Attorney Harve... \n",
938 | "4 John Carter is a war-weary, former military ca... \n",
939 | "\n",
940 | " genres \\\n",
941 | "0 [Action, Adventure, Fantasy, Science Fiction] \n",
942 | "1 [Adventure, Fantasy, Action] \n",
943 | "2 [Action, Adventure, Crime] \n",
944 | "3 [Action, Crime, Drama, Thriller] \n",
945 | "4 [Action, Adventure, Science Fiction] \n",
946 | "\n",
947 | " keywords \\\n",
948 | "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n",
949 | "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n",
950 | "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n",
951 | "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n",
952 | "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n",
953 | "\n",
954 | " cast \\\n",
955 | "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
956 | "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
957 | "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n",
958 | "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n",
959 | "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n",
960 | "\n",
961 | " crew release_date \\\n",
962 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 2009-12-10 \n",
963 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... 2007-05-19 \n",
964 | "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... 2015-10-26 \n",
965 | "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... 2012-07-16 \n",
966 | "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 2012-03-07 \n",
967 | "\n",
968 | " vote_average year \n",
969 | "0 7.2 2009 \n",
970 | "1 6.9 2007 \n",
971 | "2 6.3 2015 \n",
972 | "3 7.6 2012 \n",
973 | "4 6.1 2012 "
974 | ]
975 | },
976 | "execution_count": 22,
977 | "metadata": {},
978 | "output_type": "execute_result"
979 | }
980 | ],
981 | "source": [
982 | "movies.head()"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": 23,
988 | "metadata": {},
989 | "outputs": [
990 | {
991 | "data": {
992 | "text/plain": [
993 | "'[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"space war\"}, {\"id\": 3388, \"name\": \"space colony\"}, {\"id\": 3679, \"name\": \"society\"}, {\"id\": 3801, \"name\": \"space travel\"}, {\"id\": 9685, \"name\": \"futuristic\"}, {\"id\": 9840, \"name\": \"romance\"}, {\"id\": 9882, \"name\": \"space\"}, {\"id\": 9951, \"name\": \"alien\"}, {\"id\": 10148, \"name\": \"tribe\"}, {\"id\": 10158, \"name\": \"alien planet\"}, {\"id\": 10987, \"name\": \"cgi\"}, {\"id\": 11399, \"name\": \"marine\"}, {\"id\": 13065, \"name\": \"soldier\"}, {\"id\": 14643, \"name\": \"battle\"}, {\"id\": 14720, \"name\": \"love affair\"}, {\"id\": 165431, \"name\": \"anti war\"}, {\"id\": 193554, \"name\": \"power relations\"}, {\"id\": 206690, \"name\": \"mind and soul\"}, {\"id\": 209714, \"name\": \"3d\"}]'"
994 | ]
995 | },
996 | "execution_count": 23,
997 | "metadata": {},
998 | "output_type": "execute_result"
999 | }
1000 | ],
1001 | "source": [
1002 | "# handle keywords\n",
1003 | "movies.iloc[0]['keywords']"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "code",
1008 | "execution_count": 24,
1009 | "metadata": {},
1010 | "outputs": [
1011 | {
1012 | "data": {
1013 | "text/html": [
1014 | "\n",
1015 | "\n",
1028 | "
\n",
1029 | " \n",
1030 | " \n",
1031 | " | \n",
1032 | " movie_id | \n",
1033 | " title | \n",
1034 | " overview | \n",
1035 | " genres | \n",
1036 | " keywords | \n",
1037 | " cast | \n",
1038 | " crew | \n",
1039 | " release_date | \n",
1040 | " vote_average | \n",
1041 | " year | \n",
1042 | "
\n",
1043 | " \n",
1044 | " \n",
1045 | " \n",
1046 | " | 0 | \n",
1047 | " 19995 | \n",
1048 | " Avatar | \n",
1049 | " In the 22nd century, a paraplegic Marine is di... | \n",
1050 | " [Action, Adventure, Fantasy, Science Fiction] | \n",
1051 | " [culture clash, future, space war, space colon... | \n",
1052 | " [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... | \n",
1053 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
1054 | " 2009-12-10 | \n",
1055 | " 7.2 | \n",
1056 | " 2009 | \n",
1057 | "
\n",
1058 | " \n",
1059 | " | 1 | \n",
1060 | " 285 | \n",
1061 | " Pirates of the Caribbean: At World's End | \n",
1062 | " Captain Barbossa, long believed to be dead, ha... | \n",
1063 | " [Adventure, Fantasy, Action] | \n",
1064 | " [ocean, drug abuse, exotic island, east india ... | \n",
1065 | " [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... | \n",
1066 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
1067 | " 2007-05-19 | \n",
1068 | " 6.9 | \n",
1069 | " 2007 | \n",
1070 | "
\n",
1071 | " \n",
1072 | " | 2 | \n",
1073 | " 206647 | \n",
1074 | " Spectre | \n",
1075 | " A cryptic message from Bond’s past sends him o... | \n",
1076 | " [Action, Adventure, Crime] | \n",
1077 | " [spy, based on novel, secret agent, sequel, mi... | \n",
1078 | " [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... | \n",
1079 | " [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... | \n",
1080 | " 2015-10-26 | \n",
1081 | " 6.3 | \n",
1082 | " 2015 | \n",
1083 | "
\n",
1084 | " \n",
1085 | " | 3 | \n",
1086 | " 49026 | \n",
1087 | " The Dark Knight Rises | \n",
1088 | " Following the death of District Attorney Harve... | \n",
1089 | " [Action, Crime, Drama, Thriller] | \n",
1090 | " [dc comics, crime fighter, terrorist, secret i... | \n",
1091 | " [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... | \n",
1092 | " [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... | \n",
1093 | " 2012-07-16 | \n",
1094 | " 7.6 | \n",
1095 | " 2012 | \n",
1096 | "
\n",
1097 | " \n",
1098 | " | 4 | \n",
1099 | " 49529 | \n",
1100 | " John Carter | \n",
1101 | " John Carter is a war-weary, former military ca... | \n",
1102 | " [Action, Adventure, Science Fiction] | \n",
1103 | " [based on novel, mars, medallion, space travel... | \n",
1104 | " [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... | \n",
1105 | " [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... | \n",
1106 | " 2012-03-07 | \n",
1107 | " 6.1 | \n",
1108 | " 2012 | \n",
1109 | "
\n",
1110 | " \n",
1111 | "
\n",
1112 | "
"
1113 | ],
1114 | "text/plain": [
1115 | " movie_id title \\\n",
1116 | "0 19995 Avatar \n",
1117 | "1 285 Pirates of the Caribbean: At World's End \n",
1118 | "2 206647 Spectre \n",
1119 | "3 49026 The Dark Knight Rises \n",
1120 | "4 49529 John Carter \n",
1121 | "\n",
1122 | " overview \\\n",
1123 | "0 In the 22nd century, a paraplegic Marine is di... \n",
1124 | "1 Captain Barbossa, long believed to be dead, ha... \n",
1125 | "2 A cryptic message from Bond’s past sends him o... \n",
1126 | "3 Following the death of District Attorney Harve... \n",
1127 | "4 John Carter is a war-weary, former military ca... \n",
1128 | "\n",
1129 | " genres \\\n",
1130 | "0 [Action, Adventure, Fantasy, Science Fiction] \n",
1131 | "1 [Adventure, Fantasy, Action] \n",
1132 | "2 [Action, Adventure, Crime] \n",
1133 | "3 [Action, Crime, Drama, Thriller] \n",
1134 | "4 [Action, Adventure, Science Fiction] \n",
1135 | "\n",
1136 | " keywords \\\n",
1137 | "0 [culture clash, future, space war, space colon... \n",
1138 | "1 [ocean, drug abuse, exotic island, east india ... \n",
1139 | "2 [spy, based on novel, secret agent, sequel, mi... \n",
1140 | "3 [dc comics, crime fighter, terrorist, secret i... \n",
1141 | "4 [based on novel, mars, medallion, space travel... \n",
1142 | "\n",
1143 | " cast \\\n",
1144 | "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
1145 | "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
1146 | "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n",
1147 | "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n",
1148 | "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n",
1149 | "\n",
1150 | " crew release_date \\\n",
1151 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 2009-12-10 \n",
1152 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... 2007-05-19 \n",
1153 | "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... 2015-10-26 \n",
1154 | "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... 2012-07-16 \n",
1155 | "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 2012-03-07 \n",
1156 | "\n",
1157 | " vote_average year \n",
1158 | "0 7.2 2009 \n",
1159 | "1 6.9 2007 \n",
1160 | "2 6.3 2015 \n",
1161 | "3 7.6 2012 \n",
1162 | "4 6.1 2012 "
1163 | ]
1164 | },
1165 | "execution_count": 24,
1166 | "metadata": {},
1167 | "output_type": "execute_result"
1168 | }
1169 | ],
1170 | "source": [
1171 | "movies['keywords'] = movies['keywords'].apply(convert)\n",
1172 | "movies.head()"
1173 | ]
1174 | },
1175 | {
1176 | "cell_type": "code",
1177 | "execution_count": 25,
1178 | "metadata": {},
1179 | "outputs": [
1180 | {
1181 | "data": {
1182 | "text/plain": [
1183 | "'[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"credit_id\": \"5602a8a7c3a3685532001c9a\", \"gender\": 2, \"id\": 65731, \"name\": \"Sam Worthington\", \"order\": 0}, {\"cast_id\": 3, \"character\": \"Neytiri\", \"credit_id\": \"52fe48009251416c750ac9cb\", \"gender\": 1, \"id\": 8691, \"name\": \"Zoe Saldana\", \"order\": 1}, {\"cast_id\": 25, \"character\": \"Dr. Grace Augustine\", \"credit_id\": \"52fe48009251416c750aca39\", \"gender\": 1, \"id\": 10205, \"name\": \"Sigourney Weaver\", \"order\": 2}, {\"cast_id\": 4, \"character\": \"Col. Quaritch\", \"credit_id\": \"52fe48009251416c750ac9cf\", \"gender\": 2, \"id\": 32747, \"name\": \"Stephen Lang\", \"order\": 3}, {\"cast_id\": 5, \"character\": \"Trudy Chacon\", \"credit_id\": \"52fe48009251416c750ac9d3\", \"gender\": 1, \"id\": 17647, \"name\": \"Michelle Rodriguez\", \"order\": 4}, {\"cast_id\": 8, \"character\": \"Selfridge\", \"credit_id\": \"52fe48009251416c750ac9e1\", \"gender\": 2, \"id\": 1771, \"name\": \"Giovanni Ribisi\", \"order\": 5}, {\"cast_id\": 7, \"character\": \"Norm Spellman\", \"credit_id\": \"52fe48009251416c750ac9dd\", \"gender\": 2, \"id\": 59231, \"name\": \"Joel David Moore\", \"order\": 6}, {\"cast_id\": 9, \"character\": \"Moat\", \"credit_id\": \"52fe48009251416c750ac9e5\", \"gender\": 1, \"id\": 30485, \"name\": \"CCH Pounder\", \"order\": 7}, {\"cast_id\": 11, \"character\": \"Eytukan\", \"credit_id\": \"52fe48009251416c750ac9ed\", \"gender\": 2, \"id\": 15853, \"name\": \"Wes Studi\", \"order\": 8}, {\"cast_id\": 10, \"character\": \"Tsu\\'Tey\", \"credit_id\": \"52fe48009251416c750ac9e9\", \"gender\": 2, \"id\": 10964, \"name\": \"Laz Alonso\", \"order\": 9}, {\"cast_id\": 12, \"character\": \"Dr. 
Max Patel\", \"credit_id\": \"52fe48009251416c750ac9f1\", \"gender\": 2, \"id\": 95697, \"name\": \"Dileep Rao\", \"order\": 10}, {\"cast_id\": 13, \"character\": \"Lyle Wainfleet\", \"credit_id\": \"52fe48009251416c750ac9f5\", \"gender\": 2, \"id\": 98215, \"name\": \"Matt Gerald\", \"order\": 11}, {\"cast_id\": 32, \"character\": \"Private Fike\", \"credit_id\": \"52fe48009251416c750aca5b\", \"gender\": 2, \"id\": 154153, \"name\": \"Sean Anthony Moran\", \"order\": 12}, {\"cast_id\": 33, \"character\": \"Cryo Vault Med Tech\", \"credit_id\": \"52fe48009251416c750aca5f\", \"gender\": 2, \"id\": 397312, \"name\": \"Jason Whyte\", \"order\": 13}, {\"cast_id\": 34, \"character\": \"Venture Star Crew Chief\", \"credit_id\": \"52fe48009251416c750aca63\", \"gender\": 2, \"id\": 42317, \"name\": \"Scott Lawrence\", \"order\": 14}, {\"cast_id\": 35, \"character\": \"Lock Up Trooper\", \"credit_id\": \"52fe48009251416c750aca67\", \"gender\": 2, \"id\": 986734, \"name\": \"Kelly Kilgour\", \"order\": 15}, {\"cast_id\": 36, \"character\": \"Shuttle Pilot\", \"credit_id\": \"52fe48009251416c750aca6b\", \"gender\": 0, \"id\": 1207227, \"name\": \"James Patrick Pitt\", \"order\": 16}, {\"cast_id\": 37, \"character\": \"Shuttle Co-Pilot\", \"credit_id\": \"52fe48009251416c750aca6f\", \"gender\": 0, \"id\": 1180936, \"name\": \"Sean Patrick Murphy\", \"order\": 17}, {\"cast_id\": 38, \"character\": \"Shuttle Crew Chief\", \"credit_id\": \"52fe48009251416c750aca73\", \"gender\": 2, \"id\": 1019578, \"name\": \"Peter Dillon\", \"order\": 18}, {\"cast_id\": 39, \"character\": \"Tractor Operator / Troupe\", \"credit_id\": \"52fe48009251416c750aca77\", \"gender\": 0, \"id\": 91443, \"name\": \"Kevin Dorman\", \"order\": 19}, {\"cast_id\": 40, \"character\": \"Dragon Gunship Pilot\", \"credit_id\": \"52fe48009251416c750aca7b\", \"gender\": 2, \"id\": 173391, \"name\": \"Kelson Henderson\", \"order\": 20}, {\"cast_id\": 41, \"character\": \"Dragon Gunship Gunner\", \"credit_id\": 
\"52fe48009251416c750aca7f\", \"gender\": 0, \"id\": 1207236, \"name\": \"David Van Horn\", \"order\": 21}, {\"cast_id\": 42, \"character\": \"Dragon Gunship Navigator\", \"credit_id\": \"52fe48009251416c750aca83\", \"gender\": 0, \"id\": 215913, \"name\": \"Jacob Tomuri\", \"order\": 22}, {\"cast_id\": 43, \"character\": \"Suit #1\", \"credit_id\": \"52fe48009251416c750aca87\", \"gender\": 0, \"id\": 143206, \"name\": \"Michael Blain-Rozgay\", \"order\": 23}, {\"cast_id\": 44, \"character\": \"Suit #2\", \"credit_id\": \"52fe48009251416c750aca8b\", \"gender\": 2, \"id\": 169676, \"name\": \"Jon Curry\", \"order\": 24}, {\"cast_id\": 46, \"character\": \"Ambient Room Tech\", \"credit_id\": \"52fe48009251416c750aca8f\", \"gender\": 0, \"id\": 1048610, \"name\": \"Luke Hawker\", \"order\": 25}, {\"cast_id\": 47, \"character\": \"Ambient Room Tech / Troupe\", \"credit_id\": \"52fe48009251416c750aca93\", \"gender\": 0, \"id\": 42288, \"name\": \"Woody Schultz\", \"order\": 26}, {\"cast_id\": 48, \"character\": \"Horse Clan Leader\", \"credit_id\": \"52fe48009251416c750aca97\", \"gender\": 2, \"id\": 68278, \"name\": \"Peter Mensah\", \"order\": 27}, {\"cast_id\": 49, \"character\": \"Link Room Tech\", \"credit_id\": \"52fe48009251416c750aca9b\", \"gender\": 0, \"id\": 1207247, \"name\": \"Sonia Yee\", \"order\": 28}, {\"cast_id\": 50, \"character\": \"Basketball Avatar / Troupe\", \"credit_id\": \"52fe48009251416c750aca9f\", \"gender\": 1, \"id\": 1207248, \"name\": \"Jahnel Curfman\", \"order\": 29}, {\"cast_id\": 51, \"character\": \"Basketball Avatar\", \"credit_id\": \"52fe48009251416c750acaa3\", \"gender\": 0, \"id\": 89714, \"name\": \"Ilram Choi\", \"order\": 30}, {\"cast_id\": 52, \"character\": \"Na\\'vi Child\", \"credit_id\": \"52fe48009251416c750acaa7\", \"gender\": 0, \"id\": 1207249, \"name\": \"Kyla Warren\", \"order\": 31}, {\"cast_id\": 53, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acaab\", \"gender\": 0, \"id\": 1207250, \"name\": 
\"Lisa Roumain\", \"order\": 32}, {\"cast_id\": 54, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acaaf\", \"gender\": 1, \"id\": 83105, \"name\": \"Debra Wilson\", \"order\": 33}, {\"cast_id\": 57, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acabb\", \"gender\": 0, \"id\": 1207253, \"name\": \"Chris Mala\", \"order\": 34}, {\"cast_id\": 55, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acab3\", \"gender\": 0, \"id\": 1207251, \"name\": \"Taylor Kibby\", \"order\": 35}, {\"cast_id\": 56, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acab7\", \"gender\": 0, \"id\": 1207252, \"name\": \"Jodie Landau\", \"order\": 36}, {\"cast_id\": 58, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acabf\", \"gender\": 0, \"id\": 1207254, \"name\": \"Julie Lamm\", \"order\": 37}, {\"cast_id\": 59, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acac3\", \"gender\": 0, \"id\": 1207257, \"name\": \"Cullen B. 
Madden\", \"order\": 38}, {\"cast_id\": 60, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acac7\", \"gender\": 0, \"id\": 1207259, \"name\": \"Joseph Brady Madden\", \"order\": 39}, {\"cast_id\": 61, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acacb\", \"gender\": 0, \"id\": 1207262, \"name\": \"Frankie Torres\", \"order\": 40}, {\"cast_id\": 62, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acacf\", \"gender\": 1, \"id\": 1158600, \"name\": \"Austin Wilson\", \"order\": 41}, {\"cast_id\": 63, \"character\": \"Troupe\", \"credit_id\": \"52fe48019251416c750acad3\", \"gender\": 1, \"id\": 983705, \"name\": \"Sara Wilson\", \"order\": 42}, {\"cast_id\": 64, \"character\": \"Troupe\", \"credit_id\": \"52fe48019251416c750acad7\", \"gender\": 0, \"id\": 1207263, \"name\": \"Tamica Washington-Miller\", \"order\": 43}, {\"cast_id\": 65, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acadb\", \"gender\": 1, \"id\": 1145098, \"name\": \"Lucy Briant\", \"order\": 44}, {\"cast_id\": 66, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acadf\", \"gender\": 2, \"id\": 33305, \"name\": \"Nathan Meister\", \"order\": 45}, {\"cast_id\": 67, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acae3\", \"gender\": 0, \"id\": 1207264, \"name\": \"Gerry Blair\", \"order\": 46}, {\"cast_id\": 68, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acae7\", \"gender\": 2, \"id\": 33311, \"name\": \"Matthew Chamberlain\", \"order\": 47}, {\"cast_id\": 69, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acaeb\", \"gender\": 0, \"id\": 1207265, \"name\": \"Paul Yates\", \"order\": 48}, {\"cast_id\": 70, \"character\": \"Op Center Duty Officer\", \"credit_id\": \"52fe48019251416c750acaef\", \"gender\": 0, \"id\": 1207266, \"name\": \"Wray Wilson\", \"order\": 49}, {\"cast_id\": 71, \"character\": \"Op Center Staff\", 
\"credit_id\": \"52fe48019251416c750acaf3\", \"gender\": 2, \"id\": 54492, \"name\": \"James Gaylyn\", \"order\": 50}, {\"cast_id\": 72, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acaf7\", \"gender\": 0, \"id\": 1207267, \"name\": \"Melvin Leno Clark III\", \"order\": 51}, {\"cast_id\": 73, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acafb\", \"gender\": 0, \"id\": 1207268, \"name\": \"Carvon Futrell\", \"order\": 52}, {\"cast_id\": 74, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acaff\", \"gender\": 0, \"id\": 1207269, \"name\": \"Brandon Jelkes\", \"order\": 53}, {\"cast_id\": 75, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb03\", \"gender\": 0, \"id\": 1207270, \"name\": \"Micah Moch\", \"order\": 54}, {\"cast_id\": 76, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb07\", \"gender\": 0, \"id\": 1207271, \"name\": \"Hanniyah Muhammad\", \"order\": 55}, {\"cast_id\": 77, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb0b\", \"gender\": 0, \"id\": 1207272, \"name\": \"Christopher Nolen\", \"order\": 56}, {\"cast_id\": 78, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb0f\", \"gender\": 0, \"id\": 1207273, \"name\": \"Christa Oliver\", \"order\": 57}, {\"cast_id\": 79, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb13\", \"gender\": 0, \"id\": 1207274, \"name\": \"April Marie Thomas\", \"order\": 58}, {\"cast_id\": 80, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb17\", \"gender\": 0, \"id\": 1207275, \"name\": \"Bravita A. 
Threatt\", \"order\": 59}, {\"cast_id\": 81, \"character\": \"Mining Chief (uncredited)\", \"credit_id\": \"52fe48019251416c750acb1b\", \"gender\": 0, \"id\": 1207276, \"name\": \"Colin Bleasdale\", \"order\": 60}, {\"cast_id\": 82, \"character\": \"Veteran Miner (uncredited)\", \"credit_id\": \"52fe48019251416c750acb1f\", \"gender\": 0, \"id\": 107969, \"name\": \"Mike Bodnar\", \"order\": 61}, {\"cast_id\": 83, \"character\": \"Richard (uncredited)\", \"credit_id\": \"52fe48019251416c750acb23\", \"gender\": 0, \"id\": 1207278, \"name\": \"Matt Clayton\", \"order\": 62}, {\"cast_id\": 84, \"character\": \"Nav\\'i (uncredited)\", \"credit_id\": \"52fe48019251416c750acb27\", \"gender\": 1, \"id\": 147898, \"name\": \"Nicole Dionne\", \"order\": 63}, {\"cast_id\": 85, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb2b\", \"gender\": 0, \"id\": 1207280, \"name\": \"Jamie Harrison\", \"order\": 64}, {\"cast_id\": 86, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb2f\", \"gender\": 0, \"id\": 1207281, \"name\": \"Allan Henry\", \"order\": 65}, {\"cast_id\": 87, \"character\": \"Ground Technician (uncredited)\", \"credit_id\": \"52fe48019251416c750acb33\", \"gender\": 2, \"id\": 1207282, \"name\": \"Anthony Ingruber\", \"order\": 66}, {\"cast_id\": 88, \"character\": \"Flight Crew Mechanic (uncredited)\", \"credit_id\": \"52fe48019251416c750acb37\", \"gender\": 0, \"id\": 1207283, \"name\": \"Ashley Jeffery\", \"order\": 67}, {\"cast_id\": 14, \"character\": \"Samson Pilot\", \"credit_id\": \"52fe48009251416c750ac9f9\", \"gender\": 0, \"id\": 98216, \"name\": \"Dean Knowsley\", \"order\": 68}, {\"cast_id\": 89, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb3b\", \"gender\": 0, \"id\": 1201399, \"name\": \"Joseph Mika-Hunt\", \"order\": 69}, {\"cast_id\": 90, \"character\": \"Banshee (uncredited)\", \"credit_id\": \"52fe48019251416c750acb3f\", \"gender\": 0, \"id\": 236696, 
\"name\": \"Terry Notary\", \"order\": 70}, {\"cast_id\": 91, \"character\": \"Soldier (uncredited)\", \"credit_id\": \"52fe48019251416c750acb43\", \"gender\": 0, \"id\": 1207287, \"name\": \"Kai Pantano\", \"order\": 71}, {\"cast_id\": 92, \"character\": \"Blast Technician (uncredited)\", \"credit_id\": \"52fe48019251416c750acb47\", \"gender\": 0, \"id\": 1207288, \"name\": \"Logan Pithyou\", \"order\": 72}, {\"cast_id\": 93, \"character\": \"Vindum Raah (uncredited)\", \"credit_id\": \"52fe48019251416c750acb4b\", \"gender\": 0, \"id\": 1207289, \"name\": \"Stuart Pollock\", \"order\": 73}, {\"cast_id\": 94, \"character\": \"Hero (uncredited)\", \"credit_id\": \"52fe48019251416c750acb4f\", \"gender\": 0, \"id\": 584868, \"name\": \"Raja\", \"order\": 74}, {\"cast_id\": 95, \"character\": \"Ops Centreworker (uncredited)\", \"credit_id\": \"52fe48019251416c750acb53\", \"gender\": 0, \"id\": 1207290, \"name\": \"Gareth Ruck\", \"order\": 75}, {\"cast_id\": 96, \"character\": \"Engineer (uncredited)\", \"credit_id\": \"52fe48019251416c750acb57\", \"gender\": 0, \"id\": 1062463, \"name\": \"Rhian Sheehan\", \"order\": 76}, {\"cast_id\": 97, \"character\": \"Col. Quaritch\\'s Mech Suit (uncredited)\", \"credit_id\": \"52fe48019251416c750acb5b\", \"gender\": 0, \"id\": 60656, \"name\": \"T. J. 
Storm\", \"order\": 77}, {\"cast_id\": 98, \"character\": \"Female Marine (uncredited)\", \"credit_id\": \"52fe48019251416c750acb5f\", \"gender\": 0, \"id\": 1207291, \"name\": \"Jodie Taylor\", \"order\": 78}, {\"cast_id\": 99, \"character\": \"Ikran Clan Leader (uncredited)\", \"credit_id\": \"52fe48019251416c750acb63\", \"gender\": 1, \"id\": 1186027, \"name\": \"Alicia Vela-Bailey\", \"order\": 79}, {\"cast_id\": 100, \"character\": \"Geologist (uncredited)\", \"credit_id\": \"52fe48019251416c750acb67\", \"gender\": 0, \"id\": 1207292, \"name\": \"Richard Whiteside\", \"order\": 80}, {\"cast_id\": 101, \"character\": \"Na\\'vi (uncredited)\", \"credit_id\": \"52fe48019251416c750acb6b\", \"gender\": 0, \"id\": 103259, \"name\": \"Nikie Zambo\", \"order\": 81}, {\"cast_id\": 102, \"character\": \"Ambient Room Tech / Troupe\", \"credit_id\": \"52fe48019251416c750acb6f\", \"gender\": 1, \"id\": 42286, \"name\": \"Julene Renee\", \"order\": 82}]'"
1184 | ]
1185 | },
1186 | "execution_count": 25,
1187 | "metadata": {},
1188 | "output_type": "execute_result"
1189 | }
1190 | ],
1191 | "source": [
1192 | "# handle cast\n",
1193 | "movies.iloc[0]['cast']"
1194 | ]
1195 | },
1196 | {
1197 | "cell_type": "code",
1198 | "execution_count": 26,
1199 | "metadata": {},
1200 | "outputs": [],
1201 | "source": [
1202 | "# Keep only the top 3 cast members\n",
1203 | "\n",
1204 | "def convert_cast(text):\n",
1205 | "    \"\"\"Return the names of the first three cast entries in ``text``.\n",
1206 | "\n",
1207 | "    ``text`` is the string form of a list of dicts that each have a 'name' key.\n",
1208 | "    \"\"\"\n",
1209 | "    # Slice before reading names so entries past the third are never visited.\n",
1210 | "    top_billed = ast.literal_eval(text)[:3]\n",
1211 | "    return [member['name'] for member in top_billed]"
1212 | ]
1213 | },
1214 | {
1215 | "cell_type": "code",
1216 | "execution_count": 27,
1217 | "metadata": {},
1218 | "outputs": [
1219 | {
1220 | "data": {
1221 | "text/html": [
1222 | "\n",
1223 | "\n",
1236 | "
\n",
1237 | " \n",
1238 | " \n",
1239 | " | \n",
1240 | " movie_id | \n",
1241 | " title | \n",
1242 | " overview | \n",
1243 | " genres | \n",
1244 | " keywords | \n",
1245 | " cast | \n",
1246 | " crew | \n",
1247 | " release_date | \n",
1248 | " vote_average | \n",
1249 | " year | \n",
1250 | "
\n",
1251 | " \n",
1252 | " \n",
1253 | " \n",
1254 | " | 0 | \n",
1255 | " 19995 | \n",
1256 | " Avatar | \n",
1257 | " In the 22nd century, a paraplegic Marine is di... | \n",
1258 | " [Action, Adventure, Fantasy, Science Fiction] | \n",
1259 | " [culture clash, future, space war, space colon... | \n",
1260 | " [Sam Worthington, Zoe Saldana, Sigourney Weaver] | \n",
1261 | " [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... | \n",
1262 | " 2009-12-10 | \n",
1263 | " 7.2 | \n",
1264 | " 2009 | \n",
1265 | "
\n",
1266 | " \n",
1267 | " | 1 | \n",
1268 | " 285 | \n",
1269 | " Pirates of the Caribbean: At World's End | \n",
1270 | " Captain Barbossa, long believed to be dead, ha... | \n",
1271 | " [Adventure, Fantasy, Action] | \n",
1272 | " [ocean, drug abuse, exotic island, east india ... | \n",
1273 | " [Johnny Depp, Orlando Bloom, Keira Knightley] | \n",
1274 | " [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... | \n",
1275 | " 2007-05-19 | \n",
1276 | " 6.9 | \n",
1277 | " 2007 | \n",
1278 | "
\n",
1279 | " \n",
1280 | " | 2 | \n",
1281 | " 206647 | \n",
1282 | " Spectre | \n",
1283 | " A cryptic message from Bond’s past sends him o... | \n",
1284 | " [Action, Adventure, Crime] | \n",
1285 | " [spy, based on novel, secret agent, sequel, mi... | \n",
1286 | " [Daniel Craig, Christoph Waltz, Léa Seydoux] | \n",
1287 | " [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... | \n",
1288 | " 2015-10-26 | \n",
1289 | " 6.3 | \n",
1290 | " 2015 | \n",
1291 | "
\n",
1292 | " \n",
1293 | " | 3 | \n",
1294 | " 49026 | \n",
1295 | " The Dark Knight Rises | \n",
1296 | " Following the death of District Attorney Harve... | \n",
1297 | " [Action, Crime, Drama, Thriller] | \n",
1298 | " [dc comics, crime fighter, terrorist, secret i... | \n",
1299 | " [Christian Bale, Michael Caine, Gary Oldman] | \n",
1300 | " [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... | \n",
1301 | " 2012-07-16 | \n",
1302 | " 7.6 | \n",
1303 | " 2012 | \n",
1304 | "
\n",
1305 | " \n",
1306 | " | 4 | \n",
1307 | " 49529 | \n",
1308 | " John Carter | \n",
1309 | " John Carter is a war-weary, former military ca... | \n",
1310 | " [Action, Adventure, Science Fiction] | \n",
1311 | " [based on novel, mars, medallion, space travel... | \n",
1312 | " [Taylor Kitsch, Lynn Collins, Samantha Morton] | \n",
1313 | " [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... | \n",
1314 | " 2012-03-07 | \n",
1315 | " 6.1 | \n",
1316 | " 2012 | \n",
1317 | "
\n",
1318 | " \n",
1319 | "
\n",
1320 | "
"
1321 | ],
1322 | "text/plain": [
1323 | " movie_id title \\\n",
1324 | "0 19995 Avatar \n",
1325 | "1 285 Pirates of the Caribbean: At World's End \n",
1326 | "2 206647 Spectre \n",
1327 | "3 49026 The Dark Knight Rises \n",
1328 | "4 49529 John Carter \n",
1329 | "\n",
1330 | " overview \\\n",
1331 | "0 In the 22nd century, a paraplegic Marine is di... \n",
1332 | "1 Captain Barbossa, long believed to be dead, ha... \n",
1333 | "2 A cryptic message from Bond’s past sends him o... \n",
1334 | "3 Following the death of District Attorney Harve... \n",
1335 | "4 John Carter is a war-weary, former military ca... \n",
1336 | "\n",
1337 | " genres \\\n",
1338 | "0 [Action, Adventure, Fantasy, Science Fiction] \n",
1339 | "1 [Adventure, Fantasy, Action] \n",
1340 | "2 [Action, Adventure, Crime] \n",
1341 | "3 [Action, Crime, Drama, Thriller] \n",
1342 | "4 [Action, Adventure, Science Fiction] \n",
1343 | "\n",
1344 | " keywords \\\n",
1345 | "0 [culture clash, future, space war, space colon... \n",
1346 | "1 [ocean, drug abuse, exotic island, east india ... \n",
1347 | "2 [spy, based on novel, secret agent, sequel, mi... \n",
1348 | "3 [dc comics, crime fighter, terrorist, secret i... \n",
1349 | "4 [based on novel, mars, medallion, space travel... \n",
1350 | "\n",
1351 | " cast \\\n",
1352 | "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] \n",
1353 | "1 [Johnny Depp, Orlando Bloom, Keira Knightley] \n",
1354 | "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] \n",
1355 | "3 [Christian Bale, Michael Caine, Gary Oldman] \n",
1356 | "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] \n",
1357 | "\n",
1358 | " crew release_date \\\n",
1359 | "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 2009-12-10 \n",
1360 | "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... 2007-05-19 \n",
1361 | "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... 2015-10-26 \n",
1362 | "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... 2012-07-16 \n",
1363 | "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 2012-03-07 \n",
1364 | "\n",
1365 | " vote_average year \n",
1366 | "0 7.2 2009 \n",
1367 | "1 6.9 2007 \n",
1368 | "2 6.3 2015 \n",
1369 | "3 7.6 2012 \n",
1370 | "4 6.1 2012 "
1371 | ]
1372 | },
1373 | "execution_count": 27,
1374 | "metadata": {},
1375 | "output_type": "execute_result"
1376 | }
1377 | ],
1378 | "source": [
1379 | "movies['cast'] = movies['cast'].apply(convert_cast)\n",
1380 | "movies.head()"
1381 | ]
1382 | },
1383 | {
1384 | "cell_type": "code",
1385 | "execution_count": 28,
1386 | "metadata": {},
1387 | "outputs": [
1388 | {
1389 | "data": {
1390 | "text/plain": [
1391 | "'[{\"credit_id\": \"52fe48009251416c750aca23\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1721, \"job\": \"Editor\", \"name\": \"Stephen E. Rivkin\"}, {\"credit_id\": \"539c47ecc3a36810e3001f87\", \"department\": \"Art\", \"gender\": 2, \"id\": 496, \"job\": \"Production Design\", \"name\": \"Rick Carter\"}, {\"credit_id\": \"54491c89c3a3680fb4001cf7\", \"department\": \"Sound\", \"gender\": 0, \"id\": 900, \"job\": \"Sound Designer\", \"name\": \"Christopher Boyes\"}, {\"credit_id\": \"54491cb70e0a267480001bd0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 900, \"job\": \"Supervising Sound Editor\", \"name\": \"Christopher Boyes\"}, {\"credit_id\": \"539c4a4cc3a36810c9002101\", \"department\": \"Production\", \"gender\": 1, \"id\": 1262, \"job\": \"Casting\", \"name\": \"Mali Finn\"}, {\"credit_id\": \"5544ee3b925141499f0008fc\", \"department\": \"Sound\", \"gender\": 2, \"id\": 1729, \"job\": \"Original Music Composer\", \"name\": \"James Horner\"}, {\"credit_id\": \"52fe48009251416c750ac9c3\", \"department\": \"Directing\", \"gender\": 2, \"id\": 2710, \"job\": \"Director\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750ac9d9\", \"department\": \"Writing\", \"gender\": 2, \"id\": 2710, \"job\": \"Writer\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca17\", \"department\": \"Editing\", \"gender\": 2, \"id\": 2710, \"job\": \"Editor\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca29\", \"department\": \"Production\", \"gender\": 2, \"id\": 2710, \"job\": \"Producer\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca3f\", \"department\": \"Writing\", \"gender\": 2, \"id\": 2710, \"job\": \"Screenplay\", \"name\": \"James Cameron\"}, {\"credit_id\": \"539c4987c3a36810ba0021a4\", \"department\": \"Art\", \"gender\": 2, \"id\": 7236, \"job\": \"Art Direction\", \"name\": \"Andrew Menzies\"}, {\"credit_id\": \"549598c3c3a3686ae9004383\", \"department\": 
\"Visual Effects\", \"gender\": 0, \"id\": 6690, \"job\": \"Visual Effects Producer\", \"name\": \"Jill Brooks\"}, {\"credit_id\": \"52fe48009251416c750aca4b\", \"department\": \"Production\", \"gender\": 1, \"id\": 6347, \"job\": \"Casting\", \"name\": \"Margery Simkin\"}, {\"credit_id\": \"570b6f419251417da70032fe\", \"department\": \"Art\", \"gender\": 2, \"id\": 6878, \"job\": \"Supervising Art Director\", \"name\": \"Kevin Ishioka\"}, {\"credit_id\": \"5495a0fac3a3686ae9004468\", \"department\": \"Sound\", \"gender\": 0, \"id\": 6883, \"job\": \"Music Editor\", \"name\": \"Dick Bernstein\"}, {\"credit_id\": \"54959706c3a3686af3003e81\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8159, \"job\": \"Sound Effects Editor\", \"name\": \"Shannon Mills\"}, {\"credit_id\": \"54491d58c3a3680fb1001ccb\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8160, \"job\": \"Foley\", \"name\": \"Dennie Thorpe\"}, {\"credit_id\": \"54491d6cc3a3680fa5001b2c\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8163, \"job\": \"Foley\", \"name\": \"Jana Vance\"}, {\"credit_id\": \"52fe48009251416c750aca57\", \"department\": \"Costume & Make-Up\", \"gender\": 1, \"id\": 8527, \"job\": \"Costume Design\", \"name\": \"Deborah Lynn Scott\"}, {\"credit_id\": \"52fe48009251416c750aca2f\", \"department\": \"Production\", \"gender\": 2, \"id\": 8529, \"job\": \"Producer\", \"name\": \"Jon Landau\"}, {\"credit_id\": \"539c4937c3a36810ba002194\", \"department\": \"Art\", \"gender\": 0, \"id\": 9618, \"job\": \"Art Direction\", \"name\": \"Sean Haworth\"}, {\"credit_id\": \"539c49b6c3a36810c10020e6\", \"department\": \"Art\", \"gender\": 1, \"id\": 12653, \"job\": \"Set Decoration\", \"name\": \"Kim Sinclair\"}, {\"credit_id\": \"570b6f2f9251413a0e00020d\", \"department\": \"Art\", \"gender\": 1, \"id\": 12653, \"job\": \"Supervising Art Director\", \"name\": \"Kim Sinclair\"}, {\"credit_id\": \"54491a6c0e0a26748c001b19\", \"department\": \"Art\", \"gender\": 2, \"id\": 14350, 
\"job\": \"Set Designer\", \"name\": \"Richard F. Mays\"}, {\"credit_id\": \"56928cf4c3a3684cff0025c4\", \"department\": \"Production\", \"gender\": 1, \"id\": 20294, \"job\": \"Executive Producer\", \"name\": \"Laeta Kalogridis\"}, {\"credit_id\": \"52fe48009251416c750aca51\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 17675, \"job\": \"Costume Design\", \"name\": \"Mayes C. Rubeo\"}, {\"credit_id\": \"52fe48009251416c750aca11\", \"department\": \"Camera\", \"gender\": 2, \"id\": 18265, \"job\": \"Director of Photography\", \"name\": \"Mauro Fiore\"}, {\"credit_id\": \"5449194d0e0a26748f001b39\", \"department\": \"Art\", \"gender\": 0, \"id\": 42281, \"job\": \"Set Designer\", \"name\": \"Scott Herbertson\"}, {\"credit_id\": \"52fe48009251416c750aca05\", \"department\": \"Crew\", \"gender\": 0, \"id\": 42288, \"job\": \"Stunts\", \"name\": \"Woody Schultz\"}, {\"credit_id\": \"5592aefb92514152de0010f5\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 29067, \"job\": \"Makeup Artist\", \"name\": \"Linda DeVetta\"}, {\"credit_id\": \"5592afa492514152de00112c\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 29067, \"job\": \"Hairstylist\", \"name\": \"Linda DeVetta\"}, {\"credit_id\": \"54959ed592514130fc002e5d\", \"department\": \"Camera\", \"gender\": 2, \"id\": 33302, \"job\": \"Camera Operator\", \"name\": \"Richard Bluck\"}, {\"credit_id\": \"539c4891c3a36810ba002147\", \"department\": \"Art\", \"gender\": 2, \"id\": 33303, \"job\": \"Art Direction\", \"name\": \"Simon Bright\"}, {\"credit_id\": \"54959c069251417a81001f3a\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 113145, \"job\": \"Visual Effects Supervisor\", \"name\": \"Richard Martin\"}, {\"credit_id\": \"54959a0dc3a3680ff5002c8d\", \"department\": \"Crew\", \"gender\": 2, \"id\": 58188, \"job\": \"Visual Effects Editor\", \"name\": \"Steve R. 
Moore\"}, {\"credit_id\": \"52fe48009251416c750aca1d\", \"department\": \"Editing\", \"gender\": 2, \"id\": 58871, \"job\": \"Editor\", \"name\": \"John Refoua\"}, {\"credit_id\": \"54491a4dc3a3680fc30018ca\", \"department\": \"Art\", \"gender\": 0, \"id\": 92359, \"job\": \"Set Designer\", \"name\": \"Karl J. Martin\"}, {\"credit_id\": \"52fe48009251416c750aca35\", \"department\": \"Camera\", \"gender\": 1, \"id\": 72201, \"job\": \"Director of Photography\", \"name\": \"Chiling Lin\"}, {\"credit_id\": \"52fe48009251416c750ac9ff\", \"department\": \"Crew\", \"gender\": 0, \"id\": 89714, \"job\": \"Stunts\", \"name\": \"Ilram Choi\"}, {\"credit_id\": \"54959c529251416e2b004394\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 93214, \"job\": \"Visual Effects Supervisor\", \"name\": \"Steven Quale\"}, {\"credit_id\": \"54491edf0e0a267489001c37\", \"department\": \"Crew\", \"gender\": 1, \"id\": 122607, \"job\": \"Dialect Coach\", \"name\": \"Carla Meyer\"}, {\"credit_id\": \"539c485bc3a368653d001a3a\", \"department\": \"Art\", \"gender\": 2, \"id\": 132585, \"job\": \"Art Direction\", \"name\": \"Nick Bassett\"}, {\"credit_id\": \"539c4903c3a368653d001a74\", \"department\": \"Art\", \"gender\": 0, \"id\": 132596, \"job\": \"Art Direction\", \"name\": \"Jill Cormack\"}, {\"credit_id\": \"539c4967c3a368653d001a94\", \"department\": \"Art\", \"gender\": 0, \"id\": 132604, \"job\": \"Art Direction\", \"name\": \"Andy McLaren\"}, {\"credit_id\": \"52fe48009251416c750aca45\", \"department\": \"Crew\", \"gender\": 0, \"id\": 236696, \"job\": \"Motion Capture Artist\", \"name\": \"Terry Notary\"}, {\"credit_id\": \"54959e02c3a3680fc60027d2\", \"department\": \"Crew\", \"gender\": 2, \"id\": 956198, \"job\": \"Stunt Coordinator\", \"name\": \"Garrett Warren\"}, {\"credit_id\": \"54959ca3c3a3686ae300438c\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 957874, \"job\": \"Visual Effects Supervisor\", \"name\": \"Jonathan Rothbart\"}, {\"credit_id\": 
\"570b6f519251412c74001b2f\", \"department\": \"Art\", \"gender\": 0, \"id\": 957889, \"job\": \"Supervising Art Director\", \"name\": \"Stefan Dechant\"}, {\"credit_id\": \"570b6f62c3a3680b77007460\", \"department\": \"Art\", \"gender\": 2, \"id\": 959555, \"job\": \"Supervising Art Director\", \"name\": \"Todd Cherniawsky\"}, {\"credit_id\": \"539c4a3ac3a36810da0021cc\", \"department\": \"Production\", \"gender\": 0, \"id\": 1016177, \"job\": \"Casting\", \"name\": \"Miranda Rivers\"}, {\"credit_id\": \"539c482cc3a36810c1002062\", \"department\": \"Art\", \"gender\": 0, \"id\": 1032536, \"job\": \"Production Design\", \"name\": \"Robert Stromberg\"}, {\"credit_id\": \"539c4b65c3a36810c9002125\", \"department\": \"Costume & Make-Up\", \"gender\": 2, \"id\": 1071680, \"job\": \"Costume Design\", \"name\": \"John Harding\"}, {\"credit_id\": \"54959e6692514130fc002e4e\", \"department\": \"Camera\", \"gender\": 0, \"id\": 1177364, \"job\": \"Steadicam Operator\", \"name\": \"Roberto De Angelis\"}, {\"credit_id\": \"539c49f1c3a368653d001aac\", \"department\": \"Costume & Make-Up\", \"gender\": 2, \"id\": 1202850, \"job\": \"Makeup Department Head\", \"name\": \"Mike Smithson\"}, {\"credit_id\": \"5495999ec3a3686ae100460c\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1204668, \"job\": \"Visual Effects Producer\", \"name\": \"Alain Lalanne\"}, {\"credit_id\": \"54959cdfc3a3681153002729\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1206410, \"job\": \"Visual Effects Supervisor\", \"name\": \"Lucas Salton\"}, {\"credit_id\": \"549596239251417a81001eae\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1234266, \"job\": \"Post Production Supervisor\", \"name\": \"Janace Tashjian\"}, {\"credit_id\": \"54959c859251416e1e003efe\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1271932, \"job\": \"Visual Effects Supervisor\", \"name\": \"Stephen Rosenbaum\"}, {\"credit_id\": \"5592af28c3a368775a00105f\", \"department\": \"Costume & 
Make-Up\", \"gender\": 0, \"id\": 1310064, \"job\": \"Makeup Artist\", \"name\": \"Frankie Karena\"}, {\"credit_id\": \"539c4adfc3a36810e300203b\", \"department\": \"Costume & Make-Up\", \"gender\": 1, \"id\": 1319844, \"job\": \"Costume Supervisor\", \"name\": \"Lisa Lovaas\"}, {\"credit_id\": \"54959b579251416e2b004371\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1327028, \"job\": \"Visual Effects Supervisor\", \"name\": \"Jonathan Fawkner\"}, {\"credit_id\": \"539c48a7c3a36810b5001fa7\", \"department\": \"Art\", \"gender\": 0, \"id\": 1330561, \"job\": \"Art Direction\", \"name\": \"Robert Bavin\"}, {\"credit_id\": \"539c4a71c3a36810da0021e0\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330567, \"job\": \"Costume Supervisor\", \"name\": \"Anthony Almaraz\"}, {\"credit_id\": \"539c4a8ac3a36810ba0021e4\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330570, \"job\": \"Costume Supervisor\", \"name\": \"Carolyn M. Fenton\"}, {\"credit_id\": \"539c4ab6c3a36810da0021f0\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330574, \"job\": \"Costume Supervisor\", \"name\": \"Beth Koenigsberg\"}, {\"credit_id\": \"54491ab70e0a267480001ba2\", \"department\": \"Art\", \"gender\": 0, \"id\": 1336191, \"job\": \"Set Designer\", \"name\": \"Sam Page\"}, {\"credit_id\": \"544919d9c3a3680fc30018bd\", \"department\": \"Art\", \"gender\": 0, \"id\": 1339441, \"job\": \"Set Designer\", \"name\": \"Tex Kadonaga\"}, {\"credit_id\": \"54491cf50e0a267483001b0c\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1352422, \"job\": \"Dialogue Editor\", \"name\": \"Kim Foscato\"}, {\"credit_id\": \"544919f40e0a26748c001b09\", \"department\": \"Art\", \"gender\": 0, \"id\": 1352962, \"job\": \"Set Designer\", \"name\": \"Tammy S. 
Lee\"}, {\"credit_id\": \"5495a115c3a3680ff5002d71\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1357070, \"job\": \"Transportation Coordinator\", \"name\": \"Denny Caira\"}, {\"credit_id\": \"5495a12f92514130fc002e94\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1357071, \"job\": \"Transportation Coordinator\", \"name\": \"James Waitkus\"}, {\"credit_id\": \"5495976fc3a36811530026b0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1360103, \"job\": \"Supervising Sound Editor\", \"name\": \"Addison Teague\"}, {\"credit_id\": \"54491837c3a3680fb1001c5a\", \"department\": \"Art\", \"gender\": 2, \"id\": 1376887, \"job\": \"Set Designer\", \"name\": \"C. Scott Baker\"}, {\"credit_id\": \"54491878c3a3680fb4001c9d\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376888, \"job\": \"Set Designer\", \"name\": \"Luke Caska\"}, {\"credit_id\": \"544918dac3a3680fa5001ae0\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376889, \"job\": \"Set Designer\", \"name\": \"David Chow\"}, {\"credit_id\": \"544919110e0a267486001b68\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376890, \"job\": \"Set Designer\", \"name\": \"Jonathan Dyer\"}, {\"credit_id\": \"54491967c3a3680faa001b5e\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376891, \"job\": \"Set Designer\", \"name\": \"Joseph Hiura\"}, {\"credit_id\": \"54491997c3a3680fb1001c8a\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376892, \"job\": \"Art Department Coordinator\", \"name\": \"Rebecca Jellie\"}, {\"credit_id\": \"544919ba0e0a26748f001b42\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376893, \"job\": \"Set Designer\", \"name\": \"Robert Andrew Johnson\"}, {\"credit_id\": \"54491b1dc3a3680faa001b8c\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376895, \"job\": \"Assistant Art Director\", \"name\": \"Mike Stassi\"}, {\"credit_id\": \"54491b79c3a3680fbb001826\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376897, \"job\": \"Construction Coordinator\", \"name\": \"John 
Villarino\"}, {\"credit_id\": \"54491baec3a3680fb4001ce6\", \"department\": \"Art\", \"gender\": 2, \"id\": 1376898, \"job\": \"Assistant Art Director\", \"name\": \"Jeffrey Wisniewski\"}, {\"credit_id\": \"54491d2fc3a3680fb4001d07\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1376899, \"job\": \"Dialogue Editor\", \"name\": \"Cheryl Nardi\"}, {\"credit_id\": \"54491d86c3a3680fa5001b2f\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1376901, \"job\": \"Dialogue Editor\", \"name\": \"Marshall Winn\"}, {\"credit_id\": \"54491d9dc3a3680faa001bb0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1376902, \"job\": \"Supervising Sound Editor\", \"name\": \"Gwendolyn Yates Whittle\"}, {\"credit_id\": \"54491dc10e0a267486001bce\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1376903, \"job\": \"Sound Re-Recording Mixer\", \"name\": \"William Stein\"}, {\"credit_id\": \"54491f500e0a26747c001c07\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1376909, \"job\": \"Choreographer\", \"name\": \"Lula Washington\"}, {\"credit_id\": \"549599239251412c4e002a2e\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1391692, \"job\": \"Visual Effects Producer\", \"name\": \"Chris Del Conte\"}, {\"credit_id\": \"54959d54c3a36831b8001d9a\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 1391695, \"job\": \"Visual Effects Supervisor\", \"name\": \"R. 
Christopher White\"}, {\"credit_id\": \"54959bdf9251412c4e002a66\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1394070, \"job\": \"Visual Effects Supervisor\", \"name\": \"Dan Lemmon\"}, {\"credit_id\": \"5495971d92514132ed002922\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1394129, \"job\": \"Sound Effects Editor\", \"name\": \"Tim Nielsen\"}, {\"credit_id\": \"5592b25792514152cc0011aa\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1394286, \"job\": \"CG Supervisor\", \"name\": \"Michael Mulholland\"}, {\"credit_id\": \"54959a329251416e2b004355\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1394750, \"job\": \"Visual Effects Editor\", \"name\": \"Thomas Nittmann\"}, {\"credit_id\": \"54959d6dc3a3686ae9004401\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1394755, \"job\": \"Visual Effects Supervisor\", \"name\": \"Edson Williams\"}, {\"credit_id\": \"5495a08fc3a3686ae300441c\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1394953, \"job\": \"Digital Intermediate\", \"name\": \"Christine Carr\"}, {\"credit_id\": \"55402d659251413d6d000249\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1395269, \"job\": \"Visual Effects Supervisor\", \"name\": \"John Bruno\"}, {\"credit_id\": \"54959e7b9251416e1e003f3e\", \"department\": \"Camera\", \"gender\": 0, \"id\": 1398970, \"job\": \"Steadicam Operator\", \"name\": \"David Emmerichs\"}, {\"credit_id\": \"54959734c3a3686ae10045e0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1400906, \"job\": \"Sound Effects Editor\", \"name\": \"Christopher Scarabosio\"}, {\"credit_id\": \"549595dd92514130fc002d79\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401784, \"job\": \"Production Supervisor\", \"name\": \"Jennifer Teves\"}, {\"credit_id\": \"549596009251413af70028cc\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401785, \"job\": \"Production Manager\", \"name\": \"Brigitte Yorke\"}, {\"credit_id\": \"549596e892514130fc002d99\", 
\"department\": \"Sound\", \"gender\": 0, \"id\": 1401786, \"job\": \"Sound Effects Editor\", \"name\": \"Ken Fischer\"}, {\"credit_id\": \"549598229251412c4e002a1c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401787, \"job\": \"Special Effects Coordinator\", \"name\": \"Iain Hutton\"}, {\"credit_id\": \"549598349251416e2b00432b\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401788, \"job\": \"Special Effects Coordinator\", \"name\": \"Steve Ingram\"}, {\"credit_id\": \"54959905c3a3686ae3004324\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401789, \"job\": \"Visual Effects Producer\", \"name\": \"Joyce Cox\"}, {\"credit_id\": \"5495994b92514132ed002951\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401790, \"job\": \"Visual Effects Producer\", \"name\": \"Jenny Foster\"}, {\"credit_id\": \"549599cbc3a3686ae1004613\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401791, \"job\": \"Visual Effects Editor\", \"name\": \"Christopher Marino\"}, {\"credit_id\": \"549599f2c3a3686ae100461e\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401792, \"job\": \"Visual Effects Editor\", \"name\": \"Jim Milton\"}, {\"credit_id\": \"54959a51c3a3686af3003eb5\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401793, \"job\": \"Visual Effects Producer\", \"name\": \"Cyndi Ochs\"}, {\"credit_id\": \"54959a7cc3a36811530026f4\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401794, \"job\": \"Visual Effects Editor\", \"name\": \"Lucas Putnam\"}, {\"credit_id\": \"54959b91c3a3680ff5002cb4\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401795, \"job\": \"Visual Effects Supervisor\", \"name\": \"Anthony \\'Max\\' Ivins\"}, {\"credit_id\": \"54959bb69251412c4e002a5f\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401796, \"job\": \"Visual Effects Supervisor\", \"name\": \"John Knoll\"}, {\"credit_id\": \"54959cbbc3a3686ae3004391\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 
1401799, \"job\": \"Visual Effects Supervisor\", \"name\": \"Eric Saindon\"}, {\"credit_id\": \"54959d06c3a3686ae90043f6\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401800, \"job\": \"Visual Effects Supervisor\", \"name\": \"Wayne Stables\"}, {\"credit_id\": \"54959d259251416e1e003f11\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401801, \"job\": \"Visual Effects Supervisor\", \"name\": \"David Stinnett\"}, {\"credit_id\": \"54959db49251413af7002975\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401803, \"job\": \"Visual Effects Supervisor\", \"name\": \"Guy Williams\"}, {\"credit_id\": \"54959de4c3a3681153002750\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401804, \"job\": \"Stunt Coordinator\", \"name\": \"Stuart Thorp\"}, {\"credit_id\": \"54959ef2c3a3680fc60027f2\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1401805, \"job\": \"Best Boy Electric\", \"name\": \"Giles Coburn\"}, {\"credit_id\": \"54959f07c3a3680fc60027f9\", \"department\": \"Camera\", \"gender\": 2, \"id\": 1401806, \"job\": \"Still Photographer\", \"name\": \"Mark Fellman\"}, {\"credit_id\": \"54959f47c3a3681153002774\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1401807, \"job\": \"Lighting Technician\", \"name\": \"Scott Sprague\"}, {\"credit_id\": \"54959f8cc3a36831b8001df2\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401808, \"job\": \"Animation Director\", \"name\": \"Jeremy Hollobon\"}, {\"credit_id\": \"54959fa0c3a36831b8001dfb\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401809, \"job\": \"Animation Director\", \"name\": \"Orlando Meunier\"}, {\"credit_id\": \"54959fb6c3a3686af3003f54\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401810, \"job\": \"Animation Director\", \"name\": \"Taisuke Tanimura\"}, {\"credit_id\": \"54959fd2c3a36831b8001e02\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1401812, \"job\": \"Set Costumer\", \"name\": 
\"Lilia Mishel Acevedo\"}, {\"credit_id\": \"54959ff9c3a3686ae300440c\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1401814, \"job\": \"Set Costumer\", \"name\": \"Alejandro M. Hernandez\"}, {\"credit_id\": \"5495a0ddc3a3686ae10046fe\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1401815, \"job\": \"Digital Intermediate\", \"name\": \"Marvin Hall\"}, {\"credit_id\": \"5495a1f7c3a3686ae3004443\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401816, \"job\": \"Publicist\", \"name\": \"Judy Alley\"}, {\"credit_id\": \"5592b29fc3a36869d100002f\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1418381, \"job\": \"CG Supervisor\", \"name\": \"Mike Perry\"}, {\"credit_id\": \"5592b23a9251415df8001081\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1426854, \"job\": \"CG Supervisor\", \"name\": \"Andrew Morley\"}, {\"credit_id\": \"55491e1192514104c40002d8\", \"department\": \"Art\", \"gender\": 0, \"id\": 1438901, \"job\": \"Conceptual Design\", \"name\": \"Seth Engstrom\"}, {\"credit_id\": \"5525d5809251417276002b06\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1447362, \"job\": \"Visual Effects Art Director\", \"name\": \"Eric Oliver\"}, {\"credit_id\": \"554427ca925141586500312a\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1447503, \"job\": \"Modeling\", \"name\": \"Matsune Suzuki\"}, {\"credit_id\": \"551906889251415aab001c88\", \"department\": \"Art\", \"gender\": 0, \"id\": 1447524, \"job\": \"Art Department Manager\", \"name\": \"Paul Tobin\"}, {\"credit_id\": \"5592af8492514152cc0010de\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1452643, \"job\": \"Hairstylist\", \"name\": \"Roxane Griffin\"}, {\"credit_id\": \"553d3c109251415852001318\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1453938, \"job\": \"Lighting Artist\", \"name\": \"Arun Ram-Mohan\"}, {\"credit_id\": \"5592af4692514152d5001355\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1457305, 
\"job\": \"Makeup Artist\", \"name\": \"Georgia Lockhart-Adams\"}, {\"credit_id\": \"5592b2eac3a36877470012a5\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1466035, \"job\": \"CG Supervisor\", \"name\": \"Thrain Shadbolt\"}, {\"credit_id\": \"5592b032c3a36877450015f1\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483220, \"job\": \"CG Supervisor\", \"name\": \"Brad Alexander\"}, {\"credit_id\": \"5592b05592514152d80012f6\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483221, \"job\": \"CG Supervisor\", \"name\": \"Shadi Almassizadeh\"}, {\"credit_id\": \"5592b090c3a36877570010b5\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483222, \"job\": \"CG Supervisor\", \"name\": \"Simon Clutterbuck\"}, {\"credit_id\": \"5592b0dbc3a368774b00112c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483223, \"job\": \"CG Supervisor\", \"name\": \"Graeme Demmocks\"}, {\"credit_id\": \"5592b0fe92514152db0010c1\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483224, \"job\": \"CG Supervisor\", \"name\": \"Adrian Fernandes\"}, {\"credit_id\": \"5592b11f9251415df8001059\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483225, \"job\": \"CG Supervisor\", \"name\": \"Mitch Gates\"}, {\"credit_id\": \"5592b15dc3a3687745001645\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483226, \"job\": \"CG Supervisor\", \"name\": \"Jerry Kung\"}, {\"credit_id\": \"5592b18e925141645a0004ae\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483227, \"job\": \"CG Supervisor\", \"name\": \"Andy Lomas\"}, {\"credit_id\": \"5592b1bfc3a368775d0010e7\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483228, \"job\": \"CG Supervisor\", \"name\": \"Sebastian Marino\"}, {\"credit_id\": \"5592b2049251415df8001078\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483229, \"job\": \"CG Supervisor\", \"name\": \"Matthias Menz\"}, {\"credit_id\": \"5592b27b92514152d800136a\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483230, \"job\": \"CG Supervisor\", 
\"name\": \"Sergei Nevshupov\"}, {\"credit_id\": \"5592b2c3c3a36869e800003c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483231, \"job\": \"CG Supervisor\", \"name\": \"Philippe Rebours\"}, {\"credit_id\": \"5592b317c3a36877470012af\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483232, \"job\": \"CG Supervisor\", \"name\": \"Michael Takarangi\"}, {\"credit_id\": \"5592b345c3a36877470012bb\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483233, \"job\": \"CG Supervisor\", \"name\": \"David Weitzberg\"}, {\"credit_id\": \"5592b37cc3a368775100113b\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483234, \"job\": \"CG Supervisor\", \"name\": \"Ben White\"}, {\"credit_id\": \"573c8e2f9251413f5d000094\", \"department\": \"Crew\", \"gender\": 1, \"id\": 1621932, \"job\": \"Stunts\", \"name\": \"Min Windle\"}]'"
1392 | ]
1393 | },
1394 | "execution_count": 28,
1395 | "metadata": {},
1396 | "output_type": "execute_result"
1397 | }
1398 | ],
1399 | "source": [
1400 | "# handle crew\n",
1401 | "\n",
1402 | "movies.iloc[0]['crew']"
1403 | ]
1404 | },
1405 | {
1406 | "cell_type": "code",
1407 | "execution_count": 29,
1408 | "metadata": {},
1409 | "outputs": [],
1410 | "source": [
1411 | "def fetch_director(text):\n",
1412 | " L = []\n",
1413 | " for i in ast.literal_eval(text):\n",
1414 | " if i['job'] == 'Director':\n",
1415 | " L.append(i['name'])\n",
1416 | " break\n",
1417 | " return L"
1418 | ]
1419 | },
1420 | {
1421 | "cell_type": "code",
1422 | "execution_count": 30,
1423 | "metadata": {},
1424 | "outputs": [],
1425 | "source": [
1426 | "movies['crew'] = movies['crew'].apply(fetch_director)"
1427 | ]
1428 | },
1429 | {
1430 | "cell_type": "code",
1431 | "execution_count": 31,
1432 | "metadata": {},
1433 | "outputs": [
1434 | {
1435 | "data": {
1436 | "text/html": [
1437 | "\n",
1438 | "\n",
1451 | "
\n",
1452 | " \n",
1453 | " \n",
1454 | " | \n",
1455 | " movie_id | \n",
1456 | " title | \n",
1457 | " overview | \n",
1458 | " genres | \n",
1459 | " keywords | \n",
1460 | " cast | \n",
1461 | " crew | \n",
1462 | " release_date | \n",
1463 | " vote_average | \n",
1464 | " year | \n",
1465 | "
\n",
1466 | " \n",
1467 | " \n",
1468 | " \n",
1469 | " | 0 | \n",
1470 | " 19995 | \n",
1471 | " Avatar | \n",
1472 | " In the 22nd century, a paraplegic Marine is di... | \n",
1473 | " [Action, Adventure, Fantasy, Science Fiction] | \n",
1474 | " [culture clash, future, space war, space colon... | \n",
1475 | " [Sam Worthington, Zoe Saldana, Sigourney Weaver] | \n",
1476 | " [James Cameron] | \n",
1477 | " 2009-12-10 | \n",
1478 | " 7.2 | \n",
1479 | " 2009 | \n",
1480 | "
\n",
1481 | " \n",
1482 | " | 1 | \n",
1483 | " 285 | \n",
1484 | " Pirates of the Caribbean: At World's End | \n",
1485 | " Captain Barbossa, long believed to be dead, ha... | \n",
1486 | " [Adventure, Fantasy, Action] | \n",
1487 | " [ocean, drug abuse, exotic island, east india ... | \n",
1488 | " [Johnny Depp, Orlando Bloom, Keira Knightley] | \n",
1489 | " [Gore Verbinski] | \n",
1490 | " 2007-05-19 | \n",
1491 | " 6.9 | \n",
1492 | " 2007 | \n",
1493 | "
\n",
1494 | " \n",
1495 | " | 2 | \n",
1496 | " 206647 | \n",
1497 | " Spectre | \n",
1498 | " A cryptic message from Bond’s past sends him o... | \n",
1499 | " [Action, Adventure, Crime] | \n",
1500 | " [spy, based on novel, secret agent, sequel, mi... | \n",
1501 | " [Daniel Craig, Christoph Waltz, Léa Seydoux] | \n",
1502 | " [Sam Mendes] | \n",
1503 | " 2015-10-26 | \n",
1504 | " 6.3 | \n",
1505 | " 2015 | \n",
1506 | "
\n",
1507 | " \n",
1508 | " | 3 | \n",
1509 | " 49026 | \n",
1510 | " The Dark Knight Rises | \n",
1511 | " Following the death of District Attorney Harve... | \n",
1512 | " [Action, Crime, Drama, Thriller] | \n",
1513 | " [dc comics, crime fighter, terrorist, secret i... | \n",
1514 | " [Christian Bale, Michael Caine, Gary Oldman] | \n",
1515 | " [Christopher Nolan] | \n",
1516 | " 2012-07-16 | \n",
1517 | " 7.6 | \n",
1518 | " 2012 | \n",
1519 | "
\n",
1520 | " \n",
1521 | " | 4 | \n",
1522 | " 49529 | \n",
1523 | " John Carter | \n",
1524 | " John Carter is a war-weary, former military ca... | \n",
1525 | " [Action, Adventure, Science Fiction] | \n",
1526 | " [based on novel, mars, medallion, space travel... | \n",
1527 | " [Taylor Kitsch, Lynn Collins, Samantha Morton] | \n",
1528 | " [Andrew Stanton] | \n",
1529 | " 2012-03-07 | \n",
1530 | " 6.1 | \n",
1531 | " 2012 | \n",
1532 | "
\n",
1533 | " \n",
1534 | "
\n",
1535 | "
"
1536 | ],
1537 | "text/plain": [
1538 | " movie_id title \\\n",
1539 | "0 19995 Avatar \n",
1540 | "1 285 Pirates of the Caribbean: At World's End \n",
1541 | "2 206647 Spectre \n",
1542 | "3 49026 The Dark Knight Rises \n",
1543 | "4 49529 John Carter \n",
1544 | "\n",
1545 | " overview \\\n",
1546 | "0 In the 22nd century, a paraplegic Marine is di... \n",
1547 | "1 Captain Barbossa, long believed to be dead, ha... \n",
1548 | "2 A cryptic message from Bond’s past sends him o... \n",
1549 | "3 Following the death of District Attorney Harve... \n",
1550 | "4 John Carter is a war-weary, former military ca... \n",
1551 | "\n",
1552 | " genres \\\n",
1553 | "0 [Action, Adventure, Fantasy, Science Fiction] \n",
1554 | "1 [Adventure, Fantasy, Action] \n",
1555 | "2 [Action, Adventure, Crime] \n",
1556 | "3 [Action, Crime, Drama, Thriller] \n",
1557 | "4 [Action, Adventure, Science Fiction] \n",
1558 | "\n",
1559 | " keywords \\\n",
1560 | "0 [culture clash, future, space war, space colon... \n",
1561 | "1 [ocean, drug abuse, exotic island, east india ... \n",
1562 | "2 [spy, based on novel, secret agent, sequel, mi... \n",
1563 | "3 [dc comics, crime fighter, terrorist, secret i... \n",
1564 | "4 [based on novel, mars, medallion, space travel... \n",
1565 | "\n",
1566 | " cast crew \\\n",
1567 | "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] [James Cameron] \n",
1568 | "1 [Johnny Depp, Orlando Bloom, Keira Knightley] [Gore Verbinski] \n",
1569 | "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] [Sam Mendes] \n",
1570 | "3 [Christian Bale, Michael Caine, Gary Oldman] [Christopher Nolan] \n",
1571 | "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] [Andrew Stanton] \n",
1572 | "\n",
1573 | " release_date vote_average year \n",
1574 | "0 2009-12-10 7.2 2009 \n",
1575 | "1 2007-05-19 6.9 2007 \n",
1576 | "2 2015-10-26 6.3 2015 \n",
1577 | "3 2012-07-16 7.6 2012 \n",
1578 | "4 2012-03-07 6.1 2012 "
1579 | ]
1580 | },
1581 | "execution_count": 31,
1582 | "metadata": {},
1583 | "output_type": "execute_result"
1584 | }
1585 | ],
1586 | "source": [
1587 | "movies.head()"
1588 | ]
1589 | },
1590 | {
1591 | "cell_type": "code",
1592 | "execution_count": 32,
1593 | "metadata": {},
1594 | "outputs": [
1595 | {
1596 | "data": {
1597 | "text/plain": [
1598 | "'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.'"
1599 | ]
1600 | },
1601 | "execution_count": 32,
1602 | "metadata": {},
1603 | "output_type": "execute_result"
1604 | }
1605 | ],
1606 | "source": [
1607 | "# handle overview (converting to list)\n",
1608 | "\n",
1609 | "movies.iloc[0]['overview']"
1610 | ]
1611 | },
1612 | {
1613 | "cell_type": "code",
1614 | "execution_count": 33,
1615 | "metadata": {},
1616 | "outputs": [
1617 | {
1618 | "data": {
1619 | "text/html": [
1620 | "\n",
1621 | "\n",
1634 | "
\n",
1635 | " \n",
1636 | " \n",
1637 | " | \n",
1638 | " movie_id | \n",
1639 | " title | \n",
1640 | " overview | \n",
1641 | " genres | \n",
1642 | " keywords | \n",
1643 | " cast | \n",
1644 | " crew | \n",
1645 | " release_date | \n",
1646 | " vote_average | \n",
1647 | " year | \n",
1648 | "
\n",
1649 | " \n",
1650 | " \n",
1651 | " \n",
1652 | " | 4486 | \n",
1653 | " 14275 | \n",
1654 | " Hoop Dreams | \n",
1655 | " [This, documentary, follows, two, inner-city, ... | \n",
1656 | " [Documentary] | \n",
1657 | " [chicago, sports team, ghetto, narration, coll... | \n",
1658 | " [William Gates, Arthur Agee, Steve James] | \n",
1659 | " [Steve James] | \n",
1660 | " 1994-09-12 | \n",
1661 | " 7.7 | \n",
1662 | " 1994 | \n",
1663 | "
\n",
1664 | " \n",
1665 | " | 2087 | \n",
1666 | " 85 | \n",
1667 | " Raiders of the Lost Ark | \n",
1668 | " [When, Dr., Indiana, Jones, –, the, tweed-suit... | \n",
1669 | " [Adventure, Action] | \n",
1670 | " [saving the world, riddle, nepal, himalaya, ca... | \n",
1671 | " [Harrison Ford, Karen Allen, Paul Freeman] | \n",
1672 | " [Steven Spielberg] | \n",
1673 | " 1981-06-12 | \n",
1674 | " 7.7 | \n",
1675 | " 1981 | \n",
1676 | "
\n",
1677 | " \n",
1678 | " | 4768 | \n",
1679 | " 18292 | \n",
1680 | " George Washington | \n",
1681 | " [A, delicately, told, and, deceptively, simple... | \n",
1682 | " [Drama] | \n",
1683 | " [independent film] | \n",
1684 | " [Candace Evanofski, Donald Holden, Damian Jewa... | \n",
1685 | " [David Gordon Green] | \n",
1686 | " 2000-10-01 | \n",
1687 | " 6.4 | \n",
1688 | " 2000 | \n",
1689 | "
\n",
1690 | " \n",
1691 | " | 3524 | \n",
1692 | " 57022 | \n",
1693 | " Sunshine State | \n",
1694 | " [A, woman, and, her, new, husband, returns, to... | \n",
1695 | " [Drama] | \n",
1696 | " [independent film] | \n",
1697 | " [Alan King, Angela Bassett, Timothy Hutton] | \n",
1698 | " [John Sayles] | \n",
1699 | " 2002-06-21 | \n",
1700 | " 6.0 | \n",
1701 | " 2002 | \n",
1702 | "
\n",
1703 | " \n",
1704 | "
\n",
1705 | "
"
1706 | ],
1707 | "text/plain": [
1708 | " movie_id title \\\n",
1709 | "4486 14275 Hoop Dreams \n",
1710 | "2087 85 Raiders of the Lost Ark \n",
1711 | "4768 18292 George Washington \n",
1712 | "3524 57022 Sunshine State \n",
1713 | "\n",
1714 | " overview genres \\\n",
1715 | "4486 [This, documentary, follows, two, inner-city, ... [Documentary] \n",
1716 | "2087 [When, Dr., Indiana, Jones, –, the, tweed-suit... [Adventure, Action] \n",
1717 | "4768 [A, delicately, told, and, deceptively, simple... [Drama] \n",
1718 | "3524 [A, woman, and, her, new, husband, returns, to... [Drama] \n",
1719 | "\n",
1720 | " keywords \\\n",
1721 | "4486 [chicago, sports team, ghetto, narration, coll... \n",
1722 | "2087 [saving the world, riddle, nepal, himalaya, ca... \n",
1723 | "4768 [independent film] \n",
1724 | "3524 [independent film] \n",
1725 | "\n",
1726 | " cast crew \\\n",
1727 | "4486 [William Gates, Arthur Agee, Steve James] [Steve James] \n",
1728 | "2087 [Harrison Ford, Karen Allen, Paul Freeman] [Steven Spielberg] \n",
1729 | "4768 [Candace Evanofski, Donald Holden, Damian Jewa... [David Gordon Green] \n",
1730 | "3524 [Alan King, Angela Bassett, Timothy Hutton] [John Sayles] \n",
1731 | "\n",
1732 | " release_date vote_average year \n",
1733 | "4486 1994-09-12 7.7 1994 \n",
1734 | "2087 1981-06-12 7.7 1981 \n",
1735 | "4768 2000-10-01 6.4 2000 \n",
1736 | "3524 2002-06-21 6.0 2002 "
1737 | ]
1738 | },
1739 | "execution_count": 33,
1740 | "metadata": {},
1741 | "output_type": "execute_result"
1742 | }
1743 | ],
1744 | "source": [
1745 | "movies['overview'] = movies['overview'].apply(lambda x:x.split())\n",
1746 | "movies.sample(4)"
1747 | ]
1748 | },
1749 | {
1750 | "cell_type": "code",
1751 | "execution_count": 34,
1752 | "metadata": {},
1753 | "outputs": [
1754 | {
1755 | "data": {
1756 | "text/plain": [
1757 | "['In',\n",
1758 | " 'the',\n",
1759 | " '22nd',\n",
1760 | " 'century,',\n",
1761 | " 'a',\n",
1762 | " 'paraplegic',\n",
1763 | " 'Marine',\n",
1764 | " 'is',\n",
1765 | " 'dispatched',\n",
1766 | " 'to',\n",
1767 | " 'the',\n",
1768 | " 'moon',\n",
1769 | " 'Pandora',\n",
1770 | " 'on',\n",
1771 | " 'a',\n",
1772 | " 'unique',\n",
1773 | " 'mission,',\n",
1774 | " 'but',\n",
1775 | " 'becomes',\n",
1776 | " 'torn',\n",
1777 | " 'between',\n",
1778 | " 'following',\n",
1779 | " 'orders',\n",
1780 | " 'and',\n",
1781 | " 'protecting',\n",
1782 | " 'an',\n",
1783 | " 'alien',\n",
1784 | " 'civilization.']"
1785 | ]
1786 | },
1787 | "execution_count": 34,
1788 | "metadata": {},
1789 | "output_type": "execute_result"
1790 | }
1791 | ],
1792 | "source": [
1793 | "movies.iloc[0]['overview']"
1794 | ]
1795 | },
1796 | {
1797 | "cell_type": "code",
1798 | "execution_count": 35,
1799 | "metadata": {},
1800 | "outputs": [],
1801 | "source": [
1802 | "# Remove the spaces inside multi-word names so each name becomes a single token, e.g. the first string below becomes the second\n",
1803 | "'Anna Kendrick'\n",
1804 | "'AnnaKendrick'\n",
1805 | "\n",
1806 | "def remove_space(L):\n",
1807 | " L1 = []\n",
1808 | " for i in L:\n",
1809 | " L1.append(i.replace(\" \",\"\"))\n",
1810 | " return L1"
1811 | ]
1812 | },
1813 | {
1814 | "cell_type": "code",
1815 | "execution_count": 36,
1816 | "metadata": {},
1817 | "outputs": [],
1818 | "source": [
1819 | "\n",
1820 | "movies['cast'] = movies['cast'].apply(remove_space)\n",
1821 | "movies['crew'] = movies['crew'].apply(remove_space)\n",
1822 | "movies['genres'] = movies['genres'].apply(remove_space)\n",
1823 | "movies['keywords'] = movies['keywords'].apply(remove_space)"
1824 | ]
1825 | },
1826 | {
1827 | "cell_type": "code",
1828 | "execution_count": 37,
1829 | "metadata": {},
1830 | "outputs": [
1831 | {
1832 | "data": {
1833 | "text/html": [
1834 | "\n",
1835 | "\n",
1848 | "
\n",
1849 | " \n",
1850 | " \n",
1851 | " | \n",
1852 | " movie_id | \n",
1853 | " title | \n",
1854 | " overview | \n",
1855 | " genres | \n",
1856 | " keywords | \n",
1857 | " cast | \n",
1858 | " crew | \n",
1859 | " release_date | \n",
1860 | " vote_average | \n",
1861 | " year | \n",
1862 | "
\n",
1863 | " \n",
1864 | " \n",
1865 | " \n",
1866 | " | 0 | \n",
1867 | " 19995 | \n",
1868 | " Avatar | \n",
1869 | " [In, the, 22nd, century,, a, paraplegic, Marin... | \n",
1870 | " [Action, Adventure, Fantasy, ScienceFiction] | \n",
1871 | " [cultureclash, future, spacewar, spacecolony, ... | \n",
1872 | " [SamWorthington, ZoeSaldana, SigourneyWeaver] | \n",
1873 | " [JamesCameron] | \n",
1874 | " 2009-12-10 | \n",
1875 | " 7.2 | \n",
1876 | " 2009 | \n",
1877 | "
\n",
1878 | " \n",
1879 | " | 1 | \n",
1880 | " 285 | \n",
1881 | " Pirates of the Caribbean: At World's End | \n",
1882 | " [Captain, Barbossa,, long, believed, to, be, d... | \n",
1883 | " [Adventure, Fantasy, Action] | \n",
1884 | " [ocean, drugabuse, exoticisland, eastindiatrad... | \n",
1885 | " [JohnnyDepp, OrlandoBloom, KeiraKnightley] | \n",
1886 | " [GoreVerbinski] | \n",
1887 | " 2007-05-19 | \n",
1888 | " 6.9 | \n",
1889 | " 2007 | \n",
1890 | "
\n",
1891 | " \n",
1892 | " | 2 | \n",
1893 | " 206647 | \n",
1894 | " Spectre | \n",
1895 | " [A, cryptic, message, from, Bond’s, past, send... | \n",
1896 | " [Action, Adventure, Crime] | \n",
1897 | " [spy, basedonnovel, secretagent, sequel, mi6, ... | \n",
1898 | " [DanielCraig, ChristophWaltz, LéaSeydoux] | \n",
1899 | " [SamMendes] | \n",
1900 | " 2015-10-26 | \n",
1901 | " 6.3 | \n",
1902 | " 2015 | \n",
1903 | "
\n",
1904 | " \n",
1905 | " | 3 | \n",
1906 | " 49026 | \n",
1907 | " The Dark Knight Rises | \n",
1908 | " [Following, the, death, of, District, Attorney... | \n",
1909 | " [Action, Crime, Drama, Thriller] | \n",
1910 | " [dccomics, crimefighter, terrorist, secretiden... | \n",
1911 | " [ChristianBale, MichaelCaine, GaryOldman] | \n",
1912 | " [ChristopherNolan] | \n",
1913 | " 2012-07-16 | \n",
1914 | " 7.6 | \n",
1915 | " 2012 | \n",
1916 | "
\n",
1917 | " \n",
1918 | " | 4 | \n",
1919 | " 49529 | \n",
1920 | " John Carter | \n",
1921 | " [John, Carter, is, a, war-weary,, former, mili... | \n",
1922 | " [Action, Adventure, ScienceFiction] | \n",
1923 | " [basedonnovel, mars, medallion, spacetravel, p... | \n",
1924 | " [TaylorKitsch, LynnCollins, SamanthaMorton] | \n",
1925 | " [AndrewStanton] | \n",
1926 | " 2012-03-07 | \n",
1927 | " 6.1 | \n",
1928 | " 2012 | \n",
1929 | "
\n",
1930 | " \n",
1931 | "
\n",
1932 | "
"
1933 | ],
1934 | "text/plain": [
1935 | " movie_id title \\\n",
1936 | "0 19995 Avatar \n",
1937 | "1 285 Pirates of the Caribbean: At World's End \n",
1938 | "2 206647 Spectre \n",
1939 | "3 49026 The Dark Knight Rises \n",
1940 | "4 49529 John Carter \n",
1941 | "\n",
1942 | " overview \\\n",
1943 | "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n",
1944 | "1 [Captain, Barbossa,, long, believed, to, be, d... \n",
1945 | "2 [A, cryptic, message, from, Bond’s, past, send... \n",
1946 | "3 [Following, the, death, of, District, Attorney... \n",
1947 | "4 [John, Carter, is, a, war-weary,, former, mili... \n",
1948 | "\n",
1949 | " genres \\\n",
1950 | "0 [Action, Adventure, Fantasy, ScienceFiction] \n",
1951 | "1 [Adventure, Fantasy, Action] \n",
1952 | "2 [Action, Adventure, Crime] \n",
1953 | "3 [Action, Crime, Drama, Thriller] \n",
1954 | "4 [Action, Adventure, ScienceFiction] \n",
1955 | "\n",
1956 | " keywords \\\n",
1957 | "0 [cultureclash, future, spacewar, spacecolony, ... \n",
1958 | "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n",
1959 | "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n",
1960 | "3 [dccomics, crimefighter, terrorist, secretiden... \n",
1961 | "4 [basedonnovel, mars, medallion, spacetravel, p... \n",
1962 | "\n",
1963 | " cast crew \\\n",
1964 | "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n",
1965 | "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n",
1966 | "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n",
1967 | "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n",
1968 | "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] \n",
1969 | "\n",
1970 | " release_date vote_average year \n",
1971 | "0 2009-12-10 7.2 2009 \n",
1972 | "1 2007-05-19 6.9 2007 \n",
1973 | "2 2015-10-26 6.3 2015 \n",
1974 | "3 2012-07-16 7.6 2012 \n",
1975 | "4 2012-03-07 6.1 2012 "
1976 | ]
1977 | },
1978 | "execution_count": 37,
1979 | "metadata": {},
1980 | "output_type": "execute_result"
1981 | }
1982 | ],
1983 | "source": [
1984 | "movies.head()"
1985 | ]
1986 | },
1987 | {
1988 | "cell_type": "code",
1989 | "execution_count": 38,
1990 | "metadata": {},
1991 | "outputs": [],
1992 | "source": [
1993 | "# Concatenate all feature lists into a single tags column\n",
1994 | "movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']"
1995 | ]
1996 | },
1997 | {
1998 | "cell_type": "code",
1999 | "execution_count": 39,
2000 | "metadata": {},
2001 | "outputs": [
2002 | {
2003 | "data": {
2004 | "text/html": [
2005 | "\n",
2006 | "\n",
2019 | "
\n",
2020 | " \n",
2021 | " \n",
2022 | " | \n",
2023 | " movie_id | \n",
2024 | " title | \n",
2025 | " overview | \n",
2026 | " genres | \n",
2027 | " keywords | \n",
2028 | " cast | \n",
2029 | " crew | \n",
2030 | " release_date | \n",
2031 | " vote_average | \n",
2032 | " year | \n",
2033 | " tags | \n",
2034 | "
\n",
2035 | " \n",
2036 | " \n",
2037 | " \n",
2038 | " | 0 | \n",
2039 | " 19995 | \n",
2040 | " Avatar | \n",
2041 | " [In, the, 22nd, century,, a, paraplegic, Marin... | \n",
2042 | " [Action, Adventure, Fantasy, ScienceFiction] | \n",
2043 | " [cultureclash, future, spacewar, spacecolony, ... | \n",
2044 | " [SamWorthington, ZoeSaldana, SigourneyWeaver] | \n",
2045 | " [JamesCameron] | \n",
2046 | " 2009-12-10 | \n",
2047 | " 7.2 | \n",
2048 | " 2009 | \n",
2049 | " [In, the, 22nd, century,, a, paraplegic, Marin... | \n",
2050 | "
\n",
2051 | " \n",
2052 | " | 1 | \n",
2053 | " 285 | \n",
2054 | " Pirates of the Caribbean: At World's End | \n",
2055 | " [Captain, Barbossa,, long, believed, to, be, d... | \n",
2056 | " [Adventure, Fantasy, Action] | \n",
2057 | " [ocean, drugabuse, exoticisland, eastindiatrad... | \n",
2058 | " [JohnnyDepp, OrlandoBloom, KeiraKnightley] | \n",
2059 | " [GoreVerbinski] | \n",
2060 | " 2007-05-19 | \n",
2061 | " 6.9 | \n",
2062 | " 2007 | \n",
2063 | " [Captain, Barbossa,, long, believed, to, be, d... | \n",
2064 | "
\n",
2065 | " \n",
2066 | " | 2 | \n",
2067 | " 206647 | \n",
2068 | " Spectre | \n",
2069 | " [A, cryptic, message, from, Bond’s, past, send... | \n",
2070 | " [Action, Adventure, Crime] | \n",
2071 | " [spy, basedonnovel, secretagent, sequel, mi6, ... | \n",
2072 | " [DanielCraig, ChristophWaltz, LéaSeydoux] | \n",
2073 | " [SamMendes] | \n",
2074 | " 2015-10-26 | \n",
2075 | " 6.3 | \n",
2076 | " 2015 | \n",
2077 | " [A, cryptic, message, from, Bond’s, past, send... | \n",
2078 | "
\n",
2079 | " \n",
2080 | " | 3 | \n",
2081 | " 49026 | \n",
2082 | " The Dark Knight Rises | \n",
2083 | " [Following, the, death, of, District, Attorney... | \n",
2084 | " [Action, Crime, Drama, Thriller] | \n",
2085 | " [dccomics, crimefighter, terrorist, secretiden... | \n",
2086 | " [ChristianBale, MichaelCaine, GaryOldman] | \n",
2087 | " [ChristopherNolan] | \n",
2088 | " 2012-07-16 | \n",
2089 | " 7.6 | \n",
2090 | " 2012 | \n",
2091 | " [Following, the, death, of, District, Attorney... | \n",
2092 | "
\n",
2093 | " \n",
2094 | " | 4 | \n",
2095 | " 49529 | \n",
2096 | " John Carter | \n",
2097 | " [John, Carter, is, a, war-weary,, former, mili... | \n",
2098 | " [Action, Adventure, ScienceFiction] | \n",
2099 | " [basedonnovel, mars, medallion, spacetravel, p... | \n",
2100 | " [TaylorKitsch, LynnCollins, SamanthaMorton] | \n",
2101 | " [AndrewStanton] | \n",
2102 | " 2012-03-07 | \n",
2103 | " 6.1 | \n",
2104 | " 2012 | \n",
2105 | " [John, Carter, is, a, war-weary,, former, mili... | \n",
2106 | "
\n",
2107 | " \n",
2108 | "
\n",
2109 | "
"
2110 | ],
2111 | "text/plain": [
2112 | " movie_id title \\\n",
2113 | "0 19995 Avatar \n",
2114 | "1 285 Pirates of the Caribbean: At World's End \n",
2115 | "2 206647 Spectre \n",
2116 | "3 49026 The Dark Knight Rises \n",
2117 | "4 49529 John Carter \n",
2118 | "\n",
2119 | " overview \\\n",
2120 | "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n",
2121 | "1 [Captain, Barbossa,, long, believed, to, be, d... \n",
2122 | "2 [A, cryptic, message, from, Bond’s, past, send... \n",
2123 | "3 [Following, the, death, of, District, Attorney... \n",
2124 | "4 [John, Carter, is, a, war-weary,, former, mili... \n",
2125 | "\n",
2126 | " genres \\\n",
2127 | "0 [Action, Adventure, Fantasy, ScienceFiction] \n",
2128 | "1 [Adventure, Fantasy, Action] \n",
2129 | "2 [Action, Adventure, Crime] \n",
2130 | "3 [Action, Crime, Drama, Thriller] \n",
2131 | "4 [Action, Adventure, ScienceFiction] \n",
2132 | "\n",
2133 | " keywords \\\n",
2134 | "0 [cultureclash, future, spacewar, spacecolony, ... \n",
2135 | "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n",
2136 | "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n",
2137 | "3 [dccomics, crimefighter, terrorist, secretiden... \n",
2138 | "4 [basedonnovel, mars, medallion, spacetravel, p... \n",
2139 | "\n",
2140 | " cast crew \\\n",
2141 | "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n",
2142 | "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n",
2143 | "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n",
2144 | "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n",
2145 | "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] \n",
2146 | "\n",
2147 | " release_date vote_average year \\\n",
2148 | "0 2009-12-10 7.2 2009 \n",
2149 | "1 2007-05-19 6.9 2007 \n",
2150 | "2 2015-10-26 6.3 2015 \n",
2151 | "3 2012-07-16 7.6 2012 \n",
2152 | "4 2012-03-07 6.1 2012 \n",
2153 | "\n",
2154 | " tags \n",
2155 | "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n",
2156 | "1 [Captain, Barbossa,, long, believed, to, be, d... \n",
2157 | "2 [A, cryptic, message, from, Bond’s, past, send... \n",
2158 | "3 [Following, the, death, of, District, Attorney... \n",
2159 | "4 [John, Carter, is, a, war-weary,, former, mili... "
2160 | ]
2161 | },
2162 | "execution_count": 39,
2163 | "metadata": {},
2164 | "output_type": "execute_result"
2165 | }
2166 | ],
2167 | "source": [
2168 | "movies.head()"
2169 | ]
2170 | },
2171 | {
2172 | "cell_type": "code",
2173 | "execution_count": 40,
2174 | "metadata": {},
2175 | "outputs": [
2176 | {
2177 | "data": {
2178 | "text/plain": [
2179 | "['In',\n",
2180 | " 'the',\n",
2181 | " '22nd',\n",
2182 | " 'century,',\n",
2183 | " 'a',\n",
2184 | " 'paraplegic',\n",
2185 | " 'Marine',\n",
2186 | " 'is',\n",
2187 | " 'dispatched',\n",
2188 | " 'to',\n",
2189 | " 'the',\n",
2190 | " 'moon',\n",
2191 | " 'Pandora',\n",
2192 | " 'on',\n",
2193 | " 'a',\n",
2194 | " 'unique',\n",
2195 | " 'mission,',\n",
2196 | " 'but',\n",
2197 | " 'becomes',\n",
2198 | " 'torn',\n",
2199 | " 'between',\n",
2200 | " 'following',\n",
2201 | " 'orders',\n",
2202 | " 'and',\n",
2203 | " 'protecting',\n",
2204 | " 'an',\n",
2205 | " 'alien',\n",
2206 | " 'civilization.',\n",
2207 | " 'Action',\n",
2208 | " 'Adventure',\n",
2209 | " 'Fantasy',\n",
2210 | " 'ScienceFiction',\n",
2211 | " 'cultureclash',\n",
2212 | " 'future',\n",
2213 | " 'spacewar',\n",
2214 | " 'spacecolony',\n",
2215 | " 'society',\n",
2216 | " 'spacetravel',\n",
2217 | " 'futuristic',\n",
2218 | " 'romance',\n",
2219 | " 'space',\n",
2220 | " 'alien',\n",
2221 | " 'tribe',\n",
2222 | " 'alienplanet',\n",
2223 | " 'cgi',\n",
2224 | " 'marine',\n",
2225 | " 'soldier',\n",
2226 | " 'battle',\n",
2227 | " 'loveaffair',\n",
2228 | " 'antiwar',\n",
2229 | " 'powerrelations',\n",
2230 | " 'mindandsoul',\n",
2231 | " '3d',\n",
2232 | " 'SamWorthington',\n",
2233 | " 'ZoeSaldana',\n",
2234 | " 'SigourneyWeaver',\n",
2235 | " 'JamesCameron']"
2236 | ]
2237 | },
2238 | "execution_count": 40,
2239 | "metadata": {},
2240 | "output_type": "execute_result"
2241 | }
2242 | ],
2243 | "source": [
2244 | "movies.iloc[0]['tags']"
2245 | ]
2246 | },
2247 | {
2248 | "cell_type": "code",
2249 | "execution_count": 41,
2250 | "metadata": {},
2251 | "outputs": [],
2252 | "source": [
2253 | "# Dropping the extra columns (keeping only movie_id, title, tags, year, vote_average)\n",
2254 | "new_df = movies[['movie_id', 'title', 'tags', 'year', 'vote_average']]"
2255 | ]
2256 | },
2257 | {
2258 | "cell_type": "code",
2259 | "execution_count": 42,
2260 | "metadata": {},
2261 | "outputs": [
2262 | {
2263 | "data": {
2264 | "text/html": [
2265 | "\n",
2266 | "\n",
2279 | "
\n",
2280 | " \n",
2281 | " \n",
2282 | " | \n",
2283 | " movie_id | \n",
2284 | " title | \n",
2285 | " tags | \n",
2286 | " year | \n",
2287 | " vote_average | \n",
2288 | "
\n",
2289 | " \n",
2290 | " \n",
2291 | " \n",
2292 | " | 0 | \n",
2293 | " 19995 | \n",
2294 | " Avatar | \n",
2295 | " [In, the, 22nd, century,, a, paraplegic, Marin... | \n",
2296 | " 2009 | \n",
2297 | " 7.2 | \n",
2298 | "
\n",
2299 | " \n",
2300 | " | 1 | \n",
2301 | " 285 | \n",
2302 | " Pirates of the Caribbean: At World's End | \n",
2303 | " [Captain, Barbossa,, long, believed, to, be, d... | \n",
2304 | " 2007 | \n",
2305 | " 6.9 | \n",
2306 | "
\n",
2307 | " \n",
2308 | " | 2 | \n",
2309 | " 206647 | \n",
2310 | " Spectre | \n",
2311 | " [A, cryptic, message, from, Bond’s, past, send... | \n",
2312 | " 2015 | \n",
2313 | " 6.3 | \n",
2314 | "
\n",
2315 | " \n",
2316 | " | 3 | \n",
2317 | " 49026 | \n",
2318 | " The Dark Knight Rises | \n",
2319 | " [Following, the, death, of, District, Attorney... | \n",
2320 | " 2012 | \n",
2321 | " 7.6 | \n",
2322 | "
\n",
2323 | " \n",
2324 | " | 4 | \n",
2325 | " 49529 | \n",
2326 | " John Carter | \n",
2327 | " [John, Carter, is, a, war-weary,, former, mili... | \n",
2328 | " 2012 | \n",
2329 | " 6.1 | \n",
2330 | "
\n",
2331 | " \n",
2332 | "
\n",
2333 | "
"
2334 | ],
2335 | "text/plain": [
2336 | " movie_id title \\\n",
2337 | "0 19995 Avatar \n",
2338 | "1 285 Pirates of the Caribbean: At World's End \n",
2339 | "2 206647 Spectre \n",
2340 | "3 49026 The Dark Knight Rises \n",
2341 | "4 49529 John Carter \n",
2342 | "\n",
2343 | " tags year vote_average \n",
2344 | "0 [In, the, 22nd, century,, a, paraplegic, Marin... 2009 7.2 \n",
2345 | "1 [Captain, Barbossa,, long, believed, to, be, d... 2007 6.9 \n",
2346 | "2 [A, cryptic, message, from, Bond’s, past, send... 2015 6.3 \n",
2347 | "3 [Following, the, death, of, District, Attorney... 2012 7.6 \n",
2348 | "4 [John, Carter, is, a, war-weary,, former, mili... 2012 6.1 "
2349 | ]
2350 | },
2351 | "execution_count": 42,
2352 | "metadata": {},
2353 | "output_type": "execute_result"
2354 | }
2355 | ],
2356 | "source": [
2357 | "new_df.head()"
2358 | ]
2359 | },
2360 | {
2361 | "cell_type": "code",
2362 | "execution_count": 43,
2363 | "metadata": {},
2364 | "outputs": [
2365 | {
2366 | "name": "stderr",
2367 | "output_type": "stream",
2368 | "text": [
2369 | "C:\\Users\\dell admin\\AppData\\Local\\Temp\\ipykernel_4196\\3674298559.py:2: SettingWithCopyWarning: \n",
2370 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
2371 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
2372 | "\n",
2373 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
2374 | " new_df['tags'] = new_df['tags'].apply(lambda x: \" \".join(x))\n"
2375 | ]
2376 | },
2377 | {
2378 | "data": {
2379 | "text/html": [
2380 | "\n",
2381 | "\n",
2394 | "
\n",
2395 | " \n",
2396 | " \n",
2397 | " | \n",
2398 | " movie_id | \n",
2399 | " title | \n",
2400 | " tags | \n",
2401 | " year | \n",
2402 | " vote_average | \n",
2403 | "
\n",
2404 | " \n",
2405 | " \n",
2406 | " \n",
2407 | " | 0 | \n",
2408 | " 19995 | \n",
2409 | " Avatar | \n",
2410 | " In the 22nd century, a paraplegic Marine is di... | \n",
2411 | " 2009 | \n",
2412 | " 7.2 | \n",
2413 | "
\n",
2414 | " \n",
2415 | " | 1 | \n",
2416 | " 285 | \n",
2417 | " Pirates of the Caribbean: At World's End | \n",
2418 | " Captain Barbossa, long believed to be dead, ha... | \n",
2419 | " 2007 | \n",
2420 | " 6.9 | \n",
2421 | "
\n",
2422 | " \n",
2423 | " | 2 | \n",
2424 | " 206647 | \n",
2425 | " Spectre | \n",
2426 | " A cryptic message from Bond’s past sends him o... | \n",
2427 | " 2015 | \n",
2428 | " 6.3 | \n",
2429 | "
\n",
2430 | " \n",
2431 | " | 3 | \n",
2432 | " 49026 | \n",
2433 | " The Dark Knight Rises | \n",
2434 | " Following the death of District Attorney Harve... | \n",
2435 | " 2012 | \n",
2436 | " 7.6 | \n",
2437 | "
\n",
2438 | " \n",
2439 | " | 4 | \n",
2440 | " 49529 | \n",
2441 | " John Carter | \n",
2442 | " John Carter is a war-weary, former military ca... | \n",
2443 | " 2012 | \n",
2444 | " 6.1 | \n",
2445 | "
\n",
2446 | " \n",
2447 | "
\n",
2448 | "
"
2449 | ],
2450 | "text/plain": [
2451 | " movie_id title \\\n",
2452 | "0 19995 Avatar \n",
2453 | "1 285 Pirates of the Caribbean: At World's End \n",
2454 | "2 206647 Spectre \n",
2455 | "3 49026 The Dark Knight Rises \n",
2456 | "4 49529 John Carter \n",
2457 | "\n",
2458 | " tags year vote_average \n",
2459 | "0 In the 22nd century, a paraplegic Marine is di... 2009 7.2 \n",
2460 | "1 Captain Barbossa, long believed to be dead, ha... 2007 6.9 \n",
2461 | "2 A cryptic message from Bond’s past sends him o... 2015 6.3 \n",
2462 | "3 Following the death of District Attorney Harve... 2012 7.6 \n",
2463 | "4 John Carter is a war-weary, former military ca... 2012 6.1 "
2464 | ]
2465 | },
2466 | "execution_count": 43,
2467 | "metadata": {},
2468 | "output_type": "execute_result"
2469 | }
2470 | ],
2471 | "source": [
2472 | "# Converting list to str\n",
2473 | "new_df['tags'] = new_df['tags'].apply(lambda x: \" \".join(x))\n",
2474 | "new_df.head()"
2475 | ]
2476 | },
2477 | {
2478 | "cell_type": "code",
2479 | "execution_count": 44,
2480 | "metadata": {},
2481 | "outputs": [
2482 | {
2483 | "data": {
2484 | "text/plain": [
2485 | "'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Action Adventure Fantasy ScienceFiction cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d SamWorthington ZoeSaldana SigourneyWeaver JamesCameron'"
2486 | ]
2487 | },
2488 | "execution_count": 44,
2489 | "metadata": {},
2490 | "output_type": "execute_result"
2491 | }
2492 | ],
2493 | "source": [
2494 | "new_df.iloc[0]['tags']"
2495 | ]
2496 | },
2497 | {
2498 | "cell_type": "code",
2499 | "execution_count": 45,
2500 | "metadata": {},
2501 | "outputs": [
2502 | {
2503 | "name": "stderr",
2504 | "output_type": "stream",
2505 | "text": [
2506 | "C:\\Users\\dell admin\\AppData\\Local\\Temp\\ipykernel_4196\\3444714728.py:2: SettingWithCopyWarning: \n",
2507 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
2508 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
2509 | "\n",
2510 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
2511 | " new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())\n"
2512 | ]
2513 | }
2514 | ],
2515 | "source": [
2516 | "# Converting to lower case\n",
2517 | "new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())"
2518 | ]
2519 | },
2520 | {
2521 | "cell_type": "code",
2522 | "execution_count": 46,
2523 | "metadata": {},
2524 | "outputs": [
2525 | {
2526 | "data": {
2527 | "text/html": [
2528 | "\n",
2529 | "\n",
2542 | "
\n",
2543 | " \n",
2544 | " \n",
2545 | " | \n",
2546 | " movie_id | \n",
2547 | " title | \n",
2548 | " tags | \n",
2549 | " year | \n",
2550 | " vote_average | \n",
2551 | "
\n",
2552 | " \n",
2553 | " \n",
2554 | " \n",
2555 | " | 0 | \n",
2556 | " 19995 | \n",
2557 | " Avatar | \n",
2558 | " in the 22nd century, a paraplegic marine is di... | \n",
2559 | " 2009 | \n",
2560 | " 7.2 | \n",
2561 | "
\n",
2562 | " \n",
2563 | " | 1 | \n",
2564 | " 285 | \n",
2565 | " Pirates of the Caribbean: At World's End | \n",
2566 | " captain barbossa, long believed to be dead, ha... | \n",
2567 | " 2007 | \n",
2568 | " 6.9 | \n",
2569 | "
\n",
2570 | " \n",
2571 | " | 2 | \n",
2572 | " 206647 | \n",
2573 | " Spectre | \n",
2574 | " a cryptic message from bond’s past sends him o... | \n",
2575 | " 2015 | \n",
2576 | " 6.3 | \n",
2577 | "
\n",
2578 | " \n",
2579 | " | 3 | \n",
2580 | " 49026 | \n",
2581 | " The Dark Knight Rises | \n",
2582 | " following the death of district attorney harve... | \n",
2583 | " 2012 | \n",
2584 | " 7.6 | \n",
2585 | "
\n",
2586 | " \n",
2587 | " | 4 | \n",
2588 | " 49529 | \n",
2589 | " John Carter | \n",
2590 | " john carter is a war-weary, former military ca... | \n",
2591 | " 2012 | \n",
2592 | " 6.1 | \n",
2593 | "
\n",
2594 | " \n",
2595 | "
\n",
2596 | "
"
2597 | ],
2598 | "text/plain": [
2599 | " movie_id title \\\n",
2600 | "0 19995 Avatar \n",
2601 | "1 285 Pirates of the Caribbean: At World's End \n",
2602 | "2 206647 Spectre \n",
2603 | "3 49026 The Dark Knight Rises \n",
2604 | "4 49529 John Carter \n",
2605 | "\n",
2606 | " tags year vote_average \n",
2607 | "0 in the 22nd century, a paraplegic marine is di... 2009 7.2 \n",
2608 | "1 captain barbossa, long believed to be dead, ha... 2007 6.9 \n",
2609 | "2 a cryptic message from bond’s past sends him o... 2015 6.3 \n",
2610 | "3 following the death of district attorney harve... 2012 7.6 \n",
2611 | "4 john carter is a war-weary, former military ca... 2012 6.1 "
2612 | ]
2613 | },
2614 | "execution_count": 46,
2615 | "metadata": {},
2616 | "output_type": "execute_result"
2617 | }
2618 | ],
2619 | "source": [
2620 | "new_df.head()"
2621 | ]
2622 | },
2623 | {
2624 | "cell_type": "code",
2625 | "execution_count": 47,
2626 | "metadata": {},
2627 | "outputs": [
2628 | {
2629 | "data": {
2630 | "text/plain": [
2631 | "'in the 22nd century, a paraplegic marine is dispatched to the moon pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. action adventure fantasy sciencefiction cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d samworthington zoesaldana sigourneyweaver jamescameron'"
2632 | ]
2633 | },
2634 | "execution_count": 47,
2635 | "metadata": {},
2636 | "output_type": "execute_result"
2637 | }
2638 | ],
2639 | "source": [
2640 | "new_df.iloc[0]['tags']"
2641 | ]
2642 | },
2643 | {
2644 | "cell_type": "code",
2645 | "execution_count": 48,
2646 | "metadata": {},
2647 | "outputs": [
2648 | {
2649 | "name": "stdout",
2650 | "output_type": "stream",
2651 | "text": [
2652 | "Requirement already satisfied: nltk in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (3.9.1)\n",
2653 | "Requirement already satisfied: click in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (from nltk) (8.2.1)\n",
2654 | "Requirement already satisfied: joblib in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (from nltk) (1.5.2)\n",
2655 | "Requirement already satisfied: regex>=2021.8.3 in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (from nltk) (2025.9.1)\n",
2656 | "Requirement already satisfied: tqdm in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (from nltk) (4.67.1)\n",
2657 | "Requirement already satisfied: colorama in f:\\rosp-project\\movie-recommender-system-using-machine-learning\\venv\\lib\\site-packages (from click->nltk) (0.4.6)\n",
2658 | "Note: you may need to restart the kernel to use updated packages.\n"
2659 | ]
2660 | }
2661 | ],
2662 | "source": [
2663 | "%pip install nltk\n",
2664 | "\n",
2665 | "import nltk\n",
2666 | "from nltk.stem import PorterStemmer"
2667 | ]
2668 | },
2669 | {
2670 | "cell_type": "code",
2671 | "execution_count": 49,
2672 | "metadata": {},
2673 | "outputs": [],
2674 | "source": [
2675 | "ps = PorterStemmer()"
2676 | ]
2677 | },
2678 | {
2679 | "cell_type": "code",
2680 | "execution_count": 50,
2681 | "metadata": {},
2682 | "outputs": [],
2683 | "source": [
2684 | "def stems(text):\n",
2685 | " T = []\n",
2686 | " \n",
2687 | " for i in text.split():\n",
2688 | " T.append(ps.stem(i))\n",
2689 | " \n",
2690 | " return \" \".join(T)"
2691 | ]
2692 | },
2693 | {
2694 | "cell_type": "code",
2695 | "execution_count": 51,
2696 | "metadata": {},
2697 | "outputs": [
2698 | {
2699 | "name": "stderr",
2700 | "output_type": "stream",
2701 | "text": [
2702 | "C:\\Users\\dell admin\\AppData\\Local\\Temp\\ipykernel_4196\\3973021881.py:1: SettingWithCopyWarning: \n",
2703 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
2704 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
2705 | "\n",
2706 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
2707 | " new_df['tags'] = new_df['tags'].apply(stems)\n"
2708 | ]
2709 | }
2710 | ],
2711 | "source": [
2712 | "new_df['tags'] = new_df['tags'].apply(stems)"
2713 | ]
2714 | },
2715 | {
2716 | "cell_type": "code",
2717 | "execution_count": 52,
2718 | "metadata": {},
2719 | "outputs": [
2720 | {
2721 | "data": {
2722 | "text/plain": [
2723 | "'in the 22nd century, a parapleg marin is dispatch to the moon pandora on a uniqu mission, but becom torn between follow order and protect an alien civilization. action adventur fantasi sciencefict cultureclash futur spacewar spacecoloni societi spacetravel futurist romanc space alien tribe alienplanet cgi marin soldier battl loveaffair antiwar powerrel mindandsoul 3d samworthington zoesaldana sigourneyweav jamescameron'"
2724 | ]
2725 | },
2726 | "execution_count": 52,
2727 | "metadata": {},
2728 | "output_type": "execute_result"
2729 | }
2730 | ],
2731 | "source": [
2732 | "new_df.iloc[0]['tags']"
2733 | ]
2734 | },
2735 | {
2736 | "cell_type": "code",
2737 | "execution_count": 53,
2738 | "metadata": {},
2739 | "outputs": [],
2740 | "source": [
2741 | "from sklearn.feature_extraction.text import CountVectorizer\n",
2742 | "cv = CountVectorizer(max_features=5000,stop_words='english')\n"
2743 | ]
2744 | },
2745 | {
2746 | "cell_type": "code",
2747 | "execution_count": 54,
2748 | "metadata": {},
2749 | "outputs": [],
2750 | "source": [
2751 | "vector = cv.fit_transform(new_df['tags']).toarray()"
2752 | ]
2753 | },
2754 | {
2755 | "cell_type": "code",
2756 | "execution_count": 55,
2757 | "metadata": {},
2758 | "outputs": [
2759 | {
2760 | "data": {
2761 | "text/plain": [
2762 | "array([0, 0, 0, ..., 0, 0, 0], shape=(5000,))"
2763 | ]
2764 | },
2765 | "execution_count": 55,
2766 | "metadata": {},
2767 | "output_type": "execute_result"
2768 | }
2769 | ],
2770 | "source": [
2771 | "vector[0]"
2772 | ]
2773 | },
2774 | {
2775 | "cell_type": "code",
2776 | "execution_count": 56,
2777 | "metadata": {},
2778 | "outputs": [
2779 | {
2780 | "data": {
2781 | "text/plain": [
2782 | "(4805, 5000)"
2783 | ]
2784 | },
2785 | "execution_count": 56,
2786 | "metadata": {},
2787 | "output_type": "execute_result"
2788 | }
2789 | ],
2790 | "source": [
2791 | "vector.shape"
2792 | ]
2793 | },
2794 | {
2795 | "cell_type": "code",
2796 | "execution_count": 57,
2797 | "metadata": {},
2798 | "outputs": [
2799 | {
2800 | "data": {
2801 | "text/plain": [
2802 | "5000"
2803 | ]
2804 | },
2805 | "execution_count": 57,
2806 | "metadata": {},
2807 | "output_type": "execute_result"
2808 | }
2809 | ],
2810 | "source": [
2811 | "len(cv.get_feature_names_out())"
2812 | ]
2813 | },
2814 | {
2815 | "cell_type": "code",
2816 | "execution_count": 58,
2817 | "metadata": {},
2818 | "outputs": [],
2819 | "source": [
2820 | "from sklearn.metrics.pairwise import cosine_similarity"
2821 | ]
2822 | },
2823 | {
2824 | "cell_type": "code",
2825 | "execution_count": 59,
2826 | "metadata": {},
2827 | "outputs": [],
2828 | "source": [
2829 | "similarity = cosine_similarity(vector)"
2830 | ]
2831 | },
2832 | {
2833 | "cell_type": "code",
2834 | "execution_count": 60,
2835 | "metadata": {},
2836 | "outputs": [
2837 | {
2838 | "data": {
2839 | "text/plain": [
2840 | "(4805, 4805)"
2841 | ]
2842 | },
2843 | "execution_count": 60,
2844 | "metadata": {},
2845 | "output_type": "execute_result"
2846 | }
2847 | ],
2848 | "source": [
2849 | "similarity.shape"
2850 | ]
2851 | },
2852 | {
2853 | "cell_type": "code",
2854 | "execution_count": 61,
2855 | "metadata": {},
2856 | "outputs": [],
2857 | "source": [
2858 | "# similarity"
2859 | ]
2860 | },
2861 | {
2862 | "cell_type": "code",
2863 | "execution_count": 62,
2864 | "metadata": {},
2865 | "outputs": [
2866 | {
2867 | "data": {
2868 | "text/plain": [
2869 | "np.int64(744)"
2870 | ]
2871 | },
2872 | "execution_count": 62,
2873 | "metadata": {},
2874 | "output_type": "execute_result"
2875 | }
2876 | ],
2877 | "source": [
2878 | "new_df[new_df['title'] == 'The Lego Movie'].index[0]"
2879 | ]
2880 | },
2881 | {
2882 | "cell_type": "code",
2883 | "execution_count": 63,
2884 | "metadata": {},
2885 | "outputs": [],
2886 | "source": [
2887 | "def recommend(movie):\n",
2888 | " index = new_df[new_df['title'] == movie].index[0]\n",
2889 | " distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])\n",
2890 | " for i in distances[1:6]:\n",
2891 | " print(new_df.iloc[i[0]].title)"
2892 | ]
2893 | },
2894 | {
2895 | "cell_type": "code",
2896 | "execution_count": 64,
2897 | "metadata": {},
2898 | "outputs": [
2899 | {
2900 | "name": "stdout",
2901 | "output_type": "stream",
2902 | "text": [
2903 | "Spider-Man 3\n",
2904 | "Spider-Man\n",
2905 | "The Amazing Spider-Man\n",
2906 | "Iron Man 2\n",
2907 | "Superman\n"
2908 | ]
2909 | }
2910 | ],
2911 | "source": [
2912 | "recommend('Spider-Man 2')"
2913 | ]
2914 | },
2915 | {
2916 | "cell_type": "code",
2917 | "execution_count": 65,
2918 | "metadata": {},
2919 | "outputs": [],
2920 | "source": [
2921 | "import pickle"
2922 | ]
2923 | },
2924 | {
2925 | "cell_type": "code",
2926 | "execution_count": 66,
2927 | "metadata": {},
2928 | "outputs": [],
2929 | "source": [
2930 | "# Save the processed movie table (as a dict) and the similarity matrix to artifacts/\n",
2931 | "pickle.dump(new_df.to_dict(), open('artifacts/movie_dict.pkl','wb'))\n",
2932 | "pickle.dump(similarity,open('artifacts/similarity.pkl','wb'))"
2933 | ]
2934 | },
2935 | {
2936 | "cell_type": "code",
2937 | "execution_count": null,
2938 | "metadata": {},
2939 | "outputs": [],
2940 | "source": []
2941 | }
2942 | ],
2943 | "metadata": {
2944 | "kernelspec": {
2945 | "display_name": "venv",
2946 | "language": "python",
2947 | "name": "python3"
2948 | },
2949 | "language_info": {
2950 | "codemirror_mode": {
2951 | "name": "ipython",
2952 | "version": 3
2953 | },
2954 | "file_extension": ".py",
2955 | "mimetype": "text/x-python",
2956 | "name": "python",
2957 | "nbconvert_exporter": "python",
2958 | "pygments_lexer": "ipython3",
2959 | "version": "3.13.5"
2960 | }
2961 | },
2962 | "nbformat": 4,
2963 | "nbformat_minor": 4
2964 | }
2965 |
--------------------------------------------------------------------------------