"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# evaluates an expression node or a string containing a Python literal or container display\nimport ast\ndata['genres'] = data['genres'].map(lambda x: ast.literal_eval(x))\ndata['cast'] = data['cast'].map(lambda x: ast.literal_eval(x))\ndata['crew'] = data['crew'].map(lambda x: ast.literal_eval(x))","execution_count":98,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def make_genresList(x):\n gen = []\n st = \" \"\n for i in x:\n if i.get('name') == 'Science Fiction':\n scifi = 'Sci-Fi'\n gen.append(scifi)\n else:\n gen.append(i.get('name'))\n if gen == []:\n return np.NaN\n else:\n return (st.join(gen))","execution_count":99,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['genres_list'] = data['genres'].map(lambda x: make_genresList(x))","execution_count":100,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['genres_list']","execution_count":101,"outputs":[{"output_type":"execute_result","execution_count":101,"data":{"text/plain":"0 Animation Comedy Family\n1 Adventure Fantasy Family\n2 Romance Comedy\n3 Comedy Drama Romance\n4 Comedy\n ... 
\n45440 Drama Action Romance\n45441 Drama\n45442 Action Drama Thriller\n45443 NaN\n45444 NaN\nName: genres_list, Length: 45445, dtype: object"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor1(x):\n casts = []\n for i in x:\n casts.append(i.get('name'))\n if casts == []:\n return np.NaN\n else:\n return (casts[0])","execution_count":102,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['actor_1_name'] = data['cast'].map(lambda x: get_actor1(x))","execution_count":103,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor2(x):\n casts = []\n for i in x:\n casts.append(i.get('name'))\n if casts == [] or len(casts)<=1:\n return np.NaN\n else:\n return (casts[1])","execution_count":104,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['actor_2_name'] = data['cast'].map(lambda x: get_actor2(x))","execution_count":105,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['actor_2_name']","execution_count":106,"outputs":[{"output_type":"execute_result","execution_count":106,"data":{"text/plain":"0 Tim Allen\n1 Jonathan Hyde\n2 Jack Lemmon\n3 Angela Bassett\n4 Diane Keaton\n ... 
\n45440 Uma Thurman\n45441 Perry Dizon\n45442 Adam Baldwin\n45443 Nathalie Lissenko\n45444 NaN\nName: actor_2_name, Length: 45445, dtype: object"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor3(x):\n casts = []\n for i in x:\n casts.append(i.get('name'))\n if casts == [] or len(casts)<=2:\n return np.NaN\n else:\n return (casts[2])","execution_count":107,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['actor_3_name'] = data['cast'].map(lambda x: get_actor3(x))","execution_count":108,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['actor_3_name']","execution_count":109,"outputs":[{"output_type":"execute_result","execution_count":109,"data":{"text/plain":"0 Don Rickles\n1 Kirsten Dunst\n2 Ann-Margret\n3 Loretta Devine\n4 Martin Short\n ... \n45440 David Morrissey\n45441 Hazel Orencio\n45442 Julie du Page\n45443 Pavel Pavlov\n45444 NaN\nName: actor_3_name, Length: 45445, dtype: object"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_directors(x):\n dt = []\n st = \" \"\n for i in x:\n if i.get('job') == 'Director':\n dt.append(i.get('name'))\n if dt == []:\n return np.NaN\n else:\n return (st.join(dt))","execution_count":110,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['director_name'] = data['crew'].map(lambda x: get_directors(x))","execution_count":111,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"data['director_name']","execution_count":112,"outputs":[{"output_type":"execute_result","execution_count":112,"data":{"text/plain":"0 John Lasseter\n1 Joe Johnston\n2 Howard Deutch\n3 Forest Whitaker\n4 Charles Shyer\n ... \n45440 John Irvin\n45441 Lav Diaz\n45442 Mark L. 
Lester\n45443 Yakov Protazanov\n45444 Daisy Asquith\nName: director_name, Length: 45445, dtype: object"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie = data.loc[:,['director_name','actor_1_name','actor_2_name','actor_3_name','genres_list','title']]","execution_count":113,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie","execution_count":114,"outputs":[{"output_type":"execute_result","execution_count":114,"data":{"text/plain":" director_name actor_1_name actor_2_name actor_3_name \\\n0 John Lasseter Tom Hanks Tim Allen Don Rickles \n1 Joe Johnston Robin Williams Jonathan Hyde Kirsten Dunst \n2 Howard Deutch Walter Matthau Jack Lemmon Ann-Margret \n3 Forest Whitaker Whitney Houston Angela Bassett Loretta Devine \n4 Charles Shyer Steve Martin Diane Keaton Martin Short \n... ... ... ... ... \n45440 John Irvin Patrick Bergin Uma Thurman David Morrissey \n45441 Lav Diaz Angel Aquino Perry Dizon Hazel Orencio \n45442 Mark L. Lester Erika Eleniak Adam Baldwin Julie du Page \n45443 Yakov Protazanov Iwan Mosschuchin Nathalie Lissenko Pavel Pavlov \n45444 Daisy Asquith NaN NaN NaN \n\n genres_list title \n0 Animation Comedy Family Toy Story \n1 Adventure Fantasy Family Jumanji \n2 Romance Comedy Grumpier Old Men \n3 Comedy Drama Romance Waiting to Exhale \n4 Comedy Father of the Bride Part II \n... ... ... \n45440 Drama Action Romance Robin Hood \n45441 Drama Century of Birthing \n45442 Action Drama Thriller Betrayal \n45443 NaN Satan Triumphant \n45444 NaN Queerama \n\n[45445 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres_list
\n
title
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
Toy Story
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
Jumanji
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
Grumpier Old Men
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
Waiting to Exhale
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
Father of the Bride Part II
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
45440
\n
John Irvin
\n
Patrick Bergin
\n
Uma Thurman
\n
David Morrissey
\n
Drama Action Romance
\n
Robin Hood
\n
\n
\n
45441
\n
Lav Diaz
\n
Angel Aquino
\n
Perry Dizon
\n
Hazel Orencio
\n
Drama
\n
Century of Birthing
\n
\n
\n
45442
\n
Mark L. Lester
\n
Erika Eleniak
\n
Adam Baldwin
\n
Julie du Page
\n
Action Drama Thriller
\n
Betrayal
\n
\n
\n
45443
\n
Yakov Protazanov
\n
Iwan Mosschuchin
\n
Nathalie Lissenko
\n
Pavel Pavlov
\n
NaN
\n
Satan Triumphant
\n
\n
\n
45444
\n
Daisy Asquith
\n
NaN
\n
NaN
\n
NaN
\n
NaN
\n
Queerama
\n
\n \n
\n
45445 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie.isna().sum()","execution_count":115,"outputs":[{"output_type":"execute_result","execution_count":115,"data":{"text/plain":"director_name 835\nactor_1_name 2354\nactor_2_name 3683\nactor_3_name 4593\ngenres_list 2384\ntitle 0\ndtype: int64"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie = movie.dropna(how='any')","execution_count":116,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie.isna().sum()","execution_count":117,"outputs":[{"output_type":"execute_result","execution_count":117,"data":{"text/plain":"director_name 0\nactor_1_name 0\nactor_2_name 0\nactor_3_name 0\ngenres_list 0\ntitle 0\ndtype: int64"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie = movie.rename(columns={'genres_list':'genres'})\nmovie = movie.rename(columns={'title':'movie_title'})","execution_count":118,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie['movie_title'] = movie['movie_title'].str.lower()","execution_count":119,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie['comb'] = movie['actor_1_name'] + ' ' + movie['actor_2_name'] + ' '+ movie['actor_3_name'] + ' '+ movie['director_name'] +' ' + movie['genres']","execution_count":120,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie","execution_count":121,"outputs":[{"output_type":"execute_result","execution_count":121,"data":{"text/plain":" director_name actor_1_name actor_2_name actor_3_name \\\n0 John Lasseter Tom Hanks Tim Allen Don Rickles \n1 Joe Johnston Robin Williams Jonathan Hyde Kirsten Dunst \n2 Howard Deutch Walter Matthau Jack Lemmon Ann-Margret \n3 Forest Whitaker Whitney Houston Angela Bassett Loretta Devine \n4 Charles Shyer Steve Martin Diane Keaton Martin Short \n... ... ... ... ... 
\n45438 Ben Rock Monty Bane Lucy Butler David Grammer \n45439 Aaron Osborne Lisa Boyle Kena Land Zaneta Polard \n45440 John Irvin Patrick Bergin Uma Thurman David Morrissey \n45441 Lav Diaz Angel Aquino Perry Dizon Hazel Orencio \n45442 Mark L. Lester Erika Eleniak Adam Baldwin Julie du Page \n\n genres movie_title \\\n0 Animation Comedy Family toy story \n1 Adventure Fantasy Family jumanji \n2 Romance Comedy grumpier old men \n3 Comedy Drama Romance waiting to exhale \n4 Comedy father of the bride part ii \n... ... ... \n45438 Horror the burkittsville 7 \n45439 Sci-Fi caged heat 3000 \n45440 Drama Action Romance robin hood \n45441 Drama century of birthing \n45442 Action Drama Thriller betrayal \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter Animation Comedy Family \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe Johnston Adventure Fanta... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard Deutch Romance Comedy \n3 Whitney Houston Angela Bassett Loretta Devine Forest Whitaker Comedy Dr... \n4 Steve Martin Diane Keaton Martin Short Charles Shyer Comedy \n... ... \n45438 Monty Bane Lucy Butler David Grammer Ben Rock Horror \n45439 Lisa Boyle Kena Land Zaneta Polard Aaron Osborne Sci-Fi \n45440 Patrick Bergin Uma Thurman David Morrissey John Irvin Drama Action Romance \n45441 Angel Aquino Perry Dizon Hazel Orencio Lav Diaz Drama \n45442 Erika Eleniak Adam Baldwin Julie du Page Mark L. Lester Action Drama Th... \n\n[39201 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
toy story
\n
Tom Hanks Tim Allen Don Rickles John Lasseter Animation Comedy Family
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
jumanji
\n
Robin Williams Jonathan Hyde Kirsten Dunst Joe Johnston Adventure Fanta...
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
grumpier old men
\n
Walter Matthau Jack Lemmon Ann-Margret Howard Deutch Romance Comedy
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
waiting to exhale
\n
Whitney Houston Angela Bassett Loretta Devine Forest Whitaker Comedy Dr...
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
father of the bride part ii
\n
Steve Martin Diane Keaton Martin Short Charles Shyer Comedy
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
45438
\n
Ben Rock
\n
Monty Bane
\n
Lucy Butler
\n
David Grammer
\n
Horror
\n
the burkittsville 7
\n
Monty Bane Lucy Butler David Grammer Ben Rock Horror
\n
\n
\n
45439
\n
Aaron Osborne
\n
Lisa Boyle
\n
Kena Land
\n
Zaneta Polard
\n
Sci-Fi
\n
caged heat 3000
\n
Lisa Boyle Kena Land Zaneta Polard Aaron Osborne Sci-Fi
\n
\n
\n
45440
\n
John Irvin
\n
Patrick Bergin
\n
Uma Thurman
\n
David Morrissey
\n
Drama Action Romance
\n
robin hood
\n
Patrick Bergin Uma Thurman David Morrissey John Irvin Drama Action Romance
\n
\n
\n
45441
\n
Lav Diaz
\n
Angel Aquino
\n
Perry Dizon
\n
Hazel Orencio
\n
Drama
\n
century of birthing
\n
Angel Aquino Perry Dizon Hazel Orencio Lav Diaz Drama
\n
\n
\n
45442
\n
Mark L. Lester
\n
Erika Eleniak
\n
Adam Baldwin
\n
Julie du Page
\n
Action Drama Thriller
\n
betrayal
\n
Erika Eleniak Adam Baldwin Julie du Page Mark L. Lester Action Drama Th...
\n
\n \n
\n
39201 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie.drop_duplicates(subset =\"movie_title\", keep = 'last', inplace = True)","execution_count":122,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie","execution_count":123,"outputs":[{"output_type":"execute_result","execution_count":123,"data":{"text/plain":" director_name actor_1_name actor_2_name actor_3_name \\\n0 John Lasseter Tom Hanks Tim Allen Don Rickles \n1 Joe Johnston Robin Williams Jonathan Hyde Kirsten Dunst \n2 Howard Deutch Walter Matthau Jack Lemmon Ann-Margret \n3 Forest Whitaker Whitney Houston Angela Bassett Loretta Devine \n4 Charles Shyer Steve Martin Diane Keaton Martin Short \n... ... ... ... ... \n45438 Ben Rock Monty Bane Lucy Butler David Grammer \n45439 Aaron Osborne Lisa Boyle Kena Land Zaneta Polard \n45440 John Irvin Patrick Bergin Uma Thurman David Morrissey \n45441 Lav Diaz Angel Aquino Perry Dizon Hazel Orencio \n45442 Mark L. Lester Erika Eleniak Adam Baldwin Julie du Page \n\n genres movie_title \\\n0 Animation Comedy Family toy story \n1 Adventure Fantasy Family jumanji \n2 Romance Comedy grumpier old men \n3 Comedy Drama Romance waiting to exhale \n4 Comedy father of the bride part ii \n... ... ... \n45438 Horror the burkittsville 7 \n45439 Sci-Fi caged heat 3000 \n45440 Drama Action Romance robin hood \n45441 Drama century of birthing \n45442 Action Drama Thriller betrayal \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter Animation Comedy Family \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe Johnston Adventure Fanta... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard Deutch Romance Comedy \n3 Whitney Houston Angela Bassett Loretta Devine Forest Whitaker Comedy Dr... \n4 Steve Martin Diane Keaton Martin Short Charles Shyer Comedy \n... ... 
\n45438 Monty Bane Lucy Butler David Grammer Ben Rock Horror \n45439 Lisa Boyle Kena Land Zaneta Polard Aaron Osborne Sci-Fi \n45440 Patrick Bergin Uma Thurman David Morrissey John Irvin Drama Action Romance \n45441 Angel Aquino Perry Dizon Hazel Orencio Lav Diaz Drama \n45442 Erika Eleniak Adam Baldwin Julie du Page Mark L. Lester Action Drama Th... \n\n[36341 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
toy story
\n
Tom Hanks Tim Allen Don Rickles John Lasseter Animation Comedy Family
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
jumanji
\n
Robin Williams Jonathan Hyde Kirsten Dunst Joe Johnston Adventure Fanta...
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
grumpier old men
\n
Walter Matthau Jack Lemmon Ann-Margret Howard Deutch Romance Comedy
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
waiting to exhale
\n
Whitney Houston Angela Bassett Loretta Devine Forest Whitaker Comedy Dr...
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
father of the bride part ii
\n
Steve Martin Diane Keaton Martin Short Charles Shyer Comedy
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
45438
\n
Ben Rock
\n
Monty Bane
\n
Lucy Butler
\n
David Grammer
\n
Horror
\n
the burkittsville 7
\n
Monty Bane Lucy Butler David Grammer Ben Rock Horror
\n
\n
\n
45439
\n
Aaron Osborne
\n
Lisa Boyle
\n
Kena Land
\n
Zaneta Polard
\n
Sci-Fi
\n
caged heat 3000
\n
Lisa Boyle Kena Land Zaneta Polard Aaron Osborne Sci-Fi
\n
\n
\n
45440
\n
John Irvin
\n
Patrick Bergin
\n
Uma Thurman
\n
David Morrissey
\n
Drama Action Romance
\n
robin hood
\n
Patrick Bergin Uma Thurman David Morrissey John Irvin Drama Action Romance
\n
\n
\n
45441
\n
Lav Diaz
\n
Angel Aquino
\n
Perry Dizon
\n
Hazel Orencio
\n
Drama
\n
century of birthing
\n
Angel Aquino Perry Dizon Hazel Orencio Lav Diaz Drama
\n
\n
\n
45442
\n
Mark L. Lester
\n
Erika Eleniak
\n
Adam Baldwin
\n
Julie du Page
\n
Action Drama Thriller
\n
betrayal
\n
Erika Eleniak Adam Baldwin Julie du Page Mark L. Lester Action Drama Th...
\n
\n \n
\n
36341 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"movie.to_csv('movie.csv',index=False)","execution_count":124,"outputs":[]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4}
--------------------------------------------------------------------------------
/.ipynb_checkpoints/preprocessing 3.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"import pandas as pd\nimport numpy as np","execution_count":2,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Extracting features of 2018 movies from Wikipedia"},{"metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","trusted":true},"cell_type":"code","source":"link = \"https://en.wikipedia.org/wiki/List_of_American_films_of_2018\"\ndf1 = pd.read_html(link, header=0)[2]\ndf2 = pd.read_html(link, header=0)[3]\ndf3 = pd.read_html(link, header=0)[4]\ndf4 = pd.read_html(link, header=0)[5]","execution_count":3,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df = df1.append(df2.append(df3.append(df4,ignore_index=True),ignore_index=True),ignore_index=True)","execution_count":4,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df","execution_count":5,"outputs":[{"output_type":"execute_result","execution_count":5,"data":{"text/plain":" Opening Opening.1 Title \\\n0 JANUARY 5 Insidious: The Last Key \n1 JANUARY 5 The Strange Ones \n2 JANUARY 5 Stratton \n3 JANUARY 10 Sweet Country \n4 JANUARY 12 The Commuter \n.. ... ... ... \n263 DECEMBER 25 Holmes & Watson \n264 DECEMBER 25 Vice \n265 DECEMBER 25 On the Basis of Sex \n266 DECEMBER 25 Destroyer \n267 DECEMBER 28 Black Mirror: Bandersnatch \n\n Production company \\\n0 Universal Pictures / Blumhouse Productions / S... \n1 Vertical Entertainment \n2 Momentum Pictures \n3 Samuel Goldwyn Films \n4 Lionsgate / StudioCanal / The Picture Company \n.. ... \n263 Columbia Pictures / Gary Sanchez Productions \n264 Annapurna Pictures / Plan B Entertainment \n265 Focus Features \n266 Annapurna Pictures \n267 Netflix \n\n Cast and crew Ref. \n0 Adam Robitel (director); Leigh Whannell (scree... 
[2] \n1 Lauren Wolkstein (director); Christopher Radcl... [3] \n2 Simon West (director); Duncan Falconer, Warren... [4] \n3 Warwick Thornton (director); David Tranter, St... [5] \n4 Jaume Collet-Serra (director); Byron Willinger... [6] \n.. ... ... \n263 Etan Cohen (director/screenplay); Will Ferrell... [162] \n264 Adam McKay (director/screenplay); Christian Ba... [136] \n265 Mimi Leder (director); Daniel Stiepleman (scre... [223] \n266 Karyn Kusama (director); Phil Hay, Matt Manfre... [256] \n267 David Slade (director); Charlie Brooker (scree... [257] \n\n[268 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
Opening
\n
Opening.1
\n
Title
\n
Production company
\n
Cast and crew
\n
Ref.
\n
\n \n \n
\n
0
\n
JANUARY
\n
5
\n
Insidious: The Last Key
\n
Universal Pictures / Blumhouse Productions / S...
\n
Adam Robitel (director); Leigh Whannell (scree...
\n
[2]
\n
\n
\n
1
\n
JANUARY
\n
5
\n
The Strange Ones
\n
Vertical Entertainment
\n
Lauren Wolkstein (director); Christopher Radcl...
\n
[3]
\n
\n
\n
2
\n
JANUARY
\n
5
\n
Stratton
\n
Momentum Pictures
\n
Simon West (director); Duncan Falconer, Warren...
\n
[4]
\n
\n
\n
3
\n
JANUARY
\n
10
\n
Sweet Country
\n
Samuel Goldwyn Films
\n
Warwick Thornton (director); David Tranter, St...
\n
[5]
\n
\n
\n
4
\n
JANUARY
\n
12
\n
The Commuter
\n
Lionsgate / StudioCanal / The Picture Company
\n
Jaume Collet-Serra (director); Byron Willinger...
\n
[6]
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
DECEMBER
\n
25
\n
Holmes & Watson
\n
Columbia Pictures / Gary Sanchez Productions
\n
Etan Cohen (director/screenplay); Will Ferrell...
\n
[162]
\n
\n
\n
264
\n
DECEMBER
\n
25
\n
Vice
\n
Annapurna Pictures / Plan B Entertainment
\n
Adam McKay (director/screenplay); Christian Ba...
\n
[136]
\n
\n
\n
265
\n
DECEMBER
\n
25
\n
On the Basis of Sex
\n
Focus Features
\n
Mimi Leder (director); Daniel Stiepleman (scre...
\n
[223]
\n
\n
\n
266
\n
DECEMBER
\n
25
\n
Destroyer
\n
Annapurna Pictures
\n
Karyn Kusama (director); Phil Hay, Matt Manfre...
\n
[256]
\n
\n
\n
267
\n
DECEMBER
\n
28
\n
Black Mirror: Bandersnatch
\n
Netflix
\n
David Slade (director); Charlie Brooker (scree...
\n
[257]
\n
\n \n
\n
268 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"!pip install tmdbv3api","execution_count":6,"outputs":[{"output_type":"stream","text":"Collecting tmdbv3api\n Downloading tmdbv3api-1.6.1-py2.py3-none-any.whl (13 kB)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from tmdbv3api) (2.23.0)\nRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (2.9)\nRequirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (3.0.4)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (1.24.3)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (2020.6.20)\nInstalling collected packages: tmdbv3api\nSuccessfully installed tmdbv3api-1.6.1\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"from tmdbv3api import TMDb\nimport json\nimport requests\ntmdb = TMDb()\ntmdb.api_key = 'YOUR_API_KEY'","execution_count":7,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from tmdbv3api import Movie\ntmdb_movie = Movie()\ndef get_genre(x):\n genres = []\n result = tmdb_movie.search(x)\n movie_id = result[0].id\n response = requests.get('https://api.themoviedb.org/3/movie/{}?api_key={}'.format(movie_id,tmdb.api_key))\n data_json = response.json()\n if data_json['genres']:\n genre_str = \" \" \n for i in range(0,len(data_json['genres'])):\n genres.append(data_json['genres'][i]['name'])\n return genre_str.join(genres)\n else:\n np.NaN","execution_count":8,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df['genres'] = df['Title'].map(lambda x: 
get_genre(str(x)))","execution_count":9,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df","execution_count":10,"outputs":[{"output_type":"execute_result","execution_count":10,"data":{"text/plain":" Opening Opening.1 Title \\\n0 JANUARY 5 Insidious: The Last Key \n1 JANUARY 5 The Strange Ones \n2 JANUARY 5 Stratton \n3 JANUARY 10 Sweet Country \n4 JANUARY 12 The Commuter \n.. ... ... ... \n263 DECEMBER 25 Holmes & Watson \n264 DECEMBER 25 Vice \n265 DECEMBER 25 On the Basis of Sex \n266 DECEMBER 25 Destroyer \n267 DECEMBER 28 Black Mirror: Bandersnatch \n\n Production company \\\n0 Universal Pictures / Blumhouse Productions / S... \n1 Vertical Entertainment \n2 Momentum Pictures \n3 Samuel Goldwyn Films \n4 Lionsgate / StudioCanal / The Picture Company \n.. ... \n263 Columbia Pictures / Gary Sanchez Productions \n264 Annapurna Pictures / Plan B Entertainment \n265 Focus Features \n266 Annapurna Pictures \n267 Netflix \n\n Cast and crew Ref. \\\n0 Adam Robitel (director); Leigh Whannell (scree... [2] \n1 Lauren Wolkstein (director); Christopher Radcl... [3] \n2 Simon West (director); Duncan Falconer, Warren... [4] \n3 Warwick Thornton (director); David Tranter, St... [5] \n4 Jaume Collet-Serra (director); Byron Willinger... [6] \n.. ... ... \n263 Etan Cohen (director/screenplay); Will Ferrell... [162] \n264 Adam McKay (director/screenplay); Christian Ba... [136] \n265 Mimi Leder (director); Daniel Stiepleman (scre... [223] \n266 Karyn Kusama (director); Phil Hay, Matt Manfre... [256] \n267 David Slade (director); Charlie Brooker (scree... [257] \n\n genres \n0 Mystery Horror Thriller \n1 Thriller Drama \n2 Action Thriller \n3 Drama History Western \n4 Action Thriller \n.. ... \n263 Mystery Adventure Comedy Crime \n264 Thriller Science Fiction Action Adventure \n265 Drama History \n266 Thriller Crime Drama Action \n267 Science Fiction Mystery Drama Thriller TV Movie \n\n[268 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
Opening
\n
Opening.1
\n
Title
\n
Production company
\n
Cast and crew
\n
Ref.
\n
genres
\n
\n \n \n
\n
0
\n
JANUARY
\n
5
\n
Insidious: The Last Key
\n
Universal Pictures / Blumhouse Productions / S...
\n
Adam Robitel (director); Leigh Whannell (scree...
\n
[2]
\n
Mystery Horror Thriller
\n
\n
\n
1
\n
JANUARY
\n
5
\n
The Strange Ones
\n
Vertical Entertainment
\n
Lauren Wolkstein (director); Christopher Radcl...
\n
[3]
\n
Thriller Drama
\n
\n
\n
2
\n
JANUARY
\n
5
\n
Stratton
\n
Momentum Pictures
\n
Simon West (director); Duncan Falconer, Warren...
\n
[4]
\n
Action Thriller
\n
\n
\n
3
\n
JANUARY
\n
10
\n
Sweet Country
\n
Samuel Goldwyn Films
\n
Warwick Thornton (director); David Tranter, St...
\n
[5]
\n
Drama History Western
\n
\n
\n
4
\n
JANUARY
\n
12
\n
The Commuter
\n
Lionsgate / StudioCanal / The Picture Company
\n
Jaume Collet-Serra (director); Byron Willinger...
\n
[6]
\n
Action Thriller
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
DECEMBER
\n
25
\n
Holmes & Watson
\n
Columbia Pictures / Gary Sanchez Productions
\n
Etan Cohen (director/screenplay); Will Ferrell...
\n
[162]
\n
Mystery Adventure Comedy Crime
\n
\n
\n
264
\n
DECEMBER
\n
25
\n
Vice
\n
Annapurna Pictures / Plan B Entertainment
\n
Adam McKay (director/screenplay); Christian Ba...
\n
[136]
\n
Thriller Science Fiction Action Adventure
\n
\n
\n
265
\n
DECEMBER
\n
25
\n
On the Basis of Sex
\n
Focus Features
\n
Mimi Leder (director); Daniel Stiepleman (scre...
\n
[223]
\n
Drama History
\n
\n
\n
266
\n
DECEMBER
\n
25
\n
Destroyer
\n
Annapurna Pictures
\n
Karyn Kusama (director); Phil Hay, Matt Manfre...
\n
[256]
\n
Thriller Crime Drama Action
\n
\n
\n
267
\n
DECEMBER
\n
28
\n
Black Mirror: Bandersnatch
\n
Netflix
\n
David Slade (director); Charlie Brooker (scree...
\n
[257]
\n
Science Fiction Mystery Drama Thriller TV Movie
\n
\n \n
\n
268 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018 = df[['Title','Cast and crew','genres']]","execution_count":11,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018","execution_count":12,"outputs":[{"output_type":"execute_result","execution_count":12,"data":{"text/plain":" Title \\\n0 Insidious: The Last Key \n1 The Strange Ones \n2 Stratton \n3 Sweet Country \n4 The Commuter \n.. ... \n263 Holmes & Watson \n264 Vice \n265 On the Basis of Sex \n266 Destroyer \n267 Black Mirror: Bandersnatch \n\n Cast and crew \\\n0 Adam Robitel (director); Leigh Whannell (scree... \n1 Lauren Wolkstein (director); Christopher Radcl... \n2 Simon West (director); Duncan Falconer, Warren... \n3 Warwick Thornton (director); David Tranter, St... \n4 Jaume Collet-Serra (director); Byron Willinger... \n.. ... \n263 Etan Cohen (director/screenplay); Will Ferrell... \n264 Adam McKay (director/screenplay); Christian Ba... \n265 Mimi Leder (director); Daniel Stiepleman (scre... \n266 Karyn Kusama (director); Phil Hay, Matt Manfre... \n267 David Slade (director); Charlie Brooker (scree... \n\n genres \n0 Mystery Horror Thriller \n1 Thriller Drama \n2 Action Thriller \n3 Drama History Western \n4 Action Thriller \n.. ... \n263 Mystery Adventure Comedy Crime \n264 Thriller Science Fiction Action Adventure \n265 Drama History \n266 Thriller Crime Drama Action \n267 Science Fiction Mystery Drama Thriller TV Movie \n\n[268 rows x 3 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
genres
\n
\n \n \n
\n
0
\n
Insidious: The Last Key
\n
Adam Robitel (director); Leigh Whannell (scree...
\n
Mystery Horror Thriller
\n
\n
\n
1
\n
The Strange Ones
\n
Lauren Wolkstein (director); Christopher Radcl...
\n
Thriller Drama
\n
\n
\n
2
\n
Stratton
\n
Simon West (director); Duncan Falconer, Warren...
\n
Action Thriller
\n
\n
\n
3
\n
Sweet Country
\n
Warwick Thornton (director); David Tranter, St...
\n
Drama History Western
\n
\n
\n
4
\n
The Commuter
\n
Jaume Collet-Serra (director); Byron Willinger...
\n
Action Thriller
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
Holmes & Watson
\n
Etan Cohen (director/screenplay); Will Ferrell...
\n
Mystery Adventure Comedy Crime
\n
\n
\n
264
\n
Vice
\n
Adam McKay (director/screenplay); Christian Ba...
\n
Thriller Science Fiction Action Adventure
\n
\n
\n
265
\n
On the Basis of Sex
\n
Mimi Leder (director); Daniel Stiepleman (scre...
\n
Drama History
\n
\n
\n
266
\n
Destroyer
\n
Karyn Kusama (director); Phil Hay, Matt Manfre...
\n
Thriller Crime Drama Action
\n
\n
\n
267
\n
Black Mirror: Bandersnatch
\n
David Slade (director); Charlie Brooker (scree...
\n
Science Fiction Mystery Drama Thriller TV Movie
\n
\n \n
\n
268 rows × 3 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"import re\n\n# Credit tag that follows the director name(s) in a 'Cast and crew' string,\n# e.g. \" (director)\", \" (directors)\" or \" (director/screenplay)\".\n_DIRECTOR_TAG = re.compile(r\" \\(directors?(?:/[^)]*)?\\)\")\n\n\ndef get_director(x):\n    \"\"\"Return the director name(s) from a 'Cast and crew' string.\n\n    The result is the text before the first director credit tag. Any\n    \"(director/...)\" variant is recognised, not only \"/screenplay\".\n    A string without a director tag is returned unchanged; a non-string\n    value (e.g. NaN for a missing cell) yields NaN instead of raising.\n    \"\"\"\n    if not isinstance(x, str):\n        return np.nan\n    return _DIRECTOR_TAG.split(x, maxsplit=1)[0]","execution_count":13,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# assign() returns a new frame, which avoids the SettingWithCopyWarning that\n# in-place column assignment raised here when df_2018 is a slice of another frame.\ndf_2018 = df_2018.assign(director_name=df_2018['Cast and crew'].map(get_director))","execution_count":14,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def _cast_member(x, position):\n    \"\"\"Return the cast name at 0-based `position`, or NaN when it is absent.\n\n    The cast list is the comma-separated text after the last \"screenplay); \"\n    credit; if that marker is missing the whole string is split on \", \".\n    Non-string input (e.g. NaN) yields NaN.\n    \"\"\"\n    if not isinstance(x, str):\n        return np.nan\n    names = x.split(\"screenplay); \")[-1].split(\", \")\n    return names[position] if position < len(names) else np.nan\n\n\ndef get_actor1(x):\n    \"\"\"Return the first listed cast member of a 'Cast and crew' string.\"\"\"\n    return _cast_member(x, 0)","execution_count":15,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018 = df_2018.assign(actor_1_name=df_2018['Cast and crew'].map(get_actor1))","execution_count":16,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor2(x):\n    \"\"\"Return the second listed cast member, or NaN when fewer than two are listed.\"\"\"\n    return _cast_member(x, 1)","execution_count":17,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018 = df_2018.assign(actor_2_name=df_2018['Cast and crew'].map(get_actor2))","execution_count":18,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor3(x):\n    \"\"\"Return the third listed cast member, or NaN when fewer than three are listed.\"\"\"\n    return _cast_member(x, 2)","execution_count":19,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018 = df_2018.assign(actor_3_name=df_2018['Cast and crew'].map(get_actor3))","execution_count":20,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2018","execution_count":21,"outputs":[{"output_type":"execute_result","execution_count":21,"data":{"text/plain":" Title \\\n0 Insidious: The Last Key \n1 The Strange Ones \n2 Stratton \n3 Sweet Country \n4 The Commuter \n.. ... \n263 Holmes & Watson \n264 Vice \n265 On the Basis of Sex \n266 Destroyer \n267 Black Mirror: Bandersnatch \n\n Cast and crew \\\n0 Adam Robitel (director); Leigh Whannell (scree... \n1 Lauren Wolkstein (director); Christopher Radcl... \n2 Simon West (director); Duncan Falconer, Warren... \n3 Warwick Thornton (director); David Tranter, St... \n4 Jaume Collet-Serra (director); Byron Willinger... \n.. ... \n263 Etan Cohen (director/screenplay); Will Ferrell... \n264 Adam McKay (director/screenplay); Christian Ba... \n265 Mimi Leder (director); Daniel Stiepleman (scre... 
\n266 Karyn Kusama (director); Phil Hay, Matt Manfre... \n267 David Slade (director); Charlie Brooker (scree... \n\n genres director_name \\\n0 Mystery Horror Thriller Adam Robitel \n1 Thriller Drama Lauren Wolkstein \n2 Action Thriller Simon West \n3 Drama History Western Warwick Thornton \n4 Action Thriller Jaume Collet-Serra \n.. ... ... \n263 Mystery Adventure Comedy Crime Etan Cohen \n264 Thriller Science Fiction Action Adventure Adam McKay \n265 Drama History Mimi Leder \n266 Thriller Crime Drama Action Karyn Kusama \n267 Science Fiction Mystery Drama Thriller TV Movie David Slade \n\n actor_1_name actor_2_name actor_3_name \n0 Lin Shaye Angus Sampson Leigh Whannell \n1 Alex Pettyfer James Freedson-Jackson Emily Althaus \n2 Dominic Cooper Austin Stowell Gemma Chan \n3 Bryan Brown Sam Neill NaN \n4 Liam Neeson Vera Farmiga Patrick Wilson \n.. ... ... ... \n263 Will Ferrell John C. Reilly Rebecca Hall \n264 Christian Bale Amy Adams Steve Carell \n265 Felicity Jones Armie Hammer Justin Theroux \n266 Nicole Kidman Sebastian Stan Toby Kebbell \n267 Fionn Whitehead Will Poulter Asim Chaudhry \n\n[268 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
genres
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
\n \n \n
\n
0
\n
Insidious: The Last Key
\n
Adam Robitel (director); Leigh Whannell (scree...
\n
Mystery Horror Thriller
\n
Adam Robitel
\n
Lin Shaye
\n
Angus Sampson
\n
Leigh Whannell
\n
\n
\n
1
\n
The Strange Ones
\n
Lauren Wolkstein (director); Christopher Radcl...
\n
Thriller Drama
\n
Lauren Wolkstein
\n
Alex Pettyfer
\n
James Freedson-Jackson
\n
Emily Althaus
\n
\n
\n
2
\n
Stratton
\n
Simon West (director); Duncan Falconer, Warren...
\n
Action Thriller
\n
Simon West
\n
Dominic Cooper
\n
Austin Stowell
\n
Gemma Chan
\n
\n
\n
3
\n
Sweet Country
\n
Warwick Thornton (director); David Tranter, St...
\n
Drama History Western
\n
Warwick Thornton
\n
Bryan Brown
\n
Sam Neill
\n
NaN
\n
\n
\n
4
\n
The Commuter
\n
Jaume Collet-Serra (director); Byron Willinger...
\n
Action Thriller
\n
Jaume Collet-Serra
\n
Liam Neeson
\n
Vera Farmiga
\n
Patrick Wilson
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
Holmes & Watson
\n
Etan Cohen (director/screenplay); Will Ferrell...
\n
Mystery Adventure Comedy Crime
\n
Etan Cohen
\n
Will Ferrell
\n
John C. Reilly
\n
Rebecca Hall
\n
\n
\n
264
\n
Vice
\n
Adam McKay (director/screenplay); Christian Ba...
\n
Thriller Science Fiction Action Adventure
\n
Adam McKay
\n
Christian Bale
\n
Amy Adams
\n
Steve Carell
\n
\n
\n
265
\n
On the Basis of Sex
\n
Mimi Leder (director); Daniel Stiepleman (scre...
\n
Drama History
\n
Mimi Leder
\n
Felicity Jones
\n
Armie Hammer
\n
Justin Theroux
\n
\n
\n
266
\n
Destroyer
\n
Karyn Kusama (director); Phil Hay, Matt Manfre...
\n
Thriller Crime Drama Action
\n
Karyn Kusama
\n
Nicole Kidman
\n
Sebastian Stan
\n
Toby Kebbell
\n
\n
\n
267
\n
Black Mirror: Bandersnatch
\n
David Slade (director); Charlie Brooker (scree...
\n
Science Fiction Mystery Drama Thriller TV Movie
\n
David Slade
\n
Fionn Whitehead
\n
Will Poulter
\n
Asim Chaudhry
\n
\n \n
\n
268 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndf_2018 = df_2018.rename(columns={'Title':'movie_title'})","execution_count":22,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df18 = df_2018.loc[:,['director_name','actor_1_name','actor_2_name','actor_3_name','genres','movie_title']]","execution_count":23,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df18","execution_count":24,"outputs":[{"output_type":"execute_result","execution_count":24,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Adam Robitel Lin Shaye Angus Sampson \n1 Lauren Wolkstein Alex Pettyfer James Freedson-Jackson \n2 Simon West Dominic Cooper Austin Stowell \n3 Warwick Thornton Bryan Brown Sam Neill \n4 Jaume Collet-Serra Liam Neeson Vera Farmiga \n.. ... ... ... \n263 Etan Cohen Will Ferrell John C. Reilly \n264 Adam McKay Christian Bale Amy Adams \n265 Mimi Leder Felicity Jones Armie Hammer \n266 Karyn Kusama Nicole Kidman Sebastian Stan \n267 David Slade Fionn Whitehead Will Poulter \n\n actor_3_name genres \\\n0 Leigh Whannell Mystery Horror Thriller \n1 Emily Althaus Thriller Drama \n2 Gemma Chan Action Thriller \n3 NaN Drama History Western \n4 Patrick Wilson Action Thriller \n.. ... ... \n263 Rebecca Hall Mystery Adventure Comedy Crime \n264 Steve Carell Thriller Science Fiction Action Adventure \n265 Justin Theroux Drama History \n266 Toby Kebbell Thriller Crime Drama Action \n267 Asim Chaudhry Science Fiction Mystery Drama Thriller TV Movie \n\n movie_title \n0 Insidious: The Last Key \n1 The Strange Ones \n2 Stratton \n3 Sweet Country \n4 The Commuter \n.. ... \n263 Holmes & Watson \n264 Vice \n265 On the Basis of Sex \n266 Destroyer \n267 Black Mirror: Bandersnatch \n\n[268 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
\n \n \n
\n
0
\n
Adam Robitel
\n
Lin Shaye
\n
Angus Sampson
\n
Leigh Whannell
\n
Mystery Horror Thriller
\n
Insidious: The Last Key
\n
\n
\n
1
\n
Lauren Wolkstein
\n
Alex Pettyfer
\n
James Freedson-Jackson
\n
Emily Althaus
\n
Thriller Drama
\n
The Strange Ones
\n
\n
\n
2
\n
Simon West
\n
Dominic Cooper
\n
Austin Stowell
\n
Gemma Chan
\n
Action Thriller
\n
Stratton
\n
\n
\n
3
\n
Warwick Thornton
\n
Bryan Brown
\n
Sam Neill
\n
NaN
\n
Drama History Western
\n
Sweet Country
\n
\n
\n
4
\n
Jaume Collet-Serra
\n
Liam Neeson
\n
Vera Farmiga
\n
Patrick Wilson
\n
Action Thriller
\n
The Commuter
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
Etan Cohen
\n
Will Ferrell
\n
John C. Reilly
\n
Rebecca Hall
\n
Mystery Adventure Comedy Crime
\n
Holmes & Watson
\n
\n
\n
264
\n
Adam McKay
\n
Christian Bale
\n
Amy Adams
\n
Steve Carell
\n
Thriller Science Fiction Action Adventure
\n
Vice
\n
\n
\n
265
\n
Mimi Leder
\n
Felicity Jones
\n
Armie Hammer
\n
Justin Theroux
\n
Drama History
\n
On the Basis of Sex
\n
\n
\n
266
\n
Karyn Kusama
\n
Nicole Kidman
\n
Sebastian Stan
\n
Toby Kebbell
\n
Thriller Crime Drama Action
\n
Destroyer
\n
\n
\n
267
\n
David Slade
\n
Fionn Whitehead
\n
Will Poulter
\n
Asim Chaudhry
\n
Science Fiction Mystery Drama Thriller TV Movie
\n
Black Mirror: Bandersnatch
\n
\n \n
\n
268 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Rows with fewer than three listed cast members carry NaN here; use a text placeholder.\nnew_df18['actor_2_name'] = new_df18['actor_2_name'].fillna('unknown')\nnew_df18['actor_3_name'] = new_df18['actor_3_name'].fillna('unknown')","execution_count":25,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df18['movie_title'] = new_df18['movie_title'].str.lower()","execution_count":26,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Feature text: the three actors, the director, then the genres, separated by spaces.\nnew_df18['comb'] = (\n    new_df18['actor_1_name'] + ' '\n    + new_df18['actor_2_name'] + ' '\n    + new_df18['actor_3_name'] + ' '\n    + new_df18['director_name'] + ' '\n    + new_df18['genres']\n)","execution_count":27,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df18","execution_count":28,"outputs":[{"output_type":"execute_result","execution_count":28,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Adam Robitel Lin Shaye Angus Sampson \n1 Lauren Wolkstein Alex Pettyfer James Freedson-Jackson \n2 Simon West Dominic Cooper Austin Stowell \n3 Warwick Thornton Bryan Brown Sam Neill \n4 Jaume Collet-Serra Liam Neeson Vera Farmiga \n.. ... ... ... \n263 Etan Cohen Will Ferrell John C. Reilly \n264 Adam McKay Christian Bale Amy Adams \n265 Mimi Leder Felicity Jones Armie Hammer \n266 Karyn Kusama Nicole Kidman Sebastian Stan \n267 David Slade Fionn Whitehead Will Poulter \n\n actor_3_name genres \\\n0 Leigh Whannell Mystery Horror Thriller \n1 Emily Althaus Thriller Drama \n2 Gemma Chan Action Thriller \n3 unknown Drama History Western \n4 Patrick Wilson Action Thriller \n.. ... ... \n263 Rebecca Hall Mystery Adventure Comedy Crime \n264 Steve Carell Thriller Science Fiction Action Adventure \n265 Justin Theroux Drama History \n266 Toby Kebbell Thriller Crime Drama Action \n267 Asim Chaudhry Science Fiction Mystery Drama Thriller TV Movie \n\n movie_title \\\n0 insidious: the last key \n1 the strange ones \n2 stratton \n3 sweet country \n4 the commuter \n.. ... 
\n263 holmes & watson \n264 vice \n265 on the basis of sex \n266 destroyer \n267 black mirror: bandersnatch \n\n comb \n0 Lin Shaye Angus Sampson Leigh Whannell Adam Ro... \n1 Alex Pettyfer James Freedson-Jackson Emily Alt... \n2 Dominic Cooper Austin Stowell Gemma Chan Simon... \n3 Bryan Brown Sam Neill unknown Warwick Thornton... \n4 Liam Neeson Vera Farmiga Patrick Wilson Jaume ... \n.. ... \n263 Will Ferrell John C. Reilly Rebecca Hall Etan ... \n264 Christian Bale Amy Adams Steve Carell Adam McK... \n265 Felicity Jones Armie Hammer Justin Theroux Mim... \n266 Nicole Kidman Sebastian Stan Toby Kebbell Kary... \n267 Fionn Whitehead Will Poulter Asim Chaudhry Dav... \n\n[268 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
Adam Robitel
\n
Lin Shaye
\n
Angus Sampson
\n
Leigh Whannell
\n
Mystery Horror Thriller
\n
insidious: the last key
\n
Lin Shaye Angus Sampson Leigh Whannell Adam Ro...
\n
\n
\n
1
\n
Lauren Wolkstein
\n
Alex Pettyfer
\n
James Freedson-Jackson
\n
Emily Althaus
\n
Thriller Drama
\n
the strange ones
\n
Alex Pettyfer James Freedson-Jackson Emily Alt...
\n
\n
\n
2
\n
Simon West
\n
Dominic Cooper
\n
Austin Stowell
\n
Gemma Chan
\n
Action Thriller
\n
stratton
\n
Dominic Cooper Austin Stowell Gemma Chan Simon...
\n
\n
\n
3
\n
Warwick Thornton
\n
Bryan Brown
\n
Sam Neill
\n
unknown
\n
Drama History Western
\n
sweet country
\n
Bryan Brown Sam Neill unknown Warwick Thornton...
\n
\n
\n
4
\n
Jaume Collet-Serra
\n
Liam Neeson
\n
Vera Farmiga
\n
Patrick Wilson
\n
Action Thriller
\n
the commuter
\n
Liam Neeson Vera Farmiga Patrick Wilson Jaume ...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
263
\n
Etan Cohen
\n
Will Ferrell
\n
John C. Reilly
\n
Rebecca Hall
\n
Mystery Adventure Comedy Crime
\n
holmes & watson
\n
Will Ferrell John C. Reilly Rebecca Hall Etan ...
\n
\n
\n
264
\n
Adam McKay
\n
Christian Bale
\n
Amy Adams
\n
Steve Carell
\n
Thriller Science Fiction Action Adventure
\n
vice
\n
Christian Bale Amy Adams Steve Carell Adam McK...
\n
\n
\n
265
\n
Mimi Leder
\n
Felicity Jones
\n
Armie Hammer
\n
Justin Theroux
\n
Drama History
\n
on the basis of sex
\n
Felicity Jones Armie Hammer Justin Theroux Mim...
\n
\n
\n
266
\n
Karyn Kusama
\n
Nicole Kidman
\n
Sebastian Stan
\n
Toby Kebbell
\n
Thriller Crime Drama Action
\n
destroyer
\n
Nicole Kidman Sebastian Stan Toby Kebbell Kary...
\n
\n
\n
267
\n
David Slade
\n
Fionn Whitehead
\n
Will Poulter
\n
Asim Chaudhry
\n
Science Fiction Mystery Drama Thriller TV Movie
\n
black mirror: bandersnatch
\n
Fionn Whitehead Will Poulter Asim Chaudhry Dav...
\n
\n \n
\n
268 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":"## Extracting features of 2019 movies from Wikipedia"},{"metadata":{"trusted":true},"cell_type":"code","source":"link = \"https://en.wikipedia.org/wiki/List_of_American_films_of_2019\"\n# Download and parse the page once instead of once per table;\n# positions 3-6 are the film tables combined in the next cell.\ntables_2019 = pd.read_html(link, header=0)\ndf1, df2, df3, df4 = tables_2019[3:7]","execution_count":29,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# DataFrame.append was removed in pandas 2.0; one concat call stacks the four\n# tables in the same order and renumbers the index from 0.\ndf = pd.concat([df1, df2, df3, df4], ignore_index=True)","execution_count":30,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df","execution_count":31,"outputs":[{"output_type":"execute_result","execution_count":31,"data":{"text/plain":" Opening Opening.1 Title \\\n0 JANUARY 4 Escape Room \n1 JANUARY 4 Rust Creek \n2 JANUARY 4 American Hangman \n3 JANUARY 11 A Dog's Way Home \n4 JANUARY 11 The Upside \n.. ... ... ... \n236 DECEMBER 25 Little Women \n237 DECEMBER 25 1917 \n238 DECEMBER 25 Just Mercy \n239 DECEMBER 27 Clemency \n240 DECEMBER 27 Apparition \n\n Production company \\\n0 Columbia Pictures \n1 IFC Films \n2 Hangman Justice Productions \n3 Columbia Pictures \n4 STX Entertainment \n.. ... \n236 Columbia Pictures / Regency Enterprises \n237 Universal Pictures / DreamWorks Pictures / Ent... \n238 Warner Bros. Pictures / Participant Media \n239 Neon \n240 Vertical Pictures \n\n Cast and crew Ref. \n0 Adam Robitel (director); Bragi F. Schut, Maria... [2] \n1 Jen McGowan (director); Julie Lipson (screenpl... [3] \n2 Wilson Coneybeare (director/screenplay); Donal... [4] \n3 Charles Martin Smith (director); W. Bruce Came... [5] \n4 Neil Burger (director); Jon Hartmere (screenpl... [6] \n.. ... ... \n236 Greta Gerwig (director/screenplay); Saoirse Ro... [222] \n237 Sam Mendes (director/screenplay); Krysty Wilso... [223] \n238 Destin Daniel Cretton (director/screenplay), A... 
[224] \n239 Chinonye Chukwu (director/screenplay); Alfre W... [225] \n240 Waymon Boone (director/screenplay); Mena Suvar... [226] \n\n[241 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
Opening
\n
Opening.1
\n
Title
\n
Production company
\n
Cast and crew
\n
Ref.
\n
\n \n \n
\n
0
\n
JANUARY
\n
4
\n
Escape Room
\n
Columbia Pictures
\n
Adam Robitel (director); Bragi F. Schut, Maria...
\n
[2]
\n
\n
\n
1
\n
JANUARY
\n
4
\n
Rust Creek
\n
IFC Films
\n
Jen McGowan (director); Julie Lipson (screenpl...
\n
[3]
\n
\n
\n
2
\n
JANUARY
\n
4
\n
American Hangman
\n
Hangman Justice Productions
\n
Wilson Coneybeare (director/screenplay); Donal...
\n
[4]
\n
\n
\n
3
\n
JANUARY
\n
11
\n
A Dog's Way Home
\n
Columbia Pictures
\n
Charles Martin Smith (director); W. Bruce Came...
\n
[5]
\n
\n
\n
4
\n
JANUARY
\n
11
\n
The Upside
\n
STX Entertainment
\n
Neil Burger (director); Jon Hartmere (screenpl...
\n
[6]
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
236
\n
DECEMBER
\n
25
\n
Little Women
\n
Columbia Pictures / Regency Enterprises
\n
Greta Gerwig (director/screenplay); Saoirse Ro...
\n
[222]
\n
\n
\n
237
\n
DECEMBER
\n
25
\n
1917
\n
Universal Pictures / DreamWorks Pictures / Ent...
\n
Sam Mendes (director/screenplay); Krysty Wilso...
\n
[223]
\n
\n
\n
238
\n
DECEMBER
\n
25
\n
Just Mercy
\n
Warner Bros. Pictures / Participant Media
\n
Destin Daniel Cretton (director/screenplay), A...
\n
[224]
\n
\n
\n
239
\n
DECEMBER
\n
27
\n
Clemency
\n
Neon
\n
Chinonye Chukwu (director/screenplay); Alfre W...
\n
[225]
\n
\n
\n
240
\n
DECEMBER
\n
27
\n
Apparition
\n
Vertical Pictures
\n
Waymon Boone (director/screenplay); Mena Suvar...
\n
[226]
\n
\n \n
\n
241 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"df['genres'] = df['Title'].map(lambda x: get_genre(str(x)))","execution_count":32,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# .copy() makes df_2019 an independent frame, so the column assignments in the\n# following cells do not raise SettingWithCopyWarning.\ndf_2019 = df[['Title','Cast and crew','genres']].copy()","execution_count":33,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2019","execution_count":34,"outputs":[{"output_type":"execute_result","execution_count":34,"data":{"text/plain":" Title Cast and crew \\\n0 Escape Room Adam Robitel (director); Bragi F. Schut, Maria... \n1 Rust Creek Jen McGowan (director); Julie Lipson (screenpl... \n2 American Hangman Wilson Coneybeare (director/screenplay); Donal... \n3 A Dog's Way Home Charles Martin Smith (director); W. Bruce Came... \n4 The Upside Neil Burger (director); Jon Hartmere (screenpl... \n.. ... ... \n236 Little Women Greta Gerwig (director/screenplay); Saoirse Ro... \n237 1917 Sam Mendes (director/screenplay); Krysty Wilso... \n238 Just Mercy Destin Daniel Cretton (director/screenplay), A... \n239 Clemency Chinonye Chukwu (director/screenplay); Alfre W... \n240 Apparition Waymon Boone (director/screenplay); Mena Suvar... \n\n genres \n0 Thriller Action Mystery Horror \n1 Thriller Drama \n2 Thriller \n3 Drama Adventure Family \n4 Comedy Drama \n.. ... \n236 Drama Romance \n237 War Drama Action History \n238 Drama Crime \n239 Drama \n240 Horror Thriller \n\n[241 rows x 3 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
genres
\n
\n \n \n
\n
0
\n
Escape Room
\n
Adam Robitel (director); Bragi F. Schut, Maria...
\n
Thriller Action Mystery Horror
\n
\n
\n
1
\n
Rust Creek
\n
Jen McGowan (director); Julie Lipson (screenpl...
\n
Thriller Drama
\n
\n
\n
2
\n
American Hangman
\n
Wilson Coneybeare (director/screenplay); Donal...
\n
Thriller
\n
\n
\n
3
\n
A Dog's Way Home
\n
Charles Martin Smith (director); W. Bruce Came...
\n
Drama Adventure Family
\n
\n
\n
4
\n
The Upside
\n
Neil Burger (director); Jon Hartmere (screenpl...
\n
Comedy Drama
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
236
\n
Little Women
\n
Greta Gerwig (director/screenplay); Saoirse Ro...
\n
Drama Romance
\n
\n
\n
237
\n
1917
\n
Sam Mendes (director/screenplay); Krysty Wilso...
\n
War Drama Action History
\n
\n
\n
238
\n
Just Mercy
\n
Destin Daniel Cretton (director/screenplay), A...
\n
Drama Crime
\n
\n
\n
239
\n
Clemency
\n
Chinonye Chukwu (director/screenplay); Alfre W...
\n
Drama
\n
\n
\n
240
\n
Apparition
\n
Waymon Boone (director/screenplay); Mena Suvar...
\n
Horror Thriller
\n
\n \n
\n
241 rows × 3 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# assign() returns a new frame, so adding the column does not raise the\n# SettingWithCopyWarning that writing into the df_2019 slice produced.\n# str() is kept so a non-string cell does not make get_director fail.\ndf_2019 = df_2019.assign(director_name=df_2019['Cast and crew'].map(lambda x: get_director(str(x))))","execution_count":35,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2019 = df_2019.assign(actor_1_name=df_2019['Cast and crew'].map(get_actor1))","execution_count":36,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2019 = df_2019.assign(actor_2_name=df_2019['Cast and crew'].map(get_actor2))","execution_count":37,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2019 = df_2019.assign(actor_3_name=df_2019['Cast and crew'].map(get_actor3))","execution_count":38,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2019 = df_2019.rename(columns={'Title':'movie_title'})","execution_count":39,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df19 = df_2019.loc[:,['director_name','actor_1_name','actor_2_name','actor_3_name','genres','movie_title']]","execution_count":40,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Rows with fewer than three listed cast members carry NaN here; use a text placeholder.\nnew_df19['actor_2_name'] = new_df19['actor_2_name'].fillna('unknown')\nnew_df19['actor_3_name'] = new_df19['actor_3_name'].fillna('unknown')","execution_count":41,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df19['movie_title'] = new_df19['movie_title'].str.lower()","execution_count":42,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df19['comb'] = new_df19['actor_1_name'] + ' ' + new_df19['actor_2_name'] + ' '+ new_df19['actor_3_name'] + ' '+ new_df19['director_name'] +' ' + new_df19['genres']","execution_count":43,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df19","execution_count":44,"outputs":[{"output_type":"execute_result","execution_count":44,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Adam Robitel Taylor Russell Logan Miller \n1 Jen McGowan Hermione Corfield Jay Paulson \n2 Wilson Coneybeare Donald Sutherland Vincent Kartheiser \n3 Charles Martin Smith Bryce Dallas Howard Edward James Olmos \n4 Neil Burger Bryan Cranston Kevin Hart \n.. ... ... ... \n236 Greta Gerwig Saoirse Ronan Emma Watson \n237 Sam Mendes George MacKay Dean-Charles Chapman \n238 Destin Daniel Cretton Michael B. 
Jordan Jamie Foxx \n239 Chinonye Chukwu Alfre Woodard Wendell Pierce \n240 Waymon Boone Mena Suvari Kevin Pollak \n\n actor_3_name genres movie_title \\\n0 Deborah Ann Woll Thriller Action Mystery Horror escape room \n1 Sean O'Bryan Thriller Drama rust creek \n2 Oliver Dennis Thriller american hangman \n3 Alexandra Shipp Drama Adventure Family a dog's way home \n4 Nicole Kidman Comedy Drama the upside \n.. ... ... ... \n236 Florence Pugh Drama Romance little women \n237 Mark Strong War Drama Action History 1917 \n238 Brie Larson Drama Crime just mercy \n239 Aldis Hodge Drama clemency \n240 unknown Horror Thriller apparition \n\n comb \n0 Taylor Russell Logan Miller Deborah Ann Woll A... \n1 Hermione Corfield Jay Paulson Sean O'Bryan Jen... \n2 Donald Sutherland Vincent Kartheiser Oliver De... \n3 Bryce Dallas Howard Edward James Olmos Alexand... \n4 Bryan Cranston Kevin Hart Nicole Kidman Neil B... \n.. ... \n236 Saoirse Ronan Emma Watson Florence Pugh Greta ... \n237 George MacKay Dean-Charles Chapman Mark Strong... \n238 Michael B. Jordan Jamie Foxx Brie Larson Desti... \n239 Alfre Woodard Wendell Pierce Aldis Hodge Chino... \n240 Mena Suvari Kevin Pollak unknown Waymon Boone ... \n\n[241 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
Adam Robitel
\n
Taylor Russell
\n
Logan Miller
\n
Deborah Ann Woll
\n
Thriller Action Mystery Horror
\n
escape room
\n
Taylor Russell Logan Miller Deborah Ann Woll A...
\n
\n
\n
1
\n
Jen McGowan
\n
Hermione Corfield
\n
Jay Paulson
\n
Sean O'Bryan
\n
Thriller Drama
\n
rust creek
\n
Hermione Corfield Jay Paulson Sean O'Bryan Jen...
\n
\n
\n
2
\n
Wilson Coneybeare
\n
Donald Sutherland
\n
Vincent Kartheiser
\n
Oliver Dennis
\n
Thriller
\n
american hangman
\n
Donald Sutherland Vincent Kartheiser Oliver De...
\n
\n
\n
3
\n
Charles Martin Smith
\n
Bryce Dallas Howard
\n
Edward James Olmos
\n
Alexandra Shipp
\n
Drama Adventure Family
\n
a dog's way home
\n
Bryce Dallas Howard Edward James Olmos Alexand...
\n
\n
\n
4
\n
Neil Burger
\n
Bryan Cranston
\n
Kevin Hart
\n
Nicole Kidman
\n
Comedy Drama
\n
the upside
\n
Bryan Cranston Kevin Hart Nicole Kidman Neil B...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
236
\n
Greta Gerwig
\n
Saoirse Ronan
\n
Emma Watson
\n
Florence Pugh
\n
Drama Romance
\n
little women
\n
Saoirse Ronan Emma Watson Florence Pugh Greta ...
\n
\n
\n
237
\n
Sam Mendes
\n
George MacKay
\n
Dean-Charles Chapman
\n
Mark Strong
\n
War Drama Action History
\n
1917
\n
George MacKay Dean-Charles Chapman Mark Strong...
\n
\n
\n
238
\n
Destin Daniel Cretton
\n
Michael B. Jordan
\n
Jamie Foxx
\n
Brie Larson
\n
Drama Crime
\n
just mercy
\n
Michael B. Jordan Jamie Foxx Brie Larson Desti...
\n
\n
\n
239
\n
Chinonye Chukwu
\n
Alfre Woodard
\n
Wendell Pierce
\n
Aldis Hodge
\n
Drama
\n
clemency
\n
Alfre Woodard Wendell Pierce Aldis Hodge Chino...
\n
\n
\n
240
\n
Waymon Boone
\n
Mena Suvari
\n
Kevin Pollak
\n
unknown
\n
Horror Thriller
\n
apparition
\n
Mena Suvari Kevin Pollak unknown Waymon Boone ...
\n
\n \n
\n
241 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# DataFrame.append was removed in pandas 2.0; pd.concat stacks the 2018 and\n# 2019 rows in the same order and renumbers the index from 0.\nmy_df = pd.concat([new_df18, new_df19], ignore_index=True)","execution_count":45,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"my_df","execution_count":46,"outputs":[{"output_type":"execute_result","execution_count":46,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Adam Robitel Lin Shaye Angus Sampson \n1 Lauren Wolkstein Alex Pettyfer James Freedson-Jackson \n2 Simon West Dominic Cooper Austin Stowell \n3 Warwick Thornton Bryan Brown Sam Neill \n4 Jaume Collet-Serra Liam Neeson Vera Farmiga \n.. ... ... ... \n504 Greta Gerwig Saoirse Ronan Emma Watson \n505 Sam Mendes George MacKay Dean-Charles Chapman \n506 Destin Daniel Cretton Michael B. Jordan Jamie Foxx \n507 Chinonye Chukwu Alfre Woodard Wendell Pierce \n508 Waymon Boone Mena Suvari Kevin Pollak \n\n actor_3_name genres movie_title \\\n0 Leigh Whannell Mystery Horror Thriller insidious: the last key \n1 Emily Althaus Thriller Drama the strange ones \n2 Gemma Chan Action Thriller stratton \n3 unknown Drama History Western sweet country \n4 Patrick Wilson Action Thriller the commuter \n.. ... ... ... \n504 Florence Pugh Drama Romance little women \n505 Mark Strong War Drama Action History 1917 \n506 Brie Larson Drama Crime just mercy \n507 Aldis Hodge Drama clemency \n508 unknown Horror Thriller apparition \n\n comb \n0 Lin Shaye Angus Sampson Leigh Whannell Adam Ro... \n1 Alex Pettyfer James Freedson-Jackson Emily Alt... \n2 Dominic Cooper Austin Stowell Gemma Chan Simon... \n3 Bryan Brown Sam Neill unknown Warwick Thornton... \n4 Liam Neeson Vera Farmiga Patrick Wilson Jaume ... \n.. ... \n504 Saoirse Ronan Emma Watson Florence Pugh Greta ... \n505 George MacKay Dean-Charles Chapman Mark Strong... \n506 Michael B. Jordan Jamie Foxx Brie Larson Desti... \n507 Alfre Woodard Wendell Pierce Aldis Hodge Chino... \n508 Mena Suvari Kevin Pollak unknown Waymon Boone ... 
\n\n[509 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
Adam Robitel
\n
Lin Shaye
\n
Angus Sampson
\n
Leigh Whannell
\n
Mystery Horror Thriller
\n
insidious: the last key
\n
Lin Shaye Angus Sampson Leigh Whannell Adam Ro...
\n
\n
\n
1
\n
Lauren Wolkstein
\n
Alex Pettyfer
\n
James Freedson-Jackson
\n
Emily Althaus
\n
Thriller Drama
\n
the strange ones
\n
Alex Pettyfer James Freedson-Jackson Emily Alt...
\n
\n
\n
2
\n
Simon West
\n
Dominic Cooper
\n
Austin Stowell
\n
Gemma Chan
\n
Action Thriller
\n
stratton
\n
Dominic Cooper Austin Stowell Gemma Chan Simon...
\n
\n
\n
3
\n
Warwick Thornton
\n
Bryan Brown
\n
Sam Neill
\n
unknown
\n
Drama History Western
\n
sweet country
\n
Bryan Brown Sam Neill unknown Warwick Thornton...
\n
\n
\n
4
\n
Jaume Collet-Serra
\n
Liam Neeson
\n
Vera Farmiga
\n
Patrick Wilson
\n
Action Thriller
\n
the commuter
\n
Liam Neeson Vera Farmiga Patrick Wilson Jaume ...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
504
\n
Greta Gerwig
\n
Saoirse Ronan
\n
Emma Watson
\n
Florence Pugh
\n
Drama Romance
\n
little women
\n
Saoirse Ronan Emma Watson Florence Pugh Greta ...
\n
\n
\n
505
\n
Sam Mendes
\n
George MacKay
\n
Dean-Charles Chapman
\n
Mark Strong
\n
War Drama Action History
\n
1917
\n
George MacKay Dean-Charles Chapman Mark Strong...
\n
\n
\n
506
\n
Destin Daniel Cretton
\n
Michael B. Jordan
\n
Jamie Foxx
\n
Brie Larson
\n
Drama Crime
\n
just mercy
\n
Michael B. Jordan Jamie Foxx Brie Larson Desti...
\n
\n
\n
507
\n
Chinonye Chukwu
\n
Alfre Woodard
\n
Wendell Pierce
\n
Aldis Hodge
\n
Drama
\n
clemency
\n
Alfre Woodard Wendell Pierce Aldis Hodge Chino...
\n
\n
\n
508
\n
Waymon Boone
\n
Mena Suvari
\n
Kevin Pollak
\n
unknown
\n
Horror Thriller
\n
apparition
\n
Mena Suvari Kevin Pollak unknown Waymon Boone ...
\n
\n \n
\n
509 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"old_df = pd.read_csv('../input/movie.csv')","execution_count":51,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"old_df","execution_count":52,"outputs":[{"output_type":"execute_result","execution_count":52,"data":{"text/plain":" director_name actor_1_name actor_2_name actor_3_name \\\n0 John Lasseter Tom Hanks Tim Allen Don Rickles \n1 Joe Johnston Robin Williams Jonathan Hyde Kirsten Dunst \n2 Howard Deutch Walter Matthau Jack Lemmon Ann-Margret \n3 Forest Whitaker Whitney Houston Angela Bassett Loretta Devine \n4 Charles Shyer Steve Martin Diane Keaton Martin Short \n... ... ... ... ... \n36336 Ben Rock Monty Bane Lucy Butler David Grammer \n36337 Aaron Osborne Lisa Boyle Kena Land Zaneta Polard \n36338 John Irvin Patrick Bergin Uma Thurman David Morrissey \n36339 Lav Diaz Angel Aquino Perry Dizon Hazel Orencio \n36340 Mark L. Lester Erika Eleniak Adam Baldwin Julie du Page \n\n genres movie_title \\\n0 Animation Comedy Family toy story \n1 Adventure Fantasy Family jumanji \n2 Romance Comedy grumpier old men \n3 Comedy Drama Romance waiting to exhale \n4 Comedy father of the bride part ii \n... ... ... \n36336 Horror the burkittsville 7 \n36337 Sci-Fi caged heat 3000 \n36338 Drama Action Romance robin hood \n36339 Drama century of birthing \n36340 Action Drama Thriller betrayal \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter ... \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard ... \n3 Whitney Houston Angela Bassett Loretta Devine ... \n4 Steve Martin Diane Keaton Martin Short Charles... \n... ... \n36336 Monty Bane Lucy Butler David Grammer Ben Rock ... \n36337 Lisa Boyle Kena Land Zaneta Polard Aaron Osbor... \n36338 Patrick Bergin Uma Thurman David Morrissey Joh... \n36339 Angel Aquino Perry Dizon Hazel Orencio Lav Dia... \n36340 Erika Eleniak Adam Baldwin Julie du Page Mark ... 
\n\n[36341 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
toy story
\n
Tom Hanks Tim Allen Don Rickles John Lasseter ...
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
jumanji
\n
Robin Williams Jonathan Hyde Kirsten Dunst Joe...
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
grumpier old men
\n
Walter Matthau Jack Lemmon Ann-Margret Howard ...
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
waiting to exhale
\n
Whitney Houston Angela Bassett Loretta Devine ...
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
father of the bride part ii
\n
Steve Martin Diane Keaton Martin Short Charles...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
36336
\n
Ben Rock
\n
Monty Bane
\n
Lucy Butler
\n
David Grammer
\n
Horror
\n
the burkittsville 7
\n
Monty Bane Lucy Butler David Grammer Ben Rock ...
\n
\n
\n
36337
\n
Aaron Osborne
\n
Lisa Boyle
\n
Kena Land
\n
Zaneta Polard
\n
Sci-Fi
\n
caged heat 3000
\n
Lisa Boyle Kena Land Zaneta Polard Aaron Osbor...
\n
\n
\n
36338
\n
John Irvin
\n
Patrick Bergin
\n
Uma Thurman
\n
David Morrissey
\n
Drama Action Romance
\n
robin hood
\n
Patrick Bergin Uma Thurman David Morrissey Joh...
\n
\n
\n
36339
\n
Lav Diaz
\n
Angel Aquino
\n
Perry Dizon
\n
Hazel Orencio
\n
Drama
\n
century of birthing
\n
Angel Aquino Perry Dizon Hazel Orencio Lav Dia...
\n
\n
\n
36340
\n
Mark L. Lester
\n
Erika Eleniak
\n
Adam Baldwin
\n
Julie du Page
\n
Action Drama Thriller
\n
betrayal
\n
Erika Eleniak Adam Baldwin Julie du Page Mark ...
\n
\n \n
\n
36341 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.\nfinal_df = pd.concat([old_df, my_df], ignore_index=True)","execution_count":53,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"final_df","execution_count":54,"outputs":[{"output_type":"execute_result","execution_count":54,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 John Lasseter Tom Hanks Tim Allen \n1 Joe Johnston Robin Williams Jonathan Hyde \n2 Howard Deutch Walter Matthau Jack Lemmon \n3 Forest Whitaker Whitney Houston Angela Bassett \n4 Charles Shyer Steve Martin Diane Keaton \n... ... ... ... \n36845 Greta Gerwig Saoirse Ronan Emma Watson \n36846 Sam Mendes George MacKay Dean-Charles Chapman \n36847 Destin Daniel Cretton Michael B. Jordan Jamie Foxx \n36848 Chinonye Chukwu Alfre Woodard Wendell Pierce \n36849 Waymon Boone Mena Suvari Kevin Pollak \n\n actor_3_name genres movie_title \\\n0 Don Rickles Animation Comedy Family toy story \n1 Kirsten Dunst Adventure Fantasy Family jumanji \n2 Ann-Margret Romance Comedy grumpier old men \n3 Loretta Devine Comedy Drama Romance waiting to exhale \n4 Martin Short Comedy father of the bride part ii \n... ... ... ... \n36845 Florence Pugh Drama Romance little women \n36846 Mark Strong War Drama Action History 1917 \n36847 Brie Larson Drama Crime just mercy \n36848 Aldis Hodge Drama clemency \n36849 unknown Horror Thriller apparition \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter ... \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard ... \n3 Whitney Houston Angela Bassett Loretta Devine ... \n4 Steve Martin Diane Keaton Martin Short Charles... \n... ... \n36845 Saoirse Ronan Emma Watson Florence Pugh Greta ... \n36846 George MacKay Dean-Charles Chapman Mark Strong... \n36847 Michael B. Jordan Jamie Foxx Brie Larson Desti... \n36848 Alfre Woodard Wendell Pierce Aldis Hodge Chino... \n36849 Mena Suvari Kevin Pollak unknown Waymon Boone ... 
\n\n[36850 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
toy story
\n
Tom Hanks Tim Allen Don Rickles John Lasseter ...
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
jumanji
\n
Robin Williams Jonathan Hyde Kirsten Dunst Joe...
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
grumpier old men
\n
Walter Matthau Jack Lemmon Ann-Margret Howard ...
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
waiting to exhale
\n
Whitney Houston Angela Bassett Loretta Devine ...
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
father of the bride part ii
\n
Steve Martin Diane Keaton Martin Short Charles...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
36845
\n
Greta Gerwig
\n
Saoirse Ronan
\n
Emma Watson
\n
Florence Pugh
\n
Drama Romance
\n
little women
\n
Saoirse Ronan Emma Watson Florence Pugh Greta ...
\n
\n
\n
36846
\n
Sam Mendes
\n
George MacKay
\n
Dean-Charles Chapman
\n
Mark Strong
\n
War Drama Action History
\n
1917
\n
George MacKay Dean-Charles Chapman Mark Strong...
\n
\n
\n
36847
\n
Destin Daniel Cretton
\n
Michael B. Jordan
\n
Jamie Foxx
\n
Brie Larson
\n
Drama Crime
\n
just mercy
\n
Michael B. Jordan Jamie Foxx Brie Larson Desti...
\n
\n
\n
36848
\n
Chinonye Chukwu
\n
Alfre Woodard
\n
Wendell Pierce
\n
Aldis Hodge
\n
Drama
\n
clemency
\n
Alfre Woodard Wendell Pierce Aldis Hodge Chino...
\n
\n
\n
36849
\n
Waymon Boone
\n
Mena Suvari
\n
Kevin Pollak
\n
unknown
\n
Horror Thriller
\n
apparition
\n
Mena Suvari Kevin Pollak unknown Waymon Boone ...
\n
\n \n
\n
36850 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Count the missing values per column before cleaning.\nfinal_df.isnull().sum()","execution_count":55,"outputs":[{"output_type":"execute_result","execution_count":55,"data":{"text/plain":"director_name 0\nactor_1_name 0\nactor_2_name 0\nactor_3_name 0\ngenres 4\nmovie_title 0\ncomb 4\ndtype: int64"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Drop every row that still has a missing value in any column.\nfinal_df = final_df.dropna(axis=0, how='any')","execution_count":56,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Write the cleaned frame to disk without the positional index column.\nfinal_df.to_csv(path_or_buf='final_data.csv', index=False)","execution_count":57,"outputs":[]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":4}
--------------------------------------------------------------------------------
/.ipynb_checkpoints/preprocessing 4.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport requests\nimport bs4 as bs\nimport urllib.request","execution_count":1,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"## Extracting features of 2020 movies from Wikipedia"},{"metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","trusted":true},"cell_type":"code","source":"link = \"https://en.wikipedia.org/wiki/List_of_American_films_of_2020\"","execution_count":2,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Read the page inside a context manager so the HTTP connection is closed after the download.\nwith urllib.request.urlopen(link) as response:\n    source = response.read()\nsoup = bs.BeautifulSoup(source,'lxml')","execution_count":3,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"tables = soup.find_all('table',class_='wikitable sortable')","execution_count":4,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df1 = pd.read_html(str(tables[0]))[0]\ndf2 = pd.read_html(str(tables[1]))[0]\ndf3 = pd.read_html(str(tables[2]))[0]\ndf4 = pd.read_html(str(tables[3]).replace(\"'1\\\"\\'\",'\"1\"'))[0] # avoided \"ValueError: invalid literal for int() with base 10: '1\"'","execution_count":5,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# A single concat stacks the four tables in order with a fresh 0..n-1 index;\n# it replaces the nested DataFrame.append calls (append was removed in pandas 2.0).\ndf = pd.concat([df1, df2, df3, df4], ignore_index=True)","execution_count":6,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df","execution_count":7,"outputs":[{"output_type":"execute_result","execution_count":7,"data":{"text/plain":" Opening Opening.1 Title \\\n0 JANUARY 3.0 The Grudge \n1 JANUARY 10.0 Underwater \n2 JANUARY 10.0 Like a Boss \n3 JANUARY 10.0 Inherit the Viper \n4 JANUARY 10.0 The Sonata \n.. ... ... ... 
\n150 DECEMBER 23.0 Top Gun: Maverick \n151 DECEMBER 23.0 The Croods 2 \n152 DECEMBER 25.0 Respect \n153 DECEMBER 25.0 The Last Duel \n154 DECEMBER 25.0 News of the World \n\n Production company \\\n0 Screen Gems / Stage 6 Films / Ghost House Pict... \n1 20th Century Fox / TSG Entertainment / Chernin... \n2 Paramount Pictures \n3 Barry Films / Tycor International Film Company \n4 Screen Media Films \n.. ... \n150 Paramount Pictures / Skydance Media / Don Simp... \n151 Universal Pictures / DreamWorks Animation \n152 Metro-Goldwyn-Mayer / Universal Pictures / Bro... \n153 20th Century Studios / Scott Free Productions ... \n154 Universal Pictures / Playtone \n\n Cast and crew Ref. \n0 Nicolas Pesce (director/screenplay); Andrea Ri... [2] \n1 William Eubank (director); Brian Duffield, Ada... [3] \n2 Miguel Arteta (director); Sam Pitman, Adam Col... [4] \n3 Anthony Jerjen (director); Andrew Crabtree (sc... [5] \n4 Andrew Desmond (director/screenplay); Arthur M... [6] \n.. ... ... \n150 Joseph Kosinski (director); Ehren Kruger, Eric... [149] \n151 Joel Crawford (director); Kevin Hageman, Dan H... [150] \n152 Liesl Tommy (director); Tracey Scott Wilson (s... [151] \n153 Ridley Scott (director); Ben Affleck, Matt Dam... [152] \n154 Paul Greengrass (director/screenplay); Luke Da... [153] \n\n[155 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
Opening
\n
Opening.1
\n
Title
\n
Production company
\n
Cast and crew
\n
Ref.
\n
\n \n \n
\n
0
\n
JANUARY
\n
3.0
\n
The Grudge
\n
Screen Gems / Stage 6 Films / Ghost House Pict...
\n
Nicolas Pesce (director/screenplay); Andrea Ri...
\n
[2]
\n
\n
\n
1
\n
JANUARY
\n
10.0
\n
Underwater
\n
20th Century Fox / TSG Entertainment / Chernin...
\n
William Eubank (director); Brian Duffield, Ada...
\n
[3]
\n
\n
\n
2
\n
JANUARY
\n
10.0
\n
Like a Boss
\n
Paramount Pictures
\n
Miguel Arteta (director); Sam Pitman, Adam Col...
\n
[4]
\n
\n
\n
3
\n
JANUARY
\n
10.0
\n
Inherit the Viper
\n
Barry Films / Tycor International Film Company
\n
Anthony Jerjen (director); Andrew Crabtree (sc...
\n
[5]
\n
\n
\n
4
\n
JANUARY
\n
10.0
\n
The Sonata
\n
Screen Media Films
\n
Andrew Desmond (director/screenplay); Arthur M...
\n
[6]
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
DECEMBER
\n
23.0
\n
Top Gun: Maverick
\n
Paramount Pictures / Skydance Media / Don Simp...
\n
Joseph Kosinski (director); Ehren Kruger, Eric...
\n
[149]
\n
\n
\n
151
\n
DECEMBER
\n
23.0
\n
The Croods 2
\n
Universal Pictures / DreamWorks Animation
\n
Joel Crawford (director); Kevin Hageman, Dan H...
\n
[150]
\n
\n
\n
152
\n
DECEMBER
\n
25.0
\n
Respect
\n
Metro-Goldwyn-Mayer / Universal Pictures / Bro...
\n
Liesl Tommy (director); Tracey Scott Wilson (s...
\n
[151]
\n
\n
\n
153
\n
DECEMBER
\n
25.0
\n
The Last Duel
\n
20th Century Studios / Scott Free Productions ...
\n
Ridley Scott (director); Ben Affleck, Matt Dam...
\n
[152]
\n
\n
\n
154
\n
DECEMBER
\n
25.0
\n
News of the World
\n
Universal Pictures / Playtone
\n
Paul Greengrass (director/screenplay); Luke Da...
\n
[153]
\n
\n \n
\n
155 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Take an explicit copy: columns are assigned to df_2020 in later cells, and assigning\n# to a bare column slice of df raises SettingWithCopyWarning.\ndf_2020 = df[['Title','Cast and crew']].copy()","execution_count":8,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020","execution_count":9,"outputs":[{"output_type":"execute_result","execution_count":9,"data":{"text/plain":" Title Cast and crew\n0 The Grudge Nicolas Pesce (director/screenplay); Andrea Ri...\n1 Underwater William Eubank (director); Brian Duffield, Ada...\n2 Like a Boss Miguel Arteta (director); Sam Pitman, Adam Col...\n3 Inherit the Viper Anthony Jerjen (director); Andrew Crabtree (sc...\n4 The Sonata Andrew Desmond (director/screenplay); Arthur M...\n.. ... ...\n150 Top Gun: Maverick Joseph Kosinski (director); Ehren Kruger, Eric...\n151 The Croods 2 Joel Crawford (director); Kevin Hageman, Dan H...\n152 Respect Liesl Tommy (director); Tracey Scott Wilson (s...\n153 The Last Duel Ridley Scott (director); Ben Affleck, Matt Dam...\n154 News of the World Paul Greengrass (director/screenplay); Luke Da...\n\n[155 rows x 2 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
\n \n \n
\n
0
\n
The Grudge
\n
Nicolas Pesce (director/screenplay); Andrea Ri...
\n
\n
\n
1
\n
Underwater
\n
William Eubank (director); Brian Duffield, Ada...
\n
\n
\n
2
\n
Like a Boss
\n
Miguel Arteta (director); Sam Pitman, Adam Col...
\n
\n
\n
3
\n
Inherit the Viper
\n
Anthony Jerjen (director); Andrew Crabtree (sc...
\n
\n
\n
4
\n
The Sonata
\n
Andrew Desmond (director/screenplay); Arthur M...
\n
\n
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
Top Gun: Maverick
\n
Joseph Kosinski (director); Ehren Kruger, Eric...
\n
\n
\n
151
\n
The Croods 2
\n
Joel Crawford (director); Kevin Hageman, Dan H...
\n
\n
\n
152
\n
Respect
\n
Liesl Tommy (director); Tracey Scott Wilson (s...
\n
\n
\n
153
\n
The Last Duel
\n
Ridley Scott (director); Ben Affleck, Matt Dam...
\n
\n
\n
154
\n
News of the World
\n
Paul Greengrass (director/screenplay); Luke Da...
\n
\n \n
\n
155 rows × 2 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"!pip install tmdbv3api","execution_count":12,"outputs":[{"output_type":"stream","text":"Collecting tmdbv3api\n Downloading tmdbv3api-1.6.1-py2.py3-none-any.whl (13 kB)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from tmdbv3api) (2.23.0)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (2020.6.20)\nRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (2.9)\nRequirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (3.0.4)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->tmdbv3api) (1.24.3)\nInstalling collected packages: tmdbv3api\nSuccessfully installed tmdbv3api-1.6.1\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"import os\nfrom tmdbv3api import TMDb\nimport json\nimport requests\ntmdb = TMDb()\n# Prefer a key supplied through the TMDB_API_KEY environment variable so a real key never\n# has to be saved in the notebook; the placeholder remains the fallback for manual editing.\ntmdb.api_key = os.environ.get('TMDB_API_KEY', 'YOUR_API_KEY')","execution_count":13,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"from tmdbv3api import Movie\ntmdb_movie = Movie()\ndef get_genre(x):\n    \"\"\"Look up the title `x` on TMDB and return its genre names joined by single spaces.\n\n    The first search hit is used. Returns np.nan when the search finds nothing or the\n    matched movie lists no genres. Raises requests.HTTPError when the movie-details\n    request comes back with an HTTP error status.\n    \"\"\"\n    result = tmdb_movie.search(x)\n    if not result:\n        # Nothing matched this title, so there is no result[0] to read an id from.\n        return np.nan\n    movie_id = result[0].id\n    response = requests.get('https://api.themoviedb.org/3/movie/{}'.format(movie_id), params={'api_key': tmdb.api_key})\n    # Surface HTTP failures directly instead of as a KeyError on 'genres' further down.\n    response.raise_for_status()\n    data_json = response.json()\n    genres = [genre['name'] for genre in data_json.get('genres') or []]\n    # NOTE(review): names are kept exactly as TMDB returns them (e.g. 'Science Fiction'), while\n    # the older dataset displayed elsewhere in this dump uses 'Sci-Fi' - confirm whether to normalise.\n    return ' '.join(genres) if genres else np.nan","execution_count":14,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020['genres'] = df_2020['Title'].map(lambda x: 
get_genre(str(x)))","execution_count":15,"outputs":[{"output_type":"stream","text":"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n \"\"\"Entry point for launching an IPython kernel.\n","name":"stderr"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020","execution_count":16,"outputs":[{"output_type":"execute_result","execution_count":16,"data":{"text/plain":" Title Cast and crew \\\n0 The Grudge Nicolas Pesce (director/screenplay); Andrea Ri... \n1 Underwater William Eubank (director); Brian Duffield, Ada... \n2 Like a Boss Miguel Arteta (director); Sam Pitman, Adam Col... \n3 Inherit the Viper Anthony Jerjen (director); Andrew Crabtree (sc... \n4 The Sonata Andrew Desmond (director/screenplay); Arthur M... \n.. ... ... \n150 Top Gun: Maverick Joseph Kosinski (director); Ehren Kruger, Eric... \n151 The Croods 2 Joel Crawford (director); Kevin Hageman, Dan H... \n152 Respect Liesl Tommy (director); Tracey Scott Wilson (s... \n153 The Last Duel Ridley Scott (director); Ben Affleck, Matt Dam... \n154 News of the World Paul Greengrass (director/screenplay); Luke Da... \n\n genres \n0 Horror Mystery \n1 Action Horror Science Fiction Thriller \n2 Comedy \n3 Drama Thriller Crime \n4 Horror Thriller Mystery \n.. ... \n150 Action Drama \n151 Animation Adventure Family \n152 Music Drama \n153 Drama \n154 Drama Western \n\n[155 rows x 3 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
genres
\n
\n \n \n
\n
0
\n
The Grudge
\n
Nicolas Pesce (director/screenplay); Andrea Ri...
\n
Horror Mystery
\n
\n
\n
1
\n
Underwater
\n
William Eubank (director); Brian Duffield, Ada...
\n
Action Horror Science Fiction Thriller
\n
\n
\n
2
\n
Like a Boss
\n
Miguel Arteta (director); Sam Pitman, Adam Col...
\n
Comedy
\n
\n
\n
3
\n
Inherit the Viper
\n
Anthony Jerjen (director); Andrew Crabtree (sc...
\n
Drama Thriller Crime
\n
\n
\n
4
\n
The Sonata
\n
Andrew Desmond (director/screenplay); Arthur M...
\n
Horror Thriller Mystery
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
Top Gun: Maverick
\n
Joseph Kosinski (director); Ehren Kruger, Eric...
\n
Action Drama
\n
\n
\n
151
\n
The Croods 2
\n
Joel Crawford (director); Kevin Hageman, Dan H...
\n
Animation Adventure Family
\n
\n
\n
152
\n
Respect
\n
Liesl Tommy (director); Tracey Scott Wilson (s...
\n
Music Drama
\n
\n
\n
153
\n
The Last Duel
\n
Ridley Scott (director); Ben Affleck, Matt Dam...
\n
Drama
\n
\n
\n
154
\n
News of the World
\n
Paul Greengrass (director/screenplay); Luke Da...
\n
Drama Western
\n
\n \n
\n
155 rows × 3 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_director(x):\n    \"\"\"Return the text that precedes the first ' (director' credit in `x`.\n\n    Covers '(director)', '(directors)' and combined credits such as\n    '(director/screenplay)'. Returns np.nan when `x` holds no director credit,\n    e.g. the literal 'nan' that str() produces for a missing cell.\n    \"\"\"\n    marker = x.find(' (director')\n    if marker == -1:\n        return np.nan\n    return x[:marker]","execution_count":17,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020['director_name'] = df_2020['Cast and crew'].map(lambda x: get_director(str(x)))","execution_count":18,"outputs":[{"output_type":"stream","text":"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n \"\"\"Entry point for launching an IPython kernel.\n","name":"stderr"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def _cast_member(x, position):\n    \"\"\"Return the cast name at 0-based `position` in a 'Cast and crew' string, or np.nan.\n\n    In the scraped strings shown in this notebook every crew credit looks like\n    'Name (role); ', so the cast list is taken to be whatever follows the last '); '.\n    A string without any credit (e.g. the literal 'nan' that str() yields for a\n    missing cell) is treated as having no cast list.\n    \"\"\"\n    _, separator, cast = x.rpartition('); ')\n    if not separator:\n        return np.nan\n    names = cast.split(', ')\n    if position >= len(names) or not names[position]:\n        return np.nan\n    return names[position]\n\ndef get_actor1(x):\n    \"\"\"Return the first billed cast member, or np.nan when there is none.\"\"\"\n    return _cast_member(x, 0)","execution_count":19,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020['actor_1_name'] = df_2020['Cast and crew'].map(lambda x: get_actor1(str(x)))","execution_count":20,"outputs":[{"output_type":"stream","text":"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n \"\"\"Entry point for launching an IPython kernel.\n","name":"stderr"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor2(x):\n    \"\"\"Return the second billed cast member, or np.nan when fewer than two are listed.\"\"\"\n    return _cast_member(x, 
1)","execution_count":21,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020['actor_2_name'] = df_2020['Cast and crew'].map(lambda x: get_actor2(str(x)))","execution_count":22,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"def get_actor3(x):\n    \"\"\"Return the third billed cast member, or np.nan when fewer than three are listed.\"\"\"\n    return _cast_member(x, 2)","execution_count":23,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"\ndf_2020['actor_3_name'] = df_2020['Cast and crew'].map(lambda x: get_actor3(str(x)))","execution_count":24,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"df_2020","execution_count":25,"outputs":[{"output_type":"execute_result","execution_count":25,"data":{"text/plain":" Title Cast and crew \\\n0 The Grudge Nicolas Pesce (director/screenplay); Andrea Ri... \n1 Underwater William Eubank (director); Brian Duffield, Ada... \n2 Like a Boss Miguel Arteta (director); Sam Pitman, Adam Col... \n3 Inherit the Viper Anthony Jerjen (director); Andrew Crabtree (sc... \n4 The Sonata Andrew Desmond (director/screenplay); Arthur M... \n.. ... ... \n150 Top Gun: Maverick Joseph Kosinski (director); Ehren Kruger, Eric... \n151 The Croods 2 Joel Crawford (director); Kevin Hageman, Dan H... \n152 Respect Liesl Tommy (director); Tracey Scott Wilson (s... \n153 The Last Duel Ridley Scott (director); Ben Affleck, Matt Dam... \n154 News of the World Paul Greengrass (director/screenplay); Luke Da... \n\n genres director_name \\\n0 Horror Mystery Nicolas Pesce \n1 Action Horror Science Fiction Thriller William Eubank \n2 Comedy Miguel Arteta \n3 Drama Thriller Crime Anthony Jerjen \n4 Horror Thriller Mystery Andrew Desmond \n.. ... ... 
\n150 Action Drama Joseph Kosinski \n151 Animation Adventure Family Joel Crawford \n152 Music Drama Liesl Tommy \n153 Drama Ridley Scott \n154 Drama Western Paul Greengrass \n\n actor_1_name actor_2_name actor_3_name \n0 Andrea Riseborough Demián Bichir John Cho \n1 Kristen Stewart Vincent Cassel Jessica Henwick \n2 Tiffany Haddish Rose Byrne Salma Hayek \n3 Josh Hartnett Margarita Levieva Chandler Riggs \n4 Freya Tingley Simon Abkarian Rutger Hauer \n.. ... ... ... \n150 Tom Cruise Miles Teller Jennifer Connelly \n151 Nicolas Cage Emma Stone Ryan Reynolds \n152 Jennifer Hudson Forest Whitaker Marlon Wayans \n153 Matt Damon Adam Driver Jodie Comer \n154 Tom Hanks Helena Zengel Neil Sandilands \n\n[155 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
Title
\n
Cast and crew
\n
genres
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
\n \n \n
\n
0
\n
The Grudge
\n
Nicolas Pesce (director/screenplay); Andrea Ri...
\n
Horror Mystery
\n
Nicolas Pesce
\n
Andrea Riseborough
\n
Demián Bichir
\n
John Cho
\n
\n
\n
1
\n
Underwater
\n
William Eubank (director); Brian Duffield, Ada...
\n
Action Horror Science Fiction Thriller
\n
William Eubank
\n
Kristen Stewart
\n
Vincent Cassel
\n
Jessica Henwick
\n
\n
\n
2
\n
Like a Boss
\n
Miguel Arteta (director); Sam Pitman, Adam Col...
\n
Comedy
\n
Miguel Arteta
\n
Tiffany Haddish
\n
Rose Byrne
\n
Salma Hayek
\n
\n
\n
3
\n
Inherit the Viper
\n
Anthony Jerjen (director); Andrew Crabtree (sc...
\n
Drama Thriller Crime
\n
Anthony Jerjen
\n
Josh Hartnett
\n
Margarita Levieva
\n
Chandler Riggs
\n
\n
\n
4
\n
The Sonata
\n
Andrew Desmond (director/screenplay); Arthur M...
\n
Horror Thriller Mystery
\n
Andrew Desmond
\n
Freya Tingley
\n
Simon Abkarian
\n
Rutger Hauer
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
Top Gun: Maverick
\n
Joseph Kosinski (director); Ehren Kruger, Eric...
\n
Action Drama
\n
Joseph Kosinski
\n
Tom Cruise
\n
Miles Teller
\n
Jennifer Connelly
\n
\n
\n
151
\n
The Croods 2
\n
Joel Crawford (director); Kevin Hageman, Dan H...
\n
Animation Adventure Family
\n
Joel Crawford
\n
Nicolas Cage
\n
Emma Stone
\n
Ryan Reynolds
\n
\n
\n
152
\n
Respect
\n
Liesl Tommy (director); Tracey Scott Wilson (s...
\n
Music Drama
\n
Liesl Tommy
\n
Jennifer Hudson
\n
Forest Whitaker
\n
Marlon Wayans
\n
\n
\n
153
\n
The Last Duel
\n
Ridley Scott (director); Ben Affleck, Matt Dam...
\n
Drama
\n
Ridley Scott
\n
Matt Damon
\n
Adam Driver
\n
Jodie Comer
\n
\n
\n
154
\n
News of the World
\n
Paul Greengrass (director/screenplay); Luke Da...
\n
Drama Western
\n
Paul Greengrass
\n
Tom Hanks
\n
Helena Zengel
\n
Neil Sandilands
\n
\n \n
\n
155 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Rename the scraped 'Title' column to 'movie_title'.\ndf_2020 = df_2020.rename({'Title': 'movie_title'}, axis='columns')","execution_count":26,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# Keep only the feature columns, in this order (the raw 'Cast and crew' text is left behind).\nnew_df20 = df_2020.loc[\n    :,\n    [\n        'director_name',\n        'actor_1_name',\n        'actor_2_name',\n        'actor_3_name',\n        'genres',\n        'movie_title',\n    ],\n]","execution_count":27,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df20","execution_count":28,"outputs":[{"output_type":"execute_result","execution_count":28,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Nicolas Pesce Andrea Riseborough Demián Bichir \n1 William Eubank Kristen Stewart Vincent Cassel \n2 Miguel Arteta Tiffany Haddish Rose Byrne \n3 Anthony Jerjen Josh Hartnett Margarita Levieva \n4 Andrew Desmond Freya Tingley Simon Abkarian \n.. ... ... ... \n150 Joseph Kosinski Tom Cruise Miles Teller \n151 Joel Crawford Nicolas Cage Emma Stone \n152 Liesl Tommy Jennifer Hudson Forest Whitaker \n153 Ridley Scott Matt Damon Adam Driver \n154 Paul Greengrass Tom Hanks Helena Zengel \n\n actor_3_name genres \\\n0 John Cho Horror Mystery \n1 Jessica Henwick Action Horror Science Fiction Thriller \n2 Salma Hayek Comedy \n3 Chandler Riggs Drama Thriller Crime \n4 Rutger Hauer Horror Thriller Mystery \n.. ... ... \n150 Jennifer Connelly Action Drama \n151 Ryan Reynolds Animation Adventure Family \n152 Marlon Wayans Music Drama \n153 Jodie Comer Drama \n154 Neil Sandilands Drama Western \n\n movie_title \n0 The Grudge \n1 Underwater \n2 Like a Boss \n3 Inherit the Viper \n4 The Sonata \n.. ... \n150 Top Gun: Maverick \n151 The Croods 2 \n152 Respect \n153 The Last Duel \n154 News of the World \n\n[155 rows x 6 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
\n \n \n
\n
0
\n
Nicolas Pesce
\n
Andrea Riseborough
\n
Demián Bichir
\n
John Cho
\n
Horror Mystery
\n
The Grudge
\n
\n
\n
1
\n
William Eubank
\n
Kristen Stewart
\n
Vincent Cassel
\n
Jessica Henwick
\n
Action Horror Science Fiction Thriller
\n
Underwater
\n
\n
\n
2
\n
Miguel Arteta
\n
Tiffany Haddish
\n
Rose Byrne
\n
Salma Hayek
\n
Comedy
\n
Like a Boss
\n
\n
\n
3
\n
Anthony Jerjen
\n
Josh Hartnett
\n
Margarita Levieva
\n
Chandler Riggs
\n
Drama Thriller Crime
\n
Inherit the Viper
\n
\n
\n
4
\n
Andrew Desmond
\n
Freya Tingley
\n
Simon Abkarian
\n
Rutger Hauer
\n
Horror Thriller Mystery
\n
The Sonata
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
Joseph Kosinski
\n
Tom Cruise
\n
Miles Teller
\n
Jennifer Connelly
\n
Action Drama
\n
Top Gun: Maverick
\n
\n
\n
151
\n
Joel Crawford
\n
Nicolas Cage
\n
Emma Stone
\n
Ryan Reynolds
\n
Animation Adventure Family
\n
The Croods 2
\n
\n
\n
152
\n
Liesl Tommy
\n
Jennifer Hudson
\n
Forest Whitaker
\n
Marlon Wayans
\n
Music Drama
\n
Respect
\n
\n
\n
153
\n
Ridley Scott
\n
Matt Damon
\n
Adam Driver
\n
Jodie Comer
\n
Drama
\n
The Last Duel
\n
\n
\n
154
\n
Paul Greengrass
\n
Tom Hanks
\n
Helena Zengel
\n
Neil Sandilands
\n
Drama Western
\n
News of the World
\n
\n \n
\n
155 rows × 6 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df20['comb'] = new_df20['actor_1_name'] + ' ' + new_df20['actor_2_name'] + ' '+ new_df20['actor_3_name'] + ' '+ new_df20['director_name'] +' ' + new_df20['genres']","execution_count":29,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"# .copy() yields an independent frame, so the column assignment in the next cell\n# no longer raises SettingWithCopyWarning.\nnew_df20 = new_df20.dropna(how='any').copy()","execution_count":30,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df20['movie_title'] = new_df20['movie_title'].str.lower()","execution_count":31,"outputs":[{"output_type":"stream","text":"/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n \"\"\"Entry point for launching an IPython kernel.\n","name":"stderr"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"new_df20","execution_count":32,"outputs":[{"output_type":"execute_result","execution_count":32,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 Nicolas Pesce Andrea Riseborough Demián Bichir \n1 William Eubank Kristen Stewart Vincent Cassel \n2 Miguel Arteta Tiffany Haddish Rose Byrne \n3 Anthony Jerjen Josh Hartnett Margarita Levieva \n4 Andrew Desmond Freya Tingley Simon Abkarian \n.. ... ... ... \n150 Joseph Kosinski Tom Cruise Miles Teller \n151 Joel Crawford Nicolas Cage Emma Stone \n152 Liesl Tommy Jennifer Hudson Forest Whitaker \n153 Ridley Scott Matt Damon Adam Driver \n154 Paul Greengrass Tom Hanks Helena Zengel \n\n actor_3_name genres \\\n0 John Cho Horror Mystery \n1 Jessica Henwick Action Horror Science Fiction Thriller \n2 Salma Hayek Comedy \n3 Chandler Riggs Drama Thriller Crime \n4 Rutger Hauer Horror Thriller Mystery \n.. ... ... 
\n150 Jennifer Connelly Action Drama \n151 Ryan Reynolds Animation Adventure Family \n152 Marlon Wayans Music Drama \n153 Jodie Comer Drama \n154 Neil Sandilands Drama Western \n\n movie_title comb \n0 the grudge Andrea Riseborough Demián Bichir John Cho Nico... \n1 underwater Kristen Stewart Vincent Cassel Jessica Henwick... \n2 like a boss Tiffany Haddish Rose Byrne Salma Hayek Miguel ... \n3 inherit the viper Josh Hartnett Margarita Levieva Chandler Riggs... \n4 the sonata Freya Tingley Simon Abkarian Rutger Hauer Andr... \n.. ... ... \n150 top gun: maverick Tom Cruise Miles Teller Jennifer Connelly Jose... \n151 the croods 2 Nicolas Cage Emma Stone Ryan Reynolds Joel Cra... \n152 respect Jennifer Hudson Forest Whitaker Marlon Wayans ... \n153 the last duel Matt Damon Adam Driver Jodie Comer Ridley Scot... \n154 news of the world Tom Hanks Helena Zengel Neil Sandilands Paul G... \n\n[141 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
Nicolas Pesce
\n
Andrea Riseborough
\n
Demián Bichir
\n
John Cho
\n
Horror Mystery
\n
the grudge
\n
Andrea Riseborough Demián Bichir John Cho Nico...
\n
\n
\n
1
\n
William Eubank
\n
Kristen Stewart
\n
Vincent Cassel
\n
Jessica Henwick
\n
Action Horror Science Fiction Thriller
\n
underwater
\n
Kristen Stewart Vincent Cassel Jessica Henwick...
\n
\n
\n
2
\n
Miguel Arteta
\n
Tiffany Haddish
\n
Rose Byrne
\n
Salma Hayek
\n
Comedy
\n
like a boss
\n
Tiffany Haddish Rose Byrne Salma Hayek Miguel ...
\n
\n
\n
3
\n
Anthony Jerjen
\n
Josh Hartnett
\n
Margarita Levieva
\n
Chandler Riggs
\n
Drama Thriller Crime
\n
inherit the viper
\n
Josh Hartnett Margarita Levieva Chandler Riggs...
\n
\n
\n
4
\n
Andrew Desmond
\n
Freya Tingley
\n
Simon Abkarian
\n
Rutger Hauer
\n
Horror Thriller Mystery
\n
the sonata
\n
Freya Tingley Simon Abkarian Rutger Hauer Andr...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
150
\n
Joseph Kosinski
\n
Tom Cruise
\n
Miles Teller
\n
Jennifer Connelly
\n
Action Drama
\n
top gun: maverick
\n
Tom Cruise Miles Teller Jennifer Connelly Jose...
\n
\n
\n
151
\n
Joel Crawford
\n
Nicolas Cage
\n
Emma Stone
\n
Ryan Reynolds
\n
Animation Adventure Family
\n
the croods 2
\n
Nicolas Cage Emma Stone Ryan Reynolds Joel Cra...
\n
\n
\n
152
\n
Liesl Tommy
\n
Jennifer Hudson
\n
Forest Whitaker
\n
Marlon Wayans
\n
Music Drama
\n
respect
\n
Jennifer Hudson Forest Whitaker Marlon Wayans ...
\n
\n
\n
153
\n
Ridley Scott
\n
Matt Damon
\n
Adam Driver
\n
Jodie Comer
\n
Drama
\n
the last duel
\n
Matt Damon Adam Driver Jodie Comer Ridley Scot...
\n
\n
\n
154
\n
Paul Greengrass
\n
Tom Hanks
\n
Helena Zengel
\n
Neil Sandilands
\n
Drama Western
\n
news of the world
\n
Tom Hanks Helena Zengel Neil Sandilands Paul G...
\n
\n \n
\n
141 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"old_df = pd.read_csv('../input/final_data.csv')","execution_count":34,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"old_df","execution_count":35,"outputs":[{"output_type":"execute_result","execution_count":35,"data":{"text/plain":" director_name actor_1_name actor_2_name \\\n0 John Lasseter Tom Hanks Tim Allen \n1 Joe Johnston Robin Williams Jonathan Hyde \n2 Howard Deutch Walter Matthau Jack Lemmon \n3 Forest Whitaker Whitney Houston Angela Bassett \n4 Charles Shyer Steve Martin Diane Keaton \n... ... ... ... \n36841 Greta Gerwig Saoirse Ronan Emma Watson \n36842 Sam Mendes George MacKay Dean-Charles Chapman \n36843 Destin Daniel Cretton Michael B. Jordan Jamie Foxx \n36844 Chinonye Chukwu Alfre Woodard Wendell Pierce \n36845 Waymon Boone Mena Suvari Kevin Pollak \n\n actor_3_name genres movie_title \\\n0 Don Rickles Animation Comedy Family toy story \n1 Kirsten Dunst Adventure Fantasy Family jumanji \n2 Ann-Margret Romance Comedy grumpier old men \n3 Loretta Devine Comedy Drama Romance waiting to exhale \n4 Martin Short Comedy father of the bride part ii \n... ... ... ... \n36841 Florence Pugh Drama Romance little women \n36842 Mark Strong War Drama Action History 1917 \n36843 Brie Larson Drama Crime just mercy \n36844 Aldis Hodge Drama clemency \n36845 unknown Horror Thriller apparition \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter ... \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard ... \n3 Whitney Houston Angela Bassett Loretta Devine ... \n4 Steve Martin Diane Keaton Martin Short Charles... \n... ... \n36841 Saoirse Ronan Emma Watson Florence Pugh Greta ... \n36842 George MacKay Dean-Charles Chapman Mark Strong... \n36843 Michael B. Jordan Jamie Foxx Brie Larson Desti... \n36844 Alfre Woodard Wendell Pierce Aldis Hodge Chino... \n36845 Mena Suvari Kevin Pollak unknown Waymon Boone ... 
\n\n[36846 rows x 7 columns]","text/html":"
\n\n
\n \n
\n
\n
director_name
\n
actor_1_name
\n
actor_2_name
\n
actor_3_name
\n
genres
\n
movie_title
\n
comb
\n
\n \n \n
\n
0
\n
John Lasseter
\n
Tom Hanks
\n
Tim Allen
\n
Don Rickles
\n
Animation Comedy Family
\n
toy story
\n
Tom Hanks Tim Allen Don Rickles John Lasseter ...
\n
\n
\n
1
\n
Joe Johnston
\n
Robin Williams
\n
Jonathan Hyde
\n
Kirsten Dunst
\n
Adventure Fantasy Family
\n
jumanji
\n
Robin Williams Jonathan Hyde Kirsten Dunst Joe...
\n
\n
\n
2
\n
Howard Deutch
\n
Walter Matthau
\n
Jack Lemmon
\n
Ann-Margret
\n
Romance Comedy
\n
grumpier old men
\n
Walter Matthau Jack Lemmon Ann-Margret Howard ...
\n
\n
\n
3
\n
Forest Whitaker
\n
Whitney Houston
\n
Angela Bassett
\n
Loretta Devine
\n
Comedy Drama Romance
\n
waiting to exhale
\n
Whitney Houston Angela Bassett Loretta Devine ...
\n
\n
\n
4
\n
Charles Shyer
\n
Steve Martin
\n
Diane Keaton
\n
Martin Short
\n
Comedy
\n
father of the bride part ii
\n
Steve Martin Diane Keaton Martin Short Charles...
\n
\n
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
...
\n
\n
\n
36841
\n
Greta Gerwig
\n
Saoirse Ronan
\n
Emma Watson
\n
Florence Pugh
\n
Drama Romance
\n
little women
\n
Saoirse Ronan Emma Watson Florence Pugh Greta ...
\n
\n
\n
36842
\n
Sam Mendes
\n
George MacKay
\n
Dean-Charles Chapman
\n
Mark Strong
\n
War Drama Action History
\n
1917
\n
George MacKay Dean-Charles Chapman Mark Strong...
\n
\n
\n
36843
\n
Destin Daniel Cretton
\n
Michael B. Jordan
\n
Jamie Foxx
\n
Brie Larson
\n
Drama Crime
\n
just mercy
\n
Michael B. Jordan Jamie Foxx Brie Larson Desti...
\n
\n
\n
36844
\n
Chinonye Chukwu
\n
Alfre Woodard
\n
Wendell Pierce
\n
Aldis Hodge
\n
Drama
\n
clemency
\n
Alfre Woodard Wendell Pierce Aldis Hodge Chino...
\n
\n
\n
36845
\n
Waymon Boone
\n
Mena Suvari
\n
Kevin Pollak
\n
unknown
\n
Horror Thriller
\n
apparition
\n
Mena Suvari Kevin Pollak unknown Waymon Boone ...
\n
\n \n
\n
36846 rows × 7 columns
\n
"},"metadata":{}}]},{"metadata":{"trusted":true},"cell_type":"code","source":"final_df = old_df.append(new_df20,ignore_index=True)","execution_count":36,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"final_df","execution_count":37,"outputs":[{"output_type":"execute_result","execution_count":37,"data":{"text/plain":" director_name actor_1_name actor_2_name actor_3_name \\\n0 John Lasseter Tom Hanks Tim Allen Don Rickles \n1 Joe Johnston Robin Williams Jonathan Hyde Kirsten Dunst \n2 Howard Deutch Walter Matthau Jack Lemmon Ann-Margret \n3 Forest Whitaker Whitney Houston Angela Bassett Loretta Devine \n4 Charles Shyer Steve Martin Diane Keaton Martin Short \n... ... ... ... ... \n36982 Joseph Kosinski Tom Cruise Miles Teller Jennifer Connelly \n36983 Joel Crawford Nicolas Cage Emma Stone Ryan Reynolds \n36984 Liesl Tommy Jennifer Hudson Forest Whitaker Marlon Wayans \n36985 Ridley Scott Matt Damon Adam Driver Jodie Comer \n36986 Paul Greengrass Tom Hanks Helena Zengel Neil Sandilands \n\n genres movie_title \\\n0 Animation Comedy Family toy story \n1 Adventure Fantasy Family jumanji \n2 Romance Comedy grumpier old men \n3 Comedy Drama Romance waiting to exhale \n4 Comedy father of the bride part ii \n... ... ... \n36982 Action Drama top gun: maverick \n36983 Animation Adventure Family the croods 2 \n36984 Music Drama respect \n36985 Drama the last duel \n36986 Drama Western news of the world \n\n comb \n0 Tom Hanks Tim Allen Don Rickles John Lasseter ... \n1 Robin Williams Jonathan Hyde Kirsten Dunst Joe... \n2 Walter Matthau Jack Lemmon Ann-Margret Howard ... \n3 Whitney Houston Angela Bassett Loretta Devine ... \n4 Steve Martin Diane Keaton Martin Short Charles... \n... ... \n36982 Tom Cruise Miles Teller Jennifer Connelly Jose... \n36983 Nicolas Cage Emma Stone Ryan Reynolds Joel Cra... \n36984 Jennifer Hudson Forest Whitaker Marlon Wayans ... \n36985 Matt Damon Adam Driver Jodie Comer Ridley Scot... 
\n36986 Tom Hanks Helena Zengel Neil Sandilands Paul G... \n\n[36987 rows x 7 columns]","text/html":"
Sorry! The movie you requested is not in our database.
68 | Please check the spelling or try with other movies!
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
Hey there!
82 |
85 |
86 |
87 |
Don't worry if the movie that you are looking for is not auto-suggested while typing. Just type the movie name and click on "enter". You will be good to go even if you made some typos.