├── README.md
└── Recommender System With Matrix Factorization .ipynb
/README.md:
--------------------------------------------------------------------------------
1 |
Recommendation System Using Matrix Factorization
2 | In this project we build a movie recommender using matrix factorization in Python. The tools used in this project are as
3 | follows:
4 |
5 | - Numpy
6 | - Pandas
7 |
8 | The dataset I have used is the MovieLens dataset, which is one of the most common datasets used when implementing and testing recommender engines. I have used the small dataset for this program; you can check the website for the larger dataset here. To download the exact dataset I have used, just click here.
9 | Matrix Factorization
10 | Matrix factorization is a mathematical technique that decomposes a matrix into a product of smaller matrices, and is therefore applicable in many scenarios where one would like to uncover structure hidden in the data.
11 |
12 |
13 |
14 |
15 | To understand matrix factorization, here is an article with a complete implementation in Python; just click here.
16 |
--------------------------------------------------------------------------------
/Recommender System With Matrix Factorization .ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Import Libraries "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import numpy as np"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "### Import the movies dataset"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "movie_df = pd.read_csv('movies.csv')"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "rating_df = pd.read_csv('ratings.csv')"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### Checking the tables "
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 29,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " movieId | \n",
79 | " title | \n",
80 | " genres | \n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " \n",
85 | " 0 | \n",
86 | " 1 | \n",
87 | " Toy Story (1995) | \n",
88 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
89 | "
\n",
90 | " \n",
91 | " 1 | \n",
92 | " 2 | \n",
93 | " Jumanji (1995) | \n",
94 | " Adventure|Children|Fantasy | \n",
95 | "
\n",
96 | " \n",
97 | " 2 | \n",
98 | " 3 | \n",
99 | " Grumpier Old Men (1995) | \n",
100 | " Comedy|Romance | \n",
101 | "
\n",
102 | " \n",
103 | " 3 | \n",
104 | " 4 | \n",
105 | " Waiting to Exhale (1995) | \n",
106 | " Comedy|Drama|Romance | \n",
107 | "
\n",
108 | " \n",
109 | " 4 | \n",
110 | " 5 | \n",
111 | " Father of the Bride Part II (1995) | \n",
112 | " Comedy | \n",
113 | "
\n",
114 | " \n",
115 | " 5 | \n",
116 | " 6 | \n",
117 | " Heat (1995) | \n",
118 | " Action|Crime|Thriller | \n",
119 | "
\n",
120 | " \n",
121 | " 6 | \n",
122 | " 7 | \n",
123 | " Sabrina (1995) | \n",
124 | " Comedy|Romance | \n",
125 | "
\n",
126 | " \n",
127 | " 7 | \n",
128 | " 8 | \n",
129 | " Tom and Huck (1995) | \n",
130 | " Adventure|Children | \n",
131 | "
\n",
132 | " \n",
133 | " 8 | \n",
134 | " 9 | \n",
135 | " Sudden Death (1995) | \n",
136 | " Action | \n",
137 | "
\n",
138 | " \n",
139 | " 9 | \n",
140 | " 10 | \n",
141 | " GoldenEye (1995) | \n",
142 | " Action|Adventure|Thriller | \n",
143 | "
\n",
144 | " \n",
145 | "
\n",
146 | "
"
147 | ],
148 | "text/plain": [
149 | " movieId title \\\n",
150 | "0 1 Toy Story (1995) \n",
151 | "1 2 Jumanji (1995) \n",
152 | "2 3 Grumpier Old Men (1995) \n",
153 | "3 4 Waiting to Exhale (1995) \n",
154 | "4 5 Father of the Bride Part II (1995) \n",
155 | "5 6 Heat (1995) \n",
156 | "6 7 Sabrina (1995) \n",
157 | "7 8 Tom and Huck (1995) \n",
158 | "8 9 Sudden Death (1995) \n",
159 | "9 10 GoldenEye (1995) \n",
160 | "\n",
161 | " genres \n",
162 | "0 Adventure|Animation|Children|Comedy|Fantasy \n",
163 | "1 Adventure|Children|Fantasy \n",
164 | "2 Comedy|Romance \n",
165 | "3 Comedy|Drama|Romance \n",
166 | "4 Comedy \n",
167 | "5 Action|Crime|Thriller \n",
168 | "6 Comedy|Romance \n",
169 | "7 Adventure|Children \n",
170 | "8 Action \n",
171 | "9 Action|Adventure|Thriller "
172 | ]
173 | },
174 | "execution_count": 29,
175 | "metadata": {},
176 | "output_type": "execute_result"
177 | }
178 | ],
179 | "source": [
180 | "movie_df.head(10)"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 30,
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "data": {
190 | "text/html": [
191 | "\n",
192 | "\n",
205 | "
\n",
206 | " \n",
207 | " \n",
208 | " | \n",
209 | " userId | \n",
210 | " movieId | \n",
211 | " rating | \n",
212 | " timestamp | \n",
213 | "
\n",
214 | " \n",
215 | " \n",
216 | " \n",
217 | " 0 | \n",
218 | " 1 | \n",
219 | " 1 | \n",
220 | " 4.0 | \n",
221 | " 964982703 | \n",
222 | "
\n",
223 | " \n",
224 | " 1 | \n",
225 | " 1 | \n",
226 | " 3 | \n",
227 | " 4.0 | \n",
228 | " 964981247 | \n",
229 | "
\n",
230 | " \n",
231 | " 2 | \n",
232 | " 1 | \n",
233 | " 6 | \n",
234 | " 4.0 | \n",
235 | " 964982224 | \n",
236 | "
\n",
237 | " \n",
238 | " 3 | \n",
239 | " 1 | \n",
240 | " 47 | \n",
241 | " 5.0 | \n",
242 | " 964983815 | \n",
243 | "
\n",
244 | " \n",
245 | " 4 | \n",
246 | " 1 | \n",
247 | " 50 | \n",
248 | " 5.0 | \n",
249 | " 964982931 | \n",
250 | "
\n",
251 | " \n",
252 | " 5 | \n",
253 | " 1 | \n",
254 | " 70 | \n",
255 | " 3.0 | \n",
256 | " 964982400 | \n",
257 | "
\n",
258 | " \n",
259 | " 6 | \n",
260 | " 1 | \n",
261 | " 101 | \n",
262 | " 5.0 | \n",
263 | " 964980868 | \n",
264 | "
\n",
265 | " \n",
266 | " 7 | \n",
267 | " 1 | \n",
268 | " 110 | \n",
269 | " 4.0 | \n",
270 | " 964982176 | \n",
271 | "
\n",
272 | " \n",
273 | " 8 | \n",
274 | " 1 | \n",
275 | " 151 | \n",
276 | " 5.0 | \n",
277 | " 964984041 | \n",
278 | "
\n",
279 | " \n",
280 | " 9 | \n",
281 | " 1 | \n",
282 | " 157 | \n",
283 | " 5.0 | \n",
284 | " 964984100 | \n",
285 | "
\n",
286 | " \n",
287 | "
\n",
288 | "
"
289 | ],
290 | "text/plain": [
291 | " userId movieId rating timestamp\n",
292 | "0 1 1 4.0 964982703\n",
293 | "1 1 3 4.0 964981247\n",
294 | "2 1 6 4.0 964982224\n",
295 | "3 1 47 5.0 964983815\n",
296 | "4 1 50 5.0 964982931\n",
297 | "5 1 70 3.0 964982400\n",
298 | "6 1 101 5.0 964980868\n",
299 | "7 1 110 4.0 964982176\n",
300 | "8 1 151 5.0 964984041\n",
301 | "9 1 157 5.0 964984100"
302 | ]
303 | },
304 | "execution_count": 30,
305 | "metadata": {},
306 | "output_type": "execute_result"
307 | }
308 | ],
309 | "source": [
310 | "rating_df.head(10)"
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {},
316 | "source": [
317 | "### Now combine the two tables and drop the columns we don't need"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 9,
323 | "metadata": {},
324 | "outputs": [
325 | {
326 | "data": {
327 | "text/html": [
328 | "\n",
329 | "\n",
342 | "
\n",
343 | " \n",
344 | " \n",
345 | " | \n",
346 | " userId | \n",
347 | " movieId | \n",
348 | " rating | \n",
349 | " timestamp | \n",
350 | " title | \n",
351 | " genres | \n",
352 | "
\n",
353 | " \n",
354 | " \n",
355 | " \n",
356 | " 0 | \n",
357 | " 1 | \n",
358 | " 1 | \n",
359 | " 4.0 | \n",
360 | " 964982703 | \n",
361 | " Toy Story (1995) | \n",
362 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
363 | "
\n",
364 | " \n",
365 | " 1 | \n",
366 | " 5 | \n",
367 | " 1 | \n",
368 | " 4.0 | \n",
369 | " 847434962 | \n",
370 | " Toy Story (1995) | \n",
371 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
372 | "
\n",
373 | " \n",
374 | " 2 | \n",
375 | " 7 | \n",
376 | " 1 | \n",
377 | " 4.5 | \n",
378 | " 1106635946 | \n",
379 | " Toy Story (1995) | \n",
380 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
381 | "
\n",
382 | " \n",
383 | " 3 | \n",
384 | " 15 | \n",
385 | " 1 | \n",
386 | " 2.5 | \n",
387 | " 1510577970 | \n",
388 | " Toy Story (1995) | \n",
389 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
390 | "
\n",
391 | " \n",
392 | " 4 | \n",
393 | " 17 | \n",
394 | " 1 | \n",
395 | " 4.5 | \n",
396 | " 1305696483 | \n",
397 | " Toy Story (1995) | \n",
398 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
399 | "
\n",
400 | " \n",
401 | " 5 | \n",
402 | " 18 | \n",
403 | " 1 | \n",
404 | " 3.5 | \n",
405 | " 1455209816 | \n",
406 | " Toy Story (1995) | \n",
407 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
408 | "
\n",
409 | " \n",
410 | " 6 | \n",
411 | " 19 | \n",
412 | " 1 | \n",
413 | " 4.0 | \n",
414 | " 965705637 | \n",
415 | " Toy Story (1995) | \n",
416 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
417 | "
\n",
418 | " \n",
419 | " 7 | \n",
420 | " 21 | \n",
421 | " 1 | \n",
422 | " 3.5 | \n",
423 | " 1407618878 | \n",
424 | " Toy Story (1995) | \n",
425 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
426 | "
\n",
427 | " \n",
428 | " 8 | \n",
429 | " 27 | \n",
430 | " 1 | \n",
431 | " 3.0 | \n",
432 | " 962685262 | \n",
433 | " Toy Story (1995) | \n",
434 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
435 | "
\n",
436 | " \n",
437 | " 9 | \n",
438 | " 31 | \n",
439 | " 1 | \n",
440 | " 5.0 | \n",
441 | " 850466616 | \n",
442 | " Toy Story (1995) | \n",
443 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
444 | "
\n",
445 | " \n",
446 | "
\n",
447 | "
"
448 | ],
449 | "text/plain": [
450 | " userId movieId rating timestamp title \\\n",
451 | "0 1 1 4.0 964982703 Toy Story (1995) \n",
452 | "1 5 1 4.0 847434962 Toy Story (1995) \n",
453 | "2 7 1 4.5 1106635946 Toy Story (1995) \n",
454 | "3 15 1 2.5 1510577970 Toy Story (1995) \n",
455 | "4 17 1 4.5 1305696483 Toy Story (1995) \n",
456 | "5 18 1 3.5 1455209816 Toy Story (1995) \n",
457 | "6 19 1 4.0 965705637 Toy Story (1995) \n",
458 | "7 21 1 3.5 1407618878 Toy Story (1995) \n",
459 | "8 27 1 3.0 962685262 Toy Story (1995) \n",
460 | "9 31 1 5.0 850466616 Toy Story (1995) \n",
461 | "\n",
462 | " genres \n",
463 | "0 Adventure|Animation|Children|Comedy|Fantasy \n",
464 | "1 Adventure|Animation|Children|Comedy|Fantasy \n",
465 | "2 Adventure|Animation|Children|Comedy|Fantasy \n",
466 | "3 Adventure|Animation|Children|Comedy|Fantasy \n",
467 | "4 Adventure|Animation|Children|Comedy|Fantasy \n",
468 | "5 Adventure|Animation|Children|Comedy|Fantasy \n",
469 | "6 Adventure|Animation|Children|Comedy|Fantasy \n",
470 | "7 Adventure|Animation|Children|Comedy|Fantasy \n",
471 | "8 Adventure|Animation|Children|Comedy|Fantasy \n",
472 | "9 Adventure|Animation|Children|Comedy|Fantasy "
473 | ]
474 | },
475 | "execution_count": 9,
476 | "metadata": {},
477 | "output_type": "execute_result"
478 | }
479 | ],
480 | "source": [
481 | "combine_movie_rating = pd.merge(rating_df, movie_df, on='movieId')\n",
482 | "combine_movie_rating.head(10)"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 31,
488 | "metadata": {},
489 | "outputs": [
490 | {
491 | "data": {
492 | "text/html": [
493 | "\n",
494 | "\n",
507 | "
\n",
508 | " \n",
509 | " \n",
510 | " | \n",
511 | " userId | \n",
512 | " movieId | \n",
513 | " rating | \n",
514 | " title | \n",
515 | "
\n",
516 | " \n",
517 | " \n",
518 | " \n",
519 | " 0 | \n",
520 | " 1 | \n",
521 | " 1 | \n",
522 | " 4.0 | \n",
523 | " Toy Story (1995) | \n",
524 | "
\n",
525 | " \n",
526 | " 1 | \n",
527 | " 5 | \n",
528 | " 1 | \n",
529 | " 4.0 | \n",
530 | " Toy Story (1995) | \n",
531 | "
\n",
532 | " \n",
533 | " 2 | \n",
534 | " 7 | \n",
535 | " 1 | \n",
536 | " 4.5 | \n",
537 | " Toy Story (1995) | \n",
538 | "
\n",
539 | " \n",
540 | " 3 | \n",
541 | " 15 | \n",
542 | " 1 | \n",
543 | " 2.5 | \n",
544 | " Toy Story (1995) | \n",
545 | "
\n",
546 | " \n",
547 | " 4 | \n",
548 | " 17 | \n",
549 | " 1 | \n",
550 | " 4.5 | \n",
551 | " Toy Story (1995) | \n",
552 | "
\n",
553 | " \n",
554 | " 5 | \n",
555 | " 18 | \n",
556 | " 1 | \n",
557 | " 3.5 | \n",
558 | " Toy Story (1995) | \n",
559 | "
\n",
560 | " \n",
561 | " 6 | \n",
562 | " 19 | \n",
563 | " 1 | \n",
564 | " 4.0 | \n",
565 | " Toy Story (1995) | \n",
566 | "
\n",
567 | " \n",
568 | " 7 | \n",
569 | " 21 | \n",
570 | " 1 | \n",
571 | " 3.5 | \n",
572 | " Toy Story (1995) | \n",
573 | "
\n",
574 | " \n",
575 | " 8 | \n",
576 | " 27 | \n",
577 | " 1 | \n",
578 | " 3.0 | \n",
579 | " Toy Story (1995) | \n",
580 | "
\n",
581 | " \n",
582 | " 9 | \n",
583 | " 31 | \n",
584 | " 1 | \n",
585 | " 5.0 | \n",
586 | " Toy Story (1995) | \n",
587 | "
\n",
588 | " \n",
589 | "
\n",
590 | "
"
591 | ],
592 | "text/plain": [
593 | " userId movieId rating title\n",
594 | "0 1 1 4.0 Toy Story (1995)\n",
595 | "1 5 1 4.0 Toy Story (1995)\n",
596 | "2 7 1 4.5 Toy Story (1995)\n",
597 | "3 15 1 2.5 Toy Story (1995)\n",
598 | "4 17 1 4.5 Toy Story (1995)\n",
599 | "5 18 1 3.5 Toy Story (1995)\n",
600 | "6 19 1 4.0 Toy Story (1995)\n",
601 | "7 21 1 3.5 Toy Story (1995)\n",
602 | "8 27 1 3.0 Toy Story (1995)\n",
603 | "9 31 1 5.0 Toy Story (1995)"
604 | ]
605 | },
606 | "execution_count": 31,
607 | "metadata": {},
608 | "output_type": "execute_result"
609 | }
610 | ],
611 | "source": [
612 | "columns = ['timestamp', 'genres']\n",
613 | "combine_movie_rating = combine_movie_rating.drop(columns, axis=1)\n",
614 | "combine_movie_rating.head(10)"
615 | ]
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": 32,
620 | "metadata": {},
621 | "outputs": [
622 | {
623 | "data": {
624 | "text/html": [
625 | "\n",
626 | "\n",
639 | "
\n",
640 | " \n",
641 | " \n",
642 | " | \n",
643 | " title | \n",
644 | " totalRatingCount | \n",
645 | "
\n",
646 | " \n",
647 | " \n",
648 | " \n",
649 | " 0 | \n",
650 | " '71 (2014) | \n",
651 | " 1 | \n",
652 | "
\n",
653 | " \n",
654 | " 1 | \n",
655 | " 'Hellboy': The Seeds of Creation (2004) | \n",
656 | " 1 | \n",
657 | "
\n",
658 | " \n",
659 | " 2 | \n",
660 | " 'Round Midnight (1986) | \n",
661 | " 2 | \n",
662 | "
\n",
663 | " \n",
664 | " 3 | \n",
665 | " 'Salem's Lot (2004) | \n",
666 | " 1 | \n",
667 | "
\n",
668 | " \n",
669 | " 4 | \n",
670 | " 'Til There Was You (1997) | \n",
671 | " 2 | \n",
672 | "
\n",
673 | " \n",
674 | " 5 | \n",
675 | " 'Tis the Season for Love (2015) | \n",
676 | " 1 | \n",
677 | "
\n",
678 | " \n",
679 | " 6 | \n",
680 | " 'burbs, The (1989) | \n",
681 | " 17 | \n",
682 | "
\n",
683 | " \n",
684 | " 7 | \n",
685 | " 'night Mother (1986) | \n",
686 | " 1 | \n",
687 | "
\n",
688 | " \n",
689 | " 8 | \n",
690 | " (500) Days of Summer (2009) | \n",
691 | " 42 | \n",
692 | "
\n",
693 | " \n",
694 | " 9 | \n",
695 | " *batteries not included (1987) | \n",
696 | " 7 | \n",
697 | "
\n",
698 | " \n",
699 | "
\n",
700 | "
"
701 | ],
702 | "text/plain": [
703 | " title totalRatingCount\n",
704 | "0 '71 (2014) 1\n",
705 | "1 'Hellboy': The Seeds of Creation (2004) 1\n",
706 | "2 'Round Midnight (1986) 2\n",
707 | "3 'Salem's Lot (2004) 1\n",
708 | "4 'Til There Was You (1997) 2\n",
709 | "5 'Tis the Season for Love (2015) 1\n",
710 | "6 'burbs, The (1989) 17\n",
711 | "7 'night Mother (1986) 1\n",
712 | "8 (500) Days of Summer (2009) 42\n",
713 | "9 *batteries not included (1987) 7"
714 | ]
715 | },
716 | "execution_count": 32,
717 | "metadata": {},
718 | "output_type": "execute_result"
719 | }
720 | ],
721 | "source": [
722 | "combine_movie_rating = combine_movie_rating.dropna(axis = 0, subset = ['title'])\n",
723 | "\n",
724 | "movie_ratingCount = (combine_movie_rating.\n",
725 | " groupby(by = ['title'])['rating'].\n",
726 | " count().\n",
727 | " reset_index().\n",
728 | " rename(columns = {'rating': 'totalRatingCount'})\n",
729 | " [['title', 'totalRatingCount']]\n",
730 | " )\n",
731 | "movie_ratingCount.head(10)"
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "execution_count": 14,
737 | "metadata": {},
738 | "outputs": [
739 | {
740 | "data": {
741 | "text/html": [
742 | "\n",
743 | "\n",
756 | "
\n",
757 | " \n",
758 | " \n",
759 | " | \n",
760 | " userId | \n",
761 | " movieId | \n",
762 | " rating | \n",
763 | " timestamp | \n",
764 | " title | \n",
765 | " genres | \n",
766 | " totalRatingCount | \n",
767 | "
\n",
768 | " \n",
769 | " \n",
770 | " \n",
771 | " 0 | \n",
772 | " 1 | \n",
773 | " 1 | \n",
774 | " 4.0 | \n",
775 | " 964982703 | \n",
776 | " Toy Story (1995) | \n",
777 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
778 | " 215 | \n",
779 | "
\n",
780 | " \n",
781 | " 1 | \n",
782 | " 5 | \n",
783 | " 1 | \n",
784 | " 4.0 | \n",
785 | " 847434962 | \n",
786 | " Toy Story (1995) | \n",
787 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
788 | " 215 | \n",
789 | "
\n",
790 | " \n",
791 | " 2 | \n",
792 | " 7 | \n",
793 | " 1 | \n",
794 | " 4.5 | \n",
795 | " 1106635946 | \n",
796 | " Toy Story (1995) | \n",
797 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
798 | " 215 | \n",
799 | "
\n",
800 | " \n",
801 | " 3 | \n",
802 | " 15 | \n",
803 | " 1 | \n",
804 | " 2.5 | \n",
805 | " 1510577970 | \n",
806 | " Toy Story (1995) | \n",
807 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
808 | " 215 | \n",
809 | "
\n",
810 | " \n",
811 | " 4 | \n",
812 | " 17 | \n",
813 | " 1 | \n",
814 | " 4.5 | \n",
815 | " 1305696483 | \n",
816 | " Toy Story (1995) | \n",
817 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
818 | " 215 | \n",
819 | "
\n",
820 | " \n",
821 | " 5 | \n",
822 | " 18 | \n",
823 | " 1 | \n",
824 | " 3.5 | \n",
825 | " 1455209816 | \n",
826 | " Toy Story (1995) | \n",
827 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
828 | " 215 | \n",
829 | "
\n",
830 | " \n",
831 | " 6 | \n",
832 | " 19 | \n",
833 | " 1 | \n",
834 | " 4.0 | \n",
835 | " 965705637 | \n",
836 | " Toy Story (1995) | \n",
837 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
838 | " 215 | \n",
839 | "
\n",
840 | " \n",
841 | " 7 | \n",
842 | " 21 | \n",
843 | " 1 | \n",
844 | " 3.5 | \n",
845 | " 1407618878 | \n",
846 | " Toy Story (1995) | \n",
847 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
848 | " 215 | \n",
849 | "
\n",
850 | " \n",
851 | " 8 | \n",
852 | " 27 | \n",
853 | " 1 | \n",
854 | " 3.0 | \n",
855 | " 962685262 | \n",
856 | " Toy Story (1995) | \n",
857 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
858 | " 215 | \n",
859 | "
\n",
860 | " \n",
861 | " 9 | \n",
862 | " 31 | \n",
863 | " 1 | \n",
864 | " 5.0 | \n",
865 | " 850466616 | \n",
866 | " Toy Story (1995) | \n",
867 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
868 | " 215 | \n",
869 | "
\n",
870 | " \n",
871 | "
\n",
872 | "
"
873 | ],
874 | "text/plain": [
875 | " userId movieId rating timestamp title \\\n",
876 | "0 1 1 4.0 964982703 Toy Story (1995) \n",
877 | "1 5 1 4.0 847434962 Toy Story (1995) \n",
878 | "2 7 1 4.5 1106635946 Toy Story (1995) \n",
879 | "3 15 1 2.5 1510577970 Toy Story (1995) \n",
880 | "4 17 1 4.5 1305696483 Toy Story (1995) \n",
881 | "5 18 1 3.5 1455209816 Toy Story (1995) \n",
882 | "6 19 1 4.0 965705637 Toy Story (1995) \n",
883 | "7 21 1 3.5 1407618878 Toy Story (1995) \n",
884 | "8 27 1 3.0 962685262 Toy Story (1995) \n",
885 | "9 31 1 5.0 850466616 Toy Story (1995) \n",
886 | "\n",
887 | " genres totalRatingCount \n",
888 | "0 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
889 | "1 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
890 | "2 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
891 | "3 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
892 | "4 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
893 | "5 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
894 | "6 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
895 | "7 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
896 | "8 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
897 | "9 Adventure|Animation|Children|Comedy|Fantasy 215 "
898 | ]
899 | },
900 | "execution_count": 14,
901 | "metadata": {},
902 | "output_type": "execute_result"
903 | }
904 | ],
905 | "source": [
906 | "rating_with_totalRatingCount = combine_movie_rating.merge(movie_ratingCount, left_on = 'title', right_on = 'title', how = 'left')\n",
907 | "rating_with_totalRatingCount.head(10)"
908 | ]
909 | },
910 | {
911 | "cell_type": "markdown",
912 | "metadata": {},
913 | "source": [
914 | "### Now drop duplicate ratings (same userId and title)"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "execution_count": 20,
920 | "metadata": {},
921 | "outputs": [
922 | {
923 | "data": {
924 | "text/html": [
925 | "\n",
926 | "\n",
939 | "
\n",
940 | " \n",
941 | " \n",
942 | " | \n",
943 | " userId | \n",
944 | " movieId | \n",
945 | " rating | \n",
946 | " timestamp | \n",
947 | " title | \n",
948 | " genres | \n",
949 | " totalRatingCount | \n",
950 | "
\n",
951 | " \n",
952 | " \n",
953 | " \n",
954 | " 0 | \n",
955 | " 1 | \n",
956 | " 1 | \n",
957 | " 4.0 | \n",
958 | " 964982703 | \n",
959 | " Toy Story (1995) | \n",
960 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
961 | " 215 | \n",
962 | "
\n",
963 | " \n",
964 | " 1 | \n",
965 | " 5 | \n",
966 | " 1 | \n",
967 | " 4.0 | \n",
968 | " 847434962 | \n",
969 | " Toy Story (1995) | \n",
970 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
971 | " 215 | \n",
972 | "
\n",
973 | " \n",
974 | " 2 | \n",
975 | " 7 | \n",
976 | " 1 | \n",
977 | " 4.5 | \n",
978 | " 1106635946 | \n",
979 | " Toy Story (1995) | \n",
980 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
981 | " 215 | \n",
982 | "
\n",
983 | " \n",
984 | " 3 | \n",
985 | " 15 | \n",
986 | " 1 | \n",
987 | " 2.5 | \n",
988 | " 1510577970 | \n",
989 | " Toy Story (1995) | \n",
990 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
991 | " 215 | \n",
992 | "
\n",
993 | " \n",
994 | " 4 | \n",
995 | " 17 | \n",
996 | " 1 | \n",
997 | " 4.5 | \n",
998 | " 1305696483 | \n",
999 | " Toy Story (1995) | \n",
1000 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1001 | " 215 | \n",
1002 | "
\n",
1003 | " \n",
1004 | " 5 | \n",
1005 | " 18 | \n",
1006 | " 1 | \n",
1007 | " 3.5 | \n",
1008 | " 1455209816 | \n",
1009 | " Toy Story (1995) | \n",
1010 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1011 | " 215 | \n",
1012 | "
\n",
1013 | " \n",
1014 | " 6 | \n",
1015 | " 19 | \n",
1016 | " 1 | \n",
1017 | " 4.0 | \n",
1018 | " 965705637 | \n",
1019 | " Toy Story (1995) | \n",
1020 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1021 | " 215 | \n",
1022 | "
\n",
1023 | " \n",
1024 | " 7 | \n",
1025 | " 21 | \n",
1026 | " 1 | \n",
1027 | " 3.5 | \n",
1028 | " 1407618878 | \n",
1029 | " Toy Story (1995) | \n",
1030 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1031 | " 215 | \n",
1032 | "
\n",
1033 | " \n",
1034 | " 8 | \n",
1035 | " 27 | \n",
1036 | " 1 | \n",
1037 | " 3.0 | \n",
1038 | " 962685262 | \n",
1039 | " Toy Story (1995) | \n",
1040 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1041 | " 215 | \n",
1042 | "
\n",
1043 | " \n",
1044 | " 9 | \n",
1045 | " 31 | \n",
1046 | " 1 | \n",
1047 | " 5.0 | \n",
1048 | " 850466616 | \n",
1049 | " Toy Story (1995) | \n",
1050 | " Adventure|Animation|Children|Comedy|Fantasy | \n",
1051 | " 215 | \n",
1052 | "
\n",
1053 | " \n",
1054 | "
\n",
1055 | "
"
1056 | ],
1057 | "text/plain": [
1058 | " userId movieId rating timestamp title \\\n",
1059 | "0 1 1 4.0 964982703 Toy Story (1995) \n",
1060 | "1 5 1 4.0 847434962 Toy Story (1995) \n",
1061 | "2 7 1 4.5 1106635946 Toy Story (1995) \n",
1062 | "3 15 1 2.5 1510577970 Toy Story (1995) \n",
1063 | "4 17 1 4.5 1305696483 Toy Story (1995) \n",
1064 | "5 18 1 3.5 1455209816 Toy Story (1995) \n",
1065 | "6 19 1 4.0 965705637 Toy Story (1995) \n",
1066 | "7 21 1 3.5 1407618878 Toy Story (1995) \n",
1067 | "8 27 1 3.0 962685262 Toy Story (1995) \n",
1068 | "9 31 1 5.0 850466616 Toy Story (1995) \n",
1069 | "\n",
1070 | " genres totalRatingCount \n",
1071 | "0 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1072 | "1 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1073 | "2 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1074 | "3 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1075 | "4 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1076 | "5 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1077 | "6 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1078 | "7 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1079 | "8 Adventure|Animation|Children|Comedy|Fantasy 215 \n",
1080 | "9 Adventure|Animation|Children|Comedy|Fantasy 215 "
1081 | ]
1082 | },
1083 | "execution_count": 20,
1084 | "metadata": {},
1085 | "output_type": "execute_result"
1086 | }
1087 | ],
1088 | "source": [
1089 | "user_rating = rating_with_totalRatingCount.drop_duplicates(['userId','title'])\n",
1090 | "user_rating.head(10)"
1091 | ]
1092 | },
1093 | {
1094 | "cell_type": "markdown",
1095 | "metadata": {},
1096 | "source": [
1097 | "## Matrix Factorization"
1098 | ]
1099 | },
1100 | {
1101 | "cell_type": "markdown",
1102 | "metadata": {},
1103 | "source": [
1104 | "### Now create the user-by-title rating matrix and fill missing ratings with 0"
1105 | ]
1106 | },
1107 | {
1108 | "cell_type": "code",
1109 | "execution_count": 21,
1110 | "metadata": {},
1111 | "outputs": [
1112 | {
1113 | "data": {
1114 | "text/html": [
1115 | "\n",
1116 | "\n",
1129 | "
\n",
1130 | " \n",
1131 | " \n",
1132 | " title | \n",
1133 | " '71 (2014) | \n",
1134 | " 'Hellboy': The Seeds of Creation (2004) | \n",
1135 | " 'Round Midnight (1986) | \n",
1136 | " 'Salem's Lot (2004) | \n",
1137 | " 'Til There Was You (1997) | \n",
1138 | " 'Tis the Season for Love (2015) | \n",
1139 | " 'burbs, The (1989) | \n",
1140 | " 'night Mother (1986) | \n",
1141 | " (500) Days of Summer (2009) | \n",
1142 | " *batteries not included (1987) | \n",
1143 | " ... | \n",
1144 | " Zulu (2013) | \n",
1145 | " [REC] (2007) | \n",
1146 | " [REC]² (2009) | \n",
1147 | " [REC]³ 3 Génesis (2012) | \n",
1148 | " anohana: The Flower We Saw That Day - The Movie (2013) | \n",
1149 | " eXistenZ (1999) | \n",
1150 | " xXx (2002) | \n",
1151 | " xXx: State of the Union (2005) | \n",
1152 | " ¡Three Amigos! (1986) | \n",
1153 | " À nous la liberté (Freedom for Us) (1931) | \n",
1154 | "
\n",
1155 | " \n",
1156 | " userId | \n",
1157 | " | \n",
1158 | " | \n",
1159 | " | \n",
1160 | " | \n",
1161 | " | \n",
1162 | " | \n",
1163 | " | \n",
1164 | " | \n",
1165 | " | \n",
1166 | " | \n",
1167 | " | \n",
1168 | " | \n",
1169 | " | \n",
1170 | " | \n",
1171 | " | \n",
1172 | " | \n",
1173 | " | \n",
1174 | " | \n",
1175 | " | \n",
1176 | " | \n",
1177 | " | \n",
1178 | "
\n",
1179 | " \n",
1180 | " \n",
1181 | " \n",
1182 | " 1 | \n",
1183 | " 0.0 | \n",
1184 | " 0.0 | \n",
1185 | " 0.0 | \n",
1186 | " 0.0 | \n",
1187 | " 0.0 | \n",
1188 | " 0.0 | \n",
1189 | " 0.0 | \n",
1190 | " 0.0 | \n",
1191 | " 0.0 | \n",
1192 | " 0.0 | \n",
1193 | " ... | \n",
1194 | " 0.0 | \n",
1195 | " 0.0 | \n",
1196 | " 0.0 | \n",
1197 | " 0.0 | \n",
1198 | " 0.0 | \n",
1199 | " 0.0 | \n",
1200 | " 0.0 | \n",
1201 | " 0.0 | \n",
1202 | " 4.0 | \n",
1203 | " 0.0 | \n",
1204 | "
\n",
1205 | " \n",
1206 | " 2 | \n",
1207 | " 0.0 | \n",
1208 | " 0.0 | \n",
1209 | " 0.0 | \n",
1210 | " 0.0 | \n",
1211 | " 0.0 | \n",
1212 | " 0.0 | \n",
1213 | " 0.0 | \n",
1214 | " 0.0 | \n",
1215 | " 0.0 | \n",
1216 | " 0.0 | \n",
1217 | " ... | \n",
1218 | " 0.0 | \n",
1219 | " 0.0 | \n",
1220 | " 0.0 | \n",
1221 | " 0.0 | \n",
1222 | " 0.0 | \n",
1223 | " 0.0 | \n",
1224 | " 0.0 | \n",
1225 | " 0.0 | \n",
1226 | " 0.0 | \n",
1227 | " 0.0 | \n",
1228 | "
\n",
1229 | " \n",
1230 | " 3 | \n",
1231 | " 0.0 | \n",
1232 | " 0.0 | \n",
1233 | " 0.0 | \n",
1234 | " 0.0 | \n",
1235 | " 0.0 | \n",
1236 | " 0.0 | \n",
1237 | " 0.0 | \n",
1238 | " 0.0 | \n",
1239 | " 0.0 | \n",
1240 | " 0.0 | \n",
1241 | " ... | \n",
1242 | " 0.0 | \n",
1243 | " 0.0 | \n",
1244 | " 0.0 | \n",
1245 | " 0.0 | \n",
1246 | " 0.0 | \n",
1247 | " 0.0 | \n",
1248 | " 0.0 | \n",
1249 | " 0.0 | \n",
1250 | " 0.0 | \n",
1251 | " 0.0 | \n",
1252 | "
\n",
1253 | " \n",
1254 | " 4 | \n",
1255 | " 0.0 | \n",
1256 | " 0.0 | \n",
1257 | " 0.0 | \n",
1258 | " 0.0 | \n",
1259 | " 0.0 | \n",
1260 | " 0.0 | \n",
1261 | " 0.0 | \n",
1262 | " 0.0 | \n",
1263 | " 0.0 | \n",
1264 | " 0.0 | \n",
1265 | " ... | \n",
1266 | " 0.0 | \n",
1267 | " 0.0 | \n",
1268 | " 0.0 | \n",
1269 | " 0.0 | \n",
1270 | " 0.0 | \n",
1271 | " 0.0 | \n",
1272 | " 0.0 | \n",
1273 | " 0.0 | \n",
1274 | " 0.0 | \n",
1275 | " 0.0 | \n",
1276 | "
\n",
1277 | " \n",
1278 | " 5 | \n",
1279 | " 0.0 | \n",
1280 | " 0.0 | \n",
1281 | " 0.0 | \n",
1282 | " 0.0 | \n",
1283 | " 0.0 | \n",
1284 | " 0.0 | \n",
1285 | " 0.0 | \n",
1286 | " 0.0 | \n",
1287 | " 0.0 | \n",
1288 | " 0.0 | \n",
1289 | " ... | \n",
1290 | " 0.0 | \n",
1291 | " 0.0 | \n",
1292 | " 0.0 | \n",
1293 | " 0.0 | \n",
1294 | " 0.0 | \n",
1295 | " 0.0 | \n",
1296 | " 0.0 | \n",
1297 | " 0.0 | \n",
1298 | " 0.0 | \n",
1299 | " 0.0 | \n",
1300 | "
\n",
1301 | " \n",
1302 | " 6 | \n",
1303 | " 0.0 | \n",
1304 | " 0.0 | \n",
1305 | " 0.0 | \n",
1306 | " 0.0 | \n",
1307 | " 0.0 | \n",
1308 | " 0.0 | \n",
1309 | " 0.0 | \n",
1310 | " 0.0 | \n",
1311 | " 0.0 | \n",
1312 | " 0.0 | \n",
1313 | " ... | \n",
1314 | " 0.0 | \n",
1315 | " 0.0 | \n",
1316 | " 0.0 | \n",
1317 | " 0.0 | \n",
1318 | " 0.0 | \n",
1319 | " 0.0 | \n",
1320 | " 0.0 | \n",
1321 | " 0.0 | \n",
1322 | " 0.0 | \n",
1323 | " 0.0 | \n",
1324 | "
\n",
1325 | " \n",
1326 | " 7 | \n",
1327 | " 0.0 | \n",
1328 | " 0.0 | \n",
1329 | " 0.0 | \n",
1330 | " 0.0 | \n",
1331 | " 0.0 | \n",
1332 | " 0.0 | \n",
1333 | " 0.0 | \n",
1334 | " 0.0 | \n",
1335 | " 0.0 | \n",
1336 | " 0.0 | \n",
1337 | " ... | \n",
1338 | " 0.0 | \n",
1339 | " 0.0 | \n",
1340 | " 0.0 | \n",
1341 | " 0.0 | \n",
1342 | " 0.0 | \n",
1343 | " 0.0 | \n",
1344 | " 0.0 | \n",
1345 | " 0.0 | \n",
1346 | " 0.0 | \n",
1347 | " 0.0 | \n",
1348 | "
\n",
1349 | " \n",
1350 | " 8 | \n",
1351 | " 0.0 | \n",
1352 | " 0.0 | \n",
1353 | " 0.0 | \n",
1354 | " 0.0 | \n",
1355 | " 0.0 | \n",
1356 | " 0.0 | \n",
1357 | " 0.0 | \n",
1358 | " 0.0 | \n",
1359 | " 0.0 | \n",
1360 | " 0.0 | \n",
1361 | " ... | \n",
1362 | " 0.0 | \n",
1363 | " 0.0 | \n",
1364 | " 0.0 | \n",
1365 | " 0.0 | \n",
1366 | " 0.0 | \n",
1367 | " 0.0 | \n",
1368 | " 0.0 | \n",
1369 | " 0.0 | \n",
1370 | " 0.0 | \n",
1371 | " 0.0 | \n",
1372 | "
\n",
1373 | " \n",
1374 | " 9 | \n",
1375 | " 0.0 | \n",
1376 | " 0.0 | \n",
1377 | " 0.0 | \n",
1378 | " 0.0 | \n",
1379 | " 0.0 | \n",
1380 | " 0.0 | \n",
1381 | " 0.0 | \n",
1382 | " 0.0 | \n",
1383 | " 0.0 | \n",
1384 | " 0.0 | \n",
1385 | " ... | \n",
1386 | " 0.0 | \n",
1387 | " 0.0 | \n",
1388 | " 0.0 | \n",
1389 | " 0.0 | \n",
1390 | " 0.0 | \n",
1391 | " 0.0 | \n",
1392 | " 1.0 | \n",
1393 | " 0.0 | \n",
1394 | " 0.0 | \n",
1395 | " 0.0 | \n",
1396 | "
\n",
1397 | " \n",
1398 | " 10 | \n",
1399 | " 0.0 | \n",
1400 | " 0.0 | \n",
1401 | " 0.0 | \n",
1402 | " 0.0 | \n",
1403 | " 0.0 | \n",
1404 | " 0.0 | \n",
1405 | " 0.0 | \n",
1406 | " 0.0 | \n",
1407 | " 0.0 | \n",
1408 | " 0.0 | \n",
1409 | " ... | \n",
1410 | " 0.0 | \n",
1411 | " 0.0 | \n",
1412 | " 0.0 | \n",
1413 | " 0.0 | \n",
1414 | " 0.0 | \n",
1415 | " 0.0 | \n",
1416 | " 0.0 | \n",
1417 | " 0.0 | \n",
1418 | " 0.0 | \n",
1419 | " 0.0 | \n",
1420 | "
\n",
1421 | " \n",
1422 | "
\n",
1423 | "
10 rows × 9719 columns
\n",
1424 | "
"
1425 | ],
1426 | "text/plain": [
1427 | "title '71 (2014) 'Hellboy': The Seeds of Creation (2004) \\\n",
1428 | "userId \n",
1429 | "1 0.0 0.0 \n",
1430 | "2 0.0 0.0 \n",
1431 | "3 0.0 0.0 \n",
1432 | "4 0.0 0.0 \n",
1433 | "5 0.0 0.0 \n",
1434 | "6 0.0 0.0 \n",
1435 | "7 0.0 0.0 \n",
1436 | "8 0.0 0.0 \n",
1437 | "9 0.0 0.0 \n",
1438 | "10 0.0 0.0 \n",
1439 | "\n",
1440 | "title 'Round Midnight (1986) 'Salem's Lot (2004) \\\n",
1441 | "userId \n",
1442 | "1 0.0 0.0 \n",
1443 | "2 0.0 0.0 \n",
1444 | "3 0.0 0.0 \n",
1445 | "4 0.0 0.0 \n",
1446 | "5 0.0 0.0 \n",
1447 | "6 0.0 0.0 \n",
1448 | "7 0.0 0.0 \n",
1449 | "8 0.0 0.0 \n",
1450 | "9 0.0 0.0 \n",
1451 | "10 0.0 0.0 \n",
1452 | "\n",
1453 | "title 'Til There Was You (1997) 'Tis the Season for Love (2015) \\\n",
1454 | "userId \n",
1455 | "1 0.0 0.0 \n",
1456 | "2 0.0 0.0 \n",
1457 | "3 0.0 0.0 \n",
1458 | "4 0.0 0.0 \n",
1459 | "5 0.0 0.0 \n",
1460 | "6 0.0 0.0 \n",
1461 | "7 0.0 0.0 \n",
1462 | "8 0.0 0.0 \n",
1463 | "9 0.0 0.0 \n",
1464 | "10 0.0 0.0 \n",
1465 | "\n",
1466 | "title 'burbs, The (1989) 'night Mother (1986) (500) Days of Summer (2009) \\\n",
1467 | "userId \n",
1468 | "1 0.0 0.0 0.0 \n",
1469 | "2 0.0 0.0 0.0 \n",
1470 | "3 0.0 0.0 0.0 \n",
1471 | "4 0.0 0.0 0.0 \n",
1472 | "5 0.0 0.0 0.0 \n",
1473 | "6 0.0 0.0 0.0 \n",
1474 | "7 0.0 0.0 0.0 \n",
1475 | "8 0.0 0.0 0.0 \n",
1476 | "9 0.0 0.0 0.0 \n",
1477 | "10 0.0 0.0 0.0 \n",
1478 | "\n",
1479 | "title *batteries not included (1987) \\\n",
1480 | "userId \n",
1481 | "1 0.0 \n",
1482 | "2 0.0 \n",
1483 | "3 0.0 \n",
1484 | "4 0.0 \n",
1485 | "5 0.0 \n",
1486 | "6 0.0 \n",
1487 | "7 0.0 \n",
1488 | "8 0.0 \n",
1489 | "9 0.0 \n",
1490 | "10 0.0 \n",
1491 | "\n",
1492 | "title ... Zulu (2013) [REC] (2007) \\\n",
1493 | "userId ... \n",
1494 | "1 ... 0.0 0.0 \n",
1495 | "2 ... 0.0 0.0 \n",
1496 | "3 ... 0.0 0.0 \n",
1497 | "4 ... 0.0 0.0 \n",
1498 | "5 ... 0.0 0.0 \n",
1499 | "6 ... 0.0 0.0 \n",
1500 | "7 ... 0.0 0.0 \n",
1501 | "8 ... 0.0 0.0 \n",
1502 | "9 ... 0.0 0.0 \n",
1503 | "10 ... 0.0 0.0 \n",
1504 | "\n",
1505 | "title [REC]² (2009) [REC]³ 3 Génesis (2012) \\\n",
1506 | "userId \n",
1507 | "1 0.0 0.0 \n",
1508 | "2 0.0 0.0 \n",
1509 | "3 0.0 0.0 \n",
1510 | "4 0.0 0.0 \n",
1511 | "5 0.0 0.0 \n",
1512 | "6 0.0 0.0 \n",
1513 | "7 0.0 0.0 \n",
1514 | "8 0.0 0.0 \n",
1515 | "9 0.0 0.0 \n",
1516 | "10 0.0 0.0 \n",
1517 | "\n",
1518 | "title anohana: The Flower We Saw That Day - The Movie (2013) \\\n",
1519 | "userId \n",
1520 | "1 0.0 \n",
1521 | "2 0.0 \n",
1522 | "3 0.0 \n",
1523 | "4 0.0 \n",
1524 | "5 0.0 \n",
1525 | "6 0.0 \n",
1526 | "7 0.0 \n",
1527 | "8 0.0 \n",
1528 | "9 0.0 \n",
1529 | "10 0.0 \n",
1530 | "\n",
1531 | "title eXistenZ (1999) xXx (2002) xXx: State of the Union (2005) \\\n",
1532 | "userId \n",
1533 | "1 0.0 0.0 0.0 \n",
1534 | "2 0.0 0.0 0.0 \n",
1535 | "3 0.0 0.0 0.0 \n",
1536 | "4 0.0 0.0 0.0 \n",
1537 | "5 0.0 0.0 0.0 \n",
1538 | "6 0.0 0.0 0.0 \n",
1539 | "7 0.0 0.0 0.0 \n",
1540 | "8 0.0 0.0 0.0 \n",
1541 | "9 0.0 1.0 0.0 \n",
1542 | "10 0.0 0.0 0.0 \n",
1543 | "\n",
1544 | "title ¡Three Amigos! (1986) À nous la liberté (Freedom for Us) (1931) \n",
1545 | "userId \n",
1546 | "1 4.0 0.0 \n",
1547 | "2 0.0 0.0 \n",
1548 | "3 0.0 0.0 \n",
1549 | "4 0.0 0.0 \n",
1550 | "5 0.0 0.0 \n",
1551 | "6 0.0 0.0 \n",
1552 | "7 0.0 0.0 \n",
1553 | "8 0.0 0.0 \n",
1554 | "9 0.0 0.0 \n",
1555 | "10 0.0 0.0 \n",
1556 | "\n",
1557 | "[10 rows x 9719 columns]"
1558 | ]
1559 | },
1560 | "execution_count": 21,
1561 | "metadata": {},
1562 | "output_type": "execute_result"
1563 | }
1564 | ],
1565 | "source": [
1566 | "movie_user_rating_pivot = user_rating.pivot(index='userId', columns='title', values='rating').fillna(0)\n",
1567 | "movie_user_rating_pivot.head(10)"
1568 | ]
1569 | },
1570 | {
1571 | "cell_type": "code",
1572 | "execution_count": 22,
1573 | "metadata": {},
1574 | "outputs": [
1575 | {
1576 | "data": {
1577 | "text/plain": [
1578 | "(9719, 610)"
1579 | ]
1580 | },
1581 | "execution_count": 22,
1582 | "metadata": {},
1583 | "output_type": "execute_result"
1584 | }
1585 | ],
1586 | "source": [
1587 | "X = np.transpose(movie_user_rating_pivot.values)  # rows = titles, columns = users\n",
1588 | "X.shape"
1589 | ]
1590 | },
1591 | {
1592 | "cell_type": "markdown",
1593 | "metadata": {},
1594 | "source": [
1595 | "### Now let's fit the model"
1596 | ]
1597 | },
1598 | {
1599 | "cell_type": "code",
1600 | "execution_count": 35,
1601 | "metadata": {},
1602 | "outputs": [
1603 | {
1604 | "data": {
1605 | "text/plain": [
1606 | "(9719, 12)"
1607 | ]
1608 | },
1609 | "execution_count": 35,
1610 | "metadata": {},
1611 | "output_type": "execute_result"
1612 | }
1613 | ],
1614 | "source": [
1615 | "import sklearn\n",
1616 | "from sklearn.decomposition import TruncatedSVD\n",
1617 | "# Reduce each row of X to 12 SVD components; random_state fixes the seed.\n",
1618 | "SVD = TruncatedSVD(random_state=17, n_components=12)\n",
1619 | "matrix = SVD.fit_transform(X)\n",
1620 | "matrix.shape"
1621 | ]
1622 | },
1623 | {
1624 | "cell_type": "code",
1625 | "execution_count": 24,
1626 | "metadata": {},
1627 | "outputs": [
1628 | {
1629 | "data": {
1630 | "text/plain": [
1631 | "(9719, 9719)"
1632 | ]
1633 | },
1634 | "execution_count": 24,
1635 | "metadata": {},
1636 | "output_type": "execute_result"
1637 | }
1638 | ],
1639 | "source": [
1640 | "import warnings\n",
1641 | "warnings.simplefilter(\"ignore\", category=RuntimeWarning)\n",
1642 | "corr = np.corrcoef(matrix)  # pairwise Pearson correlation between rows of matrix\n",
1643 | "corr.shape"
1644 | ]
1645 | },
1646 | {
1647 | "cell_type": "markdown",
1648 | "metadata": {},
1649 | "source": [
1650 | "### Now let's check the results"
1651 | ]
1652 | },
1653 | {
1654 | "cell_type": "code",
1655 | "execution_count": 27,
1656 | "metadata": {},
1657 | "outputs": [],
1658 | "source": [
1659 | "movie_title = movie_user_rating_pivot.columns\n",
1660 | "movie_title_list = movie_title.tolist()\n",
1661 | "coffey_hands = movie_title_list.index(\"Guardians of the Galaxy (2014)\")  # NOTE(review): variable name does not match the title looked up here"
1662 | ]
1663 | },
1664 | {
1665 | "cell_type": "code",
1666 | "execution_count": 28,
1667 | "metadata": {},
1668 | "outputs": [
1669 | {
1670 | "data": {
1671 | "text/plain": [
1672 | "['Adjustment Bureau, The (2011)',\n",
1673 | " 'Amazing Spider-Man, The (2012)',\n",
1674 | " 'Ant-Man (2015)',\n",
1675 | " 'Avatar (2009)',\n",
1676 | " 'Avengers, The (2012)',\n",
1677 | " 'Avengers: Age of Ultron (2015)',\n",
1678 | " 'Big Hero 6 (2014)',\n",
1679 | " 'Brave (2012)',\n",
1680 | " 'Captain America: Civil War (2016)',\n",
1681 | " 'Captain America: The First Avenger (2011)',\n",
1682 | " 'Captain America: The Winter Soldier (2014)',\n",
1683 | " 'Cloud Atlas (2012)',\n",
1684 | " 'Cloudy with a Chance of Meatballs (2009)',\n",
1685 | " 'Dark Knight Rises, The (2012)',\n",
1686 | " 'Deadpool (2016)',\n",
1687 | " 'Despicable Me (2010)',\n",
1688 | " 'District 9 (2009)',\n",
1689 | " 'Django Unchained (2012)',\n",
1690 | " 'Doctor Strange (2016)',\n",
1691 | " 'Edge of Tomorrow (2014)',\n",
1692 | " \"Ender's Game (2013)\",\n",
1693 | " 'Grand Budapest Hotel, The (2014)',\n",
1694 | " 'Gravity (2013)',\n",
1695 | " 'Guardians of the Galaxy (2014)',\n",
1696 | " 'Guardians of the Galaxy 2 (2017)',\n",
1697 | " 'Harry Potter and the Deathly Hallows: Part 1 (2010)',\n",
1698 | " 'Harry Potter and the Deathly Hallows: Part 2 (2011)',\n",
1699 | " 'Hobbit: An Unexpected Journey, The (2012)',\n",
1700 | " 'Hobbit: The Desolation of Smaug, The (2013)',\n",
1701 | " 'How to Train Your Dragon (2010)',\n",
1702 | " 'Hugo (2011)',\n",
1703 | " 'Inside Out (2015)',\n",
1704 | " 'Iron Man (2008)',\n",
1705 | " 'Iron Man 2 (2010)',\n",
1706 | " 'Iron Man 3 (2013)',\n",
1707 | " 'John Wick (2014)',\n",
1708 | " 'Jurassic World (2015)',\n",
1709 | " 'Kingsman: The Golden Circle (2017)',\n",
1710 | " 'Kingsman: The Secret Service (2015)',\n",
1711 | " 'Kung Fu Panda (2008)',\n",
1712 | " 'Kung Fu Panda 2 (2011)',\n",
1713 | " 'Life of Pi (2012)',\n",
1714 | " 'Logan (2017)',\n",
1715 | " 'Lone Ranger, The (2013)',\n",
1716 | " 'Looper (2012)',\n",
1717 | " 'Mad Max: Fury Road (2015)',\n",
1718 | " 'Man of Steel (2013)',\n",
1719 | " 'Maze Runner, The (2014)',\n",
1720 | " 'Men in Black III (M.III.B.) (M.I.B.³) (2012)',\n",
1721 | " 'Mission: Impossible - Ghost Protocol (2011)',\n",
1722 | " 'Now You See Me (2013)',\n",
1723 | " 'Oblivion (2013)',\n",
1724 | " 'Pirates of the Caribbean: On Stranger Tides (2011)',\n",
1725 | " 'Prometheus (2012)',\n",
1726 | " 'Quantum of Solace (2008)',\n",
1727 | " 'Rango (2011)',\n",
1728 | " 'Red (2010)',\n",
1729 | " 'Rise of the Planet of the Apes (2011)',\n",
1730 | " 'Rogue One: A Star Wars Story (2016)',\n",
1731 | " 'Scott Pilgrim vs. the World (2010)',\n",
1732 | " 'Secret Life of Walter Mitty, The (2013)',\n",
1733 | " 'Sherlock Holmes (2009)',\n",
1734 | " 'Sherlock Holmes: A Game of Shadows (2011)',\n",
1735 | " 'Skyfall (2012)',\n",
1736 | " 'Star Trek (2009)',\n",
1737 | " 'Star Trek Into Darkness (2013)',\n",
1738 | " 'Star Wars: Episode VII - The Force Awakens (2015)',\n",
1739 | " 'Suicide Squad (2016)',\n",
1740 | " 'The Expendables 3 (2014)',\n",
1741 | " 'The Hobbit: The Battle of the Five Armies (2014)',\n",
1742 | " 'The Hunger Games (2012)',\n",
1743 | " 'The Hunger Games: Catching Fire (2013)',\n",
1744 | " 'The Hunger Games: Mockingjay - Part 1 (2014)',\n",
1745 | " 'The Lego Movie (2014)',\n",
1746 | " 'The Man from U.N.C.L.E. (2015)',\n",
1747 | " 'The Martian (2015)',\n",
1748 | " 'Thor (2011)',\n",
1749 | " 'Thor: The Dark World (2013)',\n",
1750 | " 'Toy Story 3 (2010)',\n",
1751 | " 'Up (2009)',\n",
1752 | " 'Wolverine, The (2013)',\n",
1753 | " 'World War Z (2013)',\n",
1754 | " \"World's End, The (2013)\",\n",
1755 | " 'Wreck-It Ralph (2012)',\n",
1756 | " 'X-Men Origins: Wolverine (2009)',\n",
1757 | " 'X-Men: Days of Future Past (2014)',\n",
1758 | " 'X-Men: First Class (2011)',\n",
1759 | " 'Zombieland (2009)',\n",
1760 | " 'Zootopia (2016)']"
1761 | ]
1762 | },
1763 | "execution_count": 28,
1764 | "metadata": {},
1765 | "output_type": "execute_result"
1766 | }
1767 | ],
1768 | "source": [
1769 | "corr_coffey_hands = corr[coffey_hands, :]\n",
1770 | "movie_title[corr_coffey_hands >= 0.9].tolist()  # titles whose correlation with the chosen row is at least 0.9"
1771 | ]
1772 | }
1773 | ],
1774 | "metadata": {
1775 | "kernelspec": {
1776 | "display_name": "Python 3",
1777 | "language": "python",
1778 | "name": "python3"
1779 | },
1780 | "language_info": {
1781 | "codemirror_mode": {
1782 | "name": "ipython",
1783 | "version": 3
1784 | },
1785 | "file_extension": ".py",
1786 | "mimetype": "text/x-python",
1787 | "name": "python",
1788 | "nbconvert_exporter": "python",
1789 | "pygments_lexer": "ipython3",
1790 | "version": "3.6.1"
1791 | }
1792 | },
1793 | "nbformat": 4,
1794 | "nbformat_minor": 2
1795 | }
1796 |
--------------------------------------------------------------------------------