├── .idea
│   ├── Reccomender-Systems-Using-Python.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── 01 - Evaluating Recommender Systems
│   ├── .ipynb_checkpoints
│   │   ├── MovieLens-checkpoint.py
│   │   └── Test Evaluation Metrics-checkpoint.ipynb
│   ├── MovieLens.py
│   ├── RecommenderMetrics.py
│   ├── Test Evaluation Metrics.ipynb
│   └── __pycache__
│       ├── MovieLens.cpython-38.pyc
│       └── RecommenderMetrics.cpython-38.pyc
├── 02 - Recommender Engine Framework
│   ├── .ipynb_checkpoints
│   │   ├── EvaluatedAlgorithm-checkpoint.py
│   │   ├── EvaluationData-checkpoint.py
│   │   ├── Evaluator-checkpoint.py
│   │   ├── MovieLens-checkpoint.py
│   │   ├── RecSys Framework Notebook-checkpoint.ipynb
│   │   └── RecommenderMetrics-checkpoint.py
│   ├── EvaluatedAlgorithm.py
│   ├── EvaluationData.py
│   ├── Evaluator.py
│   ├── MovieLens.py
│   ├── RecSys Framework Notebook.ipynb
│   ├── RecommenderMetrics.py
│   └── __pycache__
│       ├── EvaluatedAlgorithm.cpython-38.pyc
│       ├── EvaluationData.cpython-38.pyc
│       ├── Evaluator.cpython-38.pyc
│       ├── MovieLens.cpython-38.pyc
│       └── RecommenderMetrics.cpython-38.pyc
├── 03 - Content Based Recommendation
│   ├── .ipynb_checkpoints
│   │   ├── Content Based Recommendation with MisEnScene-checkpoint.ipynb
│   │   ├── Content Based Recommendation-checkpoint.ipynb
│   │   ├── ContentKNNAlgorithm-checkpoint.py
│   │   ├── ContentKNNwithMisEnScene-checkpoint.py
│   │   ├── ContentRecs-checkpoint.py
│   │   ├── EvaluationData-checkpoint.py
│   │   ├── Evaluator-checkpoint.py
│   │   ├── MovieLens-checkpoint.py
│   │   └── RecommenderMetrics-checkpoint.py
│   ├── Content Based Recommendation with MisEnScene.ipynb
│   ├── Content Based Recommendation.ipynb
│   ├── ContentKNNAlgorithm.py
│   ├── ContentKNNwithMisEnScene.py
│   ├── ContentRecs.py
│   ├── EvaluatedAlgorithm.py
│   ├── EvaluationData.py
│   ├── Evaluator.py
│   ├── LLVisualFeatures13K_Log.csv
│   ├── MovieLens.py
│   ├── RecommenderMetrics.py
│   └── __pycache__
│       ├── ContentKNNAlgorithm.cpython-38.pyc
│       ├── ContentKNNwithMisEnScene.cpython-38.pyc
│       ├── EvaluatedAlgorithm.cpython-38.pyc
│       ├── EvaluationData.cpython-38.pyc
│       ├── Evaluator.cpython-38.pyc
│       ├── MovieLens.cpython-38.pyc
│       └── RecommenderMetrics.cpython-38.pyc
├── 04 - Neighborhood Based Collaborative Filtering
│   ├── .ipynb_checkpoints
│   │   ├── EvaluateUserCF-checkpoint.py
│   │   ├── EvaluatedAlgorithm-checkpoint.py
│   │   ├── EvaluationData-checkpoint.py
│   │   ├── Evaluator-checkpoint.py
│   │   ├── Item-Based Collaborative Filtering-checkpoint.ipynb
│   │   ├── KNNBakeOff-checkpoint.py
│   │   ├── MovieLens-checkpoint.py
│   │   ├── RecommenderMetrics-checkpoint.py
│   │   ├── SimpleItemCF-checkpoint.py
│   │   └── User-Based Collaborative Filtering-checkpoint.ipynb
│   ├── EvaluateUserCF.py
│   ├── EvaluatedAlgorithm.py
│   ├── EvaluationData.py
│   ├── Evaluator.py
│   ├── Item-Based Collaborative Filtering.ipynb
│   ├── KNNBakeOff.py
│   ├── MovieLens.py
│   ├── RecommenderMetrics.py
│   ├── SimpleItemCF.py
│   ├── User-Based Collaborative Filtering.ipynb
│   └── __pycache__
│       └── MovieLens.cpython-38.pyc
├── 05 - Matrix Factorization Methods
│   ├── .ipynb_checkpoints
│   │   ├── EvaluatedAlgorithm-checkpoint.py
│   │   ├── EvaluationData-checkpoint.py
│   │   ├── Evaluator-checkpoint.py
│   │   ├── MovieLens-checkpoint.py
│   │   ├── RecommenderMetrics-checkpoint.py
│   │   ├── SVD Matrix Factorization-checkpoint.ipynb
│   │   ├── SVDBakeOff-checkpoint.py
│   │   └── SVDTuning-checkpoint.py
│   ├── EvaluatedAlgorithm.py
│   ├── EvaluationData.py
│   ├── Evaluator.py
│   ├── MovieLens.py
│   ├── RecommenderMetrics.py
│   ├── SVD Matrix Factorization.ipynb
│   └── __pycache__
│       ├── EvaluatedAlgorithm.cpython-38.pyc
│       ├── EvaluationData.cpython-38.pyc
│       ├── Evaluator.cpython-38.pyc
│       ├── MovieLens.cpython-38.pyc
│       └── RecommenderMetrics.cpython-38.pyc
├── 06 - Deep Learning for Recommender Systems
│   ├── .ipynb_checkpoints
│   │   ├── AutoRec-checkpoint.py
│   │   ├── AutoRecAlgorithm-checkpoint.py
│   │   ├── AutoRecBakeOff-checkpoint.py
│   │   ├── EvaluatedAlgorithm-checkpoint.py
│   │   ├── EvaluationData-checkpoint.py
│   │   ├── Evaluator-checkpoint.py
│   │   ├── MovieLens-checkpoint.py
│   │   ├── RBM-checkpoint.py
│   │   ├── RBMAlgorithm-checkpoint.py
│   │   ├── RBMBakeOff-checkpoint.py
│   │   ├── RBMTuning-checkpoint.py
│   │   ├── Recommendations using Restricted Boltzmann Machine(RBM)-checkpoint.ipynb
│   │   ├── Recommendations with Deep Neural Networks-checkpoint.ipynb
│   │   └── RecommenderMetrics-checkpoint.py
│   ├── AutoRec.py
│   ├── AutoRecAlgorithm.py
│   ├── AutoRecBakeOff.py
│   ├── EvaluatedAlgorithm.py
│   ├── EvaluationData.py
│   ├── Evaluator.py
│   ├── MovieLens.py
│   ├── RBM.py
│   ├── RBMAlgorithm.py
│   ├── RBMBakeOff.py
│   ├── RBMTuning.py
│   ├── Recommendations using Restricted Boltzmann Machine(RBM).ipynb
│   ├── Recommendations with Deep Neural Networks.ipynb
│   ├── RecommenderMetrics.py
│   └── __pycache__
│       ├── AutoRec.cpython-38.pyc
│       ├── AutoRecAlgorithm.cpython-38.pyc
│       ├── EvaluatedAlgorithm.cpython-38.pyc
│       ├── EvaluationData.cpython-38.pyc
│       ├── Evaluator.cpython-38.pyc
│       ├── MovieLens.cpython-38.pyc
│       ├── RBM.cpython-38.pyc
│       ├── RBMAlgorithm.cpython-38.pyc
│       └── RecommenderMetrics.cpython-38.pyc
├── README.md
└── ml-latest-small
    ├── README.txt
    ├── links.csv
    ├── movies.csv
    ├── ratings.csv
    └── tags.csv
/01 - Evaluating Recommender Systems/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/01 - Evaluating Recommender Systems/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/01 - Evaluating Recommender Systems/__pycache__/RecommenderMetrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/01 - Evaluating Recommender Systems/__pycache__/RecommenderMetrics.cpython-38.pyc
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/.ipynb_checkpoints/EvaluatedAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 3 10:45:33 2018
4 |
5 | @author: Frank
6 | """
7 | from RecommenderMetrics import RecommenderMetrics
8 | from EvaluationData import EvaluationData
9 |
10 | class EvaluatedAlgorithm:
11 |
12 | def __init__(self, algorithm, name):
13 | self.algorithm = algorithm
14 | self.name = name
15 |
16 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
17 | metrics = {}
18 | # Compute accuracy
19 | if (verbose):
20 | print("Evaluating accuracy...")
21 | self.algorithm.fit(evaluationData.GetTrainSet())
22 | predictions = self.algorithm.test(evaluationData.GetTestSet())
23 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
24 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
25 |
26 | if (doTopN):
27 | # Evaluate top-10 with Leave One Out testing
28 | if (verbose):
29 | print("Evaluating top-N with leave-one-out...")
30 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
31 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
32 | # Build predictions for all ratings not in the training set
33 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
34 | # Compute top 10 recs for each user
35 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
36 | if (verbose):
37 | print("Computing hit-rate and rank metrics...")
38 | # See how often we recommended a movie the user actually rated
39 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
40 | # See how often we recommended a movie the user actually liked
41 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
42 | # Compute ARHR
43 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
44 |
45 | #Evaluate properties of recommendations on full training set
46 | if (verbose):
47 | print("Computing recommendations with full data set...")
48 | self.algorithm.fit(evaluationData.GetFullTrainSet())
49 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
50 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
51 | if (verbose):
52 | print("Analyzing coverage, diversity, and novelty...")
53 | # Print user coverage with a minimum predicted rating of 4.0:
54 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
55 | evaluationData.GetFullTrainSet().n_users,
56 | ratingThreshold=4.0)
57 | # Measure diversity of recommendations:
58 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
59 |
60 | # Measure novelty (average popularity rank of recommendations):
61 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
62 | evaluationData.GetPopularityRankings())
63 |
64 | if (verbose):
65 | print("Analysis complete.")
66 |
67 | return metrics
68 |
69 | def GetName(self):
70 | return self.name
71 |
72 | def GetAlgorithm(self):
73 | return self.algorithm
74 |
75 |
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/.ipynb_checkpoints/EvaluationData-checkpoint.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 3 10:48:02 2018
4 |
5 | @author: Frank
6 | """
7 | from surprise.model_selection import train_test_split
8 | from surprise.model_selection import LeaveOneOut
9 | from surprise import KNNBaseline
10 |
11 | class EvaluationData:
12 |
13 | def __init__(self, data, popularityRankings):
14 |
15 | self.rankings = popularityRankings
16 |
17 | #Build a full training set for evaluating overall properties
18 | self.fullTrainSet = data.build_full_trainset()
19 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
20 |
21 | #Build a 75/25 train/test split for measuring accuracy
22 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
23 |
24 | #Build a "leave one out" train/test split for evaluating top-N recommenders
25 | #And build an anti-test-set for building predictions
26 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
27 | for train, test in LOOCV.split(data):
28 | self.LOOCVTrain = train
29 | self.LOOCVTest = test
30 |
31 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
32 |
33 |         #Compute similarity matrix between items so we can measure diversity
34 | sim_options = {'name': 'cosine', 'user_based': False}
35 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
36 | self.simsAlgo.fit(self.fullTrainSet)
37 |
38 | def GetFullTrainSet(self):
39 | return self.fullTrainSet
40 |
41 | def GetFullAntiTestSet(self):
42 | return self.fullAntiTestSet
43 |
44 | def GetAntiTestSetForUser(self, testSubject):
45 | trainset = self.fullTrainSet
46 | fill = trainset.global_mean
47 | anti_testset = []
48 | u = trainset.to_inner_uid(str(testSubject))
49 | user_items = set([j for (j, _) in trainset.ur[u]])
50 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
51 | i in trainset.all_items() if
52 | i not in user_items]
53 | return anti_testset
54 |
55 | def GetTrainSet(self):
56 | return self.trainSet
57 |
58 | def GetTestSet(self):
59 | return self.testSet
60 |
61 | def GetLOOCVTrainSet(self):
62 | return self.LOOCVTrain
63 |
64 | def GetLOOCVTestSet(self):
65 | return self.LOOCVTest
66 |
67 | def GetLOOCVAntiTestSet(self):
68 | return self.LOOCVAntiTestSet
69 |
70 | def GetSimilarities(self):
71 | return self.simsAlgo
72 |
73 | def GetPopularityRankings(self):
74 | return self.rankings
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/.ipynb_checkpoints/Evaluator-checkpoint.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 3 10:22:34 2018
4 |
5 | @author: Frank
6 | """
7 | from EvaluationData import EvaluationData
8 | from EvaluatedAlgorithm import EvaluatedAlgorithm
9 |
10 | class Evaluator:
11 |
12 | algorithms = []
13 |
14 | def __init__(self, dataset, rankings):
15 | ed = EvaluationData(dataset, rankings)
16 | self.dataset = ed
17 |
18 | def AddAlgorithm(self, algorithm, name):
19 | alg = EvaluatedAlgorithm(algorithm, name)
20 | self.algorithms.append(alg)
21 |
22 | def Evaluate(self, doTopN):
23 | results = {}
24 | for algorithm in self.algorithms:
25 | print("Evaluating ", algorithm.GetName(), "...")
26 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
27 |
28 | # Print results
29 | print("\n")
30 |
31 | if (doTopN):
32 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
33 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
36 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
37 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
38 | else:
39 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
40 | for (name, metrics) in results.items():
41 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
42 |
43 | print("\nLegend:\n")
44 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
45 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
46 | if (doTopN):
47 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
48 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
49 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
50 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
51 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
52 | print(" for a given user. Higher means more diverse.")
53 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
54 |
55 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
56 |
57 | for algo in self.algorithms:
58 | print("\nUsing recommender ", algo.GetName())
59 |
60 | print("\nBuilding recommendation model...")
61 | trainSet = self.dataset.GetFullTrainSet()
62 | algo.GetAlgorithm().fit(trainSet)
63 |
64 | print("Computing recommendations...")
65 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
66 |
67 | predictions = algo.GetAlgorithm().test(testSet)
68 |
69 | recommendations = []
70 |
71 | print ("\nWe recommend:")
72 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
73 | intMovieID = int(movieID)
74 | recommendations.append((intMovieID, estimatedRating))
75 |
76 | recommendations.sort(key=lambda x: x[1], reverse=True)
77 |
78 |             for ratings in recommendations[:k]:
79 | print(ml.getMovieName(ratings[0]), ratings[1])
80 |
81 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/EvaluatedAlgorithm.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 | # Print user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
70 |
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/EvaluationData.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
69 |
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/Evaluator.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 | algorithms = []
7 |
8 | def __init__(self, dataset, rankings):
9 | ed = EvaluationData(dataset, rankings)
10 | self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
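
A note on Evaluator.py above: `algorithms = []` is declared at class level, so every Evaluator instance shares the same list; a second Evaluator constructed in the same process would start with the first one's algorithms already added. This is harmless in the scripts here, which create only one Evaluator, but a minimal fix (not in the original source) is to make the list an instance attribute:

class Evaluator:

    def __init__(self, dataset, rankings):
        self.algorithms = []  # per-instance list; avoids state shared across instances
        self.dataset = EvaluationData(dataset, rankings)
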
/02 - Recommender Engine Framework/RecommenderMetrics.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from surprise import accuracy
4 | from collections import defaultdict
5 |
6 | class RecommenderMetrics:
7 |
8 | def MAE(predictions):
9 | return accuracy.mae(predictions, verbose=False)
10 |
11 | def RMSE(predictions):
12 | return accuracy.rmse(predictions, verbose=False)
13 |
14 | def GetTopN(predictions, n=10, minimumRating=4.0):
15 | topN = defaultdict(list)
16 |
17 |
18 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
19 | if (estimatedRating >= minimumRating):
20 | topN[int(userID)].append((int(movieID), estimatedRating))
21 |
22 | for userID, ratings in topN.items():
23 | ratings.sort(key=lambda x: x[1], reverse=True)
24 | topN[int(userID)] = ratings[:n]
25 |
26 | return topN
27 |
28 | def HitRate(topNPredicted, leftOutPredictions):
29 | hits = 0
30 | total = 0
31 |
32 | # For each left-out rating
33 | for leftOut in leftOutPredictions:
34 | userID = leftOut[0]
35 | leftOutMovieID = leftOut[1]
36 | # Is it in the predicted top 10 for this user?
37 | hit = False
38 | for movieID, predictedRating in topNPredicted[int(userID)]:
39 | if (int(leftOutMovieID) == int(movieID)):
40 | hit = True
41 | break
42 | if (hit) :
43 | hits += 1
44 |
45 | total += 1
46 |
47 |         # Compute overall hit rate
48 | return hits/total
49 |
50 | def CumulativeHitRate(topNPredicted, leftOutPredictions, ratingCutoff=0):
51 | hits = 0
52 | total = 0
53 |
54 | # For each left-out rating
55 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
56 | # Only look at ability to recommend things the users actually liked...
57 | if (actualRating >= ratingCutoff):
58 | # Is it in the predicted top 10 for this user?
59 | hit = False
60 | for movieID, predictedRating in topNPredicted[int(userID)]:
61 | if (int(leftOutMovieID) == movieID):
62 | hit = True
63 | break
64 | if (hit) :
65 | hits += 1
66 |
67 | total += 1
68 |
69 |         # Compute overall hit rate
70 | return hits/total
71 |
72 | def RatingHitRate(topNPredicted, leftOutPredictions):
73 | hits = defaultdict(float)
74 | total = defaultdict(float)
75 |
76 | # For each left-out rating
77 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
78 | # Is it in the predicted top N for this user?
79 | hit = False
80 | for movieID, predictedRating in topNPredicted[int(userID)]:
81 | if (int(leftOutMovieID) == movieID):
82 | hit = True
83 | break
84 | if (hit) :
85 | hits[actualRating] += 1
86 |
87 | total[actualRating] += 1
88 |
89 |         # Compute hit rate for each rating value
90 | for rating in sorted(hits.keys()):
91 | print (rating, hits[rating] / total[rating])
92 |
93 | def AverageReciprocalHitRank(topNPredicted, leftOutPredictions):
94 | summation = 0
95 | total = 0
96 | # For each left-out rating
97 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
98 | # Is it in the predicted top N for this user?
99 | hitRank = 0
100 | rank = 0
101 | for movieID, predictedRating in topNPredicted[int(userID)]:
102 | rank = rank + 1
103 | if (int(leftOutMovieID) == movieID):
104 | hitRank = rank
105 | break
106 | if (hitRank > 0) :
107 | summation += 1.0 / hitRank
108 |
109 | total += 1
110 |
111 | return summation / total
112 |
113 | # What percentage of users have at least one "good" recommendation
114 | def UserCoverage(topNPredicted, numUsers, ratingThreshold=0):
115 | hits = 0
116 | for userID in topNPredicted.keys():
117 | hit = False
118 | for movieID, predictedRating in topNPredicted[userID]:
119 | if (predictedRating >= ratingThreshold):
120 | hit = True
121 | break
122 | if (hit):
123 | hits += 1
124 |
125 | return hits / numUsers
126 |
127 | def Diversity(topNPredicted, simsAlgo):
128 | n = 0
129 | total = 0
130 | simsMatrix = simsAlgo.compute_similarities()
131 | for userID in topNPredicted.keys():
132 | pairs = itertools.combinations(topNPredicted[userID], 2)
133 | for pair in pairs:
134 | movie1 = pair[0][0]
135 | movie2 = pair[1][0]
136 | innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))
137 | innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))
138 | similarity = simsMatrix[innerID1][innerID2]
139 | total += similarity
140 | n += 1
141 |
142 | S = total / n
143 | return (1-S)
144 |
145 | def Novelty(topNPredicted, rankings):
146 | n = 0
147 | total = 0
148 | for userID in topNPredicted.keys():
149 | for rating in topNPredicted[userID]:
150 | movieID = rating[0]
151 | rank = rankings[movieID]
152 | total += rank
153 | n += 1
154 | return total / n
155 |
--------------------------------------------------------------------------------
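
A minimal sketch of exercising RecommenderMetrics directly, outside the Evaluator framework. It assumes scikit-surprise is installed and that the script is run from inside a chapter folder (e.g. 02 - Recommender Engine Framework) so that RecommenderMetrics is importable and the repo-root ml-latest-small data resolves; it is not part of the original source:

from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

from RecommenderMetrics import RecommenderMetrics

# Load the bundled MovieLens ratings into a Surprise dataset
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('../ml-latest-small/ratings.csv', reader=reader)

# 75/25 split, matching the convention used in EvaluationData
trainSet, testSet = train_test_split(data, test_size=.25, random_state=1)

algo = SVD(random_state=10)
algo.fit(trainSet)
predictions = algo.test(testSet)

print("RMSE:", RecommenderMetrics.RMSE(predictions))
print("MAE: ", RecommenderMetrics.MAE(predictions))

# Top-N lists per user, keeping only items whose predicted rating is >= 4.0
topN = RecommenderMetrics.GetTopN(predictions, n=10)
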
/02 - Recommender Engine Framework/__pycache__/EvaluatedAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/02 - Recommender Engine Framework/__pycache__/EvaluatedAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/__pycache__/EvaluationData.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/02 - Recommender Engine Framework/__pycache__/EvaluationData.cpython-38.pyc
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/__pycache__/Evaluator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/02 - Recommender Engine Framework/__pycache__/Evaluator.cpython-38.pyc
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/02 - Recommender Engine Framework/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/02 - Recommender Engine Framework/__pycache__/RecommenderMetrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/02 - Recommender Engine Framework/__pycache__/RecommenderMetrics.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/Content Based Recommendation with MisEnScene-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Content Based Movie Recommendation"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Importing Dependencies"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from MovieLens import MovieLens\n",
24 | "from ContentKNNAlgorithm import ContentKNNAlgorithm\n",
25 | "from Evaluator import Evaluator\n",
26 | "from surprise import NormalPredictor\n",
27 | "\n",
28 | "import random\n",
29 | "import numpy as np"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## Loading Data"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 2,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "def LoadMovieLensData():\n",
46 | " ml = MovieLens()\n",
47 | " print(\"Loading movie ratings...\")\n",
48 | " data = ml.loadMovieLensLatestSmall()\n",
49 | " print(\"\\nComputing movie popularity ranks so we can measure novelty later...\")\n",
50 | " rankings = ml.getPopularityRanks()\n",
51 | " return (ml, data, rankings)"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "np.random.seed(0)\n",
61 | "random.seed(0)"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 4,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "Loading movie ratings...\n",
74 | "\n",
75 | "Computing movie popularity ranks so we can measure novelty later...\n"
76 | ]
77 | }
78 | ],
79 | "source": [
80 | "# Load up common data set for the recommender algorithms\n",
81 | "(ml, evaluationData, rankings) = LoadMovieLensData()"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "## Instantiating Evaluator"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 5,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "Estimating biases using als...\n",
101 | "Computing the cosine similarity matrix...\n",
102 | "Done computing similarity matrix.\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "# Construct an Evaluator to, you know, evaluate them\n",
108 | "evaluator = Evaluator(evaluationData, rankings)"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "## Bulding Recommender Algorithms"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "### Adding a Content KNN Algorithm"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 6,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "contentKNN = ContentKNNAlgorithm()\n",
132 | "evaluator.AddAlgorithm(contentKNN, \"ContentKNN\")"
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {},
138 | "source": [
139 | "### Adding a Random Recommendation Algorithm"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 7,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "# Just make random recommendations\n",
149 | "Random = NormalPredictor()\n",
150 | "evaluator.AddAlgorithm(Random, \"Random\")"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "## Evaluate"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "Evaluating ContentKNN ...\n",
170 | "Evaluating accuracy...\n",
171 | "Computing content-based similarity matrix...\n",
172 | "0 of 8211\n",
173 | "1000 of 8211\n",
174 | "2000 of 8211\n"
175 | ]
176 | }
177 | ],
178 | "source": [
179 | "evaluator.Evaluate(False)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": []
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python 3",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.8.2"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 4
211 | }
212 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/Content Based Recommendation-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/ContentKNNAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | from MovieLens import MovieLens
4 | import math
5 | import numpy as np
6 | import heapq
7 |
8 | class ContentKNNAlgorithm(AlgoBase):
9 |
10 | def __init__(self, k=40, sim_options={}):
11 | AlgoBase.__init__(self)
12 | self.k = k
13 |
14 | def fit(self, trainset):
15 | AlgoBase.fit(self, trainset)
16 |
17 | # Compute item similarity matrix based on content attributes
18 |
19 | # Load up genre vectors for every movie
20 | ml = MovieLens()
21 | genres = ml.getGenres()
22 | years = ml.getYears()
23 | mes = ml.getMiseEnScene()
24 |
25 | print("Computing content-based similarity matrix...")
26 |
27 |         # Compute genre distance for every movie combination as an n_items x n_items matrix
28 | self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))
29 |
30 | for thisRating in range(self.trainset.n_items):
31 | if (thisRating % 1000 == 0):
32 | print(thisRating, " of ", self.trainset.n_items)
33 | for otherRating in range(thisRating+1, self.trainset.n_items):
34 | thisMovieID = int(self.trainset.to_raw_iid(thisRating))
35 | otherMovieID = int(self.trainset.to_raw_iid(otherRating))
36 | genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
37 | yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
38 | #mesSimilarity = self.computeMiseEnSceneSimilarity(thisMovieID, otherMovieID, mes)
39 | self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity
40 | self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
41 |
42 | print("...done.")
43 |
44 | return self
45 |
46 | def computeGenreSimilarity(self, movie1, movie2, genres):
47 | genres1 = genres[movie1]
48 | genres2 = genres[movie2]
49 | sumxx, sumxy, sumyy = 0, 0, 0
50 | for i in range(len(genres1)):
51 | x = genres1[i]
52 | y = genres2[i]
53 | sumxx += x * x
54 | sumyy += y * y
55 | sumxy += x * y
56 |
57 | return sumxy/math.sqrt(sumxx*sumyy)
58 |
59 | def computeYearSimilarity(self, movie1, movie2, years):
60 | diff = abs(years[movie1] - years[movie2])
61 | sim = math.exp(-diff / 10.0)
62 | return sim
63 |
64 | def computeMiseEnSceneSimilarity(self, movie1, movie2, mes):
65 | mes1 = mes[movie1]
66 | mes2 = mes[movie2]
67 | if (mes1 and mes2):
68 | shotLengthDiff = math.fabs(mes1[0] - mes2[0])
69 | colorVarianceDiff = math.fabs(mes1[1] - mes2[1])
70 | motionDiff = math.fabs(mes1[3] - mes2[3])
71 | lightingDiff = math.fabs(mes1[5] - mes2[5])
72 | numShotsDiff = math.fabs(mes1[6] - mes2[6])
73 | return shotLengthDiff * colorVarianceDiff * motionDiff * lightingDiff * numShotsDiff
74 | else:
75 | return 0
76 |
77 | def estimate(self, u, i):
78 |
79 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
80 |             raise PredictionImpossible('User and/or item is unknown.')
81 |
82 | # Build up similarity scores between this item and everything the user rated
83 | neighbors = []
84 | for rating in self.trainset.ur[u]:
85 | genreSimilarity = self.similarities[i,rating[0]]
86 | neighbors.append( (genreSimilarity, rating[1]) )
87 |
88 | # Extract the top-K most-similar ratings
89 | k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
90 |
91 | # Compute average sim score of K neighbors weighted by user ratings
92 | simTotal = weightedSum = 0
93 | for (simScore, rating) in k_neighbors:
94 | if (simScore > 0):
95 | simTotal += simScore
96 | weightedSum += simScore * rating
97 |
98 | if (simTotal == 0):
99 | raise PredictionImpossible('No neighbors')
100 |
101 | predictedRating = weightedSum / simTotal
102 |
103 | return predictedRating
104 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/ContentKNNwithMisEnScene-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | from MovieLens import MovieLens
4 | import math
5 | import numpy as np
6 | import heapq
7 |
8 | class ContentKNNwithMisEnScene(AlgoBase):
9 |
10 | def __init__(self, k=40, sim_options={}):
11 | AlgoBase.__init__(self)
12 | self.k = k
13 |
14 | def fit(self, trainset):
15 | AlgoBase.fit(self, trainset)
16 |
17 | # Compute item similarity matrix based on content attributes
18 |
19 | # Load up genre vectors for every movie
20 | ml = MovieLens()
21 | genres = ml.getGenres()
22 | years = ml.getYears()
23 | mes = ml.getMiseEnScene()
24 |
25 | print("Computing content-based similarity matrix...")
26 |
27 |         # Compute genre distance for every movie combination as an n_items x n_items matrix
28 | self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))
29 |
30 | for thisRating in range(self.trainset.n_items):
31 | if (thisRating % 1000 == 0):
32 | print(thisRating, " of ", self.trainset.n_items)
33 | for otherRating in range(thisRating+1, self.trainset.n_items):
34 | thisMovieID = int(self.trainset.to_raw_iid(thisRating))
35 | otherMovieID = int(self.trainset.to_raw_iid(otherRating))
36 | genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
37 | yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
38 | mesSimilarity = self.computeMiseEnSceneSimilarity(thisMovieID, otherMovieID, mes)
39 | self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity * mesSimilarity
40 | self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
41 |
42 | print("...done.")
43 |
44 | return self
45 |
46 | def computeGenreSimilarity(self, movie1, movie2, genres):
47 | genres1 = genres[movie1]
48 | genres2 = genres[movie2]
49 | sumxx, sumxy, sumyy = 0, 0, 0
50 | for i in range(len(genres1)):
51 | x = genres1[i]
52 | y = genres2[i]
53 | sumxx += x * x
54 | sumyy += y * y
55 | sumxy += x * y
56 |
57 | return sumxy/math.sqrt(sumxx*sumyy)
58 |
59 | def computeYearSimilarity(self, movie1, movie2, years):
60 | diff = abs(years[movie1] - years[movie2])
61 | sim = math.exp(-diff / 10.0)
62 | return sim
63 |
64 | def computeMiseEnSceneSimilarity(self, movie1, movie2, mes):
65 | mes1 = mes[movie1]
66 | mes2 = mes[movie2]
67 | if (mes1 and mes2):
68 | shotLengthDiff = math.fabs(mes1[0] - mes2[0])
69 | colorVarianceDiff = math.fabs(mes1[1] - mes2[1])
70 | motionDiff = math.fabs(mes1[3] - mes2[3])
71 | lightingDiff = math.fabs(mes1[5] - mes2[5])
72 | numShotsDiff = math.fabs(mes1[6] - mes2[6])
73 | return shotLengthDiff * colorVarianceDiff * motionDiff * lightingDiff * numShotsDiff
74 | else:
75 | return 0
76 |
77 | def estimate(self, u, i):
78 |
79 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
80 |             raise PredictionImpossible('User and/or item is unknown.')
81 |
82 | # Build up similarity scores between this item and everything the user rated
83 | neighbors = []
84 | for rating in self.trainset.ur[u]:
85 | genreSimilarity = self.similarities[i,rating[0]]
86 | neighbors.append( (genreSimilarity, rating[1]) )
87 |
88 | # Extract the top-K most-similar ratings
89 | k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
90 |
91 | # Compute average sim score of K neighbors weighted by user ratings
92 | simTotal = weightedSum = 0
93 | for (simScore, rating) in k_neighbors:
94 | if (simScore > 0):
95 | simTotal += simScore
96 | weightedSum += simScore * rating
97 |
98 | if (simTotal == 0):
99 | raise PredictionImpossible('No neighbors')
100 |
101 | predictedRating = weightedSum / simTotal
102 |
103 | return predictedRating
104 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/ContentRecs-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from ContentKNNAlgorithm import ContentKNNAlgorithm
3 | from Evaluator import Evaluator
4 | from surprise import NormalPredictor
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | contentKNN = ContentKNNAlgorithm()
27 | evaluator.AddAlgorithm(contentKNN, "ContentKNN")
28 |
29 | # Just make random recommendations
30 | Random = NormalPredictor()
31 | evaluator.AddAlgorithm(Random, "Random")
32 |
33 | evaluator.Evaluate(False)
34 |
35 | evaluator.SampleTopNRecs(ml)
36 |
37 |
38 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/EvaluationData-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/.ipynb_checkpoints/Evaluator-checkpoint.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 | algorithms = []
7 |
8 | def __init__(self, dataset, rankings):
9 | ed = EvaluationData(dataset, rankings)
10 | self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/ContentKNNAlgorithm.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | from MovieLens import MovieLens
4 | import math
5 | import numpy as np
6 | import heapq
7 |
8 | class ContentKNNAlgorithm(AlgoBase):
9 |
10 | def __init__(self, k=40, sim_options={}):
11 | AlgoBase.__init__(self)
12 | self.k = k
13 |
14 | def fit(self, trainset):
15 | AlgoBase.fit(self, trainset)
16 |
17 | # Compute item similarity matrix based on content attributes
18 |
19 | # Load up genre vectors for every movie
20 | ml = MovieLens()
21 | genres = ml.getGenres()
22 | years = ml.getYears()
23 | mes = ml.getMiseEnScene()
24 |
25 | print("Computing content-based similarity matrix...")
26 |
27 |         # Compute genre distance for every movie combination as an n_items x n_items matrix
28 | self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))
29 |
30 | for thisRating in range(self.trainset.n_items):
31 | if (thisRating % 1000 == 0):
32 | print(thisRating, " of ", self.trainset.n_items)
33 | for otherRating in range(thisRating+1, self.trainset.n_items):
34 | thisMovieID = int(self.trainset.to_raw_iid(thisRating))
35 | otherMovieID = int(self.trainset.to_raw_iid(otherRating))
36 | genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
37 | yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
38 | #mesSimilarity = self.computeMiseEnSceneSimilarity(thisMovieID, otherMovieID, mes)
39 | self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity
40 | self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
41 |
42 | print("...done.")
43 |
44 | return self
45 |
46 | def computeGenreSimilarity(self, movie1, movie2, genres):
47 | genres1 = genres[movie1]
48 | genres2 = genres[movie2]
49 | sumxx, sumxy, sumyy = 0, 0, 0
50 | for i in range(len(genres1)):
51 | x = genres1[i]
52 | y = genres2[i]
53 | sumxx += x * x
54 | sumyy += y * y
55 | sumxy += x * y
56 |
57 | return sumxy/math.sqrt(sumxx*sumyy)
58 |
59 | def computeYearSimilarity(self, movie1, movie2, years):
60 | diff = abs(years[movie1] - years[movie2])
61 | sim = math.exp(-diff / 10.0)
62 | return sim
63 |
64 | def computeMiseEnSceneSimilarity(self, movie1, movie2, mes):
65 | mes1 = mes[movie1]
66 | mes2 = mes[movie2]
67 | if (mes1 and mes2):
68 | shotLengthDiff = math.fabs(mes1[0] - mes2[0])
69 | colorVarianceDiff = math.fabs(mes1[1] - mes2[1])
70 | motionDiff = math.fabs(mes1[3] - mes2[3])
71 | lightingDiff = math.fabs(mes1[5] - mes2[5])
72 | numShotsDiff = math.fabs(mes1[6] - mes2[6])
73 | return shotLengthDiff * colorVarianceDiff * motionDiff * lightingDiff * numShotsDiff
74 | else:
75 | return 0
76 |
77 | def estimate(self, u, i):
78 |
79 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
80 |             raise PredictionImpossible('User and/or item is unknown.')
81 |
82 | # Build up similarity scores between this item and everything the user rated
83 | neighbors = []
84 | for rating in self.trainset.ur[u]:
85 | genreSimilarity = self.similarities[i,rating[0]]
86 | neighbors.append( (genreSimilarity, rating[1]) )
87 |
88 | # Extract the top-K most-similar ratings
89 | k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
90 |
91 | # Compute average sim score of K neighbors weighted by user ratings
92 | simTotal = weightedSum = 0
93 | for (simScore, rating) in k_neighbors:
94 | if (simScore > 0):
95 | simTotal += simScore
96 | weightedSum += simScore * rating
97 |
98 | if (simTotal == 0):
99 | raise PredictionImpossible('No neighbors')
100 |
101 | predictedRating = weightedSum / simTotal
102 |
103 | return predictedRating
104 |
--------------------------------------------------------------------------------
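A side note on the genre measure above: computeGenreSimilarity is a plain cosine between binary genre vectors. As a quick cross-check (not part of the repository code), the same computation can be written with NumPy; g1 and g2 below are hypothetical 0/1 genre vectors of the kind ml.getGenres() returns:

import numpy as np

# Hypothetical binary genre vectors (1 = movie carries that genre)
g1 = np.array([1, 0, 1, 0, 0])
g2 = np.array([1, 1, 0, 0, 0])

# Cosine similarity, equivalent to computeGenreSimilarity's explicit loop
cosine = g1.dot(g2) / np.sqrt(g1.dot(g1) * g2.dot(g2))
print(cosine)  # 0.5 for these two vectors
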
/03 - Content Based Recommendation/ContentKNNwithMisEnScene.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | from MovieLens import MovieLens
4 | import math
5 | import numpy as np
6 | import heapq
7 |
8 | class ContentKNNwithMisEnScene(AlgoBase):
9 |
10 |     def __init__(self, k=40, sim_options=None):  # sim_options is accepted but not used
11 | AlgoBase.__init__(self)
12 | self.k = k
13 |
14 | def fit(self, trainset):
15 | AlgoBase.fit(self, trainset)
16 |
17 | # Compute item similarity matrix based on content attributes
18 |
19 |         # Load up genre vectors, years, and mise-en-scene data for every movie
20 | ml = MovieLens()
21 | genres = ml.getGenres()
22 | years = ml.getYears()
23 | mes = ml.getMiseEnScene()
24 |
25 | print("Computing content-based similarity matrix...")
26 |
27 |         # Compute content similarity for every movie pair as an n_items x n_items matrix
28 | self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))
29 |
30 | for thisRating in range(self.trainset.n_items):
31 | if (thisRating % 1000 == 0):
32 | print(thisRating, " of ", self.trainset.n_items)
33 | for otherRating in range(thisRating+1, self.trainset.n_items):
34 | thisMovieID = int(self.trainset.to_raw_iid(thisRating))
35 | otherMovieID = int(self.trainset.to_raw_iid(otherRating))
36 | genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
37 | yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
38 | mesSimilarity = self.computeMiseEnSceneSimilarity(thisMovieID, otherMovieID, mes)
39 | self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity * mesSimilarity
40 | self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
41 |
42 | print("...done.")
43 |
44 | return self
45 |
46 | def computeGenreSimilarity(self, movie1, movie2, genres):
47 | genres1 = genres[movie1]
48 | genres2 = genres[movie2]
49 | sumxx, sumxy, sumyy = 0, 0, 0
50 | for i in range(len(genres1)):
51 | x = genres1[i]
52 | y = genres2[i]
53 | sumxx += x * x
54 | sumyy += y * y
55 | sumxy += x * y
56 |
57 |         return sumxy / math.sqrt(sumxx * sumyy) if sumxx * sumyy else 0  # guard against empty genre vectors
58 |
59 | def computeYearSimilarity(self, movie1, movie2, years):
60 | diff = abs(years[movie1] - years[movie2])
61 | sim = math.exp(-diff / 10.0)
62 | return sim
63 |
64 | def computeMiseEnSceneSimilarity(self, movie1, movie2, mes):
65 | mes1 = mes[movie1]
66 | mes2 = mes[movie2]
67 | if (mes1 and mes2):
68 | shotLengthDiff = math.fabs(mes1[0] - mes2[0])
69 | colorVarianceDiff = math.fabs(mes1[1] - mes2[1])
70 | motionDiff = math.fabs(mes1[3] - mes2[3])
71 | lightingDiff = math.fabs(mes1[5] - mes2[5])
72 | numShotsDiff = math.fabs(mes1[6] - mes2[6])
73 |             return shotLengthDiff * colorVarianceDiff * motionDiff * lightingDiff * numShotsDiff  # a product of differences, which grows as movies get less alike
74 | else:
75 | return 0
76 |
77 | def estimate(self, u, i):
78 |
79 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
80 |             raise PredictionImpossible('User and/or item is unknown.')
81 |
82 | # Build up similarity scores between this item and everything the user rated
83 | neighbors = []
84 | for rating in self.trainset.ur[u]:
85 |             similarity = self.similarities[i, rating[0]]
86 |             neighbors.append( (similarity, rating[1]) )
87 |
88 | # Extract the top-K most-similar ratings
89 | k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
90 |
91 |         # Compute the similarity-weighted average of the K neighbors' ratings
92 | simTotal = weightedSum = 0
93 | for (simScore, rating) in k_neighbors:
94 | if (simScore > 0):
95 | simTotal += simScore
96 | weightedSum += simScore * rating
97 |
98 | if (simTotal == 0):
99 | raise PredictionImpossible('No neighbors')
100 |
101 | predictedRating = weightedSum / simTotal
102 |
103 | return predictedRating
104 |
--------------------------------------------------------------------------------
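The year term in both classes is an exponential decay with a 10-year scale, so movies released the same year score 1.0 and the score falls off smoothly with distance. A few spot checks, easy to reproduce with math.exp:

import math

# Same formula as computeYearSimilarity: exp(-diff / 10)
for diff in (0, 5, 10, 30):
    print(diff, round(math.exp(-diff / 10.0), 3))
# 0 -> 1.0, 5 -> 0.607, 10 -> 0.368, 30 -> 0.05
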
/03 - Content Based Recommendation/ContentRecs.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from ContentKNNAlgorithm import ContentKNNAlgorithm
3 | from Evaluator import Evaluator
4 | from surprise import NormalPredictor
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | contentKNN = ContentKNNAlgorithm()
27 | evaluator.AddAlgorithm(contentKNN, "ContentKNN")
28 |
29 | # Just make random recommendations
30 | Random = NormalPredictor()
31 | evaluator.AddAlgorithm(Random, "Random")
32 |
33 | evaluator.Evaluate(False)
34 |
35 | evaluator.SampleTopNRecs(ml)
36 |
37 |
38 |
--------------------------------------------------------------------------------
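ContentRecs.py passes False to Evaluate, so it reports accuracy (RMSE/MAE) only. If you also want the hit-rate, coverage, diversity, and novelty columns, the same evaluator object supports it at the cost of extra leave-one-out fits:

# Slower, but also reports HR, cHR, ARHR, Coverage, Diversity, and Novelty
evaluator.Evaluate(True)
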
/03 - Content Based Recommendation/EvaluatedAlgorithm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu May 3 10:45:33 2018
4 |
5 | @author: Frank
6 | """
7 | from RecommenderMetrics import RecommenderMetrics
8 | from EvaluationData import EvaluationData
9 |
10 | class EvaluatedAlgorithm:
11 |
12 | def __init__(self, algorithm, name):
13 | self.algorithm = algorithm
14 | self.name = name
15 |
16 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
17 | metrics = {}
18 | # Compute accuracy
19 | if (verbose):
20 | print("Evaluating accuracy...")
21 | self.algorithm.fit(evaluationData.GetTrainSet())
22 | predictions = self.algorithm.test(evaluationData.GetTestSet())
23 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
24 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
25 |
26 | if (doTopN):
27 | # Evaluate top-10 with Leave One Out testing
28 | if (verbose):
29 | print("Evaluating top-N with leave-one-out...")
30 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
31 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
32 | # Build predictions for all ratings not in the training set
33 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
34 | # Compute top 10 recs for each user
35 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
36 | if (verbose):
37 | print("Computing hit-rate and rank metrics...")
38 | # See how often we recommended a movie the user actually rated
39 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
40 | # See how often we recommended a movie the user actually liked
41 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
42 | # Compute ARHR
43 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
44 |
45 | #Evaluate properties of recommendations on full training set
46 | if (verbose):
47 | print("Computing recommendations with full data set...")
48 | self.algorithm.fit(evaluationData.GetFullTrainSet())
49 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
50 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
51 | if (verbose):
52 | print("Analyzing coverage, diversity, and novelty...")
53 | # Print user coverage with a minimum predicted rating of 4.0:
54 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
55 | evaluationData.GetFullTrainSet().n_users,
56 | ratingThreshold=4.0)
57 | # Measure diversity of recommendations:
58 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
59 |
60 | # Measure novelty (average popularity rank of recommendations):
61 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
62 | evaluationData.GetPopularityRankings())
63 |
64 | if (verbose):
65 | print("Analysis complete.")
66 |
67 | return metrics
68 |
69 | def GetName(self):
70 | return self.name
71 |
72 | def GetAlgorithm(self):
73 | return self.algorithm
74 |
75 |
--------------------------------------------------------------------------------
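The RMSE and MAE values that Evaluate collects are easy to sanity-check by hand. A minimal sketch over made-up (actual, predicted) rating pairs, matching the standard definitions that Surprise's accuracy module implements:

import math

pairs = [(4.0, 3.5), (3.0, 3.0), (5.0, 4.0)]  # made-up (actual, predicted) ratings

mae = sum(abs(a - p) for a, p in pairs) / len(pairs)
rmse = math.sqrt(sum((a - p) ** 2 for a, p in pairs) / len(pairs))
print(mae, rmse)  # 0.5 and roughly 0.645
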
/03 - Content Based Recommendation/EvaluationData.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
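The various "anti-test-sets" built here are simply every (user, item) pair the user has not rated, with the training set's global mean filled in so Surprise will produce an estimate for each one. A toy illustration of the same idea, independent of Surprise's Trainset class (all names here are made up):

ratings = {("u1", "m1"): 4.0, ("u1", "m2"): 3.0}  # hypothetical known ratings
all_items = ["m1", "m2", "m3", "m4"]
global_mean = 3.5

# Everything u1 has NOT rated, with the global mean as a placeholder
anti_testset = [("u1", item, global_mean)
                for item in all_items
                if ("u1", item) not in ratings]
print(anti_testset)  # [('u1', 'm3', 3.5), ('u1', 'm4', 3.5)]
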
/03 - Content Based Recommendation/Evaluator.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 |     def __init__(self, dataset, rankings):
7 |         # Instance attribute, so separate Evaluators don't share one algorithm list
8 |         self.algorithms = []
9 |         ed = EvaluationData(dataset, rankings)
10 |         self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |         for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/RecommenderMetrics.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from surprise import accuracy
4 | from collections import defaultdict
5 |
6 | class RecommenderMetrics:
7 |
8 | def MAE(predictions):
9 | return accuracy.mae(predictions, verbose=False)
10 |
11 | def RMSE(predictions):
12 | return accuracy.rmse(predictions, verbose=False)
13 |
14 | def GetTopN(predictions, n=10, minimumRating=4.0):
15 | topN = defaultdict(list)
16 |
17 |
18 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
19 | if (estimatedRating >= minimumRating):
20 | topN[int(userID)].append((int(movieID), estimatedRating))
21 |
22 | for userID, ratings in topN.items():
23 | ratings.sort(key=lambda x: x[1], reverse=True)
24 | topN[int(userID)] = ratings[:n]
25 |
26 | return topN
27 |
28 | def HitRate(topNPredicted, leftOutPredictions):
29 | hits = 0
30 | total = 0
31 |
32 | # For each left-out rating
33 | for leftOut in leftOutPredictions:
34 | userID = leftOut[0]
35 | leftOutMovieID = leftOut[1]
36 | # Is it in the predicted top 10 for this user?
37 | hit = False
38 | for movieID, predictedRating in topNPredicted[int(userID)]:
39 | if (int(leftOutMovieID) == int(movieID)):
40 | hit = True
41 | break
42 |             if (hit):
43 |                 hits += 1
44 | 
45 |             total += 1
46 | 
47 |         # Compute overall hit rate
48 |         return hits/total
49 |
50 | def CumulativeHitRate(topNPredicted, leftOutPredictions, ratingCutoff=0):
51 | hits = 0
52 | total = 0
53 |
54 | # For each left-out rating
55 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
56 | # Only look at ability to recommend things the users actually liked...
57 | if (actualRating >= ratingCutoff):
58 | # Is it in the predicted top 10 for this user?
59 | hit = False
60 | for movieID, predictedRating in topNPredicted[int(userID)]:
61 | if (int(leftOutMovieID) == movieID):
62 | hit = True
63 | break
64 |                 if (hit):
65 |                     hits += 1
66 | 
67 |                 total += 1
68 | 
69 |         # Compute overall hit rate
70 |         return hits/total
71 |
72 | def RatingHitRate(topNPredicted, leftOutPredictions):
73 | hits = defaultdict(float)
74 | total = defaultdict(float)
75 |
76 | # For each left-out rating
77 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
78 | # Is it in the predicted top N for this user?
79 | hit = False
80 | for movieID, predictedRating in topNPredicted[int(userID)]:
81 | if (int(leftOutMovieID) == movieID):
82 | hit = True
83 | break
84 |             if (hit):
85 |                 hits[actualRating] += 1
86 | 
87 |             total[actualRating] += 1
88 | 
89 |         # Print hit rate broken down by actual rating value
90 |         for rating in sorted(hits.keys()):
91 |             print(rating, hits[rating] / total[rating])
92 |
93 | def AverageReciprocalHitRank(topNPredicted, leftOutPredictions):
94 | summation = 0
95 | total = 0
96 | # For each left-out rating
97 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
98 | # Is it in the predicted top N for this user?
99 | hitRank = 0
100 | rank = 0
101 | for movieID, predictedRating in topNPredicted[int(userID)]:
102 | rank = rank + 1
103 | if (int(leftOutMovieID) == movieID):
104 | hitRank = rank
105 | break
106 |             if (hitRank > 0):
107 | summation += 1.0 / hitRank
108 |
109 | total += 1
110 |
111 | return summation / total
112 |
113 | # What percentage of users have at least one "good" recommendation
114 | def UserCoverage(topNPredicted, numUsers, ratingThreshold=0):
115 | hits = 0
116 | for userID in topNPredicted.keys():
117 | hit = False
118 | for movieID, predictedRating in topNPredicted[userID]:
119 | if (predictedRating >= ratingThreshold):
120 | hit = True
121 | break
122 | if (hit):
123 | hits += 1
124 |
125 | return hits / numUsers
126 |
127 | def Diversity(topNPredicted, simsAlgo):
128 | n = 0
129 | total = 0
130 | simsMatrix = simsAlgo.compute_similarities()
131 | for userID in topNPredicted.keys():
132 | pairs = itertools.combinations(topNPredicted[userID], 2)
133 | for pair in pairs:
134 | movie1 = pair[0][0]
135 | movie2 = pair[1][0]
136 | innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))
137 | innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))
138 | similarity = simsMatrix[innerID1][innerID2]
139 | total += similarity
140 | n += 1
141 |
142 | S = total / n
143 | return (1-S)
144 |
145 | def Novelty(topNPredicted, rankings):
146 | n = 0
147 | total = 0
148 | for userID in topNPredicted.keys():
149 | for rating in topNPredicted[userID]:
150 | movieID = rating[0]
151 | rank = rankings[movieID]
152 | total += rank
153 | n += 1
154 | return total / n
155 |
--------------------------------------------------------------------------------
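HitRate counts how often each user's single left-out movie shows up in that user's top-N list. A toy check with two users (made-up IDs), where only the first user's left-out movie was actually recommended:

# Top-N lists keyed by user: (movieID, predicted rating) pairs
topN = {1: [(50, 4.5), (99, 4.2)], 2: [(10, 4.8)]}

# One left-out (userID, movieID) pair per user
leftOut = [(1, 99), (2, 77)]

hits = sum(1 for user, movie in leftOut
           if any(m == movie for m, _ in topN[user]))
print(hits / len(leftOut))  # 0.5: user 1 is a hit, user 2 is not
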
/03 - Content Based Recommendation/__pycache__/ContentKNNAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/ContentKNNAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/ContentKNNwithMisEnScene.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/ContentKNNwithMisEnScene.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/EvaluatedAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/EvaluatedAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/EvaluationData.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/EvaluationData.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/Evaluator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/Evaluator.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/03 - Content Based Recommendation/__pycache__/RecommenderMetrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/03 - Content Based Recommendation/__pycache__/RecommenderMetrics.cpython-38.pyc
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/EvaluateUserCF-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | import heapq
4 | from collections import defaultdict
5 | from operator import itemgetter
6 | from surprise.model_selection import LeaveOneOut
7 | from RecommenderMetrics import RecommenderMetrics
8 | from EvaluationData import EvaluationData
9 |
10 | def LoadMovieLensData():
11 | ml = MovieLens()
12 | print("Loading movie ratings...")
13 | data = ml.loadMovieLensLatestSmall()
14 | print("\nComputing movie popularity ranks so we can measure novelty later...")
15 | rankings = ml.getPopularityRanks()
16 | return (ml, data, rankings)
17 |
18 | ml, data, rankings = LoadMovieLensData()
19 |
20 | evalData = EvaluationData(data, rankings)
21 |
22 | # Train on the leave-one-out train set
23 | trainSet = evalData.GetLOOCVTrainSet()
24 | sim_options = {'name': 'cosine',
25 | 'user_based': True
26 | }
27 |
28 | model = KNNBasic(sim_options=sim_options)
29 | model.fit(trainSet)
30 | simsMatrix = model.compute_similarities()
31 |
32 | leftOutTestSet = evalData.GetLOOCVTestSet()
33 |
34 | # Build up a dict mapping each user to a list of (movieID, predicted rating) pairs
35 | topN = defaultdict(list)
36 | k = 10
37 | for uiid in range(trainSet.n_users):
38 | # Get top N similar users to this one
39 | similarityRow = simsMatrix[uiid]
40 |
41 | similarUsers = []
42 | for innerID, score in enumerate(similarityRow):
43 | if (innerID != uiid):
44 | similarUsers.append( (innerID, score) )
45 |
46 | kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])
47 |
48 | # Get the stuff they rated, and add up ratings for each item, weighted by user similarity
49 | candidates = defaultdict(float)
50 | for similarUser in kNeighbors:
51 | innerID = similarUser[0]
52 | userSimilarityScore = similarUser[1]
53 | theirRatings = trainSet.ur[innerID]
54 | for rating in theirRatings:
55 | candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore
56 |
57 | # Build a dictionary of stuff the user has already seen
58 | watched = {}
59 | for itemID, rating in trainSet.ur[uiid]:
60 | watched[itemID] = 1
61 |
62 | # Get top-rated items from similar users:
63 | pos = 0
64 | for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
65 |         if itemID not in watched:
66 |             movieID = trainSet.to_raw_iid(itemID)
67 |             topN[int(trainSet.to_raw_uid(uiid))].append( (int(movieID), 0.0) )
68 |             pos += 1
69 |             if (pos >= 40):  # keep at most 40 candidates per user
70 |                 break
71 |
72 | # Measure
73 | print("HR", RecommenderMetrics.HitRate(topN, leftOutTestSet))
74 |
75 |
76 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/EvaluatedAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 | # Print user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/EvaluationData-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/Evaluator-checkpoint.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 |     def __init__(self, dataset, rankings):
7 |         # Instance attribute, so separate Evaluators don't share one algorithm list
8 |         self.algorithms = []
9 |         ed = EvaluationData(dataset, rankings)
10 |         self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |         for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/KNNBakeOff-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | # User-based KNN
27 | UserKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': True})
28 | evaluator.AddAlgorithm(UserKNN, "User KNN")
29 |
30 | # Item-based KNN
31 | ItemKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': False})
32 | evaluator.AddAlgorithm(ItemKNN, "Item KNN")
33 |
34 | # Just make random recommendations
35 | Random = NormalPredictor()
36 | evaluator.AddAlgorithm(Random, "Random")
37 |
38 | # Fight!
39 | evaluator.Evaluate(False)
40 |
41 | evaluator.SampleTopNRecs(ml)
42 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/SimpleItemCF-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | import heapq
4 | from collections import defaultdict
5 | from operator import itemgetter
6 |
7 | testSubject = '85'
8 | k = 10
9 |
10 | ml = MovieLens()
11 | data = ml.loadMovieLensLatestSmall()
12 |
13 | trainSet = data.build_full_trainset()
14 |
15 | sim_options = {'name': 'cosine',
16 | 'user_based': False
17 | }
18 |
19 | model = KNNBasic(sim_options=sim_options)
20 | model.fit(trainSet)
21 | simsMatrix = model.compute_similarities()
22 |
23 | testUserInnerID = trainSet.to_inner_uid(testSubject)
24 |
25 | # Get the top K items we rated
26 | testUserRatings = trainSet.ur[testUserInnerID]
27 | kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])
28 |
29 | # Get similar items to stuff we liked (weighted by rating)
30 | candidates = defaultdict(float)
31 | for itemID, rating in kNeighbors:
32 | similarityRow = simsMatrix[itemID]
33 | for innerID, score in enumerate(similarityRow):
34 | candidates[innerID] += score * (rating / 5.0)
35 |
36 | # Build a dictionary of stuff the user has already seen
37 | watched = {}
38 | for itemID, rating in trainSet.ur[testUserInnerID]:
39 | watched[itemID] = 1
40 |
41 | # Get the top-scoring items the user hasn't seen yet:
42 | pos = 0
43 | for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
44 |     if itemID not in watched:
45 |         movieID = trainSet.to_raw_iid(itemID)
46 |         print(ml.getMovieName(int(movieID)), ratingSum)
47 |         pos += 1
48 |         if (pos >= 10):  # stop after 10 recommendations
49 |             break
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/.ipynb_checkpoints/User-Based Collaborative Filtering-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/EvaluateUserCF.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | import heapq
4 | from collections import defaultdict
5 | from operator import itemgetter
6 | from surprise.model_selection import LeaveOneOut
7 | from RecommenderMetrics import RecommenderMetrics
8 | from EvaluationData import EvaluationData
9 |
10 | def LoadMovieLensData():
11 | ml = MovieLens()
12 | print("Loading movie ratings...")
13 | data = ml.loadMovieLensLatestSmall()
14 | print("\nComputing movie popularity ranks so we can measure novelty later...")
15 | rankings = ml.getPopularityRanks()
16 | return (ml, data, rankings)
17 |
18 | ml, data, rankings = LoadMovieLensData()
19 |
20 | evalData = EvaluationData(data, rankings)
21 |
22 | # Train on the leave-one-out train set
23 | trainSet = evalData.GetLOOCVTrainSet()
24 | sim_options = {'name': 'cosine',
25 | 'user_based': True
26 | }
27 |
28 | model = KNNBasic(sim_options=sim_options)
29 | model.fit(trainSet)
30 | simsMatrix = model.compute_similarities()
31 |
32 | leftOutTestSet = evalData.GetLOOCVTestSet()
33 |
34 | # Build up a dict mapping each user to a list of (movieID, predicted rating) pairs
35 | topN = defaultdict(list)
36 | k = 10
37 | for uiid in range(trainSet.n_users):
38 | # Get top N similar users to this one
39 | similarityRow = simsMatrix[uiid]
40 |
41 | similarUsers = []
42 | for innerID, score in enumerate(similarityRow):
43 | if (innerID != uiid):
44 | similarUsers.append( (innerID, score) )
45 |
46 | kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])
47 |
48 | # Get the stuff they rated, and add up ratings for each item, weighted by user similarity
49 | candidates = defaultdict(float)
50 | for similarUser in kNeighbors:
51 | innerID = similarUser[0]
52 | userSimilarityScore = similarUser[1]
53 | theirRatings = trainSet.ur[innerID]
54 | for rating in theirRatings:
55 | candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore
56 |
57 | # Build a dictionary of stuff the user has already seen
58 | watched = {}
59 | for itemID, rating in trainSet.ur[uiid]:
60 | watched[itemID] = 1
61 |
62 | # Get top-rated items from similar users:
63 | pos = 0
64 | for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
65 |         if itemID not in watched:
66 |             movieID = trainSet.to_raw_iid(itemID)
67 |             topN[int(trainSet.to_raw_uid(uiid))].append( (int(movieID), 0.0) )
68 |             pos += 1
69 |             if (pos >= 40):  # keep at most 40 candidates per user
70 |                 break
71 |
72 | # Measure
73 | print("HR", RecommenderMetrics.HitRate(topN, leftOutTestSet))
74 |
75 |
76 |
--------------------------------------------------------------------------------
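The heart of the script above is the line candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore: each neighbor "votes" for every item they rated, weighted by how much they liked it (rating scaled to 0..1) and by how similar they are to the target user. A worked toy example with two hypothetical neighbors:

from collections import defaultdict

# (similarity to the target user, {item: rating}) for two made-up neighbors
neighbors = [(0.9, {"A": 5.0, "B": 3.0}),
             (0.4, {"B": 5.0, "C": 4.0})]

candidates = defaultdict(float)
for sim, ratings in neighbors:
    for item, rating in ratings.items():
        candidates[item] += (rating / 5.0) * sim

print(dict(candidates))  # {'A': 0.9, 'B': 0.94, 'C': 0.32}
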
/04 - Neighborhood Based Collaborative Filtering/EvaluatedAlgorithm.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 | # Print user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/EvaluationData.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/Evaluator.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 |     def __init__(self, dataset, rankings):
7 |         # Instance attribute, so separate Evaluators don't share one algorithm list
8 |         self.algorithms = []
9 |         ed = EvaluationData(dataset, rankings)
10 |         self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |         for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/Item-Based Collaborative Filtering.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Item-Based Collaborative Movie Recommendation"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Importing Dependencies"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from MovieLens import MovieLens\n",
24 | "from surprise import KNNBasic\n",
25 | "import heapq\n",
26 | "from collections import defaultdict\n",
27 | "from operator import itemgetter"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Loading Dataset"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "ml = MovieLens()\n",
44 | "data = ml.loadMovieLensLatestSmall()\n",
45 | "trainSet = data.build_full_trainset()"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "## Calculating Item Similarities using Supriselib"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "Computing the cosine similarity matrix...\n",
65 | "Done computing similarity matrix.\n",
66 | "Computing the cosine similarity matrix...\n",
67 | "Done computing similarity matrix.\n"
68 | ]
69 | }
70 | ],
71 | "source": [
72 | "sim_options = {'name': 'cosine',\n",
73 | " 'user_based': False\n",
74 | " }\n",
75 | "\n",
76 | "model = KNNBasic(sim_options=sim_options)\n",
77 | "model.fit(trainSet)\n",
78 | "simsMatrix = model.compute_similarities()"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "## Getting Top-N User Recommendations"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "testSubject = '85'\n",
95 | "k = 10"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 5,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "testUserInnerID = trainSet.to_inner_uid(testSubject)"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 6,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "# Get the top K items we rated\n",
114 | "testUserRatings = trainSet.ur[testUserInnerID]\n",
115 | "kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 7,
121 | "metadata": {},
122 | "outputs": [],
123 | "source": [
124 | "# Get similar items to stuff we liked (weighted by rating)\n",
125 | "candidates = defaultdict(float)\n",
126 | "for itemID, rating in kNeighbors:\n",
127 | " similarityRow = simsMatrix[itemID]\n",
128 | " for innerID, score in enumerate(similarityRow):\n",
129 | " candidates[innerID] += score * (rating / 5.0)"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 8,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "# Build a dictionary of stuff the user has already seen\n",
139 | "watched = {}\n",
140 | "for itemID, rating in trainSet.ur[testUserInnerID]:\n",
141 | " watched[itemID] = 1"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 9,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "name": "stdout",
151 | "output_type": "stream",
152 | "text": [
153 | "James Dean Story, The (1957) 10.0\n",
154 | "Get Real (1998) 9.987241120712646\n",
155 | "Kiss of Death (1995) 9.966881877751941\n",
156 | "Set It Off (1996) 9.963732215657119\n",
157 | "How Green Was My Valley (1941) 9.943984081065269\n",
158 | "Amos & Andrew (1993) 9.93973694500253\n",
159 | "My Crazy Life (Mi vida loca) (1993) 9.938290487546041\n",
160 | "Grace of My Heart (1996) 9.926255896645218\n",
161 | "Fanny and Alexander (Fanny och Alexander) (1982) 9.925699671455906\n",
162 | "Wild Reeds (Les roseaux sauvages) (1994) 9.916226404418774\n",
163 | "Edge of Seventeen (1998) 9.913028764691676\n"
164 | ]
165 | }
166 | ],
167 | "source": [
168 | "# Get top-rated items from similar users:\n",
169 | "pos = 0\n",
170 | "for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):\n",
171 | " if not itemID in watched:\n",
172 | " movieID = trainSet.to_raw_iid(itemID)\n",
173 | " print(ml.getMovieName(int(movieID)), ratingSum)\n",
174 | " pos += 1\n",
175 | " if (pos > 10):\n",
176 | " break"
177 | ]
178 | }
179 | ],
180 | "metadata": {
181 | "kernelspec": {
182 | "display_name": "Python 3",
183 | "language": "python",
184 | "name": "python3"
185 | },
186 | "language_info": {
187 | "codemirror_mode": {
188 | "name": "ipython",
189 | "version": 3
190 | },
191 | "file_extension": ".py",
192 | "mimetype": "text/x-python",
193 | "name": "python",
194 | "nbconvert_exporter": "python",
195 | "pygments_lexer": "ipython3",
196 | "version": "3.8.2"
197 | }
198 | },
199 | "nbformat": 4,
200 | "nbformat_minor": 4
201 | }
202 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/KNNBakeOff.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | # User-based KNN
27 | UserKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': True})
28 | evaluator.AddAlgorithm(UserKNN, "User KNN")
29 |
30 | # Item-based KNN
31 | ItemKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': False})
32 | evaluator.AddAlgorithm(ItemKNN, "Item KNN")
33 |
34 | # Just make random recommendations
35 | Random = NormalPredictor()
36 | evaluator.AddAlgorithm(Random, "Random")
37 |
38 | # Fight!
39 | evaluator.Evaluate(False)
40 |
41 | evaluator.SampleTopNRecs(ml)
42 |
--------------------------------------------------------------------------------
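KNNBakeOff configures its contenders entirely through sim_options. Surprise's other built-in similarity measures can be dropped into the same bake-off; for example, a Pearson-based user KNN (an optional extra, not part of the original script):

# Optional extra contender: Pearson correlation instead of cosine
UserKNNPearson = KNNBasic(sim_options={'name': 'pearson', 'user_based': True})
evaluator.AddAlgorithm(UserKNNPearson, "User KNN (Pearson)")
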
/04 - Neighborhood Based Collaborative Filtering/SimpleItemCF.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import KNNBasic
3 | import heapq
4 | from collections import defaultdict
5 | from operator import itemgetter
6 |
7 | testSubject = '85'
8 | k = 10
9 |
10 | ml = MovieLens()
11 | data = ml.loadMovieLensLatestSmall()
12 |
13 | trainSet = data.build_full_trainset()
14 |
15 | sim_options = {'name': 'cosine',
16 | 'user_based': False
17 | }
18 |
19 | model = KNNBasic(sim_options=sim_options)
20 | model.fit(trainSet)
21 | simsMatrix = model.compute_similarities()
22 |
23 | testUserInnerID = trainSet.to_inner_uid(testSubject)
24 |
25 | # Get the top K items we rated
26 | testUserRatings = trainSet.ur[testUserInnerID]
27 | kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])
28 |
29 | # Get similar items to stuff we liked (weighted by rating)
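   | # Each candidate's score is the sum, over our top-K rated items, of
   | #     similarity(candidate, item) * (our rating of item / 5.0)
   | # e.g. a candidate with cosine similarity 0.9 to something we rated 4.0 picks up 0.9 * 0.8 = 0.72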
30 | candidates = defaultdict(float)
31 | for itemID, rating in kNeighbors:
32 | similarityRow = simsMatrix[itemID]
33 | for innerID, score in enumerate(similarityRow):
34 | candidates[innerID] += score * (rating / 5.0)
35 |
36 | # Build a dictionary of stuff the user has already seen
37 | watched = {}
38 | for itemID, rating in trainSet.ur[testUserInnerID]:
39 | watched[itemID] = 1
40 |
41 | # Get the highest-scoring candidate items the user hasn't seen yet:
42 | pos = 0
43 | for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
44 |     if itemID not in watched:
45 |         movieID = trainSet.to_raw_iid(itemID)
46 |         print(ml.getMovieName(int(movieID)), ratingSum)
47 |         pos += 1
48 |         if (pos >= 10):  # stop once 10 unseen items have been printed
49 |             break
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/04 - Neighborhood Based Collaborative Filtering/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/04 - Neighborhood Based Collaborative Filtering/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/EvaluatedAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 |             # Measure user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/EvaluationData-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/Evaluator-checkpoint.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
 6 |     algorithms = []  # note: class-level, so this list is shared by every Evaluator instance
7 |
8 | def __init__(self, dataset, rankings):
9 | ed = EvaluationData(dataset, rankings)
10 | self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/SVD Matrix Factorization-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/SVDBakeOff-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import SVD, SVDpp
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | # SVD
27 | SVDAlg = SVD()
28 | evaluator.AddAlgorithm(SVDAlg, "SVD")
29 |
30 | # SVD++
31 | SVDPlusPlus = SVDpp()
32 | evaluator.AddAlgorithm(SVDPlusPlus, "SVD++")
33 |
34 | # Just make random recommendations
35 | Random = NormalPredictor()
36 | evaluator.AddAlgorithm(Random, "Random")
37 |
38 | # Fight!
39 | evaluator.Evaluate(False)
40 |
41 | evaluator.SampleTopNRecs(ml)
42 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/.ipynb_checkpoints/SVDTuning-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from surprise import SVD
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 | from surprise.model_selection import GridSearchCV
6 |
7 | import random
8 | import numpy as np
9 |
10 | def LoadMovieLensData():
11 | ml = MovieLens()
12 | print("Loading movie ratings...")
13 | data = ml.loadMovieLensLatestSmall()
14 | print("\nComputing movie popularity ranks so we can measure novelty later...")
15 | rankings = ml.getPopularityRanks()
16 | return (ml, data, rankings)
17 |
18 | np.random.seed(0)
19 | random.seed(0)
20 |
21 | # Load up common data set for the recommender algorithms
22 | (ml, evaluationData, rankings) = LoadMovieLensData()
23 |
24 | print("Searching for best parameters...")
25 | param_grid = {'n_epochs': [20, 30], 'lr_all': [0.005, 0.010],
26 | 'n_factors': [50, 100]}
27 | gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
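   | # 2 x 2 x 2 = 8 parameter combinations, each cross-validated over 3 folds (24 fits in total)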
28 |
29 | gs.fit(evaluationData)
30 |
31 | # best RMSE score
32 | print("Best RMSE score attained: ", gs.best_score['rmse'])
33 |
34 | # combination of parameters that gave the best RMSE score
35 | print(gs.best_params['rmse'])
36 |
37 | # Construct an Evaluator to, you know, evaluate them
38 | evaluator = Evaluator(evaluationData, rankings)
39 |
40 | params = gs.best_params['rmse']
41 | SVDtuned = SVD(n_epochs = params['n_epochs'], lr_all = params['lr_all'], n_factors = params['n_factors'])
42 | evaluator.AddAlgorithm(SVDtuned, "SVD - Tuned")
43 |
44 | SVDUntuned = SVD()
45 | evaluator.AddAlgorithm(SVDUntuned, "SVD - Untuned")
46 |
47 | # Just make random recommendations
48 | Random = NormalPredictor()
49 | evaluator.AddAlgorithm(Random, "Random")
50 |
51 | # Fight!
52 | evaluator.Evaluate(False)
53 |
54 | evaluator.SampleTopNRecs(ml)
55 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/EvaluatedAlgorithm.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 |             # Measure user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/EvaluationData.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
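   |         # The anti-test set is every (user, item) pair this user has NOT rated; Surprise's
   |         # testset format requires a "true" rating, so the global mean is used as a placeholder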
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/Evaluator.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
 6 |     algorithms = []  # note: class-level, so this list is shared by every Evaluator instance
7 |
8 | def __init__(self, dataset, rankings):
9 | ed = EvaluationData(dataset, rankings)
10 | self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/RecommenderMetrics.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from surprise import accuracy
4 | from collections import defaultdict
5 |
6 | class RecommenderMetrics:
7 |
8 | def MAE(predictions):
9 | return accuracy.mae(predictions, verbose=False)
10 |
11 | def RMSE(predictions):
12 | return accuracy.rmse(predictions, verbose=False)
13 |
14 | def GetTopN(predictions, n=10, minimumRating=4.0):
15 | topN = defaultdict(list)
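   |         # topN maps int(userID) -> [(movieID, estimatedRating), ...], sorted by estimated
   |         # rating (descending) and truncated to the n best entries below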
16 |
17 |
18 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
19 | if (estimatedRating >= minimumRating):
20 | topN[int(userID)].append((int(movieID), estimatedRating))
21 |
22 | for userID, ratings in topN.items():
23 | ratings.sort(key=lambda x: x[1], reverse=True)
24 | topN[int(userID)] = ratings[:n]
25 |
26 | return topN
27 |
28 | def HitRate(topNPredicted, leftOutPredictions):
29 | hits = 0
30 | total = 0
31 |
32 | # For each left-out rating
33 | for leftOut in leftOutPredictions:
34 | userID = leftOut[0]
35 | leftOutMovieID = leftOut[1]
36 | # Is it in the predicted top 10 for this user?
37 | hit = False
38 | for movieID, predictedRating in topNPredicted[int(userID)]:
39 | if (int(leftOutMovieID) == int(movieID)):
40 | hit = True
41 | break
42 | if (hit) :
43 | hits += 1
44 |
45 | total += 1
46 |
47 |         # Compute overall hit rate
48 | return hits/total
49 |
50 | def CumulativeHitRate(topNPredicted, leftOutPredictions, ratingCutoff=0):
51 | hits = 0
52 | total = 0
53 |
54 | # For each left-out rating
55 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
56 | # Only look at ability to recommend things the users actually liked...
57 | if (actualRating >= ratingCutoff):
58 | # Is it in the predicted top 10 for this user?
59 | hit = False
60 | for movieID, predictedRating in topNPredicted[int(userID)]:
61 | if (int(leftOutMovieID) == movieID):
62 | hit = True
63 | break
64 | if (hit) :
65 | hits += 1
66 |
67 | total += 1
68 |
69 |         # Compute overall hit rate
70 | return hits/total
71 |
72 | def RatingHitRate(topNPredicted, leftOutPredictions):
73 | hits = defaultdict(float)
74 | total = defaultdict(float)
75 |
76 | # For each left-out rating
77 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
78 | # Is it in the predicted top N for this user?
79 | hit = False
80 | for movieID, predictedRating in topNPredicted[int(userID)]:
81 | if (int(leftOutMovieID) == movieID):
82 | hit = True
83 | break
84 | if (hit) :
85 | hits[actualRating] += 1
86 |
87 | total[actualRating] += 1
88 |
89 |         # Print the hit rate broken down by rating value
90 | for rating in sorted(hits.keys()):
91 | print (rating, hits[rating] / total[rating])
92 |
93 | def AverageReciprocalHitRank(topNPredicted, leftOutPredictions):
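   |         # Each left-out item found at rank r in its user's top-N contributes 1/r:
   |         # a hit at rank 1 scores 1.0, a hit at rank 3 scores ~0.33, and a miss scores 0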
94 | summation = 0
95 | total = 0
96 | # For each left-out rating
97 | for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
98 | # Is it in the predicted top N for this user?
99 | hitRank = 0
100 | rank = 0
101 | for movieID, predictedRating in topNPredicted[int(userID)]:
102 | rank = rank + 1
103 | if (int(leftOutMovieID) == movieID):
104 | hitRank = rank
105 | break
106 | if (hitRank > 0) :
107 | summation += 1.0 / hitRank
108 |
109 | total += 1
110 |
111 | return summation / total
112 |
113 | # What percentage of users have at least one "good" recommendation
114 | def UserCoverage(topNPredicted, numUsers, ratingThreshold=0):
115 | hits = 0
116 | for userID in topNPredicted.keys():
117 | hit = False
118 | for movieID, predictedRating in topNPredicted[userID]:
119 | if (predictedRating >= ratingThreshold):
120 | hit = True
121 | break
122 | if (hit):
123 | hits += 1
124 |
125 | return hits / numUsers
126 |
127 | def Diversity(topNPredicted, simsAlgo):
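   |         # Reported as 1 - S, where S is the mean pairwise similarity among each user's
   |         # top-N items, so higher values mean less similar (more diverse) recommendations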
128 | n = 0
129 | total = 0
130 | simsMatrix = simsAlgo.compute_similarities()
131 | for userID in topNPredicted.keys():
132 | pairs = itertools.combinations(topNPredicted[userID], 2)
133 | for pair in pairs:
134 | movie1 = pair[0][0]
135 | movie2 = pair[1][0]
136 | innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))
137 | innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))
138 | similarity = simsMatrix[innerID1][innerID2]
139 | total += similarity
140 | n += 1
141 |
142 | S = total / n
143 | return (1-S)
144 |
145 | def Novelty(topNPredicted, rankings):
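   |         # Averages the popularity rank of recommended items (rank 1 = most popular),
   |         # so a higher score means the recommender favors more obscure movies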
146 | n = 0
147 | total = 0
148 | for userID in topNPredicted.keys():
149 | for rating in topNPredicted[userID]:
150 | movieID = rating[0]
151 | rank = rankings[movieID]
152 | total += rank
153 | n += 1
154 | return total / n
155 |
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/__pycache__/EvaluatedAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/05 - Matrix Factorization Methods/__pycache__/EvaluatedAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/__pycache__/EvaluationData.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/05 - Matrix Factorization Methods/__pycache__/EvaluationData.cpython-38.pyc
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/__pycache__/Evaluator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/05 - Matrix Factorization Methods/__pycache__/Evaluator.cpython-38.pyc
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/05 - Matrix Factorization Methods/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/05 - Matrix Factorization Methods/__pycache__/RecommenderMetrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/05 - Matrix Factorization Methods/__pycache__/RecommenderMetrics.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/AutoRec-checkpoint.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | class AutoRec(object):
5 |
6 | def __init__(self, visibleDimensions, epochs=200, hiddenDimensions=50, learningRate=0.1, batchSize=100):
7 |
8 | self.visibleDimensions = visibleDimensions
9 | self.epochs = epochs
10 | self.hiddenDimensions = hiddenDimensions
11 | self.learningRate = learningRate
12 | self.batchSize = batchSize
13 | self.optimizer = tf.keras.optimizers.RMSprop(self.learningRate)
14 |
15 |
16 | def Train(self, X):
17 |
18 | for epoch in range(self.epochs):
19 | for i in range(0, X.shape[0], self.batchSize):
20 | epochX = X[i:i+self.batchSize]
21 | self.run_optimization(epochX)
22 |
23 |
24 | print("Trained epoch ", epoch)
25 |
26 | def GetRecommendations(self, inputUser):
27 |
28 | # Feed through a single user and return predictions from the output layer.
29 | rec = self.neural_net(inputUser)
30 |
31 |         # rec is an EagerTensor, so it can be indexed directly; return the first (and only) row of predictions.
32 | return rec[0]
33 |
34 |
35 | def neural_net(self, inputUser):
36 |
37 | #tf.set_random_seed(0)
38 |
39 |         # Create variables for the encoding (visible->hidden) and decoding (hidden->output)
40 |         # weights and biases, randomly initialized -- only once, so training updates persist
41 |         if not hasattr(self, 'weights'):
42 |             self.weights = {
43 |                 'h1': tf.Variable(tf.random.normal([self.visibleDimensions, self.hiddenDimensions])),
44 |                 'out': tf.Variable(tf.random.normal([self.hiddenDimensions, self.visibleDimensions]))
45 |             }
46 |             self.biases = {
47 |                 'b1': tf.Variable(tf.random.normal([self.hiddenDimensions])),
48 |                 'out': tf.Variable(tf.random.normal([self.visibleDimensions]))
49 |             }
50 |
51 | # Create the input layer
52 | self.inputLayer = inputUser
53 |
54 | # hidden layer
55 | hidden = tf.nn.sigmoid(tf.add(tf.matmul(self.inputLayer, self.weights['h1']), self.biases['b1']))
56 |
57 | # output layer for our predictions.
58 | self.outputLayer = tf.nn.sigmoid(tf.add(tf.matmul(hidden, self.weights['out']), self.biases['out']))
59 |
60 | return self.outputLayer
61 |
62 | def run_optimization(self, inputUser):
63 | with tf.GradientTape() as g:
64 | pred = self.neural_net(inputUser)
65 | loss = tf.keras.losses.MSE(inputUser, pred)
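   |             # Note: this penalizes reconstruction error on every item, including unrated
   |             # (zero) entries -- a simplification of the AutoRec paper, which only
   |             # backpropagates error through observed ratings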
66 |
67 | trainable_variables = list(self.weights.values()) + list(self.biases.values())
68 |
69 | gradients = g.gradient(loss, trainable_variables)
70 |
71 | self.optimizer.apply_gradients(zip(gradients, trainable_variables))
72 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/AutoRecAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | import numpy as np
4 | from AutoRec import AutoRec
5 |
6 | class AutoRecAlgorithm(AlgoBase):
7 |
8 | def __init__(self, epochs=100, hiddenDim=100, learningRate=0.01, batchSize=100, sim_options={}):
9 | AlgoBase.__init__(self)
10 | self.epochs = epochs
11 | self.hiddenDim = hiddenDim
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 | def fit(self, trainset):
16 | AlgoBase.fit(self, trainset)
17 |
18 | numUsers = trainset.n_users
19 | numItems = trainset.n_items
20 |
21 | trainingMatrix = np.zeros([numUsers, numItems], dtype=np.float32)
22 |
23 | for (uid, iid, rating) in trainset.all_ratings():
24 | trainingMatrix[int(uid), int(iid)] = rating / 5.0
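   |             # Ratings are scaled into [0, 1] to match the sigmoid output layer;
   |             # predictions are scaled back up by 5.0 below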
25 |
26 |         # Create an AutoRec autoencoder with one visible node per item
27 | autoRec = AutoRec(trainingMatrix.shape[1], hiddenDimensions=self.hiddenDim, learningRate=self.learningRate, batchSize=self.batchSize, epochs=self.epochs)
28 | autoRec.Train(trainingMatrix)
29 |
30 | self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
31 |
32 | for uiid in range(trainset.n_users):
33 | if (uiid % 50 == 0):
34 | print("Processing user ", uiid)
35 | recs = autoRec.GetRecommendations([trainingMatrix[uiid]])
36 |
37 | for itemID, rec in enumerate(recs):
38 | self.predictedRatings[uiid, itemID] = rec * 5.0
39 |
40 | return self
41 |
42 |
43 | def estimate(self, u, i):
44 |
45 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
46 |             raise PredictionImpossible('User and/or item is unknown.')
47 |
48 | rating = self.predictedRatings[u, i]
49 |
50 | if (rating < 0.001):
51 | raise PredictionImpossible('No valid prediction exists.')
52 |
53 | return rating
54 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/AutoRecBakeOff-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from AutoRecAlgorithm import AutoRecAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | #Autoencoder
27 | AutoRec = AutoRecAlgorithm()
28 | evaluator.AddAlgorithm(AutoRec, "AutoRec")
29 |
30 | # Just make random recommendations
31 | Random = NormalPredictor()
32 | evaluator.AddAlgorithm(Random, "Random")
33 |
34 | # Fight!
35 | evaluator.Evaluate(True)
36 |
37 | evaluator.SampleTopNRecs(ml)
38 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/EvaluatedAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 |             # Measure user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/EvaluationData-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/Evaluator-checkpoint.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
 6 |     algorithms = []  # note: class-level, so this list is shared by every Evaluator instance
7 |
8 | def __init__(self, dataset, rankings):
9 | ed = EvaluationData(dataset, rankings)
10 | self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/RBM-checkpoint.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | class RBM(object):
5 |
6 | def __init__(self, visibleDimensions, epochs=20, hiddenDimensions=50, ratingValues=10, learningRate=0.001, batchSize=100):
7 |
8 | self.visibleDimensions = visibleDimensions
9 | self.epochs = epochs
10 | self.hiddenDimensions = hiddenDimensions
11 | self.ratingValues = ratingValues
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 |
16 | def Train(self, X):
17 |
18 | for epoch in range(self.epochs):
19 | np.random.shuffle(X)
20 |
21 | trX = np.array(X)
22 | for i in range(0, trX.shape[0], self.batchSize):
23 | epochX = trX[i:i+self.batchSize]
24 | self.MakeGraph(epochX)
25 |
26 | print("Trained epoch ", epoch)
27 |
28 |
29 | def GetRecommendations(self, inputUser):
30 |
31 | feed = self.MakeHidden(inputUser)
32 | rec = self.MakeVisible(feed)
33 | return rec[0]
34 |
35 | def MakeGraph(self, inputUser):
36 |
37 |         # Initialize weights and biases only once; re-creating them each batch would discard all learning
38 |         if not hasattr(self, 'weights'):
39 |             maxWeight = 4.0 * np.sqrt(6.0 / (self.hiddenDimensions + self.visibleDimensions))
40 |             self.weights = tf.Variable(tf.random.uniform([self.visibleDimensions, self.hiddenDimensions], minval=-maxWeight, maxval=maxWeight), tf.float32, name="weights")
41 |             self.hiddenBias = tf.Variable(tf.zeros([self.hiddenDimensions], tf.float32, name="hiddenBias"))
42 |             self.visibleBias = tf.Variable(tf.zeros([self.visibleDimensions], tf.float32, name="visibleBias"))
43 |
44 |         # Perform Gibbs sampling for Contrastive Divergence. Per the paper we use k=1
45 |         # (a single forward/backward pass) rather than iterating, since it works well in practice.
46 |
47 | # Forward pass
48 | # Sample hidden layer given visible...
49 | # Get tensor of hidden probabilities
50 | hProb0 = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
51 | # Sample from all of the distributions
52 | hSample = tf.nn.relu(tf.sign(hProb0 - tf.random.uniform(tf.shape(hProb0))))
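   |         # sign(p - uniform) is +1 with probability p and -1 otherwise, and relu maps
   |         # that to 1/0 -- i.e. an independent Bernoulli sample for each hidden unit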
53 | # Stitch it together
54 | forward = tf.matmul(tf.transpose(inputUser), hSample)
55 |
56 | # Backward pass
57 | # Reconstruct visible layer given hidden layer sample
58 | v = tf.matmul(hSample, tf.transpose(self.weights)) + self.visibleBias
59 |
60 | # Build up our mask for missing ratings
61 | vMask = tf.sign(inputUser) # Make sure everything is 0 or 1
62 | vMask3D = tf.reshape(vMask, [tf.shape(v)[0], -1, self.ratingValues]) # Reshape into arrays of individual ratings
63 |         vMask3D = tf.reduce_max(vMask3D, axis=[2], keepdims=True) # Collapse each rating's one-hot slots: 1 where a rating exists, 0 where it is missing
64 |
65 | # Extract rating vectors for each individual set of 10 rating binary values
66 | v = tf.reshape(v, [tf.shape(v)[0], -1, self.ratingValues])
67 | vProb = tf.nn.softmax(v * vMask3D) # Apply softmax activation function
68 | vProb = tf.reshape(vProb, [tf.shape(v)[0], -1]) # And shove them back into the flattened state. Reconstruction is done now.
69 | # Stitch it together to define the backward pass and updated hidden biases
70 | hProb1 = tf.nn.sigmoid(tf.matmul(vProb, self.weights) + self.hiddenBias)
71 | backward = tf.matmul(tf.transpose(vProb), hProb1)
72 |
73 |         # Apply this batch's contrastive divergence update:
74 |         # (forward statistics - backward statistics), scaled by the learning rate
75 | weightUpdate = self.weights.assign_add(self.learningRate * (forward - backward))
76 | # Update hidden bias, minimizing the divergence in the hidden nodes
77 | hiddenBiasUpdate = self.hiddenBias.assign_add(self.learningRate * tf.reduce_mean(hProb0 - hProb1, 0))
78 |         # Update the visible bias, minimizing divergence in the visible results
79 | visibleBiasUpdate = self.visibleBias.assign_add(self.learningRate * tf.reduce_mean(inputUser - vProb, 0))
80 |
81 | self.update = [weightUpdate, hiddenBiasUpdate, visibleBiasUpdate]
82 |
83 |     def MakeHidden(self, inputUser):
84 |         # Project the user's ratings into the hidden layer using the learned weights (no training step here)
85 |         hidden = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
86 |         return hidden
87 |
88 | def MakeVisible(self, feed):
89 | visible = tf.nn.sigmoid(tf.matmul(feed, tf.transpose(self.weights)) + self.visibleBias)
90 | #self.MakeGraph(feed)
91 | return visible
92 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/RBMAlgorithm-checkpoint.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | import numpy as np
4 | from RBM import RBM
5 |
6 | class RBMAlgorithm(AlgoBase):
7 |
8 | def __init__(self, epochs=20, hiddenDim=100, learningRate=0.001, batchSize=100, sim_options={}):
9 | AlgoBase.__init__(self)
10 | self.epochs = epochs
11 | self.hiddenDim = hiddenDim
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 | def softmax(self, x):
16 | return np.exp(x) / np.sum(np.exp(x), axis=0)
17 |
18 | def fit(self, trainset):
19 | AlgoBase.fit(self, trainset)
20 |
21 | numUsers = trainset.n_users
22 | numItems = trainset.n_items
23 |
24 | trainingMatrix = np.zeros([numUsers, numItems, 10], dtype=np.float32)
25 |
26 | for (uid, iid, rating) in trainset.all_ratings():
27 | adjustedRating = int(float(rating)*2.0) - 1
28 | trainingMatrix[int(uid), int(iid), adjustedRating] = 1
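   |             # Ratings come in half-star steps (0.5 .. 5.0); rating*2 - 1 maps them onto the
   |             # ten one-hot slots 0 .. 9 (e.g. a 3.5-star rating lights up slot 6)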
29 |
30 | # Flatten to a 2D array, with nodes for each possible rating type on each possible item, for every user.
31 | trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])
32 |
33 | # Create an RBM with (num items * rating values) visible nodes
34 | rbm = RBM(trainingMatrix.shape[1], hiddenDimensions=self.hiddenDim, learningRate=self.learningRate, batchSize=self.batchSize, epochs=self.epochs)
35 | rbm.Train(trainingMatrix)
36 |
37 | self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
38 | for uiid in range(trainset.n_users):
39 | if (uiid % 50 == 0):
40 | print("Processing user ", uiid)
41 | recs = rbm.GetRecommendations([trainingMatrix[uiid]])
42 | recs = np.reshape(recs, [numItems, 10])
43 |
44 | for itemID, rec in enumerate(recs):
45 | # The obvious thing would be to just take the rating with the highest score:
46 | #rating = rec.argmax()
47 | # ... but this just leads to a huge multi-way tie for 5-star predictions.
48 | # The paper suggests performing normalization over K values to get probabilities
49 | # and take the expectation as your prediction, so we'll do that instead:
50 | normalized = self.softmax(rec)
51 | rating = np.average(np.arange(10), weights=normalized)
52 | self.predictedRatings[uiid, itemID] = (rating + 1) * 0.5
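   |                 # (rating + 1) * 0.5 inverts the earlier encoding, mapping the expected
   |                 # slot index 0 .. 9 back onto the 0.5 .. 5.0 star scale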
53 |
54 | return self
55 |
56 |
57 | def estimate(self, u, i):
58 |
59 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
60 |             raise PredictionImpossible('User and/or item is unknown.')
61 |
62 | rating = self.predictedRatings[u, i]
63 |
64 | if (rating < 0.001):
65 | raise PredictionImpossible('No valid prediction exists.')
66 |
67 | return rating
68 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/RBMBakeOff-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from RBMAlgorithm import RBMAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | #RBM
27 | RBM = RBMAlgorithm(epochs=20)
28 | evaluator.AddAlgorithm(RBM, "RBM")
29 |
30 | # Just make random recommendations
31 | Random = NormalPredictor()
32 | evaluator.AddAlgorithm(Random, "Random")
33 |
34 | # Fight!
35 | evaluator.Evaluate(True)
36 |
37 | evaluator.SampleTopNRecs(ml)
38 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/RBMTuning-checkpoint.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from RBMAlgorithm import RBMAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 | from surprise.model_selection import GridSearchCV
6 |
7 | import random
8 | import numpy as np
9 |
10 | def LoadMovieLensData():
11 | ml = MovieLens()
12 | print("Loading movie ratings...")
13 | data = ml.loadMovieLensLatestSmall()
14 | print("\nComputing movie popularity ranks so we can measure novelty later...")
15 | rankings = ml.getPopularityRanks()
16 | return (ml, data, rankings)
17 |
18 | np.random.seed(0)
19 | random.seed(0)
20 |
21 | # Load up common data set for the recommender algorithms
22 | (ml, evaluationData, rankings) = LoadMovieLensData()
23 |
24 | print("Searching for best parameters...")
25 | param_grid = {'hiddenDim': [20, 10], 'learningRate': [0.1, 0.01]}
26 | gs = GridSearchCV(RBMAlgorithm, param_grid, measures=['rmse', 'mae'], cv=3)
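   | # 2 x 2 = 4 parameter combinations, each cross-validated over 3 folds (12 RBM training runs)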
27 |
28 | gs.fit(evaluationData)
29 |
30 | # best RMSE score
31 | print("Best RMSE score attained: ", gs.best_score['rmse'])
32 |
33 | # combination of parameters that gave the best RMSE score
34 | print(gs.best_params['rmse'])
35 |
36 | # Construct an Evaluator to, you know, evaluate them
37 | evaluator = Evaluator(evaluationData, rankings)
38 |
39 | params = gs.best_params['rmse']
40 | RBMtuned = RBMAlgorithm(hiddenDim = params['hiddenDim'], learningRate = params['learningRate'])
41 | evaluator.AddAlgorithm(RBMtuned, "RBM - Tuned")
42 |
43 | RBMUntuned = RBMAlgorithm()
44 | evaluator.AddAlgorithm(RBMUntuned, "RBM - Untuned")
45 |
46 | # Just make random recommendations
47 | Random = NormalPredictor()
48 | evaluator.AddAlgorithm(Random, "Random")
49 |
50 | # Fight!
51 | evaluator.Evaluate(False)
52 |
53 | evaluator.SampleTopNRecs(ml)
54 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/Recommendations using Restricted Boltzmann Machine(RBM)-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/.ipynb_checkpoints/Recommendations with Deep Neural Networks-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 4
6 | }
7 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/AutoRec.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | class AutoRec(object):
5 |
6 | def __init__(self, visibleDimensions, epochs=200, hiddenDimensions=50, learningRate=0.1, batchSize=100):
7 |
8 | self.visibleDimensions = visibleDimensions
9 | self.epochs = epochs
10 | self.hiddenDimensions = hiddenDimensions
11 | self.learningRate = learningRate
12 | self.batchSize = batchSize
13 | self.optimizer = tf.keras.optimizers.RMSprop(self.learningRate)
14 |
15 |
16 | def Train(self, X):
17 |
18 | for epoch in range(self.epochs):
19 | for i in range(0, X.shape[0], self.batchSize):
20 | epochX = X[i:i+self.batchSize]
21 | self.run_optimization(epochX)
22 |
23 |
24 | print("Trained epoch ", epoch)
25 |
26 | def GetRecommendations(self, inputUser):
27 |
28 | # Feed through a single user and return predictions from the output layer.
29 | rec = self.neural_net(inputUser)
30 |
31 |         # rec is an eager tensor; return the prediction row for the single user we fed in.
32 | return rec[0]
33 |
34 |
35 |     def neural_net(self, inputUser):
36 |
37 |         # Create variables for the encoding (visible->hidden) and decoding (hidden->output)
38 |         # weights and biases only once, randomly initialized; re-creating them on every
39 |         # call would throw away all prior training, so reuse them if they already exist.
40 |         if not hasattr(self, 'weights'):
41 |             self.weights = {
42 |                 'h1': tf.Variable(tf.random.normal([self.visibleDimensions, self.hiddenDimensions])),
43 |                 'out': tf.Variable(tf.random.normal([self.hiddenDimensions, self.visibleDimensions]))
44 |             }
45 |             # Create biases
46 |             self.biases = {
47 |                 'b1': tf.Variable(tf.random.normal([self.hiddenDimensions])),
48 |                 'out': tf.Variable(tf.random.normal([self.visibleDimensions]))
49 |             }
50 |
51 | # Create the input layer
52 | self.inputLayer = inputUser
53 |
54 | # hidden layer
55 | hidden = tf.nn.sigmoid(tf.add(tf.matmul(self.inputLayer, self.weights['h1']), self.biases['b1']))
56 |
57 | # output layer for our predictions.
58 | self.outputLayer = tf.nn.sigmoid(tf.add(tf.matmul(hidden, self.weights['out']), self.biases['out']))
59 |
60 | return self.outputLayer
61 |
62 | def run_optimization(self, inputUser):
63 | with tf.GradientTape() as g:
64 | pred = self.neural_net(inputUser)
65 | loss = tf.keras.losses.MSE(inputUser, pred)
66 |
67 | trainable_variables = list(self.weights.values()) + list(self.biases.values())
68 |
69 | gradients = g.gradient(loss, trainable_variables)
70 |
71 | self.optimizer.apply_gradients(zip(gradients, trainable_variables))
72 |
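
A note on the loss: tf.keras.losses.MSE above compares the reconstruction against the full input row, so unrated items are treated as true ratings of zero. The original AutoRec paper instead masks the loss to observed entries only. A minimal sketch of that variant (hypothetical method name; it assumes the same weights/biases layout as above):

    def run_optimization_masked(self, inputUser):
        with tf.GradientTape() as g:
            pred = self.neural_net(inputUser)
            mask = tf.sign(inputUser)  # 1 where a rating exists, 0 elsewhere
            # Mean squared error over observed ratings only
            loss = tf.reduce_sum(tf.square((inputUser - pred) * mask)) / tf.reduce_sum(mask)
        trainable_variables = list(self.weights.values()) + list(self.biases.values())
        gradients = g.gradient(loss, trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))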
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/AutoRecAlgorithm.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | import numpy as np
4 | from AutoRec import AutoRec
5 |
6 | class AutoRecAlgorithm(AlgoBase):
7 |
8 | def __init__(self, epochs=100, hiddenDim=100, learningRate=0.01, batchSize=100, sim_options={}):
9 | AlgoBase.__init__(self)
10 | self.epochs = epochs
11 | self.hiddenDim = hiddenDim
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 | def fit(self, trainset):
16 | AlgoBase.fit(self, trainset)
17 |
18 | numUsers = trainset.n_users
19 | numItems = trainset.n_items
20 |
21 | trainingMatrix = np.zeros([numUsers, numItems], dtype=np.float32)
22 |
23 | for (uid, iid, rating) in trainset.all_ratings():
24 | trainingMatrix[int(uid), int(iid)] = rating / 5.0
25 |
26 |         # Create an AutoRec autoencoder with one visible node per item
27 | autoRec = AutoRec(trainingMatrix.shape[1], hiddenDimensions=self.hiddenDim, learningRate=self.learningRate, batchSize=self.batchSize, epochs=self.epochs)
28 | autoRec.Train(trainingMatrix)
29 |
30 | self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
31 |
32 | for uiid in range(trainset.n_users):
33 | if (uiid % 50 == 0):
34 | print("Processing user ", uiid)
35 | recs = autoRec.GetRecommendations([trainingMatrix[uiid]])
36 |
37 | for itemID, rec in enumerate(recs):
38 | self.predictedRatings[uiid, itemID] = rec * 5.0
39 |
40 | return self
41 |
42 |
43 | def estimate(self, u, i):
44 |
45 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
46 |             raise PredictionImpossible('User and/or item is unknown.')
47 |
48 | rating = self.predictedRatings[u, i]
49 |
50 | if (rating < 0.001):
51 | raise PredictionImpossible('No valid prediction exists.')
52 |
53 | return rating
54 |
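
Since AutoRecAlgorithm subclasses surprise's AlgoBase, it also works with surprise's stock tooling; a quick sanity-check sketch using cross-validation (the epoch count here is arbitrary):

    from surprise.model_selection import cross_validate
    from MovieLens import MovieLens
    from AutoRecAlgorithm import AutoRecAlgorithm

    ml = MovieLens()
    data = ml.loadMovieLensLatestSmall()
    # 3-fold cross-validation of RMSE and MAE, as with any surprise algorithm
    cross_validate(AutoRecAlgorithm(epochs=20), data, measures=['RMSE', 'MAE'], cv=3, verbose=True)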
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/AutoRecBakeOff.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from AutoRecAlgorithm import AutoRecAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | #Autoencoder
27 | AutoRec = AutoRecAlgorithm()
28 | evaluator.AddAlgorithm(AutoRec, "AutoRec")
29 |
30 | # Just make random recommendations
31 | Random = NormalPredictor()
32 | evaluator.AddAlgorithm(Random, "Random")
33 |
34 | # Fight!
35 | evaluator.Evaluate(True)
36 |
37 | evaluator.SampleTopNRecs(ml)
38 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/EvaluatedAlgorithm.py:
--------------------------------------------------------------------------------
1 | from RecommenderMetrics import RecommenderMetrics
2 | from EvaluationData import EvaluationData
3 |
4 | class EvaluatedAlgorithm:
5 |
6 | def __init__(self, algorithm, name):
7 | self.algorithm = algorithm
8 | self.name = name
9 |
10 | def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
11 | metrics = {}
12 | # Compute accuracy
13 | if (verbose):
14 | print("Evaluating accuracy...")
15 | self.algorithm.fit(evaluationData.GetTrainSet())
16 | predictions = self.algorithm.test(evaluationData.GetTestSet())
17 | metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
18 | metrics["MAE"] = RecommenderMetrics.MAE(predictions)
19 |
20 | if (doTopN):
21 | # Evaluate top-10 with Leave One Out testing
22 | if (verbose):
23 | print("Evaluating top-N with leave-one-out...")
24 | self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
25 | leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
26 | # Build predictions for all ratings not in the training set
27 | allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
28 | # Compute top 10 recs for each user
29 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
30 | if (verbose):
31 | print("Computing hit-rate and rank metrics...")
32 | # See how often we recommended a movie the user actually rated
33 | metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
34 | # See how often we recommended a movie the user actually liked
35 | metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
36 | # Compute ARHR
37 | metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)
38 |
39 | #Evaluate properties of recommendations on full training set
40 | if (verbose):
41 | print("Computing recommendations with full data set...")
42 | self.algorithm.fit(evaluationData.GetFullTrainSet())
43 | allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
44 | topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
45 | if (verbose):
46 | print("Analyzing coverage, diversity, and novelty...")
47 | # Print user coverage with a minimum predicted rating of 4.0:
48 | metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted,
49 | evaluationData.GetFullTrainSet().n_users,
50 | ratingThreshold=4.0)
51 | # Measure diversity of recommendations:
52 | metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
53 |
54 | # Measure novelty (average popularity rank of recommendations):
55 | metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted,
56 | evaluationData.GetPopularityRankings())
57 |
58 | if (verbose):
59 | print("Analysis complete.")
60 |
61 | return metrics
62 |
63 | def GetName(self):
64 | return self.name
65 |
66 | def GetAlgorithm(self):
67 | return self.algorithm
68 |
69 |
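For reference, the hit-rate style metrics used above reduce to a loop over the left-out predictions; a stripped-down sketch of plain hit rate (assuming, as in RecommenderMetrics, that topNPredicted maps inner user IDs to lists of (movieID, estimatedRating) pairs):

    def HitRate(topNPredicted, leftOutPredictions):
        hits = 0
        # surprise predictions unpack as (userID, movieID, actualRating, estimatedRating, details)
        for userID, leftOutMovieID, _, _, _ in leftOutPredictions:
            if any(int(movieID) == int(leftOutMovieID)
                   for movieID, _ in topNPredicted[int(userID)]):
                hits += 1
        return hits / len(leftOutPredictions)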
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/EvaluationData.py:
--------------------------------------------------------------------------------
1 | from surprise.model_selection import train_test_split
2 | from surprise.model_selection import LeaveOneOut
3 | from surprise import KNNBaseline
4 |
5 | class EvaluationData:
6 |
7 | def __init__(self, data, popularityRankings):
8 |
9 | self.rankings = popularityRankings
10 |
11 | #Build a full training set for evaluating overall properties
12 | self.fullTrainSet = data.build_full_trainset()
13 | self.fullAntiTestSet = self.fullTrainSet.build_anti_testset()
14 |
15 | #Build a 75/25 train/test split for measuring accuracy
16 | self.trainSet, self.testSet = train_test_split(data, test_size=.25, random_state=1)
17 |
18 | #Build a "leave one out" train/test split for evaluating top-N recommenders
19 | #And build an anti-test-set for building predictions
20 | LOOCV = LeaveOneOut(n_splits=1, random_state=1)
21 | for train, test in LOOCV.split(data):
22 | self.LOOCVTrain = train
23 | self.LOOCVTest = test
24 |
25 | self.LOOCVAntiTestSet = self.LOOCVTrain.build_anti_testset()
26 |
27 |         #Compute similarity matrix between items so we can measure diversity
28 | sim_options = {'name': 'cosine', 'user_based': False}
29 | self.simsAlgo = KNNBaseline(sim_options=sim_options)
30 | self.simsAlgo.fit(self.fullTrainSet)
31 |
32 | def GetFullTrainSet(self):
33 | return self.fullTrainSet
34 |
35 | def GetFullAntiTestSet(self):
36 | return self.fullAntiTestSet
37 |
38 | def GetAntiTestSetForUser(self, testSubject):
39 | trainset = self.fullTrainSet
40 | fill = trainset.global_mean
41 | anti_testset = []
42 | u = trainset.to_inner_uid(str(testSubject))
43 | user_items = set([j for (j, _) in trainset.ur[u]])
44 | anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
45 | i in trainset.all_items() if
46 | i not in user_items]
47 | return anti_testset
48 |
49 | def GetTrainSet(self):
50 | return self.trainSet
51 |
52 | def GetTestSet(self):
53 | return self.testSet
54 |
55 | def GetLOOCVTrainSet(self):
56 | return self.LOOCVTrain
57 |
58 | def GetLOOCVTestSet(self):
59 | return self.LOOCVTest
60 |
61 | def GetLOOCVAntiTestSet(self):
62 | return self.LOOCVAntiTestSet
63 |
64 | def GetSimilarities(self):
65 | return self.simsAlgo
66 |
67 | def GetPopularityRankings(self):
68 | return self.rankings
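
GetAntiTestSetForUser mirrors surprise's build_anti_testset but restricted to a single user: it returns every movie that user has not rated, filled with the global mean rating so test() can score it. A small usage sketch (user 85 is just an example ID):

    evalData = EvaluationData(data, rankings)
    candidates = evalData.GetAntiTestSetForUser(85)
    # Each entry is (raw user ID, raw movie ID, global-mean fill rating)
    print(len(candidates), candidates[0])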
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/Evaluator.py:
--------------------------------------------------------------------------------
1 | from EvaluationData import EvaluationData
2 | from EvaluatedAlgorithm import EvaluatedAlgorithm
3 |
4 | class Evaluator:
5 |
6 |     def __init__(self, dataset, rankings):
7 |         # Keep the algorithm list per-instance; a class-level list would be shared across Evaluators
8 |         self.algorithms = []
9 |         ed = EvaluationData(dataset, rankings)
10 |         self.dataset = ed
11 |
12 | def AddAlgorithm(self, algorithm, name):
13 | alg = EvaluatedAlgorithm(algorithm, name)
14 | self.algorithms.append(alg)
15 |
16 | def Evaluate(self, doTopN):
17 | results = {}
18 | for algorithm in self.algorithms:
19 | print("Evaluating ", algorithm.GetName(), "...")
20 | results[algorithm.GetName()] = algorithm.Evaluate(self.dataset, doTopN)
21 |
22 | # Print results
23 | print("\n")
24 |
25 | if (doTopN):
26 | print("{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
27 | "Algorithm", "RMSE", "MAE", "HR", "cHR", "ARHR", "Coverage", "Diversity", "Novelty"))
28 | for (name, metrics) in results.items():
29 | print("{:<10} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
30 | name, metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"],
31 | metrics["Coverage"], metrics["Diversity"], metrics["Novelty"]))
32 | else:
33 | print("{:<10} {:<10} {:<10}".format("Algorithm", "RMSE", "MAE"))
34 | for (name, metrics) in results.items():
35 | print("{:<10} {:<10.4f} {:<10.4f}".format(name, metrics["RMSE"], metrics["MAE"]))
36 |
37 | print("\nLegend:\n")
38 | print("RMSE: Root Mean Squared Error. Lower values mean better accuracy.")
39 | print("MAE: Mean Absolute Error. Lower values mean better accuracy.")
40 | if (doTopN):
41 | print("HR: Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
42 | print("cHR: Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
43 | print("ARHR: Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better." )
44 | print("Coverage: Ratio of users for whom recommendations above a certain threshold exist. Higher is better.")
45 | print("Diversity: 1-S, where S is the average similarity score between every possible pair of recommendations")
46 | print(" for a given user. Higher means more diverse.")
47 | print("Novelty: Average popularity rank of recommended items. Higher means more novel.")
48 |
49 | def SampleTopNRecs(self, ml, testSubject=85, k=10):
50 |
51 | for algo in self.algorithms:
52 | print("\nUsing recommender ", algo.GetName())
53 |
54 | print("\nBuilding recommendation model...")
55 | trainSet = self.dataset.GetFullTrainSet()
56 | algo.GetAlgorithm().fit(trainSet)
57 |
58 | print("Computing recommendations...")
59 | testSet = self.dataset.GetAntiTestSetForUser(testSubject)
60 |
61 | predictions = algo.GetAlgorithm().test(testSet)
62 |
63 | recommendations = []
64 |
65 | print ("\nWe recommend:")
66 | for userID, movieID, actualRating, estimatedRating, _ in predictions:
67 | intMovieID = int(movieID)
68 | recommendations.append((intMovieID, estimatedRating))
69 |
70 | recommendations.sort(key=lambda x: x[1], reverse=True)
71 |
72 |             for ratings in recommendations[:k]:
73 | print(ml.getMovieName(ratings[0]), ratings[1])
74 |
75 |
76 |
77 |
78 |
79 |
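The Diversity number in the legend is one minus the mean similarity over every pair of items in each user's top-N list; a stripped-down sketch of the idea (assuming simsAlgo is the fitted KNNBaseline held by EvaluationData):

    from itertools import combinations

    def Diversity(topNPredicted, simsAlgo):
        simsMatrix = simsAlgo.compute_similarities()
        total, n = 0.0, 0
        for userID in topNPredicted.keys():
            for (m1, _), (m2, _) in combinations(topNPredicted[userID], 2):
                # Map raw movie IDs to inner IDs before indexing the similarity matrix
                i1 = simsAlgo.trainset.to_inner_iid(str(m1))
                i2 = simsAlgo.trainset.to_inner_iid(str(m2))
                total += simsMatrix[i1][i2]
                n += 1
        return 1 - (total / n)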
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/RBM.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | class RBM(object):
5 |
6 | def __init__(self, visibleDimensions, epochs=20, hiddenDimensions=50, ratingValues=10, learningRate=0.001, batchSize=100):
7 |
8 | self.visibleDimensions = visibleDimensions
9 | self.epochs = epochs
10 | self.hiddenDimensions = hiddenDimensions
11 | self.ratingValues = ratingValues
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 |
16 | def Train(self, X):
17 |
18 | for epoch in range(self.epochs):
19 | np.random.shuffle(X)
20 |
21 | trX = np.array(X)
22 | for i in range(0, trX.shape[0], self.batchSize):
23 | epochX = trX[i:i+self.batchSize]
24 | self.MakeGraph(epochX)
25 |
26 | print("Trained epoch ", epoch)
27 |
28 |
29 | def GetRecommendations(self, inputUser):
30 |
31 | feed = self.MakeHidden(inputUser)
32 | rec = self.MakeVisible(feed)
33 | return rec[0]
34 |
35 |     def MakeGraph(self, inputUser):
36 |
37 |         # Initialize weights and biases only once (re-creating them on every batch would
38 |         # discard all prior learning); reuse them on subsequent calls.
39 |         if not hasattr(self, 'weights'):
40 |             maxWeight = 4.0 * np.sqrt(6.0 / (self.hiddenDimensions + self.visibleDimensions))
41 |             self.weights = tf.Variable(tf.random.uniform([self.visibleDimensions, self.hiddenDimensions], minval=-maxWeight, maxval=maxWeight), tf.float32, name="weights")
42 |             self.hiddenBias = tf.Variable(tf.zeros([self.hiddenDimensions], tf.float32, name="hiddenBias"))
43 |             self.visibleBias = tf.Variable(tf.zeros([self.visibleDimensions], tf.float32, name="visibleBias"))
44 |         # Perform Gibbs sampling for contrastive divergence. Per the paper, we use k=1
45 |         # rather than iterating the forward pass multiple times, since it works well in practice.
46 |
47 | # Forward pass
48 | # Sample hidden layer given visible...
49 | # Get tensor of hidden probabilities
50 | hProb0 = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
51 | # Sample from all of the distributions
52 | hSample = tf.nn.relu(tf.sign(hProb0 - tf.random.uniform(tf.shape(hProb0))))
53 | # Stitch it together
54 | forward = tf.matmul(tf.transpose(inputUser), hSample)
55 |
56 | # Backward pass
57 | # Reconstruct visible layer given hidden layer sample
58 | v = tf.matmul(hSample, tf.transpose(self.weights)) + self.visibleBias
59 |
60 | # Build up our mask for missing ratings
61 | vMask = tf.sign(inputUser) # Make sure everything is 0 or 1
62 | vMask3D = tf.reshape(vMask, [tf.shape(v)[0], -1, self.ratingValues]) # Reshape into arrays of individual ratings
63 |         vMask3D = tf.reduce_max(vMask3D, axis=[2], keepdims=True) # reduce_max gives 1 for ratings that exist and 0 for missing ratings
64 |
65 | # Extract rating vectors for each individual set of 10 rating binary values
66 | v = tf.reshape(v, [tf.shape(v)[0], -1, self.ratingValues])
67 | vProb = tf.nn.softmax(v * vMask3D) # Apply softmax activation function
68 | vProb = tf.reshape(vProb, [tf.shape(v)[0], -1]) # And shove them back into the flattened state. Reconstruction is done now.
69 | # Stitch it together to define the backward pass and updated hidden biases
70 | hProb1 = tf.nn.sigmoid(tf.matmul(vProb, self.weights) + self.hiddenBias)
71 | backward = tf.matmul(tf.transpose(vProb), hProb1)
72 |
73 | # Now define what each epoch will do...
74 | # Run the forward and backward passes, and update the weights
75 | weightUpdate = self.weights.assign_add(self.learningRate * (forward - backward))
76 | # Update hidden bias, minimizing the divergence in the hidden nodes
77 | hiddenBiasUpdate = self.hiddenBias.assign_add(self.learningRate * tf.reduce_mean(hProb0 - hProb1, 0))
78 |         # Update the visible bias, minimizing divergence in the visible results
79 | visibleBiasUpdate = self.visibleBias.assign_add(self.learningRate * tf.reduce_mean(inputUser - vProb, 0))
80 |
81 | self.update = [weightUpdate, hiddenBiasUpdate, visibleBiasUpdate]
82 |
83 |     def MakeHidden(self, inputUser):
84 |         # Project a user's visible ratings into the hidden layer with the trained weights
85 |         hidden = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
86 |         return hidden
87 |
88 | def MakeVisible(self, feed):
89 | visible = tf.nn.sigmoid(tf.matmul(feed, tf.transpose(self.weights)) + self.visibleBias)
90 | #self.MakeGraph(feed)
91 | return visible
92 |
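In equation form, the three assign_add updates above are the standard CD-1 learning rule. Writing v for the input ratings, \tilde{v} for the softmax reconstruction, h_s for the sampled hidden units, and h_0, h_1 for the hidden probabilities before and after reconstruction:

    \Delta W   = \eta \, (v^{T} h_s - \tilde{v}^{T} h_1)
    \Delta b_h = \eta \, \mathrm{mean}(h_0 - h_1)
    \Delta b_v = \eta \, \mathrm{mean}(v - \tilde{v})

where \eta is the learning rate and the means are taken over the batch dimension, matching the tf.reduce_mean calls.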
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/RBMAlgorithm.py:
--------------------------------------------------------------------------------
1 | from surprise import AlgoBase
2 | from surprise import PredictionImpossible
3 | import numpy as np
4 | from RBM import RBM
5 |
6 | class RBMAlgorithm(AlgoBase):
7 |
8 | def __init__(self, epochs=20, hiddenDim=100, learningRate=0.001, batchSize=100, sim_options={}):
9 | AlgoBase.__init__(self)
10 | self.epochs = epochs
11 | self.hiddenDim = hiddenDim
12 | self.learningRate = learningRate
13 | self.batchSize = batchSize
14 |
15 | def softmax(self, x):
16 | return np.exp(x) / np.sum(np.exp(x), axis=0)
17 |
18 | def fit(self, trainset):
19 | AlgoBase.fit(self, trainset)
20 |
21 | numUsers = trainset.n_users
22 | numItems = trainset.n_items
23 |
24 | trainingMatrix = np.zeros([numUsers, numItems, 10], dtype=np.float32)
25 |
26 | for (uid, iid, rating) in trainset.all_ratings():
27 | adjustedRating = int(float(rating)*2.0) - 1
28 | trainingMatrix[int(uid), int(iid), adjustedRating] = 1
29 |
30 | # Flatten to a 2D array, with nodes for each possible rating type on each possible item, for every user.
31 | trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])
32 |
33 | # Create an RBM with (num items * rating values) visible nodes
34 | rbm = RBM(trainingMatrix.shape[1], hiddenDimensions=self.hiddenDim, learningRate=self.learningRate, batchSize=self.batchSize, epochs=self.epochs)
35 | rbm.Train(trainingMatrix)
36 |
37 | self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
38 | for uiid in range(trainset.n_users):
39 | if (uiid % 50 == 0):
40 | print("Processing user ", uiid)
41 | recs = rbm.GetRecommendations([trainingMatrix[uiid]])
42 | recs = np.reshape(recs, [numItems, 10])
43 |
44 | for itemID, rec in enumerate(recs):
45 | # The obvious thing would be to just take the rating with the highest score:
46 | #rating = rec.argmax()
47 | # ... but this just leads to a huge multi-way tie for 5-star predictions.
48 | # The paper suggests performing normalization over K values to get probabilities
49 | # and take the expectation as your prediction, so we'll do that instead:
50 | normalized = self.softmax(rec)
51 | rating = np.average(np.arange(10), weights=normalized)
52 | self.predictedRatings[uiid, itemID] = (rating + 1) * 0.5
53 |
54 | return self
55 |
56 |
57 | def estimate(self, u, i):
58 |
59 | if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
60 |             raise PredictionImpossible('User and/or item is unknown.')
61 |
62 | rating = self.predictedRatings[u, i]
63 |
64 | if (rating < 0.001):
65 | raise PredictionImpossible('No valid prediction exists.')
66 |
67 | return rating
68 |
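The rating encode/decode round trip in fit is easy to verify by hand; a small sketch showing how a 4.0-star rating maps onto one of the 10 binary rating nodes and back (mirroring the arithmetic above):

    import numpy as np

    rating = 4.0
    slot = int(rating * 2.0) - 1        # half-star scale: 4.0 stars -> slot index 7
    onehot = np.zeros(10)
    onehot[slot] = 1

    # Decoding: expected slot index under the (here perfectly confident)
    # reconstruction, rescaled back onto the 0.5..5.0 star range
    expected = np.average(np.arange(10), weights=onehot)
    stars = (expected + 1) * 0.5        # -> 4.0
    print(slot, stars)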
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/RBMBakeOff.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from RBMAlgorithm import RBMAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 |
6 | import random
7 | import numpy as np
8 |
9 | def LoadMovieLensData():
10 | ml = MovieLens()
11 | print("Loading movie ratings...")
12 | data = ml.loadMovieLensLatestSmall()
13 | print("\nComputing movie popularity ranks so we can measure novelty later...")
14 | rankings = ml.getPopularityRanks()
15 | return (ml, data, rankings)
16 |
17 | np.random.seed(0)
18 | random.seed(0)
19 |
20 | # Load up common data set for the recommender algorithms
21 | (ml, evaluationData, rankings) = LoadMovieLensData()
22 |
23 | # Construct an Evaluator to, you know, evaluate them
24 | evaluator = Evaluator(evaluationData, rankings)
25 |
26 | #RBM
27 | RBM = RBMAlgorithm(epochs=20)
28 | evaluator.AddAlgorithm(RBM, "RBM")
29 |
30 | # Just make random recommendations
31 | Random = NormalPredictor()
32 | evaluator.AddAlgorithm(Random, "Random")
33 |
34 | # Fight!
35 | evaluator.Evaluate(True)
36 |
37 | evaluator.SampleTopNRecs(ml)
38 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/RBMTuning.py:
--------------------------------------------------------------------------------
1 | from MovieLens import MovieLens
2 | from RBMAlgorithm import RBMAlgorithm
3 | from surprise import NormalPredictor
4 | from Evaluator import Evaluator
5 | from surprise.model_selection import GridSearchCV
6 |
7 | import random
8 | import numpy as np
9 |
10 | def LoadMovieLensData():
11 | ml = MovieLens()
12 | print("Loading movie ratings...")
13 | data = ml.loadMovieLensLatestSmall()
14 | print("\nComputing movie popularity ranks so we can measure novelty later...")
15 | rankings = ml.getPopularityRanks()
16 | return (ml, data, rankings)
17 |
18 | np.random.seed(0)
19 | random.seed(0)
20 |
21 | # Load up common data set for the recommender algorithms
22 | (ml, evaluationData, rankings) = LoadMovieLensData()
23 |
24 | print("Searching for best parameters...")
25 | param_grid = {'hiddenDim': [20, 10], 'learningRate': [0.1, 0.01]}
26 | gs = GridSearchCV(RBMAlgorithm, param_grid, measures=['rmse', 'mae'], cv=3)
27 |
28 | gs.fit(evaluationData)
29 |
30 | # best RMSE score
31 | print("Best RMSE score attained: ", gs.best_score['rmse'])
32 |
33 | # combination of parameters that gave the best RMSE score
34 | print(gs.best_params['rmse'])
35 |
36 | # Construct an Evaluator to, you know, evaluate them
37 | evaluator = Evaluator(evaluationData, rankings)
38 |
39 | params = gs.best_params['rmse']
40 | RBMtuned = RBMAlgorithm(hiddenDim = params['hiddenDim'], learningRate = params['learningRate'])
41 | evaluator.AddAlgorithm(RBMtuned, "RBM - Tuned")
42 |
43 | RBMUntuned = RBMAlgorithm()
44 | evaluator.AddAlgorithm(RBMUntuned, "RBM - Untuned")
45 |
46 | # Just make random recommendations
47 | Random = NormalPredictor()
48 | evaluator.AddAlgorithm(Random, "Random")
49 |
50 | # Fight!
51 | evaluator.Evaluate(False)
52 |
53 | evaluator.SampleTopNRecs(ml)
54 |
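As a side note, surprise's GridSearchCV also exposes best_estimator, so the tuned algorithm can be pulled out directly instead of being re-constructed from best_params; a minimal alternative sketch:

    # best_estimator maps each measure to an algorithm instance already
    # configured with the best parameters found for that measure
    RBMtuned = gs.best_estimator['rmse']
    evaluator.AddAlgorithm(RBMtuned, "RBM - Tuned")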
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/Recommendations with Deep Neural Networks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Recommendations with Deep Neural Networks"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Importing Dependencies"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from MovieLens import MovieLens\n",
24 | "from AutoRecAlgorithm import AutoRecAlgorithm\n",
25 | "from surprise import NormalPredictor\n",
26 | "from Evaluator import Evaluator"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "import random\n",
36 | "import numpy as np"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "## Loading Data"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 3,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "def LoadMovieLensData():\n",
53 | " ml = MovieLens()\n",
54 | " print(\"Loading movie ratings...\")\n",
55 | " data = ml.loadMovieLensLatestSmall()\n",
56 | " print(\"\\nComputing movie popularity ranks so we can measure novelty later...\")\n",
57 | " rankings = ml.getPopularityRanks()\n",
58 | " return (ml, data, rankings)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "np.random.seed(0)\n",
68 | "random.seed(0)"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 5,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "Loading movie ratings...\n",
81 | "\n",
82 | "Computing movie popularity ranks so we can measure novelty later...\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "# Load up common data set for the recommender algorithms\n",
88 | "(ml, evaluationData, rankings) = LoadMovieLensData()"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Instantiating the Evaluator"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Estimating biases using als...\n",
108 | "Computing the cosine similarity matrix...\n",
109 | "Done computing similarity matrix.\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "# Construct an Evaluator to, you know, evaluate them\n",
115 | "evaluator = Evaluator(evaluationData, rankings)"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "## Recommendation Algorithm"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 7,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "#Autoencoder\n",
132 | "AutoRec = AutoRecAlgorithm()\n",
133 | "evaluator.AddAlgorithm(AutoRec, \"AutoRec\")"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 8,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "# Just make random recommendations\n",
143 | "Random = NormalPredictor()\n",
144 | "evaluator.AddAlgorithm(Random, \"Random\")"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "## Evaluation"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "Evaluating AutoRec ...\n",
164 | "Evaluating accuracy...\n",
165 | "Trained epoch 0\n",
166 | "Trained epoch 1\n",
167 | "Trained epoch 2\n",
168 | "Trained epoch 3\n",
169 | "Trained epoch 4\n",
170 | "Trained epoch 5\n",
171 | "Trained epoch 6\n",
172 | "Trained epoch 7\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "evaluator.Evaluate(True)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "evaluator.SampleTopNRecs(ml)"
187 | ]
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python 3",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.8.2"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 4
211 | }
212 |
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/AutoRec.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/AutoRec.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/AutoRecAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/AutoRecAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/EvaluatedAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/EvaluatedAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/EvaluationData.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/EvaluationData.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/Evaluator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/Evaluator.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/MovieLens.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/MovieLens.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/RBM.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/RBM.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/RBMAlgorithm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/RBMAlgorithm.cpython-38.pyc
--------------------------------------------------------------------------------
/06 - Deep Learning for Recommender Systems/__pycache__/RecommenderMetrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amanjeetsahu/Recommender-Systems-Using-Python/012a21902c05bb0a20acbb91bd0df1bf49f67f99/06 - Deep Learning for Recommender Systems/__pycache__/RecommenderMetrics.cpython-38.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reccomender-Systems-Using-Python
--------------------------------------------------------------------------------