├── Recommenders.py
└── 音乐推荐.ipynb


/Recommenders.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas
  3 | 
  4 | #Class for Popularity based Recommender System model
  5 | class popularity_recommender_py():
  6 |     def __init__(self):
  7 |         self.train_data = None
  8 |         self.user_id = None
  9 |         self.item_id = None
 10 |         self.popularity_recommendations = None
 11 |         
 12 |     #Create the popularity based recommender system model
 13 |     def create(self, train_data, user_id, item_id):
 14 |         self.train_data = train_data
 15 |         self.user_id = user_id
 16 |         self.item_id = item_id
 17 | 
 18 |         #Get a count of user_ids for each unique song as recommendation score
 19 |         train_data_grouped = train_data.groupby([self.item_id]).agg({self.user_id: 'count'}).reset_index()
 20 |         train_data_grouped.rename(columns = {user_id: 'score'},inplace=True)
 21 |     
 22 |         #Sort the songs based upon recommendation score
 23 |         train_data_sort = train_data_grouped.sort_values(['score', self.item_id], ascending = [0,1])
 24 |     
 25 |         #Generate a recommendation rank based upon score
 26 |         train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=0, method='first')
 27 |         
 28 |         #Get the top 10 recommendations
 29 |         self.popularity_recommendations = train_data_sort.head(10)
 30 | 
 31 |     #Use the popularity based recommender system model to
 32 |     #make recommendations
 33 |     def recommend(self, user_id):    
 34 |         user_recommendations = self.popularity_recommendations
 35 |         
 36 |         #Add user_id column for which the recommendations are being generated
 37 |         user_recommendations['user_id'] = user_id
 38 |     
 39 |         #Bring user_id column to the front
 40 |         cols = user_recommendations.columns.tolist()
 41 |         cols = cols[-1:] + cols[:-1]
 42 |         user_recommendations = user_recommendations[cols]
 43 |         
 44 |         return user_recommendations
 45 |     
 46 | 
 47 | #Class for Item similarity based Recommender System model
 48 | class item_similarity_recommender_py():
 49 |     def __init__(self):
 50 |         self.train_data = None
 51 |         self.user_id = None
 52 |         self.item_id = None
 53 |         self.cooccurence_matrix = None
 54 |         self.songs_dict = None
 55 |         self.rev_songs_dict = None
 56 |         self.item_similarity_recommendations = None
 57 |         
 58 |     #Get unique items (songs) corresponding to a given user
 59 |     def get_user_items(self, user):
 60 |         user_data = self.train_data[self.train_data[self.user_id] == user]
 61 |         user_items = list(user_data[self.item_id].unique())
 62 |         
 63 |         return user_items
 64 |         
 65 |     #Get unique users for a given item (song)
 66 |     def get_item_users(self, item):
 67 |         item_data = self.train_data[self.train_data[self.item_id] == item]
 68 |         item_users = set(item_data[self.user_id].unique())
 69 |             
 70 |         return item_users
 71 |         
 72 |     #Get unique items (songs) in the training data
 73 |     def get_all_items_train_data(self):
 74 |         all_items = list(self.train_data[self.item_id].unique())
 75 |             
 76 |         return all_items
 77 |         
 78 |     #Construct cooccurence matrix
 79 |     def construct_cooccurence_matrix(self, user_songs, all_songs):
 80 |             
 81 |         ####################################
 82 |         #Get users for all songs in user_songs.
 83 |         ####################################
 84 |         user_songs_users = []        
 85 |         for i in range(0, len(user_songs)):
 86 |             user_songs_users.append(self.get_item_users(user_songs[i]))
 87 |             
 88 |         ###############################################
 89 |         #Initialize the item cooccurence matrix of size 
 90 |         #len(user_songs) X len(songs)
 91 |         ###############################################
 92 |         cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_songs), len(all_songs))), float)
 93 |            
 94 |         #############################################################
 95 |         #Calculate similarity between user songs and all unique songs
 96 |         #in the training data
 97 |         #############################################################
 98 |         for i in range(0,len(all_songs)):
 99 |             #Calculate unique listeners (users) of song (item) i
100 |             songs_i_data = self.train_data[self.train_data[self.item_id] == all_songs[i]]
101 |             users_i = set(songs_i_data[self.user_id].unique())
102 |             
103 |             for j in range(0,len(user_songs)):       
104 |                     
105 |                 #Get unique listeners (users) of song (item) j
106 |                 users_j = user_songs_users[j]
107 |                     
108 |                 #Calculate intersection of listeners of songs i and j
109 |                 users_intersection = users_i.intersection(users_j)
110 |                 
111 |                 #Calculate cooccurence_matrix[i,j] as Jaccard Index
112 |                 if len(users_intersection) != 0:
113 |                     #Calculate union of listeners of songs i and j
114 |                     users_union = users_i.union(users_j)
115 |                     
116 |                     cooccurence_matrix[j,i] = float(len(users_intersection))/float(len(users_union))
117 |                 else:
118 |                     cooccurence_matrix[j,i] = 0
119 |                     
120 |         
121 |         return cooccurence_matrix
122 | 
123 |     
124 |     #Use the cooccurence matrix to make top recommendations
125 |     def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
126 |         print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))
127 |         
128 |         #Calculate a weighted average of the scores in cooccurence matrix for all user songs.
129 |         user_sim_scores = cooccurence_matrix.sum(axis=0)/float(cooccurence_matrix.shape[0])
130 |         user_sim_scores = np.array(user_sim_scores)[0].tolist()
131 |  
132 |         #Sort the indices of user_sim_scores based upon their value
133 |         #Also maintain the corresponding score
134 |         sort_index = sorted(((e,i) for i,e in enumerate(list(user_sim_scores))), reverse=True)
135 |     
136 |         #Create a dataframe from the following
137 |         columns = ['user_id', 'song', 'score', 'rank']
138 |         #index = np.arange(1) # array of numbers for the number of samples
139 |         df = pandas.DataFrame(columns=columns)
140 |          
141 |         #Fill the dataframe with top 10 item based recommendations
142 |         rank = 1 
143 |         for i in range(0,len(sort_index)):
144 |             if ~np.isnan(sort_index[i][0]) and all_songs[sort_index[i][1]] not in user_songs and rank <= 10:
145 |                 df.loc[len(df)]=[user,all_songs[sort_index[i][1]],sort_index[i][0],rank]
146 |                 rank = rank+1
147 |         
148 |         #Handle the case where there are no recommendations
149 |         if df.shape[0] == 0:
150 |             print("The current user has no songs for training the item similarity based recommendation model.")
151 |             return -1
152 |         else:
153 |             return df
154 |  
155 |     #Create the item similarity based recommender system model
156 |     def create(self, train_data, user_id, item_id):
157 |         self.train_data = train_data
158 |         self.user_id = user_id
159 |         self.item_id = item_id
160 | 
161 |     #Use the item similarity based recommender system model to
162 |     #make recommendations
163 |     def recommend(self, user):
164 |         
165 |         ########################################
166 |         #A. Get all unique songs for this user
167 |         ########################################
168 |         user_songs = self.get_user_items(user)    
169 |             
170 |         print("No. of unique songs for the user: %d" % len(user_songs))
171 |         
172 |         ######################################################
173 |         #B. Get all unique items (songs) in the training data
174 |         ######################################################
175 |         all_songs = self.get_all_items_train_data()
176 |         
177 |         print("no. of unique songs in the training set: %d" % len(all_songs))
178 |          
179 |         ###############################################
180 |         #C. Construct item cooccurence matrix of size 
181 |         #len(user_songs) X len(songs)
182 |         ###############################################
183 |         cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
184 |         
185 |         #######################################################
186 |         #D. Use the cooccurence matrix to make recommendations
187 |         #######################################################
188 |         df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)
189 |                 
190 |         return df_recommendations
191 |     
192 |     #Get similar items to given items
193 |     def get_similar_items(self, item_list):
194 |         
195 |         user_songs = item_list
196 |         
197 |         ######################################################
198 |         #B. Get all unique items (songs) in the training data
199 |         ######################################################
200 |         all_songs = self.get_all_items_train_data()
201 |         
202 |         print("no. of unique songs in the training set: %d" % len(all_songs))
203 |          
204 |         ###############################################
205 |         #C. Construct item cooccurence matrix of size 
206 |         #len(user_songs) X len(songs)
207 |         ###############################################
208 |         cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
209 |         
210 |         #######################################################
211 |         #D. Use the cooccurence matrix to make recommendations
212 |         #######################################################
213 |         user = ""
214 |         df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)
215 |          
216 |         return df_recommendations
217 | 


--------------------------------------------------------------------------------