# Repository listing this module was extracted from:
#   ├── Recommenders.py
#   └── 音乐推荐.ipynb
#
# /Recommenders.py
import numpy as np
import pandas


# Class for Popularity based Recommender System model
class popularity_recommender_py():
    """Recommend the globally most popular items to every user.

    Popularity of an item is the number of training rows that mention it
    (the count of the user column per item). The model keeps the ten most
    popular items and returns the same list for any user.
    """

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None

    # Create the popularity based recommender system model
    def create(self, train_data, user_id, item_id):
        """Build the top-10 popularity table from ``train_data``.

        Args:
            train_data: DataFrame holding one row per (user, item) event.
            user_id: Name of the column identifying the user.
            item_id: Name of the column identifying the item.
        """
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

        # Count rows per item; that count is the recommendation score.
        train_data_grouped = (
            train_data.groupby([self.item_id])
            .agg({self.user_id: 'count'})
            .reset_index()
        )
        train_data_grouped.rename(columns={user_id: 'score'}, inplace=True)

        # Highest score first; ties are broken by item id, ascending.
        train_data_sort = train_data_grouped.sort_values(
            ['score', self.item_id], ascending=[False, True]
        )

        # method='first' gives tied scores distinct ranks in sorted order.
        train_data_sort['Rank'] = train_data_sort['score'].rank(
            ascending=False, method='first'
        )

        # Keep an independent copy so the cached table never aliases the
        # intermediate frame.
        self.popularity_recommendations = train_data_sort.head(10).copy()

    # Use the popularity based recommender system model to
    # make recommendations
    def recommend(self, user_id):
        """Return the popularity table tagged with ``user_id``.

        Args:
            user_id: Value written into the ``'user_id'`` column of the result.

        Returns:
            A new DataFrame whose first column is ``'user_id'`` followed by
            the item column, ``'score'`` and ``'Rank'``. The cached model
            table is left untouched.

        Raises:
            RuntimeError: If ``create`` has not been called yet.
        """
        if self.popularity_recommendations is None:
            raise RuntimeError("create() must be called before recommend()")

        # Work on a copy: adding the column in place would mutate the model.
        user_recommendations = self.popularity_recommendations.copy()
        user_recommendations['user_id'] = user_id

        # Bring the user_id column to the front.
        other_cols = [c for c in user_recommendations.columns if c != 'user_id']
        return user_recommendations[['user_id'] + other_cols]


# Class for Item similarity based Recommender System model
class item_similarity_recommender_py():
    """Item-item recommender scored with the Jaccard index of listener sets.

    For each item the user already has, the similarity to every item in the
    training data is ``|listeners(a) & listeners(b)| / |listeners(a) | listeners(b)|``.
    Scores are averaged over the user's items and the best-scoring items the
    user does not already have are returned.
    """

    # Maximum number of rows returned by generate_top_recommendations.
    _TOP_N = 10

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.cooccurence_matrix = None
        self.songs_dict = None
        self.rev_songs_dict = None
        self.item_similarity_recommendations = None

    # Get unique items (songs) corresponding to a given user
    def get_user_items(self, user):
        """Return the list of unique items found in ``user``'s training rows."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item_id].unique())

    # Get unique users for a given item (song)
    def get_item_users(self, item):
        """Return the set of unique users found in ``item``'s training rows."""
        item_data = self.train_data[self.train_data[self.item_id] == item]
        return set(item_data[self.user_id].unique())

    # Get unique items (songs) in the training data
    def get_all_items_train_data(self):
        """Return the list of unique items in the training data."""
        return list(self.train_data[self.item_id].unique())

    # Construct cooccurence matrix
    def construct_cooccurence_matrix(self, user_songs, all_songs):
        """Compute Jaccard similarities between ``user_songs`` and ``all_songs``.

        Args:
            user_songs: Items to compare from (one matrix row each).
            all_songs: Items to compare against (one matrix column each).

        Returns:
            ``np.matrix`` of shape ``(len(user_songs), len(all_songs))`` where
            entry ``[j, i]`` is the Jaccard index of the listener sets of
            ``user_songs[j]`` and ``all_songs[i]`` (0 when they share nobody).
        """
        # Listener sets for the rows of the matrix.
        user_songs_users = [self.get_item_users(song) for song in user_songs]

        # One pass over the training data builds every column's listener set,
        # instead of re-filtering the whole frame once per song.
        grouped = self.train_data.groupby(self.item_id, sort=False)[self.user_id].unique()
        listeners_by_song = {song: set(users) for song, users in grouped.items()}

        scores = np.zeros(shape=(len(user_songs), len(all_songs)), dtype=float)
        for i, song in enumerate(all_songs):
            users_i = listeners_by_song.get(song)
            if not users_i:
                # No listeners: every similarity in this column stays 0.
                continue
            for j, users_j in enumerate(user_songs_users):
                shared = len(users_i & users_j)
                if shared:
                    # |A | B| = |A| + |B| - |A & B|, no need to build the union.
                    union_size = len(users_i) + len(users_j) - shared
                    scores[j, i] = float(shared) / float(union_size)

        # np.matrix is kept as the return type for backward compatibility.
        return np.matrix(scores, dtype=float)

    # Use the cooccurence matrix to make top recommendations
    def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
        """Turn a co-occurrence matrix into a ranked recommendation table.

        Args:
            user: Value written into the ``'user_id'`` column of the result.
            cooccurence_matrix: 2-D array/matrix with one row per user item
                and one column per entry of ``all_songs``.
            all_songs: Items corresponding to the matrix columns.
            user_songs: Items to exclude from the result.

        Returns:
            DataFrame with columns ``['user_id', 'song', 'score', 'rank']``
            holding at most 10 rows ordered by descending score, or the
            integer ``-1`` when no recommendation can be made (kept for
            backward compatibility with existing callers).
        """
        print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))

        scores_matrix = np.asarray(cooccurence_matrix, dtype=float)

        recommendations = []
        # With no rows there is nothing to average; skipping avoids a 0/0.
        if scores_matrix.shape[0] > 0:
            # Mean similarity of every candidate over all of the user's items.
            user_sim_scores = (
                scores_matrix.sum(axis=0) / float(scores_matrix.shape[0])
            ).tolist()

            # Sort (score, column index) pairs, best first. NaN scores are
            # dropped up front because they make the sort order undefined.
            ranked = sorted(
                (
                    (score, idx)
                    for idx, score in enumerate(user_sim_scores)
                    if not np.isnan(score)
                ),
                reverse=True,
            )

            for score, idx in ranked:
                if len(recommendations) >= self._TOP_N:
                    break
                song = all_songs[idx]
                if song in user_songs:
                    continue
                recommendations.append([user, song, score, len(recommendations) + 1])

        # Handle the case where there are no recommendations
        if not recommendations:
            print("The current user has no songs for training the item similarity based recommendation model.")
            return -1

        columns = ['user_id', 'song', 'score', 'rank']
        return pandas.DataFrame(recommendations, columns=columns)

    # Create the item similarity based recommender system model
    def create(self, train_data, user_id, item_id):
        """Store the training data and the names of its user/item columns."""
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    def _recommend_for_items(self, user, user_songs):
        """Shared pipeline: score ``user_songs`` against every training item."""
        all_songs = self.get_all_items_train_data()

        print("no. of unique songs in the training set: %d" % len(all_songs))

        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)

        return self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)

    # Use the item similarity based recommender system model to
    # make recommendations
    def recommend(self, user):
        """Recommend items for ``user`` based on the items they already have.

        Returns:
            The result of ``generate_top_recommendations``: a DataFrame, or
            ``-1`` when the user has no items in the training data.
        """
        user_songs = self.get_user_items(user)

        print("No. of unique songs for the user: %d" % len(user_songs))

        return self._recommend_for_items(user, user_songs)

    # Get similar items to given items
    def get_similar_items(self, item_list):
        """Return items similar to those in ``item_list``.

        The result has the same layout as ``recommend``; its ``'user_id'``
        column is the empty string because no user is involved.
        """
        return self._recommend_for_items("", item_list)