#!/usr/bin/env python
"""User-based collaborative filtering recommendation engine.

This module was reconstructed from a flattened repository listing that
contained three entries:

    collaborative_filtering.py  - the similarity / recommendation functions
    recommendation_data.py      - the ``dataset`` ratings dictionary
    recommendation_data.pyc     - compiled copy of the data module, see
        https://raw.githubusercontent.com/ddps-lab/instacart/4ae445be340731e1f56cfd958685429c608638de/recommendation_data.pyc

The two source files are merged here so the module is runnable on its own;
``dataset`` is therefore defined inline instead of being imported from
``recommendation_data``.

The code avoids Python-3-only syntax so that it runs on both Python 2 and
Python 3.
"""

from math import sqrt

# Collaborative filtering data set taken from the Collective Intelligence
# book.  Maps a person's name to a dict of {item title: rating}.
dataset = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


def _shared_items(person1, person2):
    """Return the items rated by both people, in person1's iteration order."""
    other_ratings = dataset[person2]
    return [item for item in dataset[person1] if item in other_ratings]


def similarity_score(person1, person2):
    """Return a Euclidean-distance based similarity of two people.

    The score is ``1 / (1 + d)`` where ``d`` is the Euclidean distance
    between the two people's ratings over the items both have rated, so
    identical ratings give 1 and larger distances give smaller scores.

    Args:
        person1: key of the first person in ``dataset``.
        person2: key of the second person in ``dataset``.

    Returns:
        0 when the two people have no rated item in common, otherwise a
        float in the range (0, 1].

    Raises:
        KeyError: if either person is not a key of ``dataset``.
    """
    shared = _shared_items(person1, person2)
    if not shared:
        return 0

    squared_distance = sum(
        pow(dataset[person1][item] - dataset[person2][item], 2)
        for item in shared
    )
    return 1.0 / (1 + sqrt(squared_distance))


def pearson_correlation(person1, person2):
    """Return the Pearson correlation of two people's common ratings.

    Args:
        person1: key of the first person in ``dataset``.
        person2: key of the second person in ``dataset``.

    Returns:
        0 when the people share no rated item or when the correlation is
        undefined (either person's common ratings have no variance),
        otherwise the correlation coefficient as a float.

    Raises:
        KeyError: if either person is not a key of ``dataset``.
    """
    shared = _shared_items(person1, person2)
    if not shared:
        return 0

    # float() keeps the divisions below true divisions on Python 2 even
    # when every rating is an int.
    count = float(len(shared))
    ratings1 = [dataset[person1][item] for item in shared]
    ratings2 = [dataset[person2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    square_sum1 = sum(pow(rating, 2) for rating in ratings1)
    square_sum2 = sum(pow(rating, 2) for rating in ratings2)
    product_sum = sum(a * b for a, b in zip(ratings1, ratings2))

    numerator = product_sum - (sum1 * sum2 / count)
    variance_product = ((square_sum1 - pow(sum1, 2) / count)
                        * (square_sum2 - pow(sum2, 2) / count))

    # Mathematically this product is never negative, but floating-point
    # rounding can leave it marginally below zero, which would make sqrt()
    # raise ValueError.  Treat that the same as the zero-variance case.
    if variance_product <= 0:
        return 0
    return numerator / sqrt(variance_product)


def most_similar_users(person, number_of_users):
    """Return the people most similar to ``person``.

    Similarity is measured with ``pearson_correlation``.

    Args:
        person: key of the reference person in ``dataset``.
        number_of_users: maximum number of entries to return.

    Returns:
        A list of ``(score, other_person)`` tuples sorted from the highest
        score to the lowest, truncated to ``number_of_users`` entries.

    Raises:
        KeyError: if ``person`` is not a key of ``dataset``.
    """
    scores = [
        (pearson_correlation(person, other_person), other_person)
        for other_person in dataset
        if other_person != person
    ]
    return sorted(scores, reverse=True)[0:number_of_users]


def user_reommendations(person):
    """Return item recommendations for ``person``, best first.

    Each item the person has not rated (or has rated 0) gets a predicted
    score: the average of the other users' ratings weighted by each user's
    Pearson similarity to ``person``.

    Only users with a strictly positive similarity contribute.  Letting
    negatively correlated users in would subtract from the similarity sum
    used as the divisor, which distorts the weighted average and raises
    ZeroDivisionError when the similarities cancel out exactly.

    Args:
        person: key of the person in ``dataset`` to recommend for.

    Returns:
        A list of item names ordered from the highest predicted score to
        the lowest; empty when no positively correlated user has rated an
        item the person is missing.

    Raises:
        KeyError: if ``person`` is not a key of ``dataset``.
    """
    own_ratings = dataset[person]
    totals = {}
    similarity_sums = {}

    for other in dataset:
        # Don't compare the person to themselves.
        if other == person:
            continue
        sim = pearson_correlation(person, other)

        # Ignore scores of zero or lower.
        if sim <= 0:
            continue

        for item, rating in dataset[other].items():
            # Only score items the person hasn't rated yet.
            if item not in own_ratings or own_ratings[item] == 0:
                totals[item] = totals.get(item, 0) + rating * sim
                similarity_sums[item] = similarity_sums.get(item, 0) + sim

    # Normalise each total by the similarity mass that produced it, then
    # order by predicted score (ties broken by item name), highest first.
    rankings = sorted(
        ((total / similarity_sums[item], item)
         for item, total in totals.items()),
        reverse=True,
    )
    return [item for _score, item in rankings]


# Correctly spelled alias; the original (misspelled) name is kept so that
# existing callers keep working.
user_recommendations = user_reommendations


def main():
    """Print the demo report for the sample data set."""
    # print("") emits a blank line on both Python 2 and Python 3.
    print("")
    print("Lisa Rose rating on Lady in the water score: {}".format(dataset['Lisa Rose']['Lady in the Water']))
    print("Michael Phillips rating on Lady in the water score: {}".format(dataset['Michael Phillips']['Lady in the Water']))
    print("")
    print('**************Jack Matthews ratings**************')
    print(dataset['Jack Matthews'])
    print("")
    print('************** Smililarity Score Euclidean Distance Socre Lisa and Jack **************')
    print(similarity_score('Lisa Rose', 'Jack Matthews'))
    print("")
    print('************** Lisa : Who is most similar Users **************')
    print(most_similar_users('Lisa Rose', 3))
    print("")
    print("************** Recommendations for Toby **************")
    print('Toby :')
    print(user_reommendations('Toby'))


if __name__ == "__main__":
    main()