├── index.js ├── .gitignore ├── .travis.yml ├── lib ├── client.js ├── config.js ├── raccoon.js ├── key.js ├── input.js ├── stat.js └── algorithms.js ├── LICENSE ├── package.json ├── Gruntfile.js ├── test └── testRaccoon.js └── README.md /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/raccoon'); 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .npmignore 3 | node_modules 4 | dumb.rdb 5 | npm-debug.log 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '6' 4 | script: grunt travis 5 | services: 6 | - redis-server 7 | -------------------------------------------------------------------------------- /lib/client.js: -------------------------------------------------------------------------------- 1 | 2 | const redis = require('redis'), 3 | config = require('./config'), 4 | bluebird = require('bluebird'); 5 | 6 | bluebird.promisifyAll(redis.RedisClient.prototype); 7 | 8 | client = redis.createClient(config.redisPort, config.redisUrl); 9 | if (config.redisAuth){ 10 | client.auth(config.redisAuth, function (err) { 11 | if (err) { throw err; } 12 | }); 13 | } 14 | 15 | module.exports = exports = client; 16 | -------------------------------------------------------------------------------- /lib/config.js: -------------------------------------------------------------------------------- 1 | 2 | class Config { 3 | constructor(args) { 4 | this.nearestNeighbors = 5; 5 | this.className = 'movie'; 6 | this.numOfRecsStore = 30; 7 | this.factorLeastSimilarLeastLiked = false; 8 | this.redisUrl = process.env.RACCOON_REDIS_URL || '127.0.0.1'; 9 | this.redisPort = process.env.RACCOON_REDIS_PORT || 6379; 10 | this.redisAuth = process.env.RACCOON_REDIS_AUTH || ''; 11 | } 12 | } 13 | 14 | module.exports = exports = new Config(); 15 | -------------------------------------------------------------------------------- /lib/raccoon.js: -------------------------------------------------------------------------------- 1 | 2 | const config = require('./config.js'), 3 | algo = require('./algorithms.js'), 4 | input = require('./input.js'), 5 | stat = require('./stat.js'); 6 | 7 | class Raccoon { 8 | constructor(args) { 9 | } 10 | } 11 | 12 | const inputProtoMethods = { 13 | liked, 14 | disliked, 15 | unliked, 16 | undisliked, 17 | updateSequence 18 | } = input; 19 | 20 | const statProtoMethods = { 21 | recommendFor, 22 | bestRated, 23 | worstRated, 24 | bestRatedWithScores, 25 | mostLiked, 26 | mostDisliked, 27 | usersWhoLikedAlsoLiked, 28 | mostSimilarUsers, 29 | leastSimilarUsers, 30 | likedBy, 31 | likedCount, 32 | dislikedBy, 33 | dislikedCount, 34 | allLikedFor, 35 | allDislikedFor, 36 | allWatchedFor 37 | } = stat; 38 | 39 | const recProtoMethods = { 40 | predictFor: algo.predictFor 41 | } = algo; 42 | 43 | Raccoon.prototype = Object.assign(Raccoon.prototype, { config, stat }, 44 | inputProtoMethods, statProtoMethods, recProtoMethods); 45 | 46 | module.exports = exports = new Raccoon(); 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2013 Guy Morita 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "raccoon", 3 | "author": "Guy Morita", 4 | "description": "A Collaborative Filtering Recommendation Engine for Node.js utilizing Redis", 5 | "version": "0.2.8", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/guymorita/recommendationRaccoon.git" 9 | }, 10 | "dependencies": { 11 | "async": "~2.1.x", 12 | "bluebird": "~3.4.x", 13 | "redis": "~2.6.x", 14 | "underscore": "~1.8.x" 15 | }, 16 | "main": "./index.js", 17 | "engines": { 18 | "node": ">= 6.0.0" 19 | }, 20 | "licenses": [ 21 | { 22 | "type": "MIT", 23 | "url": "https://github.com/guymorita/recommendationRaccoon/LICENSE" 24 | } 25 | ], 26 | "keywords": [ 27 | "recommend", 28 | "recommended", 29 | "recommendation", 30 | "engine", 31 | "collaborative", 32 | "filtering", 33 | "middleware", 34 | "redis" 35 | ], 36 | "license": "MIT", 37 | "readmeFilename": "README.md", 38 | "bugs": { 39 | "url": "https://github.com/guymorita/recommendationRaccoon/issues" 40 | }, 41 | "devDependencies": { 42 | "chai": "~3.5.0", 43 | "grunt": "^1.0.1", 44 | "grunt-contrib-jshint": "^1.1.0", 45 | "grunt-contrib-watch": "^1.0.0", 46 | "grunt-mocha-cov": "^0.4.0", 47 | "mocha": "~3.2.0" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /lib/key.js: -------------------------------------------------------------------------------- 1 | 2 | const config = require('./config.js'); 3 | 4 | const USER = 'user', 5 | ITEM = 'item'; 6 | 7 | class Key { 8 | constructor() { 9 | this.key = ''; 10 | this.keyArr = []; 11 | } 12 | 13 | joinKey() { 14 | this.key = [config.className].concat(this.keyArr).join(':'); 15 | return this.key; 16 | } 17 | 18 | userLikedSet(userId) { 19 | this.keyArr = [USER, userId, 'liked']; 20 | return this.joinKey(); 21 | } 22 | 23 | userDislikedSet(userId) { 24 | this.keyArr = [USER, userId, 'disliked']; 25 | return this.joinKey(); 26 | } 27 | 28 | itemLikedBySet(itemId) { 29 | this.keyArr = [ITEM, itemId, 'liked']; 30 | return this.joinKey(); 31 | } 32 | 33 | itemDislikedBySet(itemId) { 34 | this.keyArr = [ITEM, itemId, 'disliked']; 35 | return this.joinKey(); 36 | } 37 | 38 | mostLiked() { 39 | this.keyArr = ['mostLiked']; 40 | return this.joinKey(); 41 | } 42 | 43 | mostDisliked() { 44 | this.keyArr = ['mostDisliked']; 45 | return this.joinKey(); 46 | } 47 | 48 | recommendedZSet(userId) { 49 | this.keyArr = [USER, userId, 'recommendedZSet']; 50 | return this.joinKey(); 51 | } 52 | 53 | scoreboardZSet() { 54 | this.keyArr = ['scoreboard']; 55 | return this.joinKey(); 56 | } 57 | 58 | similarityZSet(userId) { 59 | this.keyArr = [USER, userId, 'similarityZSet']; 60 | return this.joinKey(); 61 | } 62 | 63 | tempAllLikedSet(userId) { 64 | this.keyArr = [USER, userId, 'tempAllLikedSet']; 65 | return this.joinKey(); 66 | } 67 | } 68 | 69 | module.exports = exports = new Key(); 70 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | 2 | var _ = require('underscore'); 3 | 4 | module.exports = function(grunt) { 5 | 'use strict'; 6 | 7 | var files = { 8 | src: 'lib/**/*.js', 9 | tests: 'test/**/*.js' 10 | }; 11 | 12 | // Project configuration. 13 | grunt.initConfig({ 14 | pkg: grunt.file.readJSON('package.json'), 15 | 16 | jshint: { 17 | all: [ 18 | 'Gruntfile.js', 19 | files.src, 20 | files.tests 21 | ], 22 | options: { 23 | esversion: 6, 24 | moz: true 25 | } 26 | }, 27 | mochacov: { 28 | test: { 29 | src: [files.tests] 30 | }, // Run with the spec testrunner 31 | coverage: { 32 | src: [files.tests], 33 | options: { 34 | coveralls: { 35 | serviceName: 'travis-ci', 36 | repoToken: process.env.COVERALLS_REPO_TOKEN 37 | } 38 | } 39 | }, 40 | options: { 41 | reporter: 'spec', 42 | ignoreLeaks: false, 43 | files: [files.tests] 44 | } 45 | }, 46 | watch: { 47 | tests: { 48 | files: _.toArray(files), 49 | tasks: ['test'] 50 | } 51 | } 52 | }); 53 | 54 | // On watch events, configure jshint:all to run only on changed file 55 | grunt.event.on('watch', function(action, filepath) { 56 | grunt.config(['jshint', 'all'], filepath); 57 | }); 58 | 59 | // Load third-party modules 60 | grunt.loadNpmTasks('grunt-contrib-jshint'); 61 | grunt.loadNpmTasks('grunt-contrib-watch'); 62 | grunt.loadNpmTasks('grunt-mocha-cov'); 63 | 64 | // Tasks 65 | // grunt.registerTask('travis', [ 'jshint', 'mochacov:test', 'mochacov:coverage' ]); 66 | grunt.registerTask('travis', [ 'jshint:all', 'mochacov:test' ]); 67 | 68 | grunt.registerTask('test', ['jshint:all', 'mochacov:test']); 69 | 70 | // Default task (runs when running `grunt` without arguments) 71 | grunt.registerTask('default', ['test']); 72 | }; 73 | -------------------------------------------------------------------------------- /lib/input.js: -------------------------------------------------------------------------------- 1 | 2 | const config = require('./config.js'), 3 | algo = require('./algorithms.js'), 4 | async = require('async'), 5 | Key = require('./key'); 6 | 7 | const updateSequence = function(userId, itemId, options = {}){ 8 | let updateWilson = true; 9 | if ('updateWilson' in options) { 10 | updateWilson = options.updateWilson ? true : false; 11 | } 12 | 13 | return new Promise((resolve, reject) => { 14 | algo.updateSimilarityFor(userId, function(){ 15 | async.parallel([ 16 | function(cb){ 17 | algo.updateWilsonScore(itemId, function(){ 18 | cb(null); 19 | }); 20 | }, 21 | function(cb){ 22 | algo.updateRecommendationsFor(userId, function(){ 23 | cb(null); 24 | }); 25 | } 26 | ], 27 | function(err){ 28 | if (err){console.log('error', err);} 29 | resolve(); 30 | }); 31 | }); 32 | }); 33 | }; 34 | 35 | const changeRating = function(userId, itemId, options){ 36 | let updateRecommendations = true; 37 | if ('updateRecs' in options) { 38 | updateRecommendations = options.updateRecs ? true : false; 39 | } 40 | 41 | const removeRating = options.removeRating ? true : false; 42 | 43 | const feelingItemSet = options.liked ? Key.itemLikedBySet(itemId) : Key.itemDislikedBySet(itemId); 44 | const feelingUserSet = options.liked ? Key.userLikedSet(userId) : Key.userDislikedSet(userId); 45 | const mostFeelingSet = options.liked ? Key.mostLiked() : Key.mostDisliked(); 46 | 47 | return new Promise((resolve, reject) => { 48 | Promise.resolve().then(() => { 49 | // check if the rating is already stored 50 | return client.sismemberAsync(feelingItemSet, userId); 51 | }).then((result) => { 52 | // only increment the most feeling set if it doesn't already exist 53 | if (result === 0 && !removeRating) { 54 | client.zincrby(mostFeelingSet, 1, itemId); 55 | } else if (result > 0 && removeRating) { 56 | client.zincrby(mostFeelingSet, -1, itemId); 57 | } 58 | return removeRating ? client.sremAsync(feelingUserSet, itemId) : 59 | client.saddAsync(feelingUserSet, itemId); 60 | }).then(() => { 61 | return removeRating ? client.sremAsync(feelingItemSet, userId) : 62 | client.saddAsync(feelingItemSet, userId); 63 | }).then(() => { 64 | return client.sismemberAsync(feelingItemSet, userId); 65 | }).then((result) => { 66 | // only fire update sequence if requested by the user 67 | // and there are results to compare 68 | if (updateRecommendations && result > 0) { 69 | updateSequence(userId, itemId).then(() => { 70 | resolve(); 71 | }); 72 | } else { 73 | resolve(); 74 | } 75 | }); 76 | }); 77 | }; 78 | 79 | const liked = function(userId, itemId, options = {}){ 80 | options.liked = true; 81 | return changeRating(userId, itemId, options); 82 | }; 83 | 84 | const disliked = function(userId, itemId, options = {}){ 85 | options.liked = false; 86 | return changeRating(userId, itemId, options); 87 | }; 88 | 89 | const unliked = function(userId, itemId, options = {}){ 90 | options.liked = true; 91 | options.removeRating = true; 92 | return changeRating(userId, itemId, options); 93 | }; 94 | 95 | const undisliked = function(userId, itemId, options = {}){ 96 | options.liked = false; 97 | options.removeRating = true; 98 | return changeRating(userId, itemId, options); 99 | }; 100 | 101 | const input = { 102 | liked, 103 | disliked, 104 | unliked, 105 | undisliked, 106 | updateSequence 107 | }; 108 | 109 | module.exports = input; 110 | -------------------------------------------------------------------------------- /lib/stat.js: -------------------------------------------------------------------------------- 1 | 2 | const config = require('./config.js'), 3 | client = require('./client.js'), 4 | Key = require('./key'); 5 | 6 | const stat = { 7 | recommendFor: function(userId, numberOfRecs){ 8 | return new Promise((resolve, reject) => { 9 | client.zrevrangeAsync(Key.recommendedZSet(userId), 0, numberOfRecs).then((results) => { 10 | resolve(results); 11 | }); 12 | }); 13 | }, 14 | bestRated: function(){ 15 | return new Promise((resolve, reject) => { 16 | client.zrevrangeAsync(Key.scoreboardZSet(), 0, -1).then((results) => { 17 | resolve(results); 18 | }); 19 | }); 20 | }, 21 | worstRated: function(){ 22 | return new Promise((resolve, reject) => { 23 | client.zrangeAsync(Key.scoreboardZSet(), 0, -1).then((results) => { 24 | resolve(results); 25 | }); 26 | }); 27 | }, 28 | bestRatedWithScores: function(numOfRatings){ 29 | return new Promise((resolve, reject) => { 30 | client.zrevrangeAsync(Key.scoreboardZSet(), 0, numOfRatings, 'withscores').then((results) => { 31 | resolve(results); 32 | }); 33 | }); 34 | }, 35 | mostLiked: function(){ 36 | return new Promise((resolve, reject) => { 37 | client.zrevrangeAsync(Key.mostLiked(), 0, -1).then((results) => { 38 | resolve(results); 39 | }); 40 | }); 41 | }, 42 | mostDisliked: function(){ 43 | return new Promise((resolve, reject) => { 44 | client.zrevrangeAsync(Key.mostDisliked(), 0, -1).then((results) => { 45 | resolve(results); 46 | }); 47 | }); 48 | }, 49 | usersWhoLikedAlsoLiked: function(itemId){ 50 | }, 51 | mostSimilarUsers: function(userId){ 52 | return new Promise((resolve, reject) => { 53 | client.zrevrangeAsync(Key.similarityZSet(userId), 0, -1).then((results) => { 54 | resolve(results); 55 | }); 56 | }); 57 | }, 58 | leastSimilarUsers: function(userId){ 59 | return new Promise((resolve, reject) => { 60 | client.zrangeAsync(Key.similarityZSet(userId), 0, -1).then((results) => { 61 | resolve(results); 62 | }); 63 | }); 64 | }, 65 | likedBy: function(itemId){ 66 | return new Promise((resolve, reject) => { 67 | client.smembersAsync(Key.itemLikedBySet(itemId)).then((results) => { 68 | resolve(results); 69 | }); 70 | }); 71 | }, 72 | likedCount: function(itemId){ 73 | return new Promise((resolve, reject) => { 74 | client.scardAsync(Key.itemLikedBySet(itemId)).then((results) => { 75 | resolve(results); 76 | }); 77 | }); 78 | }, 79 | dislikedBy: function(itemId){ 80 | return new Promise((resolve, reject) => { 81 | client.smembersAsync(Key.itemDislikedBySet(itemId)).then((results) => { 82 | resolve(results); 83 | }); 84 | }); 85 | }, 86 | dislikedCount: function(itemId){ 87 | return new Promise((resolve, reject) => { 88 | client.scardAsync(Key.itemDislikedBySet(itemId)).then((results) => { 89 | resolve(results); 90 | }); 91 | }); 92 | }, 93 | allLikedFor: function(userId){ 94 | return new Promise((resolve, reject) => { 95 | client.smembersAsync(Key.userLikedSet(userId)).then((results) => { 96 | resolve(results); 97 | }); 98 | }); 99 | }, 100 | allDislikedFor: function(userId){ 101 | return new Promise((resolve, reject) => { 102 | client.smembersAsync(Key.userDislikedSet(userId)).then((results) => { 103 | resolve(results); 104 | }); 105 | }); 106 | }, 107 | allWatchedFor: function(userId){ 108 | return new Promise((resolve, reject) => { 109 | client.sunionAsync(Key.userLikedSet(userId), Key.userDislikedSet(userId)).then((results) => { 110 | resolve(results); 111 | }); 112 | }); 113 | } 114 | }; 115 | 116 | module.exports = exports = stat; 117 | -------------------------------------------------------------------------------- /test/testRaccoon.js: -------------------------------------------------------------------------------- 1 | /*jshint expr:true*/ 2 | var chai = require('chai'); 3 | var assert = chai.assert; 4 | var expect = chai.expect; 5 | 6 | // var blanket = require("blanket")({ 7 | // // options are passed as an argument object to the require statement 8 | // "pattern": "../lib/" 9 | // }); 10 | 11 | const config = require('../lib/config.js'), 12 | raccoon = require('../lib/raccoon.js'); 13 | 14 | describe('basic likes, dislikes, unlikes, and undislikes', function(){ 15 | beforeEach(function(done){ 16 | client.flushdbAsync().then(() => { 17 | return raccoon.liked('chris', 'batman'); 18 | }).then(() => { 19 | return raccoon.liked('larry', 'batman'); 20 | }).then(() => { 21 | return raccoon.disliked('greg', 'batman'); 22 | }).then(() => { 23 | return raccoon.liked('mai', 'superman'); 24 | }).then(() => { 25 | return raccoon.unliked('mai', 'superman'); 26 | }).then(() => { 27 | return raccoon.disliked('jesse', 'superman'); 28 | }).then(() => { 29 | return raccoon.undisliked('jesse', 'superman'); 30 | }).then(() => { 31 | done(); 32 | }); 33 | }); 34 | describe('basic like', function(){ 35 | it('should validate a user has been added after a rating', function(done){ 36 | client.smembersAsync('movie:user:chris:liked').then((results) => { 37 | assert.equal(results[0],'batman'); 38 | done(); 39 | }); 40 | }); 41 | }); 42 | describe('basic dislike', function(){ 43 | it('should validate a user has been added after a rating', function(done){ 44 | client.smembersAsync('movie:user:greg:disliked').then((results) => { 45 | assert.equal(results[0],'batman'); 46 | done(); 47 | }); 48 | }); 49 | }); 50 | describe('basic unlike', function(){ 51 | it('should validate a user has been removed after an unlike', function(done){ 52 | client.smembersAsync('movie:user:mai:liked').then((results) => { 53 | assert.equal(results[0],undefined); 54 | done(); 55 | }); 56 | }); 57 | }); 58 | describe('basic undislike', function(){ 59 | it('should validate a user has been removed after an undislike', function(done){ 60 | client.smembersAsync('movie:user:jesse:disliked').then((results) => { 61 | assert.equal(results[0],undefined); 62 | done(); 63 | }); 64 | }); 65 | }); 66 | }); 67 | 68 | describe('callbacks', function(){ 69 | it('should fire the input callback after a like is added', function(done){ 70 | raccoon.liked('hao', 'superman').then(() => { 71 | done(); 72 | }); 73 | }); 74 | it('should fire the input callback after a disliked is added', function(done){ 75 | raccoon.liked('hao', 'superman').then(() => { 76 | done(); 77 | }); 78 | }); 79 | }); 80 | 81 | describe('accurate recommendations', function(){ 82 | before(function(done){ 83 | client.flushdbAsync().then(() => { 84 | return raccoon.liked('ChristianB', 'Typical'); 85 | }).then(() => { 86 | return raccoon.liked('ChristianB', 'Value7'); 87 | }).then(() => { 88 | return raccoon.liked('malbery', 'Typical'); 89 | }).then(() => { 90 | return raccoon.liked('malbery', 'Value1'); 91 | }).then(() => { 92 | return raccoon.liked('malbery', 'Value2'); 93 | }).then(() => { 94 | return raccoon.liked('malbery', 'Value3'); 95 | }).then(() => { 96 | return raccoon.liked('malbery', 'Value4'); 97 | }).then(() => { 98 | return raccoon.liked('malbery', 'Value5'); 99 | }).then(() => { 100 | return raccoon.liked('malbery', 'Value6'); 101 | }).then(() => { 102 | return raccoon.liked('malbery', 'Value7'); 103 | }).then(() => { 104 | done(); 105 | }); 106 | }); 107 | it('should not have recommendations for malbery', function(done){ 108 | raccoon.recommendFor('malbery', 5).then((recs) => { 109 | assert.equal(recs[0], undefined); 110 | done(); 111 | }); 112 | }); 113 | }); 114 | 115 | describe('recommendations', function(){ 116 | before(function(done){ 117 | client.flushdbAsync().then(() => { 118 | return raccoon.liked('chris', 'batman'); 119 | }).then(() => { 120 | return raccoon.liked('chris', 'superman'); 121 | }).then(() => { 122 | return raccoon.disliked('chris', 'chipmunks'); 123 | }).then(() => { 124 | return raccoon.liked('max', 'batman'); 125 | }).then(() => { 126 | return raccoon.disliked('max', 'chipmunks'); 127 | }).then(() => { 128 | return raccoon.liked('greg', 'batman'); 129 | }).then(() => { 130 | return raccoon.liked('greg', 'superman'); 131 | }).then(() => { 132 | return raccoon.liked('larry', 'batman'); 133 | }).then(() => { 134 | return raccoon.liked('larry', 'iceage'); 135 | }).then(() => { 136 | return raccoon.disliked('tuhin', 'batman'); 137 | }).then(() => { 138 | return raccoon.disliked('tuhin', 'superman'); 139 | }).then(() => { 140 | return raccoon.disliked('tuhin', 'chipmunks'); 141 | }).then(() => { 142 | return raccoon.disliked('kristina', 'batman'); 143 | }).then(() => { 144 | return raccoon.disliked('kristina', 'superman'); 145 | }).then(() => { 146 | return raccoon.disliked('andre', 'superman'); 147 | }).then(() => { 148 | return raccoon.disliked('andre', 'chipmunks'); 149 | }).then(() => { 150 | return raccoon.disliked('guy', 'superman', { updateRecs: false }); 151 | }).then(() => { 152 | done(); 153 | }); 154 | }); 155 | it('should recommend a movie if a similar user liked it', function(done){ 156 | raccoon.recommendFor('andre', 5).then((recs) => { 157 | assert.equal(recs[0], 'batman'); 158 | done(); 159 | }); 160 | }); 161 | it('should not recommend a movie if updateRecs was false', function(done){ 162 | raccoon.recommendFor('guy', 5).then((recs) => { 163 | assert.equal(recs[0], undefined); 164 | done(); 165 | }); 166 | }); 167 | // it('should not recommend a movie that people opposite liked', function(done){ 168 | // raccoon.recommendFor('andre', 5, function(recs){ 169 | // assert.notEqualequal(recs[0], 'chipmunks'); 170 | // done(); 171 | // }); 172 | // }); 173 | }); 174 | 175 | describe('stats1', function(){ 176 | before(function(done){ 177 | client.flushdbAsync().then(() => { 178 | return raccoon.liked('chris', 'batman'); 179 | }).then(() => { 180 | return raccoon.liked('chris', 'superman'); 181 | }).then(() => { 182 | return raccoon.disliked('chris', 'chipmunks'); 183 | }).then(() => { 184 | return raccoon.liked('max', 'batman'); 185 | }).then(() => { 186 | return raccoon.disliked('max', 'chipmunks'); 187 | }).then(() => { 188 | return raccoon.liked('greg', 'batman'); 189 | }).then(() => { 190 | return raccoon.liked('greg', 'superman'); 191 | }).then(() => { 192 | return raccoon.liked('larry', 'batman'); 193 | }).then(() => { 194 | return raccoon.liked('larry', 'iceage'); 195 | }).then(() => { 196 | return raccoon.disliked('tuhin', 'batman'); 197 | }).then(() => { 198 | return raccoon.disliked('tuhin', 'superman'); 199 | }).then(() => { 200 | return raccoon.disliked('tuhin', 'chipmunks'); 201 | }).then(() => { 202 | for (var i = 0; i < 25; i++){ 203 | raccoon.liked('user'+i, 'batman'); 204 | } 205 | done(); 206 | }); 207 | }); 208 | it('should have batman as the bestRated even though iceage has only likes', function(done){ 209 | raccoon.bestRated().then((bestRated) => { 210 | assert.equal(bestRated[0], 'batman'); 211 | done(); 212 | }); 213 | }); 214 | it('should have chipmunks as the worst rated', function(done){ 215 | raccoon.worstRated().then((worstRated) => { 216 | assert.equal(worstRated[0], 'chipmunks'); 217 | done(); 218 | }); 219 | }); 220 | it('should have batman as the most liked and superman as second', function(done){ 221 | raccoon.mostLiked().then((mostLiked) => { 222 | assert.equal(mostLiked[0], 'batman'); 223 | assert.equal(mostLiked[1], 'superman'); 224 | done(); 225 | }); 226 | }); 227 | it('should have chipmunks as the most disliked', function(done){ 228 | raccoon.mostDisliked().then((mostDisliked) => { 229 | assert.equal(mostDisliked[0], 'chipmunks'); 230 | done(); 231 | }); 232 | }); 233 | it('should have an accurate list of users who liked an item', function(done){ 234 | raccoon.likedBy('superman').then((listOfUsers) => { 235 | assert.include(listOfUsers, 'chris'); 236 | assert.include(listOfUsers, 'greg'); 237 | done(); 238 | }); 239 | }); 240 | it('should have an accurate number of users who liked an item', function(done){ 241 | raccoon.likedCount('batman').then((numUsers) => { 242 | assert.equal(numUsers, 29); 243 | done(); 244 | }); 245 | }); 246 | it('should have an accurate list of users who disliked an item', function(done){ 247 | raccoon.dislikedBy('chipmunks').then((listOfUsers) => { 248 | expect(listOfUsers).to.include('chris'); 249 | expect(listOfUsers).to.include('max'); 250 | expect(listOfUsers).to.include('tuhin'); 251 | done(); 252 | }); 253 | }); 254 | it('should have an accurate number of users who disliked an item', function(done){ 255 | raccoon.dislikedCount('superman').then((numUsers) => { 256 | assert.equal(numUsers, 1); 257 | done(); 258 | }); 259 | }); 260 | it('should list all a users likes', function(done){ 261 | raccoon.allLikedFor('greg').then((itemList) => { 262 | expect(itemList).to.include('batman'); 263 | expect(itemList).to.include('superman'); 264 | done(); 265 | }); 266 | }); 267 | it('should list all a users dislikes', function(done){ 268 | raccoon.allDislikedFor('tuhin').then((itemList) => { 269 | expect(itemList).to.include('batman'); 270 | expect(itemList).to.include('superman'); 271 | expect(itemList).to.include('chipmunks'); 272 | done(); 273 | }); 274 | }); 275 | it('should list all a users rated items', function(done){ 276 | raccoon.allWatchedFor('max').then((itemList) => { 277 | expect(itemList).to.include('batman'); 278 | expect(itemList).to.include('chipmunks'); 279 | done(); 280 | }); 281 | }); 282 | it('should not have similar users before updating', function(done){ 283 | raccoon.mostSimilarUsers('chris').then((similarUsers) => { 284 | assert.equal(similarUsers[0], undefined); 285 | done(); 286 | }); 287 | }); 288 | it('should not have dissimilar users before updating', function(done){ 289 | raccoon.leastSimilarUsers('chris').then((leastSimilarUsers) => { 290 | assert.equal(leastSimilarUsers[0], undefined); 291 | done(); 292 | }); 293 | }); 294 | }); 295 | 296 | // describe('db connections', function(){ 297 | // it('should connect to a remove db successfully', function(done){ 298 | // client.flushdb(); 299 | // client.end(); 300 | // client.quit(); 301 | // config.localSetup = false; 302 | // config.remoteRedisPort = 6379; 303 | // config.remoteRedisURL = '127.0.0.1'; 304 | // config.remoteRedisAuth = 1111; 305 | // raccoon.liked('chris', 'batman', function(){ 306 | // raccoon.allLikedFor('chris', function(itemList){ 307 | // expect(itemList).to.include('batman'); 308 | // client.flushdb(); 309 | // client.end(); 310 | // config.localSetup = true; 311 | // done(); 312 | // }); 313 | // }); 314 | // }); 315 | // }); 316 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # recommendationRaccoon (raccoon) 2 | 3 | 4 | 5 | An easy-to-use collaborative filtering based recommendation engine and NPM module built on top of Node.js and Redis. The engine uses the Jaccard coefficient to determine the similarity between users and k-nearest-neighbors to create recommendations. This module is useful for anyone with users, a store of products/movies/items, and the desire to give their users the ability to like/dislike and receive recommendations based on similar users. Raccoon takes care of all the recommendation and rating logic. It can be paired with any database as it does not keep track of any user/item information besides a unique ID. 6 | 7 | Updated for ES6. 8 | 9 | #### Request of you 10 | If you enjoy using this module, please contribute by trying the benchmark repo and helping to optimize raccoon. Thanks! https://github.com/guymorita/benchmark_raccoon_movielens 11 | 12 | [![Coverage Status](https://coveralls.io/repos/guymorita/recommendationRaccoon/badge.png?branch=master)](https://coveralls.io/r/guymorita/recommendationRaccoon?branch=master) 13 | [![Build Status](https://travis-ci.org/guymorita/recommendationRaccoon.svg?branch=master)](https://travis-ci.org/guymorita/recommendationRaccoon) 14 | 15 | 16 | 17 | 18 | ## Demo App 19 | 20 | #### Benchmark / Performance Repo https://github.com/guymorita/benchmark_raccoon_movielens 21 | 22 | #### Demo / UI Repo: https://github.com/guymorita/Mosaic-Films---Recommendation-Engine-Demo 23 | 24 | ## Requirements 25 | 26 | * Node.js 6.x 27 | * Redis 28 | * Async 29 | * Underscore 30 | * Bluebird 31 | 32 | ## Installation 33 | 34 | ``` bash 35 | npm install raccoon 36 | ``` 37 | 38 | ## Quickstart 39 | 40 | Raccoon keeps track of the ratings and recommendations from your users. It does not need to store any meta data of the user or product aside from an id. To get started: 41 | 42 | #### Install Raccoon: 43 | ``` bash 44 | npm install raccoon 45 | ``` 46 | 47 | #### Setup Redis: 48 | If local: 49 | ``` bash 50 | npm install redis 51 | redis-server 52 | ``` 53 | If remote or you need to customize the connection settings use the process.env variables: 54 | - RACCOON_REDIS_URL 55 | - RACCOON_REDIS_PORT 56 | - RACCOON_REDIS_AUTH 57 | 58 | #### Require raccoon: 59 | ``` js 60 | const raccoon = require('raccoon'); 61 | ``` 62 | 63 | #### Add in ratings & Ask for recommendations: 64 | ``` js 65 | raccoon.liked('garyId', 'movieId').then(() => { 66 | return raccoon.liked('garyId', 'movie2Id'); 67 | }).then(() => { 68 | return raccoon.liked('chrisId', 'movieId'); 69 | }).then(() => { 70 | return raccoon.recommendFor('chrisId', 10); 71 | }).then((recs) => { 72 | console.log('recs', recs); 73 | // results will be an array of x ranked recommendations for chris 74 | // in this case it would contain movie2 75 | }); 76 | ``` 77 | 78 | 79 | ## config 80 | 81 | ``` js 82 | // these are the default values but you can change them 83 | raccoon.config.nearestNeighbors = 5; // number of neighbors you want to compare a user against 84 | raccoon.config.className = 'movie'; // prefix for your items (used for redis) 85 | raccoon.config.numOfRecsStore = 30; // number of recommendations to store per user 86 | ``` 87 | 88 | ## Full Usage 89 | 90 | ### Inputs 91 | 92 | #### Likes: 93 | ``` js 94 | raccoon.liked('userId', 'itemId').then(() => { 95 | }); 96 | // after a user likes an item, the rating data is immediately 97 | // stored in Redis in various sets for the user/item, then the similarity, 98 | // wilson score and recommendations are updated for that user. 99 | ``` 100 | 101 | ``` js 102 | raccoon.liked('userId', 'itemId', options).then(() => { 103 | }); 104 | // available options are: 105 | { 106 | updateRecs: false 107 | // this will stop the update sequence for this rating 108 | // and greatly speed up the time to input all the data 109 | // however, there will not be any recommendations at the end. 110 | // if you fire a like/dislike with updateRecs on it will only update 111 | // recommendations for that user. 112 | // default === true 113 | } 114 | 115 | // options are available to liked, disliked, unliked, and undisliked. 116 | 117 | ``` 118 | 119 | ``` js 120 | raccoon.unliked('userId', 'itemId').then(() => { 121 | }); 122 | // removes the liked rating from all sets and updates. not the same as disliked. 123 | ``` 124 | 125 | #### Dislikes: 126 | ``` js 127 | raccoon.disliked('userId', 'itemId').then(() => { 128 | }); 129 | // negative rating of the item. if user1 liked movie1 and user2 disliked it, their 130 | // jaccard would be -1 meaning the have opposite preferences. 131 | ``` 132 | 133 | ``` js 134 | raccoon.undisliked('userId', 'itemId').then(() => { 135 | }); 136 | // similar to unliked. removes the negative disliked rating as if it was never rated. 137 | ``` 138 | 139 | ### Recommendations 140 | 141 | ``` js 142 | raccoon.recommendFor('userId', 'numberOfRecs').then((results) => { 143 | // returns an ranked sorted array of itemIds which represent the top recommendations 144 | // for that individual user based on knn. 145 | // numberOfRecs is the number of recommendations you want to receive. 146 | // asking for recommendations queries the 'recommendedZSet' sorted set for the user. 147 | // the movies in this set were calculated in advance when the user last rated 148 | // something. 149 | // ex. results = ['batmanId', 'supermanId', 'chipmunksId'] 150 | }); 151 | 152 | raccoon.mostSimilarUsers('userId').then((results) => { 153 | // returns an array of the 'similarityZSet' ranked sorted set for the user which 154 | // represents their ranked similarity to all other users given the 155 | // Jaccard Coefficient. the value is between -1 and 1. -1 means that the 156 | // user is the exact opposite, 1 means they're exactly the same. 157 | // ex. results = ['garyId', 'andrewId', 'jakeId'] 158 | }); 159 | 160 | raccoon.leastSimilarUsers('userId').then((results) => { 161 | // same as mostSimilarUsers but the opposite. 162 | // ex. results = ['timId', 'haoId', 'phillipId'] 163 | }); 164 | ``` 165 | 166 | ### User Statistics 167 | 168 | #### Ratings: 169 | ``` js 170 | raccoon.bestRated().then((results) => { 171 | // returns an array of the 'scoreboard' sorted set which represents the global 172 | // ranking of items based on the Wilson Score Interval. in short it represents the 173 | // 'best rated' items based on the ratio of likes/dislikes and cuts out outliers. 174 | // ex. results = ['iceageId', 'sleeplessInSeattleId', 'theDarkKnightId'] 175 | }); 176 | 177 | raccoon.worstRated().then((results) => { 178 | // same as bestRated but in reverse. 179 | }); 180 | ``` 181 | 182 | #### Liked/Disliked lists and counts: 183 | ``` js 184 | raccoon.mostLiked().then((results) => { 185 | // returns an array of the 'mostLiked' sorted set which represents the global 186 | // number of likes for all the items. does not factor in dislikes. 187 | }); 188 | 189 | raccoon.mostDisliked().then((results) => { 190 | // same as mostLiked but the opposite. 191 | }); 192 | 193 | raccoon.likedBy('itemId').then((results) => { 194 | // returns an array which lists all the users who liked that item. 195 | }); 196 | 197 | raccoon.likedCount('itemId').then((results) => { 198 | // returns the number of users who have liked that item. 199 | }); 200 | 201 | raccoon.dislikedBy('itemId').then((results) => { 202 | // same as likedBy but for disliked. 203 | }); 204 | 205 | raccoon.dislikedCount('itemId').then((results) => { 206 | // same as likedCount but for disliked. 207 | }); 208 | 209 | raccoon.allLikedFor('userId').then((results) => { 210 | // returns an array of all the items that user has liked. 211 | }); 212 | 213 | raccoon.allDislikedFor('userId').then((results) => { 214 | // returns an array of all the items that user has disliked. 215 | }); 216 | 217 | raccoon.allWatchedFor('userId').then((results) => { 218 | // returns an array of all the items that user has liked or disliked. 219 | }); 220 | ``` 221 | 222 | 223 | ## Recommendation Engine Components 224 | 225 | ### Jaccard Coefficient for Similarity 226 | 227 | There are many ways to gauge the likeness of two users. The original implementation of recommendation Raccoon used the Pearson Coefficient which was good for measuring discrete values in a small range (i.e. 1-5 stars). However, to optimize for quicker calcuations and a simplier interface, recommendation Raccoon instead uses the Jaccard Coefficient which is useful for measuring binary rating data (i.e. like/dislike). Many top companies have gone this route such as Youtube because users were primarily rating things 4-5 or 1. The choice to use the Jaccard's instead of Pearson's was largely inspired by David Celis who designed Recommendable, the top recommendation engine on Rails. The Jaccard Coefficient also pairs very well with Redis which is able to union/diff sets of like/dislikes at O(N). 228 | 229 | ### K-Nearest Neighbors Algorithm for Recommendations 230 | 231 | To deal with large user bases, it's essential to make optimizations that don't involve comparing every user against every other user. One way to deal with this is using the K-Nearest Neighbors algorithm which allows you to only compare a user against their 'nearest' neighbors. After a user's similarity is calculated with the Jaccard Coefficient, a sorted set is created which represents how similar that user is to every other. The top users from that list are considered their nearest neighbors. recommendation Raccoon uses a default value of 5, but this can easily be changed based on your needs. 232 | 233 | ### Wilson Score Confidence Interval for a Bernoulli Parameter 234 | 235 | If you've ever been to Amazon or another site with tons of reviews, you've probably ran into a sorted page of top ratings only to find some of the top items have only one review. The Wilson Score Interval at 95% calculates the chance that the 'real' fraction of positive ratings is at least x. This allows for you to leave off the items/products that have not been rated enough or have an abnormally high ratio. It's a great proxy for a 'best rated' list. 236 | 237 | ### Redis 238 | 239 | When combined with hiredis, redis can get/set at ~40,000 operations/second using 50 concurrent connections without pipelining. In short, Redis is extremely fast at set math and is a natural fit for a recommendation engine of this scale. Redis is integral to many top companies such as Twitter which uses it for their Timeline (substituted Memcached). 240 | 241 | 242 | 243 | ## Features to Contribute 244 | 245 | * Help optimize for the Movielens 100k data set. Here: https://github.com/guymorita/benchmark_raccoon_movielens 246 | 247 | ## Run tests 248 | 249 | ``` bash 250 | grunt test 251 | grunt mochacov:coverage 252 | ``` 253 | 254 | ## Tech Stack 255 | 256 | recommendationRaccoon is written fully in Javascript. It utilizes the asyncronous, non-blocking features of Node.js for the core of app. The recommendations and ratings are stored in an intermediate data store called Redis which performs extremely well compared to database systems that write every change to disk before committing the transaction. Redis holds the entire dataset in memory. For the actual handling of the parallel asyncronous functions, raccoon uses the async library for Node.js. 257 | 258 | For testing, raccoon uses Mocha Chai as a testing suite, automates it with Grunt.js and gets test coverage with Blanket.js/Travis-CI/Coveralls. 259 | 260 | ## Links 261 | 262 | * Code: 'git clone git://github.com/guymorita/recommendationRaccoon.git' 263 | * NPM Module: 'https://npmjs.org/package/raccoon' 264 | * Benchmark / Performance repo: 'https://github.com/guymorita/benchmark_raccoon_movielens' 265 | * Demo / UI App repo: 'https://github.com/guymorita/Mosaic-Films---Recommendation-Engine-Demo' 266 | -------------------------------------------------------------------------------- /lib/algorithms.js: -------------------------------------------------------------------------------- 1 | 2 | const async = require('async'), 3 | config = require('./config.js'), 4 | _ = require('underscore'), 5 | Key = require('./key'); 6 | 7 | // the jaccard coefficient outputs an objective measurement of the similarity between two objects. in this case, two users. the coefficient 8 | // is the result of summing the two users likes/dislikes incommon then summing they're likes/dislikes that they disagree on. this sum is 9 | // then divided by the number of items they both reviewed. 10 | const jaccardCoefficient = function(userId1, userId2, callback){ 11 | let similarity = 0, 12 | finalJaccard = 0, 13 | ratedInCommon = 0; 14 | 15 | const user1LikedSet = Key.userLikedSet(userId1), 16 | user1DislikedSet = Key.userDislikedSet(userId1), 17 | user2LikedSet = Key.userLikedSet(userId2), 18 | user2DislikedSet = Key.userDislikedSet(userId2); 19 | 20 | // retrieving a set of the users likes incommon 21 | client.sinter(user1LikedSet,user2LikedSet, function(err, results1){ 22 | // retrieving a set of the users dislike incommon 23 | client.sinter(user1DislikedSet,user2DislikedSet, function(err, results2){ 24 | // retrieving a set of the users like and dislikes that they disagree on 25 | client.sinter(user1LikedSet,user2DislikedSet, function(err, results3){ 26 | // retrieving a set of the users like and dislikes that they disagree on 27 | client.sinter(user1DislikedSet,user2LikedSet, function(err, results4){ 28 | // calculating the sum of the similarities minus the sum of the disagreements 29 | similarity = (results1.length+results2.length-results3.length-results4.length); 30 | // calculating the number of movies rated incommon 31 | ratedInCommon = (results1.length+results2.length+results3.length+results4.length); 32 | // calculating the the modified jaccard score. similarity / num of comparisons made incommon 33 | finalJaccardScore = similarity / ratedInCommon; 34 | // calling the callback function passed to jaccard with the new score 35 | callback(finalJaccardScore); 36 | }); 37 | }); 38 | }); 39 | }); 40 | }; 41 | 42 | // this function updates the similarity for one user versus all others. at scale this probably needs to be refactored to compare a user 43 | // against clusters of users instead of against all. every comparison will be a value between -1 and 1 representing simliarity. 44 | // -1 is exact opposite, 1 is exactly the same. 45 | exports.updateSimilarityFor = function(userId, cb){ 46 | // turning the userId into a string. depending on the db they might send an object, in which it won't compare properly when comparing 47 | // to other users 48 | userId = String(userId); 49 | // initializing variables 50 | let userRatedItemIds, itemLiked, itemDisliked, itemLikeDislikeKeys; 51 | // setting the redis key for the user's similarity set 52 | const similarityZSet = Key.similarityZSet(userId); 53 | // creating a combined set with the all of a users likes and dislikes 54 | client.sunion(Key.userLikedSet(userId), Key.userDislikedSet(userId), function(err, userRatedItemIds){ 55 | // if they have rated anything 56 | if (userRatedItemIds.length > 0){ 57 | // creating a list of redis keys to look up all of the likes and dislikes for a given set of items 58 | itemLikeDislikeKeys = _.map(userRatedItemIds, function(itemId, key){ 59 | // key for that item being liked 60 | itemLiked = Key.itemLikedBySet(itemId); 61 | // key for the item being disliked 62 | itemDisliked = Key.itemDislikedBySet(itemId); 63 | // returning an array of those keys 64 | return [itemLiked, itemDisliked]; 65 | }); 66 | } 67 | // flattening the array of all the likes/dislikes for the items a user rated 68 | itemLikeDislikeKeys = _.flatten(itemLikeDislikeKeys); 69 | // builds one set of all the users who liked and disliked the same items 70 | client.sunion(itemLikeDislikeKeys, function(err, otherUserIdsWhoRated){ 71 | // running in async parallel, going through the array of user ids who also rated the same things 72 | async.each(otherUserIdsWhoRated, 73 | // running a function on each item in the list 74 | function(otherUserId, callback){ 75 | // if there is only one other user or the other user is the same user 76 | if (otherUserIdsWhoRated.length === 1 || userId === otherUserId){ 77 | // then call the callback and exciting the similarity check 78 | callback(); 79 | } 80 | // if the userid is not the same as the user 81 | if (userId !== otherUserId){ 82 | // calculate the jaccard coefficient for similarity. it will return a value between -1 and 1 showing the two users 83 | // similarity 84 | jaccardCoefficient(userId, otherUserId, function(result) { 85 | // with the returned similarity score, add it to a sorted set named above 86 | client.zadd(similarityZSet, result, otherUserId, function(err){ 87 | // call the async callback function once finished to indicate that the process is finished 88 | callback(); 89 | }); 90 | }); 91 | } 92 | }, 93 | // once all the async comparisons have been made, call the final callback based to the original function 94 | function(err){ 95 | cb(); 96 | } 97 | ); 98 | }); 99 | }); 100 | }; 101 | 102 | exports.predictFor = function(userId, itemId){ 103 | userId = String(userId); 104 | itemId = String(itemId); 105 | let finalSimilaritySum = 0.0; 106 | let prediction = 0.0; 107 | const similarityZSet = Key.similarityZSet(userId); 108 | const likedBySet = Key.itemLikedBySet(itemId); 109 | const dislikedBySet = Key.itemDislikedBySet(itemId); 110 | 111 | return new Promise((resolve, reject) => { 112 | exports.similaritySum(similarityZSet, likedBySet, function(result1){ 113 | exports.similaritySum(similarityZSet, dislikedBySet, function(result2){ 114 | finalSimilaritySum = result1 - result2; 115 | client.scard(likedBySet, function(err, likedByCount){ 116 | client.scard(dislikedBySet, function(err, dislikedByCount){ 117 | prediction = finalSimilaritySum / parseFloat(likedByCount + dislikedByCount); 118 | if (isFinite(prediction)){ 119 | resolve(prediction); 120 | } else { 121 | resolve(0.0); 122 | } 123 | }); 124 | }); 125 | }); 126 | }); 127 | }); 128 | }; 129 | 130 | exports.similaritySum = function(simSet, compSet, cb){ 131 | let similarSum = 0.0; 132 | client.smembers(compSet, function(err, userIds){ 133 | async.each(userIds, 134 | function(userId, callback){ 135 | client.zscore(simSet, userId, function(err, zScore){ 136 | const newScore = parseFloat(zScore) || 0.0; 137 | similarSum += newScore; 138 | callback(); 139 | }); 140 | }, 141 | function(err){ 142 | cb(similarSum); 143 | } 144 | ); 145 | }); 146 | }; 147 | 148 | // after the similarity is updated for the user, the users recommendations are updated 149 | // recommendations consist of a sorted set in Redis. the values of this set are 150 | // names of the items and the score is what raccoon estimates that user would rate it 151 | // the values are generally not going to be -1 or 1 exactly because there isn't 100% 152 | // certainty. 153 | exports.updateRecommendationsFor = function(userId, cb){ 154 | // turning the user input into a string so it can be compared properly 155 | userId = String(userId); 156 | // creating two blank arrays 157 | let setsToUnion = []; 158 | let scoreMap = []; 159 | // initializing the redis keys for temp sets, the similarity set and the recommended set 160 | const tempAllLikedSet = Key.tempAllLikedSet(userId); 161 | const similarityZSet = Key.similarityZSet(userId); 162 | const recommendedZSet = Key.recommendedZSet(userId); 163 | // returns an array of the users that are most similar within k nearest neighbors 164 | client.zrevrange(similarityZSet, 0, config.nearestNeighbors-1, function(err, mostSimilarUserIds){ 165 | // returns an array of the users that are least simimilar within k nearest neighbors 166 | client.zrange(similarityZSet, 0, config.nearestNeighbors-1, function(err, leastSimilarUserIds){ 167 | // iterate through the user ids to create the redis keys for all those users likes 168 | _.each(mostSimilarUserIds, function(usrId, key){ 169 | setsToUnion.push(Key.userLikedSet(usrId)); 170 | }); 171 | // if you want to factor in the least similar least likes, you change this in config 172 | // left it off because it was recommending items that every disliked universally 173 | _.each(leastSimilarUserIds, function(usrId, key){ 174 | setsToUnion.push(Key.userDislikedSet(usrId)); 175 | }); 176 | // if there is at least one set in the array, continue 177 | if (setsToUnion.length > 0){ 178 | setsToUnion.unshift(tempAllLikedSet); 179 | client.sunionstore(setsToUnion, function(err) { 180 | // using the new array of all the items that were liked by people similar and disliked by people opposite, create a new set with all the 181 | // items that the current user hasn't already rated 182 | client.sdiff(tempAllLikedSet, Key.userLikedSet(userId), Key.userDislikedSet(userId), function(err, notYetRatedItems){ 183 | // with the array of items that user has not yet rated, iterate through all of them and predict what the current user would rate it 184 | async.each(notYetRatedItems, 185 | function(itemId, callback){ 186 | exports.predictFor(userId, itemId).then((score) => { 187 | // push the score and item to the score map array. 188 | scoreMap.push([score, itemId]); 189 | callback(); 190 | }); 191 | }, 192 | // using score map which is an array of what the current user would rate all the unrated items, 193 | // add them to that users sorted recommended set 194 | function(err){ 195 | client.del(recommendedZSet, function(err){ 196 | async.each(scoreMap, 197 | function(scorePair, callback){ 198 | client.zadd(recommendedZSet, scorePair[0], scorePair[1], function(err){ 199 | callback(); 200 | }); 201 | }, 202 | // after all the additions have been made to the recommended set, 203 | function(err){ 204 | client.del(tempAllLikedSet, function(err){ 205 | client.zcard(recommendedZSet, function(err, length){ 206 | client.zremrangebyrank(recommendedZSet, 0, length-config.numOfRecsStore-1, function(err){ 207 | cb(); 208 | }); 209 | }); 210 | }); 211 | } 212 | ); 213 | }); 214 | } 215 | ); 216 | }); 217 | }); 218 | } else { 219 | cb(); 220 | } 221 | }); 222 | }); 223 | }; 224 | 225 | // the wilson score is a proxy for 'best rated'. it represents the best finding the best ratio of likes and also eliminating 226 | // outliers. the wilson score is a value between 0 and 1. 227 | exports.updateWilsonScore = function(itemId, callback){ 228 | // creating the redis keys for scoreboard and to get the items liked and disliked sets 229 | const scoreboard = Key.scoreboardZSet(); 230 | const likedBySet = Key.itemLikedBySet(itemId); 231 | const dislikedBySet = Key.itemDislikedBySet(itemId); 232 | // used for a confidence interval of 95% 233 | const z = 1.96; 234 | // initializing variables to calculate wilson score 235 | let n, pOS, score; 236 | // getting the liked count for the item 237 | client.scard(likedBySet, function(err, likedResults){ 238 | // getting the disliked count for the item 239 | client.scard(dislikedBySet, function(err, dislikedResults){ 240 | // if the total count is greater than zero 241 | if ((likedResults + dislikedResults) > 0){ 242 | // set n to the sum of the total ratings for the item 243 | n = likedResults + dislikedResults; 244 | // set pOS to the num of liked results divided by the number rated 245 | // pOS represents the proportion of successes or likes in this case 246 | pOS = likedResults / parseFloat(n); 247 | // try the following equation 248 | try { 249 | // calculating the wilson score 250 | // http://www.evanmiller.org/how-not-to-sort-by-average-rating.html 251 | score = (pOS + z*z/(2*n) - z*Math.sqrt((pOS*(1-pOS)+z*z/(4*n))/n))/(1+z*z/n); 252 | } catch (e) { 253 | // if an error occurs, set the score to 0.0 and console log the error message. 254 | console.log(e.name + ": " + e.message); 255 | score = 0.0; 256 | } 257 | // add that score to the overall scoreboard. if that item already exists, the score will be updated. 258 | client.zadd(scoreboard, score, itemId, function(err){ 259 | // call the final callback sent to the initial function. 260 | callback(); 261 | }); 262 | } 263 | }); 264 | }); 265 | }; 266 | --------------------------------------------------------------------------------