├── .gitignore ├── index.js ├── lib ├── mock.py ├── products.py ├── init.py ├── recommending.py ├── silhouette.py ├── structure.py ├── util.py ├── customers.py ├── exec.py ├── normalize.py ├── clustering.py └── run.py ├── package.json ├── LICENSE ├── node ├── variables.js ├── engine.js ├── analytics.js └── helpers.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | npm-debug.log -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var RecommendationEngine = require('./node/engine'); 2 | var Analytics = require('./node/analytics'); 3 | var RecommendationVariables = require('./node/variables').GetSet; 4 | var helpers = require('./node/helpers'); 5 | 6 | var Recommender = {}; 7 | 8 | helpers._extend(Recommender, RecommendationEngine, Analytics, RecommendationVariables); 9 | 10 | module.exports = Recommender; 11 | -------------------------------------------------------------------------------- /lib/mock.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import util as u 3 | import random 4 | 5 | def mockProducts(productNum): 6 | products = u.generateItems(productNum, u.symbols) 7 | return products 8 | 9 | def mockCustomers(nameNum): 10 | names = u.generateItems(nameNum, u.alphabet) 11 | return names 12 | 13 | def mockDataBuilder(names, products): 14 | num = random.randint(8,20) 15 | for i in range(0, len(names)): 16 | for j in range(0, num): 17 | randProduct = products[random.randint(0, len(products)-1)] 18 | c.customers[i].purchaseItem(randProduct) 19 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "product-recommender", 3 | "version": "1.3.2", 4 | "description": 
"npm module that utilizes a python machine learning recommendation engine to give easy access to product recommendations.", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/lramsey/product-recommender.git" 12 | }, 13 | "keywords": [ 14 | "python", 15 | "product recommendation", 16 | "recommendation", 17 | "machine learning", 18 | "analytics", 19 | "product-demo" 20 | ], 21 | "author": "Luke Ramsey", 22 | "license": "MIT", 23 | "bugs": { 24 | "url": "https://github.com/lramsey/product-recommender/issues", 25 | "email": "lramsey177@gmail.com" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Luke 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /lib/products.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import util as u 3 | import math 4 | 5 | productMatrix = {} 6 | productFreq = {} 7 | 8 | products = [] 9 | productsMap = {} 10 | 11 | def addProducts(prods): 12 | global products 13 | products = prods 14 | for i in range(0, len(prods)): 15 | productsMap[prods[i]] = i 16 | 17 | def productSim(item1, item2): 18 | matrix = c.matrix 19 | one = productsMap[item1] 20 | two = productsMap[item2] 21 | 22 | intersection = 0.0 23 | union = 0.0 24 | for i in range(0,len(matrix)): 25 | if matrix[i][one] > 0 and matrix[i][two] > 0: 26 | union += 1 27 | intersection += 1 28 | elif matrix[i][one] > 0 or matrix[i][two] > 0: 29 | union += 1 30 | return math.tanh(math.sqrt(union - intersection)) 31 | 32 | def productMatrixiser(): 33 | productMatrix = [] 34 | for i in range(0, len(products)): 35 | sims = [] 36 | for j in range(0, len(products)): 37 | sims.append(productSim(products[i], products[j])) 38 | productMatrix.append(sims) 39 | return u.scaleFeatures(productMatrix) 40 | -------------------------------------------------------------------------------- /lib/init.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import products as p 3 | import mock as m 4 | import run 5 | 6 | names = [] 7 | 8 | def addCustomers(nameList): 9 | global names 10 | names = nameList 11 | for i in range(0, len(names)): 12 | c.Customer(names[i]) 13 | c.customersMap[names[i]] = i 14 | 15 | def dataBuilder(matrix): 16 | for i in range(0, len(matrix)): 17 | for j 
in range(0,len(matrix[i])): 18 | num = matrix[i][j] 19 | while num > 0: 20 | c.customers[i].purchaseItem(p.products[j]) 21 | num -= 1 22 | 23 | def init(nameList, productList, matrix): 24 | if isinstance(productList, int): 25 | products = m.mockProducts(productList) 26 | p.addProducts(products) 27 | if isinstance(nameList, int): 28 | nameList = m.mockCustomers(nameList) 29 | addCustomers(nameList) 30 | 31 | if isinstance(matrix, list): 32 | '''expected data: list of customers, list of products, list customer arrays containing 33 | product purchases in same order as product list.''' 34 | dataBuilder(matrix); 35 | else: 36 | m.mockDataBuilder(names, products) 37 | c.matrixBuilder() 38 | 39 | recommend = run.run(names, matrix) 40 | while recommend == 'again': 41 | recommend = run.run(names, matrix) 42 | return recommend 43 | -------------------------------------------------------------------------------- /lib/recommending.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import products as p 3 | import math 4 | 5 | recommendationMatrix = [] 6 | goodClusters = [] 7 | 8 | def buildRecommendations(names, clusters): 9 | global recommendationMatrix 10 | recommendationMatrix = [] 11 | global goodClusters 12 | goodClusters = clusters 13 | for i in range(0, len(names)): 14 | recObj = {} 15 | recommendations = [] 16 | history = c.matrix[i] 17 | for j in range(0, len(clusters)): 18 | recommendations = recommendations + clusterRecommender(names[i], history, clusters[j], j, recObj) 19 | recommendations.sort() 20 | recommendationMatrix.append(recommendations) 21 | return recommendationMatrix 22 | 23 | def clusterRecommender(name, hist, cluster, index, recObj): 24 | # cluster map 25 | clusterIndex = cluster[2][name] 26 | # centroid of user's cluster 27 | centroid = cluster[1][clusterIndex] 28 | silhouette = cluster[5] 29 | results = findDiffs(hist, centroid, silhouette, index, recObj) 30 | return results 31 | 32 | def 
findDiffs(hist, avg, sil, index, recObj): 33 | normals = [] 34 | for i in range(0,len(avg)): 35 | normalized = sil * math.fabs(hist[i]-avg[i]) 36 | val = recObj.get(p.products[i],0) 37 | if normalized > val: 38 | normals.append({normalized: p.products[i]}) 39 | recObj[p.products[i]] = normalized 40 | normals.sort() 41 | return normals 42 | -------------------------------------------------------------------------------- /lib/silhouette.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import util as u 3 | 4 | matrix = [[]] 5 | silhouettesList = [] 6 | centroids = [] 7 | 8 | def __init__(mat, cents): 9 | global matrix 10 | matrix = mat 11 | global centroids 12 | centroids = cents 13 | 14 | def averageSilhouettes(clusters, matrix, centroids): 15 | __init__(matrix, centroids) 16 | silhouettes = 0.0 17 | for i in range(0, len(clusters)): 18 | center = centroids[i] 19 | neighbor = neighboringCentroid(clusters[i], i) 20 | s = 0.0 21 | for j in range(0, len(clusters[i])): 22 | point = customerPoint(clusters[i][j]) 23 | s += silhouette(point, center, neighbor) 24 | clustSil = s/len(clusters[i]) 25 | silhouettesList.append(clustSil) 26 | silhouettes += s 27 | return silhouettes/len(c.customers) 28 | 29 | def silhouette(point, centroid, neighbor): 30 | a = u.dist(point, centroid) 31 | b = u.dist(point, neighbor) 32 | sil = (b-a)/max(a,b) 33 | return sil 34 | 35 | def neighboringCentroid(cluster, index): 36 | amin = len(customerPoint(cluster[0])) 37 | neighborIndex = -.1 38 | for i in range(0,len(centroids)): 39 | if i == index: 40 | continue 41 | dist = u.dist(centroids[i], centroids[index]) 42 | if dist < amin: 43 | amin = dist 44 | neighborIndex = i 45 | neighbor = centroids[neighborIndex] 46 | return neighbor 47 | 48 | def customerPoint(customer): 49 | return matrix[c.customersMap[customer.name]] 50 | -------------------------------------------------------------------------------- /lib/structure.py: 
-------------------------------------------------------------------------------- 1 | import products as p 2 | import customers as c 3 | import silhouette as s 4 | import clustering as cl 5 | import numpy as np 6 | import run 7 | 8 | def redoMatrix(clusters, i, clusterMat=[], clusterMap={}, indexProds=[]): 9 | for j in range(0, len(clusters[i])): 10 | clusterMat.append(run.transpose[p.productsMap[clusters[i][j]]]) 11 | clusterMap[clusters[i][j]] = j 12 | indexProds.append(clusters[i][j]) 13 | 14 | def subMatrices(clusters): 15 | results = [] 16 | maps = [] 17 | indexMap = [] 18 | for i in range(0,len(clusters)): 19 | clusterMat = [] 20 | clusterMap = {} 21 | indexProds = [] 22 | redoMatrix(clusters, i, clusterMat, clusterMap, indexProds) 23 | mat = np.array(clusterMat).transpose() 24 | results.append(mat) 25 | maps.append(clusterMap) 26 | indexMap.append(indexProds) 27 | return [results, maps, indexMap] 28 | 29 | def createClusterHelpers(indexMap, subMatrix, aMap): 30 | cl.__init__(subMatrix, c.customers, aMap) 31 | clust = [] 32 | results = cl.kMeans(25,8) 33 | clusters = results[0] 34 | # index 0 35 | clust.append(clusters) 36 | centroids = results[1] 37 | # index 1 38 | clust.append(centroids) 39 | # index 2 40 | clust.append(cl.clusterMap) 41 | # index 3 42 | clust.append(indexMap) 43 | avgSils = s.averageSilhouettes(clust[0], subMatrix, centroids) 44 | # index 4 45 | clust.append(s.silhouettesList) 46 | # index 5 47 | clust.append(avgSils) 48 | return clust 49 | 50 | def createClusterMap(clusters): 51 | clusterMap = {} 52 | for i in range(0, len(clusters)): 53 | for j in range(0, len(clusters[i])): 54 | clusterMap[clusters[i][j]] = i 55 | return clusterMap -------------------------------------------------------------------------------- /lib/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | alphabet = 
['b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# Character pool used to spell mock product names (see generateItems).
symbols = ['~','=','@','#','<', '>','$','%','^','&','*','(',')','_','+','{','}','/','|']


def reducer(num, arr, modulos=None):
    """Return the digits of ``num`` written in base ``len(arr)``.

    Digits are ordered least-significant first, so ``reducer(5, 'abc')``
    is ``[2, 1]`` (5 == 1*3 + 2).

    num     -- non-negative integer to convert.
    arr     -- sequence whose length is the base.
    modulos -- optional list to append the digits to; it is extended in
               place and returned. A fresh list is created when omitted,
               so separate calls never share state.
    """
    if modulos is None:
        modulos = []
    base = len(arr)
    modulos.append(num % base)
    # Explicit floor division keeps the digits integral on both
    # Python 2 and Python 3 (plain "/" is true division on Python 3).
    while num // base > 0:
        num //= base
        modulos.append(num % base)
    return modulos


def generateItems(num, arr):
    """Build ``num`` distinct labels spelled with the characters of ``arr``.

    Label ``i`` is ``i`` written in base ``len(arr)`` using ``arr`` as the
    digit set, left-padded with 'a' so every label is as wide as the
    widest one. Returns an empty list when ``num`` is not positive.
    """
    if num <= 0:
        return []
    # Width of the largest index (num - 1). Counting its digits is the
    # exact integer form of ceil(log(num, len(arr))) and avoids the
    # floating-point overshoot of math.log on exact powers.
    size = len(reducer(num - 1, arr))
    results = []
    for i in range(num):
        digits = reducer(i, arr)
        # reducer yields least-significant first; labels read most-significant first.
        text = ''.join(arr[d] for d in reversed(digits))
        results.append(text.rjust(size, 'a'))
    return results


def dist(v1, v2):
    """Return the Euclidean distance between two equally-shaped vectors.

    Accepts numpy arrays or plain sequences of numbers.
    """
    diff = np.asarray(v1, dtype=float) - np.asarray(v2, dtype=float)
    return np.sqrt(np.sum(diff ** 2.))


def findCenter(points):
    """Return the component-wise mean of ``points`` as a float array.

    points -- non-empty sequence (or 2-D array) of equally-sized vectors.

    The input is never modified. Raises ValueError when ``points`` is empty.
    """
    pts = np.asarray(points, dtype=float)
    if len(pts) == 0:
        raise ValueError('findCenter requires at least one point')
    # mean() allocates a new array, so the caller's rows stay untouched.
    return pts.mean(axis=0)
37 | 38 | def scaleFeatures(matrix): 39 | matrix = np.array(matrix) 40 | amax = np.amax(matrix) 41 | amin = rightHandMin(matrix) 42 | scaledMatrix = (matrix - amin)*(1/(amax-amin)) 43 | scaledMatrix = setDiagonals(scaledMatrix, -1) 44 | return scaledMatrix.tolist() 45 | 46 | def setDiagonals(matrix, value): 47 | for i in range(0,len(matrix)): 48 | matrix[i][i] = -1 49 | return matrix 50 | 51 | def rightHandMin(matrix): 52 | amin = 1.0 53 | for i in range(0,len(matrix)): 54 | for j in range(i+1,len(matrix[i])): 55 | if matrix[i][j] < amin: 56 | amin = matrix[i][j] 57 | return amin 58 | -------------------------------------------------------------------------------- /lib/customers.py: -------------------------------------------------------------------------------- 1 | import products as p 2 | import util as u 3 | import numpy as np 4 | import math 5 | 6 | customers = [] 7 | customersMap = {} 8 | matrix = [] 9 | 10 | class Customer(object): 11 | def __init__(self, name): 12 | self.purchases = {} 13 | self.purchasesArr = [] 14 | for i in range(0,len(p.products)): 15 | self.purchases[p.products[i]] = 0 16 | self.purchasesArr.append(0) 17 | self.name = name 18 | customers.append(self) 19 | 20 | def purchaseItem(self, item): 21 | prodCount = self.purchases.get(item, 0) 22 | prodCount += 1 23 | self.purchases[item] = prodCount 24 | self.purchasesArr[p.productsMap[item]] = prodCount 25 | 26 | def customerSim(person1, person2): 27 | one = person1.purchases 28 | two = person2.purchases 29 | intersection = 0.0 30 | union = 0.0 31 | for i in one: 32 | if one[i] > 0 and two[i] > 0: 33 | union += 1 34 | intersection += 1; 35 | elif one[i] > 0 or two[i] > 0: 36 | union += 1 37 | 38 | return math.tanh(math.sqrt(union - intersection)) 39 | 40 | def customerMatrixiser(): 41 | customerMatrix = [] 42 | for i in range(0, len(customers)): 43 | sims = [] 44 | for j in range(0,len(customers)): 45 | sims.append(customerSim(customers[i], customers[j])) 46 | customerMatrix.append(sims) 47 | 
return u.scaleFeatures(customerMatrix) 48 | 49 | 50 | def matrixBuilder(): 51 | global matrix 52 | matr = [] 53 | for i in range(0, len(customers)): 54 | row = np.zeros(len(customers[0].purchasesArr)) 55 | for j in range(0, len(customers[i].purchasesArr)): 56 | if customers[i].purchasesArr[j] > 0: 57 | row[j] = 1 58 | matr.append(row) 59 | matrix = np.array(matr) 60 | global maxRow 61 | maxRow = row 62 | -------------------------------------------------------------------------------- /lib/exec.py: -------------------------------------------------------------------------------- 1 | import jsonpickle as j 2 | import init as i 3 | import argparse 4 | import ast 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('names') 8 | parser.add_argument('products') 9 | parser.add_argument('matrix') 10 | parser.add_argument('path') 11 | args = parser.parse_args() 12 | names = ast.literal_eval(args.names) 13 | products = ast.literal_eval(args.products) 14 | path = args.path 15 | 16 | if path: 17 | matrixPath = args.matrix 18 | with open(matrixPath) as f: 19 | matrix = f.read().splitlines() 20 | for index in range(0, len(matrix)): 21 | matrix[index] = matrix[index].split(',') 22 | matrix[index] = map(int,matrix[index]) 23 | f.close() 24 | else: 25 | matrix = ast.literal_eval(args.matrix) 26 | 27 | if type(names) != list: 28 | names = len(matrix) 29 | if type(products) != list: 30 | products = len(matrix[0]) 31 | 32 | results = i.init(names, products, matrix) 33 | 34 | def sanitizeResults(res): 35 | if not isinstance(results[2], list): 36 | results[2] = results[2].tolist() 37 | for l in range(6,9): 38 | if len(results[l]) > 0: 39 | for m in range(0,len(results[l])): 40 | for n in range(0,len(results[l][m][0])): 41 | for o in range(0,len(results[l][m][0][n])): 42 | results[l][m][0][n][o] = results[l][m][0][n][o].name 43 | 44 | for n in range(0,len(results[l][m][1])): 45 | results[l][m][1][n] = results[l][m][1][n].tolist() 46 | 47 | for n in 
range(0,len(results[l][m][4])): 48 | results[l][m][4][n] = float(results[l][m][4][n]) 49 | 50 | results[l][m][5] = float(results[l][m][5]) 51 | 52 | results[9] = results[9].tolist() 53 | results[6] = results[6][0] 54 | 55 | sanitizeResults(results) 56 | 57 | print j.encode(results) 58 | -------------------------------------------------------------------------------- /node/variables.js: -------------------------------------------------------------------------------- 1 | // private object 2 | var rec = {}; 3 | var GetSet = {}; 4 | var Variables = {'rec': rec, 'GetSet': GetSet}; 5 | 6 | rec.results = null; 7 | rec.rawResults = null; 8 | rec.customers = null; 9 | rec.products = null; 10 | rec.purchaseHistory = null; 11 | rec.hasPurchased = null; 12 | rec.customersMap = null; 13 | rec.productsMap = null; 14 | rec.productClusters = null; 15 | rec.productClustersMap = null; 16 | rec.customerMatrix = null; 17 | rec.productMatrix = null; 18 | rec.customerClusterHelpers = null; 19 | rec.customerClusters = null; 20 | rec.recommendationMatrix = null; 21 | rec.subClustersHelpers = null; 22 | rec.subClusters = null; 23 | rec.powerClustersHelpers = null; 24 | rec.powerClusters = null; 25 | rec.powerRecMatrix = null; 26 | rec.pastRecommendations = null; 27 | 28 | // can only directly access these keys through below methods 29 | GetSet.getRecVariable = function(key){ 30 | if(rec[key] === undefined){ 31 | throw new Error('not a valid recommendation variable'); 32 | } else if(rec[key] === null){ 33 | throw new Error('variable is null. 
please run setRecVariables'); 34 | } else{ 35 | return rec[key]; 36 | } 37 | }; 38 | 39 | GetSet.getRecKeys = function(){ 40 | return Object.keys(rec); 41 | }; 42 | 43 | // use with caution 44 | GetSet.loadRecVariable = function(key, value){ 45 | if(rec[key] === undefined){ 46 | throw new Error('not a valid recommendation variable'); 47 | } else{ 48 | rec[key] = value; 49 | } 50 | }; 51 | 52 | // use with caution 53 | GetSet.loadRecVariables = function(keys, values){ 54 | if(keys.length > values.length){ 55 | throw new Error('Each key must have a value'); 56 | } else if (keys.length < values.length){ 57 | throw new Error('Each value must have a key'); 58 | } else{ 59 | keys.forEach(function(key, i){ 60 | this.loadRecVariable(key, values[i]); 61 | }); 62 | } 63 | }; 64 | 65 | module.exports = Variables; 66 | -------------------------------------------------------------------------------- /lib/normalize.py: -------------------------------------------------------------------------------- 1 | import products as p 2 | import util as u 3 | import clustering as cl 4 | import structure as st 5 | import numpy as np 6 | 7 | # ceil is a float between 0 and 1, max percent of total population in group 8 | # floor is similar 9 | def normalizeProdClusters(clusts, centroids, mats, maps, floor, ceil): 10 | displacement = 0 11 | again = False 12 | for i in range(0,len(clusts)): 13 | ratio = len(clusts[i - displacement])/len(clusts) 14 | if ratio < floor: 15 | again = True 16 | merge(clusts, centroids, mats, maps, i - displacement) 17 | displacement += 1 18 | mats = st.subMatrices(clusts)[0] 19 | 20 | displacement = 0 21 | for i in range(0,len(clusts)): 22 | t1 = len(clusts[i])/(0.0 + len(p.products)) > ceil 23 | t2 = len(clusts[i]) > 8 24 | if (t1 and t2): 25 | again = True 26 | dissolve(clusts, centroids, mats, maps, i - displacement) 27 | displacement += 1 28 | subs = st.subMatrices(clusts) 29 | if(again): 30 | clusts = normalizeProdClusters(clusts, centroids, subs[0], subs[1], 
floor, ceil) 31 | else: 32 | displacement = 0 33 | for i in range(0,len(clusts)): 34 | if not isinstance(clusts[i-displacement],list): 35 | clusts.pop(i - displacement) 36 | displacement += 1 37 | return clusts 38 | 39 | def merge(clusts, centroids, mats, maps, i): 40 | minDist = -1 41 | index = -1 42 | cent = centroids[i] 43 | for j in range(0, len(centroids)): 44 | distance = u.dist(cent,centroids[j]) 45 | if (i == j): 46 | continue 47 | elif (minDist == -1) or distance < minDist: 48 | minDist = distance 49 | index = j 50 | for j in range(0, len(clusts[i])): 51 | if not isinstance(clusts[index],list): 52 | clusts[index] = clusts[index].tolist() 53 | clusts[index].append(clusts[i][j]) 54 | 55 | newMat = [] 56 | newMap = {} 57 | st.redoMatrix(clusts, index, newMat, newMap) 58 | mats[index] = np.array(newMat) 59 | maps[index] = newMap 60 | newCent = u.findCenter(mats[index]) 61 | centroids[index] = newCent 62 | 63 | maps.pop(i) 64 | mats.pop(i) 65 | centroids.pop(i) 66 | clusts.pop(i) 67 | 68 | def dissolve(clusts, centroids, mats, maps, i): 69 | trans = mats[i].transpose() 70 | cl.__init__(trans, clusts[i], maps[i]) 71 | num = len(clusts[i])/8+1 72 | results = cl.kMeans(num, 20) 73 | 74 | pClusts = results[0] 75 | pCents = results[1] 76 | clusts.pop(i) 77 | centroids.pop(i) 78 | mats.pop(i) 79 | maps.pop(i) 80 | 81 | for j in range(0, len(pClusts)): 82 | clusts.append(pClusts[j]) 83 | centroids.append(pCents[j]) 84 | newMat = [] 85 | newMap = {} 86 | st.redoMatrix(clusts,len(clusts)-1,newMat, newMap) 87 | mats.append(newMat) 88 | maps.append(newMap) 89 | -------------------------------------------------------------------------------- /node/engine.js: -------------------------------------------------------------------------------- 1 | var rec = require('./variables').rec; 2 | var Engine = {}; 3 | 4 | Engine.setRecVariables = function(matrix, cb, names, prods){ 5 | var path; 6 | if(Array.isArray(matrix)){ 7 | path = false; 8 | } else if (typeof matrix === 'string'){ 
9 | path = true; 10 | } 11 | cb = cb || function(){}; 12 | 13 | names = JSON.stringify(names); 14 | prods = JSON.stringify(prods); 15 | 16 | var python = require('child_process').spawn( 17 | 'python', 18 | [__dirname + '/../lib/exec.py', names, prods, matrix, path]); 19 | output = ''; 20 | python.stdout.on('data', function(data){ 21 | output += data; 22 | }); 23 | python.stdout.on('close', function(){ 24 | _buildRecVariables(output); 25 | args = Array.prototype.slice.call(arguments,4); 26 | cb.apply(this,args); 27 | }); 28 | }; 29 | 30 | var _buildRecVariables = function(output){ 31 | var results = JSON.parse(output); 32 | 33 | rec.rawResults = results; 34 | rec.customers = results[0]; 35 | rec.products = results[4]; 36 | rec.purchaseHistory = results[14]; 37 | rec.hasPurchased = results[9]; 38 | rec.customersMap = results[1]; 39 | rec.productsMap = results[3]; 40 | rec.productClusters = results[2]; 41 | rec.productClustersMap = results[10]; 42 | rec.customerMatrix = results[12]; 43 | rec.productMatrix = results[13]; 44 | rec.customerClusterHelpers = results[6]; 45 | rec.customerClusters = rec.customerClusterHelpers[0]; 46 | rec.recommendationMatrix = rec.customerClusterHelpers[6]; 47 | 48 | rec.subClustersHelpers = []; 49 | var productClusterLocator = results[11]; 50 | 51 | productClusterLocator.forEach(function(locator){ 52 | if(locator[0] === 'sub'){ 53 | rec.subClustersHelpers.push(results[7][locator[1]]); 54 | } else{ 55 | rec.subClustersHelpers.push(results[8][locator[1]]); 56 | } 57 | }); 58 | 59 | rec.subClusters = []; 60 | rec.subClustersHelpers.forEach(function(helper){ 61 | rec.subClusters.push(helper[0]); 62 | }); 63 | 64 | rec.powerClustersHelpers = results[8]; 65 | rec.powerClusters = []; 66 | 67 | rec.powerClustersHelpers.forEach(function(helper){ 68 | rec.powerClusters.push(helper[0]); 69 | }); 70 | rec.powerRecMatrix = results[5]; 71 | 72 | rec.pastRecommendations = {}; 73 | rec.customers.forEach(function(customer){ 74 | 
rec.pastRecommendations[customer] = {}; 75 | }); 76 | 77 | rec.results = [rec.customers, rec.products, rec.purchaseHistory, rec.hasPurchased, rec.customersMap, 78 | rec.productsMap, rec.productClusters, rec.productClustersMap, rec.customerMatrix, rec.productMatrix, 79 | rec.customerClusterHelpers,rec.customerClusters, rec.recommendationMatrix, rec.subClustersHelpers, 80 | rec.subClusters, rec.powerClustersHelpers, rec.powerClusters, rec.powerRecMatrix, rec.pastRecommendations]; 81 | }; 82 | 83 | module.exports = Engine; 84 | -------------------------------------------------------------------------------- /node/analytics.js: -------------------------------------------------------------------------------- 1 | var rec = require('./variables').rec; 2 | var helpers = require('./helpers'); 3 | var Analytics = {}; 4 | 5 | Analytics.recommender = function(name, matrix){ 6 | matrix = matrix || rec.recommendationMatrix; 7 | helpers._nameChecker(name); 8 | helpers._recVariableChecker(); 9 | var recommendation = matrix[rec.customersMap[name]].pop(); 10 | var attraction = Object.keys(recommendation)[0]; 11 | var product = recommendation[attraction]; 12 | if(rec.pastRecommendations[name][product] === true){ 13 | return this.recommender(name,matrix); 14 | } else{ 15 | rec.pastRecommendations[name][product] = true; 16 | } 17 | return product; 18 | }; 19 | 20 | Analytics.recommendByProduct = function(name, product){ 21 | var matrix; 22 | if(product === undefined){ 23 | matrix = rec.recommendationMatrix; 24 | } 25 | else{ 26 | index = helpers._productClusterFinder(product); 27 | matrix = rec.subClustersHelpers[index][6]; 28 | } 29 | return this.recommender(name, matrix); 30 | }; 31 | 32 | Analytics.powerRecommendation = function(name){ 33 | return this.recommender(name, rec.powerRecMatrix); 34 | }; 35 | 36 | Analytics.pastCustomerRecommendations = function(name){ 37 | helpers._nameChecker(name); 38 | helpers._recVariableChecker(); 39 | return rec.pastRecommendations[name]; 40 | }; 
41 | 42 | Analytics.getCustomerCluster = function(name){ 43 | helpers._nameChecker(name); 44 | helpers._recVariableChecker(); 45 | var index = rec.customerClusterHelpers[2][name]; 46 | var cluster = rec.customerClusters[index].slice(); 47 | return cluster; 48 | }; 49 | 50 | Analytics.getCustomerClusterByProduct = function(name, product){ 51 | helpers._nameChecker(name); 52 | helpers._productChecker(product); 53 | helpers._recVariableChecker(); 54 | var subClustIndex = helpers._productClusterFinder(product); 55 | var map = rec.subClustersHelpers[index][2]; 56 | var cluster = rec.subClusters[index]; 57 | var related = cluster[map[name]].slice(); 58 | return related; 59 | }; 60 | 61 | Analytics.getProductCluster = function(product){ 62 | var index = helpers._productClusterFinder(product); 63 | var cluster = rec.productClusters[index].slice(); 64 | return cluster; 65 | }; 66 | 67 | Analytics.relatedCustomers = function(name){ 68 | var cluster = this.getCustomerCluster(name); 69 | cluster.splice(cluster.indexOf(name), 1); 70 | return cluster; 71 | }; 72 | 73 | Analytics.relatedCustomersByProduct = function(name, product){ 74 | var related = this.getCustomerClusterByProduct(name, product); 75 | related.splice(related.indexOf(name), 1); 76 | return related; 77 | }; 78 | 79 | Analytics.relatedProducts = function(product){ 80 | var cluster = this.getProductCluster(product); 81 | cluster.splice(cluster.indexOf(product), 1); 82 | return cluster; 83 | }; 84 | 85 | Analytics.nearestNeighbors = function(name, num, overflow){ 86 | return helpers._findNearestNeighbors(name, num, 'customers', overflow); 87 | }; 88 | 89 | Analytics.nearestNeighborhoods = function(name, num){ 90 | return helpers._findNearestNeighborhoods(name, num, 'customers'); 91 | }; 92 | 93 | Analytics.nearestProducts = function(name, num, overflow){ 94 | return helpers._findNearestNeighbors(name, num, 'products', overflow); 95 | }; 96 | 97 | Analytics.nearestProductNeighborhoods = function(name, num){ 98 | 
return helpers._findNearestNeighborhoods(name, num, 'products'); 99 | }; 100 | 101 | module.exports = Analytics; 102 | -------------------------------------------------------------------------------- /lib/clustering.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import products as p 3 | import random as r 4 | import numpy as np 5 | 6 | matrix = [[]] 7 | items = [] 8 | clusterMap = {} 9 | centroidList = [] 10 | itemsMap = {} 11 | 12 | def __init__(mat, it,itmap=p.productsMap): 13 | global matrix 14 | matrix = mat 15 | global items 16 | items = it 17 | global itemsMap 18 | itemsMap = itmap 19 | 20 | # Initial centroids are points within data set. 21 | # Future centroids are mean of cluster 22 | def centroidBuilder(num): 23 | centroids = [] 24 | indexes = range(len(matrix)) 25 | for i in range(0,num): 26 | rand = r.randrange(len(indexes)) 27 | centroids.append(matrix[rand]) 28 | return np.array(centroids) 29 | 30 | def centerPoint(populus): 31 | point = [] 32 | if len(populus) != 0: 33 | if isinstance(populus[0], str): 34 | getPoint = productPoint 35 | else: 36 | getPoint = customerPoint 37 | for i in range(0, len(matrix[0])): 38 | mag = 0.0 39 | for j in range(0, len(populus)): 40 | mag += getPoint(populus, i, j) 41 | vector = mag/len(populus) 42 | point.append(vector) 43 | point = np.array(point) 44 | 45 | else: 46 | point = centroidBuilder(1)[0] 47 | return point 48 | 49 | def customerPoint(populus, i, j): 50 | cmap = c.customersMap[populus[j].name] 51 | mat = matrix[cmap] 52 | return mat[i] 53 | 54 | def productPoint(populus, i, j): 55 | imap = itemsMap[populus[j]] 56 | mat = matrix[imap] 57 | return mat[i] 58 | 59 | def findCenter(vector, centroids, num): 60 | minDist = len(matrix[0]) 61 | center = -1 62 | for i in range(0, num): 63 | data = (vector-centroids[i])**2.0 64 | localDist = np.sum(data)**(1.0/2.0) 65 | if localDist < minDist: 66 | minDist = localDist 67 | center = i 68 | return center 
69 | 70 | def clusterizer(centroids, num): 71 | centers = [] 72 | clusters = [] 73 | 74 | for i in range(0,num): 75 | clusters.append([]) 76 | 77 | for i in range(0, len(matrix)): 78 | center = findCenter(matrix[i], centroids, num) 79 | centers.append(center) 80 | 81 | for i in range(0, len(items)): 82 | index = centers[i] 83 | clusters[index].append(items[i]) 84 | 85 | return np.array(clusters) 86 | 87 | def kMeans(num, end=5, centroids=np.array([0]), count=1): 88 | if num > len(matrix): 89 | num = len(matrix)/2 90 | if not centroids.any(): 91 | centroids = centroidBuilder(num) 92 | 93 | clusters = clusterizer(centroids, num) 94 | again = False 95 | if count == end: 96 | return endCluster(clusters, centroids) 97 | 98 | else: 99 | for i in range(0, len(clusters)): 100 | point = centerPoint(clusters[i]) 101 | for j in range(0, len(point)): 102 | if point[j] != centroids[i][j]: 103 | centroids[i] = point 104 | again = True 105 | if again: 106 | clusters = kMeans(num, end, centroids, count+1) 107 | else: 108 | return endCluster(clusters, centroids) 109 | return clusters 110 | 111 | def endCluster(clusters, centroids): 112 | results = [] 113 | cents = [] 114 | for i in range(0,len(clusters)): 115 | if len(clusters[i]) != 0: 116 | results.append(clusters[i]) 117 | cents.append(centroids[i]) 118 | if not isinstance(results[0][0],str): 119 | cleanupCluster(results, centroids) 120 | return [results, cents] 121 | 122 | def cleanupCluster(clust, cent): 123 | global centroidList 124 | centroidList = cent 125 | global clusterMap 126 | clusterMap = {} 127 | for i in range(0, len(clust)): 128 | for j in range(0,len(clust[i])): 129 | clusterMap[clust[i][j].name] = i 130 | -------------------------------------------------------------------------------- /lib/run.py: -------------------------------------------------------------------------------- 1 | import customers as c 2 | import products as p 3 | import recommending as r 4 | import normalize as n 5 | import structure as st 6 | 
import clustering as cl 7 | 8 | transpose = [] 9 | names = [] 10 | products = [] 11 | 12 | def buildProductClusters(): 13 | global transpose 14 | transpose = c.matrix.transpose() 15 | cl.__init__(transpose, p.products) 16 | catNum = len(p.products)/8 + 1 17 | outputs = cl.kMeans(catNum,8) 18 | return outputs 19 | 20 | def buildCustomerHelpers(): 21 | customerClustersHelpers = st.createClusterHelpers(p.products, c.matrix, p.productsMap) 22 | customerClustersHelpers.append(r.buildRecommendations(names,[customerClustersHelpers])) 23 | return customerClustersHelpers 24 | 25 | def buildSubHelpers(indexMaps, subMatrices, aMaps): 26 | subClustersHelpers = [] 27 | for i in range(0, len(subMatrices)): 28 | subCluster = st.createClusterHelpers(indexMaps[i], subMatrices[i], aMaps[i]) 29 | subCluster.append(r.buildRecommendations(names, [subCluster])) 30 | subClustersHelpers.append(subCluster) 31 | return subClustersHelpers 32 | 33 | def buildPowerHelpers(subClustersHelpers, customerClustersHelpers): 34 | powerClustersHelpers = [] 35 | powerI = [] 36 | powerCount = 0 37 | productClusterLocator = [] 38 | for i in range(0, len(subClustersHelpers)): 39 | if subClustersHelpers[i][5] >= customerClustersHelpers[5]: 40 | powerClustersHelpers.append(subClustersHelpers[i]) 41 | powerI.append(i) 42 | productClusterLocator.append(['power', powerCount]) 43 | powerCount += 1 44 | else: 45 | productClusterLocator.append(['sub', i - powerCount]) 46 | displacement = 0 47 | for i in range(0,len(powerI)): 48 | subClustersHelpers.pop(powerI[i]-displacement) 49 | displacement += 1 50 | 51 | return [powerClustersHelpers, productClusterLocator] 52 | 53 | def run(nameList, matrix): 54 | global products 55 | products = p.products 56 | global names 57 | names = nameList 58 | 59 | outputs = buildProductClusters() 60 | productClusters = outputs[0] 61 | centroids = outputs[1] 62 | 63 | inputs = st.subMatrices(productClusters) 64 | productClusters = n.normalizeProdClusters(productClusters, centroids, 
inputs[0], inputs[1], 0.2, 0.4) 65 | 66 | inputs = st.subMatrices(productClusters) 67 | subMats = inputs[0] 68 | maps = inputs[1] 69 | indexMap = inputs[2] 70 | 71 | 72 | customerClustersHelpers = buildCustomerHelpers() 73 | subClustersHelpers = buildSubHelpers(indexMap, subMats, maps) 74 | 75 | powerups = buildPowerHelpers(subClustersHelpers,customerClustersHelpers) 76 | powerClustersHelpers = powerups[0] 77 | productClusterLocator = powerups[1] 78 | 79 | powerRecMatrix = r.buildRecommendations(names, powerClustersHelpers) 80 | productClustersMap = st.createClusterMap(productClusters) 81 | 82 | customerMatrix = c.customerMatrixiser() 83 | productMatrix = p.productMatrixiser() 84 | 85 | hasPurchased = matrix # the caller-supplied matrix is passed through unchanged as results[14] 86 | if(len(powerClustersHelpers) == 0): # no sub-clustering matched or beat the global clustering's index-5 score 87 | return 'again' # a string instead of the results list; presumably a retry signal for the caller - TODO confirm 88 | else: 89 | results = [] 90 | # index 0 91 | results.append(names) 92 | # index 1 93 | results.append(c.customersMap) 94 | # index 2 95 | results.append(productClusters) 96 | # index 3 97 | results.append(p.productsMap) 98 | # index 4 99 | results.append(products) 100 | # index 5 101 | results.append(powerRecMatrix) 102 | # index 6 103 | results.append([customerClustersHelpers]) 104 | # index 7 105 | results.append(subClustersHelpers) 106 | # index 8 107 | results.append(powerClustersHelpers) 108 | # index 9 109 | results.append(c.matrix) 110 | # index 10 111 | results.append(productClustersMap) 112 | # index 11 113 | results.append(productClusterLocator) 114 | # index 12 115 | results.append(customerMatrix) 116 | # index 13 117 | results.append(productMatrix) 118 | # index 14 119 | results.append(hasPurchased) 120 | 121 | return results 122 | -------------------------------------------------------------------------------- /node/helpers.js: -------------------------------------------------------------------------------- 1 | var rec = require('./variables').rec; 2 | var helpers = {}; 3 | 4 | helpers._findNearestNeighborhoods = function(name, num, type){ 5 | var map; 6 | var matrix; 7 | var list; 8 
| if(type === 'customers'){ 9 | map = rec.customersMap; 10 | matrix = rec.customerMatrix; 11 | list = rec.customers; 12 | this._nameChecker(name); 13 | } else if(type === 'products'){ 14 | map = rec.productsMap; 15 | matrix = rec.productMatrix; 16 | list = rec.products; 17 | this._productChecker(name); 18 | } else{ 19 | throw new Error('Invalid type. Find neighbors for customers or products.'); 20 | } 21 | this._recVariableChecker(); 22 | num = num || 1; /* undefined or 0 falls back to 1 */ 23 | if(typeof(num) !== 'number' || num%1 !== 0){ 24 | throw new Error('second parameter should be an integer'); 25 | } 26 | var index = map[name]; 27 | var dists = matrix[index]; 28 | var similarity = []; /* distances currently kept, at the positions _binarySearch assigns them */ 29 | var results = []; /* parallel to similarity: results[k] is an object {distance: [entries of list]} */ 30 | var ind; 31 | var obj; 32 | 33 | dists.forEach(function(dist, i){ 34 | if(index !== i){ /* skip the entry for name itself */ 35 | if(similarity.length < num || dist < similarity[similarity.length-1]){ 36 | ind = this._binarySearch(dist, similarity); 37 | if(similarity[ind] === dist){ 38 | results[ind][dist].push(list[i]); 39 | } else{ 40 | if(dist < similarity[similarity.length-1] && similarity.length >= num){ /* already holding num distances: drop the last one to make room */ 41 | similarity.pop(); 42 | results.pop(); 43 | } 44 | similarity.splice(ind, 0, dist); 45 | obj = {}; 46 | obj[dist] = [list[i]]; 47 | results.splice(ind, 0, obj); 48 | } 49 | } 50 | else if (dist === similarity[similarity.length-1]){ /* ties with the last kept distance join that group */ 51 | results[similarity.length-1][dist].push(list[i]); 52 | } 53 | } 54 | }.bind(this)); 55 | 56 | return results; 57 | }; 58 | 59 | 60 | helpers._findNearestNeighbors = function(name, num, type, overflow){ 61 | var results = []; 62 | var i; 63 | var neighborhood; 64 | var neighbor; 65 | num = num || 1; 66 | if(overflow === undefined){ /* default: results are not trimmed back to num below */ 67 | overflow = true; 68 | } 69 | 70 | var neighbors = this._findNearestNeighborhoods(name, num, type); 71 | for(i = 0; i < num; i++){ 72 | if(results.length < num){ 73 | for(var j in neighbors[i]){ 74 | neighborhood = neighbors[i][j]; 75 | for(var k = 0; k < neighborhood.length; k++){ 76 | neighbor = neighborhood[k]; 77 | results.push(neighbor); 78 | } 
79 | } 80 | } else { 81 | break; 82 | } 83 | } 84 | 85 | if(!overflow){ 86 | var len = results.length; 87 | for(i = num; i < len; i++){ 88 | results.pop(); 89 | } 90 | } 91 | return results; 92 | }; 93 | 94 | helpers._nameChecker = function(name){ 95 | if(name === undefined || typeof(rec.customersMap[name]) !== 'number'){ 96 | throw new Error('invalid name. name does not exist in the data set.'); 97 | } 98 | }; 99 | 100 | helpers._productChecker = function(product){ 101 | if(product === undefined || typeof(rec.productsMap[product]) !== 'number'){ 102 | throw new Error('invalid product. product does not exist in the data set.'); 103 | } 104 | }; 105 | 106 | helpers._recVariableChecker = function(){ 107 | if (rec.results === 'null'){ 108 | throw new Error('recommendation variables are null. please run setRecVariables'); 109 | } 110 | }; 111 | 112 | helpers._productClusterFinder = function(product){ 113 | this._recVariableChecker(); 114 | this._productChecker(product); 115 | var index = rec.productClustersMap[product]; 116 | return index; 117 | }; 118 | 119 | helpers._binarySearch = function(item, arr, low, high){ 120 | low = low || 0; 121 | high = high || arr.length; 122 | var median = Math.floor((low+high)/2); 123 | if(low === high){ 124 | return high; 125 | } 126 | else if(item < arr[median]){ 127 | if(low === median){ 128 | return low; 129 | } 130 | return this._binarySearch(item, arr, low, median); 131 | } 132 | else if(item > arr[median]){ 133 | if(low === high-1){ 134 | return this._binarySearch(item, arr, low+1, high); 135 | } 136 | return this._binarySearch(item, arr, median, high); 137 | } 138 | else{ 139 | return median; 140 | } 141 | }; 142 | 143 | helpers._extend = function(obj1){ 144 | args = Array.prototype.slice.call(arguments, 1); 145 | args.forEach(function(obj){ 146 | for(var key in obj){ 147 | obj1[key] = obj[key]; 148 | } 149 | }); 150 | }; 151 | 152 | module.exports = helpers; 153 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Product-Recommender NPM Module 2 | 3 | ## Contents 4 | 5 | [What is Product-Recommender?](#about) 6 | [Setup Process](#setup) 7 | [API](#use) 8 | [Recommendation Engine](#learn) 9 | [Analytics](#analysis) 10 | [Recommendation Variables](#vars) 11 | 12 | ## What is Product-Recommender? 13 | 14 | Product-Recommender is an npm module that utilizes a python machine learning recommendation engine to give easy access to customer purchase recommendations. Product-Recommender exists on the npm registry under the name "product-recommender." This repo will have the up to date version of my python recommendation engine, but if you wish to investigate the earlier development of the code, please check out my python-recommender github repo. To express your opinions on Product-Recommender, please send an email to lramsey177@gmail.com or submit a github issues request. A demo application of Product-Recommender named Product-Demo currently exists on npm. To check out that demo application, install Product-Demo with npm. 15 | 16 | npm install product-demo 17 | 18 | The algorithm in the lib folder uses unsupervised machine learning to find trends within customer purchase data. The algorithm expects three inputs: A list of customer names, a list of products under investigation, and a nested list that mentions how much of each product a customer has purchased. The algorithm divides this product purchase quantity into two states: whether a customer has bought a product or has not bought a product. 19 | 20 | Customers are divided into clusters based on similar purchase histories through a k-means algorithm. From these clusters, the algorithm derives recommendations by comparing an individual customer's buying patterns with the average buying patterns of his or her cluster. 
In addition, k-means is also used to group together products that tend to be bought together, and more focused customer clusters are constructed based on these product groups. To learn more about the structure of my recommendation engine, please check out my blog post, Anatomy of a Recommendation. 21 | 22 | Using the node.js command line interface, the python recommendation engine can be launched as a child process, with the results streamed to node. These results are divided into various variables based on the type of data they hold, and a user can gain access to all this raw analysis. Or, a user may use helper methods within Product-Recommender to parse out desired information, such as which product to recommend to a consumer. 23 | 24 | ## Setup Process 25 | 26 | To utilize the Product-Recommender NPM module, the first step would be to make sure one has successfully installed node.js, npm, and a python version of >= 2.7 or >= 3.2. To install these items, I would recommend you check out http://nodejs.org/download/ and https://www.python.org/download/. 27 | 28 | In addition to these prerequisites, there are a couple python modules that you will need to install as well. These modules are numpy and jsonpickle. The install process for these modules is fairly simple, often just a few lines in the terminal. For install instructions on numpy, please go to http://www.scipy.org/install.html. For jsonpickle, please look at http://jsonpickle.github.io/#download-install. Some more python modules used in this project are argparse, ast, math, and random, though these should be included in the Python Standard Library so there is likely no need to download these. 29 | 30 | Once all of these dependencies are installed, adding the product-recommender module to your node project is as simple as navigating to your project directory in the terminal and typing 'npm install product-recommender'. 
31 | 32 | ## API 33 | 34 | Product-Recommender consists of three core parts, Recommendation Engine, Analytics, and Recommendation Variables. In the Recommendation Engine section, the machine learning algorithm is run. The results from this algorithm are then parsed and saved in the recommendation variables. In the Analytics section, methods are provided that use the recommendation variables to produce desired outcomes. The Analytics methods do such things as finding groups of similar customers or producing a product recommendation for a customer. In the Recommendation Variable section, I give you access to the Recommendation variables used by my Analytics methods, so you can use them however you see fit. 35 | 36 | When using product-recommender though, the first step is to make sure you have required the module on the page you are using. As an example, for the rest of the readme project I will use the rec variable to represent my required module. 37 | 38 | var rec = require('product-recommender') 39 | 40 | The three sections are listed below. Please use these links to help in your navigation. 41 | 42 | [Recommendation Engine](#learn) 43 | [Analytics](#analysis) 44 | [Recommendation Variables](#vars) 45 | 46 | ## 1. Recommendation Engine 47 | 48 | This section consists of the setRecVariables method, which invokes the python recommendation engine. When the algorithm has finished streaming its results to node, the setRecVariables method then parses these results and assigns values to each of the recommendation variables. Whenever one wants a fresh reading of the recommendation engine's analysis, one simply needs to run setRecVariables method again and the recommendation variables will be set to the values of the latest analysis. 49 | 50 | The setRecVariables method takes four parameters. Any additional arguments will be added as arguments to the callback parameter. 
51 | 52 | rec.setRecVariables(history, callback, names, products) 53 | 54 | **history** 55 | 56 | The history argument contains either a matrix or a string that references the directory structure of a file containing a matrix. A matrix is a 2d-array. Each entry in the outer array contains the purchase history of an individual customer. The length of the outer array is the same length as the names array below, so each customer in the data set is represented in the history parameter. The length of each inner array is the length of the products array below. Each inner array contains information on which product from the products list each customer has purchased. The order of products in the inner array should match the order in the products array, so each column of the 2-D array refers to a particular product. 57 | 58 | var history = [ [1,0,0], [1,1,1], [0,1,1] ] 59 | 60 | If the history parameter contains a file directory instead, the matrix contained within that file will be read directly into the Python algorithm. This is necessary for data analysis on larger matrices, but could be done for matrices of any size. 61 | 62 | var history = '/some/valid/directory/matrix.txt' 63 | 64 | **callback** 65 | 66 | The callback parameter consists of a custom callback function that will run after my product recommendation engine has finished streaming its results to node.js. The algorithm runs asynchronously, so properly putting your continuing product-recommender logic inside a callback function is essential to using product-recommender. To add arguments to the callback function, include more than 4 parameters for setRecVariables. Any argument beyond the fourth named parameter in the rec.setRecVariables method will be added as arguments to the callback function. 67 | 68 | var callback = function(){ results = rec.getRecVariable('results') } 69 | 70 | **names** 71 | 72 | The names argument consists of an array of customer names/unique identifiers. 
Each element in this array should be a unique string. 73 | 74 | If a names argument is not used, the parameter will default to a number n which is the length of the outer array in the history parameter nested array. A names array of length n will be created and filled with random unique name strings for each index in the array. 75 | 76 | var names = [ 'Steve', 'Henry', 'Thea', 'Patrick' ] 77 | 78 | **products** 79 | 80 | The products argument consists of an array of product names/unique identifiers. Like the names parameter, each element in this array should be a unique string. 81 | 82 | If a products argument is not used, the parameter will default to a number n which is the same length of any of the inner arrays in the history parameter nested array. A products array of length n will be created and filled with random unique product strings for each index in the array. 83 | 84 | var products = [ 'shoes', 'socks', 'shirts', 'shorts' ] 85 | 86 | [API](#use) 87 | 88 | ## 2. Analytics 89 | 90 | This grouping consists of methods that are designed to analyze the data held in the recommendation variables to produce a desired result. These methods accomplish such goals as determining which cluster is most relevant to a customer's purchase of a particular product or producing a product recommendation for a customer based on recent buying patterns. Now I will describe the analytics functions. 91 | 92 | **recommender(customer, recMatrix)** 93 | 94 | This method accepts inputs of a customer string from the customer array and the optional input of a recommendation matrix. The method considers the input customer's row from the recommendation matrix and pops off the product that my algorithm has determined is the strongest recommendation in that group. The recommended product is returned. 95 | 96 | The recommendation matrix parameter allows the more custom recommendation matrices built around buying trends in related product groups to be used instead of the default matrix. 
If a user buys a pair of shoes, he may have a certain taste in products similar to shoes that is different than his taste in other areas. By placing the recommendation matrix concerned with the shoe products in the recommender method, the recommendation can be fine tuned based on the reality that the user recently bought shoes. With this custom matrix parameter, one can choose which product group of interest one wants the recommendation tailored towards. 97 | 98 | rec.recommender('Steve', matrix) 99 | 100 | **recommendByProduct(customer, product)** 101 | 102 | This method takes a customer string and a product string as parameters. The method then determines which product group the product belongs to, and accesses the proper recommendation matrix based on that product group. This custom recommendation matrix is then used to return a product more focused on the customer's buying patterns in relation to the input product. 103 | 104 | rec.recommendByProduct('Steve', 'shoes') 105 | 106 | **powerRecommendation(customer)** 107 | 108 | This method creates a recommendation for a particular customer, based on the rec.powerRecMatrix recommendation variable. This matrix only contains results from clusters with a higher silhouette score than customerClusters. The matrix weighs the best results from each power cluster, and ranks them. This way, upon calling powerRecommendation, the highest performing result across all the power clusters is returned. 109 | 110 | rec.powerRecommendation('Steve') 111 | 112 | **pastCustomerRecommendations(customer)** 113 | 114 | This method returns an object that reveals which products have been recommended to customers through the different recommendation analytics methods. To check if a product has been recommended, use the product's name as a key on the object. If the value returned for that product key is true, the product has been recommended to the listed customer. 
If the value returned is undefined, the product has not been recommended yet to that customer. 115 | 116 | rec.pastCustomerRecommendations('Steve') 117 | 118 | **getCustomerCluster(customer)** 119 | 120 | This method accepts a customer string as a parameter and returns an array of the global customer cluster that contains that input customer. These customers in the returned cluster all have similar buying patterns across all products. 121 | 122 | rec.getCustomerCluster('Steve') 123 | 124 | **getCustomerClusterByProducts(customer, product)** 125 | 126 | This method accepts a customer string and product string as parameters and returns a cluster that is chosen based on the input customer's buying trends in relation to products similar to the input product. This cluster is an array that contains customer names, including the input customer. 127 | 128 | rec.getCustomerClustersByProducts('Steve', 'shoes') 129 | 130 | **getProductCluster(product)** 131 | 132 | This method accepts a product string as a parameter and returns an array of products similar to the input product, including the input product. This returned array is the product cluster that contains the input product. 133 | 134 | rec.getProductCluster('shoes') 135 | 136 | **relatedCustomers(customer)** 137 | 138 | This method accepts a customer string as a parameter and returns an array of customers with similar purchase histories based on clustering. This result returns the members of that customer's global cluster group, excluding the input customer. To investigate more focused groups based on certain product patterns, please use the relatedCustomersByProduct method. 139 | 140 | rec.relatedCustomers('Steve') 141 | 142 | **relatedCustomersByProduct(customer, product)** 143 | 144 | This method takes a customer string and product string as a parameter. Based on the input product, a cluster is selected that reflects the customer's taste in relation to that product. 
The method returns members of that cluster, excluding the input customer. These customers have similar buying patterns with the input customer in terms of products similar to the product parameter. 145 | 146 | rec.relatedCustomersByProduct('Steve', 'shoes') 147 | 148 | **relatedProducts(product)** 149 | 150 | This method takes a product string as a parameter. The method returns an array of products my algorithm has judged to be similar based on the aggregate purchase history of my customers. The returned products are in the same product cluster as the input product, excluding the input product. 151 | 152 | rec.relatedProducts('shoes') 153 | 154 | **nearestNeighbors(customer, num, overflow)** 155 | 156 | This method takes a customer string as a parameter, and contains an optional num parameter that will default to 1 if it is not included as an argument. The optional overflow parameter defaults to true. The method reveals which customers are the lowest distance away from the listed customer. An array is returned that contains at least the num closest customers to the listed customer. The array is ordered, so customers at a lower index are either closer or the same distance away as customers at a higher index. The array may be greater than length num if when index num has been reached there is a tie to who is the closest customer. If the overflow parameter is set to false, then the array will be limited to length num, even in the case of ties. 157 | 158 | rec.nearestNeighbors('Steve', 4, false) 159 | 160 | **nearestNeighborhoods(customer, num)** 161 | 162 | This method takes a customer string as a parameter, and takes an optional num parameter which will default to 1 if not included. This method returns an ordered array that shows the num closest relative distances different customers are from the listed customer, along with the customers who are that distance away. The customerMatrix is used to find this relative distance. 
Each entry in the returned array is an object, with a single key referring to a distance. The corresponding value for that distance key is an array filled with all customers who are that distance away from the listed customer. 163 | 164 | rec.nearestNeighborhoods('Steve', 4) 165 | 166 | **nearestProducts(product, num, overflow)** 167 | 168 | This method takes a product string as well as optional num and overflow parameters. The method behaves similarly to nearestNeighbors, but instead of finding the closest customers to a particular customer, it finds the products most similar to the listed product. 169 | 170 | rec.nearestProducts('shoes', 2) 171 | 172 | **nearestProductNeighborhoods(product, num)** 173 | 174 | This method takes a product string and an optional num parameter. This method behaves similarly to the nearestNeighborhoods method, but instead of searching for the closest customers to a listed customer, it looks for the closest products to a listed product. 175 | 176 | rec.nearestProductNeighborhoods('shoes', 2) 177 | 178 | [API](#use) 179 | 180 | ## 3. Recommendation Variables 181 | 182 | The recommendation variables hold the fine-grained results from my product recommendation algorithm. These results can be accessed overall by the results variable, or can be broken into various categories. To access a recommendation variable, one would call the getRecVariable method, passing the desired variable name in as a key. 183 | 184 | rec.getRecVariable(key); 185 | 186 | To receive an array containing all of the recommendation keys, one can call the getRecKeys() method. 187 | 188 | rec.getRecKeys(); 189 | 190 | To manually set a value on a recommendation variable, use the loadRecVariable() method. This method takes two parameters, a string that signifies the name of the recommendation variable to change, and a value which will be set to that recommendation variable. 
Any value entered here to a recommendation variable could cause errors in the module if it is not the same format the recommendation variable typically would be after executing setRecVariables. Also, altering some recommendation variables but not others could lead to outcomes that are not consistent to one set of the recommendation engine's analysis. Could be used to load past results from the recommendation engine. Use this method with caution. 191 | 192 | rec.loadRecVariable(key, value) 193 | 194 | To manually set several values on a recommendation variable, use the loadRecVariables method. The first parameter is an array of strings filled with the name of recommendation variables to change. The second parameter is an array of values which to assign to the previous mentioned recommendation variables. The value that is in the same index as the recommendation variable name in the key array will be assigned to that recommendation variable. If the key and value arrays are of different lengths, the code will throw an error. Could be used to load past results from the recommendation engine. Again, use with caution. 195 | 196 | rec.loadRecVariables(keys, values) 197 | 198 | Initially the recommendation variables will be set to null, until a call is made to launch the python recommendation engine. Now, I will describe the recommendation variables. 199 | 200 | **results** 201 | 202 | An array containing all of the below recommendation variables except for rawResults. 203 | 204 | **rawResults** 205 | 206 | An array containing the raw results sent from my product recommendation algorithm. 207 | 208 | **customers** 209 | 210 | An array containing the name string of every customer. The order of customers in this array matches the customer order in the succeeding matrices. 211 | 212 | **products** 213 | 214 | An array containing the name string of every product. The order of products in this array matches the product order in the history matrix. 
215 | 216 | **purchaseHistory** 217 | 218 | A nested array containing the raw purchase history nested array passed in as a parameter to the setRecVariables method. Each index in the outer array refers to a customer, and each index in the inner array refers to how much of a particular product a customer bought. 219 | 220 | **hasPurchased** 221 | 222 | A nested array containing a normalized version of the history array passed in as a parameter to the setRecVariables method. For every integer greater than 1 in the nested array, that value will be set to one. This matrix serves as a boolean indicator on whether a customer has bought a product or not. This matrix is used in my product recommendation analysis, whereas the history matrix is not. 223 | 224 | **customersMap** 225 | 226 | An object that contains each customer's string name as keys, with corresponding values referring to which index that string is stored in the customers array and various matrices. 227 | 228 | **productsMap** 229 | 230 | An object that contains each product's string name as keys, with corresponding values referring to which index that string is stored in the products array and history matrix. 231 | 232 | **productClusters** 233 | 234 | A nested array structure. Each array refers to a grouping of products that the product-recommendation algorithm has determined are similar based on aggregate customer buying patterns. 235 | 236 | **productClustersMap** 237 | 238 | An object that takes a product as a key. The corresponding value would be the index one can use in the productClusters array to find the group of products most similar to the input product. 239 | 240 | **customerMatrix** 241 | 242 | A nested array that describes how relatively far away each of the customers are from each other. Each row shows how close a particular customer is to all the other customers. Each index in the inner array compares how close that customer is to the customer referenced in the outer array index. 
A value of -1 will occur if the inner array customer and outer array customer are the same. A value of 0 will be assigned to the inner index customer closest to the outer index customer. A value of 1 will be assigned to the outer customer farthest from the inner customer. All other customers will be scaled between 0 and 1 based on their relative distance. 243 | 244 | **productMatrix** 245 | 246 | Like the customerMatrix above, except here the nested array structure concerns itself with how far away the products are from each other. 247 | 248 | **customerClusters** 249 | 250 | A nested array structure. Each array refers to a grouping of customers based on similar purchase trends based on the total product set. 251 | 252 | **recommendationMatrix** 253 | 254 | A nested array that organizes customer recommendations based on the groupings listed in the customerClusters variable. These recommendations are ordered, so the last product in each inner array is the product my algorithm has determined is the best product to recommend based on the customerClusters. Also, these products contain metadata. 255 | 256 | **customerClusterHelpers** 257 | 258 | An array of seven useful tools in relation to the customerClusters. I will describe each element by its index in the array. 259 | 260 | Index 0 contains the customerClusters. 261 | 262 | Index 1 contains an array of centroid values for each cluster, where centroid 263 | refers to the average purchase trends of every customer within a group. 264 | 265 | Index 2 contains a clusterMap object that contains a key of customer names and a corresponding number referring to which cluster that customer is in. That number refers to the index of a particular cluster in the customerClusters array. 266 | 267 | Index 3 contains an indexMap object that contains a key of a customer name and a corresponding number that refers to what index inside that customer's cluster the customer name currently lies. 
268 | 269 | Index 4 contains an array of silhouettes. A silhouette is a number between 0 and 1 that represents how much closer members of a cluster are with their own cluster center in comparison to the cluster center of the next closest cluster. The closer the silhouette is to 1, the stronger the clusters. Each silhouette score refers to a grouping in the customerClusters array, and the order is the same as the order in that array. 270 | 271 | Index 5 contains an average of all the cluster silhouettes in index 4. This serves as a rough indicator of the strength of all the clustering in customerClusters. 272 | 273 | Index 6 contains the recommendationMatrix that is built up based on the customerClusters. This presents ordered product recommendations for each customer, with the last element of each customer array referring to the product my algorithm most highly recommends based on the customerCluster. 274 | 275 | **subClusters** 276 | 277 | An array containing all the more focused customer clustering that is determined by the product groupings found in productClusters. Customers in these subClusters are grouped with other customers based on their similar buying patterns in relation to these smaller product groups. 278 | 279 | **subClustersHelpers** 280 | 281 | An array containing a series of 7 element arrays similar to the customerClusterHelpers, but with various subClusters replacing the customerClusters. Each element in the subClusters array will have its own subClusterHelper. 282 | 283 | **powerClusters** 284 | 285 | An array that contains elements from the subClusters, but has removed subClusters with a relatively low average silhouette score. 286 | 287 | **powerClustersHelpers** 288 | 289 | Similar to subClusterHelpers, but containing elements from the powerClusters array. 290 | 291 | **powerRecMatrix** 292 | 293 | A recommendation matrix built by compiling together the results from the powerClusters. 
Only recommendation clusters that have a silhouette score higher than the score associated with customerClusters are included. The strongest elements from each type of cluster are weighted by silhouette scores and ordered by recommendation strength. 294 | 295 | **pastRecommendations** 296 | 297 | An object that contains a key for each customer in the dataset. Each customer key has a corresponding value that is an object. The inner object contains keys with a true for each product which has been recommended to the designated customer. 298 | 299 | 300 | [API](#use) 301 | [Contents](#contents) 302 | --------------------------------------------------------------------------------