├── .gitignore ├── .settings ├── org.eclipse.m2e.core.prefs └── org.eclipse.jdt.core.prefs ├── src ├── main │ ├── resources │ │ └── log4j.properties │ └── java │ │ └── com │ │ └── aerospike │ │ └── recommendation │ │ └── rest │ │ ├── NoMoviesFound.java │ │ ├── CustomerNotFound.java │ │ ├── JSONRecord.java │ │ ├── AerospikeRecommendationService.java │ │ └── RESTController.java └── test │ └── java │ └── com │ └── aerospike │ └── recommendation │ └── rest │ └── RecommendTest.java ├── .project ├── .classpath ├── pom.xml └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 3 | org.eclipse.jdt.core.compiler.compliance=1.6 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.6 6 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.logger.com.aerospike.recommendation=debug, CONSOLE 2 | 3 | # CONSOLE is set to be a ConsoleAppender using a PatternLayout. 
/**
 * Unchecked exception raised when a customer has no watched movies on record,
 * so no recommendation can be derived for them.
 */
public class NoMoviesFound extends RuntimeException {
    private static final long serialVersionUID = -4277831144559006904L;

    /** ID of the customer concerned; stays {@code null} when the no-argument constructor is used. */
    private String customerID;

    /** Creates the exception without a message and without a customer ID. */
    public NoMoviesFound() {
    }

    /**
     * Creates the exception for a specific customer.
     *
     * @param user the customer ID; it is embedded in the message and kept for {@link #getCustomerID()}
     */
    public NoMoviesFound(String user) {
        super(describe(user));
        customerID = user;
    }

    /**
     * @return the customer ID supplied at construction time, or {@code null} if none was given
     */
    public String getCustomerID() {
        return this.customerID;
    }

    /** Builds the exception message for the given customer ID. */
    private static String describe(String user) {
        return "No movies found for customer: " + user;
    }
}
| 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/recommendation/rest/JSONRecord.java: -------------------------------------------------------------------------------- 1 | package com.aerospike.recommendation.rest; 2 | 3 | import java.util.Map; 4 | 5 | import org.json.simple.JSONArray; 6 | import org.json.simple.JSONObject; 7 | 8 | import com.aerospike.client.Record; 9 | import com.aerospike.client.query.ResultSet; 10 | 11 | /** 12 | * JSONRecord is used to convert an Aerospike Record 13 | * returned from the cluster to JSON format 14 | * @author peter 15 | * 16 | */ 17 | @SuppressWarnings("serial") 18 | public class JSONRecord extends JSONObject { 19 | @SuppressWarnings("unchecked") 20 | public JSONRecord(Record record){ 21 | put("generation", record.generation); 22 | put("expiration", record.expiration); 23 | put("bins", new JSONObject(record.bins)); 24 | if (record.duplicates != null){ 25 | JSONArray duplicates = new JSONArray(); 26 | for (Map duplicate : record.duplicates){ 27 | duplicates.add(new JSONObject(duplicate)); 28 | } 29 | put("duplicates", duplicates); 30 | } 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/aerospike/recommendation/rest/RecommendTest.java: -------------------------------------------------------------------------------- 1 | package com.aerospike.recommendation.rest; 2 | 3 | import java.util.Properties; 4 | 5 | import org.junit.After; 6 | import org.junit.Assert; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | import org.springframework.boot.SpringApplication; 10 | import org.springframework.context.ApplicationContext; 11 | 12 | import com.aerospike.client.AerospikeClient; 13 | 14 
| public class RecommendTest { 15 | 16 | @Before 17 | public void setUp() throws Exception { 18 | } 19 | 20 | @After 21 | public void tearDown() throws Exception { 22 | } 23 | 24 | @Test 25 | public void test() throws Exception{ 26 | // set properties 27 | Properties as = System.getProperties(); 28 | as.put("seedHost", "192.168.51.199"); 29 | as.put("port", "3000"); 30 | as.put("namespace", "test"); 31 | // start app 32 | ApplicationContext appCon = SpringApplication.run(AerospikeRecommendationService.class, new String[0]); 33 | appCon.getBean(AerospikeRecommendationService.class); 34 | RESTController controller = appCon.getBean(RESTController.class); 35 | 36 | controller.getRecommendationFor("15836679"); 37 | //controller.getRecommendationFor("15089729"); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/recommendation/rest/AerospikeRecommendationService.java: -------------------------------------------------------------------------------- 1 | package com.aerospike.recommendation.rest; 2 | 3 | import java.util.Properties; 4 | 5 | import javax.servlet.MultipartConfigElement; 6 | 7 | import org.apache.commons.cli.CommandLine; 8 | import org.apache.commons.cli.CommandLineParser; 9 | import org.apache.commons.cli.Options; 10 | import org.apache.commons.cli.ParseException; 11 | import org.apache.commons.cli.PosixParser; 12 | import org.springframework.boot.SpringApplication; 13 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration; 14 | import org.springframework.context.annotation.Bean; 15 | import 
org.springframework.context.annotation.ComponentScan; 16 | import org.springframework.context.annotation.Configuration; 17 | 18 | import com.aerospike.client.AerospikeClient; 19 | import com.aerospike.client.AerospikeException; 20 | 21 | @Configuration 22 | @EnableAutoConfiguration 23 | @ComponentScan 24 | public class AerospikeRecommendationService { 25 | 26 | @Bean 27 | public AerospikeClient asClient() throws AerospikeException { 28 | Properties as = System.getProperties(); 29 | return new AerospikeClient(as.getProperty("seedHost"), Integer.parseInt(as.getProperty("port"))); 30 | } 31 | @Bean 32 | public MultipartConfigElement multipartConfigElement() { 33 | return new MultipartConfigElement(""); 34 | } 35 | 36 | 37 | 38 | public static void main(String[] args) throws ParseException { 39 | 40 | Options options = new Options(); 41 | options.addOption("h", "host", true, "Aerospike server hostname (default: localhost)"); 42 | options.addOption("p", "port", true, "Aerospike server port (default: 3000)"); 43 | options.addOption("n", "namespace", true, "Aerospike namespace (default: test)"); 44 | 45 | // parse the command line args 46 | CommandLineParser parser = new PosixParser(); 47 | CommandLine cl = parser.parse(options, args, false); 48 | 49 | // set properties 50 | Properties as = System.getProperties(); 51 | String host = cl.getOptionValue("h", "localhost"); 52 | as.put("seedHost", host); 53 | String portString = cl.getOptionValue("p", "3000"); 54 | as.put("port", portString); 55 | String nameSpace = cl.getOptionValue("n", "test"); 56 | as.put("namespace", nameSpace); 57 | 58 | // start app 59 | SpringApplication.run(AerospikeRecommendationService.class, args); 60 | 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.aerospike 5 | aerospike-recommendation-example 6 | 1.0.0 7 | Simple Recommendation 
Engine 8 | This is a simple recommendation engine using a RESTful web service using Spring Boot and Aerospike 9 | 10 | org.springframework.boot 11 | spring-boot-starter-parent 12 | 0.5.0.M4 13 | 14 | 15 | 16 | 17 | org.springframework.boot 18 | spring-boot-starter-web 19 | 20 | 21 | org.springframework.boot 22 | spring-boot-starter-actuator 23 | 24 | 25 | 26 | com.aerospike 27 | aerospike-client 28 | 3.0.20 29 | 30 | 31 | 32 | commons-cli 33 | commons-cli 34 | 1.2 35 | 36 | 37 | 38 | log4j 39 | log4j 40 | 1.2.14 41 | 42 | 43 | com.googlecode.json-simple 44 | json-simple 45 | 1.1.1 46 | 47 | 48 | 49 | 50 | com.aerospike.recommendation.rest.AerospikeRecommendationService 51 | 52 | 53 | 54 | 55 | 56 | maven-compiler-plugin 57 | 2.3.2 58 | 59 | 60 | org.springframework.boot 61 | spring-boot-maven-plugin 62 | 63 | 64 | 65 | 66 | 67 | 68 | spring-snapshots 69 | Spring Snapshots 70 | http://repo.spring.io/libs-snapshot 71 | 72 | true 73 | 74 | 75 | 76 | 77 | 78 | spring-snapshots 79 | Spring Snapshots 80 | http://repo.spring.io/libs-snapshot 81 | 82 | true 83 | 84 | 85 | 86 | 87 | Aerospike Inc 88 | Http://www.aerospike.com 89 | 90 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | #Building a simple recommendation engine using RESTful Web Service, Spring Boot and Aerospike 2 | --------------------------------------------------------------------------------------------- 3 | Recommendation engines are used in applications to personalize the user experience. For example, eCommerce applications recommend products to a customer that other customers, with similar profiles, have viewed or purchased. 4 | 5 | Spring Boot is a powerful jump-start into Spring. It allows you to build powerful applications with production grade services with little effort on your part. 6 | 7 | Aerospike is a high available, low latency NoSQL database that scales linearly. 
It is an in-memory database optimized to use both DRAM and native Flash. Aerospike boasts latencies of 1 to 3 ms consistently across throughput loads on a correctly sized cluster. Aerospike also has high reliability and is ACID compliant. Their oldest customer has many terabytes of data and has never been offline, even during Hurricane Sandy in New York City. 8 | 9 | ##What you will build 10 | This guide will take you through creating a simple recommendation engine. This engine will use Similarity Vectors to recommend products to a user. 11 | The algorithm for this is quite easy, but you do require a lot of data to make it work. 12 | 13 | Time is of the essence. Your application may be a mobile app, a web application or a Real Time Bidding application for online advertising. 14 | In each case you will need to go to the database and retrieve your data within 2-5 ms so your application can respond within 50 ms with a recommendation. You could try this with any database, but Aerospike is very fast (1-5 ms latency) and the latency remains flat as the transaction rate grows. 15 | 16 | You will build a simple recommendation RESTful web service with Spring Boot and Aerospike. 
17 | The recommendation service accepts an HTTP GET request: 18 | 19 | http://localhost:8080/recommendation/{customer} 20 | 21 | It responds with the following JSON array of recommendations: 22 | 23 | [{"expiration":130019315,"bins":{"title":"Classic Albums: Meat Loaf: Bat Out of Hell","yearOfRelease":"1999"},"generation":4},{"expiration":130019337,"bins":{"title":"Rudolph the Red-Nosed Reindeer","yearOfRelease":"1964"},"generation":4},{"expiration":130019338,"bins":{"title":"The Bad and the Beautiful","yearOfRelease":"1952"},"generation":4},{"expiration":130019384,"bins":{"title":"Jingle All the Way","yearOfRelease":"1996"},"generation":4},{"expiration":130019386,"bins":{"title":"The Killing","yearOfRelease":"1956"},"generation":4},{"expiration":130019400,"bins":{"title":"Silkwood","yearOfRelease":"1983"},"generation":4},{"expiration":130019404,"bins":{"title":"Chain of Command","yearOfRelease":"2000"},"generation":4}] 24 | 25 | There are also many features added to your application out-of-the-box for managing the service in a production (or other) environment. 26 | 27 | ##Algorithm 28 | People act on products. People view products, kick the tires, bounce on the bed, etc.; and sometimes this leads to a purchase. So there are two actions people can take with products: 29 | * View 30 | * Purchase 31 | An individual person will have a history of Views and Purchases. 32 | 33 | A simple recommendation algorithm is to find another user who is similar and recommend products that the other user has viewed/purchased to this user. It is a good idea to eliminate the duplicates so that this user is only recommended products that they have not seen. 34 | 35 | How do you do this? 
You need to maintain a history of a user’s Views and Purchases, e.g.: 36 | 37 | ####Movie Customers 38 | |customerId|watched| 39 | |----|---------| 40 | |893988|List(Map("movie-id"->"1", "rating"->3, "customer-id"->"893988", "date"->"2005-11-17"), ...| 41 | |712664|List(Map("movie-id"->"3", "rating"->5, "customer-id"->"712664", "date"->"2004-02-01"), ...| 42 | 43 | You also maintain a list of who purchased a product, e.g.: 44 | 45 | ####Movie Titles 46 | |movieId|yearOfRelease|title|watchedBy| 47 | |-------|-------------|-----|-------| 48 | |89|2000|Chain of Command|List(Map("movie-id"->"89", "rating"->2, "customer-id"->"712664", "date"->"2001-08-02"), ...| 49 | |83|1983|Silkwood|List(Map("movie-id"->"83", "rating"->3, "customer-id"->"716091", "date"->"2000-01-08"), ...| 50 | |78|1996|Jingle All the Way|List(Map("movie-id"->"78", "rating"->3, "customer-id"->"1943087", "date"->"2001-09-14"), ...| 51 | 52 | From this data, you can see that Jane Doe and John Smith have similar purchase histories, but Albert Citizen does not. 53 | 54 | If Jane Doe uses your application, you could recommend to her the same things that John Smith purchased, minus the products that are common to both John and Jane. You may also prioritize which products to recommend based on a category (a similarity weight), e.g. the “dog” related products may have more relevance to Jane than the Bose Headset. 55 | 56 | ###How do you find similarity? 57 | Similarity can be found using several algorithms, e.g. Cosine Similarity. In this example, you will use a very simple algorithm using a simple score. 58 | 59 | ###Scenario 60 | 1. Jane Doe accesses the application 61 | 2. Retrieve Jane’s customer profile 62 | 3. Retrieve the movie profile for each of Jane’s views; this can be a batch operation in Aerospike that retrieves a list of records in one lump 63 | 4. For each product 64 | a. Retrieve the customer profile 65 | b. 
See if this profile is similar to Jane’s by giving it a score (using Cosine similarity) 66 | 5. Using the customer profile with the highest similarity score, recommend the products in this user profile to Jane. 67 | 68 | ##How to build 69 | It is easy to build a single runnable Jar with Maven 70 | 71 | mvn package 72 | 73 | ##Loading the test data 74 | Aerospike has the ability to back up and restore the data in an entire cluster. The test data for this application is stored as an Aerospike backup file. 75 | 76 | Download the file at https://drive.google.com/a/aerospike.com/folderview?id=0B8luCpttpeaAVWZkYl85a2ktaXc&usp=sharing 77 | and restore it to your Aerospike cluster using the following command, replacing BACKUP_DIRECTORY with the directory that holds the downloaded backup: 78 | 79 | asrestore -h 127.0.0.1 -p 3000 -d BACKUP_DIRECTORY 80 | 81 | 82 | ##Running the package 83 | The package is a RESTful service using Spring Boot, packaged in a runnable jar 84 | 85 | java -jar aerospike-recommendation-example-1.0.0.jar 86 | 87 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/recommendation/rest/RESTController.java: -------------------------------------------------------------------------------- 1 | package com.aerospike.recommendation.rest; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | import java.util.Properties; 7 | 8 | import org.apache.log4j.Logger; 9 | import org.json.simple.JSONArray; 10 | import org.springframework.beans.factory.annotation.Autowired; 11 | import org.springframework.stereotype.Controller; 12 | import org.springframework.web.bind.annotation.PathVariable; 13 | import org.springframework.web.bind.annotation.RequestMapping; 14 | import org.springframework.web.bind.annotation.RequestMethod; 15 | import org.springframework.web.bind.annotation.ResponseBody; 16 | 17 | import com.aerospike.client.AerospikeClient; 18 | import com.aerospike.client.AerospikeException; 19 | import com.aerospike.client.Key; 20 | import com.aerospike.client.Record; 
21 | import com.aerospike.client.policy.Policy; 22 | 23 | @Controller 24 | public class RESTController { 25 | private static final int MOVIE_REVIEW_LIMIT = 20; 26 | public static final String NAME_SPACE = "test"; 27 | public static final String PRODUCT_SET = "MOVIE_TITLES"; 28 | public static final String USERS_SET = "MOVIE_CUSTOMERS"; 29 | 30 | public static final String DATE = "date"; 31 | public static final String RATING = "rating"; 32 | public static final String CUSTOMER_ID = "customer-id"; 33 | public static final String MOVIE_ID = "movie-id"; 34 | public static final String WATCHED_BY = "watchedBy"; 35 | public static final String TITLE = "title"; 36 | public static final String YEAR_OF_RELEASE = "yearOfRelease"; 37 | public static final String CUSTOMER_WATCHED = "watched"; 38 | private static Logger log = Logger.getLogger(RESTController.class); 39 | @Autowired 40 | AerospikeClient client; 41 | 42 | static final String nameSpace; 43 | static { 44 | Properties as = System.getProperties(); 45 | nameSpace = (String) as.get("namespace"); 46 | } 47 | /** 48 | * get a recommendation for a specific customer 49 | * @param user a unique ID for a customer 50 | * @return 51 | * @throws Exception 52 | */ 53 | @SuppressWarnings("unchecked") 54 | @RequestMapping(value="/recommendation/{customer}", method=RequestMethod.GET) 55 | public @ResponseBody JSONArray getRecommendationFor(@PathVariable("customer") String customerID) throws Exception { 56 | log.debug("Finding recomendations for " + customerID); 57 | Policy policy = new Policy(); 58 | 59 | /* 60 | * Get the customer's purchase history as a list of ratings 61 | */ 62 | Record thisUser = null; 63 | try{ 64 | thisUser = client.get(policy, new Key(NAME_SPACE, USERS_SET, customerID)); 65 | if (thisUser == null){ 66 | log.debug("Could not find user: " + customerID ); 67 | throw new CustomerNotFound(customerID); 68 | } 69 | } catch (AerospikeException e){ 70 | log.debug("Could not find user: " + customerID ); 71 | throw 
new CustomerNotFound(customerID); 72 | } 73 | /* 74 | * get the movies watched and rated 75 | */ 76 | List> customerWatched = (List>) thisUser.getValue(CUSTOMER_WATCHED); 77 | if (customerWatched == null || customerWatched.size()==0){ 78 | // customer Hasen't Watched anything 79 | log.debug("No movies found for customer: " + customerID ); 80 | throw new NoMoviesFound(customerID); 81 | } 82 | 83 | /* 84 | * build a vector list of movies watched 85 | */ 86 | List thisCustomerMovieVector = makeVector(customerWatched); 87 | 88 | 89 | Record bestMatchedCustomer = null; 90 | double bestScore = 0; 91 | /* 92 | * for each movie this customer watched, iterate 93 | * through the other customers that also watched 94 | * the movie 95 | */ 96 | for (Map wr : customerWatched){ 97 | Key movieKey = new Key(NAME_SPACE, PRODUCT_SET, (String) wr.get(MOVIE_ID) ); 98 | Record movieRecord = client.get(policy, movieKey); 99 | 100 | List> whoWatched = (List>) movieRecord.getValue(WATCHED_BY); 101 | 102 | if (!(whoWatched == null)){ 103 | int end = Math.min(MOVIE_REVIEW_LIMIT, whoWatched.size()); 104 | /* 105 | * Some movies are watched by >100k customers, only look at the last n movies, or the 106 | * number of customers, whichever is smaller 107 | */ 108 | for (int index = 0; index < end; index++){ 109 | Map watchedBy = whoWatched.get(index); 110 | String similarCustomerId = (String) watchedBy.get(CUSTOMER_ID); 111 | if (!similarCustomerId.equals(customerID)) { 112 | // find user with the highest similarity 113 | 114 | Record similarCustomer = client.get(policy, new Key(NAME_SPACE, USERS_SET, similarCustomerId)); 115 | 116 | List> similarCustomerWatched = (List>) similarCustomer.getValue(CUSTOMER_WATCHED); 117 | double score = easySimilarity(thisCustomerMovieVector, similarCustomerWatched); 118 | if (score > bestScore){ 119 | bestScore = score; 120 | bestMatchedCustomer = similarCustomer; 121 | } 122 | } 123 | } 124 | } 125 | } 126 | log.debug("Best customer: " + bestMatchedCustomer); 
127 | log.debug("Best score: " + bestScore); 128 | // return the best matched user's purchases as the recommendation 129 | List bestMatchedPurchases = new ArrayList(); 130 | for (Map watched : (List>)bestMatchedCustomer.getValue(CUSTOMER_WATCHED)){ 131 | Integer movieID = Integer.parseInt((String) watched.get(MOVIE_ID)); 132 | if ((!thisCustomerMovieVector.contains(movieID))&&(movieID != null)){ 133 | bestMatchedPurchases.add(movieID); 134 | } 135 | } 136 | 137 | // get the movies 138 | Key[] recomendedMovieKeys = new Key[bestMatchedPurchases.size()]; 139 | int index = 0; 140 | for (int recomendedMovieID : bestMatchedPurchases){ 141 | recomendedMovieKeys[index] = new Key(NAME_SPACE, PRODUCT_SET, String.valueOf(recomendedMovieID)); 142 | log.debug("Added Movie key: " + recomendedMovieKeys[index]); 143 | index++; 144 | } 145 | Record[] recommendedMovies = client.get(policy, recomendedMovieKeys, TITLE, YEAR_OF_RELEASE); 146 | 147 | // This is a diagnostic step 148 | if (log.isDebugEnabled()){ 149 | log.debug("Recomended Movies:"); 150 | for (Record rec : recommendedMovies){ 151 | log.debug(rec); 152 | } 153 | } 154 | 155 | // Turn the Aerospike records into a JSONArray 156 | JSONArray recommendations = new JSONArray(); 157 | for (Record rec: recommendedMovies){ 158 | if (rec != null) 159 | recommendations.add(new JSONRecord(rec)); 160 | } 161 | log.debug("Found these recomendations: " + recommendations); 162 | return recommendations; 163 | } 164 | /** 165 | * Produces a Integer vector from the movie IDs 166 | * @param ratingList 167 | * @return 168 | */ 169 | private List makeVector(List> ratingList){ 170 | List movieVector = new ArrayList(); 171 | for (Map one : ratingList){ 172 | String movieString = (String)one.get(MOVIE_ID); 173 | if (movieString == null) 174 | movieVector.add(0L); 175 | else 176 | movieVector.add(Long.parseLong(movieString)); 177 | } 178 | return movieVector; 179 | } 180 | /** 181 | * This is a very rudimentary algorithm using Cosine similarity 
182 | * @param customerWatched 183 | * @param similarCustomerWatched 184 | * @return 185 | */ 186 | private double easySimilarity(List thisCustomerVector, List> similarCustomerWatched){ 187 | double incommon = 0; 188 | /* 189 | * this is the place where you can create clever 190 | * similarity score. 191 | * 192 | * This algorithm simple returns how many movies these customers have in common. 193 | * 194 | * You could use any similarity algorithm you wish 195 | */ 196 | List similarCustomerVector = makeVector(similarCustomerWatched); 197 | 198 | return cosineSimilarity(thisCustomerVector, similarCustomerVector); 199 | } 200 | 201 | /** 202 | * Cosing similarity 203 | * @param vec1 204 | * @param vec2 205 | * @return 206 | */ 207 | private double cosineSimilarity(List vec1, List vec2) { 208 | double dp = dotProduct(vec1, vec2); 209 | double magnitudeA = magnitude(vec1); 210 | double magnitudeB = magnitude(vec2); 211 | return dp / magnitudeA * magnitudeB; 212 | } 213 | /** 214 | * Magnitude 215 | * @param vec 216 | * @return 217 | */ 218 | private double magnitude(List vec) { 219 | double sum_mag = 0; 220 | for(Long value : vec) { 221 | sum_mag += value * value; 222 | } 223 | return Math.sqrt(sum_mag); 224 | } 225 | /** 226 | * Dot product 227 | * @param vec1 228 | * @param vec2 229 | * @return 230 | */ 231 | private double dotProduct(List vec1, List vec2) { 232 | double sum = 0; 233 | if (vec1.size() > vec2.size()) { 234 | int diff = vec1.size() - vec2.size(); 235 | for (int i = 0; i < diff; i++) 236 | vec2.add(0L); 237 | 238 | } else if (vec1.size() < vec2.size()) { 239 | int diff = vec2.size() - vec1.size(); 240 | for (int i = 0; i < diff; i++) 241 | vec1.add(0L); 242 | } 243 | for(int i = 0; i