├── .gitignore ├── LICENSE ├── README.md ├── TODO.txt ├── __init__.py ├── algorithms ├── __init__.py ├── itemBasedRecommender.py ├── itemSimilarity.py ├── userBasedRecommender.py └── userSimilarity.py ├── config.py ├── data ├── kmeans_data.txt ├── ml-100k │ ├── README │ ├── allbut.pl │ ├── mku.sh │ ├── u.data │ ├── u.genre │ ├── u.info │ ├── u.item │ ├── u.occupation │ ├── u.user │ ├── u1.base │ ├── u1.test │ ├── u2.base │ ├── u2.test │ ├── u3.base │ ├── u3.test │ ├── u4.base │ ├── u4.test │ ├── u5.base │ ├── u5.test │ ├── ua.base │ ├── ua.test │ ├── ub.base │ └── ub.test ├── ratings.txt ├── ratings1.txt ├── ratings2.txt ├── ratings3.txt └── results │ ├── user_cf_mr_predict.csv │ ├── user_cf_predict.csv │ ├── user_cf_test.csv │ ├── user_cf_train.csv │ └── user_item_cf_spark.txt ├── setup.py ├── train_and_test.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | .idea 3 | .idea_modules 4 | .sbt 5 | # C extensions 6 | *.so 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | dist 12 | build 13 | eggs 14 | parts 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Data files 39 | ratings10m.txt 40 | data/ratings10m.txt 41 | tests/data/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Evan Casey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights 
to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | spark-knn-recommender 2 | =========== 3 | 4 | Spark-knn-recommender is a fast, scalable recommendation engine built on top of [PySpark](http://spark.apache.org/docs/0.9.0/python-programming-guide.html), the Python API for [Apache Spark](http://spark.apache.org/). It can be deployed locally or on [Amazon EMR](https://aws.amazon.com/elasticmapreduce/). 
5 | 6 | Requirements 7 | ------------ 8 | 9 | * Python >= 2.7.3 10 | * Spark >= 0.7.0 11 | * Numpy 12 | 13 | Installation 14 | ------------ 15 | 16 | First, clone this repo onto your local machine: 17 | ```bash 18 | $ git clone https://github.com/evancasey/spark-knn-recommender.git 19 | ``` 20 | 21 | Set your spark cluster configuration in config.py: 22 | ```python 23 | 24 | # change if you are running on Amazon EMR 25 | CLUSTER_CONFIG = "local" 26 | 27 | # fill this in with pySpark path 28 | PYSPARK_HOME = "../spark/pyspark" 29 | SPARKLER_HOME = "../spark/python/sparkler" 30 | ``` 31 | 32 | In the root directory, run: 33 | ```bash 34 | $ python setup.py 35 | ``` 36 | 37 | Using spark-knn-recommender 38 | -------------- 39 | Run: 40 | ```bash 41 | $ python train_and_test.py 42 | ``` 43 | 44 | 45 | Running on Amazon EMR 46 | --------------------- 47 | 48 | * create an Amazon Web Services Account 49 | * sign up for Elastic MapReduce 50 | * install the [Amazon EMR CLI](http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/emr-cli-install.html) 51 | 52 | Run the Spark/Shark bootstrap script: 53 | ```bash 54 | $ ./elastic-mapreduce --create --alive --name "Spark/Shark Cluster" --bootstrap-action s3://elasticmapreduce/samples/spark/0.8.1/install-spark-shark.sh --bootstrap-name "Spark/Shark" --instance-type m1.xlarge --instance-count 3 --jobflow-role spark 55 | Created job flow j-2Y0VECUPLFW94 56 | ``` 57 | 58 | SSH into the master node of your cluster (replace the job ID below with your job ID): 59 | 60 | ```bash 61 | ./elastic-mapreduce -j j-2Y0VECUPLFW94 --ssh 62 | ``` 63 | 64 | ## MIT License 65 | 66 | Copyright (c) 2011-2016 67 | 68 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 69 | 70 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 71 | 72 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 73 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | Update pseudocode with better notation including: 2 | -Function imports 3 | -Helper function code (sims,weighted sums) 4 | -Spark builtins (join,broadcast,groupby,etc) 5 | -Metric space/domain of variables and dimensions 6 | -Diagrams 7 | -Final formatting 8 | 9 | Code: 10 | -PyFiles 11 | -README 12 | -Fix O(N^2) bottleneck 13 | -Use totals and sim_sums as accumulator variables 14 | 15 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evancasey/spark-knn-recommender/5ce96bb9bcea22ed8a859bac9988726da78fc1aa/__init__.py -------------------------------------------------------------------------------- /algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("..") 4 | 5 | import config 6 | 7 | sys.path.append("../" + config.PYSPARK_MODULE_HOME) -------------------------------------------------------------------------------- 
/algorithms/itemBasedRecommender.py: -------------------------------------------------------------------------------- 1 | # Item-based Collaborative Filtering on pySpark with cosine similarity and weighted sums 2 | 3 | import sys 4 | from collections import defaultdict 5 | from itertools import combinations 6 | import numpy as np 7 | import random 8 | import csv 9 | import pdb 10 | 11 | from pyspark import SparkContext 12 | from recsys.evaluation.prediction import MAE 13 | 14 | def parseVector(line): 15 | ''' 16 | Parse each line of the specified data file, assuming a "|" delimiter. 17 | Converts each rating to a float 18 | ''' 19 | line = line.split("|") 20 | return line[0],(line[1],float(line[2])) 21 | 22 | def sampleInteractions(user_id,items_with_rating,n): 23 | ''' 24 | For users with # interactions > n, replace their interaction history 25 | with a sample of n items_with_rating 26 | ''' 27 | if len(items_with_rating) > n: 28 | return user_id, random.sample(items_with_rating,n) 29 | else: 30 | return user_id, items_with_rating 31 | 32 | def findItemPairs(user_id,items_with_rating): 33 | ''' 34 | For each user, find all item-item pairs combos. (i.e. 
items with the same user) 35 | ''' 36 | for item1,item2 in combinations(items_with_rating,2): 37 | return (item1[0],item2[0]),(item1[1],item2[1]) 38 | 39 | def calcSim(item_pair,rating_pairs): 40 | ''' 41 | For each item-item pair, return the specified similarity measure, 42 | along with co_raters_count 43 | ''' 44 | sum_xx, sum_xy, sum_yy, sum_x, sum_y, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0) 45 | 46 | for rating_pair in rating_pairs: 47 | sum_xx += np.float(rating_pair[0]) * np.float(rating_pair[0]) 48 | sum_yy += np.float(rating_pair[1]) * np.float(rating_pair[1]) 49 | sum_xy += np.float(rating_pair[0]) * np.float(rating_pair[1]) 50 | # sum_y += rt[1] 51 | # sum_x += rt[0] 52 | n += 1 53 | 54 | cos_sim = cosine(sum_xy,np.sqrt(sum_xx),np.sqrt(sum_yy)) 55 | return item_pair, (cos_sim,n) 56 | 57 | def cosine(dot_product,rating_norm_squared,rating2_norm_squared): 58 | ''' 59 | The cosine between two vectors A, B 60 | dotProduct(A, B) / (norm(A) * norm(B)) 61 | ''' 62 | numerator = dot_product 63 | denominator = rating_norm_squared * rating2_norm_squared 64 | return (numerator / (float(denominator))) if denominator else 0.0 65 | 66 | def correlation(size, dot_product, rating_sum, \ 67 | rating2sum, rating_norm_squared, rating2_norm_squared): 68 | ''' 69 | The correlation between two vectors A, B is 70 | [n * dotProduct(A, B) - sum(A) * sum(B)] / 71 | sqrt{ [n * norm(A)^2 - sum(A)^2] [n * norm(B)^2 - sum(B)^2] } 72 | 73 | ''' 74 | numerator = size * dot_product - rating_sum * rating2sum 75 | denominator = sqrt(size * rating_norm_squared - rating_sum * rating_sum) * \ 76 | sqrt(size * rating2_norm_squared - rating2sum * rating2sum) 77 | 78 | return (numerator / (float(denominator))) if denominator else 0.0 79 | 80 | def keyOnFirstItem(item_pair,item_sim_data): 81 | ''' 82 | For each item-item pair, make the first item's id the key 83 | ''' 84 | (item1_id,item2_id) = item_pair 85 | return item1_id,(item2_id,item_sim_data) 86 | 87 | def 
nearestNeighbors(item_id,items_and_sims,n): 88 | ''' 89 | Sort the predictions list by similarity and select the top-N neighbors 90 | ''' 91 | items_and_sims.sort(key=lambda x: x[1][0],reverse=True) 92 | return item_id, items_and_sims[:n] 93 | 94 | def topNRecommendations(user_id,items_with_rating,item_sims,n): 95 | ''' 96 | Calculate the top-N item recommendations for each user using the 97 | weighted sums method 98 | ''' 99 | 100 | # initialize dicts to store the score of each individual item, 101 | # since an item can exist in more than one item neighborhood 102 | totals = defaultdict(int) 103 | sim_sums = defaultdict(int) 104 | 105 | for (item,rating) in items_with_rating: 106 | 107 | # lookup the nearest neighbors for this item 108 | nearest_neighbors = item_sims.get(item,None) 109 | 110 | if nearest_neighbors: 111 | for (neighbor,(sim,count)) in nearest_neighbors: 112 | if neighbor != item: 113 | 114 | # update totals and sim_sums with the rating data 115 | totals[neighbor] += sim * rating 116 | sim_sums[neighbor] += sim 117 | 118 | # create the normalized list of scored items 119 | scored_items = [(total/sim_sums[item],item) for item,total in totals.items()] 120 | 121 | # sort the scored items in ascending order 122 | scored_items.sort(reverse=True) 123 | 124 | # take out the item score 125 | # ranked_items = [x[1] for x in scored_items] 126 | 127 | return user_id,scored_items[:n] 128 | 129 | if __name__ == "__main__": 130 | if len(sys.argv) < 3: 131 | print >> sys.stderr, \ 132 | "Usage: PythonUserCF " 133 | exit(-1) 134 | 135 | sc = SparkContext(sys.argv[1], "PythonUserCF") 136 | lines = sc.textFile(sys.argv[2]) 137 | 138 | ''' 139 | Obtain the sparse user-item matrix: 140 | user_id -> [(item_id_1, rating_1), 141 | [(item_id_2, rating_2), 142 | ...] 
143 | ''' 144 | user_item_pairs = lines.map(parseVector).groupByKey().map( 145 | lambda p: sampleInteractions(p[0],p[1],500)).cache() 146 | 147 | ''' 148 | Get all item-item pair combos: 149 | (item1,item2) -> [(item1_rating,item2_rating), 150 | (item1_rating,item2_rating), 151 | ...] 152 | ''' 153 | 154 | pairwise_items = user_item_pairs.filter( 155 | lambda p: len(p[1]) > 1).map( 156 | lambda p: findItemPairs(p[0],p[1])).groupByKey() 157 | 158 | ''' 159 | Calculate the cosine similarity for each item pair and select the top-N nearest neighbors: 160 | (item1,item2) -> (similarity,co_raters_count) 161 | ''' 162 | 163 | item_sims = pairwise_items.map( 164 | lambda p: calcSim(p[0],p[1])).map( 165 | lambda p: keyOnFirstItem(p[0],p[1])).groupByKey().map( 166 | lambda p : (p[0], list(p[1]))).map( 167 | lambda p: nearestNeighbors(p[0],p[1],50)).collect() 168 | 169 | ''' 170 | Preprocess the item similarity matrix into a dictionary and store it as a broadcast variable: 171 | ''' 172 | 173 | item_sim_dict = {} 174 | for (item,data) in item_sims: 175 | item_sim_dict[item] = data 176 | 177 | isb = sc.broadcast(item_sim_dict) 178 | 179 | ''' 180 | Calculate the top-N item recommendations for each user 181 | user_id -> [item1,item2,item3,...] 
182 | ''' 183 | user_item_recs = user_item_pairs.map( 184 | lambda p: topNRecommendations(p[0],p[1],isb.value,500)).collect() 185 | 186 | ''' 187 | Read in test data and calculate MAE 188 | ''' 189 | 190 | test_ratings = defaultdict(list) 191 | 192 | # read in the test data 193 | f = open("tests/data/cftest.txt", 'rt') 194 | reader = csv.reader(f, delimiter='|') 195 | for row in reader: 196 | user = row[0] 197 | item = row[1] 198 | rating = row[2] 199 | test_ratings[user] += [(item,rating)] 200 | 201 | # create train-test rating tuples 202 | preds = [] 203 | for (user,items_with_rating) in user_item_recs: 204 | for (rating,item) in items_with_rating: 205 | for (test_item,test_rating) in test_ratings[user]: 206 | if str(test_item) == str(item): 207 | preds.append((rating,float(test_rating))) 208 | 209 | mae = MAE(preds) 210 | result = mae.compute() 211 | print "Mean Absolute Error: ",result 212 | -------------------------------------------------------------------------------- /algorithms/itemSimilarity.py: -------------------------------------------------------------------------------- 1 | # Item-Item Similarity computation on pySpark with cosine similarity 2 | 3 | import sys 4 | from itertools import combinations 5 | import numpy as np 6 | 7 | from pyspark import SparkContext 8 | 9 | 10 | def parseVector(line): 11 | ''' 12 | Parse each line of the specified data file, assuming a "|" delimiter. 13 | Converts each rating to a float 14 | ''' 15 | line = line.split("|") 16 | return line[0],(line[1],float(line[2])) 17 | 18 | def findItemPairs(user_id,items_with_rating): 19 | ''' 20 | For each user, find all item-item pairs combos. (i.e. 
items with the same user) 21 | ''' 22 | for item1,item2 in combinations(items_with_rating,2): 23 | return (item1[0],item2[0]),(item1[1],item2[1]) 24 | 25 | def calcSim(item_pair,rating_pairs): 26 | ''' 27 | For each item-item pair, return the specified similarity measure, 28 | along with co_raters_count 29 | ''' 30 | sum_xx, sum_xy, sum_yy, sum_x, sum_y, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0) 31 | 32 | for rating_pair in rating_pairs: 33 | sum_xx += np.float(rating_pair[0]) * np.float(rating_pair[0]) 34 | sum_yy += np.float(rating_pair[1]) * np.float(rating_pair[1]) 35 | sum_xy += np.float(rating_pair[0]) * np.float(rating_pair[1]) 36 | # sum_y += rt[1] 37 | # sum_x += rt[0] 38 | n += 1 39 | 40 | cos_sim = cosine(sum_xy,np.sqrt(sum_xx),np.sqrt(sum_yy)) 41 | 42 | return item_pair, (cos_sim,n) 43 | 44 | 45 | def cosine(dot_product,rating_norm_squared,rating2_norm_squared): 46 | ''' 47 | The cosine between two vectors A, B 48 | dotProduct(A, B) / (norm(A) * norm(B)) 49 | ''' 50 | numerator = dot_product 51 | denominator = rating_norm_squared * rating2_norm_squared 52 | 53 | return (numerator / (float(denominator))) if denominator else 0.0 54 | 55 | if __name__ == "__main__": 56 | if len(sys.argv) < 3: 57 | print >> sys.stderr, \ 58 | "Usage: PythonUserCF " 59 | exit(-1) 60 | 61 | sc = SparkContext(sys.argv[1], "PythonUserCF") 62 | lines = sc.textFile(sys.argv[2]) 63 | 64 | ''' 65 | Obtain the sparse user-item matrix 66 | user_id -> [(item_id_1, rating_1), 67 | [(item_id_2, rating_2), 68 | ...] 69 | ''' 70 | user_item_pairs = lines.map(parseVector).groupByKey().cache() 71 | 72 | ''' 73 | Get all item-item pair combos 74 | (item1,item2) -> [(item1_rating,item2_rating), 75 | (item1_rating,item2_rating), 76 | ...] 
77 | ''' 78 | 79 | pairwise_items = user_item_pairs.filter( 80 | lambda p: len(p[1]) > 1).map( 81 | lambda p: findItemPairs(p[0],p[1])).groupByKey() 82 | 83 | ''' 84 | Calculate the cosine similarity for each item pair 85 | (item1,item2) -> (similarity,co_raters_count) 86 | ''' 87 | 88 | item_sims = pairwise_items.map( 89 | lambda p: calcSim(p[0],p[1])).collect() -------------------------------------------------------------------------------- /algorithms/userBasedRecommender.py: -------------------------------------------------------------------------------- 1 | # User-based Collaborative Filtering on pySpark with cosine similarity and weighted sums 2 | 3 | import sys 4 | from collections import defaultdict 5 | from itertools import combinations 6 | import random 7 | import numpy as np 8 | import pdb 9 | 10 | from pyspark import SparkContext 11 | 12 | 13 | def parseVectorOnUser(line): 14 | ''' 15 | Parse each line of the specified data file, assuming a "|" delimiter. 16 | Key is user_id, converts each rating to a float. 17 | ''' 18 | line = line.split("|") 19 | return line[0],(line[1],float(line[2])) 20 | 21 | def parseVectorOnItem(line): 22 | ''' 23 | Parse each line of the specified data file, assuming a "|" delimiter. 24 | Key is item_id, converts each rating to a float. 25 | ''' 26 | line = line.split("|") 27 | return line[1],(line[0],float(line[2])) 28 | 29 | def sampleInteractions(item_id,users_with_rating,n): 30 | ''' 31 | For items with # interactions > n, replace their interaction history 32 | with a sample of n users_with_rating 33 | ''' 34 | if len(users_with_rating) > n: 35 | return item_id, random.sample(users_with_rating,n) 36 | else: 37 | return item_id, users_with_rating 38 | 39 | def findUserPairs(item_id,users_with_rating): 40 | ''' 41 | For each item, find all user-user pairs combos. (i.e. 
users with the same item) 42 | ''' 43 | for user1,user2 in combinations(users_with_rating,2): 44 | return (user1[0],user2[0]),(user1[1],user2[1]) 45 | 46 | def calcSim(user_pair,rating_pairs): 47 | ''' 48 | For each user-user pair, return the specified similarity measure, 49 | along with co_raters_count. 50 | ''' 51 | sum_xx, sum_xy, sum_yy, sum_x, sum_y, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0) 52 | 53 | for rating_pair in rating_pairs: 54 | sum_xx += np.float(rating_pair[0]) * np.float(rating_pair[0]) 55 | sum_yy += np.float(rating_pair[1]) * np.float(rating_pair[1]) 56 | sum_xy += np.float(rating_pair[0]) * np.float(rating_pair[1]) 57 | # sum_y += rt[1] 58 | # sum_x += rt[0] 59 | n += 1 60 | 61 | cos_sim = cosine(sum_xy,np.sqrt(sum_xx),np.sqrt(sum_yy)) 62 | return user_pair, (cos_sim,n) 63 | 64 | def cosine(dot_product,rating_norm_squared,rating2_norm_squared): 65 | ''' 66 | The cosine between two vectors A, B 67 | dotProduct(A, B) / (norm(A) * norm(B)) 68 | ''' 69 | numerator = dot_product 70 | denominator = rating_norm_squared * rating2_norm_squared 71 | 72 | return (numerator / (float(denominator))) if denominator else 0.0 73 | 74 | def keyOnFirstUser(user_pair,item_sim_data): 75 | ''' 76 | For each user-user pair, make the first user's id the key 77 | ''' 78 | (user1_id,user2_id) = user_pair 79 | return user1_id,(user2_id,item_sim_data) 80 | 81 | def nearestNeighbors(user,users_and_sims,n): 82 | ''' 83 | Sort the predictions list by similarity and select the top-N neighbors 84 | ''' 85 | users_and_sims.sort(key=lambda x: x[1][0],reverse=True) 86 | return user, users_and_sims[:n] 87 | 88 | def topNRecommendations(user_id,user_sims,users_with_rating,n): 89 | ''' 90 | Calculate the top-N item recommendations for each user using the 91 | weighted sums method 92 | ''' 93 | 94 | # initialize dicts to store the score of each individual item, 95 | # since an item can exist in more than one item neighborhood 96 | totals = defaultdict(int) 97 | sim_sums = defaultdict(int) 98 
| 99 | for (neighbor,(sim,count)) in user_sims: 100 | 101 | # lookup the item predictions for this neighbor 102 | unscored_items = users_with_rating.get(neighbor,None) 103 | 104 | if unscored_items: 105 | for (item,rating) in unscored_items: 106 | if neighbor != item: 107 | 108 | # update totals and sim_sums with the rating data 109 | totals[neighbor] += sim * rating 110 | sim_sums[neighbor] += sim 111 | 112 | # create the normalized list of scored items 113 | scored_items = [(total/sim_sums[item],item) for item,total in totals.items()] 114 | 115 | # sort the scored items in ascending order 116 | scored_items.sort(reverse=True) 117 | 118 | # take out the item score 119 | ranked_items = [x[1] for x in scored_items] 120 | 121 | return user_id,ranked_items[:n] 122 | 123 | if __name__ == "__main__": 124 | if len(sys.argv) < 3: 125 | print >> sys.stderr, \ 126 | "Usage: PythonUserCF " 127 | exit(-1) 128 | 129 | sc = SparkContext(sys.argv[1],"PythonUserItemCF") 130 | lines = sc.textFile(sys.argv[2]) 131 | 132 | ''' 133 | Obtain the sparse item-user matrix: 134 | item_id -> ((user_1,rating),(user2,rating)) 135 | ''' 136 | item_user_pairs = lines.map(parseVectorOnItem).groupByKey().map( 137 | lambda p: sampleInteractions(p[0],p[1],500)).cache() 138 | 139 | ''' 140 | Get all item-item pair combos: 141 | (user1_id,user2_id) -> [(rating1,rating2), 142 | (rating1,rating2), 143 | (rating1,rating2), 144 | ...] 
145 | ''' 146 | pairwise_users = item_user_pairs.filter( 147 | lambda p: len(p[1]) > 1).map( 148 | lambda p: findUserPairs(p[0],p[1])).groupByKey() 149 | 150 | ''' 151 | Calculate the cosine similarity for each user pair and select the top-N nearest neighbors: 152 | (user1,user2) -> (similarity,co_raters_count) 153 | ''' 154 | user_sims = pairwise_users.map( 155 | lambda p: calcSim(p[0],p[1])).map( 156 | lambda p: keyOnFirstUser(p[0],p[1])).groupByKey().map( 157 | lambda p: nearestNeighbors(p[0],p[1],50)) 158 | 159 | ''' 160 | Obtain the the item history for each user and store it as a broadcast variable 161 | user_id -> [(item_id_1, rating_1), 162 | [(item_id_2, rating_2), 163 | ...] 164 | ''' 165 | 166 | user_item_hist = lines.map(parseVectorOnUser).groupByKey().collect() 167 | 168 | ui_dict = {} 169 | for (user,items) in user_item_hist: 170 | ui_dict[user] = items 171 | 172 | uib = sc.broadcast(ui_dict) 173 | 174 | ''' 175 | Calculate the top-N item recommendations for each user 176 | user_id -> [item1,item2,item3,...] 177 | ''' 178 | user_item_recs = user_sims.map( 179 | lambda p: topNRecommendations(p[0],p[1],uib.value,100)).collect() 180 | 181 | 182 | -------------------------------------------------------------------------------- /algorithms/userSimilarity.py: -------------------------------------------------------------------------------- 1 | # User-User Similarity computation on pySpark 2 | 3 | import sys 4 | from itertools import combinations 5 | import numpy as np 6 | import pdb 7 | 8 | from pyspark import SparkContext 9 | 10 | 11 | def parseVector(line): 12 | ''' 13 | Parse each line of the specified data file, assuming a "|" delimiter. 14 | Converts each rating to a float 15 | ''' 16 | line = line.split("|") 17 | return line[1],(line[0],float(line[2])) 18 | 19 | def keyOnUserPair(item_id,user_and_rating_pair): 20 | ''' 21 | Convert each item and co_rating user pairs to a new vector 22 | keyed on the user pair ids, with the co_ratings as their value. 
23 | ''' 24 | (user1_with_rating,user2_with_rating) = user_and_rating_pair 25 | user1_id,user2_id = user1_with_rating[0],user2_with_rating[0] 26 | user1_rating,user2_rating = user1_with_rating[1],user2_with_rating[1] 27 | return (user1_id,user2_id),(user1_rating,user2_rating) 28 | 29 | def calcSim(user_pair,rating_pairs): 30 | ''' 31 | For each user-user pair, return the specified similarity measure, 32 | along with co_raters_count. 33 | ''' 34 | sum_xx, sum_xy, sum_yy, sum_x, sum_y, n = (0.0, 0.0, 0.0, 0.0, 0.0, 0) 35 | 36 | for rating_pair in rating_pairs: 37 | sum_xx += np.float(rating_pair[0]) * np.float(rating_pair[0]) 38 | sum_yy += np.float(rating_pair[1]) * np.float(rating_pair[1]) 39 | sum_xy += np.float(rating_pair[0]) * np.float(rating_pair[1]) 40 | # sum_y += rt[1] 41 | # sum_x += rt[0] 42 | n += 1 43 | 44 | cos_sim = cosine(sum_xy,np.sqrt(sum_xx),np.sqrt(sum_yy)) 45 | return user_pair, (cos_sim,n) 46 | 47 | def cosine(dot_product,rating_norm_squared,rating2_norm_squared): 48 | ''' 49 | The cosine between two vectors A, B 50 | dotProduct(A, B) / (norm(A) * norm(B)) 51 | ''' 52 | numerator = dot_product 53 | denominator = rating_norm_squared * rating2_norm_squared 54 | 55 | return (numerator / (float(denominator))) if denominator else 0.0 56 | 57 | 58 | if __name__ == "__main__": 59 | if len(sys.argv) < 3: 60 | print >> sys.stderr, \ 61 | "Usage: PythonUserCF " 62 | exit(-1) 63 | 64 | sc = SparkContext(sys.argv[1], "PythonUserCF") 65 | lines = sc.textFile(sys.argv[2]) 66 | 67 | ''' 68 | Parse the vector with item_id as the key: 69 | item_id -> (user_id,rating) 70 | ''' 71 | item_user = lines.map(parseVector).cache() 72 | 73 | ''' 74 | Get co_rating users by joining on item_id: 75 | item_id -> ((user_1,rating),(user2,rating)) 76 | ''' 77 | item_user_pairs = item_user.join(item_user) 78 | 79 | ''' 80 | Key each item_user_pair on the user_pair and get rid of non-unique 81 | user pairs, then aggregate all co-rating pairs: 82 | (user1_id,user2_id) -> 
[(rating1,rating2), 83 | (rating1,rating2), 84 | (rating1,rating2), 85 | ...] 86 | ''' 87 | user_item_rating_pairs = item_user_pairs.map( 88 | lambda p: keyOnUserPair(p[0],p[1])).filter( 89 | lambda p: p[0][0] != p[0][1]).groupByKey() 90 | 91 | ''' 92 | Calculate the cosine similarity for each user pair: 93 | (user1,user2) -> (similarity,co_raters_count) 94 | ''' 95 | user_pair_sims = user_item_rating_pairs.map( 96 | lambda p: calcSim(p[0],p[1])) 97 | 98 | for p in user_pair_sims.collect(): 99 | print p -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # local settings 2 | CLUSTER_CONFIG = "local[4]" 3 | PYSPARK_HOME = "../build/spark-0.7.0/pyspark" 4 | PYSPARK_MODULE_HOME = "../build/spark-0.7.0/python/pyspark" 5 | SPARKLER_HOME = "../build/spark-0.7.0/python/sparkler" 6 | 7 | # Amazon EMR settings 8 | # CLUSTER_CONFIG = "spark://:7077" 9 | # PYSPARK_HOME = "../spark-0.8.1-emr/pyspark" 10 | # PYSPARK_MODULE_HOME = "../spark-0.8.1-emr/python/pyspark" 11 | # SPARKLER_HOME = "../spark-0.8.1-emr/python/sparkler" -------------------------------------------------------------------------------- /data/kmeans_data.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.0 0.0 2 | 0.1 0.1 0.1 3 | 0.2 0.2 0.2 4 | 9.0 9.0 9.0 5 | 9.1 9.1 9.1 6 | 9.2 9.2 9.2 7 | -------------------------------------------------------------------------------- /data/ml-100k/README: -------------------------------------------------------------------------------- 1 | SUMMARY & USAGE LICENSE 2 | ============================================= 3 | 4 | MovieLens data sets were collected by the GroupLens Research Project 5 | at the University of Minnesota. 6 | 7 | This data set consists of: 8 | * 100,000 ratings (1-5) from 943 users on 1682 movies. 9 | * Each user has rated at least 20 movies. 
10 | * Simple demographic info for the users (age, gender, occupation, zip) 11 | 12 | The data was collected through the MovieLens web site 13 | (movielens.umn.edu) during the seven-month period from September 19th, 14 | 1997 through April 22nd, 1998. This data has been cleaned up - users 15 | who had less than 20 ratings or did not have complete demographic 16 | information were removed from this data set. Detailed descriptions of 17 | the data file can be found at the end of this file. 18 | 19 | Neither the University of Minnesota nor any of the researchers 20 | involved can guarantee the correctness of the data, its suitability 21 | for any particular purpose, or the validity of results based on the 22 | use of the data set. The data set may be used for any research 23 | purposes under the following conditions: 24 | 25 | * The user may not state or imply any endorsement from the 26 | University of Minnesota or the GroupLens Research Group. 27 | 28 | * The user must acknowledge the use of the data set in 29 | publications resulting from the use of the data set, and must 30 | send us an electronic or paper copy of those publications. 31 | 32 | * The user may not redistribute the data without separate 33 | permission. 34 | 35 | * The user may not use this information for any commercial or 36 | revenue-bearing purposes without first obtaining permission 37 | from a faculty member of the GroupLens Research Project at the 38 | University of Minnesota. 39 | 40 | If you have any further questions or comments, please contact Jon Herlocker 41 | . 42 | 43 | ACKNOWLEDGEMENTS 44 | ============================================== 45 | 46 | Thanks to Al Borchers for cleaning up this data and writing the 47 | accompanying scripts. 48 | 49 | PUBLISHED WORK THAT HAS USED THIS DATASET 50 | ============================================== 51 | 52 | Herlocker, J., Konstan, J., Borchers, A., Riedl, J.. An Algorithmic 53 | Framework for Performing Collaborative Filtering. 
Proceedings of the 54 | 1999 Conference on Research and Development in Information 55 | Retrieval. Aug. 1999. 56 | 57 | FURTHER INFORMATION ABOUT THE GROUPLENS RESEARCH PROJECT 58 | ============================================== 59 | 60 | The GroupLens Research Project is a research group in the Department 61 | of Computer Science and Engineering at the University of Minnesota. 62 | Members of the GroupLens Research Project are involved in many 63 | research projects related to the fields of information filtering, 64 | collaborative filtering, and recommender systems. The project is led 65 | by professors John Riedl and Joseph Konstan. The project began to 66 | explore automated collaborative filtering in 1992, but is most well 67 | known for its world wide trial of an automated collaborative filtering 68 | system for Usenet news in 1996. The technology developed in the 69 | Usenet trial formed the base for the formation of Net Perceptions, 70 | Inc., which was founded by members of GroupLens Research. Since then 71 | the project has expanded its scope to research overall information 72 | filtering solutions, integrating in content-based methods as well as 73 | improving current collaborative filtering technology. 74 | 75 | Further information on the GroupLens Research project, including 76 | research publications, can be found at the following web site: 77 | 78 | http://www.grouplens.org/ 79 | 80 | GroupLens Research currently operates a movie recommender based on 81 | collaborative filtering: 82 | 83 | http://www.movielens.org/ 84 | 85 | DETAILED DESCRIPTIONS OF DATA FILES 86 | ============================================== 87 | 88 | Here are brief descriptions of the data. 89 | 90 | ml-data.tar.gz -- Compressed tar file. To rebuild the u data files do this: 91 | gunzip ml-data.tar.gz 92 | tar xvf ml-data.tar 93 | mku.sh 94 | 95 | u.data -- The full u data set, 100000 ratings by 943 users on 1682 items. 96 | Each user has rated at least 20 movies. 
Users and items are 97 | numbered consecutively from 1. The data is randomly 98 | ordered. This is a tab separated list of 99 | user id | item id | rating | timestamp. 100 | The time stamps are unix seconds since 1/1/1970 UTC 101 | 102 | u.info -- The number of users, items, and ratings in the u data set. 103 | 104 | u.item -- Information about the items (movies); this is a pipe ('|') separated 105 | list of 106 | movie id | movie title | release date | video release date | 107 | IMDb URL | unknown | Action | Adventure | Animation | 108 | Children's | Comedy | Crime | Documentary | Drama | Fantasy | 109 | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | 110 | Thriller | War | Western | 111 | The last 19 fields are the genres, a 1 indicates the movie 112 | is of that genre, a 0 indicates it is not; movies can be in 113 | several genres at once. 114 | The movie ids are the ones used in the u.data data set. 115 | 116 | u.genre -- A list of the genres. 117 | 118 | u.user -- Demographic information about the users; this is a pipe ('|') 119 | separated list of 120 | user id | age | gender | occupation | zip code 121 | The user ids are the ones used in the u.data data set. 122 | 123 | u.occupation -- A list of the occupations. 124 | 125 | u1.base -- The data sets u1.base and u1.test through u5.base and u5.test 126 | u1.test are 80%/20% splits of the u data into training and test data. 127 | u2.base Each of u1, ..., u5 have disjoint test sets; this is for 128 | u2.test 5 fold cross validation (where you repeat your experiment 129 | u3.base with each training and test set and average the results). 130 | u3.test These data sets can be generated from u.data by mku.sh. 131 | u4.base 132 | u4.test 133 | u5.base 134 | u5.test 135 | 136 | ua.base -- The data sets ua.base, ua.test, ub.base, and ub.test 137 | ua.test split the u data into a training set and a test set with 138 | ub.base exactly 10 ratings per user in the test set.
The sets 139 | ub.test ua.test and ub.test are disjoint. These data sets can 140 | be generated from u.data by mku.sh. 141 | 142 | allbut.pl -- The script that generates training and test sets where 143 | all but n of a user's ratings are in the training data. 144 | 145 | mku.sh -- A shell script to generate all the u data sets from u.data. 146 | -------------------------------------------------------------------------------- /data/ml-100k/allbut.pl: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl 2 | 3 | # get args 4 | if (@ARGV < 3) { 5 | print STDERR "Usage: $0 base_name start stop max_test [ratings ...]\n"; 6 | exit 1; 7 | } 8 | $basename = shift; 9 | $start = shift; 10 | $stop = shift; 11 | $maxtest = shift; 12 | 13 | # open files 14 | open( TESTFILE, ">$basename.test" ) or die "Cannot open $basename.test for writing\n"; 15 | open( BASEFILE, ">$basename.base" ) or die "Cannot open $basename.base for writing\n"; 16 | 17 | # init variables 18 | $testcnt = 0; 19 | 20 | while (<>) { 21 | ($user) = split; 22 | if (!
defined $ratingcnt{$user}) { 23 | $ratingcnt{$user} = 0; 24 | } 25 | ++$ratingcnt{$user}; 26 | if (($testcnt < $maxtest || $maxtest <= 0) 27 | && $ratingcnt{$user} >= $start && $ratingcnt{$user} <= $stop) { 28 | ++$testcnt; 29 | print TESTFILE; 30 | } 31 | else { 32 | print BASEFILE; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /data/ml-100k/mku.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | trap `rm -f tmp.$$; exit 1` 1 2 15 4 | 5 | for i in 1 2 3 4 5 6 | do 7 | head -`expr $i \* 20000` u.data | tail -20000 > tmp.$$ 8 | sort -t" " -k 1,1n -k 2,2n tmp.$$ > u$i.test 9 | head -`expr \( $i - 1 \) \* 20000` u.data > tmp.$$ 10 | tail -`expr \( 5 - $i \) \* 20000` u.data >> tmp.$$ 11 | sort -t" " -k 1,1n -k 2,2n tmp.$$ > u$i.base 12 | done 13 | 14 | allbut.pl ua 1 10 100000 u.data 15 | sort -t" " -k 1,1n -k 2,2n ua.base > tmp.$$ 16 | mv tmp.$$ ua.base 17 | sort -t" " -k 1,1n -k 2,2n ua.test > tmp.$$ 18 | mv tmp.$$ ua.test 19 | 20 | allbut.pl ub 11 20 100000 u.data 21 | sort -t" " -k 1,1n -k 2,2n ub.base > tmp.$$ 22 | mv tmp.$$ ub.base 23 | sort -t" " -k 1,1n -k 2,2n ub.test > tmp.$$ 24 | mv tmp.$$ ub.test 25 | 26 | -------------------------------------------------------------------------------- /data/ml-100k/u.genre: -------------------------------------------------------------------------------- 1 | unknown|0 2 | Action|1 3 | Adventure|2 4 | Animation|3 5 | Children's|4 6 | Comedy|5 7 | Crime|6 8 | Documentary|7 9 | Drama|8 10 | Fantasy|9 11 | Film-Noir|10 12 | Horror|11 13 | Musical|12 14 | Mystery|13 15 | Romance|14 16 | Sci-Fi|15 17 | Thriller|16 18 | War|17 19 | Western|18 20 | 21 | -------------------------------------------------------------------------------- /data/ml-100k/u.info: -------------------------------------------------------------------------------- 1 | 943 users 2 | 1682 items 3 | 100000 ratings 4 | 
-------------------------------------------------------------------------------- /data/ml-100k/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evancasey/spark-knn-recommender/5ce96bb9bcea22ed8a859bac9988726da78fc1aa/data/ml-100k/u.item -------------------------------------------------------------------------------- /data/ml-100k/u.occupation: -------------------------------------------------------------------------------- 1 | administrator 2 | artist 3 | doctor 4 | educator 5 | engineer 6 | entertainment 7 | executive 8 | healthcare 9 | homemaker 10 | lawyer 11 | librarian 12 | marketing 13 | none 14 | other 15 | programmer 16 | retired 17 | salesman 18 | scientist 19 | student 20 | technician 21 | writer 22 | -------------------------------------------------------------------------------- /data/ml-100k/u.user: -------------------------------------------------------------------------------- 1 | 1|24|M|technician|85711 2 | 2|53|F|other|94043 3 | 3|23|M|writer|32067 4 | 4|24|M|technician|43537 5 | 5|33|F|other|15213 6 | 6|42|M|executive|98101 7 | 7|57|M|administrator|91344 8 | 8|36|M|administrator|05201 9 | 9|29|M|student|01002 10 | 10|53|M|lawyer|90703 11 | 11|39|F|other|30329 12 | 12|28|F|other|06405 13 | 13|47|M|educator|29206 14 | 14|45|M|scientist|55106 15 | 15|49|F|educator|97301 16 | 16|21|M|entertainment|10309 17 | 17|30|M|programmer|06355 18 | 18|35|F|other|37212 19 | 19|40|M|librarian|02138 20 | 20|42|F|homemaker|95660 21 | 21|26|M|writer|30068 22 | 22|25|M|writer|40206 23 | 23|30|F|artist|48197 24 | 24|21|F|artist|94533 25 | 25|39|M|engineer|55107 26 | 26|49|M|engineer|21044 27 | 27|40|F|librarian|30030 28 | 28|32|M|writer|55369 29 | 29|41|M|programmer|94043 30 | 30|7|M|student|55436 31 | 31|24|M|artist|10003 32 | 32|28|F|student|78741 33 | 33|23|M|student|27510 34 | 34|38|F|administrator|42141 35 | 35|20|F|homemaker|42459 36 | 36|19|F|student|93117 37 | 37|23|M|student|55105 38 | 
38|28|F|other|54467 39 | 39|41|M|entertainment|01040 40 | 40|38|M|scientist|27514 41 | 41|33|M|engineer|80525 42 | 42|30|M|administrator|17870 43 | 43|29|F|librarian|20854 44 | 44|26|M|technician|46260 45 | 45|29|M|programmer|50233 46 | 46|27|F|marketing|46538 47 | 47|53|M|marketing|07102 48 | 48|45|M|administrator|12550 49 | 49|23|F|student|76111 50 | 50|21|M|writer|52245 51 | 51|28|M|educator|16509 52 | 52|18|F|student|55105 53 | 53|26|M|programmer|55414 54 | 54|22|M|executive|66315 55 | 55|37|M|programmer|01331 56 | 56|25|M|librarian|46260 57 | 57|16|M|none|84010 58 | 58|27|M|programmer|52246 59 | 59|49|M|educator|08403 60 | 60|50|M|healthcare|06472 61 | 61|36|M|engineer|30040 62 | 62|27|F|administrator|97214 63 | 63|31|M|marketing|75240 64 | 64|32|M|educator|43202 65 | 65|51|F|educator|48118 66 | 66|23|M|student|80521 67 | 67|17|M|student|60402 68 | 68|19|M|student|22904 69 | 69|24|M|engineer|55337 70 | 70|27|M|engineer|60067 71 | 71|39|M|scientist|98034 72 | 72|48|F|administrator|73034 73 | 73|24|M|student|41850 74 | 74|39|M|scientist|T8H1N 75 | 75|24|M|entertainment|08816 76 | 76|20|M|student|02215 77 | 77|30|M|technician|29379 78 | 78|26|M|administrator|61801 79 | 79|39|F|administrator|03755 80 | 80|34|F|administrator|52241 81 | 81|21|M|student|21218 82 | 82|50|M|programmer|22902 83 | 83|40|M|other|44133 84 | 84|32|M|executive|55369 85 | 85|51|M|educator|20003 86 | 86|26|M|administrator|46005 87 | 87|47|M|administrator|89503 88 | 88|49|F|librarian|11701 89 | 89|43|F|administrator|68106 90 | 90|60|M|educator|78155 91 | 91|55|M|marketing|01913 92 | 92|32|M|entertainment|80525 93 | 93|48|M|executive|23112 94 | 94|26|M|student|71457 95 | 95|31|M|administrator|10707 96 | 96|25|F|artist|75206 97 | 97|43|M|artist|98006 98 | 98|49|F|executive|90291 99 | 99|20|M|student|63129 100 | 100|36|M|executive|90254 101 | 101|15|M|student|05146 102 | 102|38|M|programmer|30220 103 | 103|26|M|student|55108 104 | 104|27|M|student|55108 105 | 105|24|M|engineer|94043 106 | 
106|61|M|retired|55125 107 | 107|39|M|scientist|60466 108 | 108|44|M|educator|63130 109 | 109|29|M|other|55423 110 | 110|19|M|student|77840 111 | 111|57|M|engineer|90630 112 | 112|30|M|salesman|60613 113 | 113|47|M|executive|95032 114 | 114|27|M|programmer|75013 115 | 115|31|M|engineer|17110 116 | 116|40|M|healthcare|97232 117 | 117|20|M|student|16125 118 | 118|21|M|administrator|90210 119 | 119|32|M|programmer|67401 120 | 120|47|F|other|06260 121 | 121|54|M|librarian|99603 122 | 122|32|F|writer|22206 123 | 123|48|F|artist|20008 124 | 124|34|M|student|60615 125 | 125|30|M|lawyer|22202 126 | 126|28|F|lawyer|20015 127 | 127|33|M|none|73439 128 | 128|24|F|marketing|20009 129 | 129|36|F|marketing|07039 130 | 130|20|M|none|60115 131 | 131|59|F|administrator|15237 132 | 132|24|M|other|94612 133 | 133|53|M|engineer|78602 134 | 134|31|M|programmer|80236 135 | 135|23|M|student|38401 136 | 136|51|M|other|97365 137 | 137|50|M|educator|84408 138 | 138|46|M|doctor|53211 139 | 139|20|M|student|08904 140 | 140|30|F|student|32250 141 | 141|49|M|programmer|36117 142 | 142|13|M|other|48118 143 | 143|42|M|technician|08832 144 | 144|53|M|programmer|20910 145 | 145|31|M|entertainment|V3N4P 146 | 146|45|M|artist|83814 147 | 147|40|F|librarian|02143 148 | 148|33|M|engineer|97006 149 | 149|35|F|marketing|17325 150 | 150|20|F|artist|02139 151 | 151|38|F|administrator|48103 152 | 152|33|F|educator|68767 153 | 153|25|M|student|60641 154 | 154|25|M|student|53703 155 | 155|32|F|other|11217 156 | 156|25|M|educator|08360 157 | 157|57|M|engineer|70808 158 | 158|50|M|educator|27606 159 | 159|23|F|student|55346 160 | 160|27|M|programmer|66215 161 | 161|50|M|lawyer|55104 162 | 162|25|M|artist|15610 163 | 163|49|M|administrator|97212 164 | 164|47|M|healthcare|80123 165 | 165|20|F|other|53715 166 | 166|47|M|educator|55113 167 | 167|37|M|other|L9G2B 168 | 168|48|M|other|80127 169 | 169|52|F|other|53705 170 | 170|53|F|healthcare|30067 171 | 171|48|F|educator|78750 172 | 172|55|M|marketing|22207 173 | 
173|56|M|other|22306 174 | 174|30|F|administrator|52302 175 | 175|26|F|scientist|21911 176 | 176|28|M|scientist|07030 177 | 177|20|M|programmer|19104 178 | 178|26|M|other|49512 179 | 179|15|M|entertainment|20755 180 | 180|22|F|administrator|60202 181 | 181|26|M|executive|21218 182 | 182|36|M|programmer|33884 183 | 183|33|M|scientist|27708 184 | 184|37|M|librarian|76013 185 | 185|53|F|librarian|97403 186 | 186|39|F|executive|00000 187 | 187|26|M|educator|16801 188 | 188|42|M|student|29440 189 | 189|32|M|artist|95014 190 | 190|30|M|administrator|95938 191 | 191|33|M|administrator|95161 192 | 192|42|M|educator|90840 193 | 193|29|M|student|49931 194 | 194|38|M|administrator|02154 195 | 195|42|M|scientist|93555 196 | 196|49|M|writer|55105 197 | 197|55|M|technician|75094 198 | 198|21|F|student|55414 199 | 199|30|M|writer|17604 200 | 200|40|M|programmer|93402 201 | 201|27|M|writer|E2A4H 202 | 202|41|F|educator|60201 203 | 203|25|F|student|32301 204 | 204|52|F|librarian|10960 205 | 205|47|M|lawyer|06371 206 | 206|14|F|student|53115 207 | 207|39|M|marketing|92037 208 | 208|43|M|engineer|01720 209 | 209|33|F|educator|85710 210 | 210|39|M|engineer|03060 211 | 211|66|M|salesman|32605 212 | 212|49|F|educator|61401 213 | 213|33|M|executive|55345 214 | 214|26|F|librarian|11231 215 | 215|35|M|programmer|63033 216 | 216|22|M|engineer|02215 217 | 217|22|M|other|11727 218 | 218|37|M|administrator|06513 219 | 219|32|M|programmer|43212 220 | 220|30|M|librarian|78205 221 | 221|19|M|student|20685 222 | 222|29|M|programmer|27502 223 | 223|19|F|student|47906 224 | 224|31|F|educator|43512 225 | 225|51|F|administrator|58202 226 | 226|28|M|student|92103 227 | 227|46|M|executive|60659 228 | 228|21|F|student|22003 229 | 229|29|F|librarian|22903 230 | 230|28|F|student|14476 231 | 231|48|M|librarian|01080 232 | 232|45|M|scientist|99709 233 | 233|38|M|engineer|98682 234 | 234|60|M|retired|94702 235 | 235|37|M|educator|22973 236 | 236|44|F|writer|53214 237 | 237|49|M|administrator|63146 238 | 
238|42|F|administrator|44124 239 | 239|39|M|artist|95628 240 | 240|23|F|educator|20784 241 | 241|26|F|student|20001 242 | 242|33|M|educator|31404 243 | 243|33|M|educator|60201 244 | 244|28|M|technician|80525 245 | 245|22|M|student|55109 246 | 246|19|M|student|28734 247 | 247|28|M|engineer|20770 248 | 248|25|M|student|37235 249 | 249|25|M|student|84103 250 | 250|29|M|executive|95110 251 | 251|28|M|doctor|85032 252 | 252|42|M|engineer|07733 253 | 253|26|F|librarian|22903 254 | 254|44|M|educator|42647 255 | 255|23|M|entertainment|07029 256 | 256|35|F|none|39042 257 | 257|17|M|student|77005 258 | 258|19|F|student|77801 259 | 259|21|M|student|48823 260 | 260|40|F|artist|89801 261 | 261|28|M|administrator|85202 262 | 262|19|F|student|78264 263 | 263|41|M|programmer|55346 264 | 264|36|F|writer|90064 265 | 265|26|M|executive|84601 266 | 266|62|F|administrator|78756 267 | 267|23|M|engineer|83716 268 | 268|24|M|engineer|19422 269 | 269|31|F|librarian|43201 270 | 270|18|F|student|63119 271 | 271|51|M|engineer|22932 272 | 272|33|M|scientist|53706 273 | 273|50|F|other|10016 274 | 274|20|F|student|55414 275 | 275|38|M|engineer|92064 276 | 276|21|M|student|95064 277 | 277|35|F|administrator|55406 278 | 278|37|F|librarian|30033 279 | 279|33|M|programmer|85251 280 | 280|30|F|librarian|22903 281 | 281|15|F|student|06059 282 | 282|22|M|administrator|20057 283 | 283|28|M|programmer|55305 284 | 284|40|M|executive|92629 285 | 285|25|M|programmer|53713 286 | 286|27|M|student|15217 287 | 287|21|M|salesman|31211 288 | 288|34|M|marketing|23226 289 | 289|11|M|none|94619 290 | 290|40|M|engineer|93550 291 | 291|19|M|student|44106 292 | 292|35|F|programmer|94703 293 | 293|24|M|writer|60804 294 | 294|34|M|technician|92110 295 | 295|31|M|educator|50325 296 | 296|43|F|administrator|16803 297 | 297|29|F|educator|98103 298 | 298|44|M|executive|01581 299 | 299|29|M|doctor|63108 300 | 300|26|F|programmer|55106 301 | 301|24|M|student|55439 302 | 302|42|M|educator|77904 303 | 303|19|M|student|14853 304 
| 304|22|F|student|71701 305 | 305|23|M|programmer|94086 306 | 306|45|M|other|73132 307 | 307|25|M|student|55454 308 | 308|60|M|retired|95076 309 | 309|40|M|scientist|70802 310 | 310|37|M|educator|91711 311 | 311|32|M|technician|73071 312 | 312|48|M|other|02110 313 | 313|41|M|marketing|60035 314 | 314|20|F|student|08043 315 | 315|31|M|educator|18301 316 | 316|43|F|other|77009 317 | 317|22|M|administrator|13210 318 | 318|65|M|retired|06518 319 | 319|38|M|programmer|22030 320 | 320|19|M|student|24060 321 | 321|49|F|educator|55413 322 | 322|20|M|student|50613 323 | 323|21|M|student|19149 324 | 324|21|F|student|02176 325 | 325|48|M|technician|02139 326 | 326|41|M|administrator|15235 327 | 327|22|M|student|11101 328 | 328|51|M|administrator|06779 329 | 329|48|M|educator|01720 330 | 330|35|F|educator|33884 331 | 331|33|M|entertainment|91344 332 | 332|20|M|student|40504 333 | 333|47|M|other|V0R2M 334 | 334|32|M|librarian|30002 335 | 335|45|M|executive|33775 336 | 336|23|M|salesman|42101 337 | 337|37|M|scientist|10522 338 | 338|39|F|librarian|59717 339 | 339|35|M|lawyer|37901 340 | 340|46|M|engineer|80123 341 | 341|17|F|student|44405 342 | 342|25|F|other|98006 343 | 343|43|M|engineer|30093 344 | 344|30|F|librarian|94117 345 | 345|28|F|librarian|94143 346 | 346|34|M|other|76059 347 | 347|18|M|student|90210 348 | 348|24|F|student|45660 349 | 349|68|M|retired|61455 350 | 350|32|M|student|97301 351 | 351|61|M|educator|49938 352 | 352|37|F|programmer|55105 353 | 353|25|M|scientist|28480 354 | 354|29|F|librarian|48197 355 | 355|25|M|student|60135 356 | 356|32|F|homemaker|92688 357 | 357|26|M|executive|98133 358 | 358|40|M|educator|10022 359 | 359|22|M|student|61801 360 | 360|51|M|other|98027 361 | 361|22|M|student|44074 362 | 362|35|F|homemaker|85233 363 | 363|20|M|student|87501 364 | 364|63|M|engineer|01810 365 | 365|29|M|lawyer|20009 366 | 366|20|F|student|50670 367 | 367|17|M|student|37411 368 | 368|18|M|student|92113 369 | 369|24|M|student|91335 370 | 370|52|M|writer|08534 
371 | 371|36|M|engineer|99206 372 | 372|25|F|student|66046 373 | 373|24|F|other|55116 374 | 374|36|M|executive|78746 375 | 375|17|M|entertainment|37777 376 | 376|28|F|other|10010 377 | 377|22|M|student|18015 378 | 378|35|M|student|02859 379 | 379|44|M|programmer|98117 380 | 380|32|M|engineer|55117 381 | 381|33|M|artist|94608 382 | 382|45|M|engineer|01824 383 | 383|42|M|administrator|75204 384 | 384|52|M|programmer|45218 385 | 385|36|M|writer|10003 386 | 386|36|M|salesman|43221 387 | 387|33|M|entertainment|37412 388 | 388|31|M|other|36106 389 | 389|44|F|writer|83702 390 | 390|42|F|writer|85016 391 | 391|23|M|student|84604 392 | 392|52|M|writer|59801 393 | 393|19|M|student|83686 394 | 394|25|M|administrator|96819 395 | 395|43|M|other|44092 396 | 396|57|M|engineer|94551 397 | 397|17|M|student|27514 398 | 398|40|M|other|60008 399 | 399|25|M|other|92374 400 | 400|33|F|administrator|78213 401 | 401|46|F|healthcare|84107 402 | 402|30|M|engineer|95129 403 | 403|37|M|other|06811 404 | 404|29|F|programmer|55108 405 | 405|22|F|healthcare|10019 406 | 406|52|M|educator|93109 407 | 407|29|M|engineer|03261 408 | 408|23|M|student|61755 409 | 409|48|M|administrator|98225 410 | 410|30|F|artist|94025 411 | 411|34|M|educator|44691 412 | 412|25|M|educator|15222 413 | 413|55|M|educator|78212 414 | 414|24|M|programmer|38115 415 | 415|39|M|educator|85711 416 | 416|20|F|student|92626 417 | 417|27|F|other|48103 418 | 418|55|F|none|21206 419 | 419|37|M|lawyer|43215 420 | 420|53|M|educator|02140 421 | 421|38|F|programmer|55105 422 | 422|26|M|entertainment|94533 423 | 423|64|M|other|91606 424 | 424|36|F|marketing|55422 425 | 425|19|M|student|58644 426 | 426|55|M|educator|01602 427 | 427|51|M|doctor|85258 428 | 428|28|M|student|55414 429 | 429|27|M|student|29205 430 | 430|38|M|scientist|98199 431 | 431|24|M|marketing|92629 432 | 432|22|M|entertainment|50311 433 | 433|27|M|artist|11211 434 | 434|16|F|student|49705 435 | 435|24|M|engineer|60007 436 | 436|30|F|administrator|17345 437 | 
437|27|F|other|20009 438 | 438|51|F|administrator|43204 439 | 439|23|F|administrator|20817 440 | 440|30|M|other|48076 441 | 441|50|M|technician|55013 442 | 442|22|M|student|85282 443 | 443|35|M|salesman|33308 444 | 444|51|F|lawyer|53202 445 | 445|21|M|writer|92653 446 | 446|57|M|educator|60201 447 | 447|30|M|administrator|55113 448 | 448|23|M|entertainment|10021 449 | 449|23|M|librarian|55021 450 | 450|35|F|educator|11758 451 | 451|16|M|student|48446 452 | 452|35|M|administrator|28018 453 | 453|18|M|student|06333 454 | 454|57|M|other|97330 455 | 455|48|M|administrator|83709 456 | 456|24|M|technician|31820 457 | 457|33|F|salesman|30011 458 | 458|47|M|technician|Y1A6B 459 | 459|22|M|student|29201 460 | 460|44|F|other|60630 461 | 461|15|M|student|98102 462 | 462|19|F|student|02918 463 | 463|48|F|healthcare|75218 464 | 464|60|M|writer|94583 465 | 465|32|M|other|05001 466 | 466|22|M|student|90804 467 | 467|29|M|engineer|91201 468 | 468|28|M|engineer|02341 469 | 469|60|M|educator|78628 470 | 470|24|M|programmer|10021 471 | 471|10|M|student|77459 472 | 472|24|M|student|87544 473 | 473|29|M|student|94708 474 | 474|51|M|executive|93711 475 | 475|30|M|programmer|75230 476 | 476|28|M|student|60440 477 | 477|23|F|student|02125 478 | 478|29|M|other|10019 479 | 479|30|M|educator|55409 480 | 480|57|M|retired|98257 481 | 481|73|M|retired|37771 482 | 482|18|F|student|40256 483 | 483|29|M|scientist|43212 484 | 484|27|M|student|21208 485 | 485|44|F|educator|95821 486 | 486|39|M|educator|93101 487 | 487|22|M|engineer|92121 488 | 488|48|M|technician|21012 489 | 489|55|M|other|45218 490 | 490|29|F|artist|V5A2B 491 | 491|43|F|writer|53711 492 | 492|57|M|educator|94618 493 | 493|22|M|engineer|60090 494 | 494|38|F|administrator|49428 495 | 495|29|M|engineer|03052 496 | 496|21|F|student|55414 497 | 497|20|M|student|50112 498 | 498|26|M|writer|55408 499 | 499|42|M|programmer|75006 500 | 500|28|M|administrator|94305 501 | 501|22|M|student|10025 502 | 502|22|M|student|23092 503 | 
503|50|F|writer|27514 504 | 504|40|F|writer|92115 505 | 505|27|F|other|20657 506 | 506|46|M|programmer|03869 507 | 507|18|F|writer|28450 508 | 508|27|M|marketing|19382 509 | 509|23|M|administrator|10011 510 | 510|34|M|other|98038 511 | 511|22|M|student|21250 512 | 512|29|M|other|20090 513 | 513|43|M|administrator|26241 514 | 514|27|M|programmer|20707 515 | 515|53|M|marketing|49508 516 | 516|53|F|librarian|10021 517 | 517|24|M|student|55454 518 | 518|49|F|writer|99709 519 | 519|22|M|other|55320 520 | 520|62|M|healthcare|12603 521 | 521|19|M|student|02146 522 | 522|36|M|engineer|55443 523 | 523|50|F|administrator|04102 524 | 524|56|M|educator|02159 525 | 525|27|F|administrator|19711 526 | 526|30|M|marketing|97124 527 | 527|33|M|librarian|12180 528 | 528|18|M|student|55104 529 | 529|47|F|administrator|44224 530 | 530|29|M|engineer|94040 531 | 531|30|F|salesman|97408 532 | 532|20|M|student|92705 533 | 533|43|M|librarian|02324 534 | 534|20|M|student|05464 535 | 535|45|F|educator|80302 536 | 536|38|M|engineer|30078 537 | 537|36|M|engineer|22902 538 | 538|31|M|scientist|21010 539 | 539|53|F|administrator|80303 540 | 540|28|M|engineer|91201 541 | 541|19|F|student|84302 542 | 542|21|M|student|60515 543 | 543|33|M|scientist|95123 544 | 544|44|F|other|29464 545 | 545|27|M|technician|08052 546 | 546|36|M|executive|22911 547 | 547|50|M|educator|14534 548 | 548|51|M|writer|95468 549 | 549|42|M|scientist|45680 550 | 550|16|F|student|95453 551 | 551|25|M|programmer|55414 552 | 552|45|M|other|68147 553 | 553|58|M|educator|62901 554 | 554|32|M|scientist|62901 555 | 555|29|F|educator|23227 556 | 556|35|F|educator|30606 557 | 557|30|F|writer|11217 558 | 558|56|F|writer|63132 559 | 559|69|M|executive|10022 560 | 560|32|M|student|10003 561 | 561|23|M|engineer|60005 562 | 562|54|F|administrator|20879 563 | 563|39|F|librarian|32707 564 | 564|65|M|retired|94591 565 | 565|40|M|student|55422 566 | 566|20|M|student|14627 567 | 567|24|M|entertainment|10003 568 | 568|39|M|educator|01915 569 | 
569|34|M|educator|91903 570 | 570|26|M|educator|14627 571 | 571|34|M|artist|01945 572 | 572|51|M|educator|20003 573 | 573|68|M|retired|48911 574 | 574|56|M|educator|53188 575 | 575|33|M|marketing|46032 576 | 576|48|M|executive|98281 577 | 577|36|F|student|77845 578 | 578|31|M|administrator|M7A1A 579 | 579|32|M|educator|48103 580 | 580|16|M|student|17961 581 | 581|37|M|other|94131 582 | 582|17|M|student|93003 583 | 583|44|M|engineer|29631 584 | 584|25|M|student|27511 585 | 585|69|M|librarian|98501 586 | 586|20|M|student|79508 587 | 587|26|M|other|14216 588 | 588|18|F|student|93063 589 | 589|21|M|lawyer|90034 590 | 590|50|M|educator|82435 591 | 591|57|F|librarian|92093 592 | 592|18|M|student|97520 593 | 593|31|F|educator|68767 594 | 594|46|M|educator|M4J2K 595 | 595|25|M|programmer|31909 596 | 596|20|M|artist|77073 597 | 597|23|M|other|84116 598 | 598|40|F|marketing|43085 599 | 599|22|F|student|R3T5K 600 | 600|34|M|programmer|02320 601 | 601|19|F|artist|99687 602 | 602|47|F|other|34656 603 | 603|21|M|programmer|47905 604 | 604|39|M|educator|11787 605 | 605|33|M|engineer|33716 606 | 606|28|M|programmer|63044 607 | 607|49|F|healthcare|02154 608 | 608|22|M|other|10003 609 | 609|13|F|student|55106 610 | 610|22|M|student|21227 611 | 611|46|M|librarian|77008 612 | 612|36|M|educator|79070 613 | 613|37|F|marketing|29678 614 | 614|54|M|educator|80227 615 | 615|38|M|educator|27705 616 | 616|55|M|scientist|50613 617 | 617|27|F|writer|11201 618 | 618|15|F|student|44212 619 | 619|17|M|student|44134 620 | 620|18|F|writer|81648 621 | 621|17|M|student|60402 622 | 622|25|M|programmer|14850 623 | 623|50|F|educator|60187 624 | 624|19|M|student|30067 625 | 625|27|M|programmer|20723 626 | 626|23|M|scientist|19807 627 | 627|24|M|engineer|08034 628 | 628|13|M|none|94306 629 | 629|46|F|other|44224 630 | 630|26|F|healthcare|55408 631 | 631|18|F|student|38866 632 | 632|18|M|student|55454 633 | 633|35|M|programmer|55414 634 | 634|39|M|engineer|T8H1N 635 | 635|22|M|other|23237 636 | 
636|47|M|educator|48043 637 | 637|30|M|other|74101 638 | 638|45|M|engineer|01940 639 | 639|42|F|librarian|12065 640 | 640|20|M|student|61801 641 | 641|24|M|student|60626 642 | 642|18|F|student|95521 643 | 643|39|M|scientist|55122 644 | 644|51|M|retired|63645 645 | 645|27|M|programmer|53211 646 | 646|17|F|student|51250 647 | 647|40|M|educator|45810 648 | 648|43|M|engineer|91351 649 | 649|20|M|student|39762 650 | 650|42|M|engineer|83814 651 | 651|65|M|retired|02903 652 | 652|35|M|other|22911 653 | 653|31|M|executive|55105 654 | 654|27|F|student|78739 655 | 655|50|F|healthcare|60657 656 | 656|48|M|educator|10314 657 | 657|26|F|none|78704 658 | 658|33|M|programmer|92626 659 | 659|31|M|educator|54248 660 | 660|26|M|student|77380 661 | 661|28|M|programmer|98121 662 | 662|55|M|librarian|19102 663 | 663|26|M|other|19341 664 | 664|30|M|engineer|94115 665 | 665|25|M|administrator|55412 666 | 666|44|M|administrator|61820 667 | 667|35|M|librarian|01970 668 | 668|29|F|writer|10016 669 | 669|37|M|other|20009 670 | 670|30|M|technician|21114 671 | 671|21|M|programmer|91919 672 | 672|54|F|administrator|90095 673 | 673|51|M|educator|22906 674 | 674|13|F|student|55337 675 | 675|34|M|other|28814 676 | 676|30|M|programmer|32712 677 | 677|20|M|other|99835 678 | 678|50|M|educator|61462 679 | 679|20|F|student|54302 680 | 680|33|M|lawyer|90405 681 | 681|44|F|marketing|97208 682 | 682|23|M|programmer|55128 683 | 683|42|M|librarian|23509 684 | 684|28|M|student|55414 685 | 685|32|F|librarian|55409 686 | 686|32|M|educator|26506 687 | 687|31|F|healthcare|27713 688 | 688|37|F|administrator|60476 689 | 689|25|M|other|45439 690 | 690|35|M|salesman|63304 691 | 691|34|M|educator|60089 692 | 692|34|M|engineer|18053 693 | 693|43|F|healthcare|85210 694 | 694|60|M|programmer|06365 695 | 695|26|M|writer|38115 696 | 696|55|M|other|94920 697 | 697|25|M|other|77042 698 | 698|28|F|programmer|06906 699 | 699|44|M|other|96754 700 | 700|17|M|student|76309 701 | 701|51|F|librarian|56321 702 | 
702|37|M|other|89104 703 | 703|26|M|educator|49512 704 | 704|51|F|librarian|91105 705 | 705|21|F|student|54494 706 | 706|23|M|student|55454 707 | 707|56|F|librarian|19146 708 | 708|26|F|homemaker|96349 709 | 709|21|M|other|N4T1A 710 | 710|19|M|student|92020 711 | 711|22|F|student|15203 712 | 712|22|F|student|54901 713 | 713|42|F|other|07204 714 | 714|26|M|engineer|55343 715 | 715|21|M|technician|91206 716 | 716|36|F|administrator|44265 717 | 717|24|M|technician|84105 718 | 718|42|M|technician|64118 719 | 719|37|F|other|V0R2H 720 | 720|49|F|administrator|16506 721 | 721|24|F|entertainment|11238 722 | 722|50|F|homemaker|17331 723 | 723|26|M|executive|94403 724 | 724|31|M|executive|40243 725 | 725|21|M|student|91711 726 | 726|25|F|administrator|80538 727 | 727|25|M|student|78741 728 | 728|58|M|executive|94306 729 | 729|19|M|student|56567 730 | 730|31|F|scientist|32114 731 | 731|41|F|educator|70403 732 | 732|28|F|other|98405 733 | 733|44|F|other|60630 734 | 734|25|F|other|63108 735 | 735|29|F|healthcare|85719 736 | 736|48|F|writer|94618 737 | 737|30|M|programmer|98072 738 | 738|35|M|technician|95403 739 | 739|35|M|technician|73162 740 | 740|25|F|educator|22206 741 | 741|25|M|writer|63108 742 | 742|35|M|student|29210 743 | 743|31|M|programmer|92660 744 | 744|35|M|marketing|47024 745 | 745|42|M|writer|55113 746 | 746|25|M|engineer|19047 747 | 747|19|M|other|93612 748 | 748|28|M|administrator|94720 749 | 749|33|M|other|80919 750 | 750|28|M|administrator|32303 751 | 751|24|F|other|90034 752 | 752|60|M|retired|21201 753 | 753|56|M|salesman|91206 754 | 754|59|F|librarian|62901 755 | 755|44|F|educator|97007 756 | 756|30|F|none|90247 757 | 757|26|M|student|55104 758 | 758|27|M|student|53706 759 | 759|20|F|student|68503 760 | 760|35|F|other|14211 761 | 761|17|M|student|97302 762 | 762|32|M|administrator|95050 763 | 763|27|M|scientist|02113 764 | 764|27|F|educator|62903 765 | 765|31|M|student|33066 766 | 766|42|M|other|10960 767 | 767|70|M|engineer|00000 768 | 
768|29|M|administrator|12866 769 | 769|39|M|executive|06927 770 | 770|28|M|student|14216 771 | 771|26|M|student|15232 772 | 772|50|M|writer|27105 773 | 773|20|M|student|55414 774 | 774|30|M|student|80027 775 | 775|46|M|executive|90036 776 | 776|30|M|librarian|51157 777 | 777|63|M|programmer|01810 778 | 778|34|M|student|01960 779 | 779|31|M|student|K7L5J 780 | 780|49|M|programmer|94560 781 | 781|20|M|student|48825 782 | 782|21|F|artist|33205 783 | 783|30|M|marketing|77081 784 | 784|47|M|administrator|91040 785 | 785|32|M|engineer|23322 786 | 786|36|F|engineer|01754 787 | 787|18|F|student|98620 788 | 788|51|M|administrator|05779 789 | 789|29|M|other|55420 790 | 790|27|M|technician|80913 791 | 791|31|M|educator|20064 792 | 792|40|M|programmer|12205 793 | 793|22|M|student|85281 794 | 794|32|M|educator|57197 795 | 795|30|M|programmer|08610 796 | 796|32|F|writer|33755 797 | 797|44|F|other|62522 798 | 798|40|F|writer|64131 799 | 799|49|F|administrator|19716 800 | 800|25|M|programmer|55337 801 | 801|22|M|writer|92154 802 | 802|35|M|administrator|34105 803 | 803|70|M|administrator|78212 804 | 804|39|M|educator|61820 805 | 805|27|F|other|20009 806 | 806|27|M|marketing|11217 807 | 807|41|F|healthcare|93555 808 | 808|45|M|salesman|90016 809 | 809|50|F|marketing|30803 810 | 810|55|F|other|80526 811 | 811|40|F|educator|73013 812 | 812|22|M|technician|76234 813 | 813|14|F|student|02136 814 | 814|30|M|other|12345 815 | 815|32|M|other|28806 816 | 816|34|M|other|20755 817 | 817|19|M|student|60152 818 | 818|28|M|librarian|27514 819 | 819|59|M|administrator|40205 820 | 820|22|M|student|37725 821 | 821|37|M|engineer|77845 822 | 822|29|F|librarian|53144 823 | 823|27|M|artist|50322 824 | 824|31|M|other|15017 825 | 825|44|M|engineer|05452 826 | 826|28|M|artist|77048 827 | 827|23|F|engineer|80228 828 | 828|28|M|librarian|85282 829 | 829|48|M|writer|80209 830 | 830|46|M|programmer|53066 831 | 831|21|M|other|33765 832 | 832|24|M|technician|77042 833 | 833|34|M|writer|90019 834 | 
834|26|M|other|64153 835 | 835|44|F|executive|11577 836 | 836|44|M|artist|10018 837 | 837|36|F|artist|55409 838 | 838|23|M|student|01375 839 | 839|38|F|entertainment|90814 840 | 840|39|M|artist|55406 841 | 841|45|M|doctor|47401 842 | 842|40|M|writer|93055 843 | 843|35|M|librarian|44212 844 | 844|22|M|engineer|95662 845 | 845|64|M|doctor|97405 846 | 846|27|M|lawyer|47130 847 | 847|29|M|student|55417 848 | 848|46|M|engineer|02146 849 | 849|15|F|student|25652 850 | 850|34|M|technician|78390 851 | 851|18|M|other|29646 852 | 852|46|M|administrator|94086 853 | 853|49|M|writer|40515 854 | 854|29|F|student|55408 855 | 855|53|M|librarian|04988 856 | 856|43|F|marketing|97215 857 | 857|35|F|administrator|V1G4L 858 | 858|63|M|educator|09645 859 | 859|18|F|other|06492 860 | 860|70|F|retired|48322 861 | 861|38|F|student|14085 862 | 862|25|M|executive|13820 863 | 863|17|M|student|60089 864 | 864|27|M|programmer|63021 865 | 865|25|M|artist|11231 866 | 866|45|M|other|60302 867 | 867|24|M|scientist|92507 868 | 868|21|M|programmer|55303 869 | 869|30|M|student|10025 870 | 870|22|M|student|65203 871 | 871|31|M|executive|44648 872 | 872|19|F|student|74078 873 | 873|48|F|administrator|33763 874 | 874|36|M|scientist|37076 875 | 875|24|F|student|35802 876 | 876|41|M|other|20902 877 | 877|30|M|other|77504 878 | 878|50|F|educator|98027 879 | 879|33|F|administrator|55337 880 | 880|13|M|student|83702 881 | 881|39|M|marketing|43017 882 | 882|35|M|engineer|40503 883 | 883|49|M|librarian|50266 884 | 884|44|M|engineer|55337 885 | 885|30|F|other|95316 886 | 886|20|M|student|61820 887 | 887|14|F|student|27249 888 | 888|41|M|scientist|17036 889 | 889|24|M|technician|78704 890 | 890|32|M|student|97301 891 | 891|51|F|administrator|03062 892 | 892|36|M|other|45243 893 | 893|25|M|student|95823 894 | 894|47|M|educator|74075 895 | 895|31|F|librarian|32301 896 | 896|28|M|writer|91505 897 | 897|30|M|other|33484 898 | 898|23|M|homemaker|61755 899 | 899|32|M|other|55116 900 | 900|60|M|retired|18505 901 | 
901|38|M|executive|L1V3W 902 | 902|45|F|artist|97203 903 | 903|28|M|educator|20850 904 | 904|17|F|student|61073 905 | 905|27|M|other|30350 906 | 906|45|M|librarian|70124 907 | 907|25|F|other|80526 908 | 908|44|F|librarian|68504 909 | 909|50|F|educator|53171 910 | 910|28|M|healthcare|29301 911 | 911|37|F|writer|53210 912 | 912|51|M|other|06512 913 | 913|27|M|student|76201 914 | 914|44|F|other|08105 915 | 915|50|M|entertainment|60614 916 | 916|27|M|engineer|N2L5N 917 | 917|22|F|student|20006 918 | 918|40|M|scientist|70116 919 | 919|25|M|other|14216 920 | 920|30|F|artist|90008 921 | 921|20|F|student|98801 922 | 922|29|F|administrator|21114 923 | 923|21|M|student|E2E3R 924 | 924|29|M|other|11753 925 | 925|18|F|salesman|49036 926 | 926|49|M|entertainment|01701 927 | 927|23|M|programmer|55428 928 | 928|21|M|student|55408 929 | 929|44|M|scientist|53711 930 | 930|28|F|scientist|07310 931 | 931|60|M|educator|33556 932 | 932|58|M|educator|06437 933 | 933|28|M|student|48105 934 | 934|61|M|engineer|22902 935 | 935|42|M|doctor|66221 936 | 936|24|M|other|32789 937 | 937|48|M|educator|98072 938 | 938|38|F|technician|55038 939 | 939|26|F|student|33319 940 | 940|32|M|administrator|02215 941 | 941|20|M|student|97229 942 | 942|48|F|librarian|78209 943 | 943|22|M|student|77841 944 | -------------------------------------------------------------------------------- /data/ratings.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evancasey/spark-knn-recommender/5ce96bb9bcea22ed8a859bac9988726da78fc1aa/data/ratings.txt -------------------------------------------------------------------------------- /data/ratings1.txt: -------------------------------------------------------------------------------- 1 | 196|Kolya (1996)|3 2 | 186|L.A. Confidential (1997)|3 3 | 22|Heavyweights (1994)|1 4 | 244|Legends of the Fall (1994)|2 5 | 166|Jackie Brown (1997)|1 6 | 298|Dr. 
Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)|4 7 | 115|Hunt for Red October, The (1990)|2 8 | 253|Jungle Book, The (1994)|5 9 | 305|Grease (1978)|3 10 | 6|Remains of the Day, The (1993)|3 11 | 62|Men in Black (1997)|2 12 | 286|Romy and Michele's High School Reunion (1997)|5 13 | 200|Star Trek: First Contact (1996)|5 14 | 210|To Wong Foo, Thanks for Everything! Julie Newmar (1995)|3 15 | 224|Batman Forever (1995)|3 16 | 303|Only You (1994)|3 17 | 122|Age of Innocence, The (1993)|5 18 | 194|Sabrina (1995)|2 19 | 291|Just Cause (1995)|4 20 | 234|Endless Summer 2, The (1994)|2 21 | 119|Man Without a Face, The (1993)|4 22 | 167|Sabrina (1954)|4 23 | 299|Die Hard (1988)|4 24 | 291|Twister (1996)|2 25 | 230|Empire Strikes Back, The (1980)|4 26 | 59|Empire Strikes Back, The (1980)|4 27 | 59|My Fair Lady (1964)|2 28 | 230|My Fair Lady (1964)|2 29 | 308|Toy Story (1995)|4 30 | 95|Broken Arrow (1996)|2 31 | 38|Aladdin (1992)|5 32 | 102|Casper (1995)|2 33 | 63|Restoration (1995)|4 34 | 160|Jaws (1975)|5 35 | 50|Chasing Amy (1997)|3 36 | 301|Silence of the Lambs, The (1991)|4 37 | 225|Right Stuff, The (1983)|4 38 | 290|Sleepless in Seattle (1993)|4 39 | 97|Sting, The (1973)|3 40 | 157|Sabrina (1995)|4 41 | 181|Curdled (1996)|1 42 | 278|Rear Window (1954)|5 43 | 276|Speechless (1994)|1 44 | 7|Crumb (1994)|4 45 | 10|French Twist (Gazon maudit) (1995)|4 46 | 284|Fly Away Home (1996)|4 47 | 201|Trigger Effect, The (1996)|2 48 | 276|Tales from the Hood (1995)|3 49 | 287|Cop Land (1997)|5 50 | 246|Evil Dead II (1987)|5 51 | 242|Beautiful Thing (1996)|5 52 | 249|Last of the Mohicans, The (1992)|5 53 | 99|Get Shorty (1995)|5 54 | 178|Kiss the Girls (1997)|3 55 | 251|Fargo (1996)|4 56 | 81|Fantasia (1940)|2 57 | 260|Murder at 1600 (1997)|4 58 | 25|Return of the Jedi (1983)|5 59 | 59|Dead Poets Society (1989)|5 60 | 72|Conan the Barbarian (1981)|2 61 | 87|Naked Gun 33 1/3: The Final Insult (1994)|4 62 | 290|Sound of Music, The (1965)|5 63 | 42|E.T. 
the Extra-Terrestrial (1982)|5 64 | 292|Boot, Das (1981)|4 65 | 115|Angels and Insects (1995)|3 66 | 20|Scream (1996)|1 67 | 201|Nightmare on Elm Street, A (1984)|4 68 | 13|Ben-Hur (1959)|3 69 | 246|City of Lost Children, The (1995)|4 70 | 138|Brothers McMullen, The (1995)|5 71 | 167|Young Guns (1988)|1 72 | 60|To Kill a Mockingbird (1962)|5 73 | 57|Fly Away Home (1996)|5 74 | 223|Sabrina (1995)|4 75 | 189|Wings of Desire (1987)|4 76 | 243|Mr. Holland's Opus (1995)|3 77 | 92|House Arrest (1996)|1 78 | 246|Old Yeller (1957)|3 79 | 194|Jean de Florette (1986)|4 80 | 241|Seven Years in Tibet (1997)|2 81 | 178|Grosse Pointe Blank (1997)|4 82 | 254|That Darn Cat! (1965)|3 83 | 293|Copycat (1995)|3 84 | 127|Star Trek III: The Search for Spock (1984)|5 85 | 225|Jerry Maguire (1996)|5 86 | 299|Star Trek III: The Search for Spock (1984)|3 87 | 225|North by Northwest (1959)|5 88 | 276|Outbreak (1995)|3 89 | 291|Die Hard (1988)|5 90 | 222|Dangerous Minds (1995)|4 91 | 267|Miller's Crossing (1990)|5 92 | 42|Batman (1989)|3 93 | 11|Truth About Cats & Dogs, The (1996)|4 94 | 95|Sword in the Stone, The (1963)|4 95 | 8|Bean (1997)|4 96 | 162|Birdcage, The (1996)|4 97 | 87|Con Air (1997)|4 98 | 279|Monty Python's Life of Brian (1979)|5 99 | 145|Sense and Sensibility (1995)|2 100 | 119|Backbeat (1993)|5 101 | 62|African Queen, The (1951)|4 102 | 62|Adventures of Priscilla, Queen of the Desert, The (1994)|3 103 | 28|This Is Spinal Tap (1984)|4 104 | 135|Taxi Driver (1976)|4 105 | 32|Liar Liar (1997)|3 106 | 90|Adventures of Priscilla, Queen of the Desert, The (1994)|5 107 | 286|Young Frankenstein (1974)|4 108 | 293|Executive Decision (1996)|3 109 | 216|Die Hard (1988)|4 110 | 166|Conspiracy Theory (1997)|5 111 | 250|It's a Wonderful Life (1946)|4 112 | 271|Wizard of Oz, The (1939)|5 113 | 160|Raiders of the Lost Ark (1981)|5 114 | 265|Twister (1996)|4 115 | 198|African Queen, The (1951)|3 116 | 42|Terminator 2: Judgment Day (1991)|5 117 | 168|Willy Wonka and the Chocolate Factory 
(1971)|5 118 | 110|Devil's Advocate, The (1997)|4 119 | 58|Die Hard (1988)|4 120 | 90|Quiet Man, The (1952)|4 121 | 271|Jackie Brown (1997)|4 122 | 62|Muppet Treasure Island (1996)|3 123 | 279|Bogus (1996)|3 124 | 237|Annie Hall (1977)|4 125 | 94|Swimming with Sharks (1995)|4 126 | 128|My Fair Lady (1964)|3 127 | 298|In the Name of the Father (1993)|4 128 | 44|Terminator, The (1984)|5 129 | 264|Shining, The (1980)|5 130 | 194|True Lies (1994)|2 131 | 72|Terminator, The (1984)|5 132 | 222|Amistad (1997)|5 133 | 250|Mimic (1997)|3 134 | 41|Hunt for Red October, The (1990)|3 135 | 224|Devil's Own, The (1997)|3 136 | 82|2001: A Space Odyssey (1968)|3 137 | 262|My Family (1995)|4 138 | 293|Courage Under Fire (1996)|3 139 | 216|Pump Up the Volume (1990)|3 140 | 250|Homeward Bound: The Incredible Journey (1993)|3 141 | 59|Taxi Driver (1976)|5 142 | 286|Tales From the Crypt Presents: Demon Knight (1995)|5 143 | 244|One Fine Day (1996)|4 144 | 7|Vertigo (1958)|4 145 | 174|Bio-Dome (1996)|1 146 | 87|Sabrina (1995)|4 147 | 194|Blue Sky (1994)|2 148 | 82|Get on the Bus (1996)|2 149 | 13|Ninotchka (1939)|2 150 | 13|Good Will Hunting (1997)|4 151 | 244|Father of the Bride Part II (1995)|2 152 | 305|To Kill a Mockingbird (1962)|5 153 | 95|Roommates (1995)|2 154 | 43|Postino, Il (1994)|2 155 | 299|Before Sunrise (1995)|4 156 | 57|Mary Poppins (1964)|3 157 | 84|Mission: Impossible (1996)|3 158 | 269|Bonnie and Clyde (1967)|4 159 | 299|Truth About Cats & Dogs, The (1996)|3 160 | 194|Red Rock West (1992)|4 161 | 160|2001: A Space Odyssey (1968)|4 162 | 99|Chasing Amy (1997)|3 163 | 10|Sabrina (1954)|4 164 | 259|Rock, The (1996)|4 165 | 85|To Kill a Mockingbird (1962)|3 166 | 303|City of Lost Children, The (1995)|4 167 | 213|Heat (1995)|5 168 | 121|Annie Hall (1977)|3 169 | 90|Silence of the Lambs, The (1991)|5 170 | 49|Interview with the Vampire (1994)|2 171 | 42|It Could Happen to You (1994)|3 172 | 155|Dante's Peak (1997)|2 173 | 68|Rock, The (1996)|4 174 | 172|Good, The Bad and 
The Ugly, The (1966)|4 175 | 19|Get Shorty (1995)|4 176 | 268|Batman Returns (1992)|4 177 | 5|GoldenEye (1995)|3 178 | 305|Rock, The (1996)|2 179 | 44|Liar Liar (1997)|4 180 | 43|Big Night (1996)|4 181 | 279|Kazaam (1996)|1 182 | 80|Red Rock West (1992)|5 183 | 254|Abyss, The (1989)|4 184 | 298|River Wild, The (1994)|3 185 | 279|Ghost in the Shell (Kokaku kidotai) (1995)|1 186 | 66|Face/Off (1997)|4 187 | 18|Birds, The (1963)|3 188 | 268|Cool Runnings (1993)|2 189 | 99|Fugitive, The (1993)|4 190 | 13|Silence of the Lambs, The (1991)|4 191 | 26|Contact (1997)|3 192 | 7|Jackie Chan's First Strike (1996)|4 193 | 222|Jumanji (1995)|4 194 | 200|Cape Fear (1962)|5 195 | 119|Conspiracy Theory (1997)|4 196 | 213|Empire Strikes Back, The (1980)|5 197 | 276|Murder at 1600 (1997)|3 198 | 94|Assassins (1995)|3 199 | 130|Tales From the Crypt Presents: Demon Knight (1995)|4 200 | 38|Conspiracy Theory (1997)|4 201 | 160|Canadian Bacon (1994)|3 202 | 293|Clockers (1995)|3 203 | 26|Chain Reaction (1996)|2 204 | 130|When Harry Met Sally... (1989)|4 205 | 92|Joe's Apartment (1996)|3 206 | 256|Jaws 2 (1978)|4 207 | 1|Three Colors: White (1994)|4 208 | 72|Hoop Dreams (1994)|4 209 | 56|Jumanji (1995)|3 210 | 13|Wonderland (1997)|4 211 | 15|Mission: Impossible (1996)|2 212 | 92|Firm, The (1993)|3 213 | 207|First Wives Club, The (1996)|2 214 | 292|Raiders of the Lost Ark (1981)|5 215 | 232|Casablanca (1942)|5 216 | 251|Saint, The (1997)|2 217 | 224|Brothers McMullen, The (1995)|3 218 | 181|Mirror Has Two Faces, The (1996)|4 219 | 259|My Best Friend's Wedding (1997)|4 220 | 305|Courage Under Fire (1996)|4 221 | 52|Up Close and Personal (1996)|3 222 | 161|Groundhog Day (1993)|5 223 | 148|Close Shave, A (1995)|5 224 | 125|Mars Attacks! 
(1996)|2 225 | 97|Star Trek: The Wrath of Khan (1982)|5 226 | 58|Flirting With Disaster (1996)|4 227 | 83|Jaws (1975)|4 228 | 90|Wag the Dog (1997)|4 229 | 272|12 Angry Men (1957)|5 230 | 194|Return of the Jedi (1983)|3 231 | 125|Philadelphia Story, The (1940)|4 232 | 110|Leave It to Beaver (1997)|1 233 | 299|Postino, Il (1994)|4 234 | 151|Richard III (1995)|5 235 | 269|Godfather, The (1972)|4 236 | 6|Postino, Il (1994)|5 237 | 54|Diabolique (1996)|3 238 | 303|Forrest Gump (1994)|5 239 | 16|Renaissance Man (1994)|1 240 | 301|Tommy Boy (1995)|4 241 | 276|Pete's Dragon (1977)|3 242 | 305|Pink Floyd - The Wall (1982)|2 243 | 194|Grumpier Old Men (1995)|2 244 | 91|Dante's Peak (1997)|2 245 | 87|Waterworld (1995)|4 246 | 294|Mystery Science Theater 3000: The Movie (1996)|4 247 | 286|Delicatessen (1991)|4 248 | 200|Schindler's List (1993)|5 249 | 229|Conspiracy Theory (1997)|1 250 | 178|Speed (1994)|4 251 | 303|Pollyanna (1960)|2 252 | 62|What's Eating Gilbert Grape (1993)|4 253 | 207|Primal Fear (1996)|3 254 | 92|Empire Strikes Back, The (1980)|4 255 | 301|Brady Bunch Movie, The (1995)|4 256 | 36|Mad City (1997)|5 257 | 70|Real Genius (1985)|3 258 | 63|Kolya (1996)|3 259 | 28|Evil Dead II (1987)|3 260 | 279|Crow, The (1994)|4 261 | 250|Twelve Monkeys (1995)|4 262 | 14|Silence of the Lambs, The (1991)|3 263 | 299|Tie Me Up! Tie Me Down! 
(1990)|3 264 | 194|Outbreak (1995)|3 265 | 303|One Fine Day (1996)|3 266 | 119|Jerry Maguire (1996)|5 267 | 295|Cape Fear (1991)|5 268 | 268|Chain Reaction (1996)|2 269 | 268|GoldenEye (1995)|2 270 | 66|Contact (1997)|4 271 | 233|Groundhog Day (1993)|5 272 | 83|Angels in the Outfield (1994)|4 273 | 214|U Turn (1997)|3 274 | 192|First Wives Club, The (1996)|2 275 | 100|Apostle, The (1997)|4 276 | 268|Lawnmower Man, The (1992)|1 277 | 301|Pulp Fiction (1994)|4 278 | 307|Blade Runner (1982)|5 279 | 234|20,000 Leagues Under the Sea (1954)|3 280 | 83|Cliffhanger (1993)|4 281 | 181|Mimic (1997)|2 282 | 297|Gone with the Wind (1939)|4 283 | 38|Fish Called Wanda, A (1988)|5 284 | 7|Adventures of Priscilla, Queen of the Desert, The (1994)|4 285 | 264|Celluloid Closet, The (1995)|4 286 | 181|Love Jones (1997)|1 287 | 201|Unhook the Stars (1996)|1 288 | 85|Streetcar Named Desire, A (1951)|4 289 | 269|Clueless (1995)|3 290 | 59|Rudy (1993)|3 291 | 286|Sound of Music, The (1965)|4 292 | 193|Terminator 2: Judgment Day (1991)|1 293 | 113|Fan, The (1996)|5 294 | 292|Seven (Se7en) (1995)|5 295 | 130|Romy and Michele's High School Reunion (1997)|3 296 | 275|Silence of the Lambs, The (1991)|4 297 | 189|Great Escape, The (1963)|5 298 | 219|Jurassic Park (1993)|1 299 | 218|This Is Spinal Tap (1984)|5 300 | 123|To Kill a Mockingbird (1962)|3 301 | 119|Star Trek: First Contact (1996)|5 302 | 158|Good, The Bad and The Ugly, The (1966)|4 303 | 222|Twister (1996)|4 304 | 302|Murder at 1600 (1997)|2 305 | 279|Dumbo (1941)|3 306 | 301|Fugitive, The (1993)|5 307 | 181|Four Rooms (1995)|2 308 | 201|Kicking and Screaming (1995)|1 309 | 13|Nikita (La Femme Nikita) (1990)|3 310 | 1|Grand Day Out, A (1992)|3 311 | 145|Jerry Maguire (1996)|5 312 | 23|True Lies (1994)|4 313 | 201|Addiction, The (1995)|4 314 | 296|Singin' in the Rain (1952)|5 315 | 42|Broken Arrow (1996)|3 316 | 33|Love Jones (1997)|3 317 | 301|Waterworld (1995)|3 318 | 16|Shawshank Redemption, The (1994)|5 319 | 95|2001: A Space 
Odyssey (1968)|3 320 | 154|One Flew Over the Cuckoo's Nest (1975)|4 321 | 77|Maltese Falcon, The (1941)|5 322 | 296|People vs. Larry Flynt, The (1996)|5 323 | 302|Ulee's Gold (1997)|2 324 | 244|Cape Fear (1962)|3 325 | 222|Firm, The (1993)|4 326 | 13|Field of Dreams (1989)|5 327 | 16|Singin' in the Rain (1952)|5 328 | 270|Jaws 2 (1978)|4 329 | 145|Mr. Holland's Opus (1995)|2 330 | 187|Shawshank Redemption, The (1994)|5 331 | 200|Fly Away Home (1996)|5 332 | 170|MatchMaker, The (1997)|5 333 | 101|Fled (1996)|3 334 | 184|Cape Fear (1991)|3 335 | 128|Back to the Future (1985)|4 336 | 181|Kicked in the Head (1997)|1 337 | 184|Fish Called Wanda, A (1988)|3 338 | 1|Desperado (1995)|4 339 | 1|Glengarry Glen Ross (1992)|4 340 | 184|Mother (1996)|5 341 | 54|Fan, The (1996)|3 342 | 94|Alien: Resurrection (1997)|4 343 | 128|People vs. Larry Flynt, The (1996)|4 344 | 23|Dante's Peak (1997)|2 345 | 301|Star Trek VI: The Undiscovered Country (1991)|3 346 | 301|Amadeus (1984)|3 347 | 112|Afterglow (1997)|1 348 | 82|Alien (1979)|3 349 | 222|Circle of Friends (1995)|3 350 | 218|Duck Soup (1933)|3 351 | 308|Family Thing, A (1996)|4 352 | 303|Citizen Kane (1941)|5 353 | 133|Tomorrow Never Dies (1997)|3 354 | 215|Unbearable Lightness of Being, The (1988)|2 355 | 69|When the Cats Away (Chacun cherche son chat) (1996)|5 356 | 254|Somewhere in Time (1980)|4 357 | 276|GoldenEye (1995)|4 358 | 104|Shadow Conspiracy (1997)|1 359 | 63|Bottle Rocket (1996)|3 360 | 267|Kingpin (1996)|4 361 | 13|Pulp Fiction (1994)|5 362 | 240|Peacemaker, The (1997)|3 363 | 286|Jerry Maguire (1996)|2 364 | 294|Starship Troopers (1997)|5 365 | 90|It's My Party (1995)|4 366 | 18|Brothers McMullen, The (1995)|4 367 | 92|Star Trek III: The Search for Spock (1984)|3 368 | 308|Once Upon a Time in America (1984)|4 369 | 144|Blade Runner (1982)|3 370 | 191|L.A. 
Confidential (1997)|4 371 | 59|Indian in the Cupboard, The (1995)|3 372 | 200|Terminator 2: Judgment Day (1991)|5 373 | 16|Graduate, The (1967)|5 374 | 61|Volcano (1997)|3 375 | 271|Bridge on the River Kwai, The (1957)|4 376 | 271|Strictly Ballroom (1992)|3 377 | 142|Wrong Trousers, The (1993)|5 378 | 275|Eraser (1996)|3 379 | 222|Willy Wonka and the Chocolate Factory (1971)|3 380 | 87|To Wong Foo, Thanks for Everything! Julie Newmar (1995)|3 381 | 207|Contact (1997)|4 382 | 272|Stag (1997)|2 383 | 177|Game, The (1997)|4 384 | 207|Twelfth Night (1996)|2 385 | 299|Coneheads (1993)|3 386 | 271|Miracle on 34th Street (1994)|4 387 | 305|Bob Roberts (1992)|4 388 | 49|Dazed and Confused (1993)|2 389 | 94|Scout, The (1994)|3 390 | 130|Trees Lounge (1996)|3 391 | 10|Brazil (1985)|3 392 | 203|Mother (1996)|3 393 | 191|English Patient, The (1996)|4 394 | 43|Dante's Peak (1997)|3 395 | 21|Heavenly Creatures (1994)|5 396 | 197|Terminator 2: Judgment Day (1991)|5 397 | 13|Apostle, The (1997)|2 398 | 194|While You Were Sleeping (1995)|3 399 | 234|Akira (1988)|4 400 | 308|Ghost (1990)|4 401 | 308|Cook the Thief His Wife & Her Lover, The (1989)|4 402 | 269|Down by Law (1986)|5 403 | 94|Hunt for Red October, The (1990)|4 404 | 268|Stargate (1994)|3 405 | 272|Usual Suspects, The (1995)|5 406 | 121|Absolute Power (1997)|3 407 | 296|Angels and Insects (1995)|5 408 | 134|English Patient, The (1996)|3 409 | 180|Like Water For Chocolate (Como agua para chocolate) (1992)|5 410 | 234|Lost Horizon (1937)|3 411 | 104|Rock, The (1996)|2 412 | 38|Lawnmower Man 2: Beyond Cyberspace (1996)|1 413 | 269|That Thing You Do! 
(1996)|1 414 | 7|Return of the Pink Panther, The (1974)|4 415 | 234|Foreign Correspondent (1940)|3 416 | 275|Mission: Impossible (1996)|2 417 | 52|Fifth Element, The (1997)|3 418 | 102|Mulholland Falls (1996)|3 419 | 13|Blues Brothers, The (1980)|4 420 | 178|Corrina, Corrina (1994)|4 421 | 236|Lion King, The (1994)|3 422 | 256|French Kiss (1995)|5 423 | 263|Aliens (1986)|5 424 | 244|Blues Brothers, The (1980)|3 425 | 279|Low Down Dirty Shame, A (1994)|4 426 | 43|One Fine Day (1996)|4 427 | 83|Free Willy (1993)|2 428 | 151|Graduate, The (1967)|5 429 | 254|American Werewolf in London, An (1981)|2 430 | 109|Crying Game, The (1992)|3 431 | 297|Home for the Holidays (1995)|3 432 | 249|Full Metal Jacket (1987)|4 433 | 144|Little Women (1994)|4 434 | 301|It Happened One Night (1934)|4 435 | 64|Man Without a Face, The (1993)|3 436 | 92|Dumbo (1941)|2 437 | 222|Dances with Wolves (1990)|4 438 | 268|American Werewolf in London, An (1981)|3 439 | 293|2001: A Space Odyssey (1968)|5 440 | 213|Princess Bride, The (1987)|5 441 | 160|Crossing Guard, The (1995)|2 442 | 13|African Queen, The (1951)|4 443 | 59|To Die For (1995)|5 444 | 5|From Dusk Till Dawn (1996)|4 445 | 125|Return of the Pink Panther, The (1974)|5 446 | 174|Apt Pupil (1998)|5 447 | 114|Dial M for Murder (1954)|3 448 | 213|Boot, Das (1981)|4 449 | 23|Dead Poets Society (1989)|2 450 | 128|Mr. Holland's Opus (1995)|4 451 | 239|Pulp Fiction (1994)|4 452 | 181|Once Upon a Time... When We Were Colored (1995)|1 453 | 291|Hot Shots! 
Part Deux (1993)|4 454 | 250|Raising Arizona (1987)|4 455 | 201|Once Upon a Time in America (1984)|3 456 | 60|Three Colors: Blue (1993)|5 457 | 181|Crash (1996)|2 458 | 119|Spy Hard (1996)|3 459 | 287|Toy Story (1995)|5 460 | 216|Star Trek: The Wrath of Khan (1982)|3 461 | 216|Shine (1996)|4 462 | 203|Courage Under Fire (1996)|4 463 | 92|Hour of the Pig, The (1993)|3 464 | 13|Flubber (1997)|3 465 | 213|Aliens (1986)|4 466 | 286|Scream (1996)|5 467 | 117|Multiplicity (1996)|2 468 | 99|Truth About Cats & Dogs, The (1996)|1 469 | 11|Heavenly Creatures (1994)|3 470 | 65|Ed Wood (1994)|2 471 | 295|Sting, The (1973)|4 472 | 269|Bram Stoker's Dracula (1992)|2 473 | 85|George of the Jungle (1997)|2 474 | 250|Hunchback of Notre Dame, The (1996)|5 475 | 137|Die Hard (1988)|5 476 | 201|Naked (1993)|2 477 | 257|Big Night (1996)|4 478 | 111|Conspiracy Theory (1997)|4 479 | 91|North by Northwest (1959)|4 480 | 215|M*A*S*H (1970)|4 481 | 181|Smile Like Yours, A (1997)|1 482 | 189|Adventures of Pinocchio, The (1996)|5 483 | 1|Angels and Insects (1995)|4 484 | 303|Pinocchio (1940)|4 485 | 299|Ice Storm, The (1997)|3 486 | 187|Indiana Jones and the Last Crusade (1989)|4 487 | 222|Bed of Roses (1996)|2 488 | 214|Speed (1994)|4 489 | 293|Devil in a Blue Dress (1995)|3 490 | 285|Amadeus (1984)|4 491 | 303|Lost World: Jurassic Park, The (1997)|3 492 | 96|Reservoir Dogs (1992)|4 493 | 72|Tank Girl (1995)|3 494 | 115|Bottle Rocket (1996)|4 495 | 7|Duck Soup (1933)|3 496 | 116|Fallen (1998)|3 497 | 73|North by Northwest (1959)|4 498 | 269|Chasing Amy (1997)|5 499 | 263|Mary Poppins (1964)|5 500 | 70|Highlander (1986)|3 501 | 221|Trainspotting (1996)|4 502 | 72|GoodFellas (1990)|5 503 | 25|One Flew Over the Cuckoo's Nest (1975)|4 504 | 290|Star Wars (1977)|5 505 | 189|Ben-Hur (1959)|4 506 | 299|Ulee's Gold (1997)|3 507 | 264|Liar Liar (1997)|3 508 | 200|Powder (1995)|5 509 | 187|2001: A Space Odyssey (1968)|4 510 | 184|Godfather: Part II, The (1974)|4 511 | 63|Evita (1996)|2 512 | 13|Star 
Trek III: The Search for Spock (1984)|4 513 | 298|Sabrina (1954)|3 514 | 235|Psycho (1960)|4 515 | 62|Tin Men (1987)|4 516 | 246|Home Alone (1990)|2 517 | 54|Ransom (1996)|5 518 | 63|Beautiful Girls (1996)|3 519 | 11|Dave (1993)|3 520 | 92|Monty Python and the Holy Grail (1974)|4 521 | 8|Die Hard: With a Vengeance (1995)|3 522 | 307|Raiders of the Lost Ark (1981)|4 523 | 303|Shining, The (1980)|4 524 | 256|Days of Thunder (1990)|2 525 | 72|Outbreak (1995)|3 526 | 164|Thinner (1996)|2 527 | 117|Swingers (1996)|4 528 | 224|Firm, The (1993)|4 529 | 193|Fools Rush In (1997)|3 530 | 94|Army of Darkness (1993)|2 531 | 281|Bean (1997)|2 532 | 130|Mystery Science Theater 3000: The Movie (1996)|3 533 | 128|Bridges of Madison County, The (1995)|1 534 | 94|First Knight (1995)|1 535 | 182|That Thing You Do! (1996)|3 536 | 129|Picture Perfect (1997)|1 537 | 254|Star Trek III: The Search for Spock (1984)|4 538 | 64|Muriel's Wedding (1994)|4 539 | 151|Aliens (1986)|2 540 | 45|Birdcage, The (1996)|4 541 | 193|Peacemaker, The (1997)|3 542 | 276|Dead Man (1995)|4 543 | 276|Priest (1994)|3 544 | 234|Godfather: Part II, The (1974)|4 545 | 181|Mrs. Brown (Her Majesty, Mrs. 
Brown) (1997)|1 546 | 21|Mary Reilly (1996)|1 547 | 293|Austin Powers: International Man of Mystery (1997)|3 548 | 264|Mallrats (1995)|5 549 | 10|Laura (1944)|5 550 | 197|Jackie Brown (1997)|3 551 | 276|Bedknobs and Broomsticks (1971)|3 552 | 308|To Kill a Mockingbird (1962)|4 553 | 221|Killing Zoe (1994)|4 554 | 131|Spitfire Grill, The (1996)|4 555 | 268|Great White Hype, The (1996)|2 556 | 109|Babe (1995)|3 557 | 198|Quiz Show (1994)|3 558 | 230|Kull the Conqueror (1997)|4 559 | 181|Last Supper, The (1995)|1 560 | 192|Evening Star, The (1996)|4 561 | 234|Omen, The (1976)|3 562 | 90|Kundun (1997)|4 563 | 193|With Honors (1994)|4 564 | 128|Rear Window (1954)|5 565 | 126|Great Expectations (1998)|2 566 | 244|Hunt for Red October, The (1990)|4 567 | 90|Evita (1996)|3 568 | 157|Birdcage, The (1996)|3 569 | 305|Lion King, The (1994)|3 570 | 119|Adventures of Priscilla, Queen of the Desert, The (1994)|5 571 | 21|Star Trek: First Contact (1996)|2 572 | 231|Return of the Jedi (1983)|4 573 | 280|People vs. Larry Flynt, The (1996)|3 574 | 288|Wizard of Oz, The (1939)|3 575 | 279|Line King: Al Hirschfeld, The (1996)|2 576 | 301|Desperado (1995)|4 577 | 72|Little Women (1994)|3 578 | 90|George of the Jungle (1997)|2 579 | 308|Professional, The (1994)|3 580 | 59|Ransom (1996)|3 581 | 94|Michael Collins (1996)|4 582 | 130|Grifters, The (1990)|4 583 | 26|Shiloh (1997)|3 584 | 56|Independence Day (ID4) (1996)|5 585 | 82|People vs. Larry Flynt, The (1996)|2 586 | 62|Usual Suspects, The (1995)|4 587 | 276|To Wong Foo, Thanks for Everything! Julie Newmar (1995)|3 588 | 181|Shiloh (1997)|1 589 | 152|In & Out (1997)|3 590 | 178|That Thing You Do! 
(1996)|4 591 | 217|Eraser (1996)|4 592 | 79|Ulee's Gold (1997)|4 593 | 138|Maltese Falcon, The (1941)|4 594 | 308|Hudsucker Proxy, The (1994)|5 595 | 75|Tin Cup (1996)|2 596 | 269|Nikita (La Femme Nikita) (1990)|4 597 | 307|Home Alone (1990)|3 598 | 222|French Kiss (1995)|3 599 | 121|Jane Eyre (1996)|3 600 | 269|Braveheart (1995)|1 601 | 13|My Fellow Americans (1996)|4 602 | 230|Ransom (1996)|5 603 | 269|Streetcar Named Desire, A (1951)|4 604 | 239|Red Firecracker, Green Firecracker (1994)|5 605 | 245|Grumpier Old Men (1995)|5 606 | 56|Broken Arrow (1996)|3 607 | 295|Orlando (1993)|5 608 | 271|Grumpier Old Men (1995)|2 609 | 222|Andre (1994)|2 610 | 69|Beavis and Butt-head Do America (1996)|3 611 | 10|Twelve Monkeys (1995)|4 612 | 22|Houseguest (1994)|3 613 | 294|Island of Dr. Moreau, The (1996)|3 614 | 82|Juror, The (1996)|1 615 | 279|Three Musketeers, The (1993)|4 616 | 269|Jaws (1975)|1 617 | 6|Silence of the Lambs, The (1991)|5 618 | 243|Hamlet (1996)|4 619 | 298|Return of the Jedi (1983)|4 620 | 282|Crash (1996)|1 621 | 78|Dante's Peak (1997)|1 622 | 118|Shining, The (1980)|5 623 | 283|Faithful (1996)|5 624 | 171|Rosewood (1997)|4 625 | 70|Bram Stoker's Dracula (1992)|4 626 | 10|Fargo (1996)|5 627 | 245|Return of the Jedi (1983)|4 628 | 107|Game, The (1997)|3 629 | 246|Mary Shelley's Frankenstein (1994)|1 630 | 13|Mr. Magoo (1997)|1 631 | 276|Four Weddings and a Funeral (1994)|4 632 | 244|From Dusk Till Dawn (1996)|2 633 | 189|Pulp Fiction (1994)|5 634 | 226|Kolya (1996)|5 635 | 62|Con Air (1997)|4 636 | 276|Parent Trap, The (1961)|4 637 | 214|Philadelphia Story, The (1940)|4 638 | 306|Mars Attacks! 
(1996)|4 639 | 222|Brothers McMullen, The (1995)|3 640 | 280|Crying Game, The (1992)|5 641 | 60|Duck Soup (1933)|5 642 | 56|Lion King, The (1994)|4 643 | 42|Sabrina (1995)|5 644 | 1|Groundhog Day (1993)|5 645 | 13|Rising Sun (1993)|4 646 | 173|Evita (1996)|4 647 | 15|MatchMaker, The (1997)|1 648 | 185|Taxi Driver (1976)|4 649 | 280|Money Train (1995)|3 650 | 244|Muriel's Wedding (1994)|4 651 | 150|Donnie Brasco (1997)|4 652 | 7|Bringing Up Baby (1938)|4 653 | 178|In the Name of the Father (1993)|4 654 | 178|Ransom (1996)|3 655 | 95|Assassins (1995)|3 656 | 234|Thieves (Voleurs, Les) (1996)|3 657 | 97|Star Trek: First Contact (1996)|5 658 | 109|Godfather, The (1972)|2 659 | 117|Chasing Amy (1997)|5 660 | 269|Singin' in the Rain (1952)|2 661 | 130|Bushwhacked (1995)|3 662 | 264|Stand by Me (1986)|4 663 | 207|Mighty Aphrodite (1995)|3 664 | 42|Beauty and the Beast (1991)|5 665 | 246|Jack (1996)|2 666 | 87|Clueless (1995)|4 667 | 101|Fly Away Home (1996)|3 668 | 256|Godfather, The (1972)|4 669 | 92|It Could Happen to You (1994)|3 670 | 181|Beautiful Girls (1996)|2 671 | 213|Mars Attacks! (1996)|1 672 | 92|Pretty Woman (1990)|2 673 | 292|High Noon (1952)|5 674 | 246|Alien 3 (1992)|4 675 | 274|That Thing You Do! (1996)|5 676 | 188|American President, The (1995)|5 677 | 18|Remains of the Day, The (1993)|4 678 | 5|Amityville: A New Generation (1993)|1 679 | 236|Sophie's Choice (1982)|3 680 | 193|Spy Hard (1996)|4 681 | 144|Strictly Ballroom (1992)|4 682 | 90|Purple Noon (1960)|5 683 | 48|Father of the Bride (1950)|4 684 | 5|101 Dalmatians (1996)|2 685 | 22|Supercop (1992)|5 686 | 311|Fantasia (1940)|4 687 | 8|Braveheart (1995)|5 688 | 276|Full Metal Jacket (1987)|4 689 | 222|Princess Bride, The (1987)|5 690 | 72|Michael (1996)|4 691 | 299|Citizen Kane (1941)|4 692 | 1|Delicatessen (1991)|5 693 | 308|Breakdown (1997)|3 694 | 165|When Harry Met Sally... (1989)|4 695 | 222|I.Q. 
(1994)|3 696 | 181|Independence Day (ID4) (1996)|4 697 | 200|Seven (Se7en) (1995)|5 698 | 234|So Dear to My Heart (1949)|4 699 | 244|Enchanted April (1991)|4 700 | 90|Birdcage, The (1996)|5 701 | 208|When Harry Met Sally... (1989)|5 702 | 263|Terminator 2: Judgment Day (1991)|4 703 | 134|Dante's Peak (1997)|4 704 | 279|Terminal Velocity (1994)|4 705 | 2|Rosewood (1997)|4 706 | 288|Stalingrad (1993)|2 707 | 49|L.A. Confidential (1997)|4 708 | 286|Fish Called Wanda, A (1988)|5 709 | 205|Fly Away Home (1996)|3 710 | 22|Hot Shots! Part Deux (1993)|4 711 | 234|Schindler's List (1993)|4 712 | 223|Conspiracy Theory (1997)|3 713 | 15|Birdcage, The (1996)|3 714 | 268|Long Kiss Goodnight, The (1996)|4 715 | 94|Higher Learning (1995)|3 716 | 274|Mission: Impossible (1996)|4 717 | 7|East of Eden (1955)|5 718 | 268|Bram Stoker's Dracula (1992)|2 719 | 16|Professional, The (1994)|5 720 | 164|Chamber, The (1996)|3 721 | 290|Top Gun (1986)|4 722 | 92|Boot, Das (1981)|4 723 | 239|Living in Oblivion (1995)|5 724 | 56|Star Trek: The Motion Picture (1979)|5 725 | 248|Jaws (1975)|4 726 | 234|Richard III (1995)|3 727 | 280|House Arrest (1996)|2 728 | 308|Godfather: Part II, The (1974)|5 729 | 276|Shawshank Redemption, The (1994)|5 730 | 192|Booty Call (1997)|3 731 | 122|My Left Foot (1989)|4 732 | 85|Beauty and the Beast (1991)|3 733 | 262|Island of Dr. Moreau, The (1996)|2 734 | 201|Good Will Hunting (1997)|3 735 | 181|Touch (1997)|2 736 | 295|Pretty Woman (1990)|4 737 | 263|Speed (1994)|4 738 | 295|Strange Days (1995)|4 739 | 201|What Happened Was... (1994)|4 740 | 93|Space Jam (1996)|3 741 | 159|Grumpier Old Men (1995)|5 742 | 158|Alien 3 (1992)|2 743 | 293|E.T. 
the Extra-Terrestrial (1982)|3 744 | 82|Eraser (1996)|3 745 | 276|Return of the Jedi (1983)|5 746 | 13|Mulholland Falls (1996)|5 747 | 217|GoldenEye (1995)|3 748 | 83|Fried Green Tomatoes (1991)|4 749 | 189|Angels and Insects (1995)|5 750 | 222|Speechless (1994)|4 751 | 146|Fast, Cheap & Out of Control (1997)|5 752 | 267|Independence Day (ID4) (1996)|3 753 | 126|Liar Liar (1997)|3 754 | 181|Adventures of Pinocchio, The (1996)|1 755 | 125|Hot Shots! Part Deux (1993)|4 756 | 43|Striptease (1996)|4 757 | 13|Dumb & Dumber (1994)|1 758 | 253|George of the Jungle (1997)|2 759 | 42|Dolores Claiborne (1994)|3 760 | 77|Miller's Crossing (1990)|4 761 | 291|Perfect World, A (1993)|5 762 | 268|Muppet Treasure Island (1996)|3 763 | 262|Apollo 13 (1995)|3 764 | 234|Hudsucker Proxy, The (1994)|3 765 | 29|Devil's Own, The (1997)|3 766 | 236|Priest (1994)|5 767 | 158|Nell (1994)|3 768 | 156|High Noon (1952)|4 769 | 232|Madness of King George, The (1994)|5 770 | 168|Michael (1996)|5 771 | 37|Scream (1996)|4 772 | 141|Devil's Own, The (1997)|3 773 | 235|Star Trek IV: The Voyage Home (1986)|4 774 | 102|Four Weddings and a Funeral (1994)|3 775 | 77|Empire Strikes Back, The (1980)|3 776 | 90|Rebel Without a Cause (1955)|5 777 | 186|Clear and Present Danger (1994)|5 778 | 44|Fried Green Tomatoes (1991)|5 779 | 118|Prophecy, The (1995)|5 780 | 7|High Noon (1952)|5 781 | 49|That Darn Cat! (1997)|2 782 | 62|Crow, The (1994)|1 783 | 42|Grumpier Old Men (1995)|4 784 | 178|Heathers (1989)|4 785 | 85|Legends of the Fall (1994)|2 786 | 77|Dr. 
Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)|5 787 | 58|Red Firecracker, Green Firecracker (1994)|2 788 | 56|Multiplicity (1996)|4 789 | 197|Leave It to Beaver (1997)|1 790 | 286|Snow White and the Seven Dwarfs (1937)|4 791 | 90|Contact (1997)|3 792 | 181|Denise Calls Up (1995)|1 793 | 295|Henry V (1989)|4 794 | 224|Forrest Gump (1994)|4 795 | 272|In the Name of the Father (1993)|4 796 | 221|Basquiat (1996)|3 797 | 66|Excess Baggage (1997)|1 798 | 207|Schindler's List (1993)|5 799 | 234|Roman Holiday (1953)|3 800 | 7|Quiet Man, The (1952)|5 801 | 87|Jurassic Park (1993)|5 802 | 195|Dracula: Dead and Loving It (1995)|1 803 | 44|Star Trek: The Motion Picture (1979)|5 804 | 306|Marvin's Room (1996)|4 805 | 194|Empire Strikes Back, The (1980)|3 806 | 94|Stargate (1994)|3 807 | 167|Arsenic and Old Lace (1944)|4 808 | 108|Fargo (1996)|4 809 | 230|Fly Away Home (1996)|5 810 | 181|Flower of My Secret, The (Flor de mi secreto, La) (1995)|1 811 | 54|L.A. Confidential (1997)|4 812 | 90|Braveheart (1995)|4 813 | 181|City Hall (1996)|2 814 | 286|One Flew Over the Cuckoo's Nest (1975)|4 815 | 14|Full Monty, The (1997)|4 816 | 311|Clockwork Orange, A (1971)|2 817 | 92|Independence Day (ID4) (1996)|5 818 | 21|Amityville II: The Possession (1982)|1 819 | 244|Die Hard: With a Vengeance (1995)|1 820 | 181|Mission: Impossible (1996)|4 821 | 65|Menace II Society (1993)|4 822 | 37|Money Train (1995)|2 823 | 44|Birds, The (1963)|5 824 | 244|Alien (1979)|4 825 | 1|Hunt for Red October, The (1990)|4 826 | 270|Birdcage, The (1996)|5 827 | 299|Age of Innocence, The (1993)|2 828 | 94|Blown Away (1994)|3 829 | 286|Real Genius (1985)|4 830 | 239|Good Will Hunting (1997)|5 831 | 216|Professional, The (1994)|5 832 | 254|Independence Day (ID4) (1996)|3 833 | 62|Alien 3 (1992)|2 834 | 178|True Lies (1994)|4 835 | 194|Taxi Driver (1976)|4 836 | 268|Before Sunrise (1995)|3 837 | 188|Sound of Music, The (1965)|5 838 | 276|Liar Liar (1997)|4 839 | 158|Flirting With Disaster 
(1996)|4 840 | 207|That Thing You Do! (1996)|3 841 | 161|Hoop Dreams (1994)|1 842 | 305|Chinatown (1974)|4 843 | 47|Lost Highway (1997)|3 844 | 64|Shadowlands (1993)|4 845 | 191|Tomorrow Never Dies (1997)|3 846 | 7|Miracle on 34th Street (1994)|5 847 | 59|True Romance (1993)|5 848 | 69|Chasing Amy (1997)|5 849 | 10|Smoke (1995)|3 850 | 21|Bound (1996)|4 851 | 58|Dead Man Walking (1995)|4 852 | 194|Sleeper (1973)|3 853 | 7|Shining, The (1980)|5 854 | 113|Spitfire Grill, The (1996)|5 855 | 173|Conspiracy Theory (1997)|5 856 | 95|Under Siege (1992)|4 857 | 16|Sting, The (1973)|5 858 | 59|Dante's Peak (1997)|4 859 | 311|Chinatown (1974)|3 860 | 292|Wild Bunch, The (1969)|4 861 | 43|Unforgiven (1992)|4 862 | 79|Star Wars (1977)|4 863 | 235|Four Weddings and a Funeral (1994)|5 864 | 125|Henry V (1989)|5 865 | 284|Murder at 1600 (1997)|3 866 | 303|Top Gun (1986)|5 867 | 254|Miracle on 34th Street (1994)|3 868 | 255|Quest, The (1996)|1 869 | 104|In & Out (1997)|2 870 | 90|Raise the Red Lantern (1991)|5 871 | 6|Secret of Roan Inish, The (1994)|4 872 | 279|Cable Guy, The (1996)|1 873 | 286|Face/Off (1997)|4 874 | 222|Omen, The (1976)|3 875 | 297|Priest (1994)|5 876 | 42|Sword in the Stone, The (1963)|3 877 | 130|Assassins (1995)|4 878 | 254|One Flew Over the Cuckoo's Nest (1975)|3 879 | 109|Trainspotting (1996)|1 880 | 230|That Darn Cat! (1965)|2 881 | 244|Rainmaker, The (1997)|3 882 | 6|In & Out (1997)|2 883 | 36|Saint, The (1997)|4 884 | 256|Birds, The (1963)|3 885 | 102|Boot, Das (1981)|1 886 | 104|Secrets & Lies (1996)|4 887 | 21|Carrie (1976)|5 888 | 111|In & Out (1997)|4 889 | 18|Close Shave, A (1995)|5 890 | 25|Star Trek: First Contact (1996)|4 891 | 110|Renaissance Man (1994)|3 892 | 270|Silence of the Lambs, The (1991)|5 893 | 68|Jerry Maguire (1996)|5 894 | 83|Field of Dreams (1989)|4 895 | 6|Contact (1997)|2 896 | 89|When Harry Met Sally... 
(1989)|5 897 | 128|In the Name of the Father (1993)|4 898 | 305|Wings of Desire (1987)|4 899 | 184|Very Brady Sequel, A (1996)|2 900 | 286|Brazil (1985)|5 901 | 279|SubUrbia (1997)|3 902 | 256|Remains of the Day, The (1993)|5 903 | 221|Hoop Dreams (1994)|5 904 | 140|Kiss the Girls (1997)|3 905 | 190|Substitute, The (1996)|2 906 | 11|Star Trek VI: The Undiscovered Country (1991)|3 907 | 201|Unforgiven (1992)|5 908 | 150|Return of the Jedi (1983)|5 909 | 126|Devil's Own, The (1997)|3 910 | 20|Young Frankenstein (1974)|2 911 | 144|Ransom (1996)|4 912 | 181|Chain Reaction (1996)|1 913 | 109|Clear and Present Danger (1994)|4 914 | 85|Koyaanisqatsi (1983)|3 915 | 213|Gone with the Wind (1939)|3 916 | 222|Tales From the Crypt Presents: Demon Knight (1995)|1 917 | 223|Seven (Se7en) (1995)|3 918 | 215|William Shakespeare's Romeo and Juliet (1996)|4 919 | 218|Young Frankenstein (1974)|3 920 | 174|Thousand Acres, A (1997)|5 921 | 275|Blues Brothers, The (1980)|3 922 | 68|Ransom (1996)|1 923 | 268|Romeo Is Bleeding (1993)|4 924 | 160|Like Water For Chocolate (Como agua para chocolate) (1992)|4 925 | 195|Heat (1995)|4 926 | 224|12 Angry Men (1957)|4 927 | 5|Operation Dumbo Drop (1995)|1 928 | 99|Con Air (1997)|5 929 | 2|Shall We Dance? (1996)|5 930 | 292|Dead Man Walking (1995)|4 931 | 72|Speed (1994)|4 932 | 85|Star Trek: The Wrath of Khan (1982)|3 933 | 83|River Wild, The (1994)|5 934 | 92|Escape from L.A. (1996)|2 935 | 7|Misérables, Les (1995)|3 936 | 87|Brady Bunch Movie, The (1995)|2 937 | 287|Down Periscope (1996)|4 938 | 1|Dirty Dancing (1987)|2 939 | 234|Sophie's Choice (1982)|2 940 | 222|Natural Born Killers (1994)|5 941 | 24|Shawshank Redemption, The (1994)|5 942 | 7|Waterworld (1995)|3 943 | 82|Pulp Fiction (1994)|3 944 | 161|Schindler's List (1993)|3 945 | 196|Mrs. 
Doubtfire (1993)|4 946 | 56|Nightmare Before Christmas, The (1993)|4 947 | 82|Matilda (1996)|3 948 | 7|Dragonheart (1996)|2 949 | 256|Nick of Time (1995)|4 950 | 226|Pulp Fiction (1994)|4 951 | 279|Last Supper, The (1995)|5 952 | 308|Shall We Dance? (1937)|3 953 | 16|Babe (1995)|5 954 | 180|Groundhog Day (1993)|3 955 | 203|Welcome to the Dollhouse (1995)|4 956 | 145|Pulp Fiction (1994)|5 957 | 288|Ice Storm, The (1997)|4 958 | 84|Ransom (1996)|3 959 | 44|Thin Blue Line, The (1988)|3 960 | 17|Mighty Aphrodite (1995)|3 961 | 313|Rock, The (1996)|4 962 | 148|Toy Story (1995)|4 963 | 197|Wag the Dog (1997)|4 964 | 21|Abyss, The (1989)|5 965 | 279|Maximum Risk (1996)|3 966 | 239|Adventures of Robin Hood, The (1938)|5 967 | 185|Marvin's Room (1996)|5 968 | 297|Blade Runner (1982)|4 969 | 303|Crow, The (1994)|4 970 | 186|Fifth Element, The (1997)|1 971 | 73|Akira (1988)|3 972 | 104|Father of the Bride Part II (1995)|2 973 | 94|When Harry Met Sally... (1989)|3 974 | 239|Sting, The (1973)|5 975 | 197|Lawrence of Arabia (1962)|5 976 | 280|Toy Story (1995)|4 977 | 1|Rock, The (1996)|3 978 | 224|Romeo Is Bleeding (1993)|1 979 | 303|Striking Distance (1993)|1 980 | 60|On Golden Pond (1981)|4 981 | 198|Contact (1997)|4 982 | 239|Third Man, The (1949)|5 983 | 6|Forrest Gump (1994)|3 984 | 233|Showgirls (1995)|4 985 | 85|Grifters, The (1990)|4 986 | 110|Net, The (1995)|3 987 | 184|Down by Law (1986)|3 988 | 99|Picture Perfect (1997)|1 989 | 13|Cinderella (1950)|2 990 | 201|Miller's Crossing (1990)|4 991 | 13|Amityville: Dollhouse (1996)|1 992 | 214|Breakfast at Tiffany's (1961)|3 993 | 296|Star Trek: The Wrath of Khan (1982)|4 994 | 222|Searching for Bobby Fischer (1993)|3 995 | 279|Exit to Eden (1994)|4 996 | 217|GoodFellas (1990)|2 997 | 85|Heathers (1989)|3 998 | 239|Jaws (1975)|3 999 | 13|Mask, The (1994)|4 1000 | 194|Firm, The (1993)|3 1001 | 208|Being There (1979)|5 1002 | 109|12 Angry Men (1957)|3 1003 | 
-------------------------------------------------------------------------------- /data/ratings2.txt: -------------------------------------------------------------------------------- 1 | Jack Matthews|Lady in the Water|3.0 2 | Jack Matthews|Snakes on a Plane|4.0 3 | Jack Matthews|You, Me and Dupree|3.5 4 | Jack Matthews|Superman Returns|5.0 5 | Jack Matthews|The Night Listener|3.0 6 | Mick LaSalle|Lady in the Water|3.0 7 | Mick LaSalle|Snakes on a Plane|4.0 8 | Mick LaSalle|Just My Luck|2.0 9 | Mick LaSalle|Superman Returns|3.0 10 | Mick LaSalle|You, Me and Dupree|2.0 11 | Mick LaSalle|The Night Listener|3.0 12 | Claudia Puig|Snakes on a Plane|3.5 13 | Claudia Puig|Just My Luck|3.0 14 | Claudia Puig|You, Me and Dupree|2.5 15 | Claudia Puig|Superman Returns|4.0 16 | Claudia Puig|The Night Listener|4.5 17 | Lisa Rose|Lady in the Water|2.5 18 | Lisa Rose|Snakes on a Plane|3.5 19 | Lisa Rose|Just My Luck|3.0 20 | Lisa Rose|Superman Returns|3.5 21 | Lisa Rose|The Night Listener|3.0 22 | Lisa Rose|You, Me and Dupree|2.5 23 | Toby|Snakes on a Plane|4.5 24 | Toby|Superman Returns|4.0 25 | Toby|You, Me and Dupree|1.0 26 | Gene Seymour|Lady in the Water|3.0 27 | Gene Seymour|Snakes on a Plane|3.5 28 | Gene Seymour|Just My Luck|1.5 29 | Gene Seymour|Superman Returns|5.0 30 | Gene Seymour|You, Me and Dupree|3.5 31 | Gene Seymour|The Night Listener|3.0 32 | Michael Phillips|Lady in the Water|2.5 33 | Michael Phillips|Snakes on a Plane|3.0 34 | Michael Phillips|Superman Returns|3.5 35 | Michael Phillips|The Night Listener|4.0 36 | -------------------------------------------------------------------------------- /data/results/user_cf_mr_predict.csv: -------------------------------------------------------------------------------- 1 | LadyintheWater,JustMyLuck,TheNightListener 2 | LadyintheWater,SnakesonaPlane,YouMeandDupree 3 | LadyintheWater,JustMyLuck,YouMeandDupree 4 | LadyintheWater 5 | TheNightListener,SupermanReturns,JustMyLuck 6 | 
-------------------------------------------------------------------------------- /data/results/user_cf_predict.csv: -------------------------------------------------------------------------------- 1 | Just My Luck 2 | Lady in the Water 3 | The Night Listener,Lady in the Water,Just My Luck 4 | "You, Me and Dupree",Just My Luck 5 | -------------------------------------------------------------------------------- /data/results/user_cf_test.csv: -------------------------------------------------------------------------------- 1 | YouMeandDupree,LadyintheWater 2 | YouMeandDupree,SupermanReturns,SnakesonaPlane 3 | TheNightListener,JustMyLuck 4 | SnakesonaPlane,SupermanReturns,LadyintheWater 5 | YouMeandDupree 6 | SnakesonaPlane,LadyintheWater,YouMeandDupree 7 | TheNightListener,SupermanReturns 8 | -------------------------------------------------------------------------------- /data/results/user_cf_train.csv: -------------------------------------------------------------------------------- 1 | JackMatthews,SnakesonaPlane,4.0 2 | JackMatthews,SupermanReturns,5.0 3 | JackMatthews,TheNightListener,3.0 4 | MickLaSalle,LadyintheWater,3.0 5 | MickLaSalle,JustMyLuck,2.0 6 | MickLaSalle,TheNightListener,3.0 7 | ClaudiaPuig,SnakesonaPlane,3.5 8 | ClaudiaPuig,YouMeandDupree,2.5 9 | ClaudiaPuig,SupermanReturns,4.0 10 | LisaRose,JustMyLuck,3.0 11 | LisaRose,TheNightListener,3.0 12 | LisaRose,YouMeandDupree,2.5 13 | Toby,SnakesonaPlane,4.5 14 | Toby,SupermanReturns,4.0 15 | GeneSeymour,JustMyLuck,1.5 16 | GeneSeymour,SupermanReturns,5.0 17 | GeneSeymour,TheNightListener,3.0 18 | MichaelPhillips,LadyintheWater,2.5 19 | MichaelPhillips,SnakesonaPlane,3.0 20 | -------------------------------------------------------------------------------- /data/results/user_item_cf_spark.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/evancasey/spark-knn-recommender/5ce96bb9bcea22ed8a859bac9988726da78fc1aa/data/results/user_item_cf_spark.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import config 2 | import sys,os 3 | 4 | os.system("sudo mkdir " + config.SPARKLER_HOME) 5 | -------------------------------------------------------------------------------- /train_and_test.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import sys,os 3 | 4 | import config 5 | from utils import run_kmeans, run_usercf, run_itemcf 6 | from algorithms import * 7 | 8 | DATA_KMEANS = "data/kmeans_data.txt" 9 | DATA_CF_LOCAL = "tests/data/cftrain.txt" 10 | DATA_CF_S3 = "s3n://sparkler-data/ratings10m.txt" 11 | 12 | if __name__ == "__main__": 13 | 14 | # Copy contents of algorithms into pyspark home 15 | # TODO: use spark_home from install.sh (make install.sh set it in config?) 
16 | os.system("sudo cp -avr algorithms/* " + config.SPARKLER_HOME) 17 | 18 | # run_kmeans(DATA_KMEANS, 2, 5) 19 | 20 | # run_usercf(DATA_CF_LOCAL) 21 | 22 | run_itemcf(DATA_CF_LOCAL) 23 | 24 | # run_itemcf(DATA_CF_S3) 25 | 26 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import config 2 | import sys,os 3 | 4 | def run_kmeans(data,k,converge_dist): 5 | ''' Execute kmeans.py from pySpark home ''' 6 | 7 | os.system("./" + config.PYSPARK_HOME + " " + config.SPARKLER_HOME + "/kmeans.py " + config.CLUSTER_CONFIG + " " + data + " " + str(k) + " " + str(converge_dist)) 8 | 9 | def run_usercf(data): 10 | ''' Execute user_cf.py from pySpark home ''' 11 | 12 | os.system("./" + config.PYSPARK_HOME + " " + config.SPARKLER_HOME + "/userBasedRecommender.py " + config.CLUSTER_CONFIG + " " + data) 13 | 14 | def run_itemcf(data): 15 | ''' Execute item_cf.py from pySpark home ''' 16 | 17 | os.system("./" + config.PYSPARK_HOME + " " + config.SPARKLER_HOME + "/itemBasedRecommender.py " + config.CLUSTER_CONFIG + " " + data) --------------------------------------------------------------------------------