├── .gitignore ├── README.md ├── autorec-paper.pdf ├── cf_autoencoder_estimator.ipynb ├── conda_environment.yml ├── data ├── dataprep.py └── ml-100k │ ├── README │ ├── u.data │ ├── u.genre │ ├── u.info │ ├── u.item │ ├── u.occupation │ └── u.user ├── matrix_factorization_estimator.ipynb └── matrix_factorization_started.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .idea* 92 | 93 | # data folders 94 | _*/ 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tf-reco 2 | 3 | ## Installation Guide 4 | 5 | Python version: Python 3.x 6 | 7 | Follow the instructions on the Anaconda download site (https://www.continuum.io/downloads) to download and install Anaconda. 8 | 9 | Follow the instructions on the TensorFlow site to install the CPU-only version of TensorFlow for Python 3.x 10 | 11 | Windows User: https://www.tensorflow.org/versions/r1.2/install/install_windows#installing_with_anaconda 12 | 13 | Mac User: https://www.tensorflow.org/versions/r1.2/install/install_mac#installing_with_anaconda 14 | 15 | Ubuntu User: https://www.tensorflow.org/versions/r1.2/install/install_linux#installing_with_anaconda 16 | 17 | Install scikit-learn and jupyter in the same environment where you installed TensorFlow 18 | 19 | ``` 20 | conda install scikit-learn 21 | conda install jupyter 22 | ``` 23 | -------------------------------------------------------------------------------- /autorec-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tf-dl-workshop/tf-reco-workshop/2e4db76dd5fd46158911b71c4efb49ea81c967b8/autorec-paper.pdf -------------------------------------------------------------------------------- /cf_autoencoder_estimator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import numpy as np\n", 13 | "from data import dataprep\n", 14 | "from tensorflow.contrib import layers\n", 15 | "from tensorflow.contrib.learn import *\n", 16 | "\n", 17 | "from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib\n", 18 | "from 
tensorflow.python.estimator.inputs import numpy_io" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "train_arr, test_arr, train_eval_arr = dataprep.ae_train_test()\n", 30 | "\n", 31 | "train_rating = {'ratings': train_arr.astype(np.float32)}\n", 32 | "test_eval_rating = {'ratings': train_eval_arr.astype(np.float32), 'targets': test_arr.astype(np.float32)}\n", 33 | "train_eval_rating = {'ratings': train_eval_arr.astype(np.float32), 'targets': train_eval_arr.astype(np.float32)}" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def auto_encoder(features, targets, mode, params):\n", 45 | " ratings = features['ratings']\n", 46 | "\n", 47 | " with tf.name_scope(\"dense_to_sparse\"):\n", 48 | " idx = tf.where(tf.not_equal(ratings, 0.0))\n", 49 | " sparse_ratings = tf.SparseTensor(idx, tf.gather_nd(ratings, idx), ratings.get_shape())\n", 50 | "\n", 51 | " with tf.variable_scope(\"encoder\"):\n", 52 | " encoder_w = tf.get_variable(\"e_w\", shape=[params['n_users'], params['n_dims']])\n", 53 | " encoder_b = tf.get_variable(\"e_b\", shape=[params['n_dims']], initializer=tf.zeros_initializer)\n", 54 | "\n", 55 | " encoding_op = tf.sigmoid(tf.sparse_tensor_dense_matmul(sparse_ratings, encoder_w) + encoder_b)\n", 56 | "\n", 57 | " with tf.variable_scope(\"decoder\"):\n", 58 | " decoder_w = tf.get_variable(\"d_w\", shape=[params['n_dims'], params['n_users']])\n", 59 | " decoder_b = tf.get_variable(\"d_b\", shape=[params['n_users']], initializer=tf.zeros_initializer)\n", 60 | "\n", 61 | " decoding_op = tf.identity(tf.matmul(encoding_op, decoder_w) + decoder_b)\n", 62 | "\n", 63 | " loss = None\n", 64 | " rmse = None\n", 65 | " if mode != model_fn_lib.ModeKeys.INFER:\n", 66 | " with tf.name_scope(\"loss\"):\n", 67 | " # backpropagate only 
partial observed ratings\n", 68 | " neg_sparse_decoding = tf.SparseTensor(idx, tf.negative(tf.gather_nd(decoding_op, idx)),\n", 69 | " decoding_op.get_shape())\n", 70 | " reg_loss = layers.apply_regularization(layers.l2_regularizer(scale=params['l2reg']),\n", 71 | " weights_list=[encoder_w, decoder_w])\n", 72 | " diff = tf.sparse_add(sparse_ratings, neg_sparse_decoding).values\n", 73 | " loss = tf.reduce_mean(tf.square(diff)) + reg_loss\n", 74 | " if mode == model_fn_lib.ModeKeys.EVAL:\n", 75 | " with tf.name_scope(\"eval_loss\"):\n", 76 | " targets = features['targets']\n", 77 | " target_idx = tf.where(tf.not_equal(targets, 0.0))\n", 78 | " sparse_targets = tf.SparseTensor(target_idx, tf.gather_nd(targets, target_idx), targets.get_shape())\n", 79 | " neg_sparse_decoding = tf.SparseTensor(target_idx, tf.negative(tf.gather_nd(decoding_op, target_idx)),\n", 80 | " decoding_op.get_shape())\n", 81 | " diff = tf.sparse_add(sparse_targets, neg_sparse_decoding).values\n", 82 | " rmse = tf.sqrt(tf.reduce_mean(tf.square(diff)))\n", 83 | "\n", 84 | " for v in tf.trainable_variables():\n", 85 | " tf.summary.histogram(name=v.name.replace(\":0\",\"\"), values=v)\n", 86 | "\n", 87 | " predictions = decoding_op\n", 88 | " eval_metric_ops = {'rmse': rmse}\n", 89 | " train_op = layers.optimize_loss(\n", 90 | " loss=loss,\n", 91 | " global_step=tf.contrib.framework.get_global_step(),\n", 92 | " learning_rate=params[\"learning_rate\"],\n", 93 | " optimizer=tf.train.AdamOptimizer,\n", 94 | " summaries=[\n", 95 | " \"learning_rate\",\n", 96 | " \"loss\",\n", 97 | " \"gradients\",\n", 98 | " \"gradient_norm\",\n", 99 | " ])\n", 100 | "\n", 101 | " return ModelFnOps(mode, predictions, loss, train_op, eval_metric_ops)\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": true 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "model_params = dict(\n", 113 | " n_items=train_arr.shape[0],\n", 114 | " 
n_users=train_arr.shape[1],\n", 115 | " n_dims=40,\n", 116 | " l2reg=0.001,\n", 117 | " learning_rate=0.001\n", 118 | ")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# input queue for training\n", 128 | "train_input_fn = numpy_io.numpy_input_fn(\n", 129 | " x=train_rating, y=np.zeros(shape=[train_arr.shape[0], 1]), batch_size=256, shuffle=True, num_epochs=None)\n", 130 | "# input queue for evaluation on test data\n", 131 | "test_eval_input_fn = numpy_io.numpy_input_fn(\n", 132 | " x=test_eval_rating, y=np.zeros(shape=[test_arr.shape[0], 1]), batch_size=test_arr.shape[0], shuffle=False,\n", 133 | " num_epochs=None)\n", 134 | "# input queue for evaluation on training data\n", 135 | "train_eval_input_fn = numpy_io.numpy_input_fn(\n", 136 | " x=train_eval_rating, y=np.zeros(shape=[test_arr.shape[0], 1]), batch_size=test_arr.shape[0], shuffle=False,\n", 137 | " num_epochs=None)\n", 138 | "\n", 139 | "monitor_test = monitors.ValidationMonitor(input_fn=test_eval_input_fn, eval_steps=1, every_n_steps=100,\n", 140 | " name='test')\n", 141 | "monitor_train = monitors.ValidationMonitor(input_fn=train_eval_input_fn, eval_steps=1, every_n_steps=100,\n", 142 | " name='train')" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "autoencoder_cf = Estimator(\n", 152 | " model_fn=auto_encoder,\n", 153 | " params=model_params,\n", 154 | " model_dir='_summary/auto_rec',\n", 155 | " config=RunConfig(save_checkpoints_secs=10))\n", 156 | "\n", 157 | "autoencoder_cf.fit(input_fn=train_input_fn, steps=5000, monitors=[monitor_test, monitor_train])" 158 | ] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 3", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | 
"version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.6.1" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /conda_environment.yml: -------------------------------------------------------------------------------- 1 | name: intro_dl 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - ca-certificates=2017.1.23=0 7 | - certifi=2017.1.23=py35_0 8 | - mock=2.0.0=py35_0 9 | - ncurses=5.9=10 10 | - openssl=1.0.2h=3 11 | - pbr=1.10.0=py35_0 12 | - pip=9.0.1=py35_0 13 | - protobuf=3.1.0=py35_0 14 | - python=3.5.3=1 15 | - readline=6.2=0 16 | - setuptools=33.1.0=py35_0 17 | - six=1.10.0=py35_1 18 | - sqlite=3.13.0=1 19 | - tensorflow=0.12.1=py35_2 20 | - tk=8.5.19=1 21 | - wheel=0.29.0=py35_0 22 | - xz=5.2.2=0 23 | - zlib=1.2.11=0 24 | - appnope=0.1.0=py35_0 25 | - cycler=0.10.0=py35_0 26 | - decorator=4.0.11=py35_0 27 | - entrypoints=0.2.2=py35_1 28 | - freetype=2.5.5=2 29 | - icu=54.1=0 30 | - ipykernel=4.5.2=py35_0 31 | - ipython=5.1.0=py35_1 32 | - ipython_genutils=0.1.0=py35_0 33 | - ipywidgets=5.2.2=py35_1 34 | - jinja2=2.9.4=py35_0 35 | - jsonschema=2.5.1=py35_0 36 | - jupyter=1.0.0=py35_3 37 | - jupyter_client=4.4.0=py35_0 38 | - jupyter_console=5.0.0=py35_0 39 | - jupyter_core=4.2.1=py35_0 40 | - libpng=1.6.27=0 41 | - markupsafe=0.23=py35_2 42 | - matplotlib=2.0.0=np112py35_0 43 | - mistune=0.7.3=py35_1 44 | - mkl=2017.0.1=0 45 | - nbconvert=4.2.0=py35_0 46 | - nbformat=4.2.0=py35_0 47 | - notebook=4.3.1=py35_0 48 | - numpy=1.12.0=py35_0 49 | - pandas=0.19.2=np112py35_1 50 | - path.py=10.0=py35_0 51 | - pexpect=4.2.1=py35_0 52 | - pickleshare=0.7.4=py35_0 53 | - prompt_toolkit=1.0.9=py35_0 54 | - ptyprocess=0.5.1=py35_0 55 | - pygments=2.1.3=py35_0 56 | - pyparsing=2.1.4=py35_0 57 | - 
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split

# Column order of the tab-separated MovieLens ``u.data`` file.
_RATING_COLUMNS = ['user_id', 'movie_id', 'rating', 'timestamp']

# Relative to the working directory, exactly as the original hard-coded path.
_DEFAULT_RATINGS_PATH = 'data/ml-100k/u.data'


def _load_ratings(data_path):
    """Read the tab-separated ratings file into a DataFrame."""
    return pd.read_csv(data_path, sep='\t', names=_RATING_COLUMNS)


def _to_item_user_matrix(ratings):
    """Pivot ratings into a movie x user matrix; missing ratings become 0.0."""
    pivoted = ratings.pivot(index='movie_id', columns='user_id', values='rating')
    return pivoted.fillna(0.0)


def _to_feature_dict(ratings):
    """Turn a ratings frame into a dict of 1-D numpy arrays keyed by column."""
    return {
        'user_id': ratings.user_id.values,
        'movie_id': ratings.movie_id.values,
        'rating': ratings.rating.values.astype(np.float32),
    }


def ae_train_test(data_path=_DEFAULT_RATINGS_PATH, test_size=0.2, seed=123,
                  max_attempts=100):
    """Build dense movie x user rating matrices for the autoencoder.

    The ratings are split at random into train and test parts and each part
    is pivoted into a matrix with one row per movie and one column per user
    (0.0 where no rating exists). The split is retried with the next seed
    until train and test contain exactly the same users, because the test
    columns are used below to index the train matrix.

    Args:
        data_path: path of the tab-separated ratings file.
        test_size: fraction of ratings held out for testing.
        seed: random seed of the first split attempt; attempt ``k`` uses
            ``seed + k``.
        max_attempts: number of seeds to try before giving up.

    Returns:
        A tuple ``(train, test, train_eval)`` of numpy arrays. ``test`` only
        keeps movies that also occur in ``train``; ``train_eval`` is the
        train matrix restricted to the rows and columns of ``test``.

    Raises:
        RuntimeError: if no split with identical user sets is found within
            ``max_attempts`` attempts.
    """
    rating_df = _load_ratings(data_path)

    for attempt in range(max_attempts):
        train, test = train_test_split(
            rating_df, test_size=test_size, random_state=seed + attempt)
        train_matrix = _to_item_user_matrix(train)
        test_matrix = _to_item_user_matrix(test)
        # Require the same users on both sides: a user present in only one
        # part would make the column selection below fail or produce NaNs.
        if set(train_matrix.columns) == set(test_matrix.columns):
            break
    else:
        raise RuntimeError(
            'no train/test split with identical user sets found after '
            '{} attempts'.format(max_attempts))

    # Drop test movies that never appear in the training data.
    test_matrix = test_matrix.filter(items=train_matrix.index, axis=0)

    train_eval_matrix = train_matrix.loc[test_matrix.index, test_matrix.columns]

    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    return train_matrix.values, test_matrix.values, train_eval_matrix.values


def mf_train_test(data_path=_DEFAULT_RATINGS_PATH, test_size=0.2, seed=123):
    """Build train/test rating triples for matrix factorization.

    Test ratings whose movie or user does not occur in the train part are
    dropped, and both id columns are shifted by -1 (the dataset numbers
    users and items from 1, so the ids become zero-based).

    Args:
        data_path: path of the tab-separated ratings file.
        test_size: fraction of ratings held out for testing.
        seed: random seed of the split.

    Returns:
        A tuple ``(train_dict, test_dict)``; each dict maps ``'user_id'``,
        ``'movie_id'`` and ``'rating'`` (float32) to 1-D numpy arrays.
    """
    rating_df = _load_ratings(data_path)
    train, test = train_test_split(
        rating_df, test_size=test_size, random_state=seed)

    # Explicit copies instead of the removed ``is_copy = False`` hack, so the
    # in-place id shift below never touches ``rating_df``.
    train = train.copy()
    seen_in_train = (test.movie_id.isin(train.movie_id)
                     & test.user_id.isin(train.user_id))
    test = test[seen_in_train].copy()

    for frame in (train, test):
        frame['user_id'] -= 1
        frame['movie_id'] -= 1

    return _to_feature_dict(train), _to_feature_dict(test)
18 | 19 | Neither the University of Minnesota nor any of the researchers 20 | involved can guarantee the correctness of the data, its suitability 21 | for any particular purpose, or the validity of results based on the 22 | use of the data set. The data set may be used for any research 23 | purposes under the following conditions: 24 | 25 | * The user may not state or imply any endorsement from the 26 | University of Minnesota or the GroupLens Research Group. 27 | 28 | * The user must acknowledge the use of the data set in 29 | publications resulting from the use of the data set 30 | (see below for citation information). 31 | 32 | * The user may not redistribute the data without separate 33 | permission. 34 | 35 | * The user may not use this information for any commercial or 36 | revenue-bearing purposes without first obtaining permission 37 | from a faculty member of the GroupLens Research Project at the 38 | University of Minnesota. 39 | 40 | If you have any further questions or comments, please contact GroupLens 41 | . 42 | 43 | CITATION 44 | ============================================== 45 | 46 | To acknowledge use of the dataset in publications, please cite the 47 | following paper: 48 | 49 | F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: 50 | History and Context. ACM Transactions on Interactive Intelligent 51 | Systems (TiiS) 5, 4, Article 19 (December 2015), 19 pages. 52 | DOI=http://dx.doi.org/10.1145/2827872 53 | 54 | 55 | ACKNOWLEDGEMENTS 56 | ============================================== 57 | 58 | Thanks to Al Borchers for cleaning up this data and writing the 59 | accompanying scripts. 60 | 61 | PUBLISHED WORK THAT HAS USED THIS DATASET 62 | ============================================== 63 | 64 | Herlocker, J., Konstan, J., Borchers, A., Riedl, J.. An Algorithmic 65 | Framework for Performing Collaborative Filtering. Proceedings of the 66 | 1999 Conference on Research and Development in Information 67 | Retrieval. Aug. 1999. 
68 | 69 | FURTHER INFORMATION ABOUT THE GROUPLENS RESEARCH PROJECT 70 | ============================================== 71 | 72 | The GroupLens Research Project is a research group in the Department 73 | of Computer Science and Engineering at the University of Minnesota. 74 | Members of the GroupLens Research Project are involved in many 75 | research projects related to the fields of information filtering, 76 | collaborative filtering, and recommender systems. The project is lead 77 | by professors John Riedl and Joseph Konstan. The project began to 78 | explore automated collaborative filtering in 1992, but is most well 79 | known for its world wide trial of an automated collaborative filtering 80 | system for Usenet news in 1996. The technology developed in the 81 | Usenet trial formed the base for the formation of Net Perceptions, 82 | Inc., which was founded by members of GroupLens Research. Since then 83 | the project has expanded its scope to research overall information 84 | filtering solutions, integrating in content-based methods as well as 85 | improving current collaborative filtering technology. 86 | 87 | Further information on the GroupLens Research project, including 88 | research publications, can be found at the following web site: 89 | 90 | http://www.grouplens.org/ 91 | 92 | GroupLens Research currently operates a movie recommender based on 93 | collaborative filtering: 94 | 95 | http://www.movielens.org/ 96 | 97 | DETAILED DESCRIPTIONS OF DATA FILES 98 | ============================================== 99 | 100 | Here are brief descriptions of the data. 101 | 102 | ml-data.tar.gz -- Compressed tar file. To rebuild the u data files do this: 103 | gunzip ml-data.tar.gz 104 | tar xvf ml-data.tar 105 | mku.sh 106 | 107 | u.data -- The full u data set, 100000 ratings by 943 users on 1682 items. 108 | Each user has rated at least 20 movies. Users and items are 109 | numbered consecutively from 1. The data is randomly 110 | ordered. 
This is a tab separated list of 111 | user id | item id | rating | timestamp. 112 | The time stamps are unix seconds since 1/1/1970 UTC 113 | 114 | u.info -- The number of users, items, and ratings in the u data set. 115 | 116 | u.item -- Information about the items (movies); this is a tab separated 117 | list of 118 | movie id | movie title | release date | video release date | 119 | IMDb URL | unknown | Action | Adventure | Animation | 120 | Children's | Comedy | Crime | Documentary | Drama | Fantasy | 121 | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | 122 | Thriller | War | Western | 123 | The last 19 fields are the genres, a 1 indicates the movie 124 | is of that genre, a 0 indicates it is not; movies can be in 125 | several genres at once. 126 | The movie ids are the ones used in the u.data data set. 127 | 128 | u.genre -- A list of the genres. 129 | 130 | u.user -- Demographic information about the users; this is a tab 131 | separated list of 132 | user id | age | gender | occupation | zip code 133 | The user ids are the ones used in the u.data data set. 134 | 135 | u.occupation -- A list of the occupations. 136 | 137 | u1.base -- The data sets u1.base and u1.test through u5.base and u5.test 138 | u1.test are 80%/20% splits of the u data into training and test data. 139 | u2.base Each of u1, ..., u5 have disjoint test sets; this if for 140 | u2.test 5 fold cross validation (where you repeat your experiment 141 | u3.base with each training and test set and average the results). 142 | u3.test These data sets can be generated from u.data by mku.sh. 143 | u4.base 144 | u4.test 145 | u5.base 146 | u5.test 147 | 148 | ua.base -- The data sets ua.base, ua.test, ub.base, and ub.test 149 | ua.test split the u data into a training set and a test set with 150 | ub.base exactly 10 ratings per user in the test set. The sets 151 | ub.test ua.test and ub.test are disjoint. These data sets can 152 | be generated from u.data by mku.sh. 
153 | 154 | allbut.pl -- The script that generates training and test sets where 155 | all but n of a users ratings are in the training data. 156 | 157 | mku.sh -- A shell script to generate all the u data sets from u.data. 158 | -------------------------------------------------------------------------------- /data/ml-100k/u.genre: -------------------------------------------------------------------------------- 1 | unknown|0 2 | Action|1 3 | Adventure|2 4 | Animation|3 5 | Children's|4 6 | Comedy|5 7 | Crime|6 8 | Documentary|7 9 | Drama|8 10 | Fantasy|9 11 | Film-Noir|10 12 | Horror|11 13 | Musical|12 14 | Mystery|13 15 | Romance|14 16 | Sci-Fi|15 17 | Thriller|16 18 | War|17 19 | Western|18 20 | 21 | -------------------------------------------------------------------------------- /data/ml-100k/u.info: -------------------------------------------------------------------------------- 1 | 943 users 2 | 1682 items 3 | 100000 ratings 4 | -------------------------------------------------------------------------------- /data/ml-100k/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tf-dl-workshop/tf-reco-workshop/2e4db76dd5fd46158911b71c4efb49ea81c967b8/data/ml-100k/u.item -------------------------------------------------------------------------------- /data/ml-100k/u.occupation: -------------------------------------------------------------------------------- 1 | administrator 2 | artist 3 | doctor 4 | educator 5 | engineer 6 | entertainment 7 | executive 8 | healthcare 9 | homemaker 10 | lawyer 11 | librarian 12 | marketing 13 | none 14 | other 15 | programmer 16 | retired 17 | salesman 18 | scientist 19 | student 20 | technician 21 | writer 22 | -------------------------------------------------------------------------------- /data/ml-100k/u.user: -------------------------------------------------------------------------------- 1 | 1|24|M|technician|85711 2 | 2|53|F|other|94043 3 | 
3|23|M|writer|32067 4 | 4|24|M|technician|43537 5 | 5|33|F|other|15213 6 | 6|42|M|executive|98101 7 | 7|57|M|administrator|91344 8 | 8|36|M|administrator|05201 9 | 9|29|M|student|01002 10 | 10|53|M|lawyer|90703 11 | 11|39|F|other|30329 12 | 12|28|F|other|06405 13 | 13|47|M|educator|29206 14 | 14|45|M|scientist|55106 15 | 15|49|F|educator|97301 16 | 16|21|M|entertainment|10309 17 | 17|30|M|programmer|06355 18 | 18|35|F|other|37212 19 | 19|40|M|librarian|02138 20 | 20|42|F|homemaker|95660 21 | 21|26|M|writer|30068 22 | 22|25|M|writer|40206 23 | 23|30|F|artist|48197 24 | 24|21|F|artist|94533 25 | 25|39|M|engineer|55107 26 | 26|49|M|engineer|21044 27 | 27|40|F|librarian|30030 28 | 28|32|M|writer|55369 29 | 29|41|M|programmer|94043 30 | 30|7|M|student|55436 31 | 31|24|M|artist|10003 32 | 32|28|F|student|78741 33 | 33|23|M|student|27510 34 | 34|38|F|administrator|42141 35 | 35|20|F|homemaker|42459 36 | 36|19|F|student|93117 37 | 37|23|M|student|55105 38 | 38|28|F|other|54467 39 | 39|41|M|entertainment|01040 40 | 40|38|M|scientist|27514 41 | 41|33|M|engineer|80525 42 | 42|30|M|administrator|17870 43 | 43|29|F|librarian|20854 44 | 44|26|M|technician|46260 45 | 45|29|M|programmer|50233 46 | 46|27|F|marketing|46538 47 | 47|53|M|marketing|07102 48 | 48|45|M|administrator|12550 49 | 49|23|F|student|76111 50 | 50|21|M|writer|52245 51 | 51|28|M|educator|16509 52 | 52|18|F|student|55105 53 | 53|26|M|programmer|55414 54 | 54|22|M|executive|66315 55 | 55|37|M|programmer|01331 56 | 56|25|M|librarian|46260 57 | 57|16|M|none|84010 58 | 58|27|M|programmer|52246 59 | 59|49|M|educator|08403 60 | 60|50|M|healthcare|06472 61 | 61|36|M|engineer|30040 62 | 62|27|F|administrator|97214 63 | 63|31|M|marketing|75240 64 | 64|32|M|educator|43202 65 | 65|51|F|educator|48118 66 | 66|23|M|student|80521 67 | 67|17|M|student|60402 68 | 68|19|M|student|22904 69 | 69|24|M|engineer|55337 70 | 70|27|M|engineer|60067 71 | 71|39|M|scientist|98034 72 | 72|48|F|administrator|73034 73 | 73|24|M|student|41850 74 
| 74|39|M|scientist|T8H1N 75 | 75|24|M|entertainment|08816 76 | 76|20|M|student|02215 77 | 77|30|M|technician|29379 78 | 78|26|M|administrator|61801 79 | 79|39|F|administrator|03755 80 | 80|34|F|administrator|52241 81 | 81|21|M|student|21218 82 | 82|50|M|programmer|22902 83 | 83|40|M|other|44133 84 | 84|32|M|executive|55369 85 | 85|51|M|educator|20003 86 | 86|26|M|administrator|46005 87 | 87|47|M|administrator|89503 88 | 88|49|F|librarian|11701 89 | 89|43|F|administrator|68106 90 | 90|60|M|educator|78155 91 | 91|55|M|marketing|01913 92 | 92|32|M|entertainment|80525 93 | 93|48|M|executive|23112 94 | 94|26|M|student|71457 95 | 95|31|M|administrator|10707 96 | 96|25|F|artist|75206 97 | 97|43|M|artist|98006 98 | 98|49|F|executive|90291 99 | 99|20|M|student|63129 100 | 100|36|M|executive|90254 101 | 101|15|M|student|05146 102 | 102|38|M|programmer|30220 103 | 103|26|M|student|55108 104 | 104|27|M|student|55108 105 | 105|24|M|engineer|94043 106 | 106|61|M|retired|55125 107 | 107|39|M|scientist|60466 108 | 108|44|M|educator|63130 109 | 109|29|M|other|55423 110 | 110|19|M|student|77840 111 | 111|57|M|engineer|90630 112 | 112|30|M|salesman|60613 113 | 113|47|M|executive|95032 114 | 114|27|M|programmer|75013 115 | 115|31|M|engineer|17110 116 | 116|40|M|healthcare|97232 117 | 117|20|M|student|16125 118 | 118|21|M|administrator|90210 119 | 119|32|M|programmer|67401 120 | 120|47|F|other|06260 121 | 121|54|M|librarian|99603 122 | 122|32|F|writer|22206 123 | 123|48|F|artist|20008 124 | 124|34|M|student|60615 125 | 125|30|M|lawyer|22202 126 | 126|28|F|lawyer|20015 127 | 127|33|M|none|73439 128 | 128|24|F|marketing|20009 129 | 129|36|F|marketing|07039 130 | 130|20|M|none|60115 131 | 131|59|F|administrator|15237 132 | 132|24|M|other|94612 133 | 133|53|M|engineer|78602 134 | 134|31|M|programmer|80236 135 | 135|23|M|student|38401 136 | 136|51|M|other|97365 137 | 137|50|M|educator|84408 138 | 138|46|M|doctor|53211 139 | 139|20|M|student|08904 140 | 140|30|F|student|32250 141 | 
141|49|M|programmer|36117 142 | 142|13|M|other|48118 143 | 143|42|M|technician|08832 144 | 144|53|M|programmer|20910 145 | 145|31|M|entertainment|V3N4P 146 | 146|45|M|artist|83814 147 | 147|40|F|librarian|02143 148 | 148|33|M|engineer|97006 149 | 149|35|F|marketing|17325 150 | 150|20|F|artist|02139 151 | 151|38|F|administrator|48103 152 | 152|33|F|educator|68767 153 | 153|25|M|student|60641 154 | 154|25|M|student|53703 155 | 155|32|F|other|11217 156 | 156|25|M|educator|08360 157 | 157|57|M|engineer|70808 158 | 158|50|M|educator|27606 159 | 159|23|F|student|55346 160 | 160|27|M|programmer|66215 161 | 161|50|M|lawyer|55104 162 | 162|25|M|artist|15610 163 | 163|49|M|administrator|97212 164 | 164|47|M|healthcare|80123 165 | 165|20|F|other|53715 166 | 166|47|M|educator|55113 167 | 167|37|M|other|L9G2B 168 | 168|48|M|other|80127 169 | 169|52|F|other|53705 170 | 170|53|F|healthcare|30067 171 | 171|48|F|educator|78750 172 | 172|55|M|marketing|22207 173 | 173|56|M|other|22306 174 | 174|30|F|administrator|52302 175 | 175|26|F|scientist|21911 176 | 176|28|M|scientist|07030 177 | 177|20|M|programmer|19104 178 | 178|26|M|other|49512 179 | 179|15|M|entertainment|20755 180 | 180|22|F|administrator|60202 181 | 181|26|M|executive|21218 182 | 182|36|M|programmer|33884 183 | 183|33|M|scientist|27708 184 | 184|37|M|librarian|76013 185 | 185|53|F|librarian|97403 186 | 186|39|F|executive|00000 187 | 187|26|M|educator|16801 188 | 188|42|M|student|29440 189 | 189|32|M|artist|95014 190 | 190|30|M|administrator|95938 191 | 191|33|M|administrator|95161 192 | 192|42|M|educator|90840 193 | 193|29|M|student|49931 194 | 194|38|M|administrator|02154 195 | 195|42|M|scientist|93555 196 | 196|49|M|writer|55105 197 | 197|55|M|technician|75094 198 | 198|21|F|student|55414 199 | 199|30|M|writer|17604 200 | 200|40|M|programmer|93402 201 | 201|27|M|writer|E2A4H 202 | 202|41|F|educator|60201 203 | 203|25|F|student|32301 204 | 204|52|F|librarian|10960 205 | 205|47|M|lawyer|06371 206 | 
206|14|F|student|53115 207 | 207|39|M|marketing|92037 208 | 208|43|M|engineer|01720 209 | 209|33|F|educator|85710 210 | 210|39|M|engineer|03060 211 | 211|66|M|salesman|32605 212 | 212|49|F|educator|61401 213 | 213|33|M|executive|55345 214 | 214|26|F|librarian|11231 215 | 215|35|M|programmer|63033 216 | 216|22|M|engineer|02215 217 | 217|22|M|other|11727 218 | 218|37|M|administrator|06513 219 | 219|32|M|programmer|43212 220 | 220|30|M|librarian|78205 221 | 221|19|M|student|20685 222 | 222|29|M|programmer|27502 223 | 223|19|F|student|47906 224 | 224|31|F|educator|43512 225 | 225|51|F|administrator|58202 226 | 226|28|M|student|92103 227 | 227|46|M|executive|60659 228 | 228|21|F|student|22003 229 | 229|29|F|librarian|22903 230 | 230|28|F|student|14476 231 | 231|48|M|librarian|01080 232 | 232|45|M|scientist|99709 233 | 233|38|M|engineer|98682 234 | 234|60|M|retired|94702 235 | 235|37|M|educator|22973 236 | 236|44|F|writer|53214 237 | 237|49|M|administrator|63146 238 | 238|42|F|administrator|44124 239 | 239|39|M|artist|95628 240 | 240|23|F|educator|20784 241 | 241|26|F|student|20001 242 | 242|33|M|educator|31404 243 | 243|33|M|educator|60201 244 | 244|28|M|technician|80525 245 | 245|22|M|student|55109 246 | 246|19|M|student|28734 247 | 247|28|M|engineer|20770 248 | 248|25|M|student|37235 249 | 249|25|M|student|84103 250 | 250|29|M|executive|95110 251 | 251|28|M|doctor|85032 252 | 252|42|M|engineer|07733 253 | 253|26|F|librarian|22903 254 | 254|44|M|educator|42647 255 | 255|23|M|entertainment|07029 256 | 256|35|F|none|39042 257 | 257|17|M|student|77005 258 | 258|19|F|student|77801 259 | 259|21|M|student|48823 260 | 260|40|F|artist|89801 261 | 261|28|M|administrator|85202 262 | 262|19|F|student|78264 263 | 263|41|M|programmer|55346 264 | 264|36|F|writer|90064 265 | 265|26|M|executive|84601 266 | 266|62|F|administrator|78756 267 | 267|23|M|engineer|83716 268 | 268|24|M|engineer|19422 269 | 269|31|F|librarian|43201 270 | 270|18|F|student|63119 271 | 271|51|M|engineer|22932 
272 | 272|33|M|scientist|53706 273 | 273|50|F|other|10016 274 | 274|20|F|student|55414 275 | 275|38|M|engineer|92064 276 | 276|21|M|student|95064 277 | 277|35|F|administrator|55406 278 | 278|37|F|librarian|30033 279 | 279|33|M|programmer|85251 280 | 280|30|F|librarian|22903 281 | 281|15|F|student|06059 282 | 282|22|M|administrator|20057 283 | 283|28|M|programmer|55305 284 | 284|40|M|executive|92629 285 | 285|25|M|programmer|53713 286 | 286|27|M|student|15217 287 | 287|21|M|salesman|31211 288 | 288|34|M|marketing|23226 289 | 289|11|M|none|94619 290 | 290|40|M|engineer|93550 291 | 291|19|M|student|44106 292 | 292|35|F|programmer|94703 293 | 293|24|M|writer|60804 294 | 294|34|M|technician|92110 295 | 295|31|M|educator|50325 296 | 296|43|F|administrator|16803 297 | 297|29|F|educator|98103 298 | 298|44|M|executive|01581 299 | 299|29|M|doctor|63108 300 | 300|26|F|programmer|55106 301 | 301|24|M|student|55439 302 | 302|42|M|educator|77904 303 | 303|19|M|student|14853 304 | 304|22|F|student|71701 305 | 305|23|M|programmer|94086 306 | 306|45|M|other|73132 307 | 307|25|M|student|55454 308 | 308|60|M|retired|95076 309 | 309|40|M|scientist|70802 310 | 310|37|M|educator|91711 311 | 311|32|M|technician|73071 312 | 312|48|M|other|02110 313 | 313|41|M|marketing|60035 314 | 314|20|F|student|08043 315 | 315|31|M|educator|18301 316 | 316|43|F|other|77009 317 | 317|22|M|administrator|13210 318 | 318|65|M|retired|06518 319 | 319|38|M|programmer|22030 320 | 320|19|M|student|24060 321 | 321|49|F|educator|55413 322 | 322|20|M|student|50613 323 | 323|21|M|student|19149 324 | 324|21|F|student|02176 325 | 325|48|M|technician|02139 326 | 326|41|M|administrator|15235 327 | 327|22|M|student|11101 328 | 328|51|M|administrator|06779 329 | 329|48|M|educator|01720 330 | 330|35|F|educator|33884 331 | 331|33|M|entertainment|91344 332 | 332|20|M|student|40504 333 | 333|47|M|other|V0R2M 334 | 334|32|M|librarian|30002 335 | 335|45|M|executive|33775 336 | 336|23|M|salesman|42101 337 | 
337|37|M|scientist|10522 338 | 338|39|F|librarian|59717 339 | 339|35|M|lawyer|37901 340 | 340|46|M|engineer|80123 341 | 341|17|F|student|44405 342 | 342|25|F|other|98006 343 | 343|43|M|engineer|30093 344 | 344|30|F|librarian|94117 345 | 345|28|F|librarian|94143 346 | 346|34|M|other|76059 347 | 347|18|M|student|90210 348 | 348|24|F|student|45660 349 | 349|68|M|retired|61455 350 | 350|32|M|student|97301 351 | 351|61|M|educator|49938 352 | 352|37|F|programmer|55105 353 | 353|25|M|scientist|28480 354 | 354|29|F|librarian|48197 355 | 355|25|M|student|60135 356 | 356|32|F|homemaker|92688 357 | 357|26|M|executive|98133 358 | 358|40|M|educator|10022 359 | 359|22|M|student|61801 360 | 360|51|M|other|98027 361 | 361|22|M|student|44074 362 | 362|35|F|homemaker|85233 363 | 363|20|M|student|87501 364 | 364|63|M|engineer|01810 365 | 365|29|M|lawyer|20009 366 | 366|20|F|student|50670 367 | 367|17|M|student|37411 368 | 368|18|M|student|92113 369 | 369|24|M|student|91335 370 | 370|52|M|writer|08534 371 | 371|36|M|engineer|99206 372 | 372|25|F|student|66046 373 | 373|24|F|other|55116 374 | 374|36|M|executive|78746 375 | 375|17|M|entertainment|37777 376 | 376|28|F|other|10010 377 | 377|22|M|student|18015 378 | 378|35|M|student|02859 379 | 379|44|M|programmer|98117 380 | 380|32|M|engineer|55117 381 | 381|33|M|artist|94608 382 | 382|45|M|engineer|01824 383 | 383|42|M|administrator|75204 384 | 384|52|M|programmer|45218 385 | 385|36|M|writer|10003 386 | 386|36|M|salesman|43221 387 | 387|33|M|entertainment|37412 388 | 388|31|M|other|36106 389 | 389|44|F|writer|83702 390 | 390|42|F|writer|85016 391 | 391|23|M|student|84604 392 | 392|52|M|writer|59801 393 | 393|19|M|student|83686 394 | 394|25|M|administrator|96819 395 | 395|43|M|other|44092 396 | 396|57|M|engineer|94551 397 | 397|17|M|student|27514 398 | 398|40|M|other|60008 399 | 399|25|M|other|92374 400 | 400|33|F|administrator|78213 401 | 401|46|F|healthcare|84107 402 | 402|30|M|engineer|95129 403 | 403|37|M|other|06811 404 | 
404|29|F|programmer|55108 405 | 405|22|F|healthcare|10019 406 | 406|52|M|educator|93109 407 | 407|29|M|engineer|03261 408 | 408|23|M|student|61755 409 | 409|48|M|administrator|98225 410 | 410|30|F|artist|94025 411 | 411|34|M|educator|44691 412 | 412|25|M|educator|15222 413 | 413|55|M|educator|78212 414 | 414|24|M|programmer|38115 415 | 415|39|M|educator|85711 416 | 416|20|F|student|92626 417 | 417|27|F|other|48103 418 | 418|55|F|none|21206 419 | 419|37|M|lawyer|43215 420 | 420|53|M|educator|02140 421 | 421|38|F|programmer|55105 422 | 422|26|M|entertainment|94533 423 | 423|64|M|other|91606 424 | 424|36|F|marketing|55422 425 | 425|19|M|student|58644 426 | 426|55|M|educator|01602 427 | 427|51|M|doctor|85258 428 | 428|28|M|student|55414 429 | 429|27|M|student|29205 430 | 430|38|M|scientist|98199 431 | 431|24|M|marketing|92629 432 | 432|22|M|entertainment|50311 433 | 433|27|M|artist|11211 434 | 434|16|F|student|49705 435 | 435|24|M|engineer|60007 436 | 436|30|F|administrator|17345 437 | 437|27|F|other|20009 438 | 438|51|F|administrator|43204 439 | 439|23|F|administrator|20817 440 | 440|30|M|other|48076 441 | 441|50|M|technician|55013 442 | 442|22|M|student|85282 443 | 443|35|M|salesman|33308 444 | 444|51|F|lawyer|53202 445 | 445|21|M|writer|92653 446 | 446|57|M|educator|60201 447 | 447|30|M|administrator|55113 448 | 448|23|M|entertainment|10021 449 | 449|23|M|librarian|55021 450 | 450|35|F|educator|11758 451 | 451|16|M|student|48446 452 | 452|35|M|administrator|28018 453 | 453|18|M|student|06333 454 | 454|57|M|other|97330 455 | 455|48|M|administrator|83709 456 | 456|24|M|technician|31820 457 | 457|33|F|salesman|30011 458 | 458|47|M|technician|Y1A6B 459 | 459|22|M|student|29201 460 | 460|44|F|other|60630 461 | 461|15|M|student|98102 462 | 462|19|F|student|02918 463 | 463|48|F|healthcare|75218 464 | 464|60|M|writer|94583 465 | 465|32|M|other|05001 466 | 466|22|M|student|90804 467 | 467|29|M|engineer|91201 468 | 468|28|M|engineer|02341 469 | 469|60|M|educator|78628 470 | 
470|24|M|programmer|10021 471 | 471|10|M|student|77459 472 | 472|24|M|student|87544 473 | 473|29|M|student|94708 474 | 474|51|M|executive|93711 475 | 475|30|M|programmer|75230 476 | 476|28|M|student|60440 477 | 477|23|F|student|02125 478 | 478|29|M|other|10019 479 | 479|30|M|educator|55409 480 | 480|57|M|retired|98257 481 | 481|73|M|retired|37771 482 | 482|18|F|student|40256 483 | 483|29|M|scientist|43212 484 | 484|27|M|student|21208 485 | 485|44|F|educator|95821 486 | 486|39|M|educator|93101 487 | 487|22|M|engineer|92121 488 | 488|48|M|technician|21012 489 | 489|55|M|other|45218 490 | 490|29|F|artist|V5A2B 491 | 491|43|F|writer|53711 492 | 492|57|M|educator|94618 493 | 493|22|M|engineer|60090 494 | 494|38|F|administrator|49428 495 | 495|29|M|engineer|03052 496 | 496|21|F|student|55414 497 | 497|20|M|student|50112 498 | 498|26|M|writer|55408 499 | 499|42|M|programmer|75006 500 | 500|28|M|administrator|94305 501 | 501|22|M|student|10025 502 | 502|22|M|student|23092 503 | 503|50|F|writer|27514 504 | 504|40|F|writer|92115 505 | 505|27|F|other|20657 506 | 506|46|M|programmer|03869 507 | 507|18|F|writer|28450 508 | 508|27|M|marketing|19382 509 | 509|23|M|administrator|10011 510 | 510|34|M|other|98038 511 | 511|22|M|student|21250 512 | 512|29|M|other|20090 513 | 513|43|M|administrator|26241 514 | 514|27|M|programmer|20707 515 | 515|53|M|marketing|49508 516 | 516|53|F|librarian|10021 517 | 517|24|M|student|55454 518 | 518|49|F|writer|99709 519 | 519|22|M|other|55320 520 | 520|62|M|healthcare|12603 521 | 521|19|M|student|02146 522 | 522|36|M|engineer|55443 523 | 523|50|F|administrator|04102 524 | 524|56|M|educator|02159 525 | 525|27|F|administrator|19711 526 | 526|30|M|marketing|97124 527 | 527|33|M|librarian|12180 528 | 528|18|M|student|55104 529 | 529|47|F|administrator|44224 530 | 530|29|M|engineer|94040 531 | 531|30|F|salesman|97408 532 | 532|20|M|student|92705 533 | 533|43|M|librarian|02324 534 | 534|20|M|student|05464 535 | 535|45|F|educator|80302 536 | 
536|38|M|engineer|30078 537 | 537|36|M|engineer|22902 538 | 538|31|M|scientist|21010 539 | 539|53|F|administrator|80303 540 | 540|28|M|engineer|91201 541 | 541|19|F|student|84302 542 | 542|21|M|student|60515 543 | 543|33|M|scientist|95123 544 | 544|44|F|other|29464 545 | 545|27|M|technician|08052 546 | 546|36|M|executive|22911 547 | 547|50|M|educator|14534 548 | 548|51|M|writer|95468 549 | 549|42|M|scientist|45680 550 | 550|16|F|student|95453 551 | 551|25|M|programmer|55414 552 | 552|45|M|other|68147 553 | 553|58|M|educator|62901 554 | 554|32|M|scientist|62901 555 | 555|29|F|educator|23227 556 | 556|35|F|educator|30606 557 | 557|30|F|writer|11217 558 | 558|56|F|writer|63132 559 | 559|69|M|executive|10022 560 | 560|32|M|student|10003 561 | 561|23|M|engineer|60005 562 | 562|54|F|administrator|20879 563 | 563|39|F|librarian|32707 564 | 564|65|M|retired|94591 565 | 565|40|M|student|55422 566 | 566|20|M|student|14627 567 | 567|24|M|entertainment|10003 568 | 568|39|M|educator|01915 569 | 569|34|M|educator|91903 570 | 570|26|M|educator|14627 571 | 571|34|M|artist|01945 572 | 572|51|M|educator|20003 573 | 573|68|M|retired|48911 574 | 574|56|M|educator|53188 575 | 575|33|M|marketing|46032 576 | 576|48|M|executive|98281 577 | 577|36|F|student|77845 578 | 578|31|M|administrator|M7A1A 579 | 579|32|M|educator|48103 580 | 580|16|M|student|17961 581 | 581|37|M|other|94131 582 | 582|17|M|student|93003 583 | 583|44|M|engineer|29631 584 | 584|25|M|student|27511 585 | 585|69|M|librarian|98501 586 | 586|20|M|student|79508 587 | 587|26|M|other|14216 588 | 588|18|F|student|93063 589 | 589|21|M|lawyer|90034 590 | 590|50|M|educator|82435 591 | 591|57|F|librarian|92093 592 | 592|18|M|student|97520 593 | 593|31|F|educator|68767 594 | 594|46|M|educator|M4J2K 595 | 595|25|M|programmer|31909 596 | 596|20|M|artist|77073 597 | 597|23|M|other|84116 598 | 598|40|F|marketing|43085 599 | 599|22|F|student|R3T5K 600 | 600|34|M|programmer|02320 601 | 601|19|F|artist|99687 602 | 602|47|F|other|34656 603 
| 603|21|M|programmer|47905 604 | 604|39|M|educator|11787 605 | 605|33|M|engineer|33716 606 | 606|28|M|programmer|63044 607 | 607|49|F|healthcare|02154 608 | 608|22|M|other|10003 609 | 609|13|F|student|55106 610 | 610|22|M|student|21227 611 | 611|46|M|librarian|77008 612 | 612|36|M|educator|79070 613 | 613|37|F|marketing|29678 614 | 614|54|M|educator|80227 615 | 615|38|M|educator|27705 616 | 616|55|M|scientist|50613 617 | 617|27|F|writer|11201 618 | 618|15|F|student|44212 619 | 619|17|M|student|44134 620 | 620|18|F|writer|81648 621 | 621|17|M|student|60402 622 | 622|25|M|programmer|14850 623 | 623|50|F|educator|60187 624 | 624|19|M|student|30067 625 | 625|27|M|programmer|20723 626 | 626|23|M|scientist|19807 627 | 627|24|M|engineer|08034 628 | 628|13|M|none|94306 629 | 629|46|F|other|44224 630 | 630|26|F|healthcare|55408 631 | 631|18|F|student|38866 632 | 632|18|M|student|55454 633 | 633|35|M|programmer|55414 634 | 634|39|M|engineer|T8H1N 635 | 635|22|M|other|23237 636 | 636|47|M|educator|48043 637 | 637|30|M|other|74101 638 | 638|45|M|engineer|01940 639 | 639|42|F|librarian|12065 640 | 640|20|M|student|61801 641 | 641|24|M|student|60626 642 | 642|18|F|student|95521 643 | 643|39|M|scientist|55122 644 | 644|51|M|retired|63645 645 | 645|27|M|programmer|53211 646 | 646|17|F|student|51250 647 | 647|40|M|educator|45810 648 | 648|43|M|engineer|91351 649 | 649|20|M|student|39762 650 | 650|42|M|engineer|83814 651 | 651|65|M|retired|02903 652 | 652|35|M|other|22911 653 | 653|31|M|executive|55105 654 | 654|27|F|student|78739 655 | 655|50|F|healthcare|60657 656 | 656|48|M|educator|10314 657 | 657|26|F|none|78704 658 | 658|33|M|programmer|92626 659 | 659|31|M|educator|54248 660 | 660|26|M|student|77380 661 | 661|28|M|programmer|98121 662 | 662|55|M|librarian|19102 663 | 663|26|M|other|19341 664 | 664|30|M|engineer|94115 665 | 665|25|M|administrator|55412 666 | 666|44|M|administrator|61820 667 | 667|35|M|librarian|01970 668 | 668|29|F|writer|10016 669 | 669|37|M|other|20009 670 
| 670|30|M|technician|21114 671 | 671|21|M|programmer|91919 672 | 672|54|F|administrator|90095 673 | 673|51|M|educator|22906 674 | 674|13|F|student|55337 675 | 675|34|M|other|28814 676 | 676|30|M|programmer|32712 677 | 677|20|M|other|99835 678 | 678|50|M|educator|61462 679 | 679|20|F|student|54302 680 | 680|33|M|lawyer|90405 681 | 681|44|F|marketing|97208 682 | 682|23|M|programmer|55128 683 | 683|42|M|librarian|23509 684 | 684|28|M|student|55414 685 | 685|32|F|librarian|55409 686 | 686|32|M|educator|26506 687 | 687|31|F|healthcare|27713 688 | 688|37|F|administrator|60476 689 | 689|25|M|other|45439 690 | 690|35|M|salesman|63304 691 | 691|34|M|educator|60089 692 | 692|34|M|engineer|18053 693 | 693|43|F|healthcare|85210 694 | 694|60|M|programmer|06365 695 | 695|26|M|writer|38115 696 | 696|55|M|other|94920 697 | 697|25|M|other|77042 698 | 698|28|F|programmer|06906 699 | 699|44|M|other|96754 700 | 700|17|M|student|76309 701 | 701|51|F|librarian|56321 702 | 702|37|M|other|89104 703 | 703|26|M|educator|49512 704 | 704|51|F|librarian|91105 705 | 705|21|F|student|54494 706 | 706|23|M|student|55454 707 | 707|56|F|librarian|19146 708 | 708|26|F|homemaker|96349 709 | 709|21|M|other|N4T1A 710 | 710|19|M|student|92020 711 | 711|22|F|student|15203 712 | 712|22|F|student|54901 713 | 713|42|F|other|07204 714 | 714|26|M|engineer|55343 715 | 715|21|M|technician|91206 716 | 716|36|F|administrator|44265 717 | 717|24|M|technician|84105 718 | 718|42|M|technician|64118 719 | 719|37|F|other|V0R2H 720 | 720|49|F|administrator|16506 721 | 721|24|F|entertainment|11238 722 | 722|50|F|homemaker|17331 723 | 723|26|M|executive|94403 724 | 724|31|M|executive|40243 725 | 725|21|M|student|91711 726 | 726|25|F|administrator|80538 727 | 727|25|M|student|78741 728 | 728|58|M|executive|94306 729 | 729|19|M|student|56567 730 | 730|31|F|scientist|32114 731 | 731|41|F|educator|70403 732 | 732|28|F|other|98405 733 | 733|44|F|other|60630 734 | 734|25|F|other|63108 735 | 735|29|F|healthcare|85719 736 | 
736|48|F|writer|94618 737 | 737|30|M|programmer|98072 738 | 738|35|M|technician|95403 739 | 739|35|M|technician|73162 740 | 740|25|F|educator|22206 741 | 741|25|M|writer|63108 742 | 742|35|M|student|29210 743 | 743|31|M|programmer|92660 744 | 744|35|M|marketing|47024 745 | 745|42|M|writer|55113 746 | 746|25|M|engineer|19047 747 | 747|19|M|other|93612 748 | 748|28|M|administrator|94720 749 | 749|33|M|other|80919 750 | 750|28|M|administrator|32303 751 | 751|24|F|other|90034 752 | 752|60|M|retired|21201 753 | 753|56|M|salesman|91206 754 | 754|59|F|librarian|62901 755 | 755|44|F|educator|97007 756 | 756|30|F|none|90247 757 | 757|26|M|student|55104 758 | 758|27|M|student|53706 759 | 759|20|F|student|68503 760 | 760|35|F|other|14211 761 | 761|17|M|student|97302 762 | 762|32|M|administrator|95050 763 | 763|27|M|scientist|02113 764 | 764|27|F|educator|62903 765 | 765|31|M|student|33066 766 | 766|42|M|other|10960 767 | 767|70|M|engineer|00000 768 | 768|29|M|administrator|12866 769 | 769|39|M|executive|06927 770 | 770|28|M|student|14216 771 | 771|26|M|student|15232 772 | 772|50|M|writer|27105 773 | 773|20|M|student|55414 774 | 774|30|M|student|80027 775 | 775|46|M|executive|90036 776 | 776|30|M|librarian|51157 777 | 777|63|M|programmer|01810 778 | 778|34|M|student|01960 779 | 779|31|M|student|K7L5J 780 | 780|49|M|programmer|94560 781 | 781|20|M|student|48825 782 | 782|21|F|artist|33205 783 | 783|30|M|marketing|77081 784 | 784|47|M|administrator|91040 785 | 785|32|M|engineer|23322 786 | 786|36|F|engineer|01754 787 | 787|18|F|student|98620 788 | 788|51|M|administrator|05779 789 | 789|29|M|other|55420 790 | 790|27|M|technician|80913 791 | 791|31|M|educator|20064 792 | 792|40|M|programmer|12205 793 | 793|22|M|student|85281 794 | 794|32|M|educator|57197 795 | 795|30|M|programmer|08610 796 | 796|32|F|writer|33755 797 | 797|44|F|other|62522 798 | 798|40|F|writer|64131 799 | 799|49|F|administrator|19716 800 | 800|25|M|programmer|55337 801 | 801|22|M|writer|92154 802 | 
802|35|M|administrator|34105 803 | 803|70|M|administrator|78212 804 | 804|39|M|educator|61820 805 | 805|27|F|other|20009 806 | 806|27|M|marketing|11217 807 | 807|41|F|healthcare|93555 808 | 808|45|M|salesman|90016 809 | 809|50|F|marketing|30803 810 | 810|55|F|other|80526 811 | 811|40|F|educator|73013 812 | 812|22|M|technician|76234 813 | 813|14|F|student|02136 814 | 814|30|M|other|12345 815 | 815|32|M|other|28806 816 | 816|34|M|other|20755 817 | 817|19|M|student|60152 818 | 818|28|M|librarian|27514 819 | 819|59|M|administrator|40205 820 | 820|22|M|student|37725 821 | 821|37|M|engineer|77845 822 | 822|29|F|librarian|53144 823 | 823|27|M|artist|50322 824 | 824|31|M|other|15017 825 | 825|44|M|engineer|05452 826 | 826|28|M|artist|77048 827 | 827|23|F|engineer|80228 828 | 828|28|M|librarian|85282 829 | 829|48|M|writer|80209 830 | 830|46|M|programmer|53066 831 | 831|21|M|other|33765 832 | 832|24|M|technician|77042 833 | 833|34|M|writer|90019 834 | 834|26|M|other|64153 835 | 835|44|F|executive|11577 836 | 836|44|M|artist|10018 837 | 837|36|F|artist|55409 838 | 838|23|M|student|01375 839 | 839|38|F|entertainment|90814 840 | 840|39|M|artist|55406 841 | 841|45|M|doctor|47401 842 | 842|40|M|writer|93055 843 | 843|35|M|librarian|44212 844 | 844|22|M|engineer|95662 845 | 845|64|M|doctor|97405 846 | 846|27|M|lawyer|47130 847 | 847|29|M|student|55417 848 | 848|46|M|engineer|02146 849 | 849|15|F|student|25652 850 | 850|34|M|technician|78390 851 | 851|18|M|other|29646 852 | 852|46|M|administrator|94086 853 | 853|49|M|writer|40515 854 | 854|29|F|student|55408 855 | 855|53|M|librarian|04988 856 | 856|43|F|marketing|97215 857 | 857|35|F|administrator|V1G4L 858 | 858|63|M|educator|09645 859 | 859|18|F|other|06492 860 | 860|70|F|retired|48322 861 | 861|38|F|student|14085 862 | 862|25|M|executive|13820 863 | 863|17|M|student|60089 864 | 864|27|M|programmer|63021 865 | 865|25|M|artist|11231 866 | 866|45|M|other|60302 867 | 867|24|M|scientist|92507 868 | 868|21|M|programmer|55303 869 | 
869|30|M|student|10025 870 | 870|22|M|student|65203 871 | 871|31|M|executive|44648 872 | 872|19|F|student|74078 873 | 873|48|F|administrator|33763 874 | 874|36|M|scientist|37076 875 | 875|24|F|student|35802 876 | 876|41|M|other|20902 877 | 877|30|M|other|77504 878 | 878|50|F|educator|98027 879 | 879|33|F|administrator|55337 880 | 880|13|M|student|83702 881 | 881|39|M|marketing|43017 882 | 882|35|M|engineer|40503 883 | 883|49|M|librarian|50266 884 | 884|44|M|engineer|55337 885 | 885|30|F|other|95316 886 | 886|20|M|student|61820 887 | 887|14|F|student|27249 888 | 888|41|M|scientist|17036 889 | 889|24|M|technician|78704 890 | 890|32|M|student|97301 891 | 891|51|F|administrator|03062 892 | 892|36|M|other|45243 893 | 893|25|M|student|95823 894 | 894|47|M|educator|74075 895 | 895|31|F|librarian|32301 896 | 896|28|M|writer|91505 897 | 897|30|M|other|33484 898 | 898|23|M|homemaker|61755 899 | 899|32|M|other|55116 900 | 900|60|M|retired|18505 901 | 901|38|M|executive|L1V3W 902 | 902|45|F|artist|97203 903 | 903|28|M|educator|20850 904 | 904|17|F|student|61073 905 | 905|27|M|other|30350 906 | 906|45|M|librarian|70124 907 | 907|25|F|other|80526 908 | 908|44|F|librarian|68504 909 | 909|50|F|educator|53171 910 | 910|28|M|healthcare|29301 911 | 911|37|F|writer|53210 912 | 912|51|M|other|06512 913 | 913|27|M|student|76201 914 | 914|44|F|other|08105 915 | 915|50|M|entertainment|60614 916 | 916|27|M|engineer|N2L5N 917 | 917|22|F|student|20006 918 | 918|40|M|scientist|70116 919 | 919|25|M|other|14216 920 | 920|30|F|artist|90008 921 | 921|20|F|student|98801 922 | 922|29|F|administrator|21114 923 | 923|21|M|student|E2E3R 924 | 924|29|M|other|11753 925 | 925|18|F|salesman|49036 926 | 926|49|M|entertainment|01701 927 | 927|23|M|programmer|55428 928 | 928|21|M|student|55408 929 | 929|44|M|scientist|53711 930 | 930|28|F|scientist|07310 931 | 931|60|M|educator|33556 932 | 932|58|M|educator|06437 933 | 933|28|M|student|48105 934 | 934|61|M|engineer|22902 935 | 935|42|M|doctor|66221 936 | 
936|24|M|other|32789 937 | 937|48|M|educator|98072 938 | 938|38|F|technician|55038 939 | 939|26|F|student|33319 940 | 940|32|M|administrator|02215 941 | 941|20|M|student|97229 942 | 942|48|F|librarian|78209 943 | 943|22|M|student|77841 944 | -------------------------------------------------------------------------------- /matrix_factorization_estimator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import numpy as np\n", 13 | "from data import dataprep\n", 14 | "from tensorflow.contrib import layers\n", 15 | "from tensorflow.contrib.learn import *\n", 16 | "from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib\n", 17 | "from tensorflow.python.estimator.inputs import numpy_io" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "train_dict, test_dict = dataprep.mf_train_test()\n", 27 | "rating_train = train_dict.pop(\"rating\")\n", 28 | "rating_test = test_dict.pop(\"rating\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "model_params = dict(\n", 40 | " n_user=train_dict['user_id'].max() + 1,\n", 41 | " n_movie=train_dict['movie_id'].max() + 1,\n", 42 | " n_dim=20,\n", 43 | " reg_param=0.01,\n", 44 | " learning_rate=0.01,\n", 45 | ")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def mf_function(features, targets, mode, params):\n", 57 | " users = features['user_id']\n", 58 | " movies = features['movie_id']\n", 59 | " ratings = targets\n", 60 | "\n", 61 | " 
with tf.variable_scope(\"embedding\"):\n", 62 | " user_weight = tf.get_variable(\"user_w\"\n", 63 | " , shape=[params['n_user'], params['n_dim']]\n", 64 | " , dtype=tf.float32\n", 65 | " , initializer=layers.xavier_initializer())\n", 66 | " user_bias = tf.get_variable(\"user_b\"\n", 67 | " , shape=[params['n_user']]\n", 68 | " , dtype=tf.float32\n", 69 | " , initializer=tf.zeros_initializer)\n", 70 | " movie_weight = tf.get_variable(\"movie_w\"\n", 71 | " , shape=[params['n_movie'], params['n_dim']]\n", 72 | " , dtype=tf.float32\n", 73 | " , initializer=layers.xavier_initializer())\n", 74 | " movie_bias = tf.get_variable(\"movie_b\"\n", 75 | " , shape=[params['n_movie']]\n", 76 | " , dtype=tf.float32\n", 77 | " , initializer=tf.zeros_initializer)\n", 78 | " for v in tf.trainable_variables():\n", 79 | " tf.summary.histogram(name=v.name.replace(\":0\",\"\"), values=v)\n", 80 | "\n", 81 | " with tf.name_scope(\"inference\"):\n", 82 | " user_emb = tf.nn.embedding_lookup(user_weight, users)\n", 83 | " u_b = tf.nn.embedding_lookup(user_bias, users)\n", 84 | " movie_emb = tf.nn.embedding_lookup(movie_weight, movies)\n", 85 | " m_b = tf.nn.embedding_lookup(movie_bias, movies)\n", 86 | " pred = tf.reduce_sum(tf.multiply(user_emb, movie_emb), 1) + u_b + m_b\n", 87 | "\n", 88 | " with tf.name_scope(\"loss\"):\n", 89 | " reg_loss = layers.apply_regularization(layers.l2_regularizer(scale=params['reg_param']),\n", 90 | " weights_list=[user_weight, movie_weight])\n", 91 | " loss = tf.nn.l2_loss(pred - ratings) + reg_loss\n", 92 | " rmse = tf.sqrt(tf.reduce_mean(tf.pow(pred - ratings, 2)))\n", 93 | " \n", 94 | "\n", 95 | " eval_metric_ops = {'rmse': rmse}\n", 96 | " train_op = layers.optimize_loss(\n", 97 | " loss=loss,\n", 98 | " global_step=tf.contrib.framework.get_global_step(),\n", 99 | " learning_rate=params[\"learning_rate\"],\n", 100 | " optimizer=tf.train.AdamOptimizer,\n", 101 | " summaries=[\n", 102 | " \"learning_rate\",\n", 103 | " \"loss\",\n", 104 | " 
\"gradients\",\n", 105 | " \"gradient_norm\",\n", 106 | " ])\n", 107 | "\n", 108 | " return ModelFnOps(mode=mode, predictions=pred, loss=loss,\n", 109 | " train_op=train_op, eval_metric_ops=eval_metric_ops)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "scrolled": true 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "# input queue for training\n", 121 | "train_input_fn = numpy_io.numpy_input_fn(\n", 122 | " x=train_dict, y=rating_train, batch_size=10000, shuffle=True, num_epochs=None)\n", 123 | "# input queue for evaluation on test data\n", 124 | "test_input_fn = numpy_io.numpy_input_fn(\n", 125 | " x=test_dict, y=rating_test, batch_size=rating_test.shape[0], shuffle=False, num_epochs=None)\n", 126 | "\n", 127 | "monitor_test = monitors.ValidationMonitor(input_fn=test_input_fn, eval_steps=1, every_n_steps=10,\n", 128 | " name='test')\n", 129 | "\n", 130 | "monitor_train = monitors.ValidationMonitor(input_fn=train_input_fn, eval_steps=1, every_n_steps=10,\n", 131 | " name='train')\n", 132 | "\n", 133 | "mf_estimator = Estimator(\n", 134 | " model_fn=mf_function,\n", 135 | " params=model_params,\n", 136 | " model_dir='_summary/mf_estimator',\n", 137 | " config=RunConfig(save_checkpoints_secs=5))\n", 138 | "\n", 139 | "mf_estimator.fit(input_fn=train_input_fn, steps=400, monitors=[monitor_test, monitor_train])" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 3", 146 | "language": "python", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.6.1" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 1 164 | } 165 | 
-------------------------------------------------------------------------------- /matrix_factorization_started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf\n", 10 | "import numpy as np\n", 11 | "from tensorflow.contrib import layers\n", 12 | "from data import dataprep\n", 13 | "print(tf.__version__)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "train_dict, test_dict = dataprep.mf_train_test()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "n_user = train_dict['user_id'].max() + 1\n", 36 | "n_movie = train_dict['movie_id'].max() + 1\n", 37 | "n_dim = 20\n", 38 | "reg_param = 0.01\n", 39 | "learning_rate = 0.01" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# create input place_holders\n", 51 | "users = tf.placeholder(shape=[None], dtype=tf.int64)\n", 52 | "movies = tf.placeholder(shape=[None], dtype=tf.int64)\n", 53 | "ratings = tf.placeholder(shape=[None], dtype=tf.float32)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "with tf.variable_scope(\"embedding\"):\n", 65 | " user_weight = tf.get_variable(\"user_w\"\n", 66 | " , shape=[n_user, n_dim]\n", 67 | " , dtype=tf.float32\n", 68 | " , initializer=layers.xavier_initializer())\n", 69 | " user_bias = tf.get_variable(\"user_b\"\n", 70 | " , shape=[n_user]\n", 71 | " , dtype=tf.float32\n", 72 | " , 
initializer=tf.zeros_initializer)\n", 73 | " movie_weight = tf.get_variable(\"movie_w\"\n", 74 | " , shape=[n_movie, n_dim]\n", 75 | " , dtype=tf.float32\n", 76 | " , initializer=layers.xavier_initializer())\n", 77 | " movie_bias = tf.get_variable(\"movie_b\"\n", 78 | " , shape=[n_movie]\n", 79 | " , dtype=tf.float32\n", 80 | " , initializer=tf.zeros_initializer)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "tf.reset_default_graph()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "with tf.name_scope(\"inference\"):\n", 103 | " user_emb = tf.nn.embedding_lookup(user_weight, users)\n", 104 | " u_b = tf.nn.embedding_lookup(user_bias, users)\n", 105 | " movie_emb = tf.nn.embedding_lookup(movie_weight, movies)\n", 106 | " m_b = tf.nn.embedding_lookup(movie_bias, movies)\n", 107 | " pred = tf.reduce_sum(tf.multiply(user_emb, movie_emb), 1) + u_b + m_b" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "with tf.name_scope(\"loss\"):\n", 119 | " reg_loss = layers.apply_regularization(layers.l2_regularizer(scale=reg_param),\n", 120 | " weights_list=[user_weight, movie_weight])\n", 121 | " loss = tf.nn.l2_loss(pred - ratings) + reg_loss\n", 122 | " train_ops = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)\n", 123 | " rmse = tf.sqrt(tf.reduce_mean(tf.pow(pred - ratings, 2)))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "with tf.Session() as sess:\n", 133 | " sess.run(tf.global_variables_initializer())\n", 134 | " train_input_dict = {users: train_dict['user_id']\n", 135 | " , 
movies: train_dict['movie_id']\n", 136 | " , ratings: train_dict['rating']}\n", 137 | " test_input_dict = {users: test_dict['user_id']\n", 138 | " , movies: test_dict['movie_id']\n", 139 | " , ratings: test_dict['rating']}\n", 140 | " for i in range(1000):\n", 141 | " sess.run([train_ops], feed_dict=train_input_dict)\n", 142 | " if i % 10 == 0:\n", 143 | " rmse_train = sess.run(rmse, feed_dict=train_input_dict)\n", 144 | " rmse_test = sess.run(rmse, feed_dict=test_input_dict)\n", 145 | " print(\"train rmse: %.3f , test rmse %.3f\" % (rmse_train, rmse_test))" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.6.1" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 1 170 | } 171 | --------------------------------------------------------------------------------