├── requirements.txt
├── setup.cfg
├── setup.py
├── recsys_models
│   ├── __init__.py
│   ├── pipeline.py
│   ├── data
│   │   ├── sampling.py
│   │   └── __init__.py
│   └── models
│       ├── __init__.py
│       ├── bpr.py
│       ├── transrec.py
│       └── fpmc.py
├── .gitignore
├── README.md
├── sample_pipeline.ipynb
└── LICENSE

/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.14.5
2 | pandas>=0.23.3
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | # Get the long description from README.md
 4 | with open('README.md', encoding='utf-8') as f:
 5 |     long_description = f.read()
 6 | 
 7 | with open('requirements.txt', 'r') as f:
 8 |     install_requires = f.read().splitlines()
 9 | 
10 | setup(
11 |     name = 'recsys_models',
12 |     packages = find_packages(),
13 |     version = '0.1.3',
14 |     description = 'TensorFlow Recommender Systems Models for Implicit Feedback',
15 |     author = 'Shuyang Li',
16 |     author_email = 'shuyangli94@gmail.com',
17 |     url = 'https://github.com/shuyangli94/RecSysModels',
18 |     license = 'GPLv3+',
19 |     keywords = ['recommender systems', 'recommender', 'recommendation system', 'tensorflow'],
20 |     classifiers = [
21 |         'Intended Audience :: Developers',
22 |         'Intended Audience :: Education',
23 |         'Intended Audience :: Science/Research',
24 |         'Topic :: Scientific/Engineering',
25 |         'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
26 |         'Programming Language :: Python',
27 |         'Programming Language :: Python :: 3',
28 |         'Programming Language :: Python :: 3.6',
29 |     ],
30 |     install_requires = install_requires,
31 |     long_description = long_description,
32 |     long_description_content_type = 'text/markdown',
33 | )
--------------------------------------------------------------------------------
/recsys_models/__init__.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from collections import namedtuple
 4 | DatasetTuple = namedtuple('DatasetTuple', field_names=['x_train', 'x_test', 'y_train', 'y_test'])
 5 | 
 6 | def load_mnist(cache_location='C:/SHUYANG/mnist'):
 7 |     '''
 8 |     Loads and returns MNIST data
 9 | 
10 |     Args:
11 |         cache_location (str, optional): Defaults to 'C:/SHUYANG/mnist'. Location of cached MNIST training and testing feature/label arrays
12 | 
13 |     Returns:
14 |         DatasetTuple: Contains the various numpy arrays:
15 |             x_train: Array of matrices representing scaled pixel values (0 to 255 -> 0 to 1) for 60,000 training images
16 |             x_test: Array of matrices representing scaled pixel values (0 to 255 -> 0 to 1) for 10,000 testing images
17 |             y_train: Array of 60,000 training labels (0-9, integer)
18 |             y_test: Array of 10,000 testing labels (0-9, integer)
19 |     '''
20 | 
21 |     npz_loc = cache_location if cache_location.endswith('.npz') else '{}.npz'.format(cache_location)
22 |     try:
23 |         # Load from NPZ cache
24 |         mnist = np.load(npz_loc)
25 | 
26 |         # Return relevant data
27 |         print('Loaded MNIST data from local cache {}'.format(npz_loc))
28 |         return DatasetTuple(mnist['x_train'], mnist['x_test'], mnist['y_train'], mnist['y_test'])
29 |     except Exception as e:
30 |         print('Unable to load from cache {} - downloading from AWS: {}'.format(npz_loc, e))
31 | 
32 |     # Retrieve and format MNIST
33 |     mnist = tf.keras.datasets.mnist
34 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
35 |     x_train, x_test = x_train / 255.0, x_test / 255.0
36 | 
37 |     # Save to NPZ
38 |     np.savez_compressed(cache_location, x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)
39 |     return DatasetTuple(x_train, x_test, y_train, y_test)
40 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | # Usually these files are written by a python script from a template
 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # Data files
107 | *.csv
108 | *.msgpack
109 | *.pkl
110 | *.dat
111 | *.xlsx
112 | 
113 | # VSCode
114 | .vscode/
115 | 
116 | # Trained embeddings & documentation outputs
117 | embeddings/
118 | PLOTS/
119 | 
120 | # Output types (images, PDFs, web pages...)
121 | *.png
122 | *.PNG
123 | *.jpg
124 | *.JPG
125 | *.pdf
126 | *.html
127 | 
128 | # Random seed file for SSH into jupyter lab
129 | .rnd
130 | 
131 | # Datasets (too large)
132 | datasets/
133 | 
134 | # Other reference code (too large including data)
135 | OTHER\ PAPER\ CODE/
136 | 
137 | # Old / scratch
138 | *OLD_*
139 | 
140 | # Models
141 | tf_models
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RecSysModels
 2 | This package implements various Recommender System algorithms for implicit feedback and sequential recommendation, written in Python and [TensorFlow](https://www.tensorflow.org). It aims to provide clear, annotated, and efficient implementations of these algorithms, along with wrapper classes and methods for easy experimentation and usage.
 3 | 
 4 | ## Implicit Feedback
 5 | This package focuses on recommendations based on sequential and [implicit feedback](http://yifanhu.net/PUB/cf.pdf). In these settings there is no explicit numerical rating of items by users - only a record of the actions they have taken. We therefore observe only positive feedback: if a user `u` has not interacted with item `i`, it may be because they dislike the item (a true negative) or merely because they have not encountered it yet (a potential positive).
 6 | 
 7 | The algorithms implemented here approach implicit-feedback recommendation from a pairwise ranking perspective, where we assume that an item a user has interacted with should be ranked higher than an item that the user has not yet interacted with.
 8 | 
 9 | ## Algorithms Implemented
10 | - Bayesian Personalized Ranking (__BPR__), from ['BPR: Bayesian Personalized Ranking from Implicit Feedback'](https://arxiv.org/abs/1205.2618) (Rendle et al. 2009)
11 | - Factorized Personalized Markov Chains (__FPMC__), from ['Factorizing personalized Markov chains for next-basket recommendation'](https://dl.acm.org/citation.cfm?id=1772773) (Rendle et al. 2010)
12 | - __TransRec__, from ['Translation-based Recommendation'](https://arxiv.org/abs/1707.02410) (He et al. 2017)
13 | 
14 | ## Installation
15 | `RecSysModels` is on [`PyPI`](https://pypi.org/), so you can install the package with `pip`:
16 | ```bash
17 | $ pip install recsys_models
18 | ```
19 | 
20 | ## Dependencies
21 | - [`Python 3+`](https://www.python.org/) (3.6 may be required for TensorFlow GPU on Windows)
22 | - [`tensorflow`](https://www.tensorflow.org/install/) or [`tensorflow-gpu`](https://www.tensorflow.org/install/gpu)
23 | - [`numpy`](http://www.numpy.org/)
24 | - [`pandas`](https://pandas.pydata.org/pandas-docs/stable/index.html)
25 | - [`Jupyter`/`JupyterLab`](https://jupyter.org/) (if you want to run the notebook)
26 | 
27 | ## Sample Usage
28 | See the [`sample_pipeline` Jupyter Notebook](https://github.com/shuyangli94/RecSysModels/blob/master/sample_pipeline.ipynb) for sample usage. In order to run it, you will need to download the [MovieLens 1M Dataset](https://grouplens.org/datasets/movielens/1m/), released in 2003 by the wonderful folks at the [GroupLens Lab](https://grouplens.org/) at the University of Minnesota.
29 | 
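For a condensed picture of the training loop, here is a minimal end-to-end sketch (illustrative rather than verbatim from the notebook - it assumes the MovieLens interactions have already been converted to `datasets/ml-1m/interactions.msgpack` as `recsys_models.data.get_base_data` expects, and the latent dimensionality `k` and evaluation-set `size` below are arbitrary choices):

```python
import tensorflow as tf

from recsys_models.data import get_processed_df
from recsys_models.data.sampling import train_test_validation_split
from recsys_models.models.bpr import BPR_MF
from recsys_models.pipeline import train_model

# Load, 5-core filter, and remap MovieLens interactions to integer IDs
df, user_col, item_col = get_processed_df('movielens', cores=5)
train_df, val_df, test_df, by_user_df = train_test_validation_split(df, size=100000)

model = BPR_MF(n_users=df['u'].nunique(), n_items=df['i'].nunique(), k=16)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model, train_auc, val_auc, test_auc = train_model(
        sess, model, train_df,
        val_df[['u', 'i', 'j']].values,    # validation (u, i, j) ranking triplets
        test_df[['u', 'i', 'j']].values,   # test (u, i, j) ranking triplets
        n_items=df['i'].nunique(),         # forwarded to the negative sampler
        items_by_user=by_user_df['items'].to_dict(),
    )
```

Note that the codebase uses the TensorFlow 1.x graph-mode API (`tf.placeholder`, `tf.Session`, `tf.contrib.opt.LazyAdamOptimizer`).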
30 | ## Interoperability
31 | For interoperability, this package supports initializing a model with pretrained weights in the form of `numpy` arrays exported from models trained under other frameworks. Please see individual model files (e.g. [BPR](https://github.com/shuyangli94/RecSysModels/blob/master/recsys_models/models/bpr.py)) for a description of trainable variables and their shapes.
32 | 
33 | 
34 | ###### This package is released under [GNU GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) by [Shuyang Li](http://shuyangli.me/)
--------------------------------------------------------------------------------
/recsys_models/pipeline.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | -*- coding: utf-8 -*-
 3 | 
 4 | Model training pipelines
 5 | 
 6 | Author: Shuyang Li
 7 | License: GNU GPLv3
 8 | '''
 9 | import tensorflow as tf
10 | from datetime import datetime
11 | from recsys_models.data.sampling import uniform_sample_from_df
12 | 
13 | def train_model(session, model, train_df, validation_mat, test_mat,
14 |                 n_iterations=2500, batch_size=512,
15 |                 min_epochs=10, max_epochs=200, stopping_threshold=1e-5,
16 |                 **sampling_kwargs):
17 |     '''
18 |     Perform mini-batch optimization with the given model on the provided data.
19 | 
20 |     Arguments:
21 |         session {tf.Session} -- TensorFlow session instance
22 |         model {RecSysModel} -- RecSysModel instance
23 |         train_df {pd.DataFrame} -- DF of user-item interactions for training data
24 |         validation_mat {np.array} -- matrix / 2d array of validation ranking tuples
25 |         test_mat {np.array} -- matrix / 2d array of test ranking tuples
26 | 
27 |     Keyword Arguments:
28 |         n_iterations {int} -- Number of batches per epoch (Default: {2500})
29 |         batch_size {int} -- Number of training examples per mini-batch (Default: {512})
30 |         min_epochs {int} -- Train model for at least this many epochs (Default: {10})
31 |         max_epochs {int} -- Maximum number of epochs for which to train model (Default: {200})
32 |         stopping_threshold {float} -- Stop the training if validation AUC change falls below this value (Default: {1e-5})
33 | 
34 |     Returns:
35 |         RecSysModel -- Trained model object
36 |         float -- Training AUC
37 |         float -- Validation AUC
38 |         float -- Test AUC
39 |     '''
40 |     start = datetime.now()
41 | 
42 |     # Get initial validation AUC
43 |     prior_auc = model.evaluate_auc(session, validation_mat)
44 |     test_auc = model.evaluate_auc(session, test_mat)
45 |     print('{} - Prior: {:.5f} Validation AUC, {:.5f} Testing AUC'.format(
46 |         datetime.now() - start, prior_auc, test_auc
47 |     ))
48 | 
49 |     # Epochs of training
50 |     epoch_num = 0
51 |     for epoch_num in range(max_epochs):
52 | 
53 |         # Make epoch training batch
54 |         training_batch = uniform_sample_from_df(
55 |             train_df, n_iterations * batch_size, **sampling_kwargs
56 |         )
57 | 
58 |         # Train the model
59 |         epoch_loss = model.train_epoch(session, training_batch, n_iterations, batch_size)
60 | 
61 |         # Get the full training/validation AUCs
62 |         train_auc = model.evaluate_auc(session, training_batch)
63 |         validation_auc = model.evaluate_auc(session, validation_mat)
64 | 
65 |         # Compute change in validation AUC for the stopping criterion
66 |         delta_auc = validation_auc - prior_auc
67 |         prior_auc = validation_auc
68 |         print('[{} - Epoch {}] {:.5f} Loss, {:.5f} Training AUC, {:.5f} Validation AUC ({:.5f} Change)'.format(
69 |             datetime.now() - start, epoch_num + 1, epoch_loss, train_auc, validation_auc, delta_auc
70 |         ))
71 | 
72 |         # Stopping condition (give it a few epochs to find its bearings)
73 |         if epoch_num > min_epochs and delta_auc < stopping_threshold:
74 |             break
75 | 
76 |     # Evaluate the final trained model
77 |     test_auc = model.evaluate_auc(session, test_mat)
78 |     print('[{} - Epoch {}] - STOPPED. Final test AUC: {:.5f}'.format(
79 |         datetime.now() - start, epoch_num + 1, test_auc
80 |     ))
81 | 
82 |     return model, train_auc, validation_auc, test_auc
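Because `train_model` forwards `**sampling_kwargs` to `uniform_sample_from_df`, the same loop can also drive the sequential models, which consume (u, p, i, j) batches. A hypothetical sketch, reusing the names from the README example (`sess`, `train_df`, `val_df`, `test_df`, `n_items`, `items_by_user`); the `'prior'` column is the one created by `recsys_models.data.map_user_items`:

```python
from recsys_models.models.fpmc import FPMC

# FPMC consumes (user, prior item, observed item, unobserved item) batches,
# so we sample the positive columns from train_df and emit the columns in
# the order the model's placeholders expect.
fpmc = FPMC(n_users=n_users, n_items=n_items, k=16)
fpmc, train_auc, val_auc, test_auc = train_model(
    sess, fpmc, train_df,
    val_df[['u', 'prior', 'i', 'j']].values,
    test_df[['u', 'prior', 'i', 'j']].values,
    n_items=n_items,
    items_by_user=items_by_user,
    sample_columns=['u', 'prior', 'i'],      # positive columns drawn from train_df
    column_order=['u', 'prior', 'i', 'j'],   # (u, p, i, j) order fed to FPMC
)
```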
--------------------------------------------------------------------------------
/recsys_models/data/sampling.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | -*- coding: utf-8 -*-
 3 | 
 4 | Sampling and train/test/validation splitting utilities
 5 | 
 6 | Author: Shuyang Li
 7 | License: GNU GPLv3
 8 | '''
 9 | 
10 | import os
11 | import gc
12 | import pandas as pd
13 | import numpy as np
14 | from datetime import datetime
15 | import random
16 | 
17 | def sample_unobserved(df, user_interactions_dict, n_items, size=500000, use_original_actions=False):
18 |     '''
19 |     Samples unobserved items for each (user, item) interaction pair.
20 |     Creates pairwise comparison tuples (user, observed item, unobserved item) where the
21 |     observed items are drawn from the provided DF.
22 | 
23 |     Arguments:
24 |         df {pd.DataFrame} -- DataFrame of user-item interactions
25 |         user_interactions_dict {dict} -- Dictionary of user : observed items for that user
26 |         n_items {int} -- Number of unique items
27 | 
28 |     Keyword Arguments:
29 |         size {int} -- Desired number of rows in the output DataFrame (Default: {500000})
30 |         use_original_actions {bool} -- If True, the `size` argument is ignored and the original
31 |             interaction rows are used. (Default: {False})
32 | 
33 |     Returns:
34 |         pd.DataFrame -- DataFrame with 'j' unobserved items
35 |     '''
36 |     if use_original_actions:
37 |         output_df = df.copy()
38 |     else:
39 |         output_df = df.sample(n=size, replace=True).reset_index(drop=True)
40 |     js = []
41 | 
42 |     # Iterating through a series or list is WAY faster than iterrows on a DF!!
43 |     for u in output_df['u']:
44 |         j = random.randint(0, n_items - 1)
45 |         while j in user_interactions_dict[u]:
46 |             j = random.randint(0, n_items - 1)
47 |         js.append(j)
48 | 
49 |     output_df['j'] = js
50 |     return output_df
51 | 
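# Note: sample_unobserved rejection-samples each unobserved item j -- it keeps
# redrawing until the draw falls outside the user's observed set. This stays
# cheap as long as each user's interaction set is small relative to n_items.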
52 | def uniform_sample_from_df(
53 |         base_df, size, n_items, items_by_user,
54 |         sample_columns=['u', 'i'], column_order=['u', 'i', 'j'],
55 |         item_properties_df=None):
56 |     '''
57 |     Uniformly samples user-item-unobserved interactions from a provided DataFrame.
58 |     This function is more general than `sample_unobserved` and allows for column selection,
59 |     ordering, and attaching item properties/features.
60 | 
61 |     Arguments:
62 |         base_df {pd.DataFrame} -- User-item interactions DF from which we generate positive samples
63 |         size {int} -- Number of interactions to sample
64 |         n_items {int} -- Number of total items
65 |         items_by_user {dict} -- Mapping of user -> IDs of items they have interacted with
66 | 
67 |     Keyword Arguments:
68 |         sample_columns {list} -- Which columns to sample from base DF (default: {['u', 'i']})
69 |         column_order {list} -- Order of columns in matrix output (default: {['u', 'i', 'j']})
70 |         item_properties_df {pd.DataFrame} -- DF containing attributes/features per item
71 |             (default: {None})
72 | 
73 |     Returns:
74 |         np.ndarray -- 2D array of sampled rows
75 |     '''
76 |     # Uniformly sample positive+negative interactions
77 |     batch_df = sample_unobserved(base_df[sample_columns], items_by_user, n_items, size)
78 | 
79 |     # Fill out item properties if we are using features
80 |     if item_properties_df is not None:
81 |         j_properties = item_properties_df.loc[batch_df['j'].values]
82 |         for col in j_properties.columns:
83 |             batch_df[col] = j_properties[col].values
84 | 
85 |     return batch_df[column_order].values
86 | 
87 | def get_user_interactions_df(base_df):
88 |     '''
89 |     Gets a DataFrame of items and # interactions indexed by user
90 | 
91 |     Arguments:
92 |         base_df {pd.DataFrame} -- DataFrame of user-item interactions
93 | 
94 |     Returns:
95 |         pd.DataFrame -- DataFrame indexed by user, with the following columns:
96 |             count -- # of interactions made by this user
97 |             items -- Set of items that the user has interacted with
98 |     '''
99 |     by_user_df = base_df.groupby(['u']).agg({
100 |         'i': [
101 |             'count',
102 |             lambda x: set(x)
103 |         ]
104 |     })
105 |     by_user_df.columns = ['count', 'items']
106 |     return by_user_df
107 | 
108 | def train_test_validation_split(df, size):
109 |     '''
110 |     Generates training, testing, and validation DataFrames from a provided interactions DF
111 | 
112 |     First, we generate holdout sets:
113 |         Latest interaction/user -> test holdout
114 |         Second-to-last interaction/user -> validation holdout
115 | 
116 |     Test and validation datasets are created as follows:
117 |         (u, i) observed item interactions are drawn from the respective holdout sets
118 |         (j), the unobserved item for user u, is randomly sampled
119 | 
120 |     Arguments:
121 |         df {pd.DataFrame} -- DataFrame of all user-item interactions
122 |         size {int} -- Number of rows in each evaluation set (test/validation)
123 | 
124 |     Returns:
125 |         pd.DataFrame -- Training DF, WITHOUT unobserved items (j)
126 |         pd.DataFrame -- Validation DF, WITH unobserved items already sampled (j)
127 |         pd.DataFrame -- Testing DF, WITH unobserved items already sampled (j)
128 |         pd.DataFrame -- Per-user interactions DF, with the following columns:
129 |             'count' -- # of interactions by that user
130 |             'items' -- Set of IDs for items that the user has interacted with
131 |     '''
132 |     start = datetime.now()
133 |     n_items = df['i'].nunique()
134 | 
135 |     # Create the user -> n_interactions, items mapping DF
136 |     by_user_df = get_user_interactions_df(df)
137 |     by_user_dict = by_user_df['items'].to_dict()
138 |     print('{} - Created full user : item, interaction mappings df and user : items dict'.format(
139 |         datetime.now() - start
140 |     ))
141 | 
142 |     # Create holdout sets and training DF
143 |     holdout = df.groupby(['u'], as_index=False).tail(2)
144 |     holdout_test = holdout.groupby(['u'], as_index=False).tail(1)
145 |     train_df = df.loc[~df.index.isin(holdout.index)].copy()
146 |     holdout_validation = holdout.loc[~holdout.index.isin(holdout_test.index)]
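    # Interactions are date-sorted upstream (see map_user_items), so tail(2)
    # keeps each user's two most recent interactions; tail(1) of those is the
    # most recent (test holdout), and what remains is the second-most-recent
    # (validation holdout).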
147 |     print('{} - Created holdout sets of most recent interactions/user'.format(
148 |         datetime.now() - start
149 |     ))
150 | 
151 |     # Create validation DF
152 |     validation_df = sample_unobserved(
153 |         holdout_validation,
154 |         by_user_dict,
155 |         n_items,
156 |         size)
157 |     print('{} - Created bootstrap validation DF of size {}'.format(
158 |         datetime.now() - start, len(validation_df)
159 |     ))
160 | 
161 |     # Create test DF
162 |     test_df = sample_unobserved(
163 |         holdout_test,
164 |         by_user_dict,
165 |         n_items,
166 |         size)
167 |     print('{} - Created bootstrap testing DF of size {}'.format(
168 |         datetime.now() - start, len(test_df)
169 |     ))
170 | 
171 |     return train_df, validation_df, test_df, by_user_df
172 | 
--------------------------------------------------------------------------------
/recsys_models/models/__init__.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | -*- coding: utf-8 -*-
 3 | 
 4 | This module contains implementations for various implicit feedback recommender systems models
 5 | 
 6 | Author: Shuyang Li
 7 | License: GNU GPLv3
 8 | '''
 9 | 
10 | from abc import ABCMeta, abstractmethod
11 | from datetime import datetime
12 | import os
13 | import pickle
14 | import pandas as pd
15 | import numpy as np
16 | 
17 | class RecSysModel(metaclass=ABCMeta):
18 |     """
19 |     Encapsulating class for Recommender System models.
20 |     """
21 |     def __init__(self):
22 |         self.model_id = 'TEST'
23 |         self.loss = None
24 |         self.training_optimizer = None
25 |         self.p_uij = None
26 | 
27 |     ######################################
28 |     # Model Saving/Serialization
29 |     ######################################
30 | 
31 |     @property
32 |     def params(self):
33 |         '''
34 |         Returns the constructor parameters for the model.
35 | 
36 |         These are the arguments needed to re-instantiate the model class
37 |         (see `load`) -- not the trained weights (see `get_weights`).
38 | 
39 |         Returns:
40 |             object -- Model parameter 1
41 |             ...
42 |         '''
43 |         raise NotImplementedError
44 | 
45 |     @abstractmethod
46 |     def get_weights(self, session):
47 |         '''
48 |         Returns trained weights for all parameters in the model.
49 | 
50 |         Arguments:
51 |             session {tf.Session} -- TensorFlow session object
52 | 
53 |         Returns:
54 |             np.ndarray -- Weights for parameter 1
55 |             ...
56 |         '''
57 |         raise NotImplementedError
58 | 
59 |     def save(self, session, loc, suffix=''):
60 |         '''
61 |         Saves model parameters and weights to the following files, respectively:
62 | 
63 |             <loc>/<model_id><suffix>/
64 |                 params.pkl
65 |                 weights.pkl
66 | 
67 |         Arguments:
68 |             session {tf.Session} -- TensorFlow session
69 |             loc {str} -- Parent folder to store the item
70 | 
71 |         Keyword Arguments:
72 |             suffix {str} -- Optional string to append to model storage.
(Default: {''}) 73 | ''' 74 | start = datetime.now() 75 | 76 | # Create the model folder if it doesn't exist already 77 | model_folder = os.path.join(loc, '{}{}'.format(self.model_id, suffix)) 78 | if not os.path.exists(model_folder): 79 | os.mkdir(model_folder) 80 | 81 | # Save parameters 82 | params_file_loc = os.path.join(model_folder, 'params.pkl') 83 | with open(params_file_loc, 'wb') as model_params_file: 84 | pickle.dump( 85 | self.params, 86 | model_params_file, 87 | protocol=pickle.HIGHEST_PROTOCOL 88 | ) 89 | print('{} - Saved parameters to {}'.format( 90 | datetime.now() - start, params_file_loc 91 | )) 92 | 93 | # Save weights 94 | weights_file_loc = os.path.join(model_folder, 'weights.pkl') 95 | with open(weights_file_loc, 'wb') as model_weights_file: 96 | pickle.dump( 97 | self.get_weights(session), 98 | model_weights_file, 99 | protocol=pickle.HIGHEST_PROTOCOL 100 | ) 101 | print('{} - Saved weights to {}'.format( 102 | datetime.now() - start, weights_file_loc 103 | )) 104 | 105 | @classmethod 106 | def load(cls, loc): 107 | ''' 108 | Given a model folder with saved parameters and weights, reconstruct the model 109 | 110 | Arguments: 111 | loc {str} -- Location of the saved model folder, containing: 112 | params.pkl 113 | weights.pkl 114 | 115 | Returns: 116 | RecSysModel -- Model with loaded pretrained weights. 117 | ''' 118 | # Load parameters 119 | with open(os.path.join(loc, 'params.pkl'), 'rb') as model_params_file: 120 | params_list = list(pickle.load(model_params_file)) 121 | if params_list is None: 122 | params_list = [] 123 | 124 | # Load weights 125 | with open(os.path.join(loc, 'weights.pkl'), 'rb') as model_weights_file: 126 | weights_list = list(pickle.load(model_weights_file)) 127 | if weights_list is None: 128 | weights_list = [] 129 | 130 | return cls(*(params_list + weights_list)) 131 | 132 | ###################################### 133 | # Training and Evaluation 134 | ###################################### 135 | 136 | @abstractmethod 137 | def _session_run(self, session, input_batch, *args): 138 | ''' 139 | Computes graph variables based on inputs. 
140 | 141 | Arguments: 142 | session {tf.Session} -- TF Session 143 | input_batch {np.ndarray} -- 2d array or matrix 144 | 145 | Arbitrary Arguments: 146 | *args {tf.Variable} -- TF variables to be computed 147 | 148 | Returns: 149 | list -- TF Variable values 150 | ''' 151 | raise NotImplementedError 152 | 153 | @abstractmethod 154 | def debug(self, session, input_batch): 155 | ''' 156 | Debugger - indicates where variables are NaN / 157 | 158 | Arguments: 159 | session {tf.Session} -- TF Session 160 | input_batch {np.ndarray} -- 2d array or matrix 161 | 162 | Raises: 163 | Exception 164 | ''' 165 | raise NotImplementedError 166 | 167 | def train_batch(self, session, input_batch): 168 | ''' 169 | Training with a single batch 170 | 171 | Arguments: 172 | session {tf.Session} -- TF Session 173 | input_batch {np.ndarray} -- 2d array or matrix 174 | 175 | Returns: 176 | float -- Batch loss 177 | ''' 178 | batch_loss, _ = self._session_run(session, input_batch, self.loss, self.training_optimizer) 179 | 180 | # Identify errors in batch loss 181 | if np.isnan(batch_loss): 182 | self.debug(session, input_batch) 183 | 184 | return batch_loss 185 | 186 | def train_epoch(self, session, input_matrix, n_iterations, batch_size): 187 | ''' 188 | Trains for a single epoch 189 | 190 | Arguments: 191 | session {tf.Session} -- TF Session 192 | input_matrix {np.ndarray} -- 2d array or matrix containing all of the training data 193 | for that epoch 194 | n_iterations {int} -- Number of batches per epoch 195 | batch_size {int} -- Number of training examples per batch 196 | 197 | Returns: 198 | float -- Epoch loss 199 | ''' 200 | epoch_loss = 0.0 201 | 202 | # Train on each batch 203 | for iter_num in range(1, n_iterations + 1): 204 | input_batch = input_matrix[batch_size * (iter_num-1) : batch_size * iter_num, :] 205 | batch_loss = self.train_batch(session, input_batch) 206 | epoch_loss += batch_loss 207 | 208 | epoch_loss = epoch_loss / float(n_iterations) 209 | return epoch_loss 210 | 211 | def evaluate_auc(self, session, input_data): 212 | ''' 213 | Evaluate the rankings for testing/validation data 214 | 215 | Arguments: 216 | session {tf.Session} -- TF Session 217 | input_data {np.ndarray} -- 2d array or matrix 218 | 219 | Returns: 220 | float -- AUC for the input data 221 | ''' 222 | # Get predictions 223 | ranking_predictions = self._session_run(session, input_data, self.p_uij)[0] 224 | 225 | # This is the magic - it's cheaper to plug it into a pandas DF and then 226 | # groupby-mean-mean to do mean-of-means on x_uij -> AUC 227 | pred_df = pd.DataFrame(input_data[:, :3], columns=['u', 'i', 'j']) 228 | pred_df['ranking'] = ranking_predictions 229 | pred_df['prediction'] = pred_df['ranking'] > 0 230 | auc = pred_df[['u', 'prediction']].groupby(['u']).mean()['prediction'].mean() 231 | return auc 232 | 233 | def pop_rec(train_df, eval_df): 234 | ''' 235 | PopRec model: For a triplet (u, i, j) of observed item i and unobserved item j, rank first the 236 | item that was most popular in the training data. 237 | 238 | Arguments: 239 | train_df {pd.DataFrame} -- DF of training user-item interactions 240 | eval_df {pd.DataFrame} -- DF of evaluation user-item interactions 241 | 242 | Returns: 243 | float -- PopRec AUC 244 | ''' 245 | # Get popularity of each item in the training data 246 | train_popularities = train_df[['u', 'i']].groupby(['i'])['u'].count().to_dict() 247 | 248 | # For each (u, i, j) triplet, positive prediction is whether i is more popular than j. 
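    # Ties -- including items never seen in training, which default to a
    # popularity of 0 via .get(i, 0) -- fail the strict inequality below and
    # count as incorrect rankings.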
249 | predictions = [train_popularities.get(i, 0) > train_popularities.get(j, 0) for \ 250 | i, j in zip(eval_df['i'], eval_df['j'])] 251 | 252 | # AUC = mean of per-user AUC 253 | auc = eval_df[['u']].assign(yhat=predictions).groupby(['u'])['yhat'].mean().mean() 254 | return auc 255 | -------------------------------------------------------------------------------- /recsys_models/data/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | -*- coding: utf-8 -*- 3 | 4 | This module contains data preprocessing and sampling utilities. 5 | 6 | Author: Shuyang Li 7 | License: GNU GPLv3 8 | ''' 9 | 10 | import os 11 | import gc 12 | import pandas as pd 13 | import numpy as np 14 | from datetime import datetime 15 | 16 | USER_ITEM_COLS = { 17 | 'allrecipes': ['user_id', 'recipe_id'], 18 | 'movielens': ['user_id', 'item_id'] 19 | } 20 | 21 | def print_basic_stats(df, user_col, item_col): 22 | ''' 23 | Prints basic summary stats for a DF: 24 | # users 25 | # items 26 | # interactions 27 | sparsity % 28 | avg. interactions/user 29 | avg. interactions/item 30 | memory usage 31 | 32 | Arguments: 33 | df {pd.DataFrame} -- DataFrame with user and item interactions recorded 34 | user_col {str} -- user ID column name 35 | item_col {str} -- item ID column name 36 | ''' 37 | gc.collect() 38 | 39 | n_items = df[item_col].nunique() 40 | n_users = df[user_col].nunique() 41 | print('{} Users interacted with {} items {} times ({:.4f}% sparsity, {:.3f} actions/user, {:.3f} actions/item)'.format( 42 | n_users, n_items, len(df), 100.0 * (1.0 - float(len(df))/(n_items * n_users)), 43 | len(df)/n_users, len(df)/n_items 44 | )) 45 | 46 | # Get base dataset memory usage 47 | print('') 48 | print(df.info(memory_usage='deep')) 49 | print('') 50 | 51 | def get_base_data(dataset): 52 | ''' 53 | Retrieve base data DF (unprocessed) 54 | 55 | Arguments: 56 | dataset {str} -- Name of dataset (e.g. `allrecipes`, `movielens`) 57 | 58 | Raises: 59 | Exception -- Specified dataset not supported 60 | 61 | Returns: 62 | pd.DataFrame -- DataFrame with the following columns: 63 | user_col 64 | item_col 65 | 'date' 66 | str -- Name of original user ID column 67 | str -- Name of original item ID column 68 | ''' 69 | # Sanity check 70 | if dataset not in USER_ITEM_COLS: 71 | raise Exception('Requested dataset {} not supported. 
Use one from: {}'.format( 72 | dataset, set(USER_ITEM_COLS.keys()) 73 | )) 74 | 75 | start = datetime.now() 76 | user_col, item_col = USER_ITEM_COLS[dataset] 77 | if dataset == 'allrecipes': 78 | # Get original msgpack 79 | df = pd.read_msgpack('allrecipes_uri_enriched.msgpack') 80 | elif dataset == 'movielens': 81 | movie_dir = os.path.join('datasets', 'ml-1m') 82 | interactions_loc = os.path.join(movie_dir, 'interactions.msgpack') 83 | 84 | # Get original msgpack 85 | df = pd.read_msgpack(interactions_loc) 86 | 87 | # Process date 88 | df['date'] = pd.to_datetime(df['timestamp'].apply(datetime.utcfromtimestamp)) 89 | 90 | df = df.drop(columns=[c for c in df.columns if c not in {user_col, item_col, 'date'}]) 91 | print('{} - Retrieved base DF for {}'.format( 92 | datetime.now() - start, dataset 93 | )) 94 | 95 | return df, user_col, item_col 96 | 97 | def process_temporal_columns(df): 98 | ''' 99 | Gets the following temporal columns: 100 | year 101 | month 102 | day_of_week 103 | day_of_year (adjusted for leap year, Feb 29 -> 0) 104 | 105 | Arguments: 106 | df {pd.DataFrame} -- DataFrame with a 'date' column 107 | 108 | Returns: 109 | pd.DataFrame -- DataFrame with additional information 110 | ''' 111 | start = datetime.now() 112 | df['year'] = df['date'].dt.year.astype(int) 113 | df['month'] = df['date'].dt.month.astype(int) 114 | df['day_of_week'] = df['date'].dt.dayofweek.astype(int) 115 | 116 | # Adjust day of year -> move february 29 from day 60 to day 0 117 | df['day_of_year'] = df['date'].dt.dayofyear.astype(int) 118 | df['is_leap_year'] = df['date'].dt.is_leap_year.astype(int) 119 | df.loc[(df['day_of_year'] == 60) & (df['is_leap_year'] == True), 'day_of_year'] = 0 120 | df.loc[(df['day_of_year'] > 60) & (df['is_leap_year'] == True), 'day_of_year'] -= 1 121 | 122 | df = df.drop(columns=['is_leap_year']) 123 | print('{} - Added proper temporal columns to df'.format( 124 | datetime.now() - start 125 | )) 126 | return df 127 | 128 | def kcore_interaction_stats(df, user_col, item_col, core): 129 | ''' 130 | Performs k-core on a graph. Preserves all users with at least k interactions and all items with at least k interactions. 131 | 132 | Arguments: 133 | df {pd.DataFrame} -- DataFrame with user and item interactions recorded 134 | user_col {str} -- user ID column name 135 | item_col {str} -- item ID column name 136 | core {int} -- cores 137 | 138 | Returns: 139 | invalid_users -- set of IDs of users with fewer than k interactions 140 | invalid_items -- set of IDs of items with fewer than k interactions 141 | ''' 142 | n_items = df[item_col].nunique() 143 | n_users = df[user_col].nunique() 144 | user_degrees = df.groupby([user_col])[item_col].count() 145 | item_degrees = df.groupby([item_col])[user_col].count() 146 | invalid_users = set(user_degrees[user_degrees < core].index) 147 | invalid_items = set(item_degrees[item_degrees < core].index) 148 | print('Removing {}/{} users ({:.2f} %) and {}/{} items ({:.2f} %) from {} total interactions ({:.5f}% Sparsity)'.format( 149 | len(invalid_users), n_users, 100 * len(invalid_users) / n_users, 150 | len(invalid_items), n_items, 100 * len(invalid_items) / n_items, 151 | len(df), 100 * (1 - len(df) / (n_items * n_users)) 152 | )) 153 | return invalid_users, invalid_items 154 | 155 | def kcore(df, user_col, item_col, core): 156 | ''' 157 | Performs k-core on a graph. Preserves all users with at least k interactions and all items with at least k interactions. 
158 | 159 | Arguments: 160 | df {pd.DataFrame} -- DataFrame with user and item interactions recorded 161 | user_col {str} -- user ID column name 162 | item_col {str} -- item ID column name 163 | core {int} -- cores 164 | 165 | Returns: 166 | pd.DataFrame -- k-core graph DF 167 | ''' 168 | start = datetime.now() 169 | iters = 0 170 | while True: 171 | invalid_users, invalid_items, = kcore_interaction_stats(df, user_col, item_col, core) 172 | iters += 1 173 | if len(invalid_users) == 0 and len(invalid_items) == 0: 174 | print('{} - Done: {}-core decomposition after {} iterations'.format( 175 | datetime.now() - start, core, iters 176 | )) 177 | break 178 | 179 | # Remove invalid users and items 180 | df = df[~df[user_col].isin(invalid_users)] 181 | df = df[~df[item_col].isin(invalid_items)] 182 | 183 | return df 184 | 185 | def map_user_items(df, user_col, item_col): 186 | ''' 187 | Maps user/item IDs to integer IDs. 188 | 189 | Arguments: 190 | df {pd.DataFrame} -- DataFrame with user and item interactions recorded 191 | user_col {str} -- user ID column name 192 | item_col {str} -- item ID column name 193 | 194 | Returns: 195 | pd.DataFrame -- DataFrame with mapped [0, n_users) users and [0, n_items) items, as well as 196 | prior item for each item/user combo. 197 | ''' 198 | start = datetime.now() 199 | 200 | # Get all unique reviewers and items across training and test set 201 | unique_users = np.array(list(df[user_col].unique())) 202 | unique_items = np.array(list(df[item_col].unique())) 203 | n_items = len(unique_items) 204 | n_users = len(unique_users) 205 | 206 | # MAP USERS AND ITEMS TO INT 207 | df = df.drop(columns=['u', 'i'], errors='ignore') 208 | user_map = pd.DataFrame(list(zip(unique_users, np.arange(n_users))), columns=[user_col, 'u']) 209 | item_map = pd.DataFrame(list(zip(unique_items, np.arange(n_items))), columns=[item_col, 'i']) 210 | df = pd.merge(df, user_map, on=user_col) 211 | df = pd.merge(df, item_map, on=item_col) 212 | df['u'] = df['u'].astype(np.int32) 213 | df['i'] = df['i'].astype(np.int32) 214 | print('{} - Mapped u-i indices'.format(datetime.now() - start)) 215 | 216 | # Create 'prior item' 217 | df = df.sort_values(['date']) 218 | df['prior'] = df.groupby(['u'])['i'].shift(1) 219 | df = df.dropna(subset=['prior']) 220 | df['prior'] = df['prior'].astype(int) 221 | print('{} - Created "prior" column'.format(datetime.now() - start)) 222 | 223 | return df 224 | 225 | def get_processed_df(dataset, cores, overwrite=False): 226 | ''' 227 | Processes a DataFrame to include the following columns: 228 | original user ID 229 | original item ID 230 | 'u' - mapped integer user ID 231 | 'prior' - mapped integer item ID of previous item (in per-user item sequence) 232 | 'i' - mapped integer item ID 233 | 'date' 234 | 'year' 235 | 'month' 236 | 'day_of_year' 237 | 'day_of_week' 238 | 239 | Applies the following preprocessing steps: 240 | K-core 241 | Get temporal columns 242 | Map user/item IDs to contiguous integer series 243 | 244 | Arguments: 245 | dataset {str} -- Name of a supported dataset (e.g. `allrecipes`, `gk`, `movielens`) 246 | cores {int} -- Minimum # of interactions per user and item 247 | 248 | Keyword Arguments: 249 | overwrite {bool} -- Whether to re-process data (default: {False}) 250 | 251 | Returns: 252 | pd.DataFrame -- Processed DataFrame 253 | str -- Original user ID column name 254 | str -- Original item ID column name 255 | ''' 256 | # Sanity check 257 | if dataset not in USER_ITEM_COLS: 258 | raise Exception('Requested dataset {} not supported. 
Use one from: {}'.format(
259 |             dataset, set(USER_ITEM_COLS.keys())
260 |         ))
261 | 
262 |     dataset_name = '{}_{}.msgpack'.format(dataset, cores)
263 |     user_col, item_col = USER_ITEM_COLS[dataset]
264 | 
265 |     # If preprocessed and stored ahead of time, retrieve it
266 |     if os.path.exists(dataset_name) and not overwrite:
267 |         start = datetime.now()
268 |         df = pd.read_msgpack(dataset_name)
269 |         print('{} - Retrieved preprocessed data for {}, {}-cores'.format(
270 |             datetime.now() - start, dataset, cores
271 |         ))
272 |         print_basic_stats(df, user_col, item_col)
273 |         return df, user_col, item_col
274 | 
275 |     # Get basic data
276 |     df, user_col, item_col = get_base_data(dataset)
277 | 
278 |     # Get temporal columns
279 |     df = process_temporal_columns(df)
280 | 
281 |     # K-core
282 |     df = kcore(df, user_col, item_col, cores)
283 | 
284 |     # Mapping
285 |     df = map_user_items(df, user_col, item_col)
286 | 
287 |     # Get stats
288 |     gc.collect()
289 |     print_basic_stats(df, user_col, item_col)
290 | 
291 |     # Store DF
292 |     start = datetime.now()
293 |     df.to_msgpack(dataset_name)
294 |     print('{} - Processed and stored dataset to {}'.format(
295 |         datetime.now() - start, dataset_name
296 |     ))
297 | 
298 |     return df, user_col, item_col
--------------------------------------------------------------------------------
/recsys_models/models/bpr.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | -*- coding: utf-8 -*-
 3 | 
 4 | TensorFlow implementation of Bayesian Personalized Ranking for implicit feedback,
 5 | with underlying Matrix Factorization model
 6 | 
 7 | Source paper(s):
 8 | BPR: Bayesian Personalized Ranking from Implicit Feedback
 9 | Rendle, et al. 2009
10 | 
11 | Author: Shuyang Li
12 | License: GNU GPLv3
13 | '''
14 | 
15 | import tensorflow as tf
16 | from recsys_models.models import RecSysModel
17 | import pandas as pd
18 | import numpy as np
19 | 
20 | class BPR_MF(RecSysModel):
21 |     '''
22 |     Bayesian Personalized Ranking with Matrix Factorization
23 | 
24 |     Input format:
25 |         (u, i, j) - user u, observed item i, unobserved item j
26 | 
27 |     Intuition for pairwise ranking for implicit feedback:
28 |         Users will rank observed items above unobserved items
29 |         We want to maximize the log likelihood of the batch rankings
30 | 
31 |     Variables
32 |         gamma_u -> user embedding
33 |         gamma_i -> observed item embedding
34 |         gamma_j -> unobserved item embedding
35 |         beta_i -> observed item bias
36 |         beta_j -> unobserved item bias
37 | 
38 |     Weights to be optimized are on the order of:
39 |         k * (n_items + n_users) + n_items
40 | 
41 |     Weights:
42 |         U_mf (n_users x k) -- User latent factor matrix
43 |         I_mf (n_items x k) -- Item latent factor matrix
44 |         Bi_emb (n_items x 1) -- Item biases
45 | 
46 |     Optimization Criterion:
47 |         Maximize sum of log-likelihoods of rankings:
48 |             ln(sigmoid(p_ui - p_uj))
49 |         Where
50 |             p_ui -> P(i|u), prop. to <gamma_u, gamma_i> + beta_i
51 |             p_uj -> P(j|u), prop. to <gamma_u, gamma_j> + beta_j
52 | 
53 |     Regularization:
54 |         embeddings (gamma_u, gamma_i, gamma_j)
55 |         biases (beta_i, beta_j)
56 |     '''
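    # A note on the criterion implemented below: this is BPR-Opt (Rendle et al. 2009),
    #     maximize  sum over (u,i,j) of  ln sigmoid(x_ui - x_uj)  -  L2 regularization
    # with x_ui = <gamma_u, gamma_i> + beta_i; the loss negates this and averages
    # the log-likelihood over the batch (tf.reduce_mean) rather than summing it.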
 57 |     def __init__(
 58 |             self, n_users, n_items, k=2, lambda_emb=1e-6, lambda_bias=1e-6,
 59 |             opt_type=tf.contrib.opt.LazyAdamOptimizer,
 60 |             opt_args=dict(),
 61 |             U_mf_weights=None,
 62 |             I_mf_weights=None,
 63 |             Bi_mf_weights=None):
 64 |         '''
 65 |         Arguments:
 66 |             n_users {int} -- Number of users
 67 |             n_items {int} -- Number of items
 68 | 
 69 |         Keyword Arguments:
 70 |             k {int} -- Latent dimensionality (default: {2})
 71 |             lambda_emb {float} -- Embedding regularization rate (default: {1e-6})
 72 |             lambda_bias {float} -- Bias term regularization rate (default: {1e-6})
 73 |             opt_type {tf.train.Optimizer} -- TF optimizer class (default: {tf.contrib.opt.LazyAdamOptimizer})
 74 |             opt_args {dict} -- Dictionary of arguments for the TF optimizer class
 75 |             U_mf_weights {np.ndarray} -- Initial weights for latent user factors (default: {None})
 76 |             I_mf_weights {np.ndarray} -- Initial weights for latent item factors (default: {None})
 77 |             Bi_mf_weights {np.ndarray} -- Initial weights for item biases (default: {None})
 78 |         '''
 79 |         # Parameters
 80 |         self.n_users = n_users                  # Number of users
 81 |         self.n_items = n_items                  # Number of items
 82 |         self.k = k                              # Latent dimensionality
 83 |         self.lambda_emb = lambda_emb            # Regularization rate (embeddings)
 84 |         self.lambda_bias = lambda_bias          # Regularization rate (bias)
 85 |         self.optimizer = opt_type(**opt_args)   # Optimizer
 86 |         self.opt_type = opt_type                # Optimizer class
 87 |         self.opt_args = opt_args                # Optimizer arguments
 88 | 
 89 |         # Model ID
 90 |         self.model_id = 'bpr-mf_{}k_{}l2_{}l2bias'.format(
 91 |             self.k, self.lambda_emb, self.lambda_bias
 92 |         )
 93 | 
 94 |         # Initialized variable weights (None-type checks because of ambiguous truth values for arr)
 95 |         U_mf_init = U_mf_weights if U_mf_weights is not None else \
 96 |             tf.nn.l2_normalize(
 97 |                 tf.random_normal([n_users, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
 98 |                 axis=1
 99 |             )
100 |         I_mf_init = I_mf_weights if I_mf_weights is not None else \
101 |             tf.nn.l2_normalize(
102 |                 tf.random_normal([n_items, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
103 |                 axis=1
104 |             )
105 |         Bi_mf_init = Bi_mf_weights if Bi_mf_weights is not None else \
106 |             tf.zeros([n_items, 1], dtype=tf.float32)
107 | 
108 |         # (Batch) Placeholders
109 |         self.u = tf.placeholder(tf.int32, [None])   # User ID
110 |         self.i = tf.placeholder(tf.int32, [None])   # Observed item ID
111 |         self.j = tf.placeholder(tf.int32, [None])   # Unobserved item ID
112 | 
113 |         # Variables - normalize to unit ball to mitigate curse of dimensionality (Lin et al.
2015) 114 | self.U_mf = tf.Variable( # User embedding matrix 115 | initial_value=U_mf_init, 116 | trainable=True 117 | ) 118 | self.I_mf = tf.Variable( # Item embedding matrix 119 | initial_value=I_mf_init, 120 | trainable=True 121 | ) 122 | self.Bi_mf = tf.Variable( # Item bias embedding vector 123 | initial_value=Bi_mf_init, 124 | trainable=True 125 | ) 126 | 127 | # Batch Embeddings 128 | self.u_embeddings = tf.nn.embedding_lookup(self.U_mf, self.u) 129 | self.i_embeddings = tf.nn.embedding_lookup(self.I_mf, self.i) 130 | self.j_embeddings = tf.nn.embedding_lookup(self.I_mf, self.j) 131 | self.i_biases = tf.nn.embedding_lookup(self.Bi_mf, self.i) 132 | self.j_biases = tf.nn.embedding_lookup(self.Bi_mf, self.j) 133 | 134 | # Likelihoods 135 | self.p_ui = tf.reduce_sum( 136 | tf.multiply(self.u_embeddings, self.i_embeddings), 1, keepdims=True 137 | ) + self.i_biases 138 | self.p_uj = tf.reduce_sum( 139 | tf.multiply(self.u_embeddings, self.j_embeddings), 1, keepdims=True 140 | ) + self.j_biases 141 | self.p_uij = self.p_ui - self.p_uj 142 | # Add epsilon for validity at 0 143 | self.log_likelihood = tf.log(tf.sigmoid(self.p_uij) + 1e-8) 144 | 145 | # Regularization - Factorization terms 146 | self.l2_emb = self.lambda_emb * tf.add_n([ 147 | tf.nn.l2_loss(self.u_embeddings), 148 | tf.nn.l2_loss(self.i_embeddings), 149 | tf.nn.l2_loss(self.j_embeddings) 150 | ]) 151 | 152 | # Regularization - Bias terms 153 | self.l2_bias = self.lambda_bias * tf.add_n([ 154 | tf.nn.l2_loss(self.i_biases), 155 | tf.nn.l2_loss(self.j_biases) 156 | ]) 157 | 158 | # Loss 159 | self.loss = self.l2_emb + self.l2_bias - tf.reduce_mean(self.log_likelihood) 160 | 161 | # Training optimizer 162 | self.training_optimizer = self.optimizer.minimize(self.loss) 163 | 164 | ###################################### 165 | # Model Saving/Serialization 166 | ###################################### 167 | 168 | @property 169 | def params(self): 170 | return [ 171 | self.n_users, # Number of users 172 | self.n_items, # Number of items 173 | self.k, # Latent dimensionality 174 | self.lambda_emb, # Regularization rate (embeddings) 175 | self.lambda_bias, # Regularization rate (bias) 176 | self.opt_type, # Optimizer class 177 | self.opt_args, # Optimizer arguments 178 | ] 179 | 180 | def get_weights(self, session): 181 | ''' 182 | Returns trained weights for all parameters in the model. 183 | 184 | Arguments: 185 | session {tf.Session} -- TensorFlow session object 186 | 187 | Returns: 188 | np.ndarray -- Trained user embedding matrix weights 189 | np.ndarray -- Trained item embedding matrix weights 190 | np.ndarray -- Trained item bias weights 191 | ''' 192 | # User embedding matrix 193 | U_mf_weights = session.run(self.U_mf) 194 | 195 | # Item embedding matrix 196 | I_mf_weights = session.run(self.I_mf) 197 | 198 | # Item bias embedding vector 199 | Bi_mf_weights = session.run(self.Bi_mf) 200 | 201 | return U_mf_weights, I_mf_weights, Bi_mf_weights 202 | 203 | ###################################### 204 | # Training and Evaluation 205 | ###################################### 206 | 207 | def _session_run(self, session, input_batch, *args): 208 | ''' 209 | Computes graph variables based on inputs. 
210 | 211 | Arguments: 212 | session {tf.Session} -- TF Session 213 | input_batch {np.ndarray} -- 2d array or matrix with the following column order: 214 | user ID (u) 215 | observed item ID (i) 216 | unobserved item ID (j) 217 | 218 | Arbitrary Arguments: 219 | *args {tf.Variable} -- TF variables to be computed 220 | 221 | Returns: 222 | list -- TF Variable values 223 | ''' 224 | return session.run( 225 | args, 226 | feed_dict={ 227 | self.u: input_batch[:, 0], 228 | self.i: input_batch[:, 1], 229 | self.j: input_batch[:, 2] 230 | } 231 | ) 232 | 233 | def debug(self, session, input_batch): 234 | ''' 235 | Debugger - indicates where variables are NaN / 236 | 237 | Arguments: 238 | session {tf.Session} -- TF Session 239 | input_batch {np.ndarray} -- 2d array or matrix with the following column order: 240 | user ID (u) 241 | observed item ID (i) 242 | unobserved item ID (j) 243 | 244 | Raises: 245 | Exception 246 | ''' 247 | # Common intermediaries 248 | p_ui, p_uj, p_uij, log_likelihood, l2_emb, l2_bias = \ 249 | self._session_run( 250 | session, input_batch, 251 | self.p_ui, 252 | self.p_uj, 253 | self.p_uij, 254 | self.log_likelihood, 255 | self.l2_emb, 256 | self.l2_bias 257 | ) 258 | 259 | # Identify problematic i-preferences 260 | nan_pui_ix = np.argwhere(np.isnan(p_ui)) 261 | if nan_pui_ix.size > 0: 262 | print('ERROR - NaN p_ui at {} from batch data {}'.format( 263 | nan_pui_ix, input_batch[nan_pui_ix, :] 264 | )) 265 | 266 | # Identify problematic j-preferences 267 | nan_puj_ix = np.argwhere(np.isnan(p_uj)) 268 | if nan_puj_ix.size > 0: 269 | print('ERROR - NaN p_uj at {} from batch data {}'.format( 270 | nan_puj_ix, input_batch[nan_puj_ix, :] 271 | )) 272 | 273 | # Identify problematic p_uij = p_ui - p_uj 274 | nan_puij_ix = np.argwhere(np.isnan(p_uij)) 275 | if nan_puij_ix.size > 0: 276 | print('ERROR - NaN p_uij at {} from batch data {}'.format( 277 | nan_puij_ix, input_batch[nan_puij_ix, :] 278 | )) 279 | 280 | # Identify problematic Log Likelihood log(sig(p_uij)) 281 | nan_LL_ix = np.argwhere(np.isnan(log_likelihood)) 282 | if nan_LL_ix.size > 0: 283 | print('ERROR - NaN Log Likelihood at {} from batch data {}'.format( 284 | nan_LL_ix, input_batch[nan_LL_ix, :] 285 | )) 286 | 287 | # Identify problematic L2 regularization term (embeddings) 288 | nan_l2e_ix = np.argwhere(np.isnan(l2_emb)) 289 | if nan_l2e_ix.size > 0: 290 | print('ERROR - NaN Embedding Reg term at {} from batch data {}'.format( 291 | nan_l2e_ix, input_batch[nan_l2e_ix, :] 292 | )) 293 | 294 | # Identify problematic L2 regularization term (bias) 295 | nan_l2b_ix = np.argwhere(np.isnan(l2_bias)) 296 | if nan_l2b_ix.size > 0: 297 | print('ERROR - NaN Bias Reg term at {} from batch data {}'.format( 298 | nan_l2b_ix, input_batch[nan_l2b_ix, :] 299 | )) 300 | 301 | raise Exception('ERROR IN BATCH') 302 | -------------------------------------------------------------------------------- /recsys_models/models/transrec.py: -------------------------------------------------------------------------------- 1 | ''' 2 | -*- coding: utf-8 -*- 3 | 4 | TensorFlow implementation of Translational Recommendations for implicit feedback 5 | 6 | Source paper(s): 7 | Translation-based recommendation 8 | He, et al. 
2017 9 | 10 | Author: Shuyang Li 11 | License: GNU GPLv3 12 | ''' 13 | 14 | import tensorflow as tf 15 | from recsys_models.models import RecSysModel 16 | import pandas as pd 17 | import numpy as np 18 | 19 | class TransRec(RecSysModel): 20 | ''' 21 | Translation-Based Recommender System 22 | 23 | Input format: 24 | (u, p, i, j) - user u, prior item p, observed item i, unobserved item j 25 | 26 | Intuition for pairwise ranking for implicit feedback: 27 | Users will rank observed items above unobserved items 28 | We want to maximize the log likelihood of the batch rankings 29 | 30 | Intuition for translation embedding: 31 | Items are embedded as points in the shared embedding space 32 | Users embedded as translational vectors in the shared embedding space 33 | Prior Item (point) + User (vector) -> Next Item (point location), and thus: 34 | P(i|u, p) ~ L2 loss of gamma_i - (gamma_p + gamma_u) 35 | 36 | Variables 37 | gamma_u -> user translation vector embedding 38 | gamma_p -> prior item point embedding 39 | gamma_i -> observed item point embedding 40 | gamma_j -> unobserved item point embedding 41 | beta_i -> observed item bias 42 | beta_j -> unobserved item bias 43 | global_u -> global user translation bias 44 | 45 | Weights to be optimized are on the order of: 46 | k * (n_items + n_users + 1) + n_items 47 | 48 | Weights: 49 | U_emb (n_users x k) -- User translation embeddings 50 | I_emb (n_items x k) -- Item point embeddings 51 | Bi_emb (n_items x 1) -- Item biases 52 | Global_u (k x 1) -- Global user translation vector basis (alpha-analogue) 53 | 54 | Optimization Criterion: 55 | Maximize sum of log-likelihoods of rankings: 56 | ln(sigmoid(p_ui - p_uj)) 57 | Where 58 | p_ui -> P(i|u), prop. to ||gamma_u + gamma_p - gamma_i|| 59 | p_uj -> P(j|u), prop. 
to ||gamma_u + gamma_p - gamma_j|| 60 | 61 | Regularization: 62 | embeddings (gamma_u, gamma_p, gamma_i, gamma_j) 63 | biases (beta_i, beta_j, global_u) 64 | ''' 65 | def __init__( 66 | self, n_users, n_items, k=2, lambda_emb=1e-6, lambda_bias=1e-6, 67 | opt_type=tf.contrib.opt.LazyAdamOptimizer, 68 | opt_args=dict(), 69 | U_emb_weights=None, 70 | I_emb_weights=None, 71 | Bi_emb_weights=None, 72 | Global_u_weights=None): 73 | ''' 74 | Arguments: 75 | n_users {int} -- Number of users 76 | n_items {int} -- Number of items 77 | 78 | Keyword Arguments: 79 | k {int} -- Latent dimensionality (default: {2}) 80 | lambda_emb {float} -- Embedding regularization rate (default: {1e-6}) 81 | lambda_bias {float} -- Bias term regularization rate (default: {1e-6}) 82 | opt_type {tf.train.Optimizer} -- TF optimizer class (default: {tf.contrib.opt.LazyAdamOptimizer}) 83 | opt_args {dict} -- Dictionary of arguments for the TF optimizer class 84 | U_emb_weights {np.ndarray} -- Initial weights for latent user factors (default: {None}) 85 | I_emb_weights {np.ndarray} -- Initial weights for latent item factors (default: {None}) 86 | Bi_emb_weights {np.ndarray} -- Initial weights for item biases (default: {None}) 87 | Global_u_weights {np.ndarray} -- Initial values for global translation vector prior (default: {None}) 88 | ''' 89 | # Parameters 90 | self.n_users = n_users # Number of users 91 | self.n_items = n_items # Number of items 92 | self.k = k # Latent dimensionality 93 | self.lambda_emb = lambda_emb # Regularization rate (embeddings) 94 | self.lambda_bias = lambda_bias # Regularization rate (bias) 95 | self.optimizer = opt_type(**opt_args) # Optimizer 96 | self.opt_type = opt_type # Optimizer class 97 | self.opt_args = opt_args # Optimizer arguments 98 | 99 | # Model ID 100 | self.model_id = 'transrec_{}k_{}l2_{}l2bias'.format( 101 | self.k, self.lambda_emb, self.lambda_bias 102 | ) 103 | 104 | # Initialized variable weights (None-type checks because of ambiguous truth values for arr) 105 | U_emb_init = U_emb_weights if U_emb_weights is not None else \ 106 | tf.zeros([n_users, k], dtype=tf.float32) 107 | I_emb_init = I_emb_weights if I_emb_weights is not None else \ 108 | tf.nn.l2_normalize( 109 | tf.random_normal([n_items, k], stddev=(1.0/(k**0.5)), dtype=tf.float32), 110 | axis=1 111 | ) 112 | Bi_emb_init = Bi_emb_weights if Bi_emb_weights is not None else \ 113 | tf.zeros([n_items, 1], dtype=tf.float32) 114 | Global_u_init = Global_u_weights if Global_u_weights is not None else \ 115 | tf.nn.l2_normalize( 116 | tf.random_normal([1, k], stddev=(1.0/(k**0.5)), dtype=tf.float32), 117 | axis=1 118 | ) 119 | 120 | # (Batch) Placeholders 121 | self.u = tf.placeholder(tf.int32, [None]) # User ID 122 | self.p = tf.placeholder(tf.int32, [None]) # Prior item ID 123 | self.i = tf.placeholder(tf.int32, [None]) # Observed item ID 124 | self.j = tf.placeholder(tf.int32, [None]) # Unobserved item ID 125 | 126 | # Variables - normalize to unit ball to mitigate curse of dimensionality (Lin et al. 
2015) 127 | self.U_emb = tf.Variable( # User vector embedding matrix 128 | initial_value=U_emb_init, 129 | trainable=True 130 | ) 131 | self.I_emb = tf.Variable( # Item point embedding matrix 132 | initial_value=I_emb_init, 133 | trainable=True 134 | ) 135 | self.Bi_emb = tf.Variable( # Item bias embedding vector 136 | initial_value=Bi_emb_init, 137 | trainable=True 138 | ) 139 | self.Global_u = tf.Variable( # Global user translation prior 140 | initial_value=Global_u_init, 141 | trainable=True 142 | ) 143 | 144 | # Batch Embeddings 145 | self.u_trans_vec = self.Global_u + tf.nn.embedding_lookup(self.U_emb, self.u) 146 | self.p_points = tf.nn.embedding_lookup(self.I_emb, self.p) 147 | self.i_points = tf.nn.embedding_lookup(self.I_emb, self.i) 148 | self.j_points = tf.nn.embedding_lookup(self.I_emb, self.j) 149 | self.i_biases = tf.nn.embedding_lookup(self.Bi_emb, self.i) 150 | self.j_biases = tf.nn.embedding_lookup(self.Bi_emb, self.j) 151 | 152 | # Likelihoods 153 | self.p_ui = self.i_biases - tf.sqrt( 154 | tf.reduce_mean( 155 | tf.square(self.p_points + self.u_trans_vec - self.i_points), 156 | 1, keepdims=True 157 | ) + 1e-8 158 | ) 159 | self.p_uj = self.j_biases - tf.sqrt( 160 | tf.reduce_mean( 161 | tf.square(self.p_points + self.u_trans_vec - self.j_points), 162 | 1, keepdims=True 163 | ) + 1e-8 164 | ) 165 | self.p_uij = self.p_ui - self.p_uj 166 | # Add epsilon for validity at 0 167 | self.log_likelihood = tf.log(tf.sigmoid(self.p_uij) + 1e-8) 168 | 169 | # Regularization - Factorization terms 170 | self.l2_emb = self.lambda_emb * tf.add_n([ 171 | tf.nn.l2_loss(self.u_trans_vec), 172 | tf.nn.l2_loss(self.p_points), 173 | tf.nn.l2_loss(self.i_points), 174 | tf.nn.l2_loss(self.j_points), 175 | tf.nn.l2_loss(self.Global_u) 176 | ]) 177 | 178 | # Regularization - Bias terms 179 | self.l2_bias = self.lambda_bias * tf.add_n([ 180 | tf.nn.l2_loss(self.i_biases), 181 | tf.nn.l2_loss(self.j_biases) 182 | ]) 183 | 184 | # Loss 185 | self.loss = self.l2_emb + self.l2_bias - tf.reduce_sum(self.log_likelihood) 186 | 187 | # Training optimizer 188 | self.training_optimizer = self.optimizer.minimize(self.loss) 189 | 190 | ###################################### 191 | # Model Saving/Serialization 192 | ###################################### 193 | 194 | @property 195 | def params(self): 196 | return [ 197 | self.n_users, # Number of users 198 | self.n_items, # Number of items 199 | self.k, # Latent dimensionality 200 | self.lambda_emb, # Regularization rate (embeddings) 201 | self.lambda_bias, # Regularization rate (bias) 202 | self.opt_type, # Optimizer class 203 | self.opt_args, # Optimizer arguments 204 | ] 205 | 206 | def get_weights(self, session): 207 | ''' 208 | Returns trained weights for all parameters in the model. 
209 | 210 | Arguments: 211 | session {tf.Session} -- TensorFlow session object 212 | 213 | Returns: 214 | np.ndarray -- Trained user embedding matrix weights 215 | np.ndarray -- Trained item embedding matrix weights 216 | np.ndarray -- Trained item bias weights 217 | np.ndarray -- Trained global translation vector prior weights 218 | ''' 219 | # User embedding matrix 220 | U_emb_weights = session.run(self.U_emb) 221 | 222 | # Item embedding matrix 223 | I_emb_weights = session.run(self.I_emb) 224 | 225 | # Item bias embedding vector 226 | Bi_emb_weights = session.run(self.Bi_emb) 227 | 228 | # Global translation vector prior 229 | Global_u_weights = session.run(self.Global_u) 230 | 231 | return U_emb_weights, I_emb_weights, Bi_emb_weights, Global_u_weights 232 | 233 | ###################################### 234 | # Training and Evaluation 235 | ###################################### 236 | 237 | def _session_run(self, session, input_batch, *args): 238 | ''' 239 | Computes graph variables based on inputs. 240 | 241 | Arguments: 242 | session {tf.Session} -- TF Session 243 | input_batch {np.ndarray} -- 2d array or matrix with the following column order: 244 | user ID (u) 245 | observed item ID (i) 246 | unobserved item ID (j) 247 | 248 | Arbitrary Arguments: 249 | *args {tf.Variable} -- TF variables to be computed 250 | 251 | Returns: 252 | list -- TF Variable values 253 | ''' 254 | return session.run( 255 | args, 256 | feed_dict={ 257 | self.u: input_batch[:, 0], 258 | self.p: input_batch[:, 1], 259 | self.i: input_batch[:, 2], 260 | self.j: input_batch[:, 3] 261 | } 262 | ) 263 | 264 | def debug(self, session, input_batch): 265 | ''' 266 | Debugger - indicates where variables are NaN / 267 | 268 | Arguments: 269 | session {tf.Session} -- TF Session 270 | input_batch {np.ndarray} -- 2d array or matrix with the following column order: 271 | user ID (u) 272 | prior item ID (p) 273 | observed item ID (i) 274 | unobserved item ID (j) 275 | 276 | Raises: 277 | Exception 278 | ''' 279 | # Common intermediaries 280 | p_ui, p_uj, p_uij, log_likelihood, l2_emb, l2_bias = \ 281 | self._session_run( 282 | session, input_batch, 283 | self.p_ui, 284 | self.p_uj, 285 | self.p_uij, 286 | self.log_likelihood, 287 | self.l2_emb, 288 | self.l2_bias 289 | ) 290 | 291 | # Identify problematic i-preferences 292 | nan_pui_ix = np.argwhere(np.isnan(p_ui)) 293 | if nan_pui_ix.size > 0: 294 | print('ERROR - NaN p_ui at {} from batch data {}'.format( 295 | nan_pui_ix, input_batch[nan_pui_ix, :] 296 | )) 297 | 298 | # Identify problematic j-preferences 299 | nan_puj_ix = np.argwhere(np.isnan(p_uj)) 300 | if nan_puj_ix.size > 0: 301 | print('ERROR - NaN p_uj at {} from batch data {}'.format( 302 | nan_puj_ix, input_batch[nan_puj_ix, :] 303 | )) 304 | 305 | # Identify problematic p_uij = p_ui - p_uj 306 | nan_puij_ix = np.argwhere(np.isnan(p_uij)) 307 | if nan_puij_ix.size > 0: 308 | print('ERROR - NaN p_uij at {} from batch data {}'.format( 309 | nan_puij_ix, input_batch[nan_puij_ix, :] 310 | )) 311 | 312 | # Identify problematic Log Likelihood log(sig(p_uij)) 313 | nan_LL_ix = np.argwhere(np.isnan(log_likelihood)) 314 | if nan_LL_ix.size > 0: 315 | print('ERROR - NaN Log Likelihood at {} from batch data {}'.format( 316 | nan_LL_ix, input_batch[nan_LL_ix, :] 317 | )) 318 | 319 | # Identify problematic L2 regularization term (embeddings) 320 | nan_l2e_ix = np.argwhere(np.isnan(l2_emb)) 321 | if nan_l2e_ix.size > 0: 322 | print('ERROR - NaN Embedding Reg term at {} from batch data {}'.format( 323 | nan_l2e_ix, 
--------------------------------------------------------------------------------
/recsys_models/models/fpmc.py:
--------------------------------------------------------------------------------
1 | '''
2 | -*- coding: utf-8 -*-
3 | 
4 | TensorFlow implementation of Factorized Personalized Markov Chains for sequential
5 | recommendations with implicit feedback.
6 | 
7 | Source paper(s):
8 | Factorizing personalized markov chains for next-basket recommendation
9 | Rendle, et al. 2010
10 | 
11 | Author: Shuyang Li
12 | License: GNU GPLv3
13 | '''
14 | 
15 | import tensorflow as tf
16 | from datetime import datetime
17 | import pandas as pd
18 | import numpy as np
19 | from recsys_models.models import RecSysModel
20 | 
21 | class FPMC(RecSysModel):
22 |     '''
23 |     Factorized Personalized Markov Chains
24 | 
25 |     Input format:
26 |         (u, p, i, j) - user u, prior item p, observed item i, unobserved item j
27 | 
28 |     Intuition for pairwise ranking for implicit feedback:
29 |         Users will rank observed items above unobserved items
30 |         We want to maximize the log likelihood of the batch rankings
31 | 
32 |     Intuition for FPMC structure:
33 |         Sequential actions (p -> i) are modeled via transition "cube": p -> i item transition
34 |         matrix for each user u. The sequential interaction can be decomposed into:
35 |             <u, p> -- Matrix Factorization of user preference for the prior item
36 |             + <u, i> -- Matrix factorization of user preference for the next item
37 |             + <p, i> -- Markov chain to model p -> i
38 |         Ultimately for the ranking case, only <u, i> + <p, i> is preserved: the <u, p> term is the same for every candidate item, so it cancels out of the pairwise ranking.
39 | 
40 |     Variables
41 |         mf_u -> user preference embedding
42 |         mf_i -> observed item preference embedding
43 |         mf_j -> unobserved item preference embedding
44 |         mc_p -> transition embedding for prior item
45 |         mc_i -> transition embedding for observed item
46 |         mc_j -> transition embedding for unobserved item
47 |         beta_i -> observed item bias
48 |         beta_j -> unobserved item bias
49 | 
50 |     Weights to be optimized are on the order of:
51 |         k * (3 * n_items + n_users) + n_items
52 | 
53 |     Weights:
54 |         U_mf (n_users x k) -- User latent factor matrix
55 |         I_mf (n_items x k) -- Item latent factor matrix
56 |         P_mc (n_items x k) -- Prior item markov matrix
57 |         I_mc (n_items x k) -- Next item markov matrix
58 |         Bi_mf (n_items x 1) -- Item biases
59 | 
60 |     Optimization Criterion:
61 |         Maximize sum of log-likelihoods of rankings:
62 |             ln(sigmoid(p_ui - p_uj))
63 |         Where
64 |             p_ui -> P(i|u), prop. to <mf_u, mf_i> + <mc_p, mc_i>
65 |             p_uj -> P(j|u), prop. to <mf_u, mf_j> + <mc_p, mc_j>
66 | 
67 |     Regularization:
68 |         embeddings (mf_u, mf_i, mf_j, mc_p, mc_i, mc_j)
69 |         biases (beta_i, beta_j)
70 |     '''
71 |     def __init__(
72 |             self, n_users, n_items, k=2, lambda_emb=1e-6, lambda_bias=1e-6,
73 |             opt_type=tf.contrib.opt.LazyAdamOptimizer,
74 |             opt_args=dict(),
75 |             U_mf_weights=None,
76 |             I_mf_weights=None,
77 |             P_mc_weights=None,
78 |             I_mc_weights=None,
79 |             Bi_mf_weights=None):
80 |         '''
81 |         Arguments:
82 |             n_users {int} -- Number of users
83 |             n_items {int} -- Number of items
84 | 
85 |         Keyword Arguments:
86 |             k {int} -- Latent dimensionality (default: {2})
87 |             lambda_emb {float} -- Embedding regularization rate (default: {1e-6})
88 |             lambda_bias {float} -- Bias term regularization rate (default: {1e-6})
89 |             opt_type {tf.train.Optimizer} -- TF optimizer class (default: {tf.contrib.opt.LazyAdamOptimizer})
90 |             opt_args {dict} -- Dictionary of arguments for the TF optimizer class
91 |             U_mf_weights {np.ndarray} -- Initial weights for latent user factors (default: {None})
92 |             I_mf_weights {np.ndarray} -- Initial weights for latent item factors (default: {None})
93 |             P_mc_weights {np.ndarray} -- Initial weights for prior transition factors (default: {None})
94 |             I_mc_weights {np.ndarray} -- Initial weights for item transition factors (default: {None})
95 |             Bi_mf_weights {np.ndarray} -- Initial weights for item biases (default: {None})
96 |         '''
97 |         # Parameters
98 |         self.n_users = n_users                  # Number of users
99 |         self.n_items = n_items                  # Number of items
100 |         self.k = k                              # Latent dimensionality
101 |         self.lambda_emb = lambda_emb            # Regularization rate (embeddings)
102 |         self.lambda_bias = lambda_bias          # Regularization rate (bias)
103 |         self.optimizer = opt_type(**opt_args)   # Optimizer
104 |         self.opt_type = opt_type                # Optimizer class
105 |         self.opt_args = opt_args                # Optimizer arguments
106 | 
107 |         # Model ID
108 |         self.model_id = 'fpmc_{}k_{}l2_{}l2bias'.format(
109 |             self.k, self.lambda_emb, self.lambda_bias
110 |         )
111 | 
112 |         # Initialized variable weights (None-type checks because of ambiguous truth values for arr)
113 |         U_mf_init = U_mf_weights if U_mf_weights is not None else \
114 |             tf.nn.l2_normalize(
115 |                 tf.random_normal([n_users, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
116 |                 axis=1
117 |             )
118 |         I_mf_init = I_mf_weights if I_mf_weights is not None else \
119 |             tf.nn.l2_normalize(
120 |                 tf.random_normal([n_items, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
121 |                 axis=1
122 |             )
123 |         P_mc_init = P_mc_weights if P_mc_weights is not None else \
124 |             tf.nn.l2_normalize(
125 |                 tf.random_normal([n_items, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
126 |                 axis=1
127 |             )
128 |         I_mc_init = I_mc_weights if I_mc_weights is not None else \
129 |             tf.nn.l2_normalize(
130 |                 tf.random_normal([n_items, k], stddev=(1.0/(k**0.5)), dtype=tf.float32),
131 |                 axis=1
132 |             )
133 |         Bi_mf_init = Bi_mf_weights if Bi_mf_weights is not None else \
134 |             tf.zeros([n_items, 1], dtype=tf.float32)
135 | 
136 |         # (Batch) Placeholders
137 |         self.u = tf.placeholder(tf.int32, [None])   # User ID
138 |         self.p = tf.placeholder(tf.int32, [None])   # Prior item ID
139 |         self.i = tf.placeholder(tf.int32, [None])   # Observed item ID
140 |         self.j = tf.placeholder(tf.int32, [None])   # Unobserved item ID
141 | 
142 |         # Variables - normalize to unit ball to mitigate curse of dimensionality (Lin et al.
2015) 143 | self.U_mf = tf.Variable( # User embedding matrix 144 | initial_value=U_mf_init, 145 | trainable=True 146 | ) 147 | self.I_mf = tf.Variable( # Item embedding matrix 148 | initial_value=I_mf_init, 149 | trainable=True 150 | ) 151 | self.P_mc = tf.Variable( # Prior item embedding matrix for transition 152 | initial_value=P_mc_init, 153 | trainable=True 154 | ) 155 | self.I_mc = tf.Variable( # Following item embedding matrix for transition 156 | initial_value=I_mc_init, 157 | trainable=True 158 | ) 159 | self.Bi_mf = tf.Variable( # Item bias embedding vector 160 | initial_value=Bi_mf_init, 161 | trainable=True 162 | ) 163 | 164 | # Batch Embeddings 165 | self.u_mf = tf.nn.embedding_lookup(self.U_mf, self.u) 166 | self.i_mf = tf.nn.embedding_lookup(self.I_mf, self.i) 167 | self.j_mf = tf.nn.embedding_lookup(self.I_mf, self.j) 168 | self.p_mc = tf.nn.embedding_lookup(self.P_mc, self.p) 169 | self.i_mc = tf.nn.embedding_lookup(self.I_mc, self.i) 170 | self.j_mc = tf.nn.embedding_lookup(self.I_mc, self.j) 171 | # Bias terms 172 | self.i_biases = tf.nn.embedding_lookup(self.Bi_mf, self.i) 173 | self.j_biases = tf.nn.embedding_lookup(self.Bi_mf, self.j) 174 | 175 | # Likelihoods 176 | self.p_ui = tf.reduce_sum(tf.multiply(self.u_mf, self.i_mf), 1, keepdims=True) + \ 177 | tf.reduce_sum(tf.multiply(self.p_mc, self.i_mc), 1, keepdims=True) + \ 178 | self.i_biases 179 | self.p_uj = tf.reduce_sum(tf.multiply(self.u_mf, self.j_mf), 1, keepdims=True) + \ 180 | tf.reduce_sum(tf.multiply(self.p_mc, self.j_mc), 1, keepdims=True) + \ 181 | self.j_biases 182 | self.p_uij = self.p_ui - self.p_uj 183 | # Add epsilon for validity at 0 184 | self.log_likelihood = tf.log(tf.sigmoid(self.p_uij) + 1e-8) 185 | 186 | # Regularization - Factorization terms 187 | self.l2_emb = self.lambda_emb * tf.add_n([ 188 | tf.nn.l2_loss(self.u_mf), 189 | tf.nn.l2_loss(self.i_mf), 190 | tf.nn.l2_loss(self.j_mf), 191 | tf.nn.l2_loss(self.p_mc), 192 | tf.nn.l2_loss(self.i_mc), 193 | tf.nn.l2_loss(self.j_mc), 194 | ]) 195 | 196 | # Regularization - Bias terms 197 | self.l2_bias = self.lambda_bias * tf.add_n([ 198 | tf.nn.l2_loss(self.i_biases), 199 | tf.nn.l2_loss(self.j_biases) 200 | ]) 201 | 202 | # Loss 203 | self.loss = self.l2_emb + self.l2_bias - tf.reduce_mean(self.log_likelihood) 204 | 205 | # Training optimizer 206 | self.training_optimizer = self.optimizer.minimize(self.loss) 207 | 208 | ###################################### 209 | # Model Saving/Serialization 210 | ###################################### 211 | 212 | @property 213 | def params(self): 214 | return [ 215 | self.n_users, # Number of users 216 | self.n_items, # Number of items 217 | self.k, # Latent dimensionality 218 | self.lambda_emb, # Regularization rate (embeddings) 219 | self.lambda_bias, # Regularization rate (bias) 220 | self.opt_type, # Optimizer class 221 | self.opt_args, # Optimizer arguments 222 | ] 223 | 224 | def get_weights(self, session): 225 | ''' 226 | Returns trained weights for all parameters in the model. 
227 | 
228 |         Arguments:
229 |             session {tf.Session} -- TensorFlow session object
230 | 
231 |         Returns:
232 |             np.ndarray -- Trained user embedding matrix weights
233 |             np.ndarray -- Trained item embedding matrix weights
234 |             np.ndarray -- Trained prior item transition embedding weights
235 |             np.ndarray -- Trained next item transition embedding weights
236 |             np.ndarray -- Trained item bias weights
237 |         '''
238 |         # User embedding matrix
239 |         U_mf_weights = session.run(self.U_mf)
240 | 
241 |         # Item embedding matrix
242 |         I_mf_weights = session.run(self.I_mf)
243 | 
244 |         # Prior item transition embedding matrix
245 |         P_mc_weights = session.run(self.P_mc)
246 | 
247 |         # Next item transition embedding matrix
248 |         I_mc_weights = session.run(self.I_mc)
249 | 
250 |         # Item bias embedding vector
251 |         Bi_mf_weights = session.run(self.Bi_mf)
252 | 
253 |         return U_mf_weights, I_mf_weights, P_mc_weights, I_mc_weights, Bi_mf_weights
254 | 
255 |     ######################################
256 |     # Training and Evaluation
257 |     ######################################
258 | 
259 |     def _session_run(self, session, input_batch, *args):
260 |         '''
261 |         Computes graph variables based on inputs.
262 | 
263 |         Arguments:
264 |             session {tf.Session} -- TF Session
265 |             input_batch {np.ndarray} -- 2d array or matrix with the following column order:
266 |                 user ID (u)
267 |                 prior observed item (p)
268 |                 observed item ID (i)
269 |                 unobserved item ID (j)
270 | 
271 |         Arbitrary Arguments:
272 |             *args {tf.Variable} -- TF variables to be computed
273 | 
274 |         Returns:
275 |             list -- TF Variable values
276 |         '''
277 |         return session.run(
278 |             args,
279 |             feed_dict={
280 |                 self.u: input_batch[:, 0],
281 |                 self.p: input_batch[:, 1],
282 |                 self.i: input_batch[:, 2],
283 |                 self.j: input_batch[:, 3]
284 |             }
285 |         )
286 | 
287 |     def debug(self, session, input_batch):
288 |         '''
289 |         Debugger - indicates where variables are NaN.
290 | 
291 |         Arguments:
292 |             session {tf.Session} -- TF Session
293 |             input_batch {np.ndarray} -- 2d array or matrix with the following column order:
294 |                 user ID (u)
295 |                 prior item ID (p)
296 |                 observed item ID (i)
297 |                 unobserved item ID (j)
298 | 
299 |         Raises:
300 |             Exception
301 |         '''
302 |         # Common intermediaries
303 |         p_ui, p_uj, p_uij, log_likelihood, l2_emb, l2_bias = \
304 |             self._session_run(
305 |                 session, input_batch,
306 |                 self.p_ui,
307 |                 self.p_uj,
308 |                 self.p_uij,
309 |                 self.log_likelihood,
310 |                 self.l2_emb,
311 |                 self.l2_bias
312 |             )
313 | 
314 |         # Identify problematic i-preferences
315 |         nan_pui_ix = np.argwhere(np.isnan(p_ui))
316 |         if nan_pui_ix.size > 0:
317 |             print('ERROR - NaN p_ui at {} from batch data {}'.format(
318 |                 nan_pui_ix, input_batch[nan_pui_ix, :]
319 |             ))
320 | 
321 |         # Identify problematic j-preferences
322 |         nan_puj_ix = np.argwhere(np.isnan(p_uj))
323 |         if nan_puj_ix.size > 0:
324 |             print('ERROR - NaN p_uj at {} from batch data {}'.format(
325 |                 nan_puj_ix, input_batch[nan_puj_ix, :]
326 |             ))
327 | 
328 |         # Identify problematic p_uij = p_ui - p_uj
329 |         nan_puij_ix = np.argwhere(np.isnan(p_uij))
330 |         if nan_puij_ix.size > 0:
331 |             print('ERROR - NaN p_uij at {} from batch data {}'.format(
332 |                 nan_puij_ix, input_batch[nan_puij_ix, :]
333 |             ))
334 | 
335 |         # Identify problematic Log Likelihood log(sig(p_uij))
336 |         nan_LL_ix = np.argwhere(np.isnan(log_likelihood))
337 |         if nan_LL_ix.size > 0:
338 |             print('ERROR - NaN Log Likelihood at {} from batch data {}'.format(
339 |                 nan_LL_ix, input_batch[nan_LL_ix, :]
340 |             ))
341 | 
342 |         # Identify problematic L2 regularization term (embeddings)
343 |         nan_l2e_ix = np.argwhere(np.isnan(l2_emb))
344 |         if nan_l2e_ix.size > 0:
345 |             print('ERROR - NaN Embedding Reg term at {} from batch data {}'.format(
346 |                 nan_l2e_ix, input_batch[nan_l2e_ix, :]
347 |             ))
348 | 
349 |         # Identify problematic L2 regularization term (bias)
350 |         nan_l2b_ix = np.argwhere(np.isnan(l2_bias))
351 |         if nan_l2b_ix.size > 0:
352 |             print('ERROR - NaN Bias Reg term at {} from batch data {}'.format(
353 |                 nan_l2b_ix, input_batch[nan_l2b_ix, :]
354 |             ))
355 | 
356 |         raise Exception('ERROR IN BATCH')
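357 | 
358 | # Illustrative sketch only (NOT part of the library API): recomputes the FPMC
359 | # scores and the BPR log-likelihood from the graph above in plain NumPy, for
360 | # one hypothetical (u, p, i, j) example. Each score is an MF (user-item) inner
361 | # product plus an MC (prior item -> next item) inner product plus an item bias.
362 | if __name__ == '__main__':
363 |     mf_u = np.array([0.2, 0.1])     # User preference factors (hypothetical values)
364 |     mf_i = np.array([0.3, -0.1])    # Observed item preference factors
365 |     mf_j = np.array([-0.2, 0.4])    # Unobserved item preference factors
366 |     mc_p = np.array([0.1, 0.2])     # Prior item transition factors
367 |     mc_i = np.array([0.4, 0.3])     # Observed item transition factors
368 |     mc_j = np.array([0.0, -0.3])    # Unobserved item transition factors
369 |     beta_i, beta_j = 0.05, -0.02    # Item biases
370 |     # Mirrors self.p_ui / self.p_uj above
371 |     p_ui = mf_u.dot(mf_i) + mc_p.dot(mc_i) + beta_i
372 |     p_uj = mf_u.dot(mf_j) + mc_p.dot(mc_j) + beta_j
373 |     # BPR log-likelihood of ranking observed i above unobserved j
374 |     log_likelihood = np.log(1.0 / (1.0 + np.exp(-(p_ui - p_uj))) + 1e-8)
375 |     print('Sketch ln(sigmoid(p_uij)) = {:.5f}'.format(log_likelihood))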
--------------------------------------------------------------------------------
/sample_pipeline.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [
8 |     {
9 |      "name": "stdout",
10 |      "output_type": "stream",
11 |      "text": [
12 |       "0:00:00 - Initialized environment\n"
13 |      ]
14 |     }
15 |    ],
16 |    "source": [
17 |     "# IMPORTS\n",
18 |     "from itertools import chain, zip_longest\n",
19 |     "from datetime import datetime\n",
20 |     "import pandas as pd\n",
21 |     "import numpy as np\n",
22 |     "import gc\n",
23 |     "import random\n",
24 |     "import os\n",
25 |     "import json\n",
26 |     "\n",
27 |     "gc.collect()\n",
28 |     "start = datetime.now()\n",
29 |     "print('{} - Initialized environment'.format(\n",
30 |     "    datetime.now() - start\n",
31 |     "))"
32 |    ]
33 |   },
34 |   {
35 |    "cell_type": "markdown",
36 |    "metadata": {},
37 |    "source": [
38 |     "## Quick Guide\n",
39 |     "Here we will train a Bayesian Personalized Ranking (BPR-MF) model on the MovieLens 1M dataset.\n",
40 |     "\n",
41 |     "To start, we assume that the data (ratings.dat, etc.) files have been extracted to the `datasets/ml-1m` folder.\n",
42 |     "\n",
43 |     "The following block will process the raw data file to make it into a pandas DataFrame."
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": 2,
49 |    "metadata": {},
50 |    "outputs": [
51 |     {
52 |      "name": "stdout",
53 |      "output_type": "stream",
54 |      "text": [
55 |       "0:00:00.092786 - Retrieved interactions df.\n"
56 |      ]
57 |     }
58 |    ],
59 |    "source": [
60 |     "def get_interactions(filename):\n",
61 |     "    # Separators are ::, but the c engine doesn't handle multiple-char separators.
Thus we're just going to have to manually interleave and break.\n", 62 | " columns = ['user_id', 'item_id', 'rating', 'timestamp']\n", 63 | " interleave = list(map(str, np.arange(len(columns)-1)))\n", 64 | " read_names = [x for x in chain(*zip_longest(columns, interleave)) if x is not None]\n", 65 | " # Read the actual file in\n", 66 | " interactions_df = pd.read_csv(filename, sep=':', header=None, names=read_names).drop(columns=interleave)\n", 67 | " return interactions_df\n", 68 | "\n", 69 | "movie_dir = os.path.join('datasets', 'ml-1m')\n", 70 | "ratings_file = os.path.join(movie_dir, 'ratings.dat')\n", 71 | "interactions_loc = os.path.join(movie_dir, 'interactions.msgpack')\n", 72 | "\n", 73 | "start = datetime.now()\n", 74 | "try:\n", 75 | " df = pd.read_msgpack(interactions_loc)\n", 76 | " print('{} - Retrieved interactions df.'.format(datetime.now() - start))\n", 77 | "except Exception as e:\n", 78 | " print('Error unpickling {}, reconstructing from ratings.dat: {}'.format(interactions_loc, e))\n", 79 | " df = get_interactions(ratings_file)\n", 80 | " print('{} - Processed interactions from ratings.dat'.format(datetime.now() - start))\n", 81 | " df.to_msgpack(interactions_loc)\n", 82 | " print('{} - Serialized interactions to {}'.format(datetime.now() - start, interactions_loc))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/html": [ 93 | "
\n", 94 | "\n", 107 | "\n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | "
user_iditem_idratingtimestamp
0111935978300760
116613978302109
219143978301968
3134084978300275
4123555978824291
\n", 155 | "
" 156 | ], 157 | "text/plain": [ 158 | " user_id item_id rating timestamp\n", 159 | "0 1 1193 5 978300760\n", 160 | "1 1 661 3 978302109\n", 161 | "2 1 914 3 978301968\n", 162 | "3 1 3408 4 978300275\n", 163 | "4 1 2355 5 978824291" 164 | ] 165 | }, 166 | "execution_count": 3, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "df.head(5)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Train/Test/Validation Splits\n", 180 | "Perform the following operations to prepare our MovieLens interactions data for the recommender system model:\n", 181 | "- K-core (preserve only users and items with more than 5 interactions)\n", 182 | "- Create temporal columns\n", 183 | "- Map user/item IDs to contiguous integer series" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 4, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "0:00:00.434865 - Added proper temporal columns to df\n", 196 | "Removing 0/6040 users (0.00 %) and 290/3706 items (7.83 %) from 1000209 total interactions (95.53164% Sparsity)\n", 197 | "Removing 0/6040 users (0.00 %) and 0/3416 items (0.00 %) from 999611 total interactions (95.15520% Sparsity)\n", 198 | "0:00:00.363996 - Done: 5-core decomposition after 2 iterations\n", 199 | "0:00:00.450793 - Mapped u-i indices\n", 200 | "0:00:00.845737 - Created \"prior\" column\n", 201 | "6040 Users interacted with 3416 items 993571 times (95.1845% sparsity, 164.499 actions/user, 290.858 actions/item)\n", 202 | "\n", 203 | "\n", 204 | "Int64Index: 993571 entries, 289422 to 856074\n", 205 | "Data columns (total 12 columns):\n", 206 | "user_id 993571 non-null int64\n", 207 | "item_id 993571 non-null int64\n", 208 | "rating 993571 non-null int64\n", 209 | "timestamp 993571 non-null int64\n", 210 | "date 993571 non-null datetime64[ns]\n", 211 | "year 993571 non-null int32\n", 212 | "month 993571 non-null int32\n", 213 | "day_of_week 993571 non-null int32\n", 214 | "day_of_year 993571 non-null int32\n", 215 | "u 993571 non-null int32\n", 216 | "i 993571 non-null int32\n", 217 | "prior 993571 non-null int32\n", 218 | "dtypes: datetime64[ns](1), int32(7), int64(4)\n", 219 | "memory usage: 72.0 MB\n", 220 | "None\n", 221 | "\n", 222 | "0:00:00.337128 - Created full user : item, interaction mappings df and user : items dict\n", 223 | "0:00:00.514655 - Created holdout sets of most recent interactions/user\n", 224 | "0:00:04.002325 - Created bootstrap validation DF of size 2000000\n", 225 | "0:00:07.623639 - Created bootstrap testing DF of size 2000000\n", 226 | "0:00:07.888900 - Generated train/validation/test splits and user : items dictionary mappings\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "'''\n", 232 | "INITIALIZE data and train/validation/test splits\n", 233 | "'''\n", 234 | "from recsys_models.data import process_temporal_columns, kcore, map_user_items, print_basic_stats\n", 235 | "from recsys_models.data.sampling import train_test_validation_split, get_user_interactions_df\n", 236 | "\n", 237 | "user_col = 'user_id'\n", 238 | "item_col = 'item_id'\n", 239 | "\n", 240 | "# Get temporal columns\n", 241 | "df['date'] = pd.to_datetime(df['timestamp'].apply(datetime.utcfromtimestamp))\n", 242 | "df = process_temporal_columns(df)\n", 243 | "\n", 244 | "# K-core\n", 245 | "cores = 5\n", 246 | "df = kcore(df, user_col, item_col, cores)\n", 247 | "\n", 248 | "# User/item ix -> id 
mappings\n", 249 | "df = map_user_items(df, user_col, item_col)\n", 250 | "\n", 251 | "# Get stats\n", 252 | "gc.collect()\n", 253 | "print_basic_stats(df, user_col, item_col)\n", 254 | "n_users = df[user_col].nunique()\n", 255 | "n_items = df[item_col].nunique()\n", 256 | "\n", 257 | "# Create train, validation, and test DFs by holding out the latest interaction per user for test and second-to-last for validation\n", 258 | "start = datetime.now()\n", 259 | "eval_size = 2000000\n", 260 | "train_df, validation_df, test_df, all_int_by_user_df = train_test_validation_split(df, eval_size)\n", 261 | "train_items_by_user = train_df.groupby(['u'])['i'].agg(lambda x: set(x)).to_dict()\n", 262 | "print('{} - Generated train/validation/test splits and user : items dictionary mappings'.format(\n", 263 | " datetime.now() - start\n", 264 | "))" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 5, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/html": [ 275 | "
\n", 276 | "\n", 289 | "\n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | "
user_iditem_idratingtimestampdateyearmonthday_of_weekday_of_yearuiprior
2894226040238449567039542000-04-25 23:05:542000411156039323669
138498604059359567039542000-04-25 23:05:542000411156039128323
450326040196149567039772000-04-25 23:06:17200041115603941128
\n", 355 | "
" 356 | ], 357 | "text/plain": [ 358 | " user_id item_id rating timestamp date year month \\\n", 359 | "289422 6040 2384 4 956703954 2000-04-25 23:05:54 2000 4 \n", 360 | "138498 6040 593 5 956703954 2000-04-25 23:05:54 2000 4 \n", 361 | "45032 6040 1961 4 956703977 2000-04-25 23:06:17 2000 4 \n", 362 | "\n", 363 | " day_of_week day_of_year u i prior \n", 364 | "289422 1 115 6039 323 669 \n", 365 | "138498 1 115 6039 128 323 \n", 366 | "45032 1 115 6039 41 128 " 367 | ] 368 | }, 369 | "metadata": {}, 370 | "output_type": "display_data" 371 | }, 372 | { 373 | "data": { 374 | "text/html": [ 375 | "
\n", 376 | "\n", 389 | "\n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | "
user_iditem_idratingtimestampdateyearmonthday_of_weekday_of_yearuipriorj
02469370249741602322000-11-14 00:03:52200011131824682112171586
1461449249641436392000-07-21 01:40:39200074202461318013052045
22260192339745858422000-11-18 22:17:22200011532222592907332681
\n", 459 | "
" 460 | ], 461 | "text/plain": [ 462 | " user_id item_id rating timestamp date year month \\\n", 463 | "0 2469 3702 4 974160232 2000-11-14 00:03:52 2000 11 \n", 464 | "1 4614 492 4 964143639 2000-07-21 01:40:39 2000 7 \n", 465 | "2 2260 1923 3 974585842 2000-11-18 22:17:22 2000 11 \n", 466 | "\n", 467 | " day_of_week day_of_year u i prior j \n", 468 | "0 1 318 2468 211 217 1586 \n", 469 | "1 4 202 4613 1801 305 2045 \n", 470 | "2 5 322 2259 290 733 2681 " 471 | ] 472 | }, 473 | "metadata": {}, 474 | "output_type": "display_data" 475 | }, 476 | { 477 | "data": { 478 | "text/html": [ 479 | "
\n", 480 | "\n", 493 | "\n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | "
user_iditem_idratingtimestampdateyearmonthday_of_weekday_of_yearuipriorj
05632233659590151662000-05-22 17:06:0620005014256314354227
15633128759590143842000-05-22 16:53:042000501425632691119
25903161759574657922000-05-04 18:43:1220005312459022469051965
\n", 563 | "
" 564 | ], 565 | "text/plain": [ 566 | " user_id item_id rating timestamp date year month \\\n", 567 | "0 5632 2336 5 959015166 2000-05-22 17:06:06 2000 5 \n", 568 | "1 5633 1287 5 959014384 2000-05-22 16:53:04 2000 5 \n", 569 | "2 5903 1617 5 957465792 2000-05-04 18:43:12 2000 5 \n", 570 | "\n", 571 | " day_of_week day_of_year u i prior j \n", 572 | "0 0 142 5631 435 42 27 \n", 573 | "1 0 142 5632 6 9 1119 \n", 574 | "2 3 124 5902 246 905 1965 " 575 | ] 576 | }, 577 | "metadata": {}, 578 | "output_type": "display_data" 579 | } 580 | ], 581 | "source": [ 582 | "display(train_df.head(3))\n", 583 | "display(validation_df.head(3))\n", 584 | "display(test_df.head(3))" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "## Training Models\n", 592 | "Now we can initialize our model with a few parameters, train a BPR-MF model, and compare it to the PopRec baseline (pick the most popular item as seen in the training set):" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 6, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "name": "stdout", 602 | "output_type": "stream", 603 | "text": [ 604 | "0:00:06.907555 - PopRec:\n", 605 | "Training AUC:\t\t0.84690\n", 606 | "Validation AUC:\t\t0.80072\n", 607 | "Testing AUC:\t\t0.79488\n" 608 | ] 609 | } 610 | ], 611 | "source": [ 612 | "'''\n", 613 | "PopRec Baseline - Pick the more popular item based on training interactions\n", 614 | "'''\n", 615 | "from recsys_models.models import pop_rec\n", 616 | "from recsys_models.data.sampling import sample_unobserved\n", 617 | "\n", 618 | "start = datetime.now()\n", 619 | "pop_auc_tr = pop_rec(train_df, sample_unobserved(train_df, train_items_by_user, n_items, len(test_df)))\n", 620 | "pop_auc_v = pop_rec(train_df, validation_df)\n", 621 | "pop_auc_t = pop_rec(train_df, test_df)\n", 622 | "print('{} - PopRec:\\nTraining AUC:\\t\\t{:.5f}\\nValidation AUC:\\t\\t{:.5f}\\nTesting AUC:\\t\\t{:.5f}'.format(\n", 623 | " datetime.now() - start,\n", 624 | " pop_auc_tr,\n", 625 | " pop_auc_v,\n", 626 | " pop_auc_t\n", 627 | "))" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 7, 633 | "metadata": {}, 634 | "outputs": [ 635 | { 636 | "name": "stdout", 637 | "output_type": "stream", 638 | "text": [ 639 | "0:00:00.075769 - Generated u-i-j matrices for validation and testing\n", 640 | "\n", 641 | "=== BEGIN Optimization for bpr-mf_5k_0.0001l2_0.0001l2bias ===\n", 642 | " 200 Max epochs, with early stoppage at 1e-05 Validation AUC change\n", 643 | " 1000 Iterations per epoch with 512-sized batches\n", 644 | "0:00:00.267288 - Prior: 0.50051 Validation AUC, 0.50301 Testing AUC\n", 645 | "[0:00:03.625273 - Epoch 1] 0.54022 Loss, 0.85301 Training AUC, 0.79636 Validation AUC (0.29585 Change)\n", 646 | "[0:00:06.588379 - Epoch 2] 0.44859 Loss, 0.85319 Training AUC, 0.79956 Validation AUC (0.00320 Change)\n", 647 | "[0:00:09.493608 - Epoch 3] 0.43666 Loss, 0.86849 Training AUC, 0.81308 Validation AUC (0.01352 Change)\n", 648 | "[0:00:12.403825 - Epoch 4] 0.42238 Loss, 0.88661 Training AUC, 0.82824 Validation AUC (0.01516 Change)\n", 649 | "[0:00:15.316034 - Epoch 5] 0.41697 Loss, 0.89285 Training AUC, 0.83717 Validation AUC (0.00893 Change)\n", 650 | "[0:00:18.308031 - Epoch 6] 0.41347 Loss, 0.89770 Training AUC, 0.84078 Validation AUC (0.00361 Change)\n", 651 | "[0:00:21.266120 - Epoch 7] 0.41205 Loss, 0.89975 Training AUC, 0.84310 Validation AUC (0.00232 Change)\n", 652 | "[0:00:24.194288 - Epoch 8] 0.40971 Loss, 0.90223 
Training AUC, 0.84572 Validation AUC (0.00262 Change)\n", 653 | "[0:00:27.143370 - Epoch 9] 0.40962 Loss, 0.90138 Training AUC, 0.84689 Validation AUC (0.00117 Change)\n", 654 | "[0:00:30.182272 - Epoch 10] 0.40814 Loss, 0.90566 Training AUC, 0.84853 Validation AUC (0.00164 Change)\n", 655 | "[0:00:33.464463 - Epoch 11] 0.40859 Loss, 0.90471 Training AUC, 0.84958 Validation AUC (0.00105 Change)\n", 656 | "[0:00:36.586114 - Epoch 12] 0.40836 Loss, 0.90554 Training AUC, 0.85102 Validation AUC (0.00144 Change)\n", 657 | "[0:00:39.721725 - Epoch 13] 0.40745 Loss, 0.90614 Training AUC, 0.85128 Validation AUC (0.00026 Change)\n", 658 | "[0:00:42.667878 - Epoch 14] 0.40746 Loss, 0.90627 Training AUC, 0.85215 Validation AUC (0.00088 Change)\n", 659 | "[0:00:45.623938 - Epoch 15] 0.40742 Loss, 0.90760 Training AUC, 0.85243 Validation AUC (0.00028 Change)\n", 660 | "[0:00:48.707722 - Epoch 16] 0.40721 Loss, 0.90854 Training AUC, 0.85286 Validation AUC (0.00043 Change)\n", 661 | "[0:00:51.748591 - Epoch 17] 0.40714 Loss, 0.90701 Training AUC, 0.85298 Validation AUC (0.00012 Change)\n", 662 | "[0:00:54.752522 - Epoch 18] 0.40656 Loss, 0.90801 Training AUC, 0.85359 Validation AUC (0.00061 Change)\n", 663 | "[0:00:57.740533 - Epoch 19] 0.40596 Loss, 0.90852 Training AUC, 0.85442 Validation AUC (0.00083 Change)\n", 664 | "[0:01:00.749514 - Epoch 20] 0.40555 Loss, 0.90919 Training AUC, 0.85420 Validation AUC (-0.00022 Change)\n", 665 | "[0:01:00.867198 - Epoch 20] - STOPPED. Final test AUC: 0.84519\n", 666 | "0:00:00.002993 - Saved parameters to tf_models\\bpr-mf_5k_0.0001l2_0.0001l2bias_ml-1m\\params.pkl\n", 667 | "0:00:00.026897 - Saved weights to tf_models\\bpr-mf_5k_0.0001l2_0.0001l2bias_ml-1m\\weights.pkl\n", 668 | "0:01:02.821943 - Saved model to tf_models\\bpr-mf_5k_0.0001l2_0.0001l2bias_ml-1m\n", 669 | "\n" 670 | ] 671 | } 672 | ], 673 | "source": [ 674 | "'''\n", 675 | "Run BPR\n", 676 | "'''\n", 677 | "import tensorflow as tf\n", 678 | "from recsys_models.models.bpr import BPR_MF\n", 679 | "from recsys_models.pipeline import train_model\n", 680 | "\n", 681 | "# Set training parameters\n", 682 | "max_epochs = 200\n", 683 | "n_iterations = 1000\n", 684 | "batch_size = 512\n", 685 | "stopping_threshold = 1e-5\n", 686 | "\n", 687 | "# Get the validation and testing matrices\n", 688 | "start = datetime.now()\n", 689 | "validation_data = validation_df[['u', 'i', 'j']].values\n", 690 | "test_data = test_df[['u', 'i', 'j']].values\n", 691 | "print('{} - Generated u-i-j matrices for validation and testing'.format(\n", 692 | " datetime.now() - start\n", 693 | "))\n", 694 | "\n", 695 | "# Initialize the graph\n", 696 | "tf.reset_default_graph()\n", 697 | "model = BPR_MF(n_users, n_items, k=5, lambda_emb=1e-4, lambda_bias=1e-4,\n", 698 | " opt_type=tf.contrib.opt.LazyAdamOptimizer, opt_args={'learning_rate': 0.007})\n", 699 | "print('\\n=== BEGIN Optimization for {} ==='.format(model.model_id))\n", 700 | "print(' {} Max epochs, with early stoppage at {} Validation AUC change'.format(max_epochs, stopping_threshold))\n", 701 | "print(' {} Iterations per epoch with {}-sized batches'.format(n_iterations, batch_size))\n", 702 | "\n", 703 | "# Open session and initialize graph weights\n", 704 | "session = tf.Session()\n", 705 | "session.run(tf.global_variables_initializer())\n", 706 | "\n", 707 | "# Train the model!\n", 708 | "model, train_auc, validation_auc, test_auc = train_model(\n", 709 | " session, model, train_df, validation_data, test_data,\n", 710 | " n_iterations=n_iterations, batch_size=batch_size,\n", 
711 |     "    min_epochs=10, max_epochs=max_epochs,\n",
712 |     "    stopping_threshold=stopping_threshold,\n",
713 |     "    sample_columns=['u', 'i'], column_order=['u', 'i', 'j'],\n",
714 |     "    n_items=n_items, items_by_user=train_items_by_user\n",
715 |     ")\n",
716 |     "\n",
717 |     "# Save model\n",
718 |     "suffix = '_ml-1m'\n",
719 |     "full_model_id = '{}{}'.format(model.model_id, suffix)\n",
720 |     "model_folder = os.path.join('tf_models', full_model_id)\n",
721 |     "if not os.path.exists(model_folder):\n",
722 |     "    os.makedirs(model_folder)\n",
723 |     "model.save(session, 'tf_models', suffix=suffix)\n",
724 |     "print('{} - Saved model to {}'.format(\n",
725 |     "    datetime.now() - start, model_folder\n",
726 |     "))\n",
727 |     "\n",
728 |     "# Cleanup\n",
729 |     "session.close()\n",
730 |     "gc.collect()\n",
731 |     "print()"
732 |    ]
733 |   },
734 |   {
735 |    "cell_type": "markdown",
736 |    "metadata": {},
737 |    "source": [
738 |     "## Loading Weights from Pretrained Model\n",
739 |     "We can load weights from another model and initialize the weight matrices.\n",
740 |     "\n",
741 |     "We can thus evaluate existing models using the RecSysModels framework:"
742 |    ]
743 |   },
744 |   {
745 |    "cell_type": "code",
746 |    "execution_count": 8,
747 |    "metadata": {},
748 |    "outputs": [
749 |     {
750 |      "name": "stdout",
751 |      "output_type": "stream",
752 |      "text": [
753 |       "0:00:00.392998 - Pretrained model from tf_models\\bpr-mf_5k_0.0001l2_0.0001l2bias_ml-1m evaluated on test data, with AUC: 0.84519\n",
754 |       "\n"
755 |      ]
756 |     }
757 |    ],
758 |    "source": [
759 |     "'''\n",
760 |     "Retrieve pretrained weights and evaluate with model\n",
761 |     "'''\n",
762 |     "start = datetime.now()\n",
763 |     "\n",
764 |     "# Initialize TF session\n",
765 |     "tf.reset_default_graph()\n",
766 |     "session = tf.Session()\n",
767 |     "\n",
768 |     "# Retrieve the model we just trained\n",
769 |     "model2 = BPR_MF.load(model_folder)\n",
770 |     "session.run(tf.global_variables_initializer())\n",
771 |     "\n",
772 |     "# Evaluate on the test data:\n",
773 |     "test_auc_2 = model2.evaluate_auc(session, test_data)\n",
774 |     "print('{} - Pretrained model from {} evaluated on test data, with AUC: {:.5f}'.format(\n",
775 |     "    datetime.now() - start,\n",
776 |     "    model_folder,\n",
777 |     "    test_auc_2\n",
778 |     "))\n",
779 |     "\n",
780 |     "# Cleanup\n",
781 |     "session.close()\n",
782 |     "gc.collect()\n",
783 |     "print()"
784 |    ]
785 |   }
786 |  ],
787 |  "metadata": {
788 |   "kernelspec": {
789 |    "display_name": "Python 3",
790 |    "language": "python",
791 |    "name": "python3"
792 |   },
793 |   "language_info": {
794 |    "codemirror_mode": {
795 |     "name": "ipython",
796 |     "version": 3
797 |    },
798 |    "file_extension": ".py",
799 |    "mimetype": "text/x-python",
800 |    "name": "python",
801 |    "nbconvert_exporter": "python",
802 |    "pygments_lexer": "ipython3",
803 |    "version": "3.6.6"
804 |   }
805 |  },
806 |  "nbformat": 4,
807 |  "nbformat_minor": 2
808 | }
809 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                     GNU GENERAL PUBLIC LICENSE
2 |                        Version 3, 29 June 2007
3 | 
4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 |  Everyone is permitted to copy and distribute verbatim copies
6 |  of this license document, but changing it is not allowed.
7 | 
8 |                             Preamble
9 | 
10 |   The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 
74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. 
However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program. If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | --------------------------------------------------------------------------------