├── deep_recommenders ├── __init__.py ├── keras │ ├── __init__.py │ └── models │ │ ├── __init__.py │ │ ├── retrieval │ │ ├── __init__.py │ │ ├── gcn.py │ │ ├── sbcnm.py │ │ └── factorized_top_k.py │ │ ├── nlp │ │ ├── __init__.py │ │ ├── multi_head_attention.py │ │ └── transformer.py │ │ └── ranking │ │ ├── __init__.py │ │ ├── deepfm.py │ │ ├── fm.py │ │ ├── dcn.py │ │ ├── xdeepfm.py │ │ └── din.py ├── estimator │ ├── __init__.py │ └── models │ │ ├── __init__.py │ │ ├── multi_task_learning │ │ ├── __init__.py │ │ ├── esmm.py │ │ └── mixture_of_experts.py │ │ ├── ranking │ │ ├── __init__.py │ │ ├── deepfm.py │ │ ├── wide_and_deep.py │ │ └── fnn.py │ │ └── feature_interaction │ │ ├── __init__.py │ │ ├── dnn.py │ │ └── fm.py └── datasets │ ├── __init__.py │ ├── synthetic_for_multi_task.py │ ├── cora.py │ └── movielens.py ├── requirements.txt ├── .gitignore ├── tests ├── testing.sh ├── estimator │ ├── test_fm.py │ ├── test_esmm.py │ └── test_mixture_of_experts.py ├── datasets │ ├── test_synthetic_for_multi_task.py │ └── test_movielens.py └── keras │ ├── test_dcn.py │ ├── test_transformer.py │ ├── test_sbcnm.py │ ├── test_deepfm.py │ ├── test_din.py │ ├── test_gcn.py │ ├── test_xdeepfm.py │ ├── test_fm.py │ └── test_factorized_top_k.py ├── .github └── workflows │ ├── continuous_integration.yml │ └── codeql-analysis.yml ├── .travis.yml ├── examples ├── train_deepfm_on_movielens_keras.py ├── train_gcn_on_cora_keras.py ├── train_transformer_on_imdb_keras.py ├── train_mmoe_on_synthetic_estimator.py ├── train_deepfm_on_movielens_estimator.py ├── train_fnn_on_movielens_estimator.py ├── train_fm_on_movielens_estimator.py └── train_wdl_on_movielens_estimator.py ├── README.md └── LICENSE /deep_recommenders/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /deep_recommenders/keras/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.20.1 2 | scipy==1.7.1 3 | faiss-cpu==1.6.3 4 | absl-py==0.13.0 5 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | __pycache__ 4 | 5 | *.zip 6 | *.tgz 7 | env 8 | .vscode 9 | .idea 10 | data 11 | logs 12 | *.tfrecords -------------------------------------------------------------------------------- 
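The tree above mirrors each model family twice: deep_recommenders/keras/models holds the tf.keras implementations, deep_recommenders/estimator/models holds the TF1-style Estimator building blocks, and deep_recommenders/datasets provides the shared input pipelines. A minimal sketch of how the layout maps onto imports, assuming a TF 2.x install; the hash-bucket sizes are illustrative and the column setup follows tests/keras/test_deepfm.py below:

import tensorflow as tf

from deep_recommenders.keras.models.ranking import DeepFM

# Each raw categorical feature is wrapped twice: as a one-hot indicator
# column for the linear/FM part, and as a dense embedding column for the
# deep part.
user_id = tf.feature_column.categorical_column_with_hash_bucket("user_id", 100)
movie_id = tf.feature_column.categorical_column_with_hash_bucket("movie_id", 100)
indicator_columns = [tf.feature_column.indicator_column(c) for c in (user_id, movie_id)]
embedding_columns = [tf.feature_column.embedding_column(c, dimension=16) for c in (user_id, movie_id)]

model = DeepFM(indicator_columns, embedding_columns, dnn_units_size=[64, 32])
model.compile(loss=tf.keras.losses.binary_crossentropy,
              optimizer=tf.keras.optimizers.Adam())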
/deep_recommenders/keras/models/retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.keras.models.retrieval.factorized_top_k import FactorizedTopK 5 | from deep_recommenders.keras.models.retrieval.gcn import GCN 6 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.keras.models.nlp.multi_head_attention import MultiHeadAttention 5 | from deep_recommenders.keras.models.nlp.transformer import Transformer 6 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/multi_task_learning/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.estimator.models.multi_task_learning.mixture_of_experts import MMoE 5 | from deep_recommenders.estimator.models.multi_task_learning.esmm import ESMM 6 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.keras.models.ranking.fm import FM 5 | from deep_recommenders.keras.models.ranking.fm import FactorizationMachine 6 | from deep_recommenders.keras.models.ranking.deepfm import DeepFM 7 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/ranking/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.estimator.models.ranking.fnn import FNN 5 | from deep_recommenders.estimator.models.ranking.wide_and_deep import WDL 6 | from deep_recommenders.estimator.models.ranking.deepfm import DeepFM 7 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/feature_interaction/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.estimator.models.feature_interaction.fm import fm 5 | from deep_recommenders.estimator.models.feature_interaction.fm import FM 6 | from deep_recommenders.estimator.models.feature_interaction.dnn import dnn 7 | -------------------------------------------------------------------------------- /deep_recommenders/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from deep_recommenders.datasets.movielens import MovieLens 5 | from deep_recommenders.datasets.movielens import MovielensRanking 6 | from deep_recommenders.datasets.synthetic_for_multi_task import SyntheticForMultiTask 7 | from deep_recommenders.datasets.cora import Cora 8 | -------------------------------------------------------------------------------- /tests/testing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function testing() { 4 | for file in $(ls "$1"); do 5 | if 
[ "${file##*.}"x = "py"x ]; then 6 | python "$1"/"$file" >/dev/null 2>&1 7 | if [ $? -eq 0 ]; then 8 | echo -e "Test[OK]" "$1"/"$file" 9 | else 10 | echo -e "Test[ERROR]" "$1"/"$file" 11 | exit 1 12 | fi 13 | fi 14 | done 15 | } 16 | 17 | testing tests/"$1" 18 | -------------------------------------------------------------------------------- /tests/estimator/test_fm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | sys.dont_write_bytecode = True 6 | 7 | import tensorflow as tf 8 | 9 | if tf.__version__ >= "2.0.0": 10 | import tensorflow.compat.v1 as tf 11 | tf.disable_eager_execution() 12 | 13 | from deep_recommenders.estimator.models.feature_interaction import fm 14 | 15 | 16 | class TestFM(tf.test.TestCase): 17 | 18 | def test_fm(self): 19 | inputs = tf.random_normal(shape=(10, 2, 3)) 20 | 21 | with self.session() as sess: 22 | y = fm(inputs) 23 | init = tf.global_variables_initializer() 24 | sess.run(init) 25 | pred = sess.run(y) 26 | self.assertAllEqual(pred.shape, (10, 1)) 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/feature_interaction/dnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import tensorflow as tf 4 | 5 | if tf.__version__ >= "2.0.0": 6 | import tensorflow.compat.v1 as tf 7 | 8 | 9 | def dnn(inputs, 10 | hidden_units, 11 | activation=tf.nn.relu, 12 | batch_normalization=False, 13 | dropout=None, 14 | **kwargs): 15 | 16 | x = inputs 17 | for units in hidden_units[:-1]: 18 | x = tf.layers.dense(x, 19 | units, 20 | activation, 21 | **kwargs) 22 | 23 | if batch_normalization is True: 24 | x = tf.nn.batch_normalization(x) 25 | 26 | if dropout is not None: 27 | x = tf.nn.dropout(x, rate=dropout) 28 | 29 | outputs = tf.layers.dense(x, hidden_units[-1], **kwargs) 30 | 31 | return outputs 32 | -------------------------------------------------------------------------------- /tests/datasets/test_synthetic_for_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | sys.dont_write_bytecode = True 6 | 7 | import tensorflow as tf 8 | from absl.testing import parameterized 9 | 10 | if tf.__version__ < "2.0.0": 11 | tf.enable_eager_execution() 12 | 13 | if tf.__version__ >= "2.0.0": 14 | import tensorflow.compat.v1 as tf 15 | 16 | from deep_recommenders.datasets import SyntheticForMultiTask 17 | 18 | 19 | class TestSyntheticForMultiTask(tf.test.TestCase, parameterized.TestCase): 20 | 21 | @parameterized.parameters(16, 64, 256, 1024) 22 | def test_input_fn(self, dim): 23 | synthetic = SyntheticForMultiTask(1000, example_dim=dim) 24 | dataset = synthetic.input_fn() 25 | for features, labels in dataset.take(1): 26 | self.assertAllEqual(len(features.keys()), dim) 27 | self.assertAllEqual(len(labels.keys()), 2) 28 | 29 | @parameterized.parameters(16, 64, 256, 512) 30 | def test_batch_size(self, batch_size): 31 | synthetic = SyntheticForMultiTask(1000) 32 | dataset = synthetic.input_fn(batch_size=batch_size) 33 | for features, labels in dataset.take(1): 34 | self.assertAllEqual(features["C0"].shape, (batch_size, 1)) 35 | 36 | 37 | if __name__ == '__main__': 38 | tf.test.main() 39 | 
-------------------------------------------------------------------------------- /tests/datasets/test_movielens.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | sys.dont_write_bytecode = True 6 | 7 | import tensorflow as tf 8 | from absl.testing import parameterized 9 | 10 | if tf.__version__ < "2.0.0": 11 | tf.enable_eager_execution() 12 | 13 | if tf.__version__ >= "2.0.0": 14 | import tensorflow.compat.v1 as tf 15 | 16 | from deep_recommenders.datasets import MovieLens 17 | 18 | 19 | class TestMovieLens(tf.test.TestCase, parameterized.TestCase): 20 | 21 | @parameterized.parameters(16, 64, 256, 1024) 22 | def test_batch(self, batch_size): 23 | movielens = MovieLens() 24 | dataset = movielens.dataset(batch_size=batch_size) 25 | for x, y in dataset.take(1): 26 | self.assertAllEqual(x["UserID"].shape, (batch_size,)) 27 | self.assertAllEqual(y.shape, (batch_size,)) 28 | 29 | @parameterized.parameters(1, 2, 3) 30 | def test_repeat(self, epochs): 31 | movielens = MovieLens() 32 | dataset = movielens.dataset(epochs, 2048) 33 | steps = 0 34 | for _ in dataset: 35 | steps += 1 36 | expect_steps = (movielens.num_ratings * epochs) // 2048 + 1 37 | self.assertAllEqual(steps, expect_steps) 38 | 39 | def test_map(self): 40 | movielens = MovieLens() 41 | dataset = movielens.dataset() 42 | dataset = dataset.map(lambda _, y: tf.where(y > 3, tf.ones_like(y), tf.zeros_like(y))) 43 | for y in dataset.take(1): 44 | self.assertLess(tf.reduce_sum(y), 256) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /.github/workflows/continuous_integration.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push] 3 | jobs: 4 | Testing: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: true 8 | matrix: 9 | python: [3.7, 3.8] 10 | tensorflow: ["1.15", "2.0", "2.1", "2.2", "2.3", "2.4", "2.5"] 11 | module: [estimator, keras] 12 | exclude: 13 | - python: 3.8 14 | tensorflow: "1.15" 15 | - python: 3.8 16 | tensorflow: "2.0" 17 | - python: 3.8 18 | tensorflow: "2.1" 19 | - module: keras 20 | tensorflow: "1.15" 21 | - module: keras 22 | tensorflow: "2.0" 23 | - module: keras 24 | tensorflow: "2.1" 25 | - module: keras 26 | tensorflow: "2.2" 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python }} 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | pip install tensorflow==${{ matrix.tensorflow }} 37 | pip install -r requirements.txt 38 | - name: Set python path environment variables 39 | run: echo "PYTHONPATH=." >> $GITHUB_ENV 40 | - name: Test with pytest 41 | run: | 42 | pip install pytest-cov 43 | pytest --cov=deep_recommenders/${{ matrix.module }} tests/${{ matrix.module }} 44 | env: 45 | PYTHONPATH: .
46 | - name: Coverage with codecov 47 | run: | 48 | pip install codecov 49 | codecov 50 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/ranking/deepfm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import tensorflow as tf 4 | 5 | from deep_recommenders.estimator.models.feature_interaction import FM 6 | from deep_recommenders.estimator.models.feature_interaction import dnn 7 | 8 | 9 | class DeepFM(object): 10 | 11 | def __init__(self, 12 | indicator_columns, 13 | embedding_columns, 14 | dnn_units, 15 | dnn_activation=tf.nn.relu, 16 | dnn_batch_normalization=False, 17 | dnn_dropout=None, 18 | **dnn_kwargs): 19 | self._indicator_columns = indicator_columns 20 | self._embedding_columns = embedding_columns 21 | self._dnn_hidden_units = dnn_units 22 | self._dnn_activation = dnn_activation 23 | self._dnn_batch_norm = dnn_batch_normalization 24 | self._dnn_dropout = dnn_dropout 25 | self._dnn_kwargs = dnn_kwargs 26 | 27 | def __call__(self, *args, **kwargs): 28 | return self.call(*args, **kwargs) 29 | 30 | def call(self, features): 31 | fm = FM(self._indicator_columns, self._embedding_columns) 32 | 33 | fm_outputs = fm(features) 34 | concat_embeddings = tf.concat(fm.embeddings, axis=1) 35 | 36 | dnn_outputs = dnn(concat_embeddings, 37 | self._dnn_hidden_units + [1], 38 | activation=self._dnn_activation, 39 | batch_normalization=self._dnn_batch_norm, 40 | dropout=self._dnn_dropout, 41 | **self._dnn_kwargs) 42 | 43 | return tf.nn.sigmoid(fm_outputs + dnn_outputs) 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/estimator/test_esmm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | sys.dont_write_bytecode = True 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | if tf.__version__ >= "2.0.0": 11 | import tensorflow.compat.v1 as tf 12 | tf.disable_eager_execution() 13 | 14 | from absl.testing import parameterized 15 | from deep_recommenders.datasets import SyntheticForMultiTask 16 | from deep_recommenders.estimator.models.multi_task_learning import ESMM 17 | 18 | 19 | class TestESMM(tf.test.TestCase, parameterized.TestCase): 20 | 21 | @parameterized.parameters(32, 64, 128, 512) 22 | def test_esmm(self, batch_size): 23 | 24 | def build_columns(): 25 | return [ 26 | tf.feature_column.numeric_column("C{}".format(i)) 27 | for i in range(100) 28 | ] 29 | 30 | columns = build_columns() 31 | model = ESMM(columns, hidden_units=[32, 10]) 32 | 33 | dataset = SyntheticForMultiTask(5000) 34 | 35 | with self.session() as sess: 36 | iterator = tf.data.make_one_shot_iterator(dataset.input_fn(batch_size=batch_size)) 37 | x, y = iterator.get_next() 38 | p_cvr, p_ctr, p_ctcvr = model(x) 39 | sess.run(tf.global_variables_initializer()) 40 | p_cvr = sess.run(p_cvr) 41 | p_ctr = sess.run(p_ctr) 42 | p_ctcvr = sess.run(p_ctcvr) 43 | self.assertAllEqual(p_cvr.shape, (batch_size, 1)) 44 | self.assertAllEqual(p_ctr.shape, (batch_size, 1)) 45 | self.assertAllEqual(p_ctcvr.shape, (batch_size, 1)) 46 | 47 | 48 | if __name__ == '__main__': 49 | tf.logging.set_verbosity(tf.logging.INFO) 50 | tf.test.main() 51 | -------------------------------------------------------------------------------- /tests/keras/test_dcn.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deep_recommenders.keras.models.ranking.dcn import Cross 12 | 13 | 14 | class TestDCN(tf.test.TestCase): 15 | 16 | def test_cross_full_matrix(self): 17 | x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32) 18 | x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32) 19 | 20 | cross = Cross(projection_dim=None, kernel_init="ones") 21 | output = cross(x0, x) 22 | self.evaluate(tf.compat.v1.global_variables_initializer()) 23 | self.assertAllClose(np.asarray([[0.55, 0.8, 1.05]]), output) 24 | 25 | def test_cross_save_model(self): 26 | 27 | def get_model(): 28 | x0 = tf.keras.layers.Input(shape=(13,)) 29 | x1 = Cross(projection_dim=None)(x0, x0) 30 | x2 = Cross(projection_dim=None)(x0, x1) 31 | logits = tf.keras.layers.Dense(units=1)(x2) 32 | return tf.keras.Model(x0, logits) 33 | 34 | model = get_model() 35 | random_input = np.random.uniform(size=(10, 13)) 36 | model_pred = model.predict(random_input) 37 | 38 | with tempfile.TemporaryDirectory() as tmp: 39 | path = os.path.join(tmp, "dcn_model") 40 | model.save(path) 41 | loaded_model = tf.keras.models.load_model(path) 42 | loaded_pred = loaded_model.predict(random_input) 43 | for i in range(len(model.layers)): 44 | assert model.layers[i].get_config() == loaded_model.layers[i].get_config() 45 | self.assertAllClose(model_pred, loaded_pred) 46 | 47 | 48 | if __name__ == "__main__": 49 | tf.test.main() 50 | -------------------------------------------------------------------------------- /tests/estimator/test_mixture_of_experts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | if tf.__version__ >= "2.0.0": 10 | import tensorflow.compat.v1 as tf 11 | tf.disable_eager_execution() 12 | 13 | from absl.testing import parameterized 14 | from deep_recommenders.datasets import SyntheticForMultiTask 15 | from deep_recommenders.estimator.models.multi_task_learning import MMoE 16 | 17 | 18 | class TestMixtureOfExperts(tf.test.TestCase, parameterized.TestCase): 19 | 20 | @parameterized.parameters(32, 64, 128, 512) 21 | def test_mmoe(self, batch_size): 22 | 23 | def build_columns(): 24 | return [ 25 | tf.feature_column.numeric_column("C{}".format(i)) 26 | for i in range(100) 27 | ] 28 | 29 | columns = build_columns() 30 | model = MMoE(columns, 31 | num_tasks=2, 32 | num_experts=2, 33 | task_hidden_units=[32, 10], 34 | expert_hidden_units=[64, 32]) 35 | 36 | dataset = SyntheticForMultiTask(5000) 37 | 38 | with self.session() as sess: 39 | iterator = tf.data.make_one_shot_iterator(dataset.input_fn(batch_size=batch_size)) 40 | x, y = iterator.get_next() 41 | y_pred = model(x) 42 | sess.run(tf.global_variables_initializer()) 43 | a = sess.run(y_pred[0]) 44 | b = sess.run(y_pred[1]) 45 | self.assertAllEqual(len(y_pred), 2) 46 | self.assertAllEqual(a.shape, (batch_size, 1)) 47 | self.assertAllEqual(b.shape, (batch_size, 1)) 48 | 49 | 50 | if __name__ == '__main__': 51 | tf.logging.set_verbosity(tf.logging.INFO) 52 | tf.test.main() 53 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/multi_task_learning/esmm.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | if tf.__version__ >= "2.0.0": 7 | import tensorflow.compat.v1 as tf 8 | 9 | from deep_recommenders.estimator.models.feature_interaction import dnn 10 | 11 | 12 | class ESMM(object): 13 | 14 | def __init__(self, 15 | feature_columns, 16 | hidden_units, 17 | activation=tf.nn.relu, 18 | batch_normalization=False, 19 | dropout=None, 20 | **kwargs): 21 | self._columns = feature_columns 22 | self._hidden_units = hidden_units 23 | self._activation = activation 24 | self._batch_norm = batch_normalization 25 | self._dropout = dropout 26 | self._configs = kwargs 27 | 28 | def __call__(self, *args, **kwargs): 29 | return self.call(*args, **kwargs) 30 | 31 | def call(self, features): 32 | 33 | dnn_inputs = tf.feature_column.input_layer(features, self._columns) 34 | 35 | with tf.variable_scope("pCVR"): 36 | cvr = dnn(dnn_inputs, 37 | self._hidden_units + [1], 38 | activation=self._activation, 39 | batch_normalization=self._batch_norm, 40 | dropout=self._dropout, 41 | **self._configs) 42 | p_cvr = tf.nn.sigmoid(cvr) 43 | 44 | with tf.variable_scope("pCTR"): 45 | ctr = dnn(dnn_inputs, 46 | self._hidden_units + [1], 47 | activation=self._activation, 48 | batch_normalization=self._batch_norm, 49 | dropout=self._dropout, 50 | **self._configs) 51 | p_ctr = tf.nn.sigmoid(ctr) 52 | 53 | p_ctcvr = tf.math.multiply(p_ctr, p_cvr) 54 | 55 | return p_cvr, p_ctr, p_ctcvr 56 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7" 4 | - "3.8" 5 | env: 6 | - TENSORFLOW_VERSION=1.15 MODULE=estimator 7 | - TENSORFLOW_VERSION=2.0 MODULE=estimator 8 | - TENSORFLOW_VERSION=2.1 MODULE=estimator 9 | - TENSORFLOW_VERSION=2.2 MODULE=estimator 10 | - TENSORFLOW_VERSION=2.3 MODULE=estimator 11 | - TENSORFLOW_VERSION=2.4 MODULE=estimator 12 | - TENSORFLOW_VERSION=2.5 MODULE=estimator 13 | - TENSORFLOW_VERSION=2.6 MODULE=estimator 14 | - TENSORFLOW_VERSION=1.15 MODULE=keras 15 | - TENSORFLOW_VERSION=2.0 MODULE=keras 16 | - TENSORFLOW_VERSION=2.1 MODULE=keras 17 | - TENSORFLOW_VERSION=2.2 MODULE=keras 18 | - TENSORFLOW_VERSION=2.3 MODULE=keras 19 | - TENSORFLOW_VERSION=2.4 MODULE=keras 20 | - TENSORFLOW_VERSION=2.5 MODULE=keras 21 | - TENSORFLOW_VERSION=2.6 MODULE=keras 22 | matrix: 23 | allow_failures: 24 | - env: TENSORFLOW_VERSION=1.15 MODULE=keras 25 | - env: TENSORFLOW_VERSION=2.0 MODULE=keras 26 | - env: TENSORFLOW_VERSION=2.1 MODULE=keras 27 | - env: TENSORFLOW_VERSION=2.2 MODULE=keras 28 | exclude: 29 | - python: 3.8 30 | env: TENSORFLOW_VERSION=1.15 MODULE=estimator 31 | - python: 3.8 32 | env: TENSORFLOW_VERSION=2.0 MODULE=estimator 33 | - python: 3.8 34 | env: TENSORFLOW_VERSION=2.1 MODULE=estimator 35 | - python: 3.8 36 | env: TENSORFLOW_VERSION=1.15 MODULE=keras 37 | - python: 3.8 38 | env: TENSORFLOW_VERSION=2.0 MODULE=keras 39 | - python: 3.8 40 | env: TENSORFLOW_VERSION=2.1 MODULE=keras 41 | 42 | sudo: false 43 | 44 | before_install: 45 | - pip install pytest-cov==2.4.0 46 | - pip install tensorflow==$TENSORFLOW_VERSION 47 | - pip install codecov 48 | - export PYTHONPATH=$PYTHONPATH:$PWD 49 | 50 | install: 51 | - pip install -r requirements.txt 52 | 53 | script: 54 | - pytest --cov=deep_recommenders/$MODULE tests/$MODULE 55 | 56 | notifications: 57 | email: wangyao.sr@gmail.com 58 |
59 | after_success: 60 | - codecov 61 | 62 | 63 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/feature_interaction/fm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | if tf.__version__ >= "2.0.0": 7 | import tensorflow.compat.v1 as tf 8 | 9 | 10 | def fm(x): 11 | """ 12 | Second order interaction in Factorization Machine 13 | :param x: 14 | type: tf.Tensor 15 | shape: (batch_size, num_features, embedding_dim) 16 | :return: tf.Tensor 17 | """ 18 | 19 | if x.shape.rank != 3: 20 | raise ValueError("The rank of `x` should be 3. Got rank = {}.".format(x.shape.rank)) 21 | 22 | sum_square = tf.square(tf.reduce_sum(x, axis=1)) 23 | square_sum = tf.reduce_sum(tf.square(x), axis=1) 24 | 25 | return 0.5 * tf.reduce_sum( 26 | tf.subtract(sum_square, square_sum), axis=1, keepdims=True) 27 | 28 | 29 | class FM(object): 30 | """ 31 | Factorization Machine 32 | """ 33 | 34 | def __init__(self, indicator_columns, embedding_columns): 35 | self._indicator_columns = indicator_columns 36 | self._embedding_columns = embedding_columns 37 | 38 | def __call__(self, *args, **kwargs): 39 | return self.call(*args, **kwargs) 40 | 41 | def call(self, features): 42 | 43 | with tf.variable_scope("linear"): 44 | linear_outputs = tf.feature_column.linear_model(features, self._indicator_columns) 45 | 46 | with tf.variable_scope("factorized"): 47 | self.embeddings = [] 48 | for embedding_column in self._embedding_columns: 49 | feature_name = embedding_column.name.replace("_embedding", "") 50 | feature = {feature_name: features.get(feature_name)} 51 | embedding = tf.feature_column.input_layer(feature, embedding_column) 52 | self.embeddings.append(embedding) 53 | stack_embeddings = tf.stack(self.embeddings, axis=1) 54 | factorized_outputs = fm(stack_embeddings) 55 | 56 | return linear_outputs + factorized_outputs 57 | -------------------------------------------------------------------------------- /tests/keras/test_transformer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deep_recommenders.keras.models.nlp import Transformer 12 | 13 | 14 | class TestTransformer(tf.test.TestCase): 15 | 16 | def test_save_model(self): 17 | 18 | def get_model(): 19 | encoder_inputs = tf.keras.Input(shape=(256,), name='encoder_inputs') 20 | decoder_inputs = tf.keras.Input(shape=(256,), name='decoder_inputs') 21 | outputs = Transformer(5000, 22 | model_dim=8, 23 | n_heads=2, 24 | encoder_stack=2, 25 | decoder_stack=2, 26 | feed_forward_size=50)(encoder_inputs, decoder_inputs) 27 | outputs = tf.keras.layers.GlobalAveragePooling1D()(outputs) 28 | outputs = tf.keras.layers.Dense(1, activation='sigmoid')(outputs) 29 | return tf.keras.Model(inputs=[encoder_inputs, decoder_inputs], outputs=outputs) 30 | 31 | model = get_model() 32 | encoder_random_input = np.random.randint(size=(10, 256), low=0, high=5000) 33 | decoder_random_input = np.random.randint(size=(10, 256), low=0, high=5000) 34 | model_pred = model.predict([encoder_random_input, decoder_random_input]) 35 | 36 | with tempfile.TemporaryDirectory() as tmp: 37 | path = os.path.join(tmp, "transformer_model") 38 | model.save(path) 39 | loaded_model = 
tf.keras.models.load_model(path) 40 | loaded_pred = loaded_model.predict([encoder_random_input, decoder_random_input]) 41 | for i in range(len(model.layers)): 42 | assert model.layers[i].get_config() == loaded_model.layers[i].get_config() 43 | self.assertAllClose(model_pred, loaded_pred) 44 | 45 | 46 | if __name__ == '__main__': 47 | tf.test.main() 48 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/ranking/wide_and_deep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | if tf.__version__ >= "2.0.0": 7 | import tensorflow.compat.v1 as tf 8 | 9 | from deep_recommenders.estimator.models.feature_interaction import dnn 10 | 11 | 12 | class WDL(object): 13 | 14 | def __init__(self, 15 | indicator_columns, 16 | embedding_columns, 17 | dnn_units, 18 | dnn_activation=tf.nn.relu, 19 | dnn_batch_normalization=False, 20 | dnn_dropout=None, 21 | **dnn_kwargs): 22 | self._indicator_columns = indicator_columns 23 | self._embedding_columns = embedding_columns 24 | self._dnn_hidden_units = dnn_units 25 | self._dnn_activation = dnn_activation 26 | self._dnn_batch_norm = dnn_batch_normalization 27 | self._dnn_dropout = dnn_dropout 28 | self._dnn_kwargs = dnn_kwargs 29 | 30 | def __call__(self, *args, **kwargs): 31 | return self.call(*args, **kwargs) 32 | 33 | def call(self, features): 34 | with tf.variable_scope("wide"): 35 | linear_outputs = tf.feature_column.linear_model(features, 36 | self._indicator_columns) 37 | with tf.variable_scope("deep"): 38 | embeddings = [] 39 | for embedding_column in self._embedding_columns: 40 | feature_name = embedding_column.name.replace("_embedding", "") 41 | feature = {feature_name: features.get(feature_name)} 42 | embedding = tf.feature_column.input_layer(feature, embedding_column) 43 | embeddings.append(embedding) 44 | concat_embeddings = tf.concat(embeddings, axis=1) 45 | dnn_outputs = dnn(concat_embeddings, 46 | self._dnn_hidden_units + [1], 47 | activation=self._dnn_activation, 48 | batch_normalization=self._dnn_batch_norm, 49 | dropout=self._dnn_dropout, 50 | **self._dnn_kwargs) 51 | return tf.nn.sigmoid(linear_outputs + dnn_outputs) 52 | -------------------------------------------------------------------------------- /tests/keras/test_sbcnm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from absl.testing import parameterized 10 | 11 | from deep_recommenders.keras.models.retrieval import sbcnm 12 | 13 | 14 | class TestSBCNM(tf.test.TestCase, parameterized.TestCase): 15 | 16 | @parameterized.parameters(3, 5, 10, 15) 17 | def test_hard_negative_mining(self, num_hard_negatives): 18 | 19 | logits_shape = (2, 20) 20 | rng = np.random.RandomState(42) # pylint: disable=no-member 21 | 22 | logits = rng.uniform(size=logits_shape).astype(np.float32) 23 | labels = rng.permutation(np.eye(*logits_shape).T).T.astype(np.float32) 24 | 25 | out_logits, out_labels = sbcnm.HardNegativeMining(num_hard_negatives)(logits, labels) 26 | 27 | self.assertEqual(out_logits.shape[-1], num_hard_negatives + 1) 28 | 29 | self.assertAllClose( 30 | tf.reduce_sum(out_logits * out_labels, axis=-1), 31 | tf.reduce_sum(logits * labels, axis=-1)) 32 | 33 | logits = logits + labels * 1000.0 34 | 35 | out_logits, out_labels = sbcnm.HardNegativeMining(num_hard_negatives)(logits, labels) 36 |
out_logits, out_labels = out_logits.numpy(), out_labels.numpy() 37 | 38 | # Highest K logits are always returned. 39 | self.assertAllClose( 40 | np.sort(logits, axis=1)[:, -num_hard_negatives - 1:], 41 | np.sort(out_logits)) 42 | 43 | def test_remove_accidental_negative(self): 44 | 45 | logits_shape = (2, 4) 46 | rng = np.random.RandomState(42) # pylint: disable=no-member 47 | 48 | logits = rng.uniform(size=logits_shape).astype(np.float32) 49 | labels = rng.permutation(np.eye(*logits_shape).T).T.astype(np.float32) 50 | identifiers = rng.randint(0, 3, size=logits_shape[-1]) 51 | 52 | out_logits = sbcnm.RemoveAccidentalNegative()(logits, labels, identifiers) 53 | 54 | self.assertAllClose(tf.reduce_sum(out_logits * labels, axis=1), 55 | tf.reduce_sum(logits * labels, axis=1)) 56 | 57 | 58 | if __name__ == "__main__": 59 | tf.test.main() 60 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/deepfm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.keras.models.ranking import FM 7 | 8 | 9 | class DeepFM(tf.keras.Model): 10 | 11 | def __init__(self, 12 | indicator_columns, 13 | embedding_columns, 14 | dnn_units_size, 15 | dnn_activation="relu", 16 | **kwargs): 17 | 18 | super(DeepFM, self).__init__(**kwargs) 19 | self._indicator_columns = indicator_columns 20 | self._embedding_columns = embedding_columns 21 | self._dnn_units_size = dnn_units_size 22 | self._dnn_activation = dnn_activation 23 | 24 | self._sparse_features_layer = tf.keras.layers.DenseFeatures(self._indicator_columns) 25 | self._embedding_features_layer = { 26 | c.categorical_column.key: tf.keras.layers.DenseFeatures(c) 27 | for c in self._embedding_columns 28 | } 29 | self._fm = FM() 30 | self._dnn = tf.keras.Sequential([ 31 | tf.keras.layers.Dense(units, activation=self._dnn_activation) 32 | for units in self._dnn_units_size 33 | ] + [tf.keras.layers.Dense(1)] 34 | ) 35 | 36 | def call(self, inputs, **kwargs): 37 | sparse_features = self._sparse_features_layer(inputs) 38 | embeddings = [] 39 | for column_name, column_input in inputs.items(): 40 | dense_features = self._embedding_features_layer.get(column_name) 41 | if dense_features is not None: 42 | embedding = dense_features({column_name: column_input}) 43 | embeddings.append(embedding) 44 | stack_embeddings = tf.stack(embeddings, axis=1) 45 | concat_embeddings = tf.concat(embeddings, axis=1) 46 | outputs = self._fm(sparse_features, stack_embeddings) + self._dnn(concat_embeddings) 47 | return tf.keras.activations.sigmoid(outputs) 48 | 49 | def get_config(self): 50 | config = { 51 | "dnn_units_size": self._dnn_units_size, 52 | "dnn_activation": self._dnn_activation 53 | } 54 | base_config = super(DeepFM, self).get_config() 55 | return {**base_config, **config} 56 | -------------------------------------------------------------------------------- /tests/keras/test_deepfm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deep_recommenders.keras.models.ranking import DeepFM 12 | 13 | 14 | class TestDeepFM(tf.test.TestCase): 15 | 16 | def test_model_train(self): 17 | 18 | def build_columns(): 19 | user_id = 
tf.feature_column.categorical_column_with_hash_bucket( 20 | "user_id", 100) 21 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 22 | "movie_id", 100) 23 | base_columns = [user_id, movie_id] 24 | _indicator_columns = [ 25 | tf.feature_column.indicator_column(c) 26 | for c in base_columns 27 | ] 28 | _embedding_columns = [ 29 | tf.feature_column.embedding_column(c, dimension=16) 30 | for c in base_columns 31 | ] 32 | return _indicator_columns, _embedding_columns 33 | 34 | indicator_columns, embedding_columns = build_columns() 35 | model = DeepFM(indicator_columns, embedding_columns, dnn_units_size=[10, 5]) 36 | model.compile(loss=tf.keras.losses.binary_crossentropy, 37 | optimizer=tf.keras.optimizers.Adam()) 38 | dataset = tf.data.Dataset.from_tensor_slices(({ 39 | "user_id": [["1"]] * 1000, 40 | "movie_id": [["2"]] * 1000 41 | }, np.random.randint(0, 2, size=(1000, 1)))) 42 | model.fit(dataset, 43 | steps_per_epoch=100, 44 | verbose=0) 45 | test_data = {"user_id": np.asarray([["1"], ["2"]]), 46 | "movie_id": np.asarray([["1"], ["2"]])} 47 | model_pred = model.predict(test_data) 48 | 49 | with tempfile.TemporaryDirectory() as tmp: 50 | path = os.path.join(tmp, "FM") 51 | model.save(path) 52 | loaded_model = tf.keras.models.load_model(path) 53 | loaded_pred = loaded_model.predict(test_data) 54 | for model_layer, loaded_layer in zip(model.layers, loaded_model.layers): 55 | assert model_layer.get_config() == loaded_layer.get_config() 56 | self.assertAllEqual(model_pred, loaded_pred) 57 | 58 | 59 | if __name__ == '__main__': 60 | tf.test.main() 61 | -------------------------------------------------------------------------------- /examples/train_deepfm_on_movielens_keras.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | 7 | from deep_recommenders.datasets import MovielensRanking 8 | from deep_recommenders.keras.models.ranking import DeepFM 9 | 10 | 11 | def build_columns(): 12 | movielens = MovielensRanking() 13 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 14 | "user_id", movielens.num_users) 15 | user_gender = tf.feature_column.categorical_column_with_vocabulary_list( 16 | "user_gender", movielens.gender_vocab) 17 | user_age = tf.feature_column.categorical_column_with_vocabulary_list( 18 | "user_age", movielens.age_vocab) 19 | user_occupation = tf.feature_column.categorical_column_with_vocabulary_list( 20 | "user_occupation", movielens.occupation_vocab) 21 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 22 | "movie_id", movielens.num_movies) 23 | movie_genres = tf.feature_column.categorical_column_with_vocabulary_list( 24 | "movie_genres", movielens.gender_vocab) 25 | 26 | base_columns = [user_id, user_gender, user_age, user_occupation, movie_id, movie_genres] 27 | indicator_columns = [ 28 | tf.feature_column.indicator_column(c) 29 | for c in base_columns 30 | ] 31 | embedding_columns = [ 32 | tf.feature_column.embedding_column(c, dimension=16) 33 | for c in base_columns 34 | ] 35 | return indicator_columns, embedding_columns 36 | 37 | 38 | def main(): 39 | movielens = MovielensRanking() 40 | indicator_columns, embedding_columns = build_columns() 41 | 42 | model = DeepFM(indicator_columns, embedding_columns, dnn_units_size=[256, 32]) 43 | model.compile(loss=tf.keras.losses.binary_crossentropy, 44 | optimizer=tf.keras.optimizers.Adam(), 45 | metrics=[tf.keras.metrics.AUC(), 46 | tf.keras.metrics.Precision(), 47
| tf.keras.metrics.Recall()]) 48 | 49 | model.fit(movielens.training_input_fn, 50 | epochs=10, 51 | steps_per_epoch=movielens.train_steps_per_epoch, 52 | validation_data=movielens.testing_input_fn, 53 | validation_steps=movielens.test_steps, 54 | callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]) 55 | 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /examples/train_gcn_on_cora_keras.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # TensorFlow 2.3 Passing. 5 | 6 | import tensorflow as tf 7 | 8 | from deep_recommenders.datasets import Cora 9 | from deep_recommenders.keras.models.retrieval import GCN 10 | 11 | 12 | def train_model(): 13 | cora = Cora() 14 | ids, features, labels = cora.load_content() 15 | graph = cora.build_graph(ids) 16 | spectral_graph = cora.spectral_graph(graph) 17 | cora.sample_train_nodes(labels) 18 | train, valid, test = cora.split_labels(labels) 19 | 20 | def build_model(): 21 | g = tf.keras.layers.Input(shape=(None,)) 22 | feats = tf.keras.layers.Input(shape=(features.shape[-1],)) 23 | x = GCN(32)(feats, g) 24 | outputs = GCN(cora.num_classes, activation="softmax")(x, g) 25 | return tf.keras.Model([g, feats], outputs) 26 | 27 | model = build_model() 28 | model.compile( 29 | optimizer=tf.keras.optimizers.Adam(0.01), 30 | loss="categorical_crossentropy", 31 | weighted_metrics=["acc"] 32 | ) 33 | 34 | train_labels, train_mask = train 35 | valid_labels, valid_mask = valid 36 | test_labels, test_mask = test 37 | 38 | batch_size = graph.shape[0] 39 | 40 | model.fit([spectral_graph, features], 41 | train_labels, 42 | sample_weight=train_mask, 43 | validation_data=([spectral_graph, features], valid_labels, valid_mask), 44 | batch_size=batch_size, 45 | epochs=200, 46 | shuffle=False, 47 | verbose=2, 48 | callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]) 49 | 50 | eval_results = model.evaluate([spectral_graph, features], 51 | test_labels, 52 | sample_weight=test_mask, 53 | batch_size=batch_size, 54 | verbose=0) 55 | print("Test Loss: {:.4f}".format(eval_results[0])) 56 | print("Test Accuracy: {:.4f}".format(eval_results[1])) 57 | 58 | 59 | def get_embeddings(model, graph, features): 60 | input_layer, output_layer = model.input, model.layers[-1].output 61 | embedding_model = tf.keras.Model(input_layer, output_layer) 62 | embeddings = embedding_model.predict([graph, features], batch_size=graph.shape[0]) 63 | return embeddings 64 | 65 | 66 | if __name__ == "__main__": 67 | train_model() 68 | -------------------------------------------------------------------------------- /deep_recommenders/datasets/synthetic_for_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | 8 | def synthetic_data(num_examples, example_dim=100, c=0.3, p=0.8, m=5): 9 | 10 | mu1 = np.random.normal(size=example_dim) 11 | mu1 = (mu1 - np.mean(mu1)) / (np.std(mu1) * np.sqrt(example_dim)) 12 | 13 | mu2 = np.random.normal(size=example_dim) 14 | mu2 -= mu2.dot(mu1) * mu1 15 | mu2 /= np.linalg.norm(mu2) 16 | 17 | w1 = c * mu1 18 | w2 = c * (p * mu1 + np.sqrt(1. 
- p ** 2) * mu2) 19 | 20 | alpha = np.random.normal(size=m) 21 | beta = np.random.normal(size=m) 22 | 23 | examples = np.random.normal(size=(num_examples, example_dim)) 24 | 25 | w1x = np.matmul(examples, w1) 26 | w2x = np.matmul(examples, w2) 27 | 28 | sin1, sin2 = 0., 0. 29 | for i in range(m): 30 | sin1 += np.sin(alpha[i] * w1x + beta[i]) 31 | sin2 += np.sin(alpha[i] * w2x + beta[i]) 32 | 33 | y1 = w1x + sin1 + np.random.normal(size=num_examples, scale=0.01) 34 | y2 = w2x + sin2 + np.random.normal(size=num_examples, scale=0.01) 35 | 36 | return examples.astype(np.float32), (y1.astype(np.float32), y2.astype(np.float32)) 37 | 38 | 39 | class SyntheticForMultiTask(object): 40 | 41 | def __init__(self, num_examples, example_dim=100, c=0.3, p=0.8, m=5): 42 | self._num_examples = num_examples 43 | self._example_dim = example_dim 44 | self._c = c 45 | self._p = p 46 | self._m = m 47 | 48 | def input_fn(self, epochs=1, batch_size=512, buffer_size=512): 49 | synthetic = synthetic_data(self._num_examples, 50 | self._example_dim, 51 | c=self._c, 52 | p=self._p, 53 | m=self._m) 54 | 55 | def _parse_example(features, labels): 56 | feature_columns = tf.split(features, self._example_dim, axis=1) 57 | features = {"C{}".format(i): col for i, col in enumerate(feature_columns)} 58 | labels = {"labels{}".format(i): lab for i, lab in enumerate(labels)} 59 | return features, labels 60 | 61 | dataset = tf.data.Dataset.from_tensor_slices(synthetic) 62 | dataset = dataset.repeat(epochs) 63 | dataset = dataset.batch(batch_size) 64 | dataset = dataset.map(_parse_example, num_parallel_calls=-1) 65 | dataset = dataset.prefetch(buffer_size) 66 | return dataset 67 | -------------------------------------------------------------------------------- /examples/train_transformer_on_imdb_keras.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.keras.models.nlp import Transformer 7 | 8 | 9 | def load_dataset(vocab_size, max_len): 10 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(maxlen=max_len, num_words=vocab_size) 11 | x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_len) 12 | x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_len) 13 | x_train_masks = tf.equal(x_train, 0) 14 | x_test_masks = tf.equal(x_test, 0) 15 | y_train = tf.keras.utils.to_categorical(y_train) 16 | y_test = tf.keras.utils.to_categorical(y_test) 17 | return (x_train, x_train_masks, y_train), (x_test, x_test_masks, y_test) 18 | 19 | 20 | def build_model(vocab_size, max_len, model_dim=8, n_heads=2, encoder_stack=2, decoder_stack=2, ff_size=50): 21 | encoder_inputs = tf.keras.Input(shape=(max_len,), name='encoder_inputs') 22 | decoder_inputs = tf.keras.Input(shape=(max_len,), name='decoder_inputs') 23 | outputs = Transformer( 24 | vocab_size, 25 | model_dim, 26 | n_heads=n_heads, 27 | encoder_stack=encoder_stack, 28 | decoder_stack=decoder_stack, 29 | feed_forward_size=ff_size 30 | )(encoder_inputs, decoder_inputs) 31 | outputs = tf.keras.layers.GlobalAveragePooling1D()(outputs) 32 | outputs = tf.keras.layers.Dense(2, activation='softmax')(outputs) 33 | return tf.keras.Model(inputs=[encoder_inputs, decoder_inputs], outputs=outputs) 34 | 35 | 36 | def train_model(vocab_size=5000, max_len=128, batch_size=128, epochs=10): 37 | 38 | train, test = load_dataset(vocab_size, max_len) 39 | 40 | x_train, x_train_masks, y_train = train 41 | 
x_test, x_test_masks, y_test = test 42 | 43 | model = build_model(vocab_size, max_len) 44 | 45 | model.compile(optimizer=tf.keras.optimizers.Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9), 46 | loss='categorical_crossentropy', metrics=['accuracy']) 47 | 48 | es = tf.keras.callbacks.EarlyStopping(patience=3) 49 | model.fit([x_train, x_train_masks], y_train, 50 | batch_size=batch_size, epochs=epochs, validation_split=0.2, callbacks=[es]) 51 | 52 | test_metrics = model.evaluate([x_test, x_test_masks], y_test, batch_size=batch_size, verbose=0) 53 | print("loss on Test: %.4f" % test_metrics[0]) 54 | print("accu on Test: %.4f" % test_metrics[1]) 55 | 56 | 57 | if __name__ == '__main__': 58 | train_model() 59 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/fm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | 7 | @tf.keras.utils.register_keras_serializable() 8 | class FM(tf.keras.layers.Layer): 9 | """ Factorization Machine """ 10 | 11 | def __init__(self, **kwargs): 12 | super(FM, self).__init__(**kwargs) 13 | 14 | def build(self, input_shape): 15 | 16 | self._linear = tf.keras.layers.Dense( 17 | units=1, 18 | kernel_initializer="zeros", 19 | name="linear" 20 | ) 21 | self.built = True 22 | 23 | def call(self, sparse_inputs, embedding_inputs=None, **kwargs): 24 | 25 | if embedding_inputs is None: 26 | return self._linear(sparse_inputs) 27 | 28 | x_sum = tf.reduce_sum(embedding_inputs, axis=1) 29 | x_square_sum = tf.reduce_sum(tf.pow(embedding_inputs, 2), axis=1) 30 | 31 | interaction = 0.5 * tf.reduce_sum( 32 | tf.subtract( 33 | tf.pow(x_sum, 2), 34 | x_square_sum 35 | ), axis=1, keepdims=True) 36 | 37 | return self._linear(sparse_inputs) + interaction 38 | 39 | 40 | class FactorizationMachine(tf.keras.Model): 41 | 42 | def __init__(self, indicator_columns, embedding_columns, **kwargs): 43 | super(FactorizationMachine, self).__init__(**kwargs) 44 | self._indicator_columns = indicator_columns 45 | self._embedding_columns = embedding_columns 46 | 47 | self._sparse_features_layer = tf.keras.layers.DenseFeatures(self._indicator_columns) 48 | self._embedding_features_layer = { 49 | c.categorical_column.key: tf.keras.layers.DenseFeatures(c) 50 | for c in self._embedding_columns 51 | } 52 | self._kernel = FM() 53 | 54 | def call(self, inputs, training=None, mask=None): 55 | sparse_features = self._sparse_features_layer(inputs) 56 | embeddings = [] 57 | for column_name, column_input in inputs.items(): 58 | dense_features = self._embedding_features_layer.get(column_name) 59 | if dense_features is not None: 60 | embedding = dense_features({column_name: column_input}) 61 | embeddings.append(embedding) 62 | stack_embeddings = tf.stack(embeddings, axis=1) 63 | outputs = self._kernel(sparse_features, stack_embeddings) 64 | return tf.nn.sigmoid(outputs) 65 | 66 | def get_config(self): 67 | config = { 68 | "indicator_columns": self._indicator_columns, 69 | "embedding_columns": self._embedding_columns 70 | } 71 | base_config = super(FactorizationMachine, self).get_config() 72 | return {**base_config, **config} 73 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/retrieval/gcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import tensorflow as tf 4 | 5 | 6 | 
@tf.keras.utils.register_keras_serializable() 7 | class GCN(tf.keras.layers.Layer): 8 | 9 | def __init__(self, 10 | units: int, 11 | residual=False, 12 | use_bias=False, 13 | activation="relu", 14 | kernel_initializer="truncated_normal", 15 | kernel_regularizer=None, 16 | bias_initializer="zeros", 17 | bias_regularizer=None, 18 | **kwargs): 19 | super().__init__(**kwargs) 20 | 21 | self._units = units 22 | self._residual = residual 23 | self._use_bias = use_bias 24 | self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) 25 | self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) 26 | self._bias_initializer = tf.keras.initializers.get(bias_initializer) 27 | self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) 28 | self._kernel_activation = tf.keras.activations.get(activation) 29 | 30 | def build(self, input_shape): 31 | 32 | self._kernel = tf.keras.layers.Dense( 33 | self._units, 34 | activation=self._kernel_activation, 35 | kernel_initializer=self._kernel_initializer, 36 | kernel_regularizer=self._kernel_regularizer, 37 | bias_initializer=self._bias_initializer, 38 | bias_regularizer=self._bias_regularizer, 39 | use_bias=self._use_bias 40 | ) 41 | self.built = True 42 | 43 | def call(self, features, adj, **kwargs): 44 | 45 | if isinstance(adj, tf.SparseTensor): 46 | agg_embeddings = tf.sparse.sparse_dense_matmul(adj, features) 47 | else: 48 | agg_embeddings = tf.linalg.matmul(adj, features) 49 | 50 | outputs = self._kernel(agg_embeddings) 51 | 52 | if self._residual is True: 53 | outputs += features 54 | 55 | return outputs 56 | 57 | def get_config(self): 58 | config = { 59 | "units": self._units, 60 | "residual": self._residual, 61 | "use_bias": self._use_bias, 62 | "activation": tf.keras.activations.serialize(self._kernel_activation), 63 | "kernel_initializer": tf.keras.initializers.serialize(self._kernel_initializer), 64 | "kernel_regularizer": tf.keras.regularizers.serialize(self._kernel_regularizer), 65 | "bias_initializer": tf.keras.initializers.serialize(self._bias_initializer), 66 | "bias_regularizer": tf.keras.regularizers.serialize(self._bias_regularizer), 67 | } 68 | base_config = super(GCN, self).get_config() 69 | return {**base_config, **config} 70 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [master] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [master] 14 | schedule: 15 | - cron: '0 5 * * 6' 16 | 17 | jobs: 18 | analyze: 19 | name: Analyze 20 | runs-on: ubuntu-latest 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | # Override automatic language detection by changing the below list 26 | # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] 27 | language: ['python'] 28 | # Learn more...
29 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 30 | 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@v2 34 | with: 35 | # We must fetch at least the immediate parents so that if this is 36 | # a pull request then we can checkout the head. 37 | fetch-depth: 2 38 | 39 | # If this run was triggered by a pull request event, then checkout 40 | # the head of the pull request instead of the merge commit. 41 | - run: git checkout HEAD^2 42 | if: ${{ github.event_name == 'pull_request' }} 43 | 44 | # Initializes the CodeQL tools for scanning. 45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@v1 47 | with: 48 | languages: ${{ matrix.language }} 49 | # If you wish to specify custom queries, you can do so here or in a config file. 50 | # By default, queries listed here will override any specified in a config file. 51 | # Prefix the list here with "+" to use these queries and those in the config file. 52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 53 | 54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 55 | # If this step fails, then you should remove it and run the build manually (see below) 56 | - name: Autobuild 57 | uses: github/codeql-action/autobuild@v1 58 | 59 | # ℹ️ Command-line programs to run using the OS shell. 60 | # 📚 https://git.io/JvXDl 61 | 62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 63 | # and modify them (or add more) to build your code if your project 64 | # uses a compiled language 65 | 66 | #- run: | 67 | # make bootstrap 68 | # make release 69 | 70 | - name: Perform CodeQL Analysis 71 | uses: github/codeql-action/analyze@v1 72 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/multi_task_learning/mixture_of_experts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | if tf.__version__ >= "2.0.0": 8 | import tensorflow.compat.v1 as tf 9 | 10 | from deep_recommenders.estimator.models.feature_interaction import dnn 11 | 12 | 13 | class MMoE(object): 14 | 15 | def __init__(self, 16 | feature_columns, 17 | num_tasks, 18 | num_experts, 19 | expert_hidden_units, 20 | task_hidden_units, 21 | task_hidden_activation=tf.nn.relu, 22 | task_batch_normalization=False, 23 | task_dropout=None, 24 | expert_hidden_activation=tf.nn.relu, 25 | expert_batch_normalization=False, 26 | expert_dropout=None): 27 | 28 | self._columns = feature_columns 29 | 30 | self._num_tasks = num_tasks 31 | self._num_experts = num_experts 32 | self._expert_hidden_units = expert_hidden_units 33 | self._task_hidden_units = task_hidden_units 34 | 35 | self._task_hidden_activation = task_hidden_activation 36 | self._task_batch_norm = task_batch_normalization 37 | self._task_dropout = task_dropout 38 | 39 | self._expert_hidden_activation = expert_hidden_activation 40 | self._expert_batch_norm = expert_batch_normalization 41 | self._expert_dropout = expert_dropout 42 | 43 | def __call__(self, *args, **kwargs): 44 | return self.call(*args, **kwargs) 45 | 46 | def gating_network(self, inputs): 47 | """ 48 | Gating network: y = SoftMax(W * inputs) 49 | """ 50 | x = tf.layers.dense(inputs, 51 | units=self._num_experts, 52 | use_bias=False) 53 | 54 | return 
tf.nn.softmax(x) 55 | 56 | def call(self, features): 57 | 58 | inputs = tf.feature_column.input_layer(features, self._columns) 59 | 60 | with tf.variable_scope("mixture_of_experts"): 61 | experts_outputs = [] 62 | for _ in range(self._num_experts): 63 | x = dnn(inputs, 64 | self._expert_hidden_units, 65 | activation=self._expert_hidden_activation, 66 | batch_normalization=self._expert_batch_norm, 67 | dropout=self._expert_dropout) 68 | experts_outputs.append(x) 69 | moe_outputs = tf.stack(experts_outputs, axis=1) 70 | 71 | with tf.variable_scope("multi_gate"): 72 | mg_outputs = [] 73 | for _ in range(self._num_tasks): 74 | gate = self.gating_network(inputs) 75 | gate = tf.expand_dims(gate, axis=1) 76 | output = tf.linalg.matmul(gate, moe_outputs) 77 | mg_outputs.append(tf.squeeze(output, axis=1)) 78 | 79 | outputs = [] 80 | for idx in range(self._num_tasks): 81 | with tf.variable_scope("task{}".format(idx)): 82 | x = dnn(mg_outputs[idx], 83 | self._task_hidden_units + [1], 84 | activation=self._task_hidden_activation, 85 | batch_normalization=self._task_batch_norm, 86 | dropout=self._task_dropout) 87 | 88 | outputs.append(x) 89 | 90 | return outputs 91 | -------------------------------------------------------------------------------- /tests/keras/test_din.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from absl.testing import parameterized 12 | from deep_recommenders.keras.models.ranking import din 13 | 14 | 15 | class TestDIN(tf.test.TestCase, parameterized.TestCase): 16 | 17 | def test_activation_unit_noiteract(self): 18 | 19 | x = np.random.normal(size=(3, 5)) 20 | y = np.random.normal(size=(3, 5)) 21 | 22 | activation_unit = din.ActivationUnit(10, kernel_init="ones") 23 | outputs = activation_unit(x, y) 24 | 25 | dense = tf.keras.layers.Dense(10, activation="relu", kernel_initializer="ones") 26 | expected_outputs = tf.math.reduce_sum( 27 | dense(np.concatenate([x, y], axis=1)), axis=1, keepdims=True) 28 | 29 | self.evaluate(tf.compat.v1.global_variables_initializer()) 30 | self.assertAllClose(outputs, expected_outputs) 31 | 32 | def test_activation_unit_iteract(self): 33 | 34 | x = np.random.normal(size=(3, 5)) 35 | y = np.random.normal(size=(3, 5)) 36 | 37 | interacter = tf.keras.layers.Subtract() 38 | 39 | activation_unit = din.ActivationUnit(10, 40 | interacter=interacter, kernel_init="ones") 41 | outputs = activation_unit(x, y) 42 | 43 | dense = tf.keras.layers.Dense(10, activation="relu", kernel_initializer="ones") 44 | expected_outputs = tf.math.reduce_sum( 45 | dense(np.concatenate([x, y, x - y], axis=1)), axis=1, keepdims=True) 46 | 47 | self.evaluate(tf.compat.v1.global_variables_initializer()) 48 | self.assertAllClose(outputs, expected_outputs) 49 | 50 | @parameterized.parameters(1e-7, 1e-8, 1e-9, 1e-10) 51 | def test_dice(self, epsilon): 52 | 53 | inputs = np.asarray([[-0.2, -0.1, 0.1, 0.2]]).astype(np.float32) 54 | 55 | outputs = din.Dice(epsilon=epsilon)(inputs) 56 | 57 | p = (inputs - inputs.mean()) / np.math.sqrt(inputs.std() + epsilon) 58 | p = 1 / (1 + np.exp(-p)) 59 | 60 | x = tf.where(inputs > 0, x=inputs, y=tf.zeros_like(inputs)) 61 | expected_outputs = tf.where(x > 0, x=p*x, y=(1-p)*x) 62 | 63 | self.evaluate(tf.compat.v1.global_variables_initializer()) 64 | self.assertAllClose(outputs, expected_outputs) 65 | 66 | def
test_din(self): 67 | 68 | def build_model(): 69 | x = tf.keras.layers.Input(shape=(5,)) 70 | y = tf.keras.layers.Input(shape=(5,)) 71 | interacter = tf.keras.layers.Subtract() 72 | activation_unit = din.ActivationUnit(10, interacter=interacter) 73 | outputs = activation_unit(x, y) 74 | return tf.keras.Model([x, y], outputs) 75 | 76 | x_embeddings = np.random.normal(size=(10, 5)) 77 | y_embeddings = np.random.normal(size=(10, 5)) 78 | labels = np.random.normal(size=(10,)) 79 | 80 | model = build_model() 81 | model.compile(loss="mse") 82 | model.fit([x_embeddings, y_embeddings], labels, verbose=0) 83 | 84 | model_pred = model.predict([x_embeddings, y_embeddings]) 85 | 86 | with tempfile.TemporaryDirectory() as tmp: 87 | path = os.path.join(tmp, "din_model") 88 | model.save( 89 | path, 90 | options=tf.saved_model.SaveOptions(namespace_whitelist=["din"])) 91 | loaded_model = tf.keras.models.load_model(path) 92 | loaded_pred = loaded_model.predict([x_embeddings, y_embeddings]) 93 | 94 | self.assertAllEqual(model_pred, loaded_pred) 95 | 96 | 97 | if __name__ == "__main__": 98 | tf.test.main() 99 | -------------------------------------------------------------------------------- /examples/train_mmoe_on_synthetic_estimator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.datasets import SyntheticForMultiTask 7 | from deep_recommenders.estimator.models.multi_task_learning import MMoE 8 | 9 | 10 | EXAMPLE_DIM = 256 11 | 12 | 13 | def build_columns(): 14 | return [ 15 | tf.feature_column.numeric_column("C{}".format(i)) 16 | for i in range(EXAMPLE_DIM) 17 | ] 18 | 19 | 20 | def model_fn(features, labels, mode): 21 | columns = build_columns() 22 | outputs = MMoE(columns, 23 | num_tasks=2, 24 | num_experts=2, 25 | task_hidden_units=[32, 10], 26 | expert_hidden_units=[64, 32])(features) 27 | 28 | predictions = { 29 | "predictions0": outputs[0], 30 | "predictions1": outputs[1] 31 | } 32 | 33 | if mode == tf.estimator.ModeKeys.PREDICT: 34 | return tf.estimator.EstimatorSpec(mode, predictions=predictions) 35 | 36 | labels0 = tf.expand_dims(labels["labels0"], axis=1) 37 | labels1 = tf.expand_dims(labels["labels1"], axis=1) 38 | 39 | loss0 = tf.losses.mean_squared_error(labels=labels0, predictions=outputs[0]) 40 | loss1 = tf.losses.mean_squared_error(labels=labels1, predictions=outputs[1]) 41 | 42 | total_loss = loss0 + loss1 43 | 44 | tf.summary.scalar("task0_loss", loss0) 45 | tf.summary.scalar("task1_loss", loss1) 46 | tf.summary.scalar("total_loss", total_loss) 47 | 48 | metrics = { 49 | "task0_mse": tf.metrics.mean_squared_error(labels0, outputs[0]), 50 | "task1_mse": tf.metrics.mean_squared_error(labels1, outputs[1]) 51 | } 52 | 53 | if mode == tf.estimator.ModeKeys.EVAL: 54 | return tf.estimator.EstimatorSpec(mode, loss=total_loss, eval_metric_ops=metrics) 55 | 56 | optimizer = tf.train.AdamOptimizer(learning_rate=0.01) 57 | train_op = tf.group( 58 | optimizer.minimize(loss=loss0, global_step=tf.train.get_global_step()), 59 | optimizer.minimize(loss=loss1, global_step=tf.train.get_global_step()), 60 | ) 61 | 62 | return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op) 63 | 64 | 65 | def build_estimator(model_dir=None, inter_op=8, intra_op=8): 66 | 67 | config_proto = tf.ConfigProto(device_count={'GPU': 0}, 68 | inter_op_parallelism_threads=inter_op, 69 | intra_op_parallelism_threads=intra_op) 70 | 71 | run_config = 
tf.estimator.RunConfig().replace( 72 | tf_random_seed=42, 73 | keep_checkpoint_max=10, 74 | save_checkpoints_steps=1000, 75 | log_step_count_steps=100, 76 | session_config=config_proto) 77 | 78 | return tf.estimator.Estimator(model_fn=model_fn, 79 | model_dir=model_dir, 80 | config=run_config) 81 | 82 | 83 | def main(): 84 | tf.logging.set_verbosity(tf.logging.INFO) 85 | estimator = build_estimator() 86 | early_stop_hook = tf.estimator.experimental.stop_if_no_decrease_hook(estimator, "loss", 1000) 87 | 88 | synthetic = SyntheticForMultiTask(512 * 1000, example_dim=EXAMPLE_DIM) 89 | train_spec = tf.estimator.TrainSpec(lambda: synthetic.input_fn().take(800), 90 | max_steps=None, 91 | hooks=[early_stop_hook]) 92 | eval_spec = tf.estimator.EvalSpec(lambda: synthetic.input_fn().skip(800).take(200), 93 | steps=None, 94 | start_delay_secs=60, 95 | throttle_secs=60) 96 | tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) 97 | 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /examples/train_deepfm_on_movielens_estimator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | 7 | from deep_recommenders.datasets import MovielensRanking 8 | from deep_recommenders.estimator.models.ranking import DeepFM 9 | 10 | 11 | def build_columns(): 12 | movielens = MovielensRanking() 13 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 14 | "user_id", movielens.num_users) 15 | user_gender = tf.feature_column.categorical_column_with_vocabulary_list( 16 | "user_gender", movielens.gender_vocab) 17 | user_age = tf.feature_column.categorical_column_with_vocabulary_list( 18 | "user_age", movielens.age_vocab) 19 | user_occupation = tf.feature_column.categorical_column_with_vocabulary_list( 20 | "user_occupation", movielens.occupation_vocab) 21 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 22 | "movie_id", movielens.num_movies) 23 | movie_genres = tf.feature_column.categorical_column_with_vocabulary_list( 24 | "movie_genres", movielens.gender_vocab) 25 | 26 | base_columns = [user_id, user_gender, user_age, user_occupation, movie_id, movie_genres] 27 | indicator_columns = [ 28 | tf.feature_column.indicator_column(c) 29 | for c in base_columns 30 | ] 31 | embedding_columns = [ 32 | tf.feature_column.embedding_column(c, dimension=16) 33 | for c in base_columns 34 | ] 35 | return indicator_columns, embedding_columns 36 | 37 | 38 | def model_fn(features, labels, mode): 39 | indicator_columns, embedding_columns = build_columns() 40 | outputs = DeepFM(indicator_columns, embedding_columns, [64, 32])(features) 41 | 42 | predictions = {"predictions": outputs} 43 | 44 | if mode == tf.estimator.ModeKeys.PREDICT: 45 | return tf.estimator.EstimatorSpec(mode, predictions=predictions) 46 | 47 | loss = tf.losses.log_loss(labels, outputs) 48 | metrics = {"auc": tf.metrics.auc(labels, outputs)} 49 | if mode == tf.estimator.ModeKeys.EVAL: 50 | return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) 51 | 52 | optimizer = tf.train.AdamOptimizer(learning_rate=0.01) 53 | train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step()) 54 | 55 | return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) 56 | 57 | 58 | def build_estimator(model_dir=None, inter_op=8, intra_op=8): 59 | config_proto = tf.ConfigProto(device_count={'GPU': 0}, 60 | 
inter_op_parallelism_threads=inter_op, 61 | intra_op_parallelism_threads=intra_op) 62 | 63 | run_config = tf.estimator.RunConfig().replace( 64 | tf_random_seed=42, 65 | keep_checkpoint_max=10, 66 | save_checkpoints_steps=1000, 67 | log_step_count_steps=100, 68 | session_config=config_proto) 69 | 70 | return tf.estimator.Estimator(model_fn=model_fn, 71 | model_dir=model_dir, 72 | config=run_config) 73 | 74 | 75 | def main(): 76 | tf.logging.set_verbosity(tf.logging.INFO) 77 | estimator = build_estimator() 78 | early_stop_hook = tf.estimator.experimental.stop_if_no_decrease_hook(estimator, "loss", 1000) 79 | 80 | movielens = MovielensRanking() 81 | train_spec = tf.estimator.TrainSpec(lambda: movielens.training_input_fn, 82 | max_steps=None, 83 | hooks=[early_stop_hook]) 84 | eval_spec = tf.estimator.EvalSpec(lambda: movielens.testing_input_fn, 85 | steps=None, 86 | start_delay_secs=0, 87 | throttle_secs=0) 88 | tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /deep_recommenders/estimator/models/ranking/fnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.estimator.models.feature_interaction import dnn 7 | 8 | 9 | class FNN(object): 10 | 11 | def __init__(self, 12 | indicator_columns, 13 | embedding_columns, 14 | warmup_from_fm, 15 | dnn_units, 16 | dnn_activation=tf.nn.relu, 17 | dnn_batch_normalization=False, 18 | dnn_dropout=None, 19 | **dnn_kwargs): 20 | self._indicator_columns = indicator_columns 21 | self._embedding_columns = embedding_columns 22 | self._warmup_from_fm = warmup_from_fm 23 | self._dnn_hidden_units = dnn_units 24 | self._dnn_activation = dnn_activation 25 | self._dnn_batch_norm = dnn_batch_normalization 26 | self._dnn_dropout = dnn_dropout 27 | self._dnn_kwargs = dnn_kwargs 28 | 29 | def __call__(self, *args, **kwargs): 30 | return self.call(*args, **kwargs) 31 | 32 | def warm_up(self): 33 | with tf.Session(graph=tf.Graph()) as sess: 34 | tf.saved_model.load(sess, ["serve"], self._warmup_from_fm) 35 | linear_variables = tf.get_collection( 36 | tf.GraphKeys.MODEL_VARIABLES, "linear") 37 | linear_variables = { 38 | var.name.split("/")[2].replace("_indicator", "") 39 | if "bias" not in var.name else "bias": sess.run(var) 40 | for var in linear_variables 41 | } 42 | factorized_variables = tf.get_collection( 43 | tf.GraphKeys.MODEL_VARIABLES, "factorized") 44 | factorized_variables = { 45 | var.name.split("/")[2].replace("_embedding", ""): sess.run(var) 46 | for var in factorized_variables 47 | } 48 | return linear_variables, factorized_variables 49 | 50 | def call(self, features): 51 | linear_variables, factorized_variables = self.warm_up() 52 | 53 | weights = [] 54 | for indicator_column in self._indicator_columns: 55 | feature_name = indicator_column.categorical_column.key 56 | feature = {feature_name: features.get(feature_name)} 57 | sparse = tf.feature_column.input_layer(feature, indicator_column) 58 | weights_initializer = tf.constant_initializer(linear_variables.get(feature_name)) 59 | weight = tf.layers.dense(sparse, 60 | units=1, 61 | use_bias=False, 62 | kernel_initializer=weights_initializer) 63 | weights.append(weight) 64 | concat_weights = tf.concat(weights, axis=1) 65 | 66 | embeddings = [] 67 | for embedding_column in self._embedding_columns: 68 | feature_name 
= embedding_column.categorical_column.key 69 | feature = {feature_name: features.get(feature_name)} 70 | embedding_column = tf.feature_column.embedding_column( 71 | embedding_column.categorical_column, 72 | embedding_column.dimension, 73 | initializer=tf.constant_initializer(factorized_variables.get(feature_name)) 74 | ) 75 | embedding = tf.feature_column.input_layer(feature, embedding_column) 76 | embeddings.append(embedding) 77 | concat_embeddings = tf.concat(embeddings, axis=1) 78 | 79 | bias = tf.expand_dims(linear_variables.get("bias"), axis=0) 80 | bias = tf.tile(bias, [tf.shape(concat_weights)[0], 1]) 81 | 82 | dnn_inputs = tf.concat([bias, concat_weights, concat_embeddings], axis=1) 83 | 84 | outputs = dnn(dnn_inputs, 85 | self._dnn_hidden_units + [1], 86 | activation=self._dnn_activation, 87 | batch_normalization=self._dnn_batch_norm, 88 | dropout=self._dnn_dropout, 89 | **self._dnn_kwargs) 90 | return tf.nn.sigmoid(outputs) 91 | -------------------------------------------------------------------------------- /examples/train_fnn_on_movielens_estimator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.datasets import MovielensRanking 7 | from deep_recommenders.estimator.models.ranking import FNN 8 | 9 | 10 | def build_columns(): 11 | movielens = MovielensRanking() 12 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 13 | "user_id", movielens.num_users) 14 | user_gender = tf.feature_column.categorical_column_with_vocabulary_list( 15 | "user_gender", movielens.gender_vocab) 16 | user_age = tf.feature_column.categorical_column_with_vocabulary_list( 17 | "user_age", movielens.age_vocab) 18 | user_occupation = tf.feature_column.categorical_column_with_vocabulary_list( 19 | "user_occupation", movielens.occupation_vocab) 20 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 21 | "movie_id", movielens.num_movies) 22 | movie_genres = tf.feature_column.categorical_column_with_vocabulary_list( 23 | "movie_genres", movielens.gender_vocab) 24 | 25 | base_columns = [user_id, user_gender, user_age, user_occupation, movie_id, movie_genres] 26 | indicator_columns = [ 27 | tf.feature_column.indicator_column(c) 28 | for c in base_columns 29 | ] 30 | embedding_columns = [ 31 | tf.feature_column.embedding_column(c, dimension=16) 32 | for c in base_columns 33 | ] 34 | return indicator_columns, embedding_columns 35 | 36 | 37 | def model_fn(features, labels, mode, params): 38 | indicator_columns, embedding_columns = build_columns() 39 | fnn = FNN(indicator_columns, embedding_columns, params["warm_up_from_fm"], [64, 32]) 40 | 41 | outputs = fnn(features) 42 | predictions = {"predictions": outputs} 43 | 44 | if mode == tf.estimator.ModeKeys.PREDICT: 45 | return tf.estimator.EstimatorSpec(mode, predictions=predictions) 46 | 47 | loss = tf.losses.log_loss(labels, outputs) 48 | metrics = {"auc": tf.metrics.auc(labels, outputs)} 49 | if mode == tf.estimator.ModeKeys.EVAL: 50 | return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) 51 | 52 | optimizer = tf.train.AdamOptimizer(learning_rate=0.01) 53 | train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step()) 54 | 55 | return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) 56 | 57 | 58 | def build_estimator(params, model_dir=None, inter_op=8, intra_op=8): 59 | config_proto = tf.ConfigProto(device_count={'GPU': 0}, 60 | 
inter_op_parallelism_threads=inter_op, 61 | intra_op_parallelism_threads=intra_op) 62 | 63 | run_config = tf.estimator.RunConfig().replace( 64 | tf_random_seed=42, 65 | keep_checkpoint_max=10, 66 | save_checkpoints_steps=1000, 67 | log_step_count_steps=100, 68 | session_config=config_proto) 69 | 70 | return tf.estimator.Estimator(model_fn=model_fn, 71 | model_dir=model_dir, 72 | config=run_config, 73 | params=params) 74 | 75 | 76 | def main(): 77 | tf.logging.set_verbosity(tf.logging.INFO) 78 | # First: train FM model with movielens 79 | # eg. python train_fm_on_movielens_estimator.py 80 | # Second: warm up from FM model. 81 | estimator = build_estimator({"warm_up_from_fm": "FM"}) 82 | 83 | early_stop_hook = tf.estimator.experimental.stop_if_no_decrease_hook(estimator, "loss", 1000) 84 | movielens = MovielensRanking() 85 | train_spec = tf.estimator.TrainSpec(lambda: movielens.training_input_fn, 86 | max_steps=None, 87 | hooks=[early_stop_hook]) 88 | eval_spec = tf.estimator.EvalSpec(lambda: movielens.testing_input_fn, 89 | steps=None, 90 | start_delay_secs=0, 91 | throttle_secs=0) 92 | tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /tests/keras/test_gcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import scipy as sp 10 | import tensorflow as tf 11 | from absl.testing import parameterized 12 | 13 | from deep_recommenders.keras.models.retrieval import GCN 14 | 15 | 16 | class TestGCN(tf.test.TestCase, parameterized.TestCase): 17 | 18 | def test_gcn_adj_sparse_matrix(self): 19 | adj = np.asarray([ 20 | [0, 1, 0], 21 | [1, 0, 0], 22 | [0, 1, 1] 23 | ]).astype(np.float32) 24 | embeddings = np.asarray([ 25 | [0.1, 0.2, 0.3, 0.0], 26 | [0.4, 0.5, 0.6, 0.0], 27 | [0.7, 0.8, 0.9, 0.0] 28 | ]).astype(np.float32) 29 | 30 | W = np.ones(shape=(4, 2)) 31 | agg_embeddings = adj @ embeddings 32 | dense_outputs = agg_embeddings @ W 33 | expect_outputs = tf.nn.relu(dense_outputs) 34 | 35 | coo = sp.sparse.coo_matrix(adj) 36 | indices = np.mat([coo.row, coo.col]).transpose() 37 | sparse_adj = tf.SparseTensor(indices, coo.data, coo.shape) 38 | 39 | outputs = GCN(2, kernel_initializer="ones")(embeddings, sparse_adj) 40 | 41 | self.evaluate(tf.compat.v1.global_variables_initializer()) 42 | self.assertAllClose(outputs, expect_outputs) 43 | 44 | def test_gcn_adj_full_matrix(self): 45 | adj = np.asarray([ 46 | [0, 1, 0], 47 | [1, 0, 0], 48 | [0, 1, 1] 49 | ]).astype(np.float32) 50 | embeddings = np.asarray([ 51 | [0.1, 0.2, 0.3, 0.0], 52 | [0.4, 0.5, 0.6, 0.0], 53 | [0.7, 0.8, 0.9, 0.0] 54 | ]).astype(np.float32) 55 | 56 | W = np.ones(shape=(4, 2)) 57 | agg_embeddings = adj @ embeddings 58 | dense_outputs = agg_embeddings @ W 59 | expect_outputs = tf.nn.relu(dense_outputs) 60 | 61 | outputs = GCN(2, kernel_initializer="ones")(embeddings, adj) 62 | 63 | self.evaluate(tf.compat.v1.global_variables_initializer()) 64 | self.assertAllClose(outputs, expect_outputs) 65 | 66 | @parameterized.parameters( 67 | (8, 4), 68 | (16, 8), 69 | (32, 16), 70 | ) 71 | def test_gcn_train(self, num_nodes, embeddings_dim): 72 | 73 | def get_model(): 74 | adj = tf.keras.layers.Input(shape=(num_nodes,), sparse=True) 75 | embeddings = tf.keras.layers.Input(shape=(embeddings_dim,)) 
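            # Orientation for the stack built below -- each layer applies the
            # propagation rule exercised by the two tests above (a sketch,
            # assuming GCN(units) computes act(adj @ h @ W)):
            #
            #   h1  = relu(adj @ embeddings @ W1)    # (num_nodes, 16)
            #   h2  = relu(adj @ h1 @ W2)            # (num_nodes, 16)
            #   out = softmax(adj @ h2 @ W3)         # (num_nodes, 2), per-node class probs
            #
            # In practice `adj` would be a renormalized adjacency such as
            # Cora.spectral_graph's D^{-1/2}(A+I)D^{-1/2}; this test simply
            # feeds a random sparse matrix.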
76 | 77 | x = GCN(16)(embeddings, adj) 78 | x = GCN(16)(x, adj) 79 | outputs = GCN(2, activation="softmax")(x, adj) 80 | return tf.keras.Model([adj, embeddings], outputs) 81 | 82 | np.random.seed(42) 83 | 84 | adj = sp.sparse.random(num_nodes, num_nodes).tocsr() 85 | adj.sort_indices() 86 | 87 | embeddings = np.random.normal(size=(num_nodes, embeddings_dim)).astype(np.float32) 88 | 89 | targets = np.random.randint(2, size=num_nodes).astype(np.float32) 90 | targets = np.stack([targets, 1 - targets], axis=1) 91 | 92 | model = get_model() 93 | model.compile(optimizer=tf.keras.optimizers.Adam(0.01), loss="categorical_crossentropy") 94 | model.fit(x=[adj, embeddings], y=targets, batch_size=num_nodes, verbose=0, shuffle=False) 95 | 96 | model_pred = model.predict([adj, embeddings]) 97 | 98 | with tempfile.TemporaryDirectory() as tmp: 99 | path = os.path.join(tmp, "gcn") 100 | model.save( 101 | path, 102 | options=tf.saved_model.SaveOptions(namespace_whitelist=["GCN"])) 103 | loaded_model = tf.keras.models.load_model(path) 104 | loaded_pred = loaded_model.predict([adj, embeddings], batch_size=num_nodes) 105 | for model_layer, loaded_layer in zip(model.layers, loaded_model.layers): 106 | assert model_layer.get_config() == loaded_layer.get_config() 107 | self.assertAllEqual(model_pred, loaded_pred) 108 | 109 | 110 | if __name__ == "__main__": 111 | tf.test.main() 112 | -------------------------------------------------------------------------------- /tests/keras/test_xdeepfm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deep_recommenders.keras.models.ranking.xdeepfm import CIN 12 | 13 | 14 | class TestXDeepFM(tf.test.TestCase): 15 | 16 | def test_invalid_inputs_type(self): 17 | """ 测试输入类型 """ 18 | with self.assertRaisesRegexp(ValueError, 19 | r"`CIN` layer's inputs type should be `tuple`."): 20 | inputs = np.random.normal(size=(2, 3, 5)).astype(np.float32) 21 | CIN(feature_map=3)(inputs) 22 | 23 | def test_invalid_inputs_ndim(self): 24 | """ 测试输入维度 """ 25 | with self.assertRaisesRegexp(ValueError, 26 | r"`x0` and `x` dim should be 3."): 27 | inputs = np.random.normal(size=(2, 15)).astype(np.float32) 28 | CIN(feature_map=3)((inputs, inputs)) 29 | 30 | def test_outputs(self): 31 | """ 测试输出是否正确 """ 32 | x0 = np.asarray([[[0.1, 0.2, 0.3],[0.4, 0.5, 0.6]]]).astype(np.float32) 33 | x = np.asarray([[[0.1, 0.2, 0.3],[0.4, 0.5, 0.6]]]).astype(np.float32) 34 | outputs = CIN( 35 | feature_map=2, 36 | activation="relu", 37 | kernel_init="ones")((x0, x)) 38 | expect_outputs = np.asarray([ 39 | [[0.25, 0.49, 0.81], 40 | [0.25, 0.49, 0.81]] 41 | ]).astype(np.float32) 42 | self.evaluate(tf.compat.v1.global_variables_initializer()) 43 | self.assertAllClose(outputs, expect_outputs) 44 | 45 | def test_bias(self): 46 | """ 测试bias """ 47 | x0 = np.asarray([[[0.1, 0.2, 0.3],[0.4, 0.5, 0.6]]]).astype(np.float32) 48 | x = np.asarray([[[0.1, 0.2, 0.3],[0.4, 0.5, 0.6]]]).astype(np.float32) 49 | outputs = CIN( 50 | feature_map=2, 51 | use_bias=True, 52 | activation="relu", 53 | kernel_init="ones", 54 | bias_init="ones")((x0, x)) 55 | expect_outputs = np.asarray([ 56 | [[1.25, 1.49, 1.81], 57 | [1.25, 1.49, 1.81]] 58 | ]).astype(np.float32) 59 | self.evaluate(tf.compat.v1.global_variables_initializer()) 60 | self.assertAllClose(outputs, expect_outputs) 61 | 62 | def 
test_train_model(self): 63 | """ 测试模型训练 """ 64 | 65 | def get_model(): 66 | x0 = tf.keras.layers.Input(shape=(12, 10)) 67 | x = CIN(feature_map=3)((x0, x0)) 68 | x = CIN(feature_map=3)((x0, x)) 69 | x = tf.keras.layers.Flatten()(x) 70 | outputs = tf.keras.layers.Dense(1)(x) 71 | model = tf.keras.Model(x0, outputs) 72 | return model 73 | 74 | x0 = np.random.uniform(size=(10, 12, 10)) 75 | y = np.random.uniform(size=(10,)) 76 | 77 | model = get_model() 78 | model.compile(loss="mse") 79 | model.fit(x0, y, verbose=0) 80 | 81 | def test_save_model(self): 82 | """ 测试模型保存 """ 83 | 84 | def get_model(): 85 | x0 = tf.keras.layers.Input(shape=(12, 10)) 86 | x = CIN(feature_map=3)((x0, x0)) 87 | x = CIN(feature_map=3)((x0, x)) 88 | x = tf.keras.layers.Flatten()(x) 89 | logits = tf.keras.layers.Dense(1)(x) 90 | model = tf.keras.Model(x0, logits) 91 | return model 92 | 93 | x0 = np.random.uniform(size=(10, 12, 10)) 94 | 95 | model = get_model() 96 | model_pred = model.predict(x0) 97 | 98 | with tempfile.TemporaryDirectory() as tmp: 99 | path = os.path.join(tmp, "xDeepFM") 100 | model.save( 101 | path, 102 | options=tf.saved_model.SaveOptions(namespace_whitelist=["xDeepFm"])) 103 | loaded_model = tf.keras.models.load_model(path) 104 | loaded_pred = loaded_model.predict(x0) 105 | for model_layer, loaded_layer in zip(model.layers, loaded_model.layers): 106 | assert model_layer.get_config() == loaded_layer.get_config() 107 | self.assertAllEqual(model_pred, loaded_pred) 108 | 109 | 110 | if __name__ == "__main__": 111 | tf.test.main() 112 | -------------------------------------------------------------------------------- /examples/train_fm_on_movielens_estimator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.datasets import MovielensRanking 7 | from deep_recommenders.estimator.models.feature_interaction import FM 8 | 9 | 10 | def build_columns(): 11 | movielens = MovielensRanking() 12 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 13 | "user_id", movielens.num_users) 14 | user_gender = tf.feature_column.categorical_column_with_vocabulary_list( 15 | "user_gender", movielens.gender_vocab) 16 | user_age = tf.feature_column.categorical_column_with_vocabulary_list( 17 | "user_age", movielens.age_vocab) 18 | user_occupation = tf.feature_column.categorical_column_with_vocabulary_list( 19 | "user_occupation", movielens.occupation_vocab) 20 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 21 | "movie_id", movielens.num_movies) 22 | movie_genres = tf.feature_column.categorical_column_with_vocabulary_list( 23 | "movie_genres", movielens.gender_vocab) 24 | 25 | base_columns = [user_id, user_gender, user_age, user_occupation, movie_id, movie_genres] 26 | indicator_columns = [ 27 | tf.feature_column.indicator_column(c) 28 | for c in base_columns 29 | ] 30 | embedding_columns = [ 31 | tf.feature_column.embedding_column(c, dimension=16) 32 | for c in base_columns 33 | ] 34 | return indicator_columns, embedding_columns 35 | 36 | 37 | def model_fn(features, labels, mode): 38 | indicator_columns, embedding_columns = build_columns() 39 | outputs = FM(indicator_columns, embedding_columns)(features) 40 | 41 | predictions = {"predictions": outputs} 42 | 43 | if mode == tf.estimator.ModeKeys.PREDICT: 44 | return tf.estimator.EstimatorSpec(mode, predictions=predictions) 45 | 46 | loss = tf.losses.sigmoid_cross_entropy(labels, outputs) 47 | 
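    # The FM above emits a raw logit,
    #   y = w0 + sum_i w_i x_i + sum_{i<j} <v_i, v_j> x_i x_j,
    # where the pairwise term is computed in O(k*n) via the usual identity
    #   0.5 * sum_f [ (sum_i v_{if} x_i)^2 - sum_i v_{if}^2 x_i^2 ]
    # (this is exactly what tests/keras/test_fm.py verifies). Hence the
    # sigmoid_cross_entropy on logits above, and AUC on tf.nn.sigmoid(outputs)
    # rather than on the raw logits below.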
metrics = {"auc": tf.metrics.auc(labels, tf.nn.sigmoid(outputs))} 48 | if mode == tf.estimator.ModeKeys.EVAL: 49 | return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) 50 | 51 | optimizer = tf.train.AdamOptimizer(learning_rate=0.01) 52 | train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step()) 53 | 54 | return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) 55 | 56 | 57 | def build_estimator(model_dir=None, inter_op=8, intra_op=8): 58 | config_proto = tf.ConfigProto(device_count={'GPU': 0}, 59 | inter_op_parallelism_threads=inter_op, 60 | intra_op_parallelism_threads=intra_op) 61 | 62 | run_config = tf.estimator.RunConfig().replace( 63 | tf_random_seed=42, 64 | keep_checkpoint_max=10, 65 | save_checkpoints_steps=1000, 66 | log_step_count_steps=100, 67 | session_config=config_proto) 68 | 69 | return tf.estimator.Estimator(model_fn=model_fn, 70 | model_dir=model_dir, 71 | config=run_config) 72 | 73 | 74 | def export_saved_model(estimator, export_path): 75 | indicator_columns, embedding_columns = build_columns() 76 | columns = indicator_columns + embedding_columns 77 | 78 | feature_spec = tf.feature_column.make_parse_example_spec(columns) 79 | example_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec) 80 | estimator.export_saved_model(export_path, example_input_fn) 81 | 82 | 83 | def main(): 84 | tf.logging.set_verbosity(tf.logging.INFO) 85 | estimator = build_estimator() 86 | early_stop_hook = tf.estimator.experimental.stop_if_no_decrease_hook(estimator, "loss", 1000) 87 | 88 | movielens = MovielensRanking() 89 | train_spec = tf.estimator.TrainSpec(lambda: movielens.training_input_fn, 90 | max_steps=None, 91 | hooks=[early_stop_hook]) 92 | eval_spec = tf.estimator.EvalSpec(lambda: movielens.testing_input_fn, 93 | steps=None, 94 | start_delay_secs=0, 95 | throttle_secs=0) 96 | tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) 97 | 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/dcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | from typing import Optional, Union, Text 4 | 5 | import tensorflow as tf 6 | 7 | 8 | @tf.keras.utils.register_keras_serializable() 9 | class Cross(tf.keras.layers.Layer): 10 | """ Cross net in Deep & Cross Network (DCN) """ 11 | 12 | def __init__(self, 13 | projection_dim: Optional[int] = None, 14 | diag_scale: Optional[float] = 0.0, 15 | use_bias: bool = True, 16 | kernel_init: Union[Text, tf.keras.initializers.Initializer] = "truncated_normal", 17 | kernel_regu: Union[Text, None, tf.keras.regularizers.Regularizer] = None, 18 | bias_init: Union[Text, tf.keras.initializers.Initializer] = "zeros", 19 | bias_regu: Union[Text, None, tf.keras.regularizers.Regularizer] = None, 20 | **kwargs): 21 | 22 | super(Cross, self).__init__(**kwargs) 23 | 24 | self._projection_dim = projection_dim 25 | self._diag_scale = diag_scale 26 | self._use_bias = use_bias 27 | self._kernel_init = tf.keras.initializers.get(kernel_init) 28 | self._kernel_regu = tf.keras.regularizers.get(kernel_regu) 29 | self._bias_init = tf.keras.initializers.get(bias_init) 30 | self._bias_regu = tf.keras.regularizers.get(bias_regu) 31 | 32 | assert self._diag_scale >= 0, \ 33 | ValueError("diag scale must be non-negative, got {}".format(self._diag_scale)) 34 | 35 | def build(self, 
input_shape): 36 | last_dim = input_shape[-1] 37 | 38 | if self._projection_dim is None: 39 | self._dense = tf.keras.layers.Dense( 40 | last_dim, 41 | kernel_initializer=self._kernel_init, 42 | kernel_regularizer=self._kernel_regu, 43 | bias_initializer=self._bias_init, 44 | bias_regularizer=self._bias_regu, 45 | use_bias=self._use_bias 46 | ) 47 | else: 48 | if self._projection_dim < 0 or self._projection_dim > last_dim / 2: 49 | raise ValueError( 50 | "`projection_dim` should be smaller than last_dim / 2 to improve " 51 | "the model efficiency, and should be positive. Got " 52 | "`projection_dim` {}, and last dimension of input {}".format( 53 | self._projection_dim, last_dim)) 54 | self._dense_u = tf.keras.layers.Dense( 55 | self._projection_dim, 56 | kernel_initializer=self._kernel_init, 57 | kernel_regularizer=self._kernel_regu, 58 | use_bias=False, 59 | ) 60 | self._dense_v = tf.keras.layers.Dense( 61 | last_dim, 62 | kernel_initializer=self._kernel_init, 63 | bias_initializer=self._bias_init, 64 | kernel_regularizer=self._kernel_regu, 65 | bias_regularizer=self._bias_regu, 66 | use_bias=self._use_bias, 67 | ) 68 | super(Cross, self).build(input_shape) 69 | 70 | def call(self, x0, x=None, **kwargs): 71 | 72 | if x is None: 73 | x = x0 74 | 75 | if x0.shape[-1] != x.shape[-1]: 76 | raise ValueError("`x0` and `x` dim mismatch. " 77 | "Got `x0` dim = {} and `x` dim = {}".format( 78 | x0.shape[-1], x.shape[-1])) 79 | 80 | if self._projection_dim is None: 81 | prod_output = self._dense(x) 82 | else: 83 | prod_output = self._dense_v(self._dense_u(x)) 84 | 85 | if self._diag_scale: 86 | prod_output = prod_output + self._diag_scale * x 87 | 88 | return x0 * prod_output + x 89 | 90 | def get_config(self): 91 | config = { 92 | "projection_dim": 93 | self._projection_dim, 94 | "diag_scale": 95 | self._diag_scale, 96 | "use_bias": 97 | self._use_bias, 98 | "kernel_init": 99 | tf.keras.initializers.serialize(self._kernel_init), 100 | "kernel_regu": 101 | tf.keras.regularizers.serialize(self._kernel_regu), 102 | "bias_init": 103 | tf.keras.initializers.serialize(self._bias_init), 104 | "bias_regu": 105 | tf.keras.regularizers.serialize(self._bias_regu), 106 | } 107 | base_config = super(Cross, self).get_config() 108 | return {**base_config, **config} 109 | 110 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/xdeepfm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | from typing import Optional, Union, Text, Tuple 4 | 5 | import tensorflow as tf 6 | 7 | 8 | @tf.keras.utils.register_keras_serializable() 9 | class CIN(tf.keras.layers.Layer): 10 | """ Compressed Interaction Network in xDeepFM """ 11 | 12 | def __init__(self, 13 | feature_map: Optional[int] = 3, 14 | use_bias: bool = False, 15 | activation: Union[Text, None, tf.keras.layers.Layer] = "sigmoid", 16 | kernel_init: Union[Text, tf.keras.initializers.Initializer] = "truncated_normal", 17 | kernel_regu: Union[Text, None, tf.keras.regularizers.Regularizer] = None, 18 | bias_init: Union[Text, tf.keras.initializers.Initializer] = "zeros", 19 | bias_regu: Union[Text, None, tf.keras.regularizers.Regularizer] = None, 20 | **kwargs): 21 | 22 | super(CIN, self).__init__(**kwargs) 23 | 24 | self._feature_map = feature_map 25 | self._use_bias = use_bias 26 | 27 | if isinstance(activation, tf.keras.layers.Layer): 28 | self._activation = activation 29 | elif isinstance(activation, str): 30 | 
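            # A plain string such as "sigmoid" resolves through the Keras
            # activation registry; a Layer instance (handled above) lets
            # callers plug in a custom activation, and anything else
            # disables the activation entirely.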
self._activation = tf.keras.activations.get(activation) 31 | else: 32 | self._activation = None 33 | 34 | self._kernel_init = tf.keras.initializers.get(kernel_init) 35 | self._kernel_regu = tf.keras.regularizers.get(kernel_regu) 36 | self._bias_init = tf.keras.initializers.get(bias_init) 37 | self._bias_regu = tf.keras.regularizers.get(bias_regu) 38 | 39 | def build(self, input_shape): 40 | 41 | if not isinstance(input_shape, tuple): 42 | raise ValueError("`CIN` layer's inputs type should be `tuple`." 43 | "Got `CIN` layer's inputs type = `{}`".format( 44 | type(input_shape))) 45 | 46 | if len(input_shape) != 2: 47 | raise ValueError("`CIN` Layer inputs tuple length should be 2." 48 | "Got `length` = {}".format(len(input_shape))) 49 | 50 | x0_shape, x_shape = input_shape 51 | self._x0_fields = x0_shape[1] 52 | self._x_fields = x_shape[1] 53 | 54 | self._kernel = self.add_weight( 55 | shape=(1, self._x0_fields * self._x_fields, self._feature_map), 56 | initializer=self._kernel_init, 57 | regularizer=self._kernel_regu, 58 | trainable=True, 59 | name="kernel" 60 | ) 61 | if self._use_bias is True: 62 | self._bias = self.add_weight( 63 | shape=(self._feature_map,), 64 | initializer=self._bias_init, 65 | regularizer=self._bias_regu, 66 | trainable=True, 67 | name="bias" 68 | ) 69 | self.built = True 70 | 71 | def call(self, inputs: Tuple[tf.Tensor, tf.Tensor], **kwargs): 72 | 73 | x0, x = inputs 74 | 75 | if tf.keras.backend.ndim(x0) != 3 or \ 76 | tf.keras.backend.ndim(x) != 3: 77 | raise ValueError("`x0` and `x` dim should be 3." 78 | "Got `x0` dim = {}, `x` dim = {}".format( 79 | tf.keras.backend.ndim(x0), 80 | tf.keras.backend.ndim(x))) 81 | 82 | field_dim = x0.shape[-1] 83 | x0 = tf.split(x0, field_dim, axis=-1) 84 | x = tf.split(x, field_dim, axis=-1) 85 | 86 | outer = tf.matmul(x0, x, transpose_b=True) 87 | outer = tf.reshape(outer, shape=[field_dim, -1, self._x0_fields * self._x_fields]) 88 | outer = tf.transpose(outer, perm=[1, 0, 2]) 89 | 90 | conv_out = tf.nn.conv1d(outer, self._kernel, stride=1, padding="VALID") 91 | 92 | if self._use_bias is True: 93 | conv_out = tf.nn.bias_add(conv_out, self._bias) 94 | 95 | outputs = self._activation(conv_out) 96 | return tf.transpose(outputs, perm=[0, 2, 1]) 97 | 98 | def get_config(self): 99 | config = { 100 | "feature_map": 101 | self._feature_map, 102 | "use_bias": 103 | self._use_bias, 104 | "activation": 105 | tf.keras.activations.serialize(self._activation), 106 | "kernel_init": 107 | tf.keras.initializers.serialize(self._kernel_init), 108 | "kernel_regu": 109 | tf.keras.regularizers.serialize(self._kernel_regu), 110 | "bias_init": 111 | tf.keras.initializers.serialize(self._bias_init), 112 | "bias_regu": 113 | tf.keras.regularizers.serialize(self._bias_regu), 114 | } 115 | base_config = super(CIN, self).get_config() 116 | return {**base_config, **config} 117 | -------------------------------------------------------------------------------- /deep_recommenders/datasets/cora.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import numpy as np 5 | import scipy.sparse as sp 6 | 7 | 8 | class Cora(object): 9 | 10 | def __init__(self, extract_path="."): 11 | self._download_url = "https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz" 12 | self._extract_path = extract_path 13 | self._cora_path = os.path.join(extract_path, "cora") 14 | self._cora_cites = os.path.join(self._cora_path, "cora.cites") 15 | self._cora_content = 
os.path.join(self._cora_path, "cora.content") 16 | 17 | if not os.path.exists(self._cora_cites) or \ 18 | not os.path.exists(self._cora_content): 19 | self._download() 20 | 21 | self._cora_classes = [ 22 | "Case_Based", 23 | "Genetic_Algorithms", 24 | "Neural_Networks", 25 | "Probabilistic_Methods", 26 | "Reinforcement_Learning", 27 | "Rule_Learning", 28 | "Theory" 29 | ] 30 | 31 | @property 32 | def num_classes(self): 33 | return len(self._cora_classes) 34 | 35 | def _download(self, filename="cora.tgz"): 36 | import requests 37 | import tarfile 38 | r = requests.get(self._download_url) 39 | with open(filename, "wb") as f: 40 | f.write(r.content) 41 | tarobj = tarfile.open(filename, "r:gz") 42 | for tarinfo in tarobj: 43 | tarobj.extract(tarinfo.name, self._extract_path) 44 | tarobj.close() 45 | 46 | def load_content(self, normalize=True): 47 | content = np.genfromtxt(self._cora_content, dtype=np.str) 48 | ids, features, labels = content[:, 0], content[:, 1:-1], content[:, -1] 49 | features = sp.csr_matrix(features, dtype=np.float32) 50 | if normalize is True: 51 | features /= features.sum(axis=1).reshape(-1, 1) 52 | return ids, features, labels 53 | 54 | def build_graph(self, nodes): 55 | idx_map = {int(j): i for i, j in enumerate(nodes)} 56 | edges_unordered = np.genfromtxt(self._cora_cites, dtype=np.int32) 57 | edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), 58 | dtype=np.int32).reshape(edges_unordered.shape) 59 | graph = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 60 | shape=(nodes.shape[0], nodes.shape[0]), dtype=np.float32) 61 | graph += graph.T - sp.diags(graph.diagonal()) # Convert symmetric matrix 62 | return graph 63 | 64 | @staticmethod 65 | def spectral_graph(graph): 66 | graph = graph + sp.eye(graph.shape[0]) # graph G with added self-connections 67 | # D^{-1/2} * A * D^{-1/2} 68 | d = sp.diags(np.power(np.array(graph.sum(1)), -0.5).flatten(), 0) 69 | spectral_graph = graph.dot(d).transpose().dot(d).tocsr() 70 | return spectral_graph 71 | 72 | def sample_train_nodes(self, labels, num_per_class=20): 73 | train_nodes = [] 74 | for cls in self._cora_classes: 75 | cls_index = np.where(labels == cls)[0] 76 | cls_sample = np.random.choice(cls_index, num_per_class, replace=False) 77 | train_nodes += cls_sample.tolist() 78 | return train_nodes 79 | 80 | def encode_labels(self, labels): 81 | labels_map = {} 82 | num_classes = len(self._cora_classes) 83 | for i, cls in enumerate(self._cora_classes): 84 | cls_label = np.zeros(shape=(num_classes,)) 85 | cls_label[i] = 1. 
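            # One-hot encode, e.g. "Genetic_Algorithms" (class index 1)
            # maps to [0., 1., 0., 0., 0., 0., 0.].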
86 | labels_map[cls] = cls_label 87 | encoded_labels = list(map(labels_map.get, labels)) 88 | return np.array(encoded_labels, dtype=np.int32) 89 | 90 | def split_labels(self, labels, num_valid_nodes=500): 91 | num_nodes = labels.shape[0] 92 | all_index = np.arange(num_nodes) 93 | train_index = self.sample_train_nodes(labels) 94 | valid_index = list(set(all_index) - set(train_index)) 95 | valid_index, test_index = valid_index[:num_valid_nodes], valid_index[num_valid_nodes:] 96 | 97 | encoded_labels = self.encode_labels(labels) 98 | 99 | def _sample_mask(index_ls): 100 | mask = np.zeros(num_nodes) 101 | mask[index_ls] = 1 102 | return np.array(mask, dtype=np.bool) 103 | 104 | def _get_labels(index_ls): 105 | _labels = np.zeros(encoded_labels.shape, dtype=np.int32) 106 | _labels[index_ls] = encoded_labels[index_ls] 107 | _mask = _sample_mask(index_ls) 108 | return _labels, _mask 109 | 110 | train_labels, train_mask = _get_labels(train_index) 111 | valid_labels, valid_mask = _get_labels(valid_index) 112 | test_labels, test_mask = _get_labels(test_index) 113 | 114 | return (train_labels, train_mask), \ 115 | (valid_labels, valid_mask), \ 116 | (test_labels, test_mask) 117 | -------------------------------------------------------------------------------- /tests/keras/test_fm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import tempfile 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deep_recommenders.keras.models.ranking import FM 12 | from deep_recommenders.keras.models.ranking import FactorizationMachine 13 | 14 | 15 | class TestFM(tf.test.TestCase): 16 | 17 | def test_fm_layer(self): 18 | sparse_inputs = np.random.randint(0, 2, size=(10, 10)).astype(np.float32) 19 | embedding_inputs = np.random.normal(size=(10, 5, 5)).astype(np.float32) 20 | 21 | x_sum = np.sum(embedding_inputs, axis=1) 22 | x_square_sum = np.sum(np.power(embedding_inputs, 2), axis=1) 23 | expected_outputs = 0.5 * np.sum(np.power(x_sum, 2) - x_square_sum, axis=1, keepdims=True) 24 | 25 | outputs = FM()(sparse_inputs, embedding_inputs) 26 | self.assertAllClose(outputs, expected_outputs) 27 | 28 | def test_fm_layer_train(self): 29 | 30 | def get_model(): 31 | sparse_inputs = tf.keras.layers.Input(shape=(10,)) 32 | embedding_inputs = tf.keras.layers.Input(shape=(5, 5,)) 33 | x = FM()(sparse_inputs, embedding_inputs) 34 | logits = tf.keras.layers.Dense(1)(x) 35 | return tf.keras.Model([sparse_inputs, embedding_inputs], logits) 36 | 37 | model = get_model() 38 | random_sparse_inputs = np.random.randint(0, 2, size=(10, 10)) 39 | random_embedding_inputs = np.random.uniform(size=(10, 5, 5)) 40 | random_outputs = np.random.uniform(size=(10,)) 41 | model.compile(loss="mse") 42 | model.fit([random_sparse_inputs, random_embedding_inputs], random_outputs, verbose=0) 43 | 44 | def test_fm_layer_save(self): 45 | 46 | def get_model(): 47 | sparse_inputs = tf.keras.layers.Input(shape=(10,)) 48 | embedding_inputs = tf.keras.layers.Input(shape=(5, 5,)) 49 | x = FM()(sparse_inputs, embedding_inputs) 50 | logits = tf.keras.layers.Dense(1)(x) 51 | return tf.keras.Model([sparse_inputs, embedding_inputs], logits) 52 | 53 | model = get_model() 54 | random_sparse_inputs = np.random.randint(0, 2, size=(10, 10)) 55 | random_embedding_inputs = np.random.uniform(size=(10, 5, 5)) 56 | model_pred = model.predict([random_sparse_inputs, random_embedding_inputs]) 57 | 58 | with 
tempfile.TemporaryDirectory() as tmp: 59 | path = os.path.join(tmp, "fm") 60 | model.save(path, options=tf.saved_model.SaveOptions(namespace_whitelist=["FM"])) 61 | loaded_model = tf.keras.models.load_model(path) 62 | loaded_pred = loaded_model.predict([random_sparse_inputs, random_embedding_inputs]) 63 | for model_layer, loaded_layer in zip(model.layers, loaded_model.layers): 64 | assert model_layer.get_config() == loaded_layer.get_config() 65 | self.assertAllEqual(model_pred, loaded_pred) 66 | 67 | def test_model(self): 68 | 69 | def build_columns(): 70 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 71 | "user_id", 100) 72 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 73 | "movie_id", 100) 74 | base_columns = [user_id, movie_id] 75 | _indicator_columns = [ 76 | tf.feature_column.indicator_column(c) 77 | for c in base_columns 78 | ] 79 | _embedding_columns = [ 80 | tf.feature_column.embedding_column(c, dimension=16) 81 | for c in base_columns 82 | ] 83 | return _indicator_columns, _embedding_columns 84 | 85 | indicator_columns, embedding_columns = build_columns() 86 | model = FactorizationMachine(indicator_columns, embedding_columns) 87 | model.compile(loss=tf.keras.losses.binary_crossentropy, 88 | optimizer=tf.keras.optimizers.Adam()) 89 | dataset = tf.data.Dataset.from_tensor_slices(({ 90 | "user_id": [["1"]] * 1000, 91 | "movie_id": [["2"]] * 1000 92 | }, np.random.randint(0, 1, size=(1000, 1)))) 93 | model.fit(dataset, 94 | steps_per_epoch=100, 95 | verbose=-1) 96 | test_data = {"user_id": np.asarray([["1"], ["2"]]), 97 | "movie_id": np.asarray([["1"], ["2"]])} 98 | model_pred = model.predict(test_data) 99 | 100 | with tempfile.TemporaryDirectory() as tmp: 101 | path = os.path.join(tmp, "FM") 102 | model.save(path) 103 | loaded_model = tf.keras.models.load_model(path) 104 | loaded_pred = loaded_model.predict(test_data) 105 | for model_layer, loaded_layer in zip(model.layers, loaded_model.layers): 106 | assert model_layer.get_config() == loaded_layer.get_config() 107 | self.assertAllEqual(model_pred, loaded_pred) 108 | 109 | 110 | if __name__ == "__main__": 111 | tf.test.main() 112 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/ranking/din.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import tensorflow as tf 4 | 5 | 6 | @tf.keras.utils.register_keras_serializable() 7 | class ActivationUnit(tf.keras.layers.Layer): 8 | 9 | def __init__(self, 10 | units, 11 | interacter=None, 12 | use_bias=True, 13 | activation="relu", 14 | kernel_init="truncated_normal", 15 | kernel_regu=None, 16 | bias_init="zeros", 17 | bias_regu=None, 18 | **kwargs): 19 | super(ActivationUnit, self).__init__(**kwargs) 20 | 21 | self._kernel_units = units 22 | self._interacter = interacter 23 | self._use_bias = use_bias 24 | 25 | if isinstance(activation, tf.keras.layers.Layer): 26 | self._kernel_activation = activation 27 | elif isinstance(activation, str): 28 | self._kernel_activation = tf.keras.activations.get(activation) 29 | else: 30 | self._kernel_activation = None 31 | 32 | self._kernel_init = tf.keras.initializers.get(kernel_init) 33 | self._kernel_regu = tf.keras.regularizers.get(kernel_regu) 34 | self._bias_init = tf.keras.initializers.get(bias_init) 35 | self._bias_regu = tf.keras.regularizers.get(bias_regu) 36 | 37 | def build(self, input_shape): 38 | 39 | self.dense_kernel = tf.keras.layers.Dense( 40 | 
self._kernel_units, 41 | activation=self._kernel_activation, 42 | use_bias=self._use_bias, 43 | kernel_initializer=self._kernel_init, 44 | kernel_regularizer=self._kernel_regu, 45 | bias_initializer=self._bias_init, 46 | bias_regularizer=self._bias_regu 47 | ) 48 | self.dense_output = tf.keras.layers.Dense( 49 | 1, 50 | activation=None, 51 | use_bias=self._use_bias, 52 | kernel_initializer=self._kernel_init, 53 | kernel_regularizer=self._kernel_regu, 54 | bias_initializer=self._bias_init, 55 | bias_regularizer=self._bias_regu 56 | ) 57 | self.built = True 58 | 59 | def call(self, x_embeddings, y_embeddings=None, **kwargs): 60 | 61 | if y_embeddings is None: 62 | y_embeddings = x_embeddings 63 | 64 | x = tf.concat([x_embeddings, y_embeddings], axis=1) 65 | 66 | if self._interacter is not None: 67 | x = tf.concat([ 68 | x, self._interacter([x_embeddings, y_embeddings])], axis=1) 69 | 70 | x = self.dense_kernel(x) 71 | return self.dense_output(x) 72 | 73 | def get_config(self): 74 | config = { 75 | "units": self._kernel_units, 76 | "interacter": self._interacter, 77 | "use_bias": self._use_bias, 78 | "activation": tf.keras.activations.serialize(self._kernel_activation), 79 | "kernel_init": tf.keras.initializers.serialize(self._kernel_init), 80 | "kernel_regu": tf.keras.regularizers.serialize(self._kernel_regu), 81 | "bias_init": tf.keras.initializers.serialize(self._bias_init), 82 | "bias_regu": tf.keras.regularizers.serialize(self._bias_regu), 83 | } 84 | base_config = super(ActivationUnit, self).get_config() 85 | return {**base_config, **config} 86 | 87 | 88 | @tf.keras.utils.register_keras_serializable() 89 | class Dice(tf.keras.layers.Layer): 90 | 91 | def __init__(self, 92 | epsilon: float = 1e-8, 93 | alpha_initializer="zeros", 94 | alpha_regularizer=None, 95 | **kwargs): 96 | super(Dice, self).__init__(**kwargs) 97 | 98 | self._epsilon = epsilon 99 | self._alpha_initializer = alpha_initializer 100 | self._alpha_regularizer = alpha_regularizer 101 | 102 | def build(self, input_shape): 103 | 104 | self.prelu = tf.keras.layers.PReLU( 105 | alpha_initializer=self._alpha_initializer, 106 | alpha_regularizer=self._alpha_regularizer 107 | ) 108 | self.built = True 109 | 110 | def call(self, inputs, **kwargs): 111 | 112 | inputs_mean = tf.math.reduce_mean(inputs, axis=1, keepdims=True) 113 | inputs_var = tf.math.reduce_std(inputs, axis=1, keepdims=True) 114 | 115 | p = tf.nn.sigmoid((inputs - inputs_mean) / (tf.sqrt(inputs_var + self._epsilon))) 116 | 117 | x = self.prelu(inputs) 118 | 119 | outputs = tf.where(x > 0, x=p * x, y=(1 - p) * x) 120 | 121 | return outputs 122 | 123 | def get_config(self): 124 | config = { 125 | "epsilon": self._epsilon, 126 | "alpha_initializer": tf.keras.initializers.serialize(self._alpha_initializer), 127 | "alpha_regularizer": tf.keras.regularizers.serialize(self._alpha_regularizer) 128 | } 129 | base_config = super(Dice, self).get_config() 130 | return {**base_config, **config} 131 | -------------------------------------------------------------------------------- /examples/train_wdl_on_movielens_estimator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | 6 | from deep_recommenders.datasets import MovielensRanking 7 | from deep_recommenders.estimator.models.ranking import WDL 8 | 9 | 10 | def build_columns(): 11 | movielens = MovielensRanking() 12 | user_id = tf.feature_column.categorical_column_with_hash_bucket( 13 | "user_id", 
movielens.num_users) 14 | user_gender = tf.feature_column.categorical_column_with_vocabulary_list( 15 | "user_gender", movielens.gender_vocab) 16 | user_age = tf.feature_column.categorical_column_with_vocabulary_list( 17 | "user_age", movielens.age_vocab) 18 | user_occupation = tf.feature_column.categorical_column_with_vocabulary_list( 19 | "user_occupation", movielens.occupation_vocab) 20 | movie_id = tf.feature_column.categorical_column_with_hash_bucket( 21 | "movie_id", movielens.num_movies) 22 | movie_genres = tf.feature_column.categorical_column_with_vocabulary_list( 23 | "movie_genres", movielens.gender_vocab) 24 | 25 | base_columns = [user_id, user_gender, user_age, user_occupation, movie_id, movie_genres] 26 | indicator_columns = [ 27 | tf.feature_column.indicator_column(c) 28 | for c in base_columns 29 | ] 30 | embedding_columns = [ 31 | tf.feature_column.embedding_column(c, dimension=16) 32 | for c in base_columns 33 | ] 34 | return indicator_columns, embedding_columns 35 | 36 | 37 | def cross_product_transformation(): 38 | crossed_columns = [ 39 | tf.feature_column.crossed_column(['user_gender', 'user_age'], 14), 40 | tf.feature_column.crossed_column(['user_gender', 'user_occupation'], 40), 41 | tf.feature_column.crossed_column(['user_age', 'user_occupation'], 140), 42 | ] 43 | crossed_product_columns = [ 44 | tf.feature_column.indicator_column(c) 45 | for c in crossed_columns 46 | ] 47 | return crossed_product_columns 48 | 49 | 50 | def model_fn(features, labels, mode): 51 | indicator_columns, embedding_columns = build_columns() 52 | crossed_product_columns = cross_product_transformation() 53 | outputs = WDL(indicator_columns + crossed_product_columns, embedding_columns, [64, 16])(features) 54 | 55 | predictions = {"predictions": outputs} 56 | 57 | if mode == tf.estimator.ModeKeys.PREDICT: 58 | return tf.estimator.EstimatorSpec(mode, predictions=predictions) 59 | 60 | loss = tf.losses.log_loss(labels, outputs) 61 | metrics = {"auc": tf.metrics.auc(labels, outputs)} 62 | if mode == tf.estimator.ModeKeys.EVAL: 63 | return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) 64 | 65 | wide_variables = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES, "wide") 66 | wide_optimizer = tf.train.FtrlOptimizer(0.01, l1_regularization_strength=0.5) 67 | wide_train_op = wide_optimizer.minimize(loss=loss, 68 | global_step=tf.train.get_global_step(), 69 | var_list=wide_variables) 70 | 71 | deep_variables = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES, "deep") 72 | deep_optimizer = tf.train.AdamOptimizer(0.01) 73 | deep_train_op = deep_optimizer.minimize(loss=loss, 74 | global_step=tf.train.get_global_step(), 75 | var_list=deep_variables) 76 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 77 | train_op = tf.group(update_ops, wide_train_op, deep_train_op) 78 | 79 | return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) 80 | 81 | 82 | def build_estimator(model_dir=None, inter_op=8, intra_op=8): 83 | config_proto = tf.ConfigProto(device_count={'GPU': 0}, 84 | inter_op_parallelism_threads=inter_op, 85 | intra_op_parallelism_threads=intra_op) 86 | 87 | run_config = tf.estimator.RunConfig().replace( 88 | tf_random_seed=42, 89 | keep_checkpoint_max=10, 90 | save_checkpoints_steps=1000, 91 | log_step_count_steps=100, 92 | session_config=config_proto) 93 | 94 | return tf.estimator.Estimator(model_fn=model_fn, 95 | model_dir=model_dir, 96 | config=run_config) 97 | 98 | 99 | def main(): 100 | tf.logging.set_verbosity(tf.logging.INFO) 101 | estimator = 
build_estimator() 102 | early_stop_hook = tf.estimator.experimental.stop_if_no_decrease_hook(estimator, "loss", 1000) 103 | 104 | movielens = MovielensRanking() 105 | train_spec = tf.estimator.TrainSpec(lambda: movielens.training_input_fn, 106 | max_steps=None, 107 | hooks=[early_stop_hook]) 108 | eval_spec = tf.estimator.EvalSpec(lambda: movielens.testing_input_fn, 109 | steps=None, 110 | start_delay_secs=0, 111 | throttle_secs=0) 112 | tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /tests/keras/test_factorized_top_k.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | sys.dont_write_bytecode = True 5 | 6 | import os 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from absl.testing import parameterized 11 | from deep_recommenders.keras.models.retrieval import factorized_top_k 12 | from deep_recommenders.keras.models.retrieval import FactorizedTopK 13 | 14 | 15 | class TestFactorizedTopK(tf.test.TestCase, parameterized.TestCase): 16 | 17 | def test_take_long_axis(self): 18 | arr = tf.constant([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) 19 | indices = tf.constant([[0, 1], [2, 1]]) 20 | out = factorized_top_k._take_long_axis(arr, indices) 21 | expected_out = tf.constant([[0.1, 0.2], [0.6, 0.5]]) 22 | self.evaluate(tf.compat.v1.global_variables_initializer()) 23 | self.assertAllClose(out, expected_out) 24 | 25 | def test_exclude(self): 26 | scores = tf.constant([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) 27 | identifiers = tf.constant([[0, 1, 2], [3, 4, 5]]) 28 | exclude = tf.constant([[1, 2], [3, 5]]) 29 | k = 1 30 | x, y = factorized_top_k._exclude(scores, identifiers, exclude, k) 31 | expected_x = tf.constant([[0.1], [0.5]]) 32 | expected_y = tf.constant([[0], [4]]) 33 | self.evaluate(tf.compat.v1.global_variables_initializer()) 34 | self.assertAllClose((x, y), (expected_x, expected_y)) 35 | 36 | @parameterized.parameters(np.str, np.float32, np.float64, np.int32, np.int64) 37 | def test_faiss(self, identifier_dtype): 38 | num_candidates, num_queries = (5000, 4) 39 | 40 | rng = np.random.RandomState(42) # pylint: disable=no-member 41 | candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32) 42 | query = rng.normal(size=(num_queries, 4)).astype(np.float32) 43 | candidate_names = np.arange(num_candidates).astype(identifier_dtype) 44 | 45 | faiss_topk = factorized_top_k.Faiss(k=10) 46 | faiss_topk.index(candidates, candidate_names) 47 | 48 | for _ in range(100): 49 | pre_serialization_results = faiss_topk(query[:2]) 50 | 51 | path = os.path.join(self.get_temp_dir(), "query_model") 52 | faiss_topk.save( 53 | path, 54 | options=tf.saved_model.SaveOptions(namespace_whitelist=["Faiss"])) 55 | loaded = tf.keras.models.load_model(path) 56 | 57 | for _ in range(100): 58 | post_serialization_results = loaded(tf.constant(query[:2])) 59 | 60 | self.assertAllEqual(post_serialization_results, pre_serialization_results) 61 | 62 | @parameterized.parameters(np.float32, np.float64) 63 | def test_faiss_with_no_identifiers(self, candidate_dtype): 64 | """ 测试构建无唯一标识索引 """ 65 | num_candidates = 5000 66 | 67 | candidates = np.random.normal(size=(num_candidates, 4)).astype(candidate_dtype) 68 | faiss_topk = factorized_top_k.Faiss(k=10) 69 | faiss_topk.index(candidates, identifiers=None) 70 | 
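        # With identifiers=None the candidates are indexed by position only;
        # the check below reads the raw faiss index size through the layer's
        # internal `_searcher` handle (ntotal == number of vectors added).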
self.evaluate(tf.compat.v1.global_variables_initializer()) 71 | self.assertAllClose(num_candidates, faiss_topk._searcher.ntotal) 72 | 73 | @parameterized.parameters(np.str, np.float32, np.float64, np.int32, np.int64) 74 | def test_faiss_with_dataset(self, identifier_dtype): 75 | num_candidates = 5000 76 | 77 | candidates = tf.data.Dataset.from_tensor_slices( 78 | np.random.normal(size=(num_candidates, 4)).astype(np.float32)) 79 | identifiers = tf.data.Dataset.from_tensor_slices( 80 | np.arange(num_candidates).astype(identifier_dtype)) 81 | faiss_topk = factorized_top_k.Faiss(k=10) 82 | faiss_topk.index(candidates.batch(100), identifiers=identifiers) 83 | self.evaluate(tf.compat.v1.global_variables_initializer()) 84 | self.assertAllClose(num_candidates, faiss_topk._searcher.ntotal) 85 | 86 | @parameterized.parameters( 87 | factorized_top_k.Streaming, 88 | factorized_top_k.BruteForce, 89 | factorized_top_k.Faiss, 90 | None) 91 | def test_factorized_topk_metrics(self, top_k_layer): 92 | 93 | rng = np.random.RandomState(42) # pylint: disable=no-member 94 | 95 | num_candidates, num_queries, embedding_dim = (100, 10, 4) 96 | 97 | candidates = rng.normal(size=(num_candidates, embedding_dim)).astype(np.float32) 98 | queries = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32) 99 | true_candidates = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32) 100 | 101 | positive_scores = (queries * true_candidates).sum(axis=1, keepdims=True) 102 | candidate_scores = queries @ candidates.T 103 | 104 | all_scores = np.concatenate([positive_scores, candidate_scores], axis=1) 105 | 106 | ks = [1, 5, 10, 50] 107 | 108 | candidates = tf.data.Dataset.from_tensor_slices(candidates).batch(32) 109 | 110 | if top_k_layer is not None: 111 | candidates = top_k_layer().index(candidates) 112 | 113 | metric = FactorizedTopK( 114 | candidates=candidates, 115 | metrics=[ 116 | tf.keras.metrics.TopKCategoricalAccuracy( 117 | k=x, name=f"top_{x}_categorical_accuracy") for x in ks 118 | ], 119 | k=max(ks), 120 | ) 121 | 122 | metric.update_state( 123 | query_embeddings=queries, true_candidate_embeddings=true_candidates) 124 | 125 | for k, metric_value in zip(ks, metric.result()): 126 | in_top_k = tf.math.in_top_k( 127 | targets=np.zeros(num_queries).astype(np.int32), 128 | predictions=all_scores, 129 | k=k) 130 | self.assertAllClose(metric_value, in_top_k.numpy().mean()) 131 | 132 | 133 | if __name__ == "__main__": 134 | tf.test.main() 135 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/retrieval/sbcnm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | from typing import Tuple, Optional 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | 9 | MAX_FLOAT = np.finfo(np.float32).max / 100.0 10 | MIN_FLOAT = np.finfo(np.float32).min / 100.0 11 | 12 | from deep_recommenders.keras.models.retrieval import FactorizedTopK 13 | 14 | 15 | def _gather_elements_along_row(data: tf.Tensor, 16 | column_indices: tf.Tensor) -> tf.Tensor: 17 | """与factorized_top_k中_take_long_axis相同""" 18 | with tf.control_dependencies( 19 | [tf.assert_equal(tf.shape(data)[0], tf.shape(column_indices)[0])]): 20 | num_row = tf.shape(data)[0] 21 | num_column = tf.shape(data)[1] 22 | num_gathered = tf.shape(column_indices)[1] 23 | row_indices = tf.tile( 24 | tf.expand_dims(tf.range(num_row), -1), 25 | [1, num_gathered]) 26 | flat_data = tf.reshape(data, [-1]) 27 | flat_indices 
31 | 32 | 33 | class HardNegativeMining(tf.keras.layers.Layer): 34 | """Hard negative mining: keeps only the positive and the highest-scoring negatives per row.""" 35 | 36 | def __init__(self, num_hard_negatives: int, **kwargs): 37 | super(HardNegativeMining, self).__init__(**kwargs) 38 | 39 | self._num_hard_negatives = num_hard_negatives 40 | 41 | def call(self, logits: tf.Tensor, labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: 42 | num_sampled = tf.minimum(self._num_hard_negatives + 1, tf.shape(logits)[1]) 43 | 44 | _, indices = tf.nn.top_k(logits + labels * MAX_FLOAT, k=num_sampled, sorted=False) 45 | 46 | logits = _gather_elements_along_row(logits, indices) 47 | labels = _gather_elements_along_row(labels, indices) 48 | 49 | return logits, labels 50 | 51 | 52 | class RemoveAccidentalNegative(tf.keras.layers.Layer): 53 | 54 | def call(self, 55 | logits: tf.Tensor, 56 | labels: tf.Tensor, 57 | identifiers: tf.Tensor) -> tf.Tensor: 58 | """Masks logits of accidental negatives (in-batch duplicates of the positive item) 59 | Args: 60 | logits: [batch_size, num_candidates] 2D tensor 61 | labels: [batch_size, num_candidates] one-hot 2D tensor 62 | identifiers: [num_candidates] candidates identifiers tensor 63 | Returns: 64 | logits: Modified logits. 65 | """ 66 | identifiers = tf.expand_dims(identifiers, 1) 67 | positive_indices = tf.math.argmax(labels, axis=1) 68 | positive_identifier = tf.gather(identifiers, positive_indices) 69 | 70 | duplicate = tf.equal(positive_identifier, tf.transpose(identifiers)) 71 | duplicate = tf.cast(duplicate, labels.dtype) 72 | 73 | duplicate = duplicate - labels 74 | 75 | return logits + duplicate * MIN_FLOAT 76 | 77 |
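# Worked example for RemoveAccidentalNegative (a sketch, not part of the original file):
# with identifiers = [7, 9, 7] and a labels row [1, 0, 0] (the positive is item 7),
# `duplicate` for that row is [1, 0, 1]; subtracting the labels leaves [0, 0, 1],
# so the logit in column 2 -- an accidental in-batch duplicate of the positive --
# is pushed to MIN_FLOAT and effectively dropped from the softmax.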
78 | class SamplingProbabilityCorrection(tf.keras.layers.Layer): 79 | """Sampling probability correction.""" 80 | 81 | def call(self, 82 | logits: tf.Tensor, 83 | candidate_sampling_probability: tf.Tensor) -> tf.Tensor: 84 | """Corrects the input logits to account for candidate sampling probability.""" 85 | 86 | return logits - tf.math.log(candidate_sampling_probability) 87 | 88 | 89 | class Retrieval(tf.keras.layers.Layer): 90 | """Retrieval task: in-batch sampled-softmax loss plus optional factorized top-K metrics.""" 91 | 92 | def __init__(self, 93 | loss: Optional[tf.keras.losses.Loss] = None, 94 | metrics: Optional[FactorizedTopK] = None, 95 | temperature: Optional[float] = None, 96 | num_hard_negatives: Optional[int] = None, 97 | **kwargs): 98 | super(Retrieval, self).__init__(**kwargs) 99 | 100 | self._loss = tf.keras.losses.CategoricalCrossentropy( 101 | from_logits=True, reduction=tf.keras.losses.Reduction.SUM 102 | ) if loss is None else loss 103 | 104 | self._factorized_metrics = metrics 105 | self._temperature = temperature 106 | self._num_hard_negatives = num_hard_negatives 107 | 108 | @property 109 | def factorized_metrics(self) -> Optional[FactorizedTopK]: 110 | """The metrics object used to compute retrieval metrics.""" 111 | 112 | return self._factorized_metrics 113 | 114 | @factorized_metrics.setter 115 | def factorized_metrics(self, value: Optional[FactorizedTopK]) -> None: 116 | """Sets factorized metrics.""" 117 | 118 | self._factorized_metrics = value 119 | 120 | def call(self, 121 | query_embeddings: tf.Tensor, 122 | candidate_embeddings: tf.Tensor, 123 | sample_weight: Optional[tf.Tensor] = None, 124 | candidate_sampling_probability: Optional[tf.Tensor] = None, 125 | candidate_ids: Optional[tf.Tensor] = None, 126 | compute_metrics: bool = True) -> tf.Tensor: 127 | """Computes the loss and, optionally, the metrics.""" 128 | 129 | scores = tf.matmul(query_embeddings, candidate_embeddings, transpose_b=True) 130 | 131 | num_queries = tf.shape(scores)[0] 132 | num_candidates = tf.shape(scores)[1] 133 | 134 | labels = tf.eye(num_queries, num_candidates) 135 | 136 | if candidate_sampling_probability is not None: 137 | scores = SamplingProbabilityCorrection()( 138 | scores, candidate_sampling_probability) 139 | 140 | if candidate_ids is not None: 141 | scores = RemoveAccidentalNegative()( 142 | scores, labels, candidate_ids) 143 | 144 | if self._num_hard_negatives is not None: 145 | scores, labels = HardNegativeMining( 146 | self._num_hard_negatives)(scores, labels) 147 | 148 | if self._temperature is not None: 149 | scores = scores / self._temperature 150 | 151 | loss = self._loss(y_true=labels, y_pred=scores, sample_weight=sample_weight) 152 | 153 | if not compute_metrics: 154 | return loss 155 | 156 | if not self._factorized_metrics: 157 | return loss 158 | 159 | update_op = self._factorized_metrics.update_state( 160 | query_embeddings, candidate_embeddings) 161 | 162 | with tf.control_dependencies([update_op]): 163 | return tf.identity(loss) 164 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/nlp/multi_head_attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import tensorflow as tf 5 | import tensorflow.keras.backend as K 6 | 7 | 8 | @tf.keras.utils.register_keras_serializable() 9 | class Embedding(tf.keras.layers.Layer): 10 | 11 | def __init__(self, vocab_size, model_dim, **kwargs): 12 | self._vocab_size = vocab_size 13 | self._model_dim = model_dim 14 | super(Embedding, self).__init__(**kwargs) 15 | 16 | def build(self, input_shape): 17 | self.embeddings = self.add_weight( 18 | shape=(self._vocab_size, self._model_dim), 19 | initializer='glorot_uniform', 20 | name="embeddings") 21 | super(Embedding, self).build(input_shape) 22 | 23 | def call(self, inputs, **kwargs): 24 | if K.dtype(inputs) != 'int32': 25 | inputs = K.cast(inputs, 'int32') 26 | embeddings = K.gather(self.embeddings, inputs) 27 | embeddings *= self._model_dim ** 0.5 # Scale 28 | return embeddings 29 | 30 | def compute_output_shape(self, input_shape): 31 | 32 | return input_shape + (self._model_dim,) 33 | 34 | 35 | @tf.keras.utils.register_keras_serializable() 36 | class ScaledDotProductAttention(tf.keras.layers.Layer): 37 | 38 | def __init__(self, masking=True, future=False, dropout_rate=0., **kwargs): 39 | self._masking = masking 40 | self._future = future 41 | self._dropout_rate = dropout_rate 42 | self._masking_num = -2**32+1 43 | super(ScaledDotProductAttention, self).__init__(**kwargs) 44 | 45 | def mask(self, inputs, masks): 46 | masks = K.cast(masks, 'float32') 47 | masks = K.tile(masks, [K.shape(inputs)[0] // K.shape(masks)[0], 1]) 48 | masks = K.expand_dims(masks, 1) 49 | outputs = inputs + masks * self._masking_num 50 | return outputs 51 | 52 | def future_mask(self, inputs): 53 | diag_vals = tf.ones_like(inputs[0, :, :]) 54 | tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense() 55 | future_masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1]) 56 | paddings = tf.ones_like(future_masks) * self._masking_num 57 | outputs = tf.where(tf.equal(future_masks, 0), paddings, inputs) 58 | return outputs 59 | 60 | def call(self, inputs,
**kwargs): 61 | if self._masking: 62 | assert len(inputs) == 4, "inputs should be set [queries, keys, values, masks]." 63 | queries, keys, values, masks = inputs 64 | else: 65 | assert len(inputs) == 3, "inputs should be set [queries, keys, values]." 66 | queries, keys, values = inputs 67 | 68 | if K.dtype(queries) != 'float32': queries = K.cast(queries, 'float32') 69 | if K.dtype(keys) != 'float32': keys = K.cast(keys, 'float32') 70 | if K.dtype(values) != 'float32': values = K.cast(values, 'float32') 71 | 72 | matmul = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1])) # MatMul 73 | scaled_matmul = matmul / int(queries.shape[-1]) ** 0.5 # Scale 74 | if self._masking: 75 | scaled_matmul = self.mask(scaled_matmul, masks) # Mask(opt.) 76 | 77 | if self._future: 78 | scaled_matmul = self.future_mask(scaled_matmul) 79 | 80 | softmax_out = K.softmax(scaled_matmul) # SoftMax 81 | # Dropout 82 | out = K.dropout(softmax_out, self._dropout_rate) 83 | 84 | outputs = K.batch_dot(out, values) 85 | 86 | return outputs 87 | 88 | def compute_output_shape(self, input_shape): 89 | return input_shape 90 | 91 | 92 | @tf.keras.utils.register_keras_serializable() 93 | class MultiHeadAttention(tf.keras.layers.Layer): 94 | 95 | def __init__(self, n_heads, head_dim, dropout_rate=.1, masking=True, future=False, trainable=True, **kwargs): 96 | self._n_heads = n_heads 97 | self._head_dim = head_dim 98 | self._dropout_rate = dropout_rate 99 | self._masking = masking 100 | self._future = future 101 | self._trainable = trainable 102 | super(MultiHeadAttention, self).__init__(**kwargs) 103 | 104 | def build(self, input_shape): 105 | self._weights_queries = self.add_weight( 106 | shape=(input_shape[0][-1], self._n_heads * self._head_dim), 107 | initializer='glorot_uniform', 108 | trainable=self._trainable, 109 | name='weights_queries') 110 | self._weights_keys = self.add_weight( 111 | shape=(input_shape[1][-1], self._n_heads * self._head_dim), 112 | initializer='glorot_uniform', 113 | trainable=self._trainable, 114 | name='weights_keys') 115 | self._weights_values = self.add_weight( 116 | shape=(input_shape[2][-1], self._n_heads * self._head_dim), 117 | initializer='glorot_uniform', 118 | trainable=self._trainable, 119 | name='weights_values') 120 | super(MultiHeadAttention, self).build(input_shape) 121 | 122 | def call(self, inputs, **kwargs): 123 | if self._masking: 124 | assert len(inputs) == 4, "inputs should be set [queries, keys, values, masks]." 125 | queries, keys, values, masks = inputs 126 | else: 127 | assert len(inputs) == 3, "inputs should be set [queries, keys, values]." 
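# The three projections below map queries/keys/values to n_heads * head_dim
# features; tf.split + tf.concat then stack the heads along the batch axis, so
# a single ScaledDotProductAttention call attends over all heads in parallel,
# and the inverse split/concat at the end restores the
# [batch_size, seq_len, n_heads * head_dim] layout.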
128 | queries, keys, values = inputs 129 | 130 | queries_linear = K.dot(queries, self._weights_queries) 131 | keys_linear = K.dot(keys, self._weights_keys) 132 | values_linear = K.dot(values, self._weights_values) 133 | 134 | queries_multi_heads = tf.concat(tf.split(queries_linear, self._n_heads, axis=2), axis=0) 135 | keys_multi_heads = tf.concat(tf.split(keys_linear, self._n_heads, axis=2), axis=0) 136 | values_multi_heads = tf.concat(tf.split(values_linear, self._n_heads, axis=2), axis=0) 137 | 138 | if self._masking: 139 | att_inputs = [queries_multi_heads, keys_multi_heads, values_multi_heads, masks] 140 | else: 141 | att_inputs = [queries_multi_heads, keys_multi_heads, values_multi_heads] 142 | 143 | attention = ScaledDotProductAttention( 144 | masking=self._masking, future=self._future, dropout_rate=self._dropout_rate) 145 | att_out = attention(att_inputs) 146 | 147 | outputs = tf.concat(tf.split(att_out, self._n_heads, axis=0), axis=2) 148 | 149 | return outputs 150 | 151 | def compute_output_shape(self, input_shape): 152 | return input_shape 153 | -------------------------------------------------------------------------------- /deep_recommenders/datasets/movielens.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import random 5 | import tensorflow as tf 6 | 7 | 8 | def _download_and_unzip(filename="ml-1m.zip"): 9 | import requests 10 | import zipfile 11 | url = "https://files.grouplens.org/datasets/movielens/ml-1m.zip" 12 | r = requests.get(url) 13 | with open(filename, "wb") as f: 14 | f.write(r.content) 15 | f = zipfile.ZipFile(filename) 16 | f.extractall() 17 | 18 | 19 | def _data_shard(filename, num_shards=4): 20 | cmd = "wc -l < {}".format(filename) 21 | cmd_res = os.popen(cmd) 22 | total_lines = int(cmd_res.read().strip()) 23 | block_lines = total_lines // num_shards 24 | num_lines, num_shard = 0, 0 25 | with open(filename, "r", encoding="unicode_escape") as f: 26 | for line in f: 27 | if num_lines % block_lines == 0: 28 | if num_shard < num_shards: 29 | _f = open(filename+str(num_shard), "w") 30 | num_shard += 1 31 | _f.write(line) 32 | num_lines += 1 33 | 34 | 35 | def _shuffle_data(filename): 36 | shuffled_filename = f"{filename}.shuffled" 37 | with open(filename, "r") as f: 38 | lines = f.readlines() 39 | random.shuffle(lines) 40 | with open(shuffled_filename, "w") as f: 41 | f.writelines(lines) 42 | return shuffled_filename 43 | 44 | 45 | def _load_data(filename, columns): 46 | data = {} 47 | with open(filename, "r", encoding="unicode_escape") as f: 48 | for line in f: 49 | ls = line.strip("\n").split("::") 50 | data[ls[0]] = dict(zip(columns[1:], ls[1:])) 51 | return data 52 | 53 | 54 | def _serialize_example(feature): 55 | serialize_feature = {} 56 | for c in ["Age", "Occupation", "Rating", "Timestamp"]: 57 | serialize_feature[c] = tf.train.Feature(int64_list=tf.train.Int64List(value=[feature[c]])) 58 | for c in ["UserID", "MovieID", "Gender", "Zip-code", "Title"]: 59 | serialize_feature[c] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[feature[c]])) 60 | serialize_feature["Genres"] = tf.train.Feature(bytes_list=tf.train.BytesList(value=feature["Genres"])) 61 | example_proto = tf.train.Example(features=tf.train.Features(feature=serialize_feature)) 62 | return example_proto.SerializeToString() 63 | 64 | 65 | def serialize_tfrecords(tfrecords_fn, datadir="ml-1m", download=False): 66 | 67 | if download is True: 68 | print("Downloading MovieLens-1M dataset 
...") 69 | _download_and_unzip(datadir + ".zip") 70 | 71 | users_data = _load_data(datadir + "/users.dat", 72 | columns=["UserID", "Gender", "Age", "Occupation", "Zip-code"]) 73 | movies_data = _load_data(datadir + "/movies.dat", 74 | columns=["MovieID", "Title", "Genres"]) 75 | 76 | ratings_columns = ["UserID", "MovieID", "Rating", "Timestamp"] 77 | writer = tf.io.TFRecordWriter(tfrecords_fn) 78 | shuffled_filename = _shuffle_data(datadir + "/ratings.dat") 79 | f = open(shuffled_filename, "r", encoding="unicode_escape") 80 | for line in f: 81 | ls = line.strip().split("::") 82 | rating = dict(zip(ratings_columns, ls)) 83 | rating.update(users_data.get(ls[0])) 84 | rating.update(movies_data.get(ls[1])) 85 | for c in ["Age", "Occupation", "Rating", "Timestamp"]: 86 | rating[c] = int(rating[c]) 87 | for c in ["UserID", "MovieID", "Gender", "Zip-code", "Title"]: 88 | rating[c] = rating[c].encode("utf-8") 89 | rating["Genres"] = [x.encode("utf-8") for x in rating["Genres"].split("|")] 90 | serialized = _serialize_example(rating) 91 | writer.write(serialized) 92 | writer.close() 93 | f.close() 94 | 95 | 96 | class MovieLens(object): 97 | 98 | def __init__(self, filename="movielens.tfrecords"): 99 | self._filename = os.path.join(os.path.dirname(__file__), filename) 100 | self._columns = ["UserID", "MovieID", "Rating", "Timestamp", 101 | "Gender", "Age", "Occupation", "Zip-code", 102 | "Title", "Genres"] 103 | self.num_ratings = 1000209 104 | self.num_users = 6040 105 | self.num_movies = 3952 106 | self.gender_vocab = ["F", "M"] 107 | self.age_vocab = [1, 18, 25, 35, 45, 50, 56] 108 | self.occupation_vocab = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 109 | 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 110 | self.genres_vocab = ["Action", "Adventure", "Animation", "Children's", "Comedy", 111 | "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", 112 | "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"] 113 | 114 | def dataset(self, epochs=1, batch_size=256): 115 | 116 | def _parse_example(serialized_example): 117 | features = {} 118 | for c in ["Age", "Occupation", "Rating", "Timestamp"]: 119 | features[c] = tf.io.FixedLenFeature([], tf.int64) 120 | for c in ["UserID", "MovieID", "Gender", "Zip-code", "Title"]: 121 | features[c] = tf.io.FixedLenFeature([], tf.string) 122 | features["Genres"] = tf.io.VarLenFeature(tf.string) 123 | example = tf.io.parse_example(serialized_example, features) 124 | ratings = example.pop("Rating") 125 | return example, ratings 126 | 127 | ds = tf.data.TFRecordDataset(self._filename) 128 | ds = ds.repeat(epochs) 129 | ds = ds.batch(batch_size) 130 | ds = ds.map(_parse_example, num_parallel_calls=-1) 131 | return ds 132 | 133 | 134 | class MovielensRanking(MovieLens): 135 | 136 | def __init__(self, 137 | epochs: int = 10, 138 | batch_size: int = 1024, 139 | buffer_size: int = 1024, 140 | train_size: float = 0.8, 141 | *args, **kwargs): 142 | super(MovielensRanking, self).__init__(*args, **kwargs) 143 | self._epochs = epochs 144 | self._batch_size = batch_size 145 | self._buffer_size = buffer_size 146 | self._train_size = train_size 147 | 148 | @property 149 | def train_steps(self): 150 | num_train_ratings = self.num_ratings * self._epochs * self._train_size 151 | return int(num_train_ratings // self._batch_size) 152 | 153 | @property 154 | def train_steps_per_epoch(self): 155 | num_train_ratings = self.num_ratings * self._train_size 156 | return int(num_train_ratings // self._batch_size) 157 | 158 | @property 159 | def test_steps(self): 160 | return 
self.num_ratings // self._batch_size - self.train_steps_per_epoch 161 | 162 | @property 163 | def training_input_fn(self): 164 | # Take the training split first, then repeat, so that the held-out 165 | # test ratings never leak into training. 166 | return self.input_fn().take(self.train_steps_per_epoch).repeat(self._epochs) 167 | 168 | @property 169 | def testing_input_fn(self): 170 | return self.input_fn().skip(self.train_steps_per_epoch).take(self.test_steps) 171 | 172 | def input_fn(self): 173 | dataset = self.dataset(epochs=1, batch_size=self._batch_size) 174 | dataset = dataset.map(lambda x, y: ( 175 | { 176 | "user_id": x["UserID"], 177 | "user_gender": x["Gender"], 178 | "user_age": x["Age"], 179 | "user_occupation": x["Occupation"], 180 | "movie_id": x["MovieID"], 181 | "movie_genres": x["Genres"] 182 | }, 183 | tf.expand_dims(tf.where(y > 3, 184 | tf.ones_like(y, dtype=tf.float32), 185 | tf.zeros_like(y, dtype=tf.float32)), axis=1) 186 | )) 187 | dataset = dataset.prefetch(self._buffer_size) 188 | return dataset 189 | 190 | 191 | if __name__ == '__main__': 192 | serialize_tfrecords("movielens.tfrecords", download=True) 193 | 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Recommenders 2 | [![Python](https://img.shields.io/badge/python-3.7_|_3.8-brightgreen)](requirements.txt) 3 | [![TensorFlow](https://img.shields.io/badge/tensorflow-1.15_|_2.0+-brightgreen)](requirements.txt) 4 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/c4b6335acf254697b80714b81e8154d7)](https://www.codacy.com/gh/LongmaoTeamTf/deep_recommenders/dashboard?utm_source=github.com&utm_medium=referral&utm_content=LongmaoTeamTf/deep_recommenders&utm_campaign=Badge_Grade) 5 | 6 | [![CodeQL](https://github.com/LongmaoTeamTf/deep_recommenders/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/LongmaoTeamTf/deep_recommenders/actions/workflows/codeql-analysis.yml) 7 | [![CI](https://github.com/LongmaoTeamTf/deep_recommenders/actions/workflows/continuous_integration.yml/badge.svg)](https://github.com/LongmaoTeamTf/deep_recommenders/actions/workflows/continuous_integration.yml) 8 | [![codecov](https://codecov.io/gh/LongmaoTeamTf/deep_recommenders/branch/master/graph/badge.svg?token=X9VQCCLJJX)](https://codecov.io/gh/LongmaoTeamTf/deep_recommenders) 9 | [![License](https://img.shields.io/badge/license-Apache_2.0-green)](LICENSE) 10 | 11 | Deep Recommenders is an open-source recommender-system algorithm library 12 | built on `tf.estimator` and `tf.keras`, the high-level APIs of TensorFlow. 13 | 14 | 🤗️ This library is mainly used for self-study and improvement, and we also hope it helps 15 | friends and classmates who are interested in recommender systems to make progress together!
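A minimal quick-start sketch using the bundled MovieLens wrapper (here `build_model()` is a placeholder for any Keras model over these features, e.g. the one assembled in [examples/train_deepfm_on_movielens_keras.py](examples/train_deepfm_on_movielens_keras.py)):

```python
from deep_recommenders.datasets.movielens import MovielensRanking

movielens = MovielensRanking(epochs=10, batch_size=1024)

model = build_model()  # any tf.keras.Model over the MovielensRanking features
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["AUC"])
model.fit(movielens.training_input_fn,
          steps_per_epoch=movielens.train_steps_per_epoch,
          epochs=10)
model.evaluate(movielens.testing_input_fn, steps=movielens.test_steps)
```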
16 | 17 | ## Models 18 | 19 | ### Ranking 20 | 21 | - **FM** 22 | [[Estimator]](examples/train_fm_on_movielens_estimator.py) 23 | [ 24 | *Factorization Machines, Osaka, 2010* 25 | ](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) 26 | - **FFM** 27 | [ 28 | *Field-aware Factorization Machines for CTR Prediction, RecSys, 2016* 29 | ](https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf) 30 | - **LS-PLM** 31 | [ 32 | *Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction, Alibaba, 2017* 33 | ](https://arxiv.org/pdf/1704.05194.pdf) 34 | - **WDL** 35 | [[Estimator]](examples/train_wdl_on_movielens_estimator.py) 36 | [ 37 | *Wide & Deep Learning for Recommender Systems, Google, DLRS, 2016* 38 | ](https://arxiv.org/abs/1606.07792) 39 | - **PNN** 40 | [ 41 | *Product-based Neural Networks for User Response Prediction, IEEE, 2016* 42 | ](https://arxiv.org/abs/1611.00144) 43 | - **FNN** 44 | [[Estimator]](examples/train_fnn_on_movielens_estimator.py) 45 | [ 46 | *Deep Learning over Multi-field Categorical Data: A Case Study on User Response Prediction, RayCloud, ECIR, 2016* 47 | ](https://arxiv.org/abs/1601.02376) 48 | - **NFM** 49 | [ 50 | *Neural Factorization Machines for Sparse Predictive Analytics, SIGIR, 2017* 51 | ](https://arxiv.org/pdf/1708.05027.pdf) 52 | - **AFM** 53 | [ 54 | *Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks, IJCAI, 2017* 55 | ](https://www.ijcai.org/proceedings/2017/0435.pdf) 56 | - **DeepFM** 57 | [[Estimator]](examples/train_deepfm_on_movielens_estimator.py) 58 | [[Keras]](examples/train_deepfm_on_movielens_keras.py) 59 | [ 60 | *DeepFM: A Factorization-Machine based Neural Network for CTR Prediction, Huawei, IJCAI, 2017* 61 | ](https://www.ijcai.org/proceedings/2017/0239.pdf) 62 | - **DCN** 63 | [ 64 | *Deep & Cross Network for Ad Click Predictions, Google, KDD, 2017* 65 | ](https://arxiv.org/abs/1708.05123) 66 | - **xDeepFM** 67 | [ 68 | *xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems, Microsoft, KDD, 2018* 69 | ](https://arxiv.org/pdf/1803.05170.pdf) 70 | - **DIN** 71 | [ 72 | *Deep Interest Network for Click-Through Rate Prediction, Alibaba, KDD, 2018* 73 | ](https://arxiv.org/abs/1706.06978) 74 | - **DIEN** 75 | [ 76 | *Deep Interest Evolution Network for Click-Through Rate Prediction, Alibaba, AAAI, 2019* 77 | ](https://arxiv.org/abs/1809.03672) 78 | - **DLRM** 79 | [ 80 | *Deep Learning Recommendation Model for Personalization and Recommendation Systems, Facebook, 2019* 81 | ](https://arxiv.org/abs/1906.00091) 82 | 83 | ### Retrieval 84 | 85 | - **DSSM** 86 | [ 87 | *Learning Deep Structured Semantic Models for Web Search using Clickthrough Data, Microsoft, CIKM, 2013* 88 | ](https://dl.acm.org/doi/10.1145/2505515.2505665) 89 | - **YoutubeNet** 90 | [ 91 | *Deep Neural Networks for YouTube Recommendations, Google, RecSys, 2016* 92 | ](https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/45530.pdf) 93 | - **SBCNM** 94 | [ 95 | *Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations, Google, RecSys, 2019* 96 | ](https://dl.acm.org/doi/10.1145/3298689.3346996) 97 | - **EBR** 98 | [ 99 | *Embedding-based Retrieval in Facebook Search, Facebook, KDD, 2020* 100 | ](https://arxiv.org/abs/2006.11632) 101 | - **Item2Vec** 102 | [ 103 | *Item2Vec: Neural Item Embedding for Collaborative Filtering, Microsoft, MLSP, 2016* 104 | ](https://arxiv.org/vc/arxiv/papers/1603/1603.04259v2.pdf) 105 | - 
**Airbnb** 106 | [ 107 | *Real-time Personalization using Embeddings for Search Ranking at Airbnb, Airbnb, KDD, 2018* 108 | ](https://dl.acm.org/doi/10.1145/3219819.3219885) 109 | - **DeepWalk** 110 | [ 111 | *DeepWalk: Online Learning of Social Representations, Stony Brook, KDD, 2014* 112 | ](https://arxiv.org/abs/1403.6652) 113 | - **EGES** 114 | [ 115 | *Billion-scale Commodity Embedding for E-commerce Recommendation in Alibaba, Alibaba, KDD, 2018* 116 | ](https://arxiv.org/abs/1803.02349) 117 | - **GCN** 118 | [[Keras]](examples/train_gcn_on_cora_keras.py) 119 | [ 120 | *Semi-Supervised Classification with Graph Convolutional Networks, ICLR, 2017* 121 | ](https://arxiv.org/abs/1609.02907) 122 | - **GraphSAGE** 123 | [ 124 | *Inductive Representation Learning on Large Graphs, NIPS, 2017* 125 | ](https://arxiv.org/abs/1706.02216) 126 | - **PinSage** 127 | [ 128 | *Graph Convolutional Neural Networks for Web-Scale Recommender Systems, Pinterest, KDD, 2018* 129 | ](https://arxiv.org/abs/1806.01973) 130 | - **IntentGC** 131 | [ 132 | *IntentGC: a Scalable Graph Convolution Framework Fusing Heterogeneous Information for Recommendation, Alibaba, KDD, 2019* 133 | ](https://arxiv.org/abs/1907.12377) 134 | - **GraphTR** 135 | [ 136 | *Graph Neural Network for Tag Ranking in Tag-enhanced Video Recommendation, Tencent, CIKM, 2020* 137 | ](https://dl.acm.org/doi/abs/10.1145/3340531.3416021) 138 | 139 | ### Multi-task learning 140 | 141 | - **MMoE** 142 | [[Estimator]](examples/train_mmoe_on_synthetic_estimator.py) 143 | [ 144 | *Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts, Google, KDD, 2018* 145 | ](https://dl.acm.org/doi/pdf/10.1145/3219819.3220007) 146 | - **ESMM** 147 | [ 148 | *Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate, Alibaba, SIGIR, 2018* 149 | ](https://arxiv.org/pdf/1804.07931.pdf) 150 | 151 | ### NLP 152 | 153 | - **Word2Vec** 154 | [ 155 | *Distributed Representations of Words and Phrases and their Compositionality, Google, NIPS, 2013* 156 | ](https://papers.nips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf) 157 | 158 | - **Transformer** 159 | [[Keras]](examples/train_transformer_on_imdb_keras.py) 160 | [ 161 | *Attention Is All You Need, Google, NIPS, 2017* 162 | ](https://arxiv.org/abs/1706.03762) 163 | 164 | - **BERT** 165 | [ 166 | *BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding, Google, NAACL, 2019* 167 | ](https://arxiv.org/abs/1810.04805) 168 | 169 | ## Supports 170 | 171 | [1.15-passing]: https://img.shields.io/badge/1.15-passing-brightgreen 172 | [1.15-failing]: https://img.shields.io/badge/1.15-failing-red 173 | [2.0+-passing]: https://img.shields.io/badge/2.0+-passing-brightgreen 174 | [2.3+-passing]: https://img.shields.io/badge/2.3+-passing-brightgreen 175 | 176 | | Modules | TensorFlow | 177 | | ------- | ---------------- | 178 | | *deep_recommenders.estimator* | ![1.15-passing]
![2.0+-passing] 179 | | *deep_recommenders.keras* | ![1.15-failing]
![2.3+-passing] 180 | 181 | ## License 182 | [Apache License 2.0](LICENSE) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /deep_recommenders/keras/models/nlp/transformer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensorflow.keras.backend as K 6 | from tensorflow.keras.layers import Layer 7 | from tensorflow.keras.callbacks import Callback 8 | 9 | from deep_recommenders.keras.models.nlp import MultiHeadAttention 10 | 11 | 12 | @tf.keras.utils.register_keras_serializable() 13 | class PositionEncoding(Layer): 14 | 15 | def __init__(self, model_dim, **kwargs): 16 | self._model_dim = model_dim 17 | super(PositionEncoding, self).__init__(**kwargs) 18 | 19 | def call(self, inputs, **kwargs): 20 | seq_length = inputs.shape[1] 21 | position_encodings = np.zeros((seq_length, self._model_dim)) 22 | for pos in range(seq_length): 23 | for i in range(self._model_dim): 24 | position_encodings[pos, i] = pos / np.power(10000, (i-i%2) / self._model_dim) 25 | position_encodings[:, 0::2] = np.sin(position_encodings[:, 0::2]) # 2i 26 | position_encodings[:, 1::2] = np.cos(position_encodings[:, 1::2]) # 2i+1 27 | position_encodings = K.cast(position_encodings, 'float32') 28 | return position_encodings 29 | 30 | def compute_output_shape(self, input_shape): 31 | return input_shape 32 | 33 | 34 | @tf.keras.utils.register_keras_serializable() 35 | class Add(Layer): 36 | 37 | def __init__(self, **kwargs): 38 | super(Add, self).__init__(**kwargs) 39 | 40 | def call(self, inputs, **kwargs): 41 | input_a, input_b = inputs 42 | return input_a + input_b 43 | 44 | def compute_output_shape(self, input_shape): 45 | return input_shape[0] 46 | 47 | 48 | @tf.keras.utils.register_keras_serializable() 49 | class PositionWiseFeedForward(Layer): 50 | 51 | def __init__(self, model_dim, inner_dim, trainable=True, **kwargs): 52 | self._model_dim = model_dim 53 | self._inner_dim = inner_dim 54 | self._trainable = trainable 55 | super(PositionWiseFeedForward, self).__init__(**kwargs) 56 | 57 | def build(self, input_shape): 58 | self.weights_inner = self.add_weight( 59 | shape=(input_shape[-1], self._inner_dim), 60 | initializer='glorot_uniform', 61 | trainable=self._trainable, 62 | name="weights_inner") 63 | self.weights_out = self.add_weight( 64 | 
shape=(self._inner_dim, self._model_dim), 65 | initializer='glorot_uniform', 66 | trainable=self._trainable, 67 | name="weights_out") 68 | self.bias_inner = self.add_weight( 69 | shape=(self._inner_dim,), 70 | initializer='uniform', 71 | trainable=self._trainable, 72 | name="bias_inner") 73 | self.bias_out = self.add_weight( 74 | shape=(self._model_dim,), 75 | initializer='uniform', 76 | trainable=self._trainable, 77 | name="bias_out") 78 | super(PositionWiseFeedForward, self).build(input_shape) 79 | 80 | def call(self, inputs, **kwargs): 81 | if K.dtype(inputs) != 'float32': 82 | inputs = K.cast(inputs, 'float32') 83 | inner_out = K.relu(K.dot(inputs, self.weights_inner) + self.bias_inner) 84 | outputs = K.dot(inner_out, self.weights_out) + self.bias_out 85 | return outputs 86 | 87 | def compute_output_shape(self, input_shape): 88 | return self._model_dim 89 | 90 | 91 | @tf.keras.utils.register_keras_serializable() 92 | class LayerNormalization(Layer): 93 | 94 | def __init__(self, epsilon=1e-8, **kwargs): 95 | self._epsilon = epsilon 96 | super(LayerNormalization, self).__init__(**kwargs) 97 | 98 | def build(self, input_shape): 99 | self.beta = self.add_weight( 100 | shape=(input_shape[-1],), 101 | initializer='zero', 102 | name='beta') 103 | self.gamma = self.add_weight( 104 | shape=(input_shape[-1],), 105 | initializer='one', 106 | name='gamma') 107 | super(LayerNormalization, self).build(input_shape) 108 | 109 | def call(self, inputs, **kwargs): 110 | mean, variance = tf.nn.moments(inputs, [-1], keepdims=True) 111 | normalized = (inputs - mean) / ((variance + self._epsilon) ** 0.5) 112 | outputs = self.gamma * normalized + self.beta 113 | return outputs 114 | 115 | def compute_output_shape(self, input_shape): 116 | return input_shape 117 | 118 | 119 | @tf.keras.utils.register_keras_serializable() 120 | class Transformer(Layer): 121 | 122 | def __init__(self, 123 | vocab_size, 124 | model_dim, 125 | n_heads=8, 126 | encoder_stack=6, 127 | decoder_stack=6, 128 | feed_forward_size=2048, 129 | dropout_rate=0.1, 130 | **kwargs): 131 | 132 | self._vocab_size = vocab_size 133 | self._model_dim = model_dim 134 | self._n_heads = n_heads 135 | self._encoder_stack = encoder_stack 136 | self._decoder_stack = decoder_stack 137 | self._feed_forward_size = feed_forward_size 138 | self._dropout_rate = dropout_rate 139 | super(Transformer, self).__init__(**kwargs) 140 | 141 | def build(self, input_shape): 142 | self.embeddings = self.add_weight( 143 | shape=(self._vocab_size, self._model_dim), 144 | initializer='glorot_uniform', 145 | trainable=True, 146 | name="embeddings") 147 | self.EncoderPositionEncoding = PositionEncoding(self._model_dim) 148 | self.EncoderMultiHeadAttentions = [ 149 | MultiHeadAttention(self._n_heads, self._model_dim // self._n_heads) 150 | for _ in range(self._encoder_stack) 151 | ] 152 | self.EncoderLayerNorms0 = [ 153 | LayerNormalization() 154 | for _ in range(self._encoder_stack) 155 | ] 156 | self.EncoderPositionWiseFeedForwards = [ 157 | PositionWiseFeedForward(self._model_dim, self._feed_forward_size) 158 | for _ in range(self._encoder_stack) 159 | ] 160 | self.EncoderLayerNorms1 = [ 161 | LayerNormalization() 162 | for _ in range(self._encoder_stack) 163 | ] 164 | self.DecoderPositionEncoding = PositionEncoding(self._model_dim) 165 | self.DecoderMultiHeadAttentions0 = [ 166 | MultiHeadAttention(self._n_heads, self._model_dim // self._n_heads, future=True) 167 | for _ in range(self._decoder_stack) 168 | ] 169 | self.DecoderLayerNorms0 = [ 170 | LayerNormalization() 171 | 
for _ in range(self._decoder_stack) 172 | ] 173 | self.DecoderMultiHeadAttentions1 = [ 174 | MultiHeadAttention(self._n_heads, self._model_dim // self._n_heads) 175 | for _ in range(self._decoder_stack) 176 | ] 177 | self.DecoderLayerNorms1 = [ 178 | LayerNormalization() 179 | for _ in range(self._decoder_stack) 180 | ] 181 | self.DecoderPositionWiseFeedForwards = [ 182 | PositionWiseFeedForward(self._model_dim, self._feed_forward_size) 183 | for _ in range(self._decoder_stack) 184 | ] 185 | self.DecoderLayerNorms2 = [ 186 | LayerNormalization() 187 | for _ in range(self._decoder_stack) 188 | ] 189 | super(Transformer, self).build(input_shape) 190 | 191 | def encoder(self, inputs): 192 | if K.dtype(inputs) != 'int32': 193 | inputs = K.cast(inputs, 'int32') 194 | 195 | masks = K.equal(inputs, 0) 196 | # Embeddings 197 | embeddings = K.gather(self.embeddings, inputs) 198 | embeddings *= self._model_dim ** 0.5 # Scale 199 | # Position Encodings 200 | position_encodings = self.EncoderPositionEncoding(embeddings) 201 | # Embeddings + Position-encodings 202 | encodings = embeddings + position_encodings 203 | # Dropout 204 | encodings = K.dropout(encodings, self._dropout_rate) 205 | 206 | for i in range(self._encoder_stack): 207 | # Multi-head-Attention 208 | attention = self.EncoderMultiHeadAttentions[i] 209 | attention_input = [encodings, encodings, encodings, masks] 210 | attention_out = attention(attention_input) 211 | # Add & Norm 212 | attention_out += encodings 213 | attention_out = self.EncoderLayerNorms0[i](attention_out) 214 | # Feed-Forward 215 | ff = self.EncoderPositionWiseFeedForwards[i] 216 | ff_out = ff(attention_out) 217 | # Add & Norm 218 | ff_out += attention_out 219 | encodings = self.EncoderLayerNorms1[i](ff_out) 220 | 221 | return encodings, masks 222 | 223 | def decoder(self, inputs): 224 | decoder_inputs, encoder_encodings, encoder_masks = inputs 225 | if K.dtype(decoder_inputs) != 'int32': 226 | decoder_inputs = K.cast(decoder_inputs, 'int32') 227 | 228 | decoder_masks = K.equal(decoder_inputs, 0) 229 | # Embeddings 230 | embeddings = K.gather(self.embeddings, decoder_inputs) 231 | embeddings *= self._model_dim ** 0.5 # Scale 232 | # Position Encodings 233 | position_encodings = self.DecoderPositionEncoding(embeddings) 234 | # Embeddings + Position-encodings 235 | encodings = embeddings + position_encodings 236 | # Dropout 237 | encodings = K.dropout(encodings, self._dropout_rate) 238 | 239 | for i in range(self._decoder_stack): 240 | # Masked-Multi-head-Attention 241 | masked_attention = self.DecoderMultiHeadAttentions0[i] 242 | masked_attention_input = [encodings, encodings, encodings, decoder_masks] 243 | masked_attention_out = masked_attention(masked_attention_input) 244 | # Add & Norm 245 | masked_attention_out += encodings 246 | masked_attention_out = self.DecoderLayerNorms0[i](masked_attention_out) 247 | 248 | # Multi-head-Attention 249 | attention = self.DecoderMultiHeadAttentions1[i] 250 | attention_input = [masked_attention_out, encoder_encodings, encoder_encodings, encoder_masks] 251 | attention_out = attention(attention_input) 252 | # Add & Norm 253 | attention_out += masked_attention_out 254 | attention_out = self.DecoderLayerNorms1[i](attention_out) 255 | 256 | # Feed-Forward 257 | ff = self.DecoderPositionWiseFeedForwards[i] 258 | ff_out = ff(attention_out) 259 | # Add & Norm 260 | ff_out += attention_out 261 | encodings = self.DecoderLayerNorms2[i](ff_out) 262 | 263 | # The pre-softmax projection shares its weights with the embeddings 264 | linear_projection = K.dot(encodings, K.transpose(self.embeddings)) 265 | outputs = K.softmax(linear_projection) 266 | return outputs 267 | 268 | def call(self, encoder_inputs, decoder_inputs, **kwargs): 269 | encoder_encodings, encoder_masks = self.encoder(encoder_inputs) 270 | decoder_outputs = self.decoder([decoder_inputs, encoder_encodings, encoder_masks]) 271 | return decoder_outputs 272 | 273 | def compute_output_shape(self, input_shape): 274 | return input_shape[0][0], input_shape[0][1], self._vocab_size 275 | 276 | def get_config(self): 277 | config = { 278 | "vocab_size": self._vocab_size, 279 | "model_dim": self._model_dim, 280 | "n_heads": self._n_heads, 281 | "encoder_stack": self._encoder_stack, 282 | "decoder_stack": self._decoder_stack, 283 | "feed_forward_size": self._feed_forward_size, 284 | "dropout_rate": self._dropout_rate 285 | } 286 | base_config = super(Transformer, self).get_config() 287 | return {**base_config, **config} 288 | 289 | 290 | class Noam(Callback): 291 | 292 | def __init__(self, model_dim, step_num=0, warmup_steps=4000, verbose=False): 293 | self._model_dim = model_dim 294 | self._step_num = step_num 295 | self._warmup_steps = warmup_steps 296 | self.verbose = verbose 297 | super(Noam, self).__init__() 298 | 299 | def on_train_begin(self, logs=None): 300 | logs = logs or {} 301 | init_lr = self._model_dim ** -.5 * self._warmup_steps ** -1.5 302 | K.set_value(self.model.optimizer.lr, init_lr) 303 | 304 | def on_batch_end(self, batch, logs=None): 305 | logs = logs or {} 306 | self._step_num += 1 307 | lrate = self._model_dim ** -.5 * min(self._step_num ** -.5, self._step_num * self._warmup_steps ** -1.5) 308 | K.set_value(self.model.optimizer.lr, lrate) 309 | 310 | def on_epoch_begin(self, epoch, logs=None): 311 | if self.verbose: 312 | lrate = K.get_value(self.model.optimizer.lr) 313 | print(f"epoch {epoch} lr: {lrate}") 314 | 315 | def on_epoch_end(self, epoch, logs=None): 316 | logs = logs or {} 317 | logs['lr'] = K.get_value(self.model.optimizer.lr) 318 | 319 | 320 | def label_smoothing(inputs, epsilon=0.1): 321 | """Label smoothing.""" 322 | output_dim = inputs.shape[-1] 323 | smooth_label = (1 - epsilon) * inputs + (epsilon / output_dim) 324 | return smooth_label 325 | 326 |
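A minimal end-to-end usage sketch for the layers above (the toy hyper-parameters, random data, and import path are illustrative assumptions; the repository's own example is examples/train_transformer_on_imdb_keras.py):

import numpy as np
import tensorflow as tf

from deep_recommenders.keras.models.nlp.transformer import Transformer, Noam

vocab_size, model_dim, seq_len = 5000, 64, 32

# Token-id sequences padded with 0: the layer derives attention masks from zeros.
encoder_inputs = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
decoder_inputs = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
probs = Transformer(vocab_size, model_dim, n_heads=4,
                    encoder_stack=2, decoder_stack=2,
                    feed_forward_size=128)(encoder_inputs, decoder_inputs)

model = tf.keras.Model([encoder_inputs, decoder_inputs], probs)
model.compile(optimizer="adam", loss="categorical_crossentropy")

tokens = np.random.randint(1, vocab_size, size=(8, seq_len)).astype("int32")
targets = tf.one_hot(np.roll(tokens, -1, axis=1), vocab_size)  # toy next-token targets
model.fit([tokens, tokens], targets, epochs=1, callbacks=[Noam(model_dim)])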
-------------------------------------------------------------------------------- /deep_recommenders/keras/models/retrieval/factorized_top_k.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | from typing import Dict, Optional, Text, Tuple, Union, Sequence, List 4 | 5 | import abc 6 | import contextlib 7 | import tensorflow as tf 8 | 9 | import faiss 10 | import numpy as np 11 | 12 | 13 | @contextlib.contextmanager 14 | def _wrap_batch_too_small_error(k: int): 15 | """Re-raises candidate-batch-too-small errors with a helpful message.""" 16 | try: 17 | yield 18 | except tf.errors.InvalidArgumentError as e: 19 | error_msg = str(e) 20 | if "input must have at least k columns" in error_msg: 21 | raise ValueError("Tried to retrieve k={k} top items, but the candidate batch is too small. " 22 | "To resolve this, 1. increase batch-size, 2. set `drop_remainder`=True, " 23 | "3. set `handle_incomplete_batches`=True in constructor.".format(k=k)) 24 | 25 | 26 | def _take_long_axis(arr: tf.Tensor, indices: tf.Tensor) -> tf.Tensor: 27 | """Gathers elements from `arr` at the given per-row column indices. 28 | Args: 29 | arr: source data, 2D 30 | indices: column indices, 2D 31 | Returns: 32 | The gathered elements, 2D 33 | """ 34 | row_indices = tf.tile( 35 | tf.expand_dims(tf.range(tf.shape(indices)[0]), 1), 36 | [1, tf.shape(indices)[1]]) 37 | gather_indices = tf.concat( 38 | [tf.reshape(row_indices, (-1, 1)), 39 | tf.reshape(indices, (-1, 1))], axis=1) 40 | 41 | return tf.reshape(tf.gather_nd(arr, gather_indices), tf.shape(indices)) 42 | 43 | 44 | def _exclude(scores: tf.Tensor, 45 | identifiers: tf.Tensor, 46 | exclude: tf.Tensor, 47 | k: int) -> Tuple[tf.Tensor, tf.Tensor]: 48 | """Removes the given candidate items from the top-K results. 49 | Args: 50 | scores: candidate scores. 2D 51 | identifiers: candidate identifiers. 2D 52 | exclude: identifiers to exclude. 2D 53 | k: number of candidates to return 54 | Returns: 55 | Tuple(top k candidate scores, top k candidate identifiers) 56 | """ 57 | idents = tf.expand_dims(identifiers, -1) 58 | exclude = tf.expand_dims(exclude, 1) 59 | 60 | isin = tf.math.reduce_any(tf.math.equal(idents, exclude), -1) 61 | 62 | adjusted_scores = (scores - tf.cast(isin, tf.float32) * 1.0e5) 63 | 64 | k = tf.math.minimum(k, tf.shape(scores)[1]) 65 | 66 | _, indices = tf.math.top_k(adjusted_scores, k=k) 67 | return _take_long_axis(scores, indices), _take_long_axis(identifiers, indices) 68 | 69 | 70 | class TopK(tf.keras.Model, abc.ABC): 71 | """Interface for top-K layers. 72 | Note: implementers must provide two methods: 73 | 1. index: builds the retrieval index 74 | 2. call: queries the index 75 | """ 76 | 77 | def __init__(self, k: int, *args, **kwargs): 78 | super().__init__(*args, **kwargs) 79 | 80 | self._k = k 81 | 82 | @abc.abstractmethod 83 | def index(self, 84 | candidates: Union[tf.Tensor, tf.data.Dataset], 85 | identifiers: Optional[Union[tf.Tensor, tf.data.Dataset]] = None) -> "TopK": 86 | """Builds the retrieval index. 87 | args: 88 | candidates: candidate embeddings 89 | identifiers: identifiers for the candidate embeddings (optional) 90 | returns: 91 | Self. 92 | """ 93 | 94 | raise NotImplementedError("Implementers must provide `index` method.") 95 | 96 | @abc.abstractmethod 97 | def call(self, 98 | queries: Union[tf.Tensor, Dict[Text, tf.Tensor]], 99 | k: Optional[int] = None, 100 | **kwargs) -> Tuple[tf.Tensor, tf.Tensor]: 101 | """Queries the index. 102 | args: 103 | queries: query embeddings, 104 | k: number of candidates to return 105 | returns: 106 | Tuple(top k candidate scores, top k candidate identifiers) 107 | """ 108 | 109 | raise NotImplementedError() 110 | 111 | @tf.function 112 | def query_with_exclusions( # pylint: disable=method-hidden 113 | self, 114 | queries: Union[tf.Tensor, Dict[Text, tf.Tensor]], 115 | exclusions: tf.Tensor, 116 | k: Optional[int] = None) -> Tuple[tf.Tensor, tf.Tensor]: 117 | """Queries the index and filters out the exclusions. 118 | Args: 119 | queries: query embeddings, 120 | exclusions: candidate identifiers to filter out of the top-K candidate set. 121 | k: number of candidates to return 122 | Returns: 123 | Tuple(top k candidate scores, top k candidate identifiers) 124 | """ 125 | k = k if k is not None else self._k 126 | # Over-retrieve, then drop the excluded candidates and keep the top k. 127 | adjusted_k = k + exclusions.shape[1] 128 | scores, identifiers = self(queries=queries, k=adjusted_k) 129 | return _exclude(scores, identifiers, exclusions, k) 130 | 131 | def _reset_tf_function_cache(self): 132 | """Resets the tf.function cache.""" 133 | 134 | if hasattr(self.query_with_exclusions, "python_function"): 135 | self.query_with_exclusions = tf.function( 136 | self.query_with_exclusions.python_function) 137 | 138 | 139 | class Streaming(TopK): 140 | """Retrieves top k scoring items and identifiers from a large dataset.""" 141 | 142 | def __init__(self, 143 | k: int = 10, 144 | query_model: Optional[tf.keras.Model] = None, 145 | handle_incomplete_batches: bool = True, 146 | num_parallel_calls: int = tf.data.experimental.AUTOTUNE, 147 | sorted_order: bool = True, 148 | *args, 149 | **kwargs): 150 | super().__init__(k, *args, **kwargs) 151 | 152 | self._query_model = query_model 153 | self._handle_incomplete_batches = handle_incomplete_batches 154 | self._num_parallel_calls = num_parallel_calls 155 | self._sorted_order = sorted_order 156 | 157 | self._candidates = None 158 | self._identifiers = None 159 | 160 | self._counter = self.add_weight("counter", dtype=tf.int32, trainable=False) 161 | 162 | def index(self, 163 | candidates: tf.data.Dataset, 164 | identifiers: Optional[tf.data.Dataset] = None, 165 | **kwargs) -> "Streaming": 166 | """Builds the retrieval index. 167 | Args: 168 | candidates: Dataset of candidate embeddings 169 | identifiers: Dataset of identifiers for the candidate embeddings (optional) 170 | Returns: 171 | Self. 172 | """ 173 | self._candidates = candidates 174 | self._identifiers = identifiers 175 | 176 | return self 177 | 178 | def call(self, 179 | queries: Union[tf.Tensor, Dict[Text, tf.Tensor]], 180 | k: Optional[int] = None, 181 | **kwargs) -> Tuple[tf.Tensor, tf.Tensor]: 182 | """Queries the index. 183 | args: 184 | queries: query embeddings, 185 | k: number of candidates to return 186 | returns: 187 | Tuple(top k candidate scores, top k candidate identifiers) 188 | """ 189 | k = k if k is not None else self._k 190 | 191 | if self._candidates is None: 192 | raise ValueError("The `index` method must be called first to " 193 | "create the retrieval index.") 194 | 195 | if self._query_model is not None: 196 | queries = self._query_model(queries) 197 | 198 | # Reset the candidate counter 199 | self._counter.assign(0) 200 | 201 | def top_scores(candidate_index: tf.Tensor, 202 | candidate_batch: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: 203 | """Computes the top-K scores and indices within one batch of candidates.""" 204 | scores = tf.matmul(queries, candidate_batch, transpose_b=True) 205 | 206 | if self._handle_incomplete_batches: 207 | k_ = tf.math.minimum(k, tf.shape(scores)[1]) 208 | else: 209 | k_ = k 210 | 211 | scores, indices = tf.math.top_k(scores, k=k_, sorted=self._sorted_order) 212 | 213 | return scores, tf.gather(candidate_index, indices) 214 | 215 | def top_k(state: Tuple[tf.Tensor, tf.Tensor], 216 | x: Tuple[tf.Tensor, tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]: 217 | """Reduction function. 218 | Merges the running top-K state with a new batch's top-K and re-selects the overall top K. 219 | """ 220 | state_scores, state_indices = state 221 | x_scores, x_indices = x 222 | 223 | joined_scores = tf.concat([state_scores, x_scores], axis=1) 224 | joined_indices = tf.concat([state_indices, x_indices], axis=1) 225 | 226 | if self._handle_incomplete_batches: 227 | k_ = tf.math.minimum(k, tf.shape(joined_scores)[1]) 228 | else: 229 | k_ = k 230 | 231 | scores, indices = tf.math.top_k(joined_scores, k=k_, sorted=self._sorted_order) 232 | 233 | return scores, tf.gather(joined_indices, indices, batch_dims=1) 234 | 235 | # Initialize the reduction state 236 | if self._identifiers is not None: 237 | index_dtype = self._identifiers.element_spec.dtype 238 | else: 239 | index_dtype = tf.int32 240 | 241 | initial_state = (tf.zeros((tf.shape(queries)[0], 0), dtype=tf.float32), 242 | tf.zeros((tf.shape(queries)[0], 0), dtype=index_dtype)) 243 | 244 | def enumerate_rows(batch: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: 245 | """Enumerates rows in each batch using a total element counter.""" 246 | starting_counter = self._counter.read_value() 247 | end_counter = self._counter.assign_add(tf.shape(batch)[0]) 248 | 249 | return tf.range(starting_counter, end_counter), batch 250 | 251 | if self._identifiers is not None: 252 | dataset = tf.data.Dataset.zip((self._identifiers, self._candidates)) 253 | else: 254 | dataset = self._candidates.map(enumerate_rows) 255 | 256 | with _wrap_batch_too_small_error(k): 257 | result = (dataset 258 | .map(top_scores, num_parallel_calls=self._num_parallel_calls) # Map: per-batch top-K 259 | .reduce(initial_state, top_k)) # Reduce: global top-K 260 | return result 261 | 262 | 263 | class BruteForce(TopK): 264 | """Brute-force retrieval.""" 265 | 266 | def __init__(self, 267 | k: int = 10, 268 | query_model: Optional[tf.keras.Model] = None, 269 | *args, 270 | **kwargs): 271 | super().__init__(k, *args, **kwargs) 272 | 273 | self._query_model = query_model 274 | 275 | def index(self, 276 | candidates: Union[tf.Tensor, tf.data.Dataset], 277 | identifiers: Optional[Union[tf.Tensor, tf.data.Dataset]] = None) -> "BruteForce": 278 | 279 | if isinstance(candidates, tf.data.Dataset): 280 | candidates = tf.concat(list(candidates), axis=0) 281 | 282 | if identifiers is None: 283 | identifiers = tf.range(candidates.shape[0]) 284 | 285 | if isinstance(identifiers, tf.data.Dataset): 286 | identifiers = tf.concat(list(identifiers), axis=0) 287 | 288 | if tf.rank(candidates) != 2: 289 | raise ValueError("`candidates` ndim should be 2. 
" 290 | "Got `ndim` = {}".format(tf.rank(candidates))) 291 | 292 | self._candidates = self.add_weight( 293 | name="candidates", 294 | dtype=candidates.dtype, 295 | shape=candidates.shape, 296 | initializer=tf.keras.initializers.Zeros(), 297 | trainable=False 298 | ) 299 | 300 | identifiers_initial_value = tf.zeros((), dtype=identifiers.dtype) 301 | 302 | self._identifiers = self.add_weight( 303 | name="identifiers", 304 | dtype=identifiers.dtype, 305 | shape=identifiers.shape, 306 | initializer=tf.keras.initializers.Constant(value=identifiers_initial_value), 307 | trainable=False 308 | ) 309 | 310 | self._candidates.assign(candidates) 311 | self._identifiers.assign(identifiers) 312 | 313 | self._reset_tf_function_cache() 314 | return self 315 | 316 | def call(self, 317 | queries: Union[tf.Tensor, Dict[Text, tf.Tensor]], 318 | k: Optional[int] = None, 319 | **kwargs) -> Tuple[tf.Tensor, tf.Tensor]: 320 | 321 | k = k if k is not None else self._k 322 | 323 | if self._candidates is None: 324 | raise ValueError("The `index` method must be called first to " 325 | "create the retrieval index.") 326 | 327 | if self._query_model is not None: 328 | queries = self._query_model(queries) 329 | 330 | scores = tf.matmul(queries, self._candidates, transpose_b=True) 331 | 332 | scores, indices = tf.math.top_k(scores, k=k) 333 | 334 | return scores, tf.gather(self._identifiers, indices) 335 | 336 | 337 | class Faiss(TopK): 338 | """(Facebook)Faiss retrieval index for a factorized retrieval model""" 339 | 340 | def __init__(self, 341 | k: int = 10, 342 | query_model: Optional[tf.keras.Model] = None, 343 | nlist: Optional[int] = 1, 344 | nprobe: Optional[int] = 1, 345 | normalize: bool = False, 346 | *args, 347 | **kwargs): 348 | super().__init__(k, *args, **kwargs) 349 | 350 | self._query_model = query_model 351 | self._nlist = nlist 352 | self._nprobe = nprobe 353 | self._normalize = normalize 354 | 355 | def build_searcher( 356 | candidates: Union[np.ndarray, tf.Tensor], 357 | identifiers: Optional[Union[np.ndarray, tf.Tensor]] = None, 358 | ) -> Union[faiss.swigfaiss.IndexIDMap, faiss.swigfaiss.IndexIVFFlat]: 359 | 360 | if isinstance(candidates, tf.Tensor): 361 | candidates = candidates.numpy() 362 | 363 | if candidates.dtype != "float32": 364 | candidates = candidates.astype(np.float32) 365 | 366 | d = candidates.shape[1] 367 | quantizer = faiss.IndexFlatIP(d) 368 | index = faiss.IndexIVFFlat(quantizer, d, self._nlist, faiss.METRIC_INNER_PRODUCT) 369 | if self._normalize is True: 370 | faiss.normalize_L2(candidates) 371 | index.train(candidates) # pylint: disable=no-value-for-parameter 372 | 373 | if identifiers is not None: 374 | if isinstance(identifiers, tf.Tensor): 375 | identifiers = identifiers.numpy() 376 | if identifiers.dtype != np.int64: 377 | try: 378 | identifiers = identifiers.astype(np.int64) 379 | except: 380 | raise ValueError("`identifiers` dtype must be `int64`." 
381 | "Got `dtype` = {}".format(identifiers.dtype)) 382 | 383 | index.add_with_ids(candidates, identifiers) # pylint: disable=no-value-for-parameter 384 | else: 385 | index.add(candidates) 386 | 387 | return index 388 | 389 | self._build_searcher = build_searcher 390 | self._searcher = None 391 | self._identifiers = None 392 | 393 | def index(self, 394 | candidates: Union[tf.Tensor, tf.data.Dataset], 395 | identifiers: Optional[Union[tf.Tensor, tf.data.Dataset]] = None) -> "Faiss": 396 | 397 | if isinstance(candidates, tf.data.Dataset): 398 | candidates = tf.concat(list(candidates), axis=0) 399 | 400 | if identifiers is None: 401 | identifiers = tf.range(candidates.shape[0]) 402 | 403 | if isinstance(identifiers, tf.data.Dataset): 404 | identifiers = tf.concat(list(identifiers), axis=0) 405 | 406 | if tf.rank(candidates) != 2: 407 | raise ValueError("`candidates` ndim should be 2. " 408 | "Got `ndim` = {}".format(tf.rank(candidates))) 409 | 410 | if identifiers.dtype not in ("int8", "int16", "int32", "int64"): 411 | self._searcher = self._build_searcher(candidates, identifiers=None) 412 | # 初始化identifiers 413 | identifiers_initial_value = tf.zeros((), dtype=identifiers.dtype) 414 | 415 | self._identifiers = self.add_weight( 416 | name="identifiers", 417 | dtype=identifiers.dtype, 418 | shape=identifiers.shape, 419 | initializer=tf.keras.initializers.Constant( 420 | value=identifiers_initial_value), 421 | trainable=False) 422 | self._identifiers.assign(identifiers) 423 | else: 424 | self._searcher = self._build_searcher(candidates, identifiers=identifiers) 425 | 426 | self._reset_tf_function_cache() 427 | 428 | return self 429 | 430 | def call(self, 431 | queries: Union[tf.Tensor, Dict[Text, tf.Tensor]], 432 | k: Optional[int] = None) -> Tuple[tf.Tensor, tf.Tensor]: 433 | 434 | k = k if k is not None else self._k 435 | 436 | if self._searcher is None: 437 | raise ValueError("The `index` method must be called first to " 438 | "create the retrieval index.") 439 | 440 | if self._query_model is not None: 441 | queries = self._query_model(queries) 442 | 443 | if not isinstance(queries, tf.Tensor): 444 | raise ValueError(f"Queries must be a tensor, got {type(queries)}.") 445 | 446 | def _search(queries, k): 447 | queries = tf.make_ndarray(tf.make_tensor_proto(queries)) 448 | 449 | if self._normalize is True: 450 | faiss.normalize_L2(queries) 451 | 452 | self._searcher.nprobe = self._nprobe 453 | distances, indices = self._searcher.search(queries, int(k)) 454 | return distances, indices 455 | 456 | distances, indices = tf.py_function(_search, [queries, k], [tf.float32, tf.int32]) 457 | 458 | if self._identifiers is None: 459 | return distances, indices 460 | 461 | return distances, tf.gather(self._identifiers, indices) 462 | 463 | 464 | class FactorizedTopK(tf.keras.layers.Layer): 465 | """ Metric for a retrieval model. 
""" 466 | 467 | def __init__(self, 468 | candidates: Union[TopK, tf.data.Dataset], 469 | metrics: Optional[Sequence[tf.keras.metrics.Metric]] = None, 470 | k: int = 100, 471 | name: Text = "factorized_top_k", 472 | **kwargs): 473 | super(FactorizedTopK, self).__init__(name=name, **kwargs) 474 | 475 | if metrics is None: 476 | metrics = [ 477 | tf.keras.metrics.TopKCategoricalAccuracy( 478 | k=n, name=f"{self.name}/top_{n}_categorical_accuracy") 479 | for n in [1, 5, 10, 50, 100] 480 | ] 481 | 482 | if isinstance(candidates, tf.data.Dataset): 483 | candidates = Streaming(k=k).index(candidates) 484 | 485 | self._candidates = candidates 486 | self._metrics = metrics 487 | self._k = k 488 | 489 | def update_state(self, 490 | query_embeddings: tf.Tensor, 491 | true_candidate_embeddings: tf.Tensor) -> tf.Operation: 492 | """Update metric""" 493 | 494 | positive_scores = tf.reduce_sum( 495 | query_embeddings * true_candidate_embeddings, axis=1, keepdims=True) 496 | 497 | top_k_predictions, _ = self._candidates(query_embeddings, k=self._k) 498 | 499 | y_true = tf.concat([ 500 | tf.ones(tf.shape(positive_scores)), 501 | tf.zeros_like(top_k_predictions) 502 | ], axis=1) 503 | y_pred = tf.concat([ 504 | positive_scores, 505 | top_k_predictions 506 | ], axis=1) 507 | 508 | update_ops = [] 509 | for metric in self._metrics: 510 | update_ops.append(metric.update_state(y_true=y_true, y_pred=y_pred)) 511 | 512 | return tf.group(update_ops) 513 | 514 | def reset_states(self) -> None: 515 | """Resets the metrics.""" 516 | for metric in self.metrics: 517 | metric.reset_states() 518 | 519 | def result(self) -> List[tf.Tensor]: 520 | """Returns a list of metric results.""" 521 | 522 | return [metric.result() for metric in self.metrics] 523 | --------------------------------------------------------------------------------