├── .github
└── workflows
│ ├── pylint-presubmit.yml
│ └── test.yaml
├── .gitignore
├── .pylintrc
├── CHANGELOG.md
├── CITATION.cff
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── assets
├── alternate_dcn_structures.png
├── cross_features.gif
├── feature_crossing.png
├── full_logo.png
├── low_rank_dcn.png
├── parallel_deep_cross.png
├── stacked_deep_cross.png
└── stacked_structure.png
├── docs
├── _book.yaml
├── _index.yaml
└── examples
│ ├── _toc.yaml
│ ├── basic_ranking.ipynb
│ ├── basic_retrieval.ipynb
│ ├── context_features.ipynb
│ ├── dcn.ipynb
│ ├── deep_recommenders.ipynb
│ ├── diststrat_retrieval.ipynb
│ ├── efficient_serving.ipynb
│ ├── featurization.ipynb
│ ├── listwise_ranking.ipynb
│ ├── multitask.ipynb
│ ├── quickstart.ipynb
│ ├── ranking_tfx.ipynb
│ ├── sequential_retrieval.ipynb
│ └── tpu_embedding_layer.ipynb
├── requirements.txt
├── setup.py
├── tensorflow_recommenders
├── .flake8
├── __init__.py
├── dev_requirements.txt
├── examples
│ ├── __init__.py
│ ├── movielens.py
│ └── nbtool.py
├── experimental
│ ├── __init__.py
│ ├── layers
│ │ ├── __init__.py
│ │ └── embedding
│ │ │ ├── __init__.py
│ │ │ ├── partial_tpu_embedding.py
│ │ │ └── partial_tpu_embedding_test.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── ranking.py
│ │ └── ranking_test.py
│ └── optimizers
│ │ ├── __init__.py
│ │ ├── clippy_adagrad.py
│ │ ├── clippy_adagrad_test.py
│ │ ├── composite_optimizer.py
│ │ └── composite_optimizer_test.py
├── layers
│ ├── __init__.py
│ ├── blocks.py
│ ├── embedding
│ │ ├── __init__.py
│ │ ├── tpu_embedding_layer.py
│ │ └── tpu_embedding_layer_test.py
│ ├── factorized_top_k.py
│ ├── factorized_top_k_test.py
│ ├── feature_interaction
│ │ ├── __init__.py
│ │ ├── dcn.py
│ │ ├── dcn_test.py
│ │ ├── dot_interaction.py
│ │ ├── dot_interaction_test.py
│ │ ├── multi_layer_dcn.py
│ │ └── multi_layer_dcn_test.py
│ ├── loss.py
│ └── loss_test.py
├── metrics
│ ├── __init__.py
│ ├── factorized_top_k.py
│ └── factorized_top_k_test.py
├── models
│ ├── __init__.py
│ ├── base.py
│ └── base_test.py
├── public.py
├── tasks
│ ├── __init__.py
│ ├── base.py
│ ├── ranking.py
│ ├── ranking_test.py
│ ├── retrieval.py
│ └── retrieval_test.py
└── types.py
└── tools
├── build_api_docs.py
└── build_scripts
├── pip_install.sh
├── release.sh
├── test.sh
└── utils.sh
/.github/workflows/pylint-presubmit.yml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | name: PyLint
17 | on:
18 | pull_request:
19 | paths:
20 | - '**.py'
21 |
22 | jobs:
23 | build:
24 | name: PyLint
25 | runs-on: ubuntu-latest
26 | steps:
27 | - name: Checkout code
28 | uses: actions/checkout@v2
29 | - name: Get file changes
30 | id: get_file_changes
31 | uses: trilom/file-changes-action@v1.2.4
32 | with:
33 | output: ' '
34 | - name: Report list of changed files
35 | run: |
36 | echo Changed files: ${{ steps.get_file_changes.outputs.files }}
37 | - name: Set up Python 3.9
38 | uses: actions/setup-python@v2
39 | with:
40 | python-version: "3.9"
41 | - name: Install Python dependencies
42 | run: |
43 | python -m pip install --upgrade pip
44 | pip install pylint numpy wheel
45 | - name: Run PyLint on changed files
46 | run: |
47 | echo "${{ steps.get_file_changes.outputs.files}}" | tr " " "\n" | grep "\.py$" | xargs -r pylint --rcfile=.pylintrc  # match a literal .py suffix; -r: skip pylint when no files match
48 |
--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
1 | name: TensorFlow Recommenders Test
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 |
8 | runs-on: ubuntu-latest
9 | strategy:
10 | matrix:
11 | python-version: [3.8]
12 |
13 | steps:
14 | - uses: actions/checkout@v2
15 | - name: Set up Python ${{ matrix.python-version }}
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: ${{ matrix.python-version }}
19 | - name: Install dependencies
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install flake8 pytest
23 | pip install -e .[docs]
24 | - name: Lint with flake8
25 | run: |
26 | # stop the build if there are Python syntax errors or undefined names
27 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
29 | # Disable flake checks initially.
30 | # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E111,E731,F401
31 | - name: Test with pytest
32 | run: |
33 | pytest
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## [unreleased][unreleased]
4 |
5 | ## [0.7.3][2023-02-02]
6 |
7 | ### Changed
8 |
9 | - The `Retrieval` task now accepts a list of factorized metrics instead of a
10 | single optional metric.
11 |
12 | ### Added
13 |
14 | - `tfrs.experimental.optimizers.ClippyAdagrad`: a new optimizer based on
15 | `tf.keras.optimizers.Adagrad` that is able to improve training stability.
16 | - `tfrs.metrics.FactorizedTopK` now accepts sample weights which are used to
17 | compute weighted top k metrics.
18 |
19 | ## [0.7.2][2022-09-28]
20 |
21 | - Improved support for using TPUEmbedding under parameter server strategy.
22 |
23 | ## [0.7.0][2022-07-07]
24 |
25 | A number of changes to make factorized top-K metric computation more accurate
26 | and less prone to user error.
27 |
28 | ### Changed
29 |
30 | - `tfrs.layers.embedding.TPUEmbedding` now supports input features with
31 | dynamic shape. `batch_size` argument is deprecated and no longer required.
32 |
33 | - `tfrs.layers.embedding.TPUEmbedding` now supports running on different
34 | versions of TPU.
35 |
36 | - Pinned TensorFlow to >= 2.9.0 which works with Scann 1.2.7.
37 |
38 | - `tfrs.tasks.Ranking.call` now accepts a `compute_batch_metrics` argument to
39 | allow switching off batch metric computation. Following this change,
40 | the `compute_metrics` argument does not impact computation of batch metrics.
41 |
42 | ### Breaking changes
43 |
44 | - `tfrs.metrics.FactorizedTopK` requires the candidate ids for positive
45 | candidates to be supplied when using approximate top-K sources. Each top-K
46 | layer now has an `exact` method to broadcast its ability to return exact or
47 | approximate top-K results.
48 | - Removed `metrics` constructor parameter for `tfrs.metrics.FactorizedTopK`.
49 | `FactorizedTopK` only makes sense with top-k metrics, and this change
50 | enforces this.
51 | - Replaced the `k` constructor argument in `tfrs.metrics.FactorizedTopK` with
52 | `ks`: a list of `k` values at which to compute the top k metric.
53 |
54 | ### Changed
55 |
56 | - The `tfrs.metrics.FactorizedTopK` metric can now compute candidate-id based
57 | metrics when given the `true_candidate_ids` argument in its `call` method.
58 |
59 | ### Added
60 |
61 | - The `Retrieval` task now also accepts a `loss_metrics` argument.
62 |
63 | ## [0.6.0][2021-08-23]
64 |
65 | ### Changed
66 |
67 | - Pinned TensorFlow to >= 2.6.0, which works with Scann 1.2.3.
68 |
69 | ### Breaking changes
70 |
71 | - `TopK` layer indexing API changed. Indexing with datasets is now done via
72 | the `index_from_dataset` method. This change reduces the possibility of
73 | misaligning embeddings and candidate identifiers when indexing via
74 | non-deterministic datasets.
75 |
76 | ## [0.5.2][2021-07-15]
77 |
78 | ### Fixed
79 |
80 | - Fixed error in default arguments to `tfrs.experimental.models.Ranking`
81 | (https://github.com/tensorflow/recommenders/issues/311).
82 | - Fixed the TPUEmbedding layer to use named parameters.
83 |
84 | ### Added
85 |
86 | - Added `batch_metrics` to `tfrs.tasks.Retrieval` for measuring how good the
87 | model is at picking out the true candidate for a query from other candidates
88 | in the batch.
89 | - Added `tfrs.experimental.layers.embedding.PartialTPUEmbedding` layer, which
90 | uses `tfrs.layers.embedding.TPUEmbedding` for large embedding lookups and
91 | `tf.keras.layers.Embedding` for smaller embedding lookups.
92 |
93 | ## [0.5.1][2021-05-14]
94 |
95 | ### Changed
96 |
97 | - Supplying incompatibly-shaped candidates and identifiers inputs to
98 | `factorized_top_k` layers will now raise (to prevent issues similar to
99 | https://github.com/tensorflow/recommenders/issues/286).
100 |
101 | ## [0.5.0][2021-05-06]
102 |
103 | ### Changed
104 |
105 | - Fixed a bug in `tfrs.layers.loss.SamplingProbablityCorrection`: the log of
106 | the item probability is now subtracted from the logits.
107 | - `tfrs.experimental.optimizers.CompositeOptimizer`: an optimizer that
108 | composes multiple individual optimizers which can be applied to different
109 | subsets of the model's variables.
110 | - `tfrs.layers.dcn.Cross` and `DotInteraction` layers have been moved to
111 | `tfrs.layers.feature_interaction` package.
112 |
113 | ### Added
114 |
115 | - `tfrs.experimental.models.Ranking`, an experimental pre-built model for
116 | ranking tasks. Can be used as DLRM like model with Dot Product feature
117 | interaction or DCN like model with Cross layer.
118 |
119 | ## [0.4.0][2021-01-20]
120 |
121 | ### Added
122 |
123 | - `TopK` layers now come with a `query_with_exclusions` method, allowing
124 | certain candidates to be excluded from top-k retrieval.
125 | - `TPUEmbedding` Keras layer for accelerating embedding lookups for large
126 | tables with TPU.
127 |
128 | ### Changed
129 |
130 | - `factorized_top_k.Streaming` layer now accepts a query model, like other
131 | `factorized_top_k` layers.
132 |
133 | - Updated ScaNN to 1.2.0, which requires TensorFlow 2.4.x. When not using
134 | ScaNN, any TF >= 2.3 is still supported.
135 |
136 | ## [0.3.2][2020-12-22]
137 |
138 | ### Changed
139 |
140 | - Pinned TensorFlow to >= 2.3 when ScaNN is not being installed. When ScaNN is
141 | being installed, we pin on >= 2.3, < 2.4. This allows users to use TFRS on
142 | TF 2.4 when they are not using ScaNN.
143 |
144 | ## [0.3.1][2020-12-22]
145 |
146 | ### Changed
147 |
148 | - Pinned TensorFlow to 2.3.x and ScaNN to 1.1.1 to ensure TF and ScaNN
149 | versions are in lockstep.
150 |
151 | ## [0.3.0][2020-11-18]
152 |
153 | ### Added
154 |
155 | - Deep cross networks: efficient ways of learning feature interactions.
156 | - ScaNN integration: efficient approximate maximum inner product search for
157 | fast retrieval.
158 |
159 | ## [0.2.0][2020-10-15]
160 |
161 | ### Added
162 |
163 | - `tfrs.tasks.Ranking.call` now accepts a `compute_metrics` argument to allow
164 | switching off metric computation.
165 | - `tfrs.tasks.Ranking` now accepts label and prediction metrics.
166 | - Added metrics setters/getters on `tfrs.tasks.Retrieval`.
167 |
168 | ### Breaking changes
169 |
170 | - Corpus retrieval metrics and layers have been reworked.
171 |
172 | `tfrs.layers.corpus.DatasetTopK` has been removed,
173 | `tfrs.layers.corpus.DatasetIndexedTopK` renamed to
174 | `tfrs.layers.factorized_top_k.Streaming`, `tfrs.layers.ann.BruteForce`
175 | renamed to `tfrs.layers.factorized_top_k.BruteForce`. All top-k retrieval
176 | layers (`BruteForce`, `Streaming`) now follow a common interface.
177 |
178 | ### Changed
179 |
180 | - `Dataset` parallelism enabled by default in `DatasetTopK` and
181 | `DatasetIndexedTopK` layers, bringing over 2x speed-ups to evaluation
182 | workloads.
183 | - `evaluate_metrics` argument to `tfrs.tasks.Retrieval.call` renamed to
184 | `compute_metrics`.
185 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: "Kula"
5 | given-names: "Maciej"
6 | - family-names: "Chen"
7 | given-names: "James"
8 | - family-names: "Yi"
9 | given-names: "Xinyang"
10 | - family-names: "Yao"
11 | given-names: "Tiansheng"
12 | - family-names: "Sathiamoorthy"
13 | given-names: "Maheswaran"
14 | - family-names: "Hong"
15 | given-names: "Lichan"
16 | - family-names: "Chi"
17 | given-names: "Ed"
18 | title: "TensorFlow Recommenders"
19 | date-released: 2020-09-16
20 | url: "https://github.com/tensorflow/recommenders"
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 |
14 |
15 | It helps with the full workflow of building a recommender system: data
16 | preparation, model formulation, training, evaluation, and deployment.
17 |
18 |
19 | It's built on Keras and aims to have a gentle learning curve while
20 | still giving you the flexibility to build complex models.
21 |
22 |
23 | TFRS makes it possible to:
24 |
41 | import tensorflow_datasets as tfds 42 | import tensorflow_recommenders as tfrs 43 | 44 | # Load data on movie ratings. 45 | ratings = tfds.load("movielens/100k-ratings", split="train") 46 | movies = tfds.load("movielens/100k-movies", split="train") 47 | 48 | # Build flexible representation models. 49 | user_model = tf.keras.Sequential([...]) 50 | movie_model = tf.keras.Sequential([...]) 51 | 52 | # Define your objectives. 53 | task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK( 54 | movies.batch(128).map(movie_model) 55 | ) 56 | ) 57 | 58 | # Create a retrieval model. 59 | model = MovielensModel(user_model, movie_model, task) 60 | model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5)) 61 | 62 | # Train. 63 | model.fit(ratings.batch(4096), epochs=3) 64 | 65 | # Set up retrieval using trained representations. 66 | index = tfrs.layers.ann.BruteForce(model.user_model) 67 | index.index_from_dataset( 68 | movies.batch(100).map(lambda title: (title, model.movie_model(title))) 69 | ) 70 | 71 | # Get recommendations. 
72 | _, titles = index(np.array(["42"])) 73 | print(f"Recommendations for user 42: {titles[0, :3]}") 74 |75 | {% dynamic if request.tld != 'cn' %} 76 | Run in a Notebook 77 | {% dynamic endif %} 78 | 79 | - classname: devsite-landing-row-cards 80 | items: 81 | - heading: "Introduction to TensorFlow Recommenders" 82 | youtube_id: jz0-satrmrA 83 | buttons: 84 | - label: "Watch the introduction video" 85 | path: https://www.youtube.com/watch?v=jz0-satrmrA&list=PLQY2H8rRoyvy2MiyUBz5RWZr5MPFkV3qz&index=4 86 | - label: "Watch the video series" 87 | path: https://goo.gle/3Bi8NUS 88 | - heading: "TensorFlow Recommenders: Scalable retrieval and feature interaction modelling" 89 | image_path: /resources/images/google-research-card-16x9.png 90 | path: https://blog.tensorflow.org/2020/11/tensorflow-recommenders-scalable-retrieval-feature-interaction-modelling.html 91 | buttons: 92 | - label: "Read on TensorFlow blog" 93 | path: https://blog.tensorflow.org/2020/11/tensorflow-recommenders-scalable-retrieval-feature-interaction-modelling.html 94 | - heading: "Introducing TensorFlow Recommenders" 95 | image_path: /resources/images/tf-logo-card-16x9.png 96 | path: https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html 97 | buttons: 98 | - label: "Read on TensorFlow blog" 99 | path: https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html 100 | - classname: devsite-landing-row-cards 101 | items: 102 | - heading: "TensorFlow Recommenders on GitHub" 103 | image_path: /resources/images/github-card-16x9.png 104 | path: https://github.com/tensorflow/recommenders 105 | buttons: 106 | - label: "View on GitHub" 107 | path: https://github.com/tensorflow/recommenders 108 | - heading: "" 109 | options: 110 | - hidden 111 | - heading: "" 112 | options: 113 | - hidden 114 | -------------------------------------------------------------------------------- /docs/examples/_toc.yaml: 
-------------------------------------------------------------------------------- 1 | toc: 2 | - heading: "Beginner" 3 | style: divider 4 | - title: "Recommender basics" 5 | style: accordion 6 | section: 7 | - title: "Recommending movies: retrieval" 8 | path: /recommenders/examples/basic_retrieval 9 | - title: "Recommending movies: ranking" 10 | path: /recommenders/examples/basic_ranking 11 | - title: "Retrieval with distribution strategy" 12 | path: /recommenders/examples/diststrat_retrieval 13 | - title: "Retrieval with sequential model" 14 | path: /recommenders/examples/sequential_retrieval 15 | - title: "Retrieval with TFX" 16 | status: external 17 | path: /tfx/tutorials/tfx/recommenders 18 | - title: "Ranking with TFX" 19 | path: /recommenders/examples/ranking_tfx 20 | - title: "Large Embeddings with TPU" 21 | path: /recommenders/examples/tpu_embedding_layer 22 | - title: "Using rich features" 23 | style: accordion 24 | section: 25 | - title: "Feature preprocessing" 26 | path: /recommenders/examples/featurization 27 | - title: "Leveraging context features" 28 | path: /recommenders/examples/context_features 29 | - title: "Building deep retrieval models" 30 | path: /recommenders/examples/deep_recommenders 31 | - heading: "Intermediate" 32 | style: divider 33 | - title: "Multitask recommenders" 34 | path: /recommenders/examples/multitask 35 | - title: "Cross networks" 36 | path: /recommenders/examples/dcn 37 | - title: "Efficient serving" 38 | path: /recommenders/examples/efficient_serving 39 | - title: "Listwise ranking" 40 | path: /recommenders/examples/listwise_ranking 41 | -------------------------------------------------------------------------------- /docs/examples/quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "_dEaVsqSgNyQ" 7 | }, 8 | "source": [ 9 | "##### Copyright 2020 The TensorFlow Authors." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "4FyfuZX-gTKS" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "sT8AyHRMNh41" 38 | }, 39 | "source": [ 40 | "# TensorFlow Recommenders: Quickstart\n", 41 | "\n", 42 | "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", 43 | " \u003ctd\u003e\n", 44 | " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/recommenders/quickstart\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", 45 | " \u003c/td\u003e\n", 46 | " \u003ctd\u003e\n", 47 | " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", 48 | " \u003c/td\u003e\n", 49 | " \u003ctd\u003e\n", 50 | " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", 51 | " 
\u003c/td\u003e\n", 52 | " \u003ctd\u003e\n", 53 | " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/recommenders/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", 54 | " \u003c/td\u003e\n", 55 | "\u003c/table\u003e" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "8f-reQ11gbLB" 62 | }, 63 | "source": [ 64 | "In this tutorial, we build a simple matrix factorization model using the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/) with TFRS. We can use this model to recommend movies for a given user." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "qA00wBE2Ntdm" 71 | }, 72 | "source": [ 73 | "### Import TFRS\n", 74 | "\n", 75 | "First, install and import TFRS:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "id": "6yzAaM85Z12D" 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "!pip install -q tensorflow-recommenders\n", 87 | "!pip install -q --upgrade tensorflow-datasets" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "id": "n3oYt3R6Nr9l" 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "from typing import Dict, Text\n", 99 | "\n", 100 | "import numpy as np\n", 101 | "import tensorflow as tf\n", 102 | "\n", 103 | "import tensorflow_datasets as tfds\n", 104 | "import tensorflow_recommenders as tfrs" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": { 110 | "id": "zCxQ1CZcO2wh" 111 | }, 112 | "source": [ 113 | "### Read the data" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "id": "M-mxBYjdO5m7" 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# Ratings data.\n", 125 | "ratings = tfds.load('movielens/100k-ratings', split=\"train\")\n", 126 | "# Features of 
all the available movies.\n", 127 | "movies = tfds.load('movielens/100k-movies', split=\"train\")\n", 128 | "\n", 129 | "# Select the basic features.\n", 130 | "ratings = ratings.map(lambda x: {\n", 131 | " \"movie_title\": x[\"movie_title\"],\n", 132 | " \"user_id\": x[\"user_id\"]\n", 133 | "})\n", 134 | "movies = movies.map(lambda x: x[\"movie_title\"])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "5W0HSfmSNCWm" 141 | }, 142 | "source": [ 143 | "Build vocabularies to convert user ids and movie titles into integer indices for embedding layers:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "id": "9I1VTEjHzpfX" 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)\n", 155 | "user_ids_vocabulary.adapt(ratings.map(lambda x: x[\"user_id\"]))\n", 156 | "\n", 157 | "movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)\n", 158 | "movie_titles_vocabulary.adapt(movies)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": { 164 | "id": "Lrch6rVBOB9Q" 165 | }, 166 | "source": [ 167 | "### Define a model\n", 168 | "\n", 169 | "We can define a TFRS model by inheriting from `tfrs.Model` and implementing the `compute_loss` method:" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "id": "e5dNbDZwOIHR" 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "class MovieLensModel(tfrs.Model):\n", 181 | " # We derive from a custom base class to help reduce boilerplate. 
Under the hood,\n", 182 | " # these are still plain Keras Models.\n", 183 | "\n", 184 | " def __init__(\n", 185 | " self,\n", 186 | " user_model: tf.keras.Model,\n", 187 | " movie_model: tf.keras.Model,\n", 188 | " task: tfrs.tasks.Retrieval):\n", 189 | " super().__init__()\n", 190 | "\n", 191 | " # Set up user and movie representations.\n", 192 | " self.user_model = user_model\n", 193 | " self.movie_model = movie_model\n", 194 | "\n", 195 | " # Set up a retrieval task.\n", 196 | " self.task = task\n", 197 | "\n", 198 | " def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -\u003e tf.Tensor:\n", 199 | " # Define how the loss is computed.\n", 200 | "\n", 201 | " user_embeddings = self.user_model(features[\"user_id\"])\n", 202 | " movie_embeddings = self.movie_model(features[\"movie_title\"])\n", 203 | "\n", 204 | " return self.task(user_embeddings, movie_embeddings)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "wdwtgUCEOI8y" 211 | }, 212 | "source": [ 213 | "Define the two models and the retrieval task." 
214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "id": "EvtnUN6aUY4U" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "# Define user and movie models.\n", 225 | "user_model = tf.keras.Sequential([\n", 226 | " user_ids_vocabulary,\n", 227 | " tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)\n", 228 | "])\n", 229 | "movie_model = tf.keras.Sequential([\n", 230 | " movie_titles_vocabulary,\n", 231 | " tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)\n", 232 | "])\n", 233 | "\n", 234 | "# Define your objectives.\n", 235 | "task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(\n", 236 | " movies.batch(128).map(movie_model)\n", 237 | " )\n", 238 | ")" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "id": "BMV0HpzmJGWk" 245 | }, 246 | "source": [ 247 | "\n", 248 | "### Fit and evaluate it.\n", 249 | "\n", 250 | "Create the model, train it, and generate predictions:\n", 251 | "\n" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "id": "H2tQDhqkOKf1" 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "# Create a retrieval model.\n", 263 | "model = MovieLensModel(user_model, movie_model, task)\n", 264 | "model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))\n", 265 | "\n", 266 | "# Train for 3 epochs.\n", 267 | "model.fit(ratings.batch(4096), epochs=3)\n", 268 | "\n", 269 | "# Use brute-force search to set up retrieval using the trained representations.\n", 270 | "index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)\n", 271 | "index.index_from_dataset(\n", 272 | " movies.batch(100).map(lambda title: (title, model.movie_model(title))))\n", 273 | "\n", 274 | "# Get some recommendations.\n", 275 | "_, titles = index(np.array([\"42\"]))\n", 276 | "print(f\"Top 3 recommendations for user 42: {titles[0, :3]}\")" 277 | ] 278 | }, 279 | { 280 | "cell_type": 
"code", 281 | "execution_count": null, 282 | "metadata": { 283 | "id": "neJAJVwbReNd" 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "" 288 | ] 289 | } 290 | ], 291 | "metadata": { 292 | "colab": { 293 | "collapsed_sections": [], 294 | "name": "quickstart.ipynb", 295 | "private_outputs": true, 296 | "provenance": [], 297 | "toc_visible": true 298 | }, 299 | "kernelspec": { 300 | "display_name": "Python 3", 301 | "language": "python", 302 | "name": "python3" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 0 307 | } 308 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 2.9.0; sys_platform != 'darwin' 2 | tensorflow-macos >= 2.9.0; sys_platform == 'darwin' 3 | absl-py >= 0.1.6 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """TensorFlow Recommenders, a TensorFlow library for recommender systems.""" 16 | 17 | import pathlib 18 | import setuptools 19 | 20 | VERSION = "0.7.3" 21 | 22 | long_description = (pathlib.Path(__file__).parent 23 | .joinpath("README.md") 24 | .read_text()) 25 | 26 | setuptools.setup( 27 | name="tensorflow-recommenders", 28 | version=VERSION, 29 | description="Tensorflow Recommenders, a TensorFlow library for recommender systems.", 30 | long_description=long_description, 31 | long_description_content_type="text/markdown", 32 | url="https://github.com/tensorflow/recommenders", 33 | author="Google Inc.", 34 | author_email="packages@tensorflow.org", 35 | packages=setuptools.find_packages(), 36 | install_requires=pathlib.Path("requirements.txt").read_text().splitlines(), 37 | extras_require={ 38 | "docs": ["fire", "annoy", "scann == 1.2.*", "tensorflow-ranking"], 39 | }, 40 | # PyPI package information. 41 | classifiers=[ 42 | "Development Status :: 3 - Alpha", 43 | "Intended Audience :: Developers", 44 | "Intended Audience :: Education", 45 | "Intended Audience :: Science/Research", 46 | "License :: OSI Approved :: Apache Software License", 47 | "Programming Language :: Python :: 3", 48 | "Programming Language :: Python :: 3.8", 49 | "Programming Language :: Python :: 3.9", 50 | "Programming Language :: Python :: 3.10", 51 | "Topic :: Scientific/Engineering", 52 | "Topic :: Scientific/Engineering :: Mathematics", 53 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 54 | "Topic :: Software Development", 55 | "Topic :: Software Development :: Libraries", 56 | "Topic :: Software Development :: Libraries :: Python Modules", 57 | ], 58 | license="Apache 2.0", 59 | keywords="tensorflow recommenders recommendations", 60 | ) 61 | -------------------------------------------------------------------------------- /tensorflow_recommenders/.flake8: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/tensorflow_recommenders/.flake8 -------------------------------------------------------------------------------- /tensorflow_recommenders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """TensorFlow Recommenders is a library for building recommender system models. 16 | 17 | It helps with the full workflow of building a recommender system: data 18 | preparation, model formulation, training, evaluation, and deployment. 19 | 20 | It's built on Keras and aims to have a gentle learning curve while still giving 21 | you the flexibility to build complex models. 22 | """ 23 | 24 | __version__ = "v0.7.3" 25 | 26 | from tensorflow_recommenders import examples 27 | from tensorflow_recommenders import experimental 28 | # Internal extension library import. 
29 | from tensorflow_recommenders import layers 30 | from tensorflow_recommenders import metrics 31 | from tensorflow_recommenders import models 32 | from tensorflow_recommenders import tasks 33 | from tensorflow_recommenders import types 34 | 35 | 36 | Model = models.Model 37 | -------------------------------------------------------------------------------- /tensorflow_recommenders/dev_requirements.txt: -------------------------------------------------------------------------------- 1 | # Building docs. 2 | fire 3 | git+https://github.com/tensorflow/docs 4 | -------------------------------------------------------------------------------- /tensorflow_recommenders/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Lint-as: python3 16 | """Functions used in examples.""" 17 | 18 | from tensorflow_recommenders.examples import movielens 19 | -------------------------------------------------------------------------------- /tensorflow_recommenders/examples/movielens.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions supporting Movielens examples."""

import array
import collections

from typing import Dict, List, Optional, Text, Tuple

import numpy as np
import tensorflow as tf


def _group_movies_by_user(dataset: tf.data.Dataset, movie_vocabulary):
  """Groups the movie indices found in `dataset` by user.

  Args:
    dataset: Dataset whose rows have "user_id" and "movie_id" entries.
    movie_vocabulary: Mapping from raw movie id to integer movie index.

  Returns:
    A defaultdict mapping each user id to an `array.array("i")` holding the
    indices of that user's movies, in dataset order. Looking up an unseen user
    yields an empty array.
  """
  user_to_movies = collections.defaultdict(lambda: array.array("i"))

  for row in dataset.as_numpy_iterator():
    user_to_movies[row["user_id"]].append(movie_vocabulary[row["movie_id"]])

  return user_to_movies


def evaluate(user_model: tf.keras.Model,
             movie_model: tf.keras.Model,
             test: tf.data.Dataset,
             movies: tf.data.Dataset,
             train: Optional[tf.data.Dataset] = None,
             k: int = 10) -> Dict[Text, float]:
  """Evaluates a Movielens model on the supplied datasets.

  Every movie is scored for each test user as the dot product between the
  user embedding and the movie embedding. The `k` best-scoring movies are then
  compared against the movies the user has in the test set.

  Args:
    user_model: User representation model. Called with a dict holding a
      "user_id" entry.
    movie_model: Movie representation model. Called with a dict holding a
      "movie_id" entry.
    test: Test dataset with "user_id" and "movie_id" entries.
    movies: Dataset of movies with a "movie_id" entry.
    train: Training dataset. If supplied, recommendations for training watches
      will be removed.
    k: The cutoff value at which to compute precision and recall.

  Returns:
    Dictionary of metrics with the keys "precision_at_k" and "recall_at_k",
    each averaged over the users present in `test`.
  """

  movie_ids = np.concatenate(
      list(movies.batch(1000).map(lambda x: x["movie_id"]).as_numpy_iterator()))

  # Position of each movie in `movie_ids`; the same positions index the rows
  # of `movie_embeddings` below because both iterate `movies` in order.
  movie_vocabulary = dict(zip(movie_ids.tolist(), range(len(movie_ids))))

  train_user_to_movies = None
  if train is not None:
    train_user_to_movies = _group_movies_by_user(train, movie_vocabulary)

  test_user_to_movies = _group_movies_by_user(test, movie_vocabulary)

  movie_embeddings = np.concatenate(
      list(movies.batch(4096).map(
          lambda x: movie_model({"movie_id": x["movie_id"]})
      ).as_numpy_iterator()))

  precision_values = []
  recall_values = []

  for user_id, test_movies in test_user_to_movies.items():
    user_embedding = user_model({"user_id": np.array([user_id])}).numpy()
    scores = (user_embedding @ movie_embeddings.T).flatten()

    test_movies = np.frombuffer(test_movies, dtype=np.int32)

    if train_user_to_movies is not None:
      train_movies = np.frombuffer(
          train_user_to_movies[user_id], dtype=np.int32)
      # Push movies seen during training to the bottom of the ranking.
      scores[train_movies] = -1e6

    top_movies = np.argsort(-scores)[:k]
    # One hit per test entry that appears among the top-k recommendations.
    num_test_movies_in_k = int(np.isin(test_movies, top_movies).sum())
    precision_values.append(num_test_movies_in_k / k)
    recall_values.append(num_test_movies_in_k / len(test_movies))

  return {
      "precision_at_k": np.mean(precision_values),
      "recall_at_k": np.mean(recall_values)
  }


def _create_feature_dict() -> Dict[Text, List[tf.Tensor]]:
  """Helper function for creating an empty feature dict for defaultdict."""
  return {"movie_title": [], "user_rating": []}


def _sample_list(
    feature_lists: Dict[Text, List[tf.Tensor]],
    num_examples_per_list: int,
    random_state: Optional[np.random.RandomState] = None,
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Function for sampling a list example from given feature lists.

  Args:
    feature_lists: Dict with parallel "movie_title" and "user_rating" lists.
    num_examples_per_list: Number of entries to draw, without replacement.
      Must not exceed the number of available entries.
    random_state: Source of randomness; a fresh `np.random.RandomState` is
      created when omitted.

  Returns:
    A `(movie_titles, ratings)` tuple of stacked tensors whose entries are
    aligned with each other.
  """
  if random_state is None:
    random_state = np.random.RandomState()

  sampled_indices = random_state.choice(
      range(len(feature_lists["movie_title"])),
      size=num_examples_per_list,
      replace=False,
  )
  sampled_movie_titles = [
      feature_lists["movie_title"][idx] for idx in sampled_indices
  ]
  sampled_ratings = [
      feature_lists["user_rating"][idx]
      for idx in sampled_indices
  ]

  return (
      tf.stack(sampled_movie_titles, 0),
      tf.stack(sampled_ratings, 0),
  )


def sample_listwise(
    rating_dataset: tf.data.Dataset,
    num_list_per_user: int = 10,
    num_examples_per_list: int = 10,
    seed: Optional[int] = None,
) -> tf.data.Dataset:
  """Function for converting the MovieLens 100K dataset to a listwise dataset.

  Args:
    rating_dataset:
      The MovieLens ratings dataset loaded from TFDS with features
      "movie_title", "user_id", and "user_rating".
    num_list_per_user:
      An integer representing the number of lists that should be sampled for
      each user in the training dataset.
    num_examples_per_list:
      An integer representing the number of movies to be sampled for each list
      from the list of movies rated by the user.
    seed:
      An integer for creating `np.random.RandomState`.

  Returns:
    A tf.data.Dataset containing list examples.

    Each example contains three keys: "user_id", "movie_title", and
    "user_rating". "user_id" holds the user id for the example. "movie_title"
    holds the `num_examples_per_list` candidate movie titles sampled for that
    list. "user_rating" holds the rating of each movie in the candidate list,
    in the same order. Users with fewer than `num_examples_per_list` ratings
    are dropped.
  """
  random_state = np.random.RandomState(seed)

  example_lists_by_user = collections.defaultdict(_create_feature_dict)

  for example in rating_dataset:
    feature_lists = example_lists_by_user[example["user_id"].numpy()]
    feature_lists["movie_title"].append(example["movie_title"])
    feature_lists["user_rating"].append(example["user_rating"])

  tensor_slices = {"user_id": [], "movie_title": [], "user_rating": []}

  for user_id, feature_lists in example_lists_by_user.items():
    # Drop the user if they don't have enough ratings. The check does not
    # depend on the list being sampled, so it is made once per user.
    if len(feature_lists["movie_title"]) < num_examples_per_list:
      continue

    for _ in range(num_list_per_user):
      sampled_movie_titles, sampled_ratings = _sample_list(
          feature_lists,
          num_examples_per_list,
          random_state=random_state,
      )
      tensor_slices["user_id"].append(user_id)
      tensor_slices["movie_title"].append(sampled_movie_titles)
      tensor_slices["user_rating"].append(sampled_ratings)

  return tf.data.Dataset.from_tensor_slices(tensor_slices)

# --------------------------------------------------------------------------------
# /tensorflow_recommenders/examples/nbtool.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint-as: python3
"""Tools for cleaning and testing notebooks."""

import glob
import os
import subprocess
import tempfile

from typing import Text

import fire
import nbformat


def clean_cell(cell):
  """Cleans a cell in place.

  Removes the "pinned", "imported_from", "executionInfo" and "outputId"
  metadata entries when present and resets the execution count of code cells.

  Args:
    cell: A notebook cell exposing `metadata` and item access.
  """
  metadata = cell.metadata

  for key in ("pinned", "imported_from", "executionInfo", "outputId"):
    if key in metadata:
      del metadata[key]

  if cell["cell_type"] == "code":
    cell["execution_count"] = 0


def clean_notebook(notebook):
  """Cleans a notebook in place and returns it.

  Drops the "defaultview", "views", "last_runtime" and "provenance" entries
  from the notebook's "colab" metadata, then cleans every cell.

  Args:
    notebook: The notebook to clean.

  Returns:
    The same notebook object, after cleaning.
  """
  # Notebooks that were never opened in Colab carry no "colab" section; they
  # simply have nothing to strip at the notebook level.
  colab = notebook["metadata"].get("colab", {})

  for key in ("defaultview", "views", "last_runtime", "provenance"):
    if key in colab:
      del colab[key]

  for cell in notebook.cells:
    clean_cell(cell)

  return notebook


def _notebook_paths(path):
  """Returns the notebook files found directly under the directory `path`."""
  return glob.glob(os.path.join(path, "*ipynb"))


def _read_notebook(notebook_path):
  """Reads the notebook at `notebook_path` as nbformat version 4."""
  with open(notebook_path, "r", encoding="utf-8") as notebook_file:
    return nbformat.read(notebook_file, as_version=4)


def _write_notebook(notebook, notebook_path):
  """Writes `notebook` to `notebook_path`, replacing any existing file."""
  with open(notebook_path, "w", encoding="utf-8") as notebook_file:
    nbformat.write(notebook, notebook_file)


class NBTool:
  """Tool for checking and cleaning notebooks."""

  def format(self, path):
    """Formats notebooks by reading and re-writing each one under `path`."""

    for notebook_path in _notebook_paths(path):
      print(f"Formatting {notebook_path}")

      _write_notebook(_read_notebook(notebook_path), notebook_path)

  def clean(self, path: Text):
    """Cleans notebooks under `path`, rewriting each file in place."""
    for notebook_path in _notebook_paths(path):
      print(f"Cleaning {notebook_path}")

      notebook = clean_notebook(_read_notebook(notebook_path))

      _write_notebook(notebook, notebook_path)

  def check(self, path: Text):
    """Executes a notebook, checking for execution errors.

    Args:
      path: Path of the notebook to execute.

    Raises:
      RuntimeError: If the `jupyter nbconvert` invocation exits with a
        non-zero status. The message carries the combined stdout/stderr of the
        command.
    """

    # The executed copy goes into a scratch directory that is removed on exit,
    # whatever file name nbconvert derives from the --output base name.
    # NOTE(review): nbconvert is expected to treat --output as a base name and
    # add its own extension - confirm against the nbconvert documentation.
    with tempfile.TemporaryDirectory() as output_dir:
      fname = os.path.join(output_dir, "executed")
      args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
              "--ExecutePreprocessor.timeout=600",
              "--ExecutePreprocessor.kernel_name=python3",
              "--output", fname, path]

      try:
        subprocess.check_output(args, stderr=subprocess.STDOUT)
      except subprocess.CalledProcessError as e:
        # stderr is merged into stdout above, so `e.output` is the only stream
        # that carries the failure details.
        output = (e.output or b"").decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Execution of notebook {path} failed: {output}") from e

  def check_all(self, path: Text):
    """Runs all notebooks under path."""
    for notebook_path in _notebook_paths(path):
      print(f"Executing {notebook_path}")

      self.check(notebook_path)


if __name__ == "__main__":
  fire.Fire(NBTool, name="nbtool")

# --------------------------------------------------------------------------------
# /tensorflow_recommenders/experimental/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Experimental APIs.""" 16 | 17 | from tensorflow_recommenders.experimental import layers 18 | from tensorflow_recommenders.experimental import models 19 | from tensorflow_recommenders.experimental import optimizers 20 | -------------------------------------------------------------------------------- /tensorflow_recommenders/experimental/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Experimental layers APIs.""" 16 | 17 | from tensorflow_recommenders.experimental.layers import embedding 18 | -------------------------------------------------------------------------------- /tensorflow_recommenders/experimental/layers/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Experimental embedding layers."""

from tensorflow_recommenders.experimental.layers.embedding.partial_tpu_embedding import PartialTPUEmbedding

# --------------------------------------------------------------------------------
# /tensorflow_recommenders/experimental/layers/embedding/partial_tpu_embedding.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Embedding layer for the Ranking model."""

from typing import Dict, Optional, Union

import tensorflow as tf

from tensorflow_recommenders.layers.embedding.tpu_embedding_layer import TPUEmbedding

Tensor = Union[tf.Tensor, tf.SparseTensor, tf.RaggedTensor]


class PartialTPUEmbedding(tf.keras.layers.Layer):
  """Partial TPU Embedding layer.

  This layer is composed of `tfrs.layers.embedding.TPUEmbedding` and
  `tf.keras.layers.Embedding` embedding layers. When training on TPUs, it is
  preferable to use TPU Embedding layers for large tables (as they are sharded
  across TPU cores) and Keras embedding layer for small tables.
  For tables with vocab sizes less than `size_threshold` a Keras embedding
  layer will be used, above that threshold a TPU embedding layer will be used.

  This layer will be applied on a dictionary of feature_name, categorical_tensor
  pairs and return a dictionary of string-to-tensor of feature_name,
  embedded_value pairs.
  """

  def __init__(self,
               feature_config,
               optimizer: tf.keras.optimizers.Optimizer,
               pipeline_execution_with_tensor_core: bool = False,
               batch_size: Optional[int] = None,
               size_threshold: Optional[int] = 10_000) -> None:
    """Initializes the embedding layer.

    Args:
      feature_config: A nested structure of
        `tf.tpu.experimental.embedding.FeatureConfig` configs.
      optimizer: An optimizer used for TPU embeddings.
      pipeline_execution_with_tensor_core: If True, the TPU embedding
        computations will overlap with the TensorCore computations (and hence
        will be one step old with potential correctness drawbacks). Set to True
        for improved performance.
      batch_size: If set, this will be used as the global batch size and
        override the autodetection of the batch size from the layer's input.
        This is necessary if all inputs to the layer's call are SparseTensors.
      size_threshold: A threshold for table sizes below which a Keras embedding
        layer is used, and above which a TPU embedding layer is used.
        Set `size_threshold=0` to use TPU embedding for all tables and
        `size_threshold=None` to use only Keras embeddings.
    """
    # NOTE(review): `batch_size` is accepted but not forwarded to
    # `TPUEmbedding` anywhere in this constructor - confirm whether that is
    # intended.
    super().__init__()

    tpu_configs = {}
    keras_layer_by_table = {}
    self._keras_embedding_layers = {}

    for feature_name, config in feature_config.items():
      table = config.table
      use_tpu = (
          size_threshold is not None
          and table.vocabulary_size > size_threshold
      )

      if use_tpu:
        tpu_configs[feature_name] = config
      else:
        # Features that refer to the same table share one Keras layer.
        if table not in keras_layer_by_table:
          keras_layer_by_table[table] = tf.keras.layers.Embedding(
              input_dim=table.vocabulary_size,
              output_dim=table.dim,
              embeddings_initializer=table.initializer or "uniform",
          )
        self._keras_embedding_layers[feature_name] = keras_layer_by_table[table]

    # The TPU layer only exists when at least one feature was routed to it.
    self._tpu_embedding = None
    if tpu_configs:
      self._tpu_embedding = TPUEmbedding(
          tpu_configs, optimizer, pipeline_execution_with_tensor_core
      )

  def call(self, inputs: Dict[str, Tensor]) -> Dict[str, tf.Tensor]:
    """Computes the output of the embedding layer.

    It expects a string-to-tensor (or SparseTensor/RaggedTensor) dict as input,
    and outputs a dictionary of string-to-tensor of feature_name, embedded_value
    pairs. Note that SparseTensor/RaggedTensor are only supported for
    TPUEmbedding and are not supported for Keras embeddings.

    Args:
      inputs: A string-to-tensor (or SparseTensor/RaggedTensor) dictionary.

    Returns:
      output: A dictionary of string-to-tensor of feature_name, embedded_value
        pairs.

    Raises:
      ValueError if no tf.Tensor is passed to a Keras embedding layer.
    """
    output = {}
    tpu_emb_inputs = {}

    for key, val in inputs.items():
      keras_layer = self._keras_embedding_layers.get(key)

      # Anything without a Keras layer is handed to the TPU embedding below.
      if keras_layer is None:
        tpu_emb_inputs[key] = val
        continue

      if not isinstance(val, tf.Tensor):
        raise ValueError("Only tf.Tensor input is supported for Keras embedding"
                         f" layers, but got: {type(val)}")

      output[key] = keras_layer(val)

    if self._tpu_embedding:
      tpu_emb_output_dict = self._tpu_embedding(tpu_emb_inputs)  # pylint: disable=[not-callable]
      output.update(tpu_emb_output_dict)
    return output

  @property
  def tpu_embedding(self) -> Optional[TPUEmbedding]:
    """The wrapped TPUEmbedding, or `None` if only Keras embeddings are used."""
    return self._tpu_embedding

  @property
  def keras_embedding_layers(self) -> Dict[str, tf.keras.layers.Embedding]:
    """A dictionary mapping feature names to their Keras embedding layers."""
    return self._keras_embedding_layers

# --------------------------------------------------------------------------------
# /tensorflow_recommenders/experimental/layers/embedding/partial_tpu_embedding_test.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for PartialTPUEmbedding."""

import math
from typing import Dict, List

import tensorflow as tf

import tensorflow_recommenders as tfrs


def _get_tpu_embedding_feature_config(
    vocab_sizes: List[int],
    embedding_dims: List[int]
) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
  """Returns TPU embedding feature config.

  Feature `i` is keyed by `str(i)` and backed by its own table named
  `table_{i}`.

  Args:
    vocab_sizes: List of sizes of categories/id's in the table.
    embedding_dims: Embedding dimensions.
  Returns:
    A dictionary of feature_name, FeatureConfig pairs.
  """
  assert len(vocab_sizes) == len(embedding_dims)
  feature_config = {}

  for i, vocab_size in enumerate(vocab_sizes):
    table_config = tf.tpu.experimental.embedding.TableConfig(
        vocabulary_size=vocab_size,
        dim=embedding_dims[i],
        combiner="mean",
        initializer=tf.initializers.TruncatedNormal(
            mean=0.0, stddev=1 / math.sqrt(embedding_dims[i])
        ),
        name=f"table_{i}"
    )
    feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
        table=table_config)

  return feature_config


class PartialTPUEmbeddingTest(tf.test.TestCase):
  """Checks how features are split between Keras and TPU embedding layers."""

  def _make_feature_config(self):
    """Builds the five-feature config shared by all tests."""
    return _get_tpu_embedding_feature_config(
        vocab_sizes=[5, 20, 8, 9, 15], embedding_dims=[2, 4, 6, 8, 10])

  def _assert_output_dims(self, embedding_layer, feature_config):
    """Runs the layer on one id per feature and checks each output's shape."""
    output = embedding_layer({"0": 4, "1": 10, "2": 6, "3": 8, "4": 0})
    for key, val in output.items():
      self.assertEqual(val.shape, feature_config[key].table.dim)

  def test_embedding_layer(self):
    feature_config = self._make_feature_config()

    embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.keras.optimizers.legacy.Adam(),
        size_threshold=10)

    tpu_embedding_tables = embedding_layer.tpu_embedding.embedding_tables
    keras_embedding_layers = embedding_layer.keras_embedding_layers

    # Only the tables with vocabulary sizes 20 and 15 exceed the threshold.
    self.assertLen(tpu_embedding_tables, 2)
    self.assertLen(keras_embedding_layers, 3)

    for tbl_config in tpu_embedding_tables:
      if "1" in tbl_config.name:
        self.assertEqual(tbl_config.vocabulary_size, 20)
        self.assertEqual(tbl_config.dim, 4)
      else:
        self.assertEqual(tbl_config.vocabulary_size, 15)
        self.assertEqual(tbl_config.dim, 10)

    self.assertEqual(keras_embedding_layers["0"].input_dim, 5)
    self.assertEqual(keras_embedding_layers["0"].output_dim, 2)
    self.assertEqual(keras_embedding_layers["2"].input_dim, 8)
    self.assertEqual(keras_embedding_layers["2"].output_dim, 6)
    self.assertEqual(keras_embedding_layers["3"].input_dim, 9)
    self.assertEqual(keras_embedding_layers["3"].output_dim, 8)

    self._assert_output_dims(embedding_layer, feature_config)

  def test_all_keras_embedding(self):
    feature_config = self._make_feature_config()

    embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.keras.optimizers.legacy.Adam(),
        size_threshold=None)

    self.assertIsNone(embedding_layer.tpu_embedding)
    keras_embedding_layers = embedding_layer.keras_embedding_layers

    self.assertLen(keras_embedding_layers, 5)

    self._assert_output_dims(embedding_layer, feature_config)

  def test_all_tpu_embedding(self):
    feature_config = self._make_feature_config()
    embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.keras.optimizers.legacy.Adam(),
        size_threshold=0)

    self.assertLen(embedding_layer.tpu_embedding.embedding_tables, 5)

    self._assert_output_dims(embedding_layer, feature_config)

  def test_all_tpu_embedding_with_pipelining(self):
    feature_config = self._make_feature_config()
    embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.keras.optimizers.legacy.Adam(),
        pipeline_execution_with_tensor_core=True,
        size_threshold=0)

    self.assertLen(embedding_layer.tpu_embedding.embedding_tables, 5)

    self._assert_output_dims(embedding_layer, feature_config)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /tensorflow_recommenders/experimental/models/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | """Experimental Models.""" 16 | 17 | from tensorflow_recommenders.experimental.models.ranking import Ranking 18 | -------------------------------------------------------------------------------- /tensorflow_recommenders/experimental/models/ranking.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """A pre-built ranking model.""" 16 | 17 | from typing import cast, Dict, Optional, Sequence, Tuple, Union 18 | 19 | import tensorflow as tf 20 | 21 | from tensorflow_recommenders import layers 22 | from tensorflow_recommenders import models 23 | from tensorflow_recommenders import tasks 24 | from tensorflow_recommenders.layers import feature_interaction as feature_interaction_lib 25 | 26 | 27 | class Ranking(models.Model): 28 | """A configurable ranking model. 29 | 30 | This class represents a sensible and reasonably flexible configuration for a 31 | ranking model that can be used for tasks such as CTR prediction. 32 | 33 | It can be customized as needed, and its constituent blocks can be changed by 34 | passing user-defined alternatives. 
35 | 36 | For example: 37 | - Pass 38 | `feature_interaction = tfrs.layers.feature_interaction.DotInteraction()` 39 | to train a DLRM model, or pass 40 | ``` 41 | feature_interaction = tf.keras.Sequential([ 42 | tf.keras.layers.Concatenate(), 43 | tfrs.layers.feature_interaction.Cross() 44 | ]) 45 | ``` 46 | to train a DCN model. 47 | - Pass `task = tfrs.tasks.Ranking(loss=tf.keras.losses.BinaryCrossentropy())` 48 | to train a CTR prediction model, and 49 | `tfrs.tasks.Ranking(loss=tf.keras.losses.MeanSquaredError())` to train 50 | a rating prediction model. 51 | 52 | Changing these should cover a broad range of models, but this class is not 53 | intended to cover all possible use cases. For full flexibility, inherit 54 | from `tfrs.models.Model` and provide your own implementations of 55 | the `compute_loss` and `call` methods. 56 | """ 57 | 58 | def __init__( 59 | self, 60 | embedding_layer: tf.keras.layers.Layer, 61 | bottom_stack: Optional[tf.keras.layers.Layer] = None, 62 | feature_interaction: Optional[tf.keras.layers.Layer] = None, 63 | top_stack: Optional[tf.keras.layers.Layer] = None, 64 | concat_dense: bool = True, 65 | task: Optional[tasks.Task] = None) -> None: 66 | """Initializes the model. 67 | 68 | Args: 69 | embedding_layer: The embedding layer is applied to categorical features. 70 | It expects a string-to-tensor (or SparseTensor/RaggedTensor) dict as 71 | an input, and outputs a dictionary of string-to-tensor of feature_name, 72 | embedded_value pairs. 73 | {feature_name_i: tensor_i} -> {feature_name_i: emb(tensor_i)}. 74 | bottom_stack: The `bottom_stack` layer is applied to dense features before 75 | feature interaction. If None, an MLP with layer sizes [256, 64, 16] is 76 | used. For DLRM model, the output of bottom_stack should be of shape 77 | (batch_size, embedding dimension). 78 | feature_interaction: Feature interaction layer is applied to the 79 | `bottom_stack` output and sparse feature embeddings. 
If it is None, 80 | DotInteraction layer is used. 81 | top_stack: The `top_stack` layer is applied to the `feature_interaction` 82 | output. The output of top_stack should be in the range [0, 1]. If it is 83 | None, MLP with layer sizes [512, 256, 1] is used. 84 | concat_dense: Whether to concatenate the interaction output with dense 85 | embedding vector again before feeding into the top stack. 86 | task: The task which the model should optimize for. Defaults to a 87 | `tfrs.tasks.Ranking` task with a binary cross-entropy loss, suitable for 88 | tasks like click prediction. 89 | """ 90 | 91 | super().__init__() 92 | 93 | self._embedding_layer = embedding_layer 94 | self._concat_dense = concat_dense 95 | self._bottom_stack = ( 96 | bottom_stack 97 | if bottom_stack 98 | else layers.blocks.MLP(units=[256, 64, 16], final_activation="relu") 99 | ) 100 | self._top_stack = ( 101 | top_stack 102 | if top_stack 103 | else layers.blocks.MLP(units=[512, 256, 1], final_activation="sigmoid") 104 | ) 105 | self._feature_interaction = ( 106 | feature_interaction 107 | if feature_interaction 108 | else feature_interaction_lib.DotInteraction() 109 | ) 110 | 111 | if task is not None: 112 | self._task = task 113 | else: 114 | self._task = tasks.Ranking( 115 | loss=tf.keras.losses.BinaryCrossentropy( 116 | reduction=tf.keras.losses.Reduction.NONE 117 | ), 118 | metrics=[ 119 | tf.keras.metrics.AUC(name="auc"), 120 | tf.keras.metrics.BinaryAccuracy(name="accuracy"), 121 | ], 122 | prediction_metrics=[ 123 | tf.keras.metrics.Mean("prediction_mean"), 124 | ], 125 | label_metrics=[ 126 | tf.keras.metrics.Mean("label_mean") 127 | ] 128 | ) 129 | 130 | def compute_loss(self, 131 | inputs: Union[ 132 | # Tuple of (features, labels). 133 | Tuple[ 134 | Dict[str, tf.Tensor], 135 | tf.Tensor 136 | ], 137 | # Tuple of (features, labels, sample weights). 
138 | Tuple[ 139 | Dict[str, tf.Tensor], 140 | tf.Tensor, 141 | Optional[tf.Tensor] 142 | ] 143 | ], 144 | training: bool = False) -> tf.Tensor: 145 | """Computes the loss and metrics of the model. 146 | 147 | Args: 148 | inputs: A data structure of tensors of the following format: 149 | ({"dense_features": dense_tensor, 150 | "sparse_features": sparse_tensors}, 151 | label_tensor), or 152 | ({"dense_features": dense_tensor, 153 | "sparse_features": sparse_tensors}, 154 | label_tensor, 155 | sample_weight tensor). 156 | training: Whether the model is in training mode. 157 | 158 | Returns: 159 | Loss tensor. 160 | 161 | Raises: 162 | ValueError if the the shape of the inputs is invalid. 163 | """ 164 | 165 | # We need to work around a bug in mypy - tuple narrowing 166 | # based on length checks doesn't work. 167 | # See https://github.com/python/mypy/issues/1178 for details. 168 | if len(inputs) == 2: 169 | inputs = cast( 170 | Tuple[ 171 | Dict[str, tf.Tensor], 172 | tf.Tensor 173 | ], 174 | inputs 175 | ) 176 | features, labels = inputs 177 | sample_weight = None 178 | elif len(inputs) == 3: 179 | inputs = cast( 180 | Tuple[ 181 | Dict[str, tf.Tensor], 182 | tf.Tensor, 183 | Optional[tf.Tensor], 184 | ], 185 | inputs 186 | ) 187 | features, labels, sample_weight = inputs 188 | else: 189 | raise ValueError( 190 | "Inputs should be either a tuple of (features, labels), " 191 | "or a tuple of (features, labels, sample weights). " 192 | "Got a length {len(inputs)} tuple instead: {inputs}." 193 | ) 194 | 195 | outputs = self(features, training=training) 196 | 197 | loss = self._task(labels, outputs, sample_weight=sample_weight) 198 | loss = tf.reduce_mean(loss) 199 | # Scales loss as the default gradients allreduce performs sum inside the 200 | # optimizer. 201 | return loss / tf.distribute.get_strategy().num_replicas_in_sync 202 | 203 | def call(self, inputs: Dict[str, tf.Tensor]) -> tf.Tensor: 204 | """Executes forward and backward pass, returns loss. 
205 | 206 | Args: 207 | inputs: Model function inputs (features and labels). 208 | 209 | Returns: 210 | loss: Scalar tensor. 211 | """ 212 | dense_features = inputs["dense_features"] 213 | sparse_features = inputs["sparse_features"] 214 | 215 | sparse_embeddings = self._embedding_layer(sparse_features) 216 | # Combine a dictionary into a vector and squeeze dimension from 217 | # (batch_size, 1, emb) to (batch_size, emb). 218 | sparse_embeddings = tf.nest.flatten(sparse_embeddings) 219 | 220 | sparse_embedding_vecs = [ 221 | tf.squeeze(sparse_embedding) for sparse_embedding in sparse_embeddings 222 | ] 223 | dense_embedding_vec = self._bottom_stack(dense_features) 224 | 225 | interaction_args = sparse_embedding_vecs + [dense_embedding_vec] 226 | interaction_output = self._feature_interaction(interaction_args) 227 | if self._concat_dense: 228 | feature_interaction_output = tf.concat( 229 | [dense_embedding_vec, interaction_output], axis=1 230 | ) 231 | else: 232 | feature_interaction_output = interaction_output 233 | 234 | prediction = self._top_stack(feature_interaction_output) 235 | 236 | return tf.reshape(prediction, [-1]) 237 | 238 | @property 239 | def embedding_trainable_variables(self) -> Sequence[tf.Variable]: 240 | """Returns trainable variables from embedding tables. 241 | 242 | When training a recommendation model with embedding tables, sometimes it's 243 | preferable to use separate optimizers/learning rates for embedding 244 | variables and dense variables. 245 | `tfrs.experimental.optimizers.CompositeOptimizer` can be used to apply 246 | different optimizers to embedding variables and the remaining variables. 
247 | """ 248 | return self._embedding_layer.trainable_variables 249 | 250 | @property 251 | def dense_trainable_variables(self) -> Sequence[tf.Variable]: 252 | """Returns all trainable variables that are not embeddings.""" 253 | dense_vars = [] 254 | for layer in self.layers: 255 | if layer != self._embedding_layer: 256 | dense_vars.extend(layer.trainable_variables) 257 | return dense_vars 258 | -------------------------------------------------------------------------------- /tensorflow_recommenders/experimental/models/ranking_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # pylint: disable=g-long-lambda 16 | """Tests for Ranking.""" 17 | 18 | import itertools 19 | import math 20 | 21 | from typing import List, Dict 22 | 23 | from absl.testing import parameterized 24 | 25 | import tensorflow as tf 26 | 27 | import tensorflow_recommenders as tfrs 28 | 29 | 30 | def _get_tpu_embedding_feature_config( 31 | vocab_sizes: List[int], 32 | embedding_dim: int, 33 | table_name_prefix: str = "embedding_table" 34 | ) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]: 35 | """Returns TPU embedding feature config. 36 | 37 | Args: 38 | vocab_sizes: List of sizes of categories/id's in the table. 39 | embedding_dim: Embedding dimension. 
def _get_tpu_embedding_feature_config(
    vocab_sizes: List[int],
    embedding_dim: int,
    table_name_prefix: str = "embedding_table"
) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
  """Returns TPU embedding feature config.

  One table (and one feature using it) is created per entry of `vocab_sizes`;
  features are keyed by their position rendered as a string ("0", "1", ...).

  Args:
    vocab_sizes: List of sizes of categories/id's in the table.
    embedding_dim: Embedding dimension.
    table_name_prefix: A prefix for embedding tables.
  Returns:
    A dictionary of feature_name, FeatureConfig pairs.
  """
  feature_config = {}

  for i, vocab_size in enumerate(vocab_sizes):
    table_config = tf.tpu.experimental.embedding.TableConfig(
        vocabulary_size=vocab_size,
        dim=embedding_dim,
        combiner="mean",
        initializer=tf.initializers.TruncatedNormal(
            mean=0.0, stddev=1 / math.sqrt(embedding_dim)
        ),
        name=f"{table_name_prefix}_{i}"
    )
    feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
        table=table_config)

  return feature_config


def _generate_synthetic_data(num_dense: int,
                             vocab_sizes: List[int],
                             dataset_size: int,
                             batch_size: int,
                             generate_weights: bool = False) -> tf.data.Dataset:
  """Builds a batched random dataset in the format `Ranking` consumes.

  Args:
    num_dense: Number of dense features per example.
    vocab_sizes: Vocabulary size of each sparse feature; sparse ids are drawn
      below the corresponding size.
    dataset_size: Number of examples to generate.
    batch_size: Batch size; an incomplete final batch is dropped.
    generate_weights: Whether to add a random sample-weight tensor as a third
      element of every example.

  Returns:
    A dataset of (features, labels) or (features, labels, weights) batches,
    where features is a dict with "dense_features" and "sparse_features".
  """
  dense_tensor = tf.random.uniform(
      shape=(dataset_size, num_dense), maxval=1.0, dtype=tf.float32)
  # The per-example mean of the dense features is in the [0, 1] interval.
  dense_tensor_mean = tf.math.reduce_mean(dense_tensor, axis=1)

  sparse_tensors = []
  for size in vocab_sizes:
    sparse_tensors.append(
        tf.random.uniform(
            shape=(dataset_size,), maxval=int(size), dtype=tf.int32))

  # Same keys ("0", "1", ...) as produced by the feature config helper.
  sparse_tensor_elements = {
      str(i): sparse_tensors[i] for i in range(len(sparse_tensors))
  }

  sparse_tensors = tf.stack(sparse_tensors, axis=-1)
  # Sum of the ids of one example; normalised by the total vocabulary size
  # below so that it also ends up in the [0, 1] interval.
  sparse_tensors_mean = tf.math.reduce_sum(sparse_tensors, axis=1)
  sparse_tensors_mean = tf.cast(sparse_tensors_mean, dtype=tf.float32)
  sparse_tensors_mean /= sum(vocab_sizes)
  # The label is in [0, 1] interval.
  label_tensor = (dense_tensor_mean + sparse_tensors_mean) / 2.0
  # Use the threshold 0.5 to convert to 0/1 labels (adding 0.5 and casting to
  # an integer type).
  label_tensor = tf.cast(label_tensor + 0.5, tf.int32)

  if generate_weights:
    weights = tf.random.uniform(shape=(dataset_size, 1))

    input_elem = (
        {"dense_features": dense_tensor,
         "sparse_features": sparse_tensor_elements},
        label_tensor,
        weights
    )
  else:
    input_elem = (
        {"dense_features": dense_tensor,
         "sparse_features": sparse_tensor_elements},
        label_tensor,
    )

  dataset = tf.data.Dataset.from_tensor_slices(input_elem)

  return dataset.batch(batch_size, drop_remainder=True)


class RankingTest(tf.test.TestCase, parameterized.TestCase):
  """Trains and evaluates `Ranking` over a grid of configurations."""

  # The layer arguments are factories (classes or lambdas), so that every
  # parameter combination builds its own fresh layer instances.
  @parameterized.parameters(
      itertools.product(
          # Feature interaction layers.
          (
              tfrs.layers.feature_interaction.DotInteraction,
              lambda: tf.keras.Sequential([
                  tf.keras.layers.Concatenate(),
                  tfrs.layers.feature_interaction.Cross()
              ]),
          ),
          # Bottom stack.
          (lambda: None, lambda: tfrs.layers.blocks.MLP(units=[40, 16])),
          # Top stack.
          (lambda: None, lambda: tfrs.layers.blocks.MLP(
              units=[40, 20, 1], final_activation="sigmoid")),
          # Concat Dense.
          (True, False),
          # Use weights.
          (True, False),
          # Size threshold.
          (None, -1, 20)))
  def test_ranking_model(self,
                         feature_interaction_layer,
                         bottom_stack,
                         top_stack,
                         concat_dense=True,
                         use_weights=False,
                         size_threshold=10):
    """Tests a ranking model."""
    vocabulary_sizes = [30, 3, 26]

    embedding_feature_config = _get_tpu_embedding_feature_config(
        vocab_sizes=vocabulary_sizes, embedding_dim=16)
    optimizer = tf.keras.optimizers.legacy.Adam()

    # The same optimizer instance is handed to the embedding layer and to
    # `compile` below.
    model = tfrs.experimental.models.Ranking(
        embedding_layer=tfrs.experimental.layers.embedding.PartialTPUEmbedding(
            feature_config=embedding_feature_config,
            optimizer=optimizer,
            size_threshold=size_threshold),
        bottom_stack=bottom_stack(),
        feature_interaction=feature_interaction_layer(),
        top_stack=top_stack(),
        concat_dense=concat_dense)
    model.compile(optimizer=optimizer, steps_per_execution=5)

    dataset = _generate_synthetic_data(
        num_dense=8,
        vocab_sizes=vocabulary_sizes,
        dataset_size=64,
        batch_size=16,
        generate_weights=use_weights)

    model.fit(
        dataset.repeat(), validation_data=dataset, epochs=1, steps_per_epoch=5)

    metrics_ = model.evaluate(dataset, return_dict=True)

    # Only the presence of the metrics is checked, not their values.
    self.assertIn("loss", metrics_)
    self.assertIn("accuracy", metrics_)


if __name__ == "__main__":
  tf.test.main()
class CompositeOptimizer(tf.keras.optimizers.legacy.Optimizer):
  """Dispatches gradients to several wrapped optimizers.

  Each wrapped optimizer is responsible for its own subset of the model's
  variables. This makes it possible, for example, to update the embeddings
  (sparse variables) with one optimizer and all remaining variables with
  another one.

  The assignment is given as a list of pairs of (optimizer instance, function
  returning the list of variables that optimizer should be applied to).

  For example:
  ```python
    optimizer = CompositeOptimizer([
        (tf.keras.optimizers.legacy.SGD(),
        lambda: model.sparse_trainable_variables),
        (tf.keras.optimizers.legacy.Adam(),
        lambda: model.dense_trainable_variables),
    ])
  ```
  """

  def __init__(self,
               optimizers_and_vars: Sequence[
                   Tuple[tf.keras.optimizers.legacy.Optimizer,
                         Callable[[], Sequence[tf.Variable]]]],
               name: str = "CompositeOptimizer") -> None:
    """Initializes a CompositeOptimizer instance.

    Args:
      optimizers_and_vars:  List of tuples of (optimizer instance, function
        returning variables that the optimizer should apply to).
      name: The optimizer name.

    Raises:
      ValueError: if `optimizers_and_vars` is empty.
    """
    super().__init__(name=name)
    if not optimizers_and_vars:
      raise ValueError("`optimizers_and_vars` can't be empty")
    self._optimizers_and_vars = optimizers_and_vars
    # Register every wrapped optimizer as a tracked dependency of this one.
    for index, pair in enumerate(optimizers_and_vars):
      self._track_trackable(pair[0], name=f"Optimizer{index}")

  def apply_gradients(self, grads_and_vars: Sequence[Tuple[Tensor, Tensor]],
                      name: Optional[str] = None,
                      experimental_aggregate_gradients: bool = True) -> None:
    """See base class."""
    # Step 1: resolve the variable callables and remember which optimizer owns
    # each variable. A variable may have one owner only.
    owner_by_ref = {}
    for owner, get_variables in self._optimizers_and_vars:
      for variable in get_variables():
        key = variable.ref()
        if key in owner_by_ref:
          raise ValueError(
              f"The set of variables handled by each optimizer should be "
              f"disjoint, but variable {variable} is handled both "
              f"by {owner_by_ref[key]} and {owner}.")
        owner_by_ref[key] = owner

    # Step 2: split the incoming (gradient, variable) pairs per owner. Every
    # variable must have an owner.
    grouped = collections.defaultdict(list)
    for grad, variable in grads_and_vars:
      key = variable.ref()
      if key not in owner_by_ref:
        raise ValueError(f"Variable {variable} is not handled by any optimizer. "
                         f"This would cause it to be not trained.")
      grouped[owner_by_ref[key]].append((grad, variable))

    # Step 3: let each optimizer update the variables it owns.
    for owner, owned_grads_and_vars in grouped.items():
      owner.apply_gradients(
          owned_grads_and_vars,
          name=name,
          experimental_aggregate_gradients=experimental_aggregate_gradients)

  def get_config(self):
    """Always raises: variable callables cannot be serialized."""
    raise NotImplementedError("CompositeOptimizer cannot be serialized because"
                              " it uses callable to get variables.")

  @property
  def iterations(self):
    """See base class."""
    # The first wrapped optimizer acts as the reference for the step counter.
    first_optimizer, _ = self._optimizers_and_vars[0]
    return first_optimizer.iterations

  @iterations.setter
  def iterations(self, variable):
    """See base class."""
    # The assigned counter is propagated to every wrapped optimizer.
    for wrapped, _ in self._optimizers_and_vars:
      wrapped.iterations = variable

  def variables(self):
    """Returns the optimizer's variables."""
    # OptimizerV2.variables() returns self._weights, so override that method.
    return self.weights

  @property
  def weights(self) -> List[tf.Variable]:
    """Returns the optimizer's variables."""
    # Concatenation of the wrapped optimizers' weights, in their given order.
    return [
        weight
        for wrapped, _ in self._optimizers_and_vars
        for weight in wrapped.weights
    ]

  @property
  def optimizers(self) -> List[tf.keras.optimizers.legacy.Optimizer]:
    """Returns the optimizers in composite optimizer (in the original order)."""
    wrapped_optimizers = []
    for wrapped, _ in self._optimizers_and_vars:
      wrapped_optimizers.append(wrapped)
    return wrapped_optimizers
class CompositeOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for `CompositeOptimizer`."""

  @parameterized.parameters(
      ("sgd", "adam"),
      ("rmsprop", "sgd"),
      ("adam", "adagrad"),
      ("adagrad", "rmsprop"))
  def test_composite_optimizer(self, optimizer1_type, optimizer2_type):
    """Composite updates must match those of the optimizers applied alone."""
    values1 = [1.0, 2.0, 3.0]
    values2 = [0.5, 0.0, -2.0]
    values3 = [0.1, 0.0, -1.0]

    grad1_values = [0.1, 0.2, 1.0]
    grad2_values = [-0.1, 0.05, 2.0]
    grad3_values = [2.1, 0.0, 0.3]

    # Variables and gradients driven through the composite optimizer.
    var1 = tf.Variable(values1)
    var2 = tf.Variable(values2)
    var3 = tf.Variable(values3)

    grads1 = tf.constant(grad1_values)
    grads2 = tf.constant(grad2_values)
    grads3 = tf.constant(grad3_values)

    optimizer_dict = {
        "sgd": tf.keras.optimizers.legacy.SGD,
        "adam": tf.keras.optimizers.legacy.Adam,
        "rmsprop": tf.keras.optimizers.legacy.RMSprop,
        "adagrad": tf.keras.optimizers.legacy.Adagrad,
    }

    comp_optimizer1 = optimizer_dict[optimizer1_type]()
    comp_optimizer2 = optimizer_dict[optimizer2_type]()

    composite_optimizer = CompositeOptimizer([
        (comp_optimizer1, lambda: [var1]),
        (comp_optimizer2, lambda: [var2, var3]),
    ])

    self.assertSequenceEqual(composite_optimizer.optimizers,
                             [comp_optimizer1, comp_optimizer2])

    # Reference optimizers of the same types, applied to separate copies of
    # the variables that start from the same values.
    optimizer1 = optimizer_dict[optimizer1_type]()
    optimizer2 = optimizer_dict[optimizer2_type]()

    grads_and_vars_1 = [(tf.constant(grad1_values), tf.Variable(values1))]
    grads_and_vars_2 = [(tf.constant(grad2_values), tf.Variable(values2)),
                        (tf.constant(grad3_values), tf.Variable(values3))]
    grads_and_vars = list(zip([grads1, grads2, grads3], [var1, var2, var3]))

    for _ in range(10):
      # Test that applying a composite optimizer has the same effect as
      # applying optimizer1 and optimizer2 separately on subset of gradients/
      # variables.
      composite_optimizer.apply_gradients(grads_and_vars)
      optimizer1.apply_gradients(grads_and_vars_1)
      optimizer2.apply_gradients(grads_and_vars_2)

      self.assertAllClose(grads_and_vars[:1], grads_and_vars_1)
      self.assertAllClose(grads_and_vars[1:], grads_and_vars_2)

  def test_incorrect_inputs(self):
    """Overlapping or missing variable assignments must raise ValueError."""
    var1 = tf.Variable([0.1, 0.2, 1.0])
    var2 = tf.Variable([-5.1, 0.1, 0])
    # NOTE(review): `0/3` evaluates to 0.0 - possibly `0.3` was intended. The
    # value itself is not inspected by this test.
    var3 = tf.Variable([-2.1, 1.3, 0/3])

    grads1 = tf.constant([0.1, 0.2, 1.0])
    grads2 = tf.constant([0.5, 0.0, -2.0])
    grads3 = tf.constant([-0.2, 0.0, -1.0])

    # Test same variable in two optimizers.
    composite_optimizer = CompositeOptimizer([
        (tf.keras.optimizers.legacy.Adam(), lambda: [var1]),
        (tf.keras.optimizers.legacy.Adagrad(), lambda: [var1, var2]),
    ])

    grads_and_vars = list(zip([grads1, grads2], [var1, var2]))

    with self.assertRaises(ValueError):
      composite_optimizer.apply_gradients(grads_and_vars)

    # Test missing variable (var3) in optimizers.
    composite_optimizer = CompositeOptimizer([
        (tf.keras.optimizers.legacy.Adam(), lambda: [var1]),
        (tf.keras.optimizers.legacy.Adagrad(), lambda: [var2]),
    ])

    grads_and_vars = list(zip([grads1, grads2, grads3], [var1, var2, var3]))

    with self.assertRaises(ValueError):
      composite_optimizer.apply_gradients(grads_and_vars)

  def test_checkpoint_save_restore_export(self):
    """Checks checkpoint write/read and SavedModel export of a model."""
    # Use a simple LinearModel to test checkpoint save/restore/export.
    def get_model() -> tf.keras.Model:
      model = tf.keras.experimental.LinearModel(units=10)

      # The first trainable variable goes to Adam, all others to Adagrad. The
      # lambdas are evaluated lazily, i.e. once the variables exist.
      composite_optimizer = CompositeOptimizer([
          (tf.keras.optimizers.legacy.Adam(),
           lambda: model.trainable_variables[:1]),
          (tf.keras.optimizers.legacy.Adagrad(),
           lambda: model.trainable_variables[1:]),
      ])
      model.compile(optimizer=composite_optimizer,
                    loss=tf.keras.losses.MSE)
      return model

    batch_size = 16
    num_of_batches = 8
    rng = np.random.RandomState(42)

    x = rng.normal(size=(num_of_batches * batch_size, 5))
    y = rng.normal(size=(num_of_batches * batch_size, 1))
    training_dataset = tf.data.Dataset.from_tensor_slices((x, y))
    training_dataset = training_dataset.batch(batch_size)

    model = get_model()
    model.fit(training_dataset, epochs=1)

    # Check that optimizer iterations match dataset size.
    self.assertEqual(model.optimizer.iterations.numpy(), num_of_batches)
    # Check that it has state for all the model's variables
    self.assertLen(model.optimizer.variables(), 5)

    # Save checkpoint.
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_path = self.get_temp_dir()
    checkpoint.write(checkpoint_path)

    # Restore to a fresh instance and check.
    new_model = get_model()
    # Train on a single batch only: if the restore fails, we can tell
    # by the number of iterations being 1 rather than `num_of_batches`.
    new_model.fit(training_dataset.take(1))

    checkpoint = tf.train.Checkpoint(model=new_model)
    checkpoint.read(checkpoint_path).assert_consumed()

    # After restoring the checkpoint, optimizer iterations should also be
    # restored to its original value.
    self.assertEqual(new_model.optimizer.iterations.numpy(), num_of_batches)
    # Same for the rest of its variables.
    self.assertAllClose(
        new_model.optimizer.variables(),
        model.optimizer.variables()
    )

    model_pred = new_model.predict(training_dataset)

    # Export without the optimizer and make sure that the reloaded model
    # produces the same predictions.
    with tempfile.TemporaryDirectory() as tmp:
      path = os.path.join(tmp, "model_with_composite_optimizer")
      new_model.save(
          path,
          include_optimizer=False,
          options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
      loaded_model = tf.keras.models.load_model(path)
      loaded_pred = loaded_model.predict(training_dataset)

      self.assertEqual(
          model.layers[0].get_config(), loaded_model.layers[0].get_config())
      self.assertAllEqual(model_pred, loaded_pred)


if __name__ == "__main__":
  tf.test.main()
class MLP(tf.keras.layers.Layer):
  """A stack of dense layers applied one after another."""

  def __init__(
      self,
      units: List[int],
      use_bias: bool = True,
      activation: Optional[types.Activation] = "relu",
      final_activation: Optional[types.Activation] = None,
      **kwargs) -> None:
    """Creates the dense layers of the block.

    Args:
      units: Output size of every dense layer, in order of application.
      use_bias: Whether the dense layers have a bias term.
      activation: Activation of every dense layer but the last one.
      final_activation: Activation of the last dense layer.
      **kwargs: Forwarded to the Keras Layer base class.
    """

    super().__init__(**kwargs)

    # Hidden layers come first, so the layers are created in application order.
    hidden_layers = [
        tf.keras.layers.Dense(
            hidden_units, activation=activation, use_bias=use_bias)
        for hidden_units in units[:-1]
    ]
    output_layer = tf.keras.layers.Dense(
        units[-1], activation=final_activation, use_bias=use_bias)

    self._sublayers = hidden_layers + [output_layer]

  def call(self, x: tf.Tensor) -> tf.Tensor:
    """Feeds `x` through all dense layers and returns the final output."""
    output = x
    for sublayer in self._sublayers:
      output = sublayer(output)

    return output
def test_cases(
    k=(5, 10),
    batch_size=(3, 16),
    num_queries=(3, 15, 16),
    num_candidates=(1024, 128),
    indices_dtype=(str, None),
    use_exclusions=(True, False)) -> Iterator[Dict[str, Any]]:
  """Generates test cases.

  Generates all possible combinations of input arguments as test cases.

  Args:
    k: The number of candidates to retrieve.
    batch_size: The query batch size.
    num_queries: Number of queries.
    num_candidates: Number of candidates.
    indices_dtype: The type of indices.
    use_exclusions: Whether to test exclusions.

  Yields:
    Keyword argument dicts.
  """

  keys = ("k", "batch_size", "num_queries", "num_candidates", "indices_dtype",
          "use_exclusions")

  for values in itertools.product(k, batch_size, num_queries, num_candidates,
                                  indices_dtype, use_exclusions):
    yield dict(zip(keys, values))


class FactorizedTopKTestBase(tf.test.TestCase, parameterized.TestCase):
  """Shared checks for the factorized top-K layers."""

  def run_save_and_restore_test(self, layer, query, num):
    """Checks that a saved and reloaded layer returns the same results.

    Args:
      layer: An indexed top-K layer.
      query: Query embeddings passed to the layer.
      num: How many times the layer is queried before and after the export.
    """
    for _ in range(num):
      pre_serialization_results = layer(query)

    path = os.path.join(self.get_temp_dir(), "layer")
    layer.save(
        path, options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
    restored = tf.keras.models.load_model(path)

    for _ in range(num):
      post_serialization_results = restored(tf.constant(query))

    self.assertAllEqual(post_serialization_results, pre_serialization_results)

  def run_top_k_test(self,
                     layer_class,
                     k,
                     batch_size,
                     num_queries,
                     num_candidates,
                     indices_dtype,
                     use_exclusions,
                     random_seed=42,
                     check_export=True):
    """Compares a layer's top-K results with a NumPy reference.

    Args:
      layer_class: Callable that builds the layer under test from `k`.
      k: The number of candidates to retrieve.
      batch_size: Batch size of the candidate dataset.
      num_queries: Number of queries.
      num_candidates: Number of candidates.
      indices_dtype: Type of the candidate identifiers; if None, no
        identifiers are indexed.
      use_exclusions: Whether to query with per-query exclusions.
      random_seed: Seed of the random generator producing the data.
      check_export: Whether to also check a saved and reloaded layer.
    """

    layer = layer_class(k=k)

    rng = np.random.RandomState(random_seed)
    candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)

    candidate_indices = np.arange(num_candidates).astype(
        indices_dtype if indices_dtype is not None else np.int32)

    exclude = rng.randint(0, num_candidates, size=(num_queries, 5))

    scores = np.dot(query, candidates.T)

    # Set scores of candidates chosen for exclusion to a low value.
    adjusted_scores = scores.copy()
    if use_exclusions:
      exclude_identifiers = candidate_indices[exclude]
      for row_idx, row in enumerate(exclude):
        # `set` makes sure that a repeated index is penalised once only.
        for col_idx in set(row):
          adjusted_scores[row_idx, col_idx] -= 1000.0
    else:
      exclude_identifiers = None

    # Get indices based on adjusted scores, but retain actual scores.
    indices = np.argsort(-adjusted_scores, axis=1)[:, :k]
    expected_top_scores = np.take_along_axis(scores, indices, 1)
    expected_top_indices = candidate_indices[indices]

    candidates = tf.data.Dataset.from_tensor_slices(candidates).batch(
        batch_size)

    if indices_dtype is not None:
      identifiers = tf.data.Dataset.from_tensor_slices(candidate_indices).batch(
          batch_size)
      candidates = tf.data.Dataset.zip((identifiers, candidates))

    # Call twice to ensure the results are repeatable.
    for _ in range(2):
      # Indexing is needed for both query variants.
      layer.index_from_dataset(candidates)
      if use_exclusions:
        top_scores, top_indices = layer.query_with_exclusions(
            query, exclude_identifiers)
      else:
        top_scores, top_indices = layer(query)

      self.assertAllEqual(top_scores.shape, expected_top_scores.shape)
      self.assertAllEqual(top_indices.shape, expected_top_indices.shape)
      self.assertAllClose(top_scores, expected_top_scores, atol=1e-4)

      self.assertAllEqual(top_indices.numpy().astype(indices_dtype),
                          expected_top_indices)

    if not check_export:
      return

    # Save and restore to check export.
    path = os.path.join(self.get_temp_dir(), "layer")
    layer.save(
        path, options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
    restored = tf.keras.models.load_model(path)

    if use_exclusions:
      _, restored_top_indices = restored.query_with_exclusions(
          query, exclude_identifiers)
    else:
      _, restored_top_indices = restored(query)

    self.assertAllEqual(restored_top_indices.numpy().astype(indices_dtype),
                        expected_top_indices)


class StreamingTest(FactorizedTopKTestBase):
  """Top-K checks for the `Streaming` layer (export is not checked)."""

  @parameterized.parameters(test_cases())
  def test_streaming(self, *args, **kwargs):
    self.run_top_k_test(
        factorized_top_k.Streaming, *args, check_export=False, **kwargs)


class BruteForceTest(FactorizedTopKTestBase):
  """Top-K checks for the `BruteForce` layer."""

  @parameterized.parameters(test_cases())
  def test_brute_force(self, *args, **kwargs):
    self.run_top_k_test(factorized_top_k.BruteForce, *args, **kwargs)


class ScannTest(FactorizedTopKTestBase):
  """Checks for the `ScaNN` layer."""

  @parameterized.parameters(str, np.float32, np.float64, np.int32, np.int64)
  def test_scann(self, identifier_dtype):
    """Indexes in-memory candidates with identifiers of several dtypes."""

    num_candidates, num_queries = (1000, 4)

    rng = np.random.RandomState(42)
    candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)
    candidate_names = np.arange(num_candidates).astype(identifier_dtype)

    scann = factorized_top_k.ScaNN()
    scann.index(candidates, candidate_names)

    self.run_save_and_restore_test(scann, query, 100)

  def test_scann_dataset_arg_no_identifiers(self):
    """Indexes a dataset that contains candidate embeddings only."""

    num_candidates, num_queries = (100, 4)

    rng = np.random.RandomState(42)
    candidates = tf.data.Dataset.from_tensor_slices(
        rng.normal(size=(num_candidates, 4)).astype(np.float32))
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)

    scann = factorized_top_k.ScaNN()
    scann.index_from_dataset(candidates.batch(100))

    self.run_save_and_restore_test(scann, query, 100)

  def test_scann_dataset_arg_with_identifiers(self):
    """Indexes a dataset of (identifiers, candidate embeddings) pairs."""

    num_candidates, num_queries = (100, 4)

    rng = np.random.RandomState(42)
    candidates = tf.data.Dataset.from_tensor_slices(
        rng.normal(size=(num_candidates, 4)).astype(np.float32))
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)
    identifiers = tf.data.Dataset.from_tensor_slices(np.arange(num_candidates))

    index = factorized_top_k.ScaNN()
    # `Dataset.zip` is a static method: calling it as
    # `identifiers.zip(candidates)` would pass `candidates` alone and leave
    # the identifiers out. Zip both datasets explicitly, as the other tests of
    # this file do.
    index.index_from_dataset(
        tf.data.Dataset.zip((identifiers, candidates)).batch(100))

    self.run_save_and_restore_test(index, query, 100)

  @parameterized.parameters(factorized_top_k.ScaNN, factorized_top_k.BruteForce)
  def test_raise_on_incorrect_input_shape(
      self, layer_class: factorized_top_k.TopK):
    """Mismatching identifier and candidate batches must raise ValueError."""

    num_candidates = 100
    candidates = tf.data.Dataset.from_tensor_slices(
        np.random.normal(size=(num_candidates, 4)).astype(np.float32))
    # One identifier less than candidates, and batched differently below.
    identifiers = tf.data.Dataset.from_tensor_slices(
        np.arange(num_candidates - 1))

    with self.assertRaises(ValueError):
      index = layer_class()
      index.index_from_dataset(
          tf.data.Dataset.zip((identifiers.batch(20), candidates.batch(100)))
      )

  @parameterized.parameters(test_cases())
  def test_scann_top_k(self, k, batch_size, num_queries, num_candidates,
                       indices_dtype, use_exclusions):
    """Runs the generic top-K check against a single-leaf ScaNN index."""

    def scann(k):
      """Returns brute-force-like ScaNN for testing."""
      return factorized_top_k.ScaNN(
          k=k,
          num_leaves=1,
          num_leaves_to_search=1,
          num_reordering_candidates=num_candidates)

    self.run_top_k_test(scann, k, batch_size, num_queries, num_candidates,
                        indices_dtype, use_exclusions)


if __name__ == "__main__":
  tf.test.main()
| -------------------------------------------------------------------------------- /tensorflow_recommenders/layers/feature_interaction/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Feature Interaction layers.""" 16 | 17 | from tensorflow_recommenders.layers.feature_interaction.dcn import Cross 18 | from tensorflow_recommenders.layers.feature_interaction.dot_interaction import DotInteraction 19 | from tensorflow_recommenders.layers.feature_interaction.multi_layer_dcn import MultiLayerDCN 20 | -------------------------------------------------------------------------------- /tensorflow_recommenders/layers/feature_interaction/dcn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
"""Implements `Cross` Layer, the cross layer in Deep & Cross Network (DCN)."""

from typing import Union, Text, Optional

import tensorflow as tf


@tf.keras.utils.register_keras_serializable()
class Cross(tf.keras.layers.Layer):
  """Cross Layer in Deep & Cross Network to learn explicit feature interactions.

  A layer that creates explicit and bounded-degree feature interactions
  efficiently. The `call` method accepts two tensors. The first input `x0` is
  the base layer that contains the original features (usually the embedding
  layer); the second input `xi` is the output of the previous `Cross` layer in
  the stack, i.e., the i-th `Cross` layer. For the first `Cross` layer in the
  stack, x0 = xi, and the second argument may be omitted.

  The output is x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi,
  where .* designates elementwise multiplication, W could be a full-rank
  matrix, or a low-rank matrix U*V to reduce the computational cost, and
  diag_scale increases the diagonal of W to improve training stability (
  especially for the low-rank case). When `preactivation` is set it is applied
  to `W * xi + bias` before `diag_scale * xi` is added.

  References:
      1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf)
        See Eq. (1) for full-rank and Eq. (2) for low-rank version.
      2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf)

  Example:

      ```python
      # after embedding layer in a functional model:
      input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64)
      x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6)(input)
      x1 = Cross()(x0, x0)
      x2 = Cross()(x0, x1)
      logits = tf.keras.layers.Dense(units=10)(x2)
      model = tf.keras.Model(input, logits)
      ```

  Args:
      projection_dim: project dimension to reduce the computational cost.
        Default is `None` such that a full (`input_dim` by `input_dim`) matrix
        W is used. If enabled, a low-rank matrix W = U*V will be used, where U
        is of size `input_dim` by `projection_dim` and V is of size
        `projection_dim` by `input_dim`. `projection_dim` need to be smaller
        than `input_dim`/2 to improve the model efficiency. In practice, we've
        observed that `projection_dim` = d/4 consistently preserved the
        accuracy of a full-rank version.
      diag_scale: a non-negative float used to increase the diagonal of the
        kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an
        identity matrix. `None` is treated as 0.
      use_bias: whether to add a bias term for this layer. If set to False,
        no bias term will be used.
      preactivation: Activation applied to output matrix of the layer, before
        multiplication with the input. Can be used to control the scale of the
        layer's outputs and improve stability.
      kernel_initializer: Initializer to use on the kernel matrix.
      bias_initializer: Initializer to use on the bias vector.
      kernel_regularizer: Regularizer to use on the kernel matrix.
      bias_regularizer: Regularizer to use on bias vector.

  Input shape: Two (batch_size, `input_dim`) dimensional inputs.
  Output shape: A single (batch_size, `input_dim`) dimensional output.

  Raises:
      ValueError: if `diag_scale` is negative, or if the last dimensions of
        the two inputs differ.
  """

  def __init__(
      self,
      projection_dim: Optional[int] = None,
      diag_scale: Optional[float] = 0.0,
      use_bias: bool = True,
      preactivation: Optional[Union[str, tf.keras.layers.Activation]] = None,
      kernel_initializer: Union[
          Text, tf.keras.initializers.Initializer] = "truncated_normal",
      bias_initializer: Union[Text,
                              tf.keras.initializers.Initializer] = "zeros",
      kernel_regularizer: Union[Text, None,
                                tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Union[Text, None,
                              tf.keras.regularizers.Regularizer] = None,
      **kwargs):

    super(Cross, self).__init__(**kwargs)

    self._projection_dim = projection_dim
    # `diag_scale` is annotated as Optional: treat `None` as "no diagonal
    # scaling" rather than failing on the comparison below with a TypeError.
    self._diag_scale = 0.0 if diag_scale is None else diag_scale
    self._use_bias = use_bias
    self._preactivation = tf.keras.activations.get(preactivation)
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._input_dim = None

    self._supports_masking = True

    if self._diag_scale < 0:
      raise ValueError(
          "`diag_scale` should be non-negative. Got `diag_scale` = {}".format(
              self._diag_scale))

  def build(self, input_shape):
    """Creates the dense sub-layers for the full-rank or low-rank kernel."""
    last_dim = input_shape[-1]

    if self._projection_dim is None:
      # Full-rank: a single `input_dim` x `input_dim` kernel.
      self._dense = tf.keras.layers.Dense(
          last_dim,
          kernel_initializer=_clone_initializer(self._kernel_initializer),
          bias_initializer=self._bias_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          use_bias=self._use_bias,
          dtype=self.dtype,
          activation=self._preactivation,
      )
    else:
      # Low-rank: project down with U (no bias), then back up with V, which
      # carries the bias and the preactivation.
      self._dense_u = tf.keras.layers.Dense(
          self._projection_dim,
          kernel_initializer=_clone_initializer(self._kernel_initializer),
          kernel_regularizer=self._kernel_regularizer,
          use_bias=False,
          dtype=self.dtype,
      )
      self._dense_v = tf.keras.layers.Dense(
          last_dim,
          kernel_initializer=_clone_initializer(self._kernel_initializer),
          bias_initializer=self._bias_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          use_bias=self._use_bias,
          dtype=self.dtype,
          activation=self._preactivation,
      )
    self.built = True

  def call(self, x0: tf.Tensor, x: Optional[tf.Tensor] = None) -> tf.Tensor:
    """Computes the feature cross.

    Args:
      x0: The input tensor
      x: Optional second input tensor. If provided, the layer will compute
        crosses between x0 and x; if not provided, the layer will compute
        crosses between x0 and itself.

    Returns:
      Tensor of crosses.

    Raises:
      ValueError: if the last dimensions of `x0` and `x` differ.
    """

    if not self.built:
      self.build(x0.shape)

    if x is None:
      x = x0

    if x0.shape[-1] != x.shape[-1]:
      raise ValueError(
          "`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x "
          "dimension {}. This case is not supported yet.".format(
              x0.shape[-1], x.shape[-1]))

    if self._projection_dim is None:
      prod_output = self._dense(x)
    else:
      prod_output = self._dense_v(self._dense_u(x))

    prod_output = tf.cast(prod_output, self.compute_dtype)

    # Skipped entirely when diag_scale is 0.
    if self._diag_scale:
      prod_output = prod_output + self._diag_scale * x

    return x0 * prod_output + x

  def get_config(self):
    """Returns the constructor arguments merged into the base layer config."""
    config = {
        "projection_dim":
            self._projection_dim,
        "diag_scale":
            self._diag_scale,
        "use_bias":
            self._use_bias,
        "preactivation":
            tf.keras.activations.serialize(self._preactivation),
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))


def _clone_initializer(initializer):
  """Returns a fresh copy of `initializer` when it can be rebuilt from config.

  Objects that cannot be round-tripped through `get_config`/`from_config`
  (for example plain callables, which `tf.keras.initializers.get` passes
  through unchanged) are returned as-is instead of raising `AttributeError`.
  """
  can_clone = (hasattr(initializer, "get_config") and
               hasattr(type(initializer), "from_config"))
  if not can_clone:
    return initializer
  return initializer.__class__.from_config(initializer.get_config())
"""Tests for Cross layer."""

import os
import tempfile

import numpy as np
import tensorflow as tf

from tensorflow_recommenders.layers.feature_interaction.dcn import Cross


class CrossTest(tf.test.TestCase):
  # Do not use layer_test due to multiple inputs.

  def test_full_matrix(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
    layer = Cross(projection_dim=None, kernel_initializer="ones")
    output = layer(x0, x)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.55, 0.8, 1.05]]), output)

  def test_low_rank_matrix(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
    layer = Cross(projection_dim=1, kernel_initializer="ones")
    output = layer(x0, x)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.55, 0.8, 1.05]]), output)

  def test_one_input(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = Cross(projection_dim=None, kernel_initializer="ones")
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.16, 0.32, 0.48]]), output)

  def test_unsupported_input_dim(self):
    with self.assertRaisesRegex(ValueError,
                                r"dimension mismatch"):
      x0 = np.random.random((12, 5))
      x = np.random.random((12, 7))
      layer = Cross()
      layer(x0, x)

  def test_invalid_diag_scale(self):
    with self.assertRaisesRegex(ValueError,
                                r"`diag_scale` should be non-negative"):
      x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
      x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
      layer = Cross(diag_scale=-1.)
      layer(x0, x)

  def test_bias(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
    layer = Cross(projection_dim=None, kernel_initializer="ones",
                  bias_initializer="ones")
    output = layer(x0, x)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.65, 1., 1.35]]), output)

  def test_serialization(self):
    layer = Cross(projection_dim=None, preactivation="swish")
    serialized_layer = tf.keras.layers.serialize(layer)
    new_layer = tf.keras.layers.deserialize(serialized_layer)
    self.assertEqual(layer.get_config(), new_layer.get_config())

  def test_diag_scale(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
    layer = Cross(
        projection_dim=None, diag_scale=1., kernel_initializer="ones")
    output = layer(x0, x)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.59, 0.9, 1.23]]), output)

  def test_preactivation(self):
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
    layer = Cross(
        projection_dim=None,
        preactivation=tf.zeros_like
    )
    output = layer(x0, x)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    # With the product term zeroed out, the layer reduces to the residual `x`.
    self.assertAllClose(x, output)

  def test_save_model(self):

    def get_model():
      x0 = tf.keras.layers.Input(shape=(13,))
      x1 = Cross(projection_dim=None)(x0, x0)
      x2 = Cross(projection_dim=None)(x0, x1)
      logits = tf.keras.layers.Dense(units=1)(x2)
      model = tf.keras.Model(x0, logits)
      return model

    model = get_model()
    random_input = np.random.uniform(size=(10, 13))
    model_pred = model.predict(random_input)

    with tempfile.TemporaryDirectory() as tmp:
      path = os.path.join(tmp, "dcn_model")
      model.save(
          path,
          options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
      loaded_model = tf.keras.models.load_model(path)
      loaded_pred = loaded_model.predict(random_input)
    for i in range(3):
      # assertEqual instead of a bare `assert`, which is stripped under -O.
      self.assertEqual(model.layers[i].get_config(),
                       loaded_model.layers[i].get_config())
    self.assertAllEqual(model_pred, loaded_pred)


if __name__ == "__main__":
  tf.test.main()

# ------------------------------------------------------------------------------
# File: tensorflow_recommenders/layers/feature_interaction/dot_interaction.py
# ------------------------------------------------------------------------------

"""Implements `Dot Interaction` Layer of DLRM model."""

from typing import List, Optional

import tensorflow as tf


class DotInteraction(tf.keras.layers.Layer):
  """Dot interaction layer.

  See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf,
  section 2.1.3. Sparse activations and dense activations are combined.
  Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the
  same dimension and the output is a batch of Tensors with all distinct pairwise
  dot products of the form dot(e_i, e_j) for i <= j if self_interaction is
  True, otherwise dot(e_i, e_j) i < j.

  Attributes:
    self_interaction: Boolean indicating if features should self-interact.
      If it is True, then the diagonal entries of the interaction matrix are
      also taken.
    skip_gather: An optimization flag. If it's set then the upper triangle part
      of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting
      activations will be of dimension [num_features * num_features] from which
      half will be zeros. Otherwise activations will be only lower triangle part
      of the interaction matrix. The latter saves space but is much slower.
    name: String name of the layer.
  """

  def __init__(self,
               self_interaction: bool = False,
               skip_gather: bool = False,
               name: Optional[str] = None,
               **kwargs) -> None:
    self._self_interaction = self_interaction
    self._skip_gather = skip_gather
    super().__init__(name=name, **kwargs)

  def call(self, inputs: List[tf.Tensor]) -> tf.Tensor:
    """Performs the interaction operation on the tensors in the list.

    The tensors represent transformed dense features and embedded categorical
    features.
    Pre-condition: The tensors should all have the same shape.

    Args:
      inputs: List of features with shapes [batch_size, feature_dim].

    Returns:
      activations: Tensor representing interacted features. It has a dimension
      `num_features * num_features` if skip_gather is True, otherwise
      `num_features * (num_features + 1) / 2` if self_interaction is True and
      `num_features * (num_features - 1) / 2` if self_interaction is False.

    Raises:
      ValueError: if the concatenated inputs cannot be reshaped to
        [batch_size, -1, feature_dim].
    """
    num_features = len(inputs)
    batch_size = tf.shape(inputs[0])[0]
    feature_dim = tf.shape(inputs[0])[1]
    # concat_features shape: batch_size, num_features, feature_dim
    try:
      concat_features = tf.concat(inputs, axis=-1)
      concat_features = tf.reshape(concat_features,
                                   [batch_size, -1, feature_dim])
    except (ValueError, tf.errors.InvalidArgumentError) as e:
      # The two message parts previously ran together ("originalerror").
      raise ValueError(f"Input tensors' dimensions must be equal, original "
                       f"error message: {e}") from e

    # Interact features, select lower-triangular portion, and re-shape.
    xactions = tf.matmul(concat_features, concat_features, transpose_b=True)
    ones = tf.ones_like(xactions)
    if self._self_interaction:
      # Selecting lower-triangular portion including the diagonal.
      lower_tri_mask = tf.linalg.band_part(ones, -1, 0)
      upper_tri_mask = ones - lower_tri_mask
      out_dim = num_features * (num_features + 1) // 2
    else:
      # Selecting lower-triangular portion not including the diagonal.
      upper_tri_mask = tf.linalg.band_part(ones, 0, -1)
      lower_tri_mask = ones - upper_tri_mask
      out_dim = num_features * (num_features - 1) // 2

    if self._skip_gather:
      # Setting upper triangle part of the interaction matrix to zeros.
      activations = tf.where(condition=tf.cast(upper_tri_mask, tf.bool),
                             x=tf.zeros_like(xactions),
                             y=xactions)
      out_dim = num_features * num_features
    else:
      activations = tf.boolean_mask(xactions, lower_tri_mask)
    activations = tf.reshape(activations, (batch_size, out_dim))
    return activations

  def get_config(self):
    """Returns the constructor arguments merged into the base layer config.

    Without this override `self_interaction` and `skip_gather` would not be
    part of the serialized layer.
    """
    config = {
        "self_interaction": self._self_interaction,
        "skip_gather": self._skip_gather,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))
"""Tests for DotInteraction layer."""

import numpy as np
import tensorflow as tf

from tensorflow_recommenders.layers.feature_interaction.dot_interaction import DotInteraction


class DotInteractionTest(tf.test.TestCase):
  """Checks DotInteraction outputs and its input-shape validation."""

  def test_valid_input(self):
    """Compares every flag combination against hand-computed dot products."""
    rows = (
        [0.1, -4.3, 0.2, 1.1, 0.3],
        [2.0, 3.2, -1.0, 0.0, 1.0],
        [0.0, 1.0, -3.0, -2.2, -0.2],
    )
    features = [np.asarray([row]).astype(np.float32) for row in rows]

    def pair_dot(i, j):
      # Dot product of the single example in feature i with that in feature j.
      return np.dot(features[i][0], features[j][0])

    f11, f12, f13 = pair_dot(0, 0), pair_dot(0, 1), pair_dot(0, 2)
    f22, f23 = pair_dot(1, 1), pair_dot(1, 2)
    f33 = pair_dot(2, 2)

    # (self_interaction, skip_gather, expected activations)
    scenarios = (
        (True, False, [[f11,
                        f12, f22,
                        f13, f23, f33]]),
        (True, True, [[f11, 0, 0,
                       f12, f22, 0,
                       f13, f23, f33]]),
        (False, False, [[f12,
                         f13, f23]]),
        (False, True, [[0, 0, 0,
                        f12, 0, 0,
                        f13, f23, 0]]),
    )

    for self_interaction, skip_gather, expected in scenarios:
      layer = DotInteraction(self_interaction=self_interaction,
                             skip_gather=skip_gather)
      output = layer(list(features))
      self.assertAllClose(np.asarray(expected), output)

  def test_non_matching_dimensions(self):
    """A feature with a different width must be rejected with ValueError."""
    mismatched = [
        np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32),
        np.asarray([[2.0, -1.0, 1.0]]).astype(np.float32),
        np.asarray([[0.0, 1.0]]).astype(np.float32),
    ]
    with self.assertRaisesRegex(ValueError, r"dimensions must be equal"):
      layer = DotInteraction()
      layer(mismatched)


if __name__ == "__main__":
  tf.test.main()
"""Implements `MultiLayerDCN`, a stack of low-rank DCN cross layers."""

from typing import Union, Text, Optional

import tensorflow as tf


@tf.keras.utils.register_keras_serializable()
class MultiLayerDCN(tf.keras.layers.Layer):
  """Stack of low-rank cross layers from Deep & Cross Network (DCN).

  Applies `num_layers` cross layers in sequence to a single input tensor `x0`
  (usually the output of the embedding layer). Layer l owns two dense
  projections: U_l maps `input_dim` to `projection_dim` without a bias, and
  V_l maps `projection_dim` back to `input_dim` with an optional bias. Each
  layer computes

    x_{l+1} = x0 .* V_l(U_l(x_l)) + x_l,

  where .* designates elementwise multiplication and the first layer receives
  `x0` itself. Unlike `Cross`, the `call` method takes one tensor only and
  there is no `diag_scale` or `preactivation` option.

  References:
      1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf)
        See Eq. (2) for the low-rank version.
      2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf)

  Example:

      ```python
      inputs = tf.keras.Input(shape=(6,))
      crossed = MultiLayerDCN(projection_dim=2, num_layers=3)(inputs)
      logits = tf.keras.layers.Dense(units=10)(crossed)
      model = tf.keras.Model(inputs, logits)
      ```

  Attributes:
    projection_dim: project dimension to reduce the computational cost. a
      low-rank matrix W = U*V will be used, where U is of size `input_dim` by
      `projection_dim` and V is of size `projection_dim` by `input_dim`.
      `projection_dim` need to be smaller than `input_dim`/2 to improve the
      model efficiency. In practice, we've observed that `projection_dim` =
      input_dim/4 consistently preserved the accuracy of a full-rank version.
    num_layers: the number of stacked DCN layers
    use_bias: whether to add a bias term for this layer. If set to False, no
      bias term will be used.
    kernel_initializer: Initializer to use on the kernel matrix.
    bias_initializer: Initializer to use on the bias vector.
    kernel_regularizer: Regularizer to use on the kernel matrix.
    bias_regularizer: Regularizer to use on bias vector.

  Input shape: A single (batch_size, `input_dim`) dimensional input.
  Output shape: A single (batch_size, `input_dim`) dimensional output.
  """

  def __init__(
      self,
      projection_dim: Optional[int] = 1,
      num_layers: Optional[int] = 3,
      use_bias: bool = True,
      kernel_initializer: Union[
          Text, tf.keras.initializers.Initializer] = "truncated_normal",
      bias_initializer: Union[Text,
                              tf.keras.initializers.Initializer] = "zeros",
      kernel_regularizer: Union[Text, None,
                                tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Union[Text, None,
                              tf.keras.regularizers.Regularizer] = None,
      **kwargs):

    super(MultiLayerDCN, self).__init__(**kwargs)

    self._projection_dim = projection_dim
    self._num_layers = num_layers
    self._use_bias = use_bias
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._input_dim = None

    self._supports_masking = True

  def build(self, input_shape):
    """Creates one (U, V) pair of dense sub-layers per cross layer."""
    last_dim = input_shape[-1]
    self._dense_u_kernels, self._dense_v_kernels = [], []

    for _ in range(self._num_layers):
      # U: project down to `projection_dim`, no bias.
      self._dense_u_kernels.append(tf.keras.layers.Dense(
          self._projection_dim,
          kernel_initializer=_clone_initializer(self._kernel_initializer),
          kernel_regularizer=self._kernel_regularizer,
          use_bias=False,
          dtype=self.dtype,
      ))
      # V: project back up to the input width, carrying the optional bias.
      self._dense_v_kernels.append(tf.keras.layers.Dense(
          last_dim,
          kernel_initializer=_clone_initializer(self._kernel_initializer),
          bias_initializer=self._bias_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          use_bias=self._use_bias,
          dtype=self.dtype,
      ))

    self.built = True

  def call(self, x0: tf.Tensor) -> tf.Tensor:
    """Computes the multi layer DCN feature cross.

    Args:
      x0: The input tensor

    Returns:
      Tensor of crosses.
    """
    if not self.built:
      self.build(x0.shape)

    xl = x0

    for i in range(self._num_layers):
      prod_output = self._dense_v_kernels[i](self._dense_u_kernels[i](xl))
      # Every layer crosses with the original input and keeps a residual.
      xl = x0 * prod_output + xl

    return xl

  def get_config(self):
    """Returns the constructor arguments merged into the base layer config."""
    config = {
        "projection_dim":
            self._projection_dim,
        "num_layers":
            self._num_layers,
        "use_bias":
            self._use_bias,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))


def _clone_initializer(initializer):
  """Returns a fresh copy of `initializer` when it can be rebuilt from config.

  Objects that cannot be round-tripped through `get_config`/`from_config`
  (for example plain callables, which `tf.keras.initializers.get` passes
  through unchanged) are returned as-is instead of raising `AttributeError`.
  """
  can_clone = (hasattr(initializer, "get_config") and
               hasattr(type(initializer), "from_config"))
  if not can_clone:
    return initializer
  return initializer.__class__.from_config(initializer.get_config())
--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/multi_layer_dcn_test.py:
--------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the MultiLayerDCN layer."""

import os
import tempfile

import numpy as np
import tensorflow as tf
from tensorflow_recommenders.layers.feature_interaction.multi_layer_dcn import MultiLayerDCN


class MultiLayerDCNTest(tf.test.TestCase):
  # Do not use layer_test due to multiple inputs.
  # NOTE(review): the comment above looks inherited from the `Cross` layer
  # tests; every test below calls the layer with a single input — confirm.

  def test_full_matrix(self):
    # One layer, all-ones kernels, projection_dim == input_dim == 3:
    # U(x0) = [0.6, 0.6, 0.6], V(U(x0)) = [1.8, 1.8, 1.8],
    # output = x0 * 1.8 + x0 = 2.8 * x0.
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=3,
        num_layers=1,
        use_bias=False,
        kernel_initializer="ones",
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.28, 0.56, 0.84]]), output)

  def test_low_rank_matrix(self):
    # One layer, all-ones kernels, projection_dim == 1:
    # U(x0) = [0.6], V(U(x0)) = [0.6, 0.6, 0.6], output = 1.6 * x0.
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=1,
        num_layers=1,
        use_bias=False,
        kernel_initializer="ones",
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.16, 0.32, 0.48]]), output)

  def test_bias(self):
    # Default num_layers (3), all-ones kernels and biases, projection_dim == 1.
    # The running output is 2.6 * x0, then 5.16 * x0, then 9.256 * x0.
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=1, kernel_initializer="ones", bias_initializer="ones"
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.9256, 1.8512, 2.7768]]), output)

  def test_serialization(self):
    # A serialize/deserialize round trip must preserve the layer config.
    layer = MultiLayerDCN(projection_dim=1)
    serialized_layer = tf.keras.layers.serialize(layer)
    new_layer = tf.keras.layers.deserialize(serialized_layer)
    self.assertEqual(layer.get_config(), new_layer.get_config())

  def test_save_model(self):
    # Saving and re-loading a model containing the layer must preserve both
    # the layer configs and the predictions.

    def get_model():
      x0 = tf.keras.layers.Input(shape=(13,))
      x1 = MultiLayerDCN(projection_dim=1)(x0)
      x2 = MultiLayerDCN(projection_dim=1)(x1)
      logits = tf.keras.layers.Dense(units=1)(x2)
      model = tf.keras.Model(x0, logits)
      return model

    model = get_model()
    random_input = np.random.uniform(size=(10, 13))
    model_pred = model.predict(random_input)

    with tempfile.TemporaryDirectory() as tmp:
      path = os.path.join(tmp, "multi_layer_dcn_model")
      model.save(
          path,
          options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
      loaded_model = tf.keras.models.load_model(path)
      loaded_pred = loaded_model.predict(random_input)
    # Compares the first three layers of the original and re-loaded models.
    for i in range(3):
      assert model.layers[i].get_config() == loaded_model.layers[i].get_config()
    self.assertAllEqual(model_pred, loaded_pred)


if __name__ == "__main__":
  tf.test.main()

--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/loss.py:
--------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint-as: python3
"""Layers related to loss computation."""
from typing import Tuple

import numpy as np
import tensorflow as tf

# Very large positive / negative float32 values (1/100th of the float32
# range), added to logits to force them to the top or bottom of a ranking.
MAX_FLOAT = np.finfo(np.float32).max / 100.0
MIN_FLOAT = np.finfo(np.float32).min / 100.0


def _gather_elements_along_row(data: tf.Tensor,
                               column_indices: tf.Tensor) -> tf.Tensor:
  """Gathers elements from a 2D tensor given the column indices of each row.

  A more efficient way of gathering elements from 2D tensor than tf.gather_nd().
  First, gets the flat 1D indices to gather from. Then flattens the data to 1D
  and uses tf.gather() to generate 1D output and finally reshapes the
  output back to 2D.

  Args:
    data: A [N, M] 2D `Tensor`.
    column_indices: A [N, K] 2D `Tensor` denoting for each row, the K column
      indices to gather elements from the data `Tensor`.

  Returns:
    A [N, K] `Tensor` including output elements gathered from data `Tensor`.

  Raises:
    ValueError: if the first dimensions of data and column_indices don't match.
      NOTE(review): the check is a `tf.assert_equal` op; confirm the exact
      exception type it raises in eager and graph mode.
  """
  with tf.control_dependencies(
      [tf.assert_equal(tf.shape(data)[0], tf.shape(column_indices)[0])]):
    num_row = tf.shape(data)[0]
    num_column = tf.shape(data)[1]
    num_gathered = tf.shape(column_indices)[1]
    # [N, K] tensor whose entry (i, j) is the row number i.
    row_indices = tf.tile(
        tf.expand_dims(tf.range(num_row), -1),
        [1, num_gathered])
    flat_data = tf.reshape(data, [-1])
    # Element (row, col) of `data` lives at row * M + col once flattened.
    flat_indices = tf.reshape(
        row_indices * num_column + column_indices, [-1])
    return tf.reshape(
        tf.gather(flat_data, flat_indices), [num_row, num_gathered])


class HardNegativeMining(tf.keras.layers.Layer):
  """Transforms logits and labels to return hard negatives."""

  def __init__(self, num_hard_negatives: int) -> None:
    """Initializes the layer.

    Args:
      num_hard_negatives: How many hard negatives to return.
    """

    super(HardNegativeMining, self).__init__()
    self._num_hard_negatives = num_hard_negatives

  def call(self, logits: tf.Tensor,
           labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    """Filters logits and labels with per-query hard negative mining.

    The result will include logits and labels for num_hard_negatives
    negatives as well as the positive candidate.

    Args:
      logits: [batch_size, number_of_candidates] tensor of logits.
      labels: [batch_size, number_of_candidates] one-hot tensor of labels.

    Returns:
      logits: [batch_size, num_hard_negatives + 1] tensor of logits.
      labels: [batch_size, num_hard_negatives + 1] one-hot tensor of labels.
      The second dimension is smaller when there are fewer than
      num_hard_negatives + 1 candidates.
    """

    # Number of sampled logits, i.e, the number of hard negatives to be
    # sampled (k) + number of true logit (1) per query, capped by the number
    # of candidates (the second dimension of `logits`).
    num_sampled = tf.minimum(self._num_hard_negatives + 1, tf.shape(logits)[1])
    # To gather indices of top k negative logits per row (query) in
    # logits, true logits need to be excluded. First replace the true
    # logits (corresponding to positive labels) with a large score value
    # and then select the top k + 1 logits from each
    # row so that selected indices include the indices of true logit + top k
    # negative logits. This approach is to avoid using inefficient
    # tf.boolean_mask() when excluding true logits.

    # For each query, get the indices of the logits which have the highest
    # k + 1 logit values, including the highest k negative logits and one true
    # logit.
    _, col_indices = tf.nn.top_k(
        logits + labels * MAX_FLOAT, k=num_sampled, sorted=False)

    # Gather sampled logits and corresponding labels.
    logits = _gather_elements_along_row(logits, col_indices)
    labels = _gather_elements_along_row(labels, col_indices)

    return logits, labels


class RemoveAccidentalHits(tf.keras.layers.Layer):
  """Pushes the logits of accidental negatives to a very low value."""

  def call(self, labels: tf.Tensor, logits: tf.Tensor,
           candidate_ids: tf.Tensor) -> tf.Tensor:
    """Masks out selected logits.

    For each row in the batch, adds MIN_FLOAT (a very large negative number)
    to the logits of negative candidates that have the same id as the
    positive candidate in that row. The logit of the positive candidate
    itself is left unchanged.

    Args:
      labels: [batch_size, num_candidates] one-hot labels tensor.
      logits: [batch_size, num_candidates] logits tensor.
      candidate_ids: [num_candidates] candidate identifiers tensor

    Returns:
      logits: Modified logits.
    """
    # A more principled way is to implement softmax_cross_entropy_with_logits
    # with a input mask. Here we approximate so by letting accidental hits
    # have extremely small logits (MIN_FLOAT) for ease-of-implementation.

    candidate_ids = tf.expand_dims(candidate_ids, 1)

    # Id of the positive candidate of every row.
    positive_indices = tf.math.argmax(labels, axis=1)
    positive_candidate_ids = tf.gather(candidate_ids, positive_indices)

    # 1 wherever a candidate shares its id with the row's positive candidate.
    duplicate = tf.cast(
        tf.equal(positive_candidate_ids, tf.transpose(candidate_ids)),
        labels.dtype
    )
    # Removes the positive candidate itself from the mask.
    duplicate = duplicate - labels

    return logits + duplicate * MIN_FLOAT


class SamplingProbablityCorrection(tf.keras.layers.Layer):
  """Sampling probability correction."""

  # NOTE(review): this overrides `__call__` rather than `call`, so the base
  # `Layer.__call__` is not used for this layer — confirm this is intended.
  def __call__(self, logits: tf.Tensor,
               candidate_sampling_probability: tf.Tensor) -> tf.Tensor:
    """Corrects the input logits to account for candidate sampling probability.

    Args:
      logits: Logits tensor to correct.
      candidate_sampling_probability: Sampling probability of the candidates;
        it is clipped to [1e-6, 1] before the logarithm is taken.

    Returns:
      `logits` minus the log of the clipped sampling probability.
    """

    return logits - tf.math.log(
        tf.clip_by_value(candidate_sampling_probability, 1e-6, 1.))

--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/loss_test.py:
--------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint-as: python3
"""Tests for loss layers."""

from absl.testing import parameterized

import numpy as np
import tensorflow as tf

from tensorflow_recommenders.layers import loss


class LossTest(tf.test.TestCase, parameterized.TestCase):
  """Loss layers tests."""

  @parameterized.parameters(42, 123, 8391, 12390, 1230)
  def test_hard_negative_mining(self, random_seed):
    """Test hard negative mining."""

    num_hard_negatives = 3
    # (num_queries, num_candidates)
    shape = (2, 20)
    rng = np.random.RandomState(random_seed)

    logits = rng.uniform(size=shape).astype(np.float32)
    # One-hot labels with the positive column chosen at random per row.
    labels = rng.permutation(np.eye(*shape).T).T.astype(np.float32)

    out_logits, out_labels = loss.HardNegativeMining(num_hard_negatives)(logits,
                                                                         labels)
    out_logits, out_labels = out_logits.numpy(), out_labels.numpy()

    self.assertEqual(out_logits.shape[-1], num_hard_negatives + 1)

    # Logits for positives are always returned.
    self.assertAllClose((out_logits * out_labels).sum(axis=1),
                        (logits * labels).sum(axis=1))

    # Set the logits for the labels to be highest to ignore
    # the effect of labels.
    logits = logits + labels * 1000.0

    out_logits, out_labels = loss.HardNegativeMining(num_hard_negatives)(logits,
                                                                         labels)
    out_logits, out_labels = out_logits.numpy(), out_labels.numpy()

    # Highest K logits are always returned.
    self.assertAllClose(
        np.sort(logits, axis=1)[:, -num_hard_negatives - 1:],
        np.sort(out_logits))

  @parameterized.parameters(42, 123, 8391, 12390, 1230)
  def test_remove_accidental_hits(self, random_seed):
    """Test that only accidental hits have their logits pushed down."""

    # (num_queries, num_candidates)
    shape = (2, 4)
    rng = np.random.RandomState(random_seed)

    logits = rng.uniform(size=shape).astype(np.float32)
    labels = rng.permutation(np.eye(*shape).T).T.astype(np.float32)
    # Ids are drawn from {0, 1, 2} for 4 candidates, so duplicates are
    # guaranteed.
    candidate_ids = rng.randint(0, 3, size=shape[-1])

    out_logits = loss.RemoveAccidentalHits()(
        labels, logits, candidate_ids).numpy()

    # Logits of labels are unchanged.
    self.assertAllClose((out_logits * labels).sum(axis=1),
                        (logits * labels).sum(axis=1))

    for row_idx in range(shape[0]):

      row_positive_idx = np.argmax(labels[row_idx])
      positive_candidate_id = candidate_ids[row_positive_idx]

      for col_idx in range(shape[1]):

        same_candidate_as_positive = (
            positive_candidate_id == candidate_ids[col_idx])
        is_positive = col_idx == row_positive_idx

        if same_candidate_as_positive and not is_positive:
          # The accidental hit had MIN_FLOAT added to its logit.
          self.assertAllClose(out_logits[row_idx, col_idx],
                              logits[row_idx, col_idx] + loss.MIN_FLOAT)
        else:
          # We left the logits unchanged.
          self.assertAllClose(out_logits[row_idx, col_idx], logits[row_idx,
                                                                   col_idx])


class SamplingProbabilityCorrectionTest(
    tf.test.TestCase, parameterized.TestCase):
  """Sampling probability correction layer tests."""

  @parameterized.parameters(42, 123, 8391, 12390, 1230)
  def test_sampling_probability_correction(self, random_seed):
    """Test sampling probability correction."""

    # (num_queries, num_candidates)
    shape = (10, 20)
    rng = np.random.RandomState(random_seed)

    logits = rng.uniform(size=shape).astype(np.float32)
    probs = rng.uniform(size=shape[1]).astype(np.float32)

    corrected_logits = loss.SamplingProbablityCorrection()(logits, probs)
    corrected_logits = corrected_logits.numpy()

    # Probabilities below 1 have a negative log, so subtracting it must
    # increase every logit.
    np.testing.assert_array_less(logits, corrected_logits)

    # set some of the probabilities to 0
    probs_with_zeros = probs * rng.choice([0., 1.], size=probs.shape)

    corrected_logits_with_zeros = loss.SamplingProbablityCorrection()(
        logits, probs_with_zeros)
    corrected_logits_with_zeros = corrected_logits_with_zeros.numpy()

    # Zero probabilities are clipped by the layer, so the result stays finite
    # and still larger than the input logits.
    np.testing.assert_array_less(logits, corrected_logits_with_zeros)


if __name__ == "__main__":
  tf.test.main()

--------------------------------------------------------------------------------
/tensorflow_recommenders/metrics/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Metrics.""" 16 | 17 | from tensorflow_recommenders.metrics.factorized_top_k import Factorized 18 | from tensorflow_recommenders.metrics.factorized_top_k import FactorizedTopK 19 | -------------------------------------------------------------------------------- /tensorflow_recommenders/metrics/factorized_top_k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # lint-as: python3 16 | """Factorized retrieval top K metrics.""" 17 | 18 | import abc 19 | 20 | from typing import List, Optional, Sequence, Union 21 | 22 | import tensorflow as tf 23 | 24 | from tensorflow_recommenders import layers 25 | 26 | 27 | class Factorized(tf.keras.layers.Layer, abc.ABC): 28 | """Computes metrics across top K candidates surfaced by a retrieval model.""" 29 | 30 | @abc.abstractmethod 31 | def update_state( 32 | self, 33 | query_embeddings: tf.Tensor, 34 | true_candidate_embeddings: tf.Tensor, 35 | true_candidate_ids: Optional[tf.Tensor] = None 36 | ) -> tf.Operation: 37 | 38 | raise NotImplementedError() 39 | 40 | def reset_states(self) -> None: 41 | """Resets the metrics.""" 42 | 43 | for metric in self.metrics: 44 | metric.reset_states() 45 | 46 | def result(self) -> List[tf.Tensor]: 47 | """Returns a list of metric results.""" 48 | 49 | return [metric.result() for metric in self.metrics] 50 | 51 | 52 | class FactorizedTopK(Factorized): 53 | """Computes metrics for across top K candidates surfaced by a retrieval model. 54 | 55 | The default metric is top K categorical accuracy: how often the true candidate 56 | is in the top K candidates for a given query. 57 | """ 58 | 59 | def __init__( 60 | self, 61 | candidates: Union[layers.factorized_top_k.TopK, tf.data.Dataset], 62 | ks: Sequence[int] = (1, 5, 10, 50, 100), 63 | name: str = "factorized_top_k", 64 | ) -> None: 65 | """Initializes the metric. 66 | 67 | Args: 68 | candidates: A layer for retrieving top candidates in response 69 | to a query, or a dataset of candidate embeddings from which 70 | candidates should be retrieved. 71 | ks: A sequence of values of `k` at which to perform retrieval evaluation. 72 | name: Optional name. 
73 | """ 74 | 75 | super().__init__(name=name) 76 | 77 | if isinstance(candidates, tf.data.Dataset): 78 | candidates = ( 79 | layers.factorized_top_k.Streaming(k=max(ks)) 80 | .index_from_dataset(candidates) 81 | ) 82 | 83 | self._ks = ks 84 | self._candidates = candidates 85 | self._top_k_metrics = [ 86 | tf.keras.metrics.Mean( 87 | name=f"{self.name}/top_{x}_categorical_accuracy" 88 | ) for x in ks 89 | ] 90 | 91 | def update_state( 92 | self, 93 | query_embeddings: tf.Tensor, 94 | true_candidate_embeddings: tf.Tensor, 95 | true_candidate_ids: Optional[tf.Tensor] = None, 96 | sample_weight: Optional[tf.Tensor] = None, 97 | ) -> tf.Operation: 98 | """Updates the metrics. 99 | 100 | Args: 101 | query_embeddings: [num_queries, embedding_dim] tensor of query embeddings. 102 | true_candidate_embeddings: [num_queries, embedding_dim] tensor of 103 | embeddings for candidates that were selected for the query. 104 | true_candidate_ids: Ids of the true candidates. If supplied, evaluation 105 | will be id-based: the supplied ids will be matched against the ids of 106 | the top candidates returned from the retrieval index, which should have 107 | been constructed with the appropriate identifiers. 108 | 109 | If not supplied, evaluation will be score-based: the score of the true 110 | candidate will be computed and compared with the scores returned from 111 | the index for the top candidates. 112 | 113 | Score-based evaluation is useful for when the true candidate is not 114 | in the retrieval index. Id-based evaluation is useful for when scores 115 | returned from the index are not directly comparable to scores computed 116 | by multiplying the candidate and embedding vector. For example, scores 117 | returned by ScaNN are quantized, and cannot be compared to 118 | full-precision scores. 119 | sample_weight: Optional weighting of each example. Defaults to 1. 120 | 121 | Returns: 122 | Update op. Only used in graph mode. 
123 | """ 124 | 125 | if true_candidate_ids is None and not self._candidates.is_exact(): 126 | raise ValueError( 127 | f"The candidate generation layer ({self._candidates}) does not return " 128 | "exact results. To perform evaluation using that layer, you must " 129 | "supply `true_candidate_ids`, which will be checked against " 130 | "the candidate ids returned from the candidate generation layer." 131 | ) 132 | 133 | positive_scores = tf.reduce_sum( 134 | query_embeddings * true_candidate_embeddings, axis=1, keepdims=True) 135 | 136 | top_k_predictions, retrieved_ids = self._candidates( 137 | query_embeddings, k=max(self._ks)) 138 | 139 | update_ops = [] 140 | 141 | if true_candidate_ids is not None: 142 | # We're using ID-based evaluation. 143 | if len(true_candidate_ids.shape) == 1: 144 | true_candidate_ids = tf.expand_dims(true_candidate_ids, 1) 145 | 146 | # Deal with ScaNN using `NaN`-padding by converting its 147 | # `NaN` scores into minimum scores. 148 | nan_padding = tf.math.is_nan(top_k_predictions) 149 | top_k_predictions = tf.where( 150 | nan_padding, 151 | tf.ones_like(top_k_predictions) * tf.float32.min, 152 | top_k_predictions 153 | ) 154 | 155 | # Check sortedness. 156 | is_sorted = ( 157 | top_k_predictions[:, :-1] - top_k_predictions[:, 1:] 158 | ) 159 | tf.debugging.assert_non_negative( 160 | is_sorted, message="Top-K predictions must be sorted." 161 | ) 162 | 163 | # Check whether the true candidates were retrieved, accounting 164 | # for padding. 165 | ids_match = tf.cast( 166 | tf.math.logical_and( 167 | tf.math.equal(true_candidate_ids, retrieved_ids), 168 | tf.math.logical_not(nan_padding) 169 | ), 170 | tf.float32 171 | ) 172 | 173 | for k, metric in zip(self._ks, self._top_k_metrics): 174 | # By slicing until :k we assume scores are sorted. 175 | # Clip to only count multiple matches once. 
176 | match_found = tf.clip_by_value( 177 | tf.reduce_sum(ids_match[:, :k], axis=1, keepdims=True), 178 | 0.0, 1.0 179 | ) 180 | update_ops.append(metric.update_state(match_found, sample_weight)) 181 | else: 182 | # Score-based evaluation. 183 | y_pred = tf.concat([positive_scores, top_k_predictions], axis=1) 184 | 185 | for k, metric in zip(self._ks, self._top_k_metrics): 186 | targets = tf.zeros(tf.shape(positive_scores)[0], dtype=tf.int32) 187 | top_k_accuracy = tf.math.in_top_k( 188 | targets=targets, 189 | predictions=y_pred, 190 | k=k 191 | ) 192 | update_ops.append(metric.update_state(top_k_accuracy, sample_weight)) 193 | 194 | return tf.group(update_ops) 195 | -------------------------------------------------------------------------------- /tensorflow_recommenders/metrics/factorized_top_k_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The TensorFlow Recommenders Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 

# Lint-as: python3
"""Tests factorized top K metrics."""

import itertools

from absl.testing import parameterized

import numpy as np
import tensorflow as tf

from tensorflow_recommenders import layers
from tensorflow_recommenders import metrics


class FactorizedTopKTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the `FactorizedTopK` metric."""

  # Runs every combination of candidate source (a Streaming index, a
  # BruteForce index, or the raw dataset when `None`) and evaluation mode
  # (id-based or score-based).
  @parameterized.parameters(
      itertools.product(
          (layers.factorized_top_k.Streaming,
           layers.factorized_top_k.BruteForce,
           None),
          (True, False)
      )
  )
  def test_factorized_top_k(self, top_k_layer, use_candidate_ids):
    """Compares the metric with a weighted `in_top_k` over all scores."""

    rng = np.random.RandomState(42)

    num_candidates, num_queries, embedding_dim = (100, 10, 4)

    candidate_ids = np.arange(0, num_candidates).astype(str)
    candidates = rng.normal(size=(num_candidates,
                                  embedding_dim)).astype(np.float32)

    query = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32)
    sample_weight = rng.uniform(size=(num_queries, 1)).astype(np.float32)

    true_candidate_indexes = rng.randint(0, num_candidates, size=num_queries)
    true_candidate_embeddings = candidates[true_candidate_indexes]
    true_candidate_ids = candidate_ids[true_candidate_indexes]

    # Full [num_queries, num_candidates] score matrix used as ground truth.
    candidate_scores = query @ candidates.T

    ks = [1, 5, 10, 50]

    candidates = tf.data.Dataset.from_tensor_slices(
        (candidate_ids, candidates)).batch(32)

    if top_k_layer is not None:
      candidates = top_k_layer().index_from_dataset(candidates)

    metric = metrics.FactorizedTopK(
        candidates=candidates,
        ks=ks
    )
    metric.update_state(
        query_embeddings=query,
        true_candidate_embeddings=true_candidate_embeddings,
        true_candidate_ids=true_candidate_ids if use_candidate_ids else None,
        sample_weight=sample_weight,
    )

    for k, metric_value in zip(ks, metric.result()):
      in_top_k = tf.math.in_top_k(
          targets=true_candidate_indexes,
          predictions=candidate_scores,
          k=k)
      expected_val = np.average(
          in_top_k.numpy().astype(np.float32),
          weights=np.squeeze(sample_weight, 1),
      )
      self.assertAllClose(metric_value, expected_val)

  @parameterized.parameters(
      layers.factorized_top_k.Streaming,
      layers.factorized_top_k.BruteForce,
      layers.factorized_top_k.ScaNN
  )
  def test_id_based_evaluation(self, layer):
    """Checks id-based evaluation against a manual count of retrieved ids."""

    rng = np.random.default_rng(42)

    k = 100
    num_candidates, num_queries, embedding_dim = (1280, 128, 128)
    candidates = rng.normal(size=(num_candidates,
                                  embedding_dim)).astype(np.float32)
    queries = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32)
    true_candidate_indices = rng.integers(
        0, num_candidates, size=num_queries).astype(np.int32)

    # No identifiers are passed, so the ids compared below are the candidate
    # positions returned by the index.
    index = layer(k=k).index_from_dataset(
        tf.data.Dataset.from_tensor_slices(candidates).batch(32))

    metric = metrics.FactorizedTopK(
        candidates=index,
        ks=[k]
    )

    # Number of queries whose true candidate was retrieved by the index.
    in_top_k = 0

    for query, true_candidate_idx in zip(queries, true_candidate_indices):

      metric.update_state(
          query.reshape(1, -1),
          candidates[true_candidate_idx].reshape(1, -1),
          np.array([true_candidate_idx])
      )

      # `top_scores` is not used by the assertions below.
      top_scores, top_indices = index(query.reshape(1, -1))
      top_scores, top_indices = top_scores.numpy()[0], top_indices.numpy()[0]

      if true_candidate_idx in top_indices.tolist():
        in_top_k += 1

    expected_metric = in_top_k / num_queries

    self.assertEqual(metric.result()[0], expected_metric)


if __name__ == "__main__":
  tf.test.main()

--------------------------------------------------------------------------------
/tensorflow_recommenders/models/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow
class Model(tf.keras.Model):
  """Base class for TFRS models.

  Recommender models are frequently too complex to fit neatly into the
  supervised or unsupervised moulds. This class lets such models define their
  own training and test losses with little boilerplate.

  Subclasses are expected to provide:
  - `__init__`: sets up the model. Variables, tasks, losses, and metrics
    should be created here.
  - `compute_loss`: receives the raw features passed into the model and
    returns the loss tensor used for training. It should also update the
    model's metrics while doing so.
  - [Optional] `call`: defines how the model computes its predictions. This is
    not always needed: two-tower retrieval models, for example, have two
    well-defined submodels whose own `call` methods are normally used
    directly.

  This base class is only a thin convenience wrapper around tf.keras.Model;
  the same functionality can be obtained by overriding `train_step` and
  `test_step` on a plain Keras model. Doing that also makes more elaborate
  training mechanisms possible, such as using different optimizers for
  different variables or manipulating gradients.

  The Keras guide on this topic is available at
  https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit.
  """

  def compute_loss(self, inputs, training: bool = False) -> tf.Tensor:  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
    """Returns the loss for a batch of raw inputs; must be overridden.

    Args:
      inputs: A data structure of tensors holding the raw model inputs. These
        usually carry labels and weights in addition to features.
      training: Whether the model is in training mode.

    Returns:
      Loss tensor.

    Raises:
      NotImplementedError: Always, unless a subclass overrides this method.
    """

    raise NotImplementedError(
        "Implementers must implement the `compute_loss` method.")

  def train_step(self, inputs):
    """Runs one optimization step driven by `compute_loss`.

    Args:
      inputs: Batch forwarded unchanged to `compute_loss`.

    Returns:
      Dict mapping each metric name to its current result, plus the entries
      "loss", "regularization_loss" and "total_loss".
    """

    with tf.GradientTape() as tape:
      loss = self.compute_loss(inputs, training=True)

      # Fold the losses collected in `self.losses` (e.g. regularization terms)
      # into the objective; each one is reduced to a scalar before summing.
      per_loss_sums = [tf.reduce_sum(extra_loss) for extra_loss in self.losses]
      regularization_loss = tf.reduce_sum(per_loss_sums)

      total_loss = loss + regularization_loss

    variables = self.trainable_variables
    gradients = tape.gradient(total_loss, variables)
    self.optimizer.apply_gradients(zip(gradients, variables))

    results = {metric.name: metric.result() for metric in self.metrics}
    results.update(
        loss=loss,
        regularization_loss=regularization_loss,
        total_loss=total_loss,
    )

    return results

  def test_step(self, inputs):
    """Runs one evaluation step driven by `compute_loss`.

    Args:
      inputs: Batch forwarded unchanged to `compute_loss`.

    Returns:
      Dict mapping each metric name to its current result, plus the entries
      "loss", "regularization_loss" and "total_loss".
    """

    loss = self.compute_loss(inputs, training=False)

    # Same bookkeeping as in `train_step`, without any gradient computation.
    per_loss_sums = [tf.reduce_sum(extra_loss) for extra_loss in self.losses]
    regularization_loss = tf.reduce_sum(per_loss_sums)

    total_loss = loss + regularization_loss

    results = {metric.name: metric.result() for metric in self.metrics}
    results.update(
        loss=loss,
        regularization_loss=regularization_loss,
        total_loss=total_loss,
    )

    return results
class ModelTest(tf.test.TestCase):
  """Fit/evaluate smoke tests for subclasses of `models.Model`."""

  def test_ranking_model(self):
    """Tests a simple ranking model."""

    class Model(models.Model):

      def __init__(self):
        super().__init__()
        self._dense = tf.keras.layers.Dense(1)
        bce = tf.keras.losses.BinaryCrossentropy()
        accuracy = tf.keras.metrics.BinaryAccuracy(name="accuracy")
        self.task = tasks.Ranking(loss=bce, metrics=[accuracy])

      def call(self, inputs):
        return self._dense(inputs)

      def compute_loss(self, inputs, training=False):
        features, labels = inputs
        return self.task(predictions=self(features), labels=labels)

    features = np.random.normal(size=(10, 3))
    labels = np.ones(10)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    model = Model()
    model.compile()
    model.fit(dataset.batch(2))
    results = model.evaluate(dataset.batch(2), return_dict=True)

    for key in ("loss", "accuracy"):
      self.assertIn(key, results)

  def test_factorized_model(self):
    """Tests a simple factorized retrieval model."""

    class Model(models.Model):

      def __init__(self, candidate_dataset):
        super().__init__()

        self.query_model = tf.keras.layers.Dense(16)
        self.candidate_model = tf.keras.layers.Dense(16)

        # The top-K metric is evaluated against the embedded candidates.
        embedded_candidates = candidate_dataset.map(self.candidate_model)
        self.task = tasks.Retrieval(
            metrics=metrics.FactorizedTopK(
                candidates=embedded_candidates, ks=[5]))

      def compute_loss(self, inputs, training=False):
        query_features, candidate_features = inputs
        return self.task(
            query_embeddings=self.query_model(query_features),
            candidate_embeddings=self.candidate_model(candidate_features))

    corpus = tf.data.Dataset.from_tensor_slices(
        np.random.normal(size=(10, 3)))
    query_features = np.random.normal(size=(10, 3))
    candidate_features = np.random.normal(size=(10, 3))
    dataset = tf.data.Dataset.from_tensor_slices(
        (query_features, candidate_features))

    model = Model(corpus.batch(10))
    model.compile()
    model.fit(dataset.batch(2))
    results = model.evaluate(dataset.batch(2), return_dict=True)

    for key in ("loss", "factorized_top_k/top_5_categorical_accuracy"):
      self.assertIn(key, results)

  def test_multiask_model(self):
    """Test a joint ranking-retrieval model."""

    class Model(models.Model):

      def __init__(self, candidate_dataset):
        super().__init__()

        self.query_model = tf.keras.layers.Dense(16)
        self.candidate_model = tf.keras.layers.Dense(16)
        self.ctr_model = tf.keras.layers.Dense(1, activation="sigmoid")

        embedded_candidates = candidate_dataset.map(self.candidate_model)
        self.retrieval_task = tasks.Retrieval(
            metrics=metrics.FactorizedTopK(
                candidates=embedded_candidates, ks=[5]))
        self.ctr_task = tasks.Ranking(
            metrics=[tf.keras.metrics.AUC(name="ctr_auc")])

      def compute_loss(self, inputs, training):
        query_features, candidate_features, clicks = inputs

        query_embeddings = self.query_model(query_features)
        candidate_embeddings = self.candidate_model(candidate_features)

        # The click model sees the raw query and candidate features joined
        # along the feature axis.
        joint_features = tf.concat(
            [query_features, candidate_features], axis=1)
        pctr = self.ctr_model(joint_features)

        retrieval_loss = self.retrieval_task(
            query_embeddings=query_embeddings,
            candidate_embeddings=candidate_embeddings)
        ctr_loss = self.ctr_task(predictions=pctr, labels=clicks)

        return retrieval_loss + ctr_loss

    corpus = tf.data.Dataset.from_tensor_slices(
        np.random.normal(size=(10, 3)))
    query_features = np.random.normal(size=(10, 3))
    candidate_features = np.random.normal(size=(10, 3))
    clicks = np.random.randint(0, 2, size=10)
    dataset = tf.data.Dataset.from_tensor_slices(
        (query_features, candidate_features, clicks))

    model = Model(corpus.batch(10))
    model.compile()
    model.fit(dataset.batch(2))
    results = model.evaluate(dataset.batch(2), return_dict=True)

    for key in ("loss",
                "factorized_top_k/top_5_categorical_accuracy",
                "ctr_auc"):
      self.assertIn(key, results)

  def test_regularization_losses(self):
    """A loss added inside `call` is reported as `regularization_loss`."""

    class Model(models.Model):

      def __init__(self):
        super().__init__()
        self._dense = tf.keras.layers.Dense(1)
        bce = tf.keras.losses.BinaryCrossentropy()
        accuracy = tf.keras.metrics.BinaryAccuracy(name="accuracy")
        self.task = tasks.Ranking(loss=bce, metrics=[accuracy])

      def call(self, inputs):
        # Constant extra loss whose value the test can check exactly.
        self.add_loss(1000.0)
        return self._dense(inputs)

      def compute_loss(self, inputs, training=False):
        features, labels = inputs
        return self.task(predictions=self(features), labels=labels)

    features = np.random.normal(size=(10, 3))
    labels = np.ones(10)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    model = Model()
    model.compile()
    model.fit(dataset.batch(2))
    results = model.evaluate(dataset.batch(2), return_dict=True)

    for key in ("loss", "accuracy"):
      self.assertIn(key, results)

    self.assertEqual(results["regularization_loss"], 1000.0)
from tensorflow_recommenders import examples
from tensorflow_recommenders import experimental
from tensorflow_recommenders import layers
from tensorflow_recommenders import metrics
from tensorflow_recommenders import models
from tensorflow_recommenders import tasks
from tensorflow_recommenders import types


# Bind the base model class to a module-level name so that it can be reached
# as `Model` directly on this module.
Model = models.Model

# Artificially using the libraries in order to be able to use the tfrs_pub
# without these imports if needed and not generate a lint error.
# Each assignment below only references an imported submodule; the bound
# names are not read anywhere else in this module.
__use_examples = examples
__use_experimental = experimental
__use_layers = layers
__use_metrics = metrics
__use_models = models
__use_tasks = tasks
__use_types = types
class Task:
  """Marker base class for tasks.

  The class defines no behaviour of its own. Inherit from it when you want
  your tasks to be distinguishable from plain Keras layers.
  """
class Ranking(tf.keras.layers.Layer, base.Task):
  """Task layer for ranking models.

  Recommender systems are often built from two components:
  - a retrieval model, which retrieves O(thousands) candidates from a corpus
    of O(millions) candidates;
  - a ranker model, which scores the retrieved candidates and returns a
    ranked shortlist of a few dozen candidates.

  This task helps with building the ranker. Such models usually predict
  signals such as clicks, cart additions, likes, ratings, and purchases.
  """

  def __init__(
      self,
      loss: Optional[tf.keras.losses.Loss] = None,
      metrics: Optional[List[tf.keras.metrics.Metric]] = None,
      prediction_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
      label_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
      loss_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
      name: Optional[Text] = None) -> None:
    """Initializes the task.

    Args:
      loss: Loss function. A `BinaryCrossentropy` loss is used when omitted.
      metrics: Keras metrics updated with labels and predictions.
      prediction_metrics: Keras metrics that summarize the predictions.
      label_metrics: Keras metrics that summarize the labels.
      loss_metrics: Keras metrics that summarize the loss.
      name: Optional task name.
    """

    super().__init__(name=name)

    if loss is None:
      loss = tf.keras.losses.BinaryCrossentropy()
    self._loss = loss

    # Missing (or empty) metric lists are stored as fresh empty lists.
    self._ranking_metrics = metrics if metrics else []
    self._prediction_metrics = prediction_metrics if prediction_metrics else []
    self._label_metrics = label_metrics if label_metrics else []
    self._loss_metrics = loss_metrics if loss_metrics else []

  def call(self,
           labels: tf.Tensor,
           predictions: tf.Tensor,
           sample_weight: Optional[tf.Tensor] = None,
           training: bool = False,
           compute_metrics: bool = True) -> tf.Tensor:
    """Computes the task loss and updates the configured metrics.

    Args:
      labels: Tensor of labels.
      predictions: Tensor of predictions.
      sample_weight: Tensor of sample weights.
      training: Indicator whether training or test loss is being computed.
        It is accepted for interface purposes and not read by this method.
      compute_metrics: Whether to update metrics. Pass False during training
        for faster training.

    Returns:
      loss: Tensor of loss values.
    """

    loss = self._loss(
        y_true=labels, y_pred=predictions, sample_weight=sample_weight)

    if not compute_metrics:
      return loss

    ranking_updates = [
        metric.update_state(
            y_true=labels, y_pred=predictions, sample_weight=sample_weight)
        for metric in self._ranking_metrics
    ]
    prediction_updates = [
        metric.update_state(predictions, sample_weight=sample_weight)
        for metric in self._prediction_metrics
    ]
    label_updates = [
        metric.update_state(labels, sample_weight=sample_weight)
        for metric in self._label_metrics
    ]
    # The loss was computed with `sample_weight` above, so it is recorded
    # here without passing the weights a second time.
    loss_updates = [
        metric.update_state(loss) for metric in self._loss_metrics
    ]

    candidate_ops = (
        ranking_updates + prediction_updates + label_updates + loss_updates)

    # Custom metrics may not return update ops, unlike built-in Keras metrics,
    # so missing ops are dropped.
    update_ops = [op for op in candidate_ops if op is not None]

    with tf.control_dependencies(update_ops):
      return tf.identity(loss)
class RankingTest(tf.test.TestCase, parameterized.TestCase):
  """Checks the loss and metric values produced by `ranking.Ranking`."""

  @parameterized.parameters((True,), (False,))
  def test_task(self, enable_sample_weight):
    """Eager mode: loss and metrics match hand-computed values."""
    task = ranking.Ranking(
        metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")],
        label_metrics=[tf.keras.metrics.Mean(name="label_mean")],
        prediction_metrics=[tf.keras.metrics.Mean(name="prediction_mean")],
        loss_metrics=[tf.keras.metrics.Mean(name="loss_mean")],
    )

    predictions = tf.constant([[1], [0.3]], dtype=tf.float32)
    labels = tf.constant([[1], [1]], dtype=tf.float32)
    # When enabled, the weights are all ones and the same expectations are
    # asserted for both parameterizations.
    sample_weight = (
        tf.constant([1.0, 1.0], dtype=tf.float32)
        if enable_sample_weight else None)

    # Standard log loss formula.
    expected_loss = -(math.log(1) + math.log(0.3)) / 2.0
    expected_metrics = dict(
        accuracy=0.5,
        label_mean=1.0,
        prediction_mean=0.65,
        loss_mean=expected_loss,
    )

    loss = task(
        predictions=predictions, labels=labels, sample_weight=sample_weight)

    observed = {}
    for metric in task.metrics:
      observed[metric.name] = metric.result().numpy()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_metrics, observed)

  def test_task_graph(self):
    """Graph mode: metrics match after running the loss op in a session."""
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      task = ranking.Ranking(
          metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")],
          label_metrics=[tf.keras.metrics.Mean(name="label_mean")],
          prediction_metrics=[tf.keras.metrics.Mean(name="prediction_mean")],
          loss_metrics=[tf.keras.metrics.Mean(name="loss_mean")],
      )
      predictions = tf.constant([[1], [0.3]], dtype=tf.float32)
      labels = tf.constant([[1], [1]], dtype=tf.float32)

      # Standard log loss formula.
      expected_loss = -(math.log(1) + math.log(0.3)) / 2.0
      expected_metrics = dict(
          accuracy=0.5,
          label_mean=1.0,
          prediction_mean=0.65,
          loss_mean=expected_loss,
      )

      loss = task(predictions=predictions, labels=labels)

      # Variables must be initialized explicitly before the loss op is run.
      sess.run([variable.initializer for variable in task.variables])
      for metric in task.metrics:
        sess.run([variable.initializer for variable in metric.variables])
      sess.run(loss)

      observed = {}
      for metric in task.metrics:
        observed[metric.name] = sess.run(metric.result())

      self.assertAllClose(expected_metrics, observed)
class RetrievalTest(tf.test.TestCase):
  """Checks loss and metric values of `retrieval.Retrieval`."""

  def test_task(self):
    """Eager mode: loss and metrics under each metric-computation flag."""

    query = tf.constant([[1, 2, 3], [2, 3, 4]], dtype=tf.float32)
    candidate = tf.constant([[1, 1, 1], [1, 1, 0]], dtype=tf.float32)
    corpus = tf.data.Dataset.from_tensor_slices(
        np.array([[0, 0, 0]] * 20, dtype=np.float32))

    task = retrieval.Retrieval(
        metrics=metrics.FactorizedTopK(
            candidates=corpus.batch(16), ks=[5]
        ),
        batch_metrics=[
            tf.keras.metrics.TopKCategoricalAccuracy(
                k=1, name="batch_categorical_accuracy_at_1"
            )
        ],
        loss_metrics=[
            tf.keras.metrics.Mean(
                name="batch_loss",
                dtype=tf.float32,
            )
        ],
    )

    def read_metrics():
      return {m.name: m.result().numpy() for m in task.metrics}

    def reset_metrics():
      for m in task.metrics:
        m.reset_states()

    top_5 = "factorized_top_k/top_5_categorical_accuracy"
    batch_top_1 = "batch_categorical_accuracy_at_1"

    # All_pair_scores: [[6, 3], [9, 5]].
    # Normalized logits: [[3, 0], [4, 0]].
    expected_loss = -np.log(_sigmoid(3.0)) - np.log(1 - _sigmoid(4.0))

    # Default flags: every metric is computed.
    expected_all = {top_5: 1.0, batch_top_1: 0.5, "batch_loss": expected_loss}
    loss = task(query_embeddings=query, candidate_embeddings=candidate)
    observed_all = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_all, observed_all)

    # Test computation of batch metrics when skipping corpus metrics
    reset_metrics()
    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
        compute_metrics=False)
    expected_batch_only = {top_5: 0.0, batch_top_1: 0.5, "batch_loss": loss}
    observed_batch_only = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_batch_only, observed_batch_only)

    # Test computation of corpus metrics when skipping batch metrics
    reset_metrics()
    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
        compute_batch_metrics=False)
    expected_corpus_only = {top_5: 1.0, batch_top_1: 0.0, "batch_loss": loss}
    observed_corpus_only = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_corpus_only, observed_corpus_only)

    # Test computation of metrics with sample_weight
    reset_metrics()
    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
        sample_weight=tf.constant([0.7, 0.3], dtype=tf.float32),
    )

    # All_pair_scores: [[6, 3], [9, 5]].
    # Normalized logits: [[3, 0], [4, 0]].
    expected_weighted_loss = -0.7 * np.log(_sigmoid(3.0)) - 0.3 * np.log(
        1 - _sigmoid(4.0)
    )

    expected_weighted = {
        top_5: 1.0,
        batch_top_1: 0.7,
        "batch_loss": expected_weighted_loss,
    }
    observed_weighted = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_weighted_loss, loss)
    self.assertAllClose(expected_weighted, observed_weighted)

  def test_task_graph(self):
    """Graph mode: metrics match after running the loss op in a session."""

    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      query = tf.constant([[1, 2, 3], [2, 3, 4]], dtype=tf.float32)
      candidate = tf.constant([[1, 1, 1], [1, 1, 0]], dtype=tf.float32)
      corpus = tf.data.Dataset.from_tensor_slices(
          np.array([[0, 0, 0]] * 20, dtype=np.float32))

      task = retrieval.Retrieval(
          metrics=metrics.FactorizedTopK(
              candidates=corpus.batch(16),
              ks=[5]
          ),
          batch_metrics=[
              tf.keras.metrics.TopKCategoricalAccuracy(
                  k=1, name="batch_categorical_accuracy_at_1")
          ])

      expected_metrics = {
          "factorized_top_k/top_5_categorical_accuracy": 1.0,
          "batch_categorical_accuracy_at_1": 0.5,
      }

      loss = task(query_embeddings=query, candidate_embeddings=candidate)

      # Variables must be initialized explicitly before the loss op is run.
      sess.run([variable.initializer for variable in task.variables])
      for metric in task.metrics:
        sess.run([variable.initializer for variable in metric.variables])
      sess.run(loss)

      observed = {}
      for metric in task.metrics:
        observed[metric.name] = sess.run(metric.result())

      self.assertAllClose(expected_metrics, observed)
class RetrievalTestWithNegativeSamples(tf.test.TestCase):
  """Retrieval task checks with more candidates than queries."""

  def test_task(self):
    """Eager mode: loss and metrics under each metric-computation flag."""

    query = tf.constant([[3, 2, 1], [2, 3, 4]], dtype=tf.float32)
    candidate = tf.constant([[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=tf.float32)
    corpus = tf.data.Dataset.from_tensor_slices(
        np.array([[0, 0, 0]] * 20, dtype=np.float32))

    task = retrieval.Retrieval(
        metrics=metrics.FactorizedTopK(
            candidates=corpus.batch(16),
            ks=[5]
        ),
        batch_metrics=[
            tf.keras.metrics.TopKCategoricalAccuracy(
                k=1, name="batch_categorical_accuracy_at_1")
        ])

    def read_metrics():
      return {m.name: m.result().numpy() for m in task.metrics}

    def reset_metrics():
      for m in task.metrics:
        m.reset_states()

    top_5 = "factorized_top_k/top_5_categorical_accuracy"
    batch_top_1 = "batch_categorical_accuracy_at_1"

    # Scores will have shape [num_queries, num_candidates]
    # All_pair_scores: [[2, 3, 5], [3, 7, 5]].
    # Normalized logits: [[0, 1, 3], [0, 4, 2]].
    expected_loss = (- np.log(1 / (1 + np.exp(1) + np.exp(3)))
                     - np.log(np.exp(4) / (1 + np.exp(4) + np.exp(2))))

    # Default flags: every metric is computed.
    expected_all = {top_5: 1.0, batch_top_1: 0.5}
    loss = task(query_embeddings=query, candidate_embeddings=candidate)
    observed_all = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_all, observed_all)

    # Test computation of batch metrics when skipping corpus metrics
    reset_metrics()
    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
        compute_metrics=False)
    expected_batch_only = {top_5: 0.0, batch_top_1: 0.5}
    observed_batch_only = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_batch_only, observed_batch_only)

    # Test computation of corpus metrics when skipping batch metrics
    reset_metrics()
    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
        compute_batch_metrics=False)
    expected_corpus_only = {top_5: 1.0, batch_top_1: 0.0}
    observed_corpus_only = read_metrics()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_corpus_only, observed_corpus_only)
class RetrievalTestWithMultipointQueries(tf.test.TestCase):
  """Retrieval task checks where each query holds several embeddings."""

  def test_task(self):
    """Eager mode: loss and batch metric for rank-3 query embeddings."""

    query = tf.constant(
        [[[3, 2, 1], [1, 2, 3]], [[2, 3, 4], [4, 3, 2]]], dtype=tf.float32
    )
    candidate = tf.constant([[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=tf.float32)
    corpus = tf.data.Dataset.from_tensor_slices(
        np.array([[0, 0, 0]] * 20, dtype=np.float32)
    )

    task = retrieval.Retrieval(
        metrics=metrics.FactorizedTopK(
            candidates=corpus.batch(16), ks=[5]
        ),
        batch_metrics=[
            tf.keras.metrics.TopKCategoricalAccuracy(
                k=1, name="batch_categorical_accuracy_at_1"
            )
        ],
    )

    # Scores will have shape [num_queries, num_candidates]
    # All_pair_scores: [[[2,2], [3,5], [5,3]], [[3, 3], [7,5], [5,7]]].
    # Max-sim scores: [[2, 5, 5], [3, 7, 7]].
    # Normalized logits: [[0, 3, 3], [1, 5, 5]].
    expected_loss = -np.log(1 / (1 + np.exp(3) + np.exp(3))) - np.log(
        np.exp(5) / (np.exp(1) + np.exp(5) + np.exp(5))
    )

    expected_metrics = {
        # not computed for multipoint queries
        "factorized_top_k/top_5_categorical_accuracy": 0.0,
        "batch_categorical_accuracy_at_1": 0.5,
    }

    loss = task(
        query_embeddings=query,
        candidate_embeddings=candidate,
    )

    observed = {}
    for metric in task.metrics:
      observed[metric.name] = metric.result().numpy()

    self.assertIsNotNone(loss)
    self.assertAllClose(expected_loss, loss)
    self.assertAllClose(expected_metrics, observed)
"""Type definitions."""

from typing import Callable, Union


import tensorflow as tf


# An activation is given either as a callable taking a `tf.Tensor` and
# returning a `tf.Tensor`, or as a string.
# NOTE(review): the string form is presumably an activation name resolved
# elsewhere - confirm against the layers that accept this type.
Activation = Union[Callable[[tf.Tensor], tf.Tensor], str]

# --------------------------------------------------------------------------------
# /tools/build_api_docs.py:
# --------------------------------------------------------------------------------
# Copyright 2024 The TensorFlow Recommenders Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# lint-as: python3
r"""Tool to generate API docs.

# How to run

Install tensorflow_docs if needed:

```
pip install git+https://github.com/tensorflow/docs
```

Run the docs generator from the repository root:

```shell
python $(pwd)/tools/build_api_docs.py
```
"""

from typing import Text

import fire

import tensorflow as tf

from tensorflow_docs.api_generator import doc_controls
from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api

import tensorflow_recommenders as tfrs


# Default prefix used for the "view source" links in the generated pages.
GITHUB_CODE_PATH = (
    "https://github.com/tensorflow/recommenders/"
    "blob/main/tensorflow_recommenders/"
)


def _hide_layer_and_module_methods():
  """Keeps inherited base-class members out of subclass documentation.

  Every entry in the class dictionaries of `tf.Module`,
  `tf.keras.layers.Layer` and `tf.keras.Model` is marked with
  `doc_controls.do_not_doc_in_subclasses`, with two exceptions:
    - `__init__` is left untouched so it is always documented.
    - `call` is left documented as well; any "for subclass implementers"
      marker found on it is removed, as it can carry important information
      for complex layers.
  """

  # Snapshot the members up front, in the order Module, Layer, Model.
  base_members = [
      *tf.Module.__dict__.items(),
      *tf.keras.layers.Layer.__dict__.items(),
      *tf.keras.Model.__dict__.items(),
  ]

  for name, member in base_members:
    if name == "__init__":
      # Always document __init__.
      continue

    if name == "call":
      # Always document `call`.
      marker = doc_controls._FOR_SUBCLASS_IMPLEMENTERS  # pylint: disable=protected-access
      if hasattr(member, marker):
        delattr(member, marker)
      continue

    # Everything else is excluded. Unwrap descriptors first so the marker
    # lands on the underlying function object.
    target = member.fget if isinstance(member, property) else member
    if isinstance(target, (staticmethod, classmethod)):
      target = target.__func__

    try:
      doc_controls.do_not_doc_in_subclasses(target)
    except AttributeError:
      # NOTE(review): presumably raised for members that cannot carry the
      # marker attribute; skipping them is deliberate best-effort.
      pass


def build_api_docs(output_dir: Text = "/tmp/tensorflow_recommenders/api_docs",
                   code_url_prefix: Text = GITHUB_CODE_PATH,
                   search_hints: bool = True,
                   site_path: Text = "recommenders/api_docs/") -> None:
  """Generates the TensorFlow Recommenders API docs.

  Args:
    output_dir: Directory handed to `DocGenerator.build` as `output_dir`.
    code_url_prefix: Forwarded to `DocGenerator` as `code_url_prefix`.
    search_hints: Forwarded to `DocGenerator` as `search_hints`.
    site_path: Forwarded to `DocGenerator` as `site_path`.
  """

  _hide_layer_and_module_methods()

  print(f"Writing docs to {output_dir}")

  api_filters = [
      public_api.local_definitions_filter,
      public_api.explicit_package_contents_filter,
  ]
  generator = generate_lib.DocGenerator(
      root_title="TensorFlow Recommenders",
      py_modules=[("tfrs", tfrs)],
      code_url_prefix=code_url_prefix,
      search_hints=search_hints,
      site_path=site_path,
      callbacks=api_filters,
  )
  generator.build(output_dir=output_dir)


if __name__ == "__main__":
  # Expose `build_api_docs` and its keyword arguments on the command line.
  fire.Fire(build_api_docs, name="build_api_docs")
# --------------------------------------------------------------------------------
# /tools/build_scripts/pip_install.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
# Installs TensorFlow and an editable copy of TensorFlow Recommenders with
# python3.10, then checks that the package imports.
# The utils.sh path below is relative, so run this from the repository root.

set -x
set -e  # fail and exit on any command erroring

# Need to set these env vars: abort if either is unset or empty.
# NOTE(review): neither variable is read again in this script; the TensorFlow
# install below is unpinned - confirm that is intended.
: "${TF_VERSION:?}"
: "${PY_VERSION:?}"

# Import build functions.
source ./tools/build_scripts/utils.sh

which python3.10
python3.10 --version

# Install pip
echo "Upgrading pip."
python3.10 -m pip install --upgrade pip

# Install TensorFlow.
echo "Installing TensorFlow..."
python3.10 -m pip install tensorflow
python3.10 -m pip install -q urllib3

# Install TensorFlow Recommenders.
echo "Installing TensorFlow Recommenders..."
# Quoted so the shell cannot treat `[docs]` as a glob character class and
# expand it against files such as `.d` or `.c` in the working directory.
python3.10 -m pip install -e ".[docs]"

# Test successful build.
echo "Testing import..."
python3.10 -c "import tensorflow_recommenders as tfrs"

echo "Done."

# --------------------------------------------------------------------------------
# /tools/build_scripts/release.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
# Builds and tests a release from a given commit, then optionally publishes it.
# Usage: release.sh <git-commit-id>

set -e  # fail and exit on any command erroring
set -x

# NOTE(review): not referenced again in this script and not exported.
TF_VERSION="2.9.0"

# The commit to release is the first positional argument; it is mandatory.
GIT_COMMIT_ID=${1:-""}
[[ -z $GIT_COMMIT_ID ]] && echo "Must provide a commit." && exit 1
SETUP_ARGS=""
if [ "$GIT_COMMIT_ID" = "nightly" ]
then
  echo "Nightly version building currently not implemented."
  exit 1
fi

# Import build functions.
source ./tools/build_scripts/utils.sh

which python3.10
python3.10 --version

# Install PyPI-related packages.
python3.10 -m pip install -q wheel twine pyopenssl

echo "Checking out commit $GIT_COMMIT_ID..."
# Quoted so the ref is always passed to git as a single, unexpanded argument.
git checkout "$GIT_COMMIT_ID"

echo "Building source distribution..."

# Build the source distribution and the wheel.
# $SETUP_ARGS is left unquoted on purpose so an empty value adds no argument.
python3.10 setup.py sdist $SETUP_ARGS
python3.10 setup.py bdist_wheel $SETUP_ARGS

# Check setup.py.
twine check dist/*

# Install the freshly built wheel and run the test suite against it.
echo "Running tests..."
python3.10 -m pip install dist/*.whl
python3.10 -m pip install scann
python3.10 -m pip install pytest
python3.10 -m pytest -v .

# Publish to PyPI only after an explicit single-letter confirmation.
read -p "Publish? (y/n) " -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  echo "Publishing to PyPI."
  twine upload dist/*
else
  echo "Skipping upload."
fi

echo "Done."

# --------------------------------------------------------------------------------
# /tools/build_scripts/test.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
#
# Installs the package via pip_install.sh and runs the tests with pytest.

set -e  # fail and exit on any command erroring

# Install.
source ./tools/build_scripts/pip_install.sh

# Install test dependencies.
python3.10 -m pip install pytest

# Run tests.
python3.10 -m pytest -v .

# --------------------------------------------------------------------------------
# /tools/build_scripts/utils.sh:
# --------------------------------------------------------------------------------
#!/bin/bash

# Installs TensorFlow with pip, followed by urllib3.
# Arguments:
#   $1 - "tf-nightly" to install the nightly package; any other value is
#        used as the exact version in `tensorflow==<value>`.
install_tf() {
  local version=$1
  case "$version" in
    tf-nightly)
      pip install -q tf-nightly
      ;;
    *)
      pip install -q "tensorflow==$version"
      ;;
  esac
  pip install -q urllib3
}