├── .github
    └── workflows
    │   ├── pylint-presubmit.yml
    │   └── test.yaml
├── .gitignore
├── .pylintrc
├── CHANGELOG.md
├── CITATION.cff
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── assets
    ├── alternate_dcn_structures.png
    ├── cross_features.gif
    ├── feature_crossing.png
    ├── full_logo.png
    ├── low_rank_dcn.png
    ├── parallel_deep_cross.png
    ├── stacked_deep_cross.png
    └── stacked_structure.png
├── docs
    ├── _book.yaml
    ├── _index.yaml
    └── examples
    │   ├── _toc.yaml
    │   ├── basic_ranking.ipynb
    │   ├── basic_retrieval.ipynb
    │   ├── context_features.ipynb
    │   ├── dcn.ipynb
    │   ├── deep_recommenders.ipynb
    │   ├── diststrat_retrieval.ipynb
    │   ├── efficient_serving.ipynb
    │   ├── featurization.ipynb
    │   ├── listwise_ranking.ipynb
    │   ├── multitask.ipynb
    │   ├── quickstart.ipynb
    │   ├── ranking_tfx.ipynb
    │   ├── sequential_retrieval.ipynb
    │   └── tpu_embedding_layer.ipynb
├── requirements.txt
├── setup.py
├── tensorflow_recommenders
    ├── .flake8
    ├── __init__.py
    ├── dev_requirements.txt
    ├── examples
    │   ├── __init__.py
    │   ├── movielens.py
    │   └── nbtool.py
    ├── experimental
    │   ├── __init__.py
    │   ├── layers
    │   │   ├── __init__.py
    │   │   └── embedding
    │   │   │   ├── __init__.py
    │   │   │   ├── partial_tpu_embedding.py
    │   │   │   └── partial_tpu_embedding_test.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── ranking.py
    │   │   └── ranking_test.py
    │   └── optimizers
    │   │   ├── __init__.py
    │   │   ├── clippy_adagrad.py
    │   │   ├── clippy_adagrad_test.py
    │   │   ├── composite_optimizer.py
    │   │   └── composite_optimizer_test.py
    ├── layers
    │   ├── __init__.py
    │   ├── blocks.py
    │   ├── embedding
    │   │   ├── __init__.py
    │   │   ├── tpu_embedding_layer.py
    │   │   └── tpu_embedding_layer_test.py
    │   ├── factorized_top_k.py
    │   ├── factorized_top_k_test.py
    │   ├── feature_interaction
    │   │   ├── __init__.py
    │   │   ├── dcn.py
    │   │   ├── dcn_test.py
    │   │   ├── dot_interaction.py
    │   │   ├── dot_interaction_test.py
    │   │   ├── multi_layer_dcn.py
    │   │   └── multi_layer_dcn_test.py
    │   ├── loss.py
    │   └── loss_test.py
    ├── metrics
    │   ├── __init__.py
    │   ├── factorized_top_k.py
    │   └── factorized_top_k_test.py
    ├── models
    │   ├── __init__.py
    │   ├── base.py
    │   └── base_test.py
    ├── public.py
    ├── tasks
    │   ├── __init__.py
    │   ├── base.py
    │   ├── ranking.py
    │   ├── ranking_test.py
    │   ├── retrieval.py
    │   └── retrieval_test.py
    └── types.py
└── tools
    ├── build_api_docs.py
    └── build_scripts
        ├── pip_install.sh
        ├── release.sh
        ├── test.sh
        └── utils.sh


/.github/workflows/pylint-presubmit.yml:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | name: PyLint
17 | on:
18 |   pull_request:
19 |     paths:
20 |       - '**.py'
21 | 
22 | jobs:
23 |   build:
24 |     name: PyLint
25 |     runs-on: ubuntu-latest
26 |     steps:
27 |     - name: Checkout code
28 |       uses: actions/checkout@v2
29 |     - name: Get file changes
30 |       id: get_file_changes
31 |       uses: trilom/file-changes-action@v1.2.4
32 |       with:
33 |         output: ' '
34 |     - name: Report list of changed files
35 |       run: |
36 |         echo Changed files: ${{ steps.get_file_changes.outputs.files }}
37 |     - name: Set up Python 3.9
38 |       uses: actions/setup-python@v2
39 |       with:
40 |         python-version: "3.9"
41 |     - name: Install Python dependencies
42 |       run: |
43 |         python -m pip install --upgrade pip
44 |         pip install pylint numpy wheel
45 |     - name: Run PyLint on changed files
46 |       env:
47 |         # Pass the file list through the environment rather than expanding it
48 |         # inside the script, so file names from the PR are never parsed as shell.
49 |         CHANGED_FILES: ${{ steps.get_file_changes.outputs.files }}
50 |       run: |
51 |         # '\.py$' matches a literal ".py" suffix; -r skips pylint when no file matched.
52 |         echo "$CHANGED_FILES" | tr " " "\n" | grep '\.py$' | xargs -r pylint --rcfile=.pylintrc
53 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
 1 | name: TensorFlow Recommenders Test
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 |     strategy:
10 |       matrix:
11 |         python-version: ["3.8"]  # quoted: bare versions are YAML floats (3.10 would become 3.1)
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v2
15 |     - name: Set up Python ${{ matrix.python-version }}
16 |       uses: actions/setup-python@v2
17 |       with:
18 |         python-version: ${{ matrix.python-version }}
19 |     - name: Install dependencies
20 |       run: |
21 |         python -m pip install --upgrade pip
22 |         pip install flake8 pytest
23 |         pip install -e .[docs]
24 |     - name: Lint with flake8
25 |       run: |
26 |         # stop the build if there are Python syntax errors or undefined names
27 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
28 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
29 |         # Disable flake checks initially.
30 |         # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E111,E731,F401
31 |     - name: Test with pytest
32 |       run: |
33 |         pytest
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | ## [unreleased][unreleased]
  4 | 
  5 | ## [0.7.3][2023-02-02]
  6 | 
  7 | ### Changed
  8 | 
  9 | -   The `Retrieval` task now accepts a list of factorized metrics instead of a
 10 |     single optional metric.
 11 | 
 12 | ### Added
 13 | 
 14 | -   `tfrs.experimental.optimizers.ClippyAdagrad`: a new optimizer based on
 15 |     `tf.keras.optimizers.Adagrad` that is able to improve training stability.
 16 | -   `tfrs.metrics.FactorizedTopK` now accepts sample weights which are used to
 17 |     compute weighted top k metrics.
 18 | 
 19 | ## [0.7.2][2022-09-28]
 20 | 
 21 | -   Improved support for using TPUEmbedding under parameter server strategy.
 22 | 
 23 | ## [0.7.0][2022-07-07]
 24 | 
 25 | A number of changes to make factorized top-K metric computation more accurate
 26 | and less prone to user error.
 27 | 
 28 | ### Changed
 29 | 
 30 | -   `tfrs.layers.embedding.TPUEmbedding` now supports input features with
 31 |     dynamic shape. `batch_size` argument is deprecated and no longer required.
 32 | 
 33 | -   `tfrs.layers.embedding.TPUEmbedding` now supports running on different
 34 |     versions of TPU.
 35 | 
 36 | -   Pinned TensorFlow to >= 2.9.0 which works with Scann 1.2.7.
 37 | 
 38 | -   `tfrs.tasks.Ranking.call` now accepts a `compute_batch_metrics` argument to
 39 |     allow switching off batch metric computation. Following this change,
 40 |     the `compute_metrics` argument does not impact computation of batch metrics.
 41 | 
 42 | ### Breaking changes
 43 | 
 44 | -   `tfrs.metrics.FactorizedTopK` requires the candidate ids for positive
 45 |     candidates to be supplied when using approximate top-K sources. Each top-K
 46 |     layer now has an `exact` method to broadcast its ability to return exact or
 47 |     approximate top-K results.
 48 | -   Removed `metrics` constructor parameter for `tfrs.metrics.FactorizedTopK`.
 49 |     `FactorizedTopK` only makes sense with top-k metrics, and this change
 50 |     enforces this.
 51 | -   Replaced the `k` constructor argument in `tfrs.metrics.FactorizedTopK` with
 52 |     `ks`: a list of `k` values at which to compute the top k metric.
 53 | 
 54 | ### Changed
 55 | 
 56 | -   The `tfrs.metrics.FactorizedTopK` metric can now compute candidate-id based
 57 |     metrics when given the `true_candidate_ids` argument in its `call` method.
 58 | 
 59 | ### Added
 60 | 
 61 | -   The `Retrieval` task now also accepts a `loss_metrics` argument.
 62 | 
 63 | ## [0.6.0][2021-08-23]
 64 | 
 65 | ### Changed
 66 | 
 67 | -   Pinned TensorFlow to >= 2.6.0, which works with Scann 1.2.3.
 68 | 
 69 | ### Breaking changes
 70 | 
 71 | -   `TopK` layer indexing API changed. Indexing with datasets is now done via
 72 |     the `index_from_dataset` method. This change reduces the possibility of
 73 |     misaligning embeddings and candidate identifiers when indexing via
 74 |     non-deterministic datasets.
 75 | 
 76 | ## [0.5.2][2021-07-15]
 77 | 
 78 | ### Fixed
 79 | 
 80 | -   Fixed error in default arguments to `tfrs.experimental.models.Ranking`
 81 |     (https://github.com/tensorflow/recommenders/issues/311).
 82 | -   Fix TPUEmbedding layer to use named parameters.
 83 | 
 84 | ### Added
 85 | 
 86 | -   Added `batch_metrics` to `tfrs.tasks.Retrieval` for measuring how good the
 87 |     model is at picking out the true candidate for a query from other candidates
 88 |     in the batch.
 89 | -   Added `tfrs.experimental.layers.embedding.PartialTPUEmbedding` layer, which
 90 |     uses `tfrs.layers.embedding.TPUEmbedding` for large embedding lookups and
 91 |     `tf.keras.layers.Embedding` for smaller embedding lookups.
 92 | 
 93 | ## [0.5.1][2021-05-14]
 94 | 
 95 | ### Changed
 96 | 
 97 | -   Supplying incompatibly-shaped candidates and identifiers inputs to
 98 |     `factorized_top_k` layers will now raise (to prevent issues similar to
 99 |     https://github.com/tensorflow/recommenders/issues/286).
100 | 
101 | ## [0.5.0][2021-05-06]
102 | 
103 | ### Changed
104 | 
105 | -   Fixed a bug in `tfrs.layers.loss.SamplingProbablityCorrection`: logits now
106 |     correctly subtract the log of the item probability.
107 | -   `tfrs.experimental.optimizers.CompositeOptimizer`: an optimizer that
108 |     composes multiple individual optimizers which can be applied to different
109 |     subsets of the model's variables.
110 | -   `tfrs.layers.dcn.Cross` and `DotInteraction` layers have been moved to
111 |     `tfrs.layers.feature_interaction` package.
112 | 
113 | ### Added
114 | 
115 | -   `tfrs.experimental.models.Ranking`, an experimental pre-built model for
116 |     ranking tasks. Can be used as DLRM like model with Dot Product feature
117 |     interaction or DCN like model with Cross layer.
118 | 
119 | ## [0.4.0][2021-01-20]
120 | 
121 | ### Added
122 | 
123 | -   `TopK` layers now come with a `query_with_exclusions` method, allowing
124 |     certain candidates to be excluded from top-k retrieval.
125 | -   `TPUEmbedding` Keras layer for accelerating embedding lookups for large
126 |     tables with TPU.
127 | 
128 | ### Changed
129 | 
130 | -   `factorized_top_k.Streaming` layer now accepts a query model, like other
131 |     `factorized_top_k` layers.
132 | 
133 | -   Updated ScaNN to 1.2.0, which requires TensorFlow 2.4.x. When not using
134 |     ScaNN, any TF >= 2.3 is still supported.
135 | 
136 | ## [0.3.2][2020-12-22]
137 | 
138 | ### Changed
139 | 
140 | -   Pinned TensorFlow to >= 2.3 when ScaNN is not being installed. When ScaNN is
141 |     being installed, we pin on >= 2.3, < 2.4. This allows users to use TFRS on
142 |     TF 2.4 when they are not using ScaNN.
143 | 
144 | ## [0.3.1][2020-12-22]
145 | 
146 | ### Changed
147 | 
148 | -   Pinned TensorFlow to 2.3.x and ScaNN to 1.1.1 to ensure TF and ScaNN
149 |     versions are in lockstep.
150 | 
151 | ## [0.3.0][2020-11-18]
152 | 
153 | ### Added
154 | 
155 | -   Deep cross networks: efficient ways of learning feature interactions.
156 | -   ScaNN integration: efficient approximate maximum inner product search for
157 |     fast retrieval.
158 | 
159 | ## [0.2.0][2020-10-15]
160 | 
161 | ### Added
162 | 
163 | -   `tfrs.tasks.Ranking.call` now accepts a `compute_metrics` argument to allow
164 |     switching off metric computation.
165 | -   `tfrs.tasks.Ranking` now accepts label and prediction metrics.
166 | -   Add metrics setter/getters on `tfrs.tasks.Retrieval`.
167 | 
168 | ### Breaking changes
169 | 
170 | -   Corpus retrieval metrics and layers have been reworked.
171 | 
172 |     `tfrs.layers.corpus.DatasetTopk` has been removed,
173 |     `tfrs.layers.corpus.DatasetIndexedTopK` renamed to
174 |     `tfrs.layers.factorized_top_k.Streaming`, `tfrs.layers.ann.BruteForce`
175 |     renamed to `tfrs.layers.factorized_top_k.BruteForce`. All top-k retrieval
176 |     layers (`BruteForce`, `Streaming`) now follow a common interface.
177 | 
178 | ### Changed
179 | 
180 | -   `Dataset` parallelism enabled by default in `DatasetTopK` and
181 |     `DatasetIndexedTopK` layers, bringing over 2x speed-ups to evaluation
182 |     workloads.
183 | -   `evaluate_metrics` argument to `tfrs.tasks.Retrieval.call` renamed to
184 |     `compute_metrics`.
185 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Kula"
 5 |   given-names: "Maciej"
 6 | - family-names: "Chen"
 7 |   given-names: "James"
 8 | - family-names: "Yi"
 9 |   given-names: "Xinyang"
10 | - family-names: "Yao"
11 |   given-names: "Tiansheng"
12 | - family-names: "Sathiamoorthy"
13 |   given-names: "Maheswaran"
14 | - family-names: "Hong"
15 |   given-names: "Lichan"
16 | - family-names: "Chi"
17 |   given-names: "Ed"
18 | title: "TensorFlow Recommenders"
19 | date-released: 2020-09-16
20 | url: "https://github.com/tensorflow/recommenders"


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file or
13 | to sign a new one.
14 | 
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 | 
19 | ## Code reviews
20 | 
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 | 
26 | ## Community Guidelines
27 | 
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TensorFlow Recommenders
 2 | 
 3 | ![TensorFlow Recommenders logo](assets/full_logo.png)
 4 | 
 5 | ![TensorFlow Recommenders build badge](https://github.com/tensorflow/recommenders/actions/workflows/test.yaml/badge.svg)
 6 | [![PyPI badge](https://img.shields.io/pypi/v/tensorflow-recommenders.svg)](https://pypi.python.org/pypi/tensorflow-recommenders/)
 7 | 
 8 | TensorFlow Recommenders is a library for building recommender system models
 9 | using [TensorFlow](https://www.tensorflow.org).
10 | 
11 | It helps with the full workflow of building a recommender system: data
12 | preparation, model formulation, training, evaluation, and deployment.
13 | 
14 | It's built on Keras and aims to have a gentle learning curve while still giving
15 | you the flexibility to build complex models.
16 | 
17 | ## Installation
18 | 
19 | Make sure you have TensorFlow 2.x installed, and install from `pip`:
20 | 
21 | ```shell
22 | pip install tensorflow-recommenders
23 | ```
24 | 
25 | ## Documentation
26 | 
27 | Have a look at our
28 | [tutorials](https://tensorflow.org/recommenders/examples/quickstart) and
29 | [API reference](https://www.tensorflow.org/recommenders/api_docs/python/tfrs/).
30 | 
31 | ## Quick start
32 | 
33 | Building a factorization model for the Movielens 100K dataset is very simple
34 | ([Colab](https://tensorflow.org/recommenders/examples/quickstart)):
35 | 
36 | ```python
37 | from typing import Dict, Text
38 | 
39 | import tensorflow as tf
40 | import tensorflow_datasets as tfds
41 | import tensorflow_recommenders as tfrs
42 | 
43 | # Ratings data.
44 | ratings = tfds.load('movielens/100k-ratings', split="train")
45 | # Features of all the available movies.
46 | movies = tfds.load('movielens/100k-movies', split="train")
47 | 
48 | # Select the basic features.
49 | ratings = ratings.map(lambda x: {
50 |     "movie_id": tf.strings.to_number(x["movie_id"]),
51 |     "user_id": tf.strings.to_number(x["user_id"])
52 | })
53 | movies = movies.map(lambda x: tf.strings.to_number(x["movie_id"]))
54 | 
55 | # Build a model.
56 | class Model(tfrs.Model):
57 | 
58 |   def __init__(self):
59 |     super().__init__()
60 | 
61 |     # Set up user representation.
62 |     self.user_model = tf.keras.layers.Embedding(
63 |         input_dim=2000, output_dim=64)
64 |     # Set up movie representation.
65 |     self.item_model = tf.keras.layers.Embedding(
66 |         input_dim=2000, output_dim=64)
67 |     # Set up a retrieval task and evaluation metrics over the
68 |     # entire dataset of candidates.
69 |     self.task = tfrs.tasks.Retrieval(
70 |         metrics=tfrs.metrics.FactorizedTopK(
71 |             candidates=movies.batch(128).map(self.item_model)
72 |         )
73 |     )
74 | 
75 |   def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
76 | 
77 |     user_embeddings = self.user_model(features["user_id"])
78 |     movie_embeddings = self.item_model(features["movie_id"])
79 | 
80 |     return self.task(user_embeddings, movie_embeddings)
81 | 
82 | 
83 | model = Model()
84 | model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))
85 | 
86 | # Randomly shuffle data and split between train and test.
87 | tf.random.set_seed(42)
88 | shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
89 | 
90 | train = shuffled.take(80_000)
91 | test = shuffled.skip(80_000).take(20_000)
92 | 
93 | # Train.
94 | model.fit(train.batch(4096), epochs=5)
95 | 
96 | # Evaluate.
97 | model.evaluate(test.batch(4096), return_dict=True)
98 | ```
99 | 


--------------------------------------------------------------------------------
/assets/alternate_dcn_structures.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/alternate_dcn_structures.png


--------------------------------------------------------------------------------
/assets/cross_features.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/cross_features.gif


--------------------------------------------------------------------------------
/assets/feature_crossing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/feature_crossing.png


--------------------------------------------------------------------------------
/assets/full_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/full_logo.png


--------------------------------------------------------------------------------
/assets/low_rank_dcn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/low_rank_dcn.png


--------------------------------------------------------------------------------
/assets/parallel_deep_cross.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/parallel_deep_cross.png


--------------------------------------------------------------------------------
/assets/stacked_deep_cross.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/stacked_deep_cross.png


--------------------------------------------------------------------------------
/assets/stacked_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/assets/stacked_structure.png


--------------------------------------------------------------------------------
/docs/_book.yaml:
--------------------------------------------------------------------------------
 1 | upper_tabs:
 2 | # Tabs left of dropdown menu
 3 | - include: /_upper_tabs_left.yaml
 4 | - include: /api_docs/_upper_tabs_api.yaml
 5 | # Dropdown menu
 6 | - name: Resources
 7 |   path: /resources
 8 |   is_default: true
 9 |   menu:
10 |   - include: /resources/_menu_toc.yaml
11 |   lower_tabs:
12 |     # Subsite tabs
13 |     other:
14 |     - name: Guide & Tutorials
15 |       contents:
16 |       - title: Quickstart
17 |         path: /recommenders/examples/quickstart
18 |       - include: /recommenders/examples/_toc.yaml
19 | 
20 |     - name: API
21 |       skip_translation: true
22 |       contents:
23 |       - title: All Symbols
24 |         path: /recommenders/api_docs/python/tfrs/all_symbols
25 |       - include: /recommenders/api_docs/python/tfrs/_toc.yaml
26 | 
27 | - include: /_upper_tabs_right.yaml
28 | 


--------------------------------------------------------------------------------
/docs/_index.yaml:
--------------------------------------------------------------------------------
  1 | book_path: /recommenders/_book.yaml
  2 | project_path: /recommenders/_project.yaml
  3 | description: A library for building recommender system models.
  4 | landing_page:
  5 |   custom_css_path: /site-assets/css/style.css
  6 |   rows:
  7 |   - heading: TensorFlow Recommenders
  8 |     items:
  9 |     - classname: devsite-landing-row-50
 10 |       description: >
 11 |         TensorFlow Recommenders (TFRS) is a library for building recommender
 12 |         system models.
 13 |         <br>
 14 |         <br>
 15 |         It helps with the full workflow of building a recommender system: data
 16 |         preparation, model formulation, training, evaluation, and deployment.
 17 |         <br>
 18 |         <br>
 19 |         It's built on Keras and aims to have a gentle learning curve while
 20 |         still giving you the flexibility to build complex models.
 21 |         <br>
 22 |         <br>
 23 |         TFRS makes it possible to:
 24 |         <ul style="padding-left: 20px; margin: 20px 0;">
 25 |           <li>Build and evaluate flexible recommendation retrieval models.</li>
 26 |           <li>Freely incorporate item, user, and <a href="./examples/featurization">
 27 |           context information</a> into recommendation models.</li>
 28 |           <li>Train <a href="./examples/multitask/">multi-task models</a>
 29 |           that jointly optimize multiple recommendation objectives.</li>
 30 |         </ul>
 31 |         TFRS is open source and available
 32 |         <a href="https://github.com/tensorflow/recommenders">on GitHub</a>.
 33 |         <br>
 34 |         <br>
 35 |         To learn more, see the <a href="./examples/basic_retrieval">tutorial on
 36 |         how to build a movie recommender system</a>, or check the
 37 |         <a href="./api_docs/python/tfrs"> API docs</a> for the API reference.
 38 | 
 39 |       code_block: |
 40 |         <pre class = "prettyprint">
 41 |         import tensorflow_datasets as tfds
 42 |         import tensorflow_recommenders as tfrs
 43 | 
 44 |         # Load data on movie ratings.
 45 |         ratings = tfds.load("movielens/100k-ratings", split="train")
 46 |         movies = tfds.load("movielens/100k-movies", split="train")
 47 | 
 48 |         # Build flexible representation models.
 49 |         user_model = tf.keras.Sequential([...])
 50 |         movie_model = tf.keras.Sequential([...])
 51 | 
 52 |         # Define your objectives.
 53 |         task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
 54 |             movies.batch(128).map(movie_model)
 55 |           )
 56 |         )
 57 | 
 58 |         # Create a retrieval model.
 59 |         model = MovielensModel(user_model, movie_model, task)
 60 |         model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))
 61 | 
 62 |         # Train.
 63 |         model.fit(ratings.batch(4096), epochs=3)
 64 | 
 65 |         # Set up retrieval using trained representations.
 66 |         index = tfrs.layers.ann.BruteForce(model.user_model)
 67 |         index.index_from_dataset(
 68 |             movies.batch(100).map(lambda title: (title, model.movie_model(title)))
 69 |         )
 70 | 
 71 |         # Get recommendations.
 72 |         _, titles = index(np.array(["42"]))
 73 |         print(f"Recommendations for user 42: {titles[0, :3]}")
 74 |         </pre>
 75 |         {% dynamic if request.tld != 'cn' %}
 76 |         <a class="colab-button" target="_blank" href="https://colab.research.google.com/github/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb">Run in a <span>Notebook</span></a>
 77 |         {% dynamic endif %}
 78 | 
 79 |   - classname: devsite-landing-row-cards
 80 |     items:
 81 |     - heading: "Introduction to TensorFlow Recommenders"
 82 |       youtube_id: jz0-satrmrA
 83 |       buttons:
 84 |       - label: "Watch the introduction video"
 85 |         path: https://www.youtube.com/watch?v=jz0-satrmrA&list=PLQY2H8rRoyvy2MiyUBz5RWZr5MPFkV3qz&index=4
 86 |       - label: "Watch the video series"
 87 |         path: https://goo.gle/3Bi8NUS
 88 |     - heading: "TensorFlow Recommenders: Scalable retrieval and feature interaction modelling"
 89 |       image_path: /resources/images/google-research-card-16x9.png
 90 |       path: https://blog.tensorflow.org/2020/11/tensorflow-recommenders-scalable-retrieval-feature-interaction-modelling.html
 91 |       buttons:
 92 |       - label: "Read on TensorFlow blog"
 93 |         path: https://blog.tensorflow.org/2020/11/tensorflow-recommenders-scalable-retrieval-feature-interaction-modelling.html
 94 |     - heading: "Introducing TensorFlow Recommenders"
 95 |       image_path: /resources/images/tf-logo-card-16x9.png
 96 |       path: https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html
 97 |       buttons:
 98 |       - label: "Read on TensorFlow blog"
 99 |         path: https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html
100 |   - classname: devsite-landing-row-cards
101 |     items:
102 |     - heading: "TensorFlow Recommenders on GitHub"
103 |       image_path: /resources/images/github-card-16x9.png
104 |       path: https://github.com/tensorflow/recommenders
105 |       buttons:
106 |       - label: "View on GitHub"
107 |         path: https://github.com/tensorflow/recommenders
108 |     - heading: ""
109 |       options:
110 |       - hidden
111 |     - heading: ""
112 |       options:
113 |       - hidden
114 | 


--------------------------------------------------------------------------------
/docs/examples/_toc.yaml:
--------------------------------------------------------------------------------
 1 | toc:
 2 | - heading: "Beginner"
 3 |   style: divider
 4 | - title: "Recommender basics"
 5 |   style: accordion
 6 |   section:
 7 |   - title: "Recommending movies: retrieval"
 8 |     path: /recommenders/examples/basic_retrieval
 9 |   - title: "Recommending movies: ranking"
10 |     path: /recommenders/examples/basic_ranking
11 |   - title: "Retrieval with distribution strategy"
12 |     path: /recommenders/examples/diststrat_retrieval
13 |   - title: "Retrieval with sequential model"
14 |     path: /recommenders/examples/sequential_retrieval
15 |   - title: "Retrieval with TFX"
16 |     status: external
17 |     path: /tfx/tutorials/tfx/recommenders
18 |   - title: "Ranking with TFX"
19 |     path: /recommenders/examples/ranking_tfx
20 |   - title: "Large Embeddings with TPU"
21 |     path: /recommenders/examples/tpu_embedding_layer
22 | - title: "Using rich features"
23 |   style: accordion
24 |   section:
25 |   - title: "Feature preprocessing"
26 |     path: /recommenders/examples/featurization
27 |   - title: "Leveraging context features"
28 |     path: /recommenders/examples/context_features
29 |   - title: "Building deep retrieval models"
30 |     path: /recommenders/examples/deep_recommenders
31 | - heading: "Intermediate"
32 |   style: divider
33 | - title: "Multitask recommenders"
34 |   path: /recommenders/examples/multitask
35 | - title: "Cross networks"
36 |   path: /recommenders/examples/dcn
37 | - title: "Efficient serving"
38 |   path: /recommenders/examples/efficient_serving
39 | - title: "Listwise ranking"
40 |   path: /recommenders/examples/listwise_ranking
41 | 


--------------------------------------------------------------------------------
/docs/examples/quickstart.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "_dEaVsqSgNyQ"
  7 |       },
  8 |       "source": [
  9 |         "##### Copyright 2020 The TensorFlow Authors."
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "cellView": "form",
 17 |         "id": "4FyfuZX-gTKS"
 18 |       },
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
 22 |         "# you may not use this file except in compliance with the License.\n",
 23 |         "# You may obtain a copy of the License at\n",
 24 |         "#\n",
 25 |         "# https://www.apache.org/licenses/LICENSE-2.0\n",
 26 |         "#\n",
 27 |         "# Unless required by applicable law or agreed to in writing, software\n",
 28 |         "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
 29 |         "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
 30 |         "# See the License for the specific language governing permissions and\n",
 31 |         "# limitations under the License."
 32 |       ]
 33 |     },
 34 |     {
 35 |       "cell_type": "markdown",
 36 |       "metadata": {
 37 |         "id": "sT8AyHRMNh41"
 38 |       },
 39 |       "source": [
 40 |         "# TensorFlow Recommenders: Quickstart\n",
 41 |         "\n",
 42 |         "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
 43 |         "  \u003ctd\u003e\n",
 44 |         "    \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/recommenders/examples/quickstart\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
 45 |         "  \u003c/td\u003e\n",
 46 |         "  \u003ctd\u003e\n",
 47 |         "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
 48 |         "  \u003c/td\u003e\n",
 49 |         "  \u003ctd\u003e\n",
 50 |         "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
 51 |         "  \u003c/td\u003e\n",
 52 |         "  \u003ctd\u003e\n",
 53 |         "    \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/recommenders/docs/examples/quickstart.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
 54 |         "  \u003c/td\u003e\n",
 55 |         "\u003c/table\u003e"
 56 |       ]
 57 |     },
 58 |     {
 59 |       "cell_type": "markdown",
 60 |       "metadata": {
 61 |         "id": "8f-reQ11gbLB"
 62 |       },
 63 |       "source": [
 64 |         "In this tutorial, we build a simple matrix factorization model using the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/) with TFRS. We can use this model to recommend movies for a given user."
 65 |       ]
 66 |     },
 67 |     {
 68 |       "cell_type": "markdown",
 69 |       "metadata": {
 70 |         "id": "qA00wBE2Ntdm"
 71 |       },
 72 |       "source": [
 73 |         "### Import TFRS\n",
 74 |         "\n",
 75 |         "First, install and import TFRS:"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "code",
 80 |       "execution_count": null,
 81 |       "metadata": {
 82 |         "id": "6yzAaM85Z12D"
 83 |       },
 84 |       "outputs": [],
 85 |       "source": [
 86 |         "!pip install -q tensorflow-recommenders\n",
 87 |         "!pip install -q --upgrade tensorflow-datasets"
 88 |       ]
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "execution_count": null,
 93 |       "metadata": {
 94 |         "id": "n3oYt3R6Nr9l"
 95 |       },
 96 |       "outputs": [],
 97 |       "source": [
 98 |         "from typing import Dict, Text\n",
 99 |         "\n",
100 |         "import numpy as np\n",
101 |         "import tensorflow as tf\n",
102 |         "\n",
103 |         "import tensorflow_datasets as tfds\n",
104 |         "import tensorflow_recommenders as tfrs"
105 |       ]
106 |     },
107 |     {
108 |       "cell_type": "markdown",
109 |       "metadata": {
110 |         "id": "zCxQ1CZcO2wh"
111 |       },
112 |       "source": [
113 |         "### Read the data"
114 |       ]
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "execution_count": null,
119 |       "metadata": {
120 |         "id": "M-mxBYjdO5m7"
121 |       },
122 |       "outputs": [],
123 |       "source": [
124 |         "# Ratings data.\n",
125 |         "ratings = tfds.load('movielens/100k-ratings', split=\"train\")\n",
126 |         "# Features of all the available movies.\n",
127 |         "movies = tfds.load('movielens/100k-movies', split=\"train\")\n",
128 |         "\n",
129 |         "# Select the basic features.\n",
130 |         "ratings = ratings.map(lambda x: {\n",
131 |         "    \"movie_title\": x[\"movie_title\"],\n",
132 |         "    \"user_id\": x[\"user_id\"]\n",
133 |         "})\n",
134 |         "movies = movies.map(lambda x: x[\"movie_title\"])"
135 |       ]
136 |     },
137 |     {
138 |       "cell_type": "markdown",
139 |       "metadata": {
140 |         "id": "5W0HSfmSNCWm"
141 |       },
142 |       "source": [
143 |         "Build vocabularies to convert user ids and movie titles into integer indices for embedding layers:"
144 |       ]
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "execution_count": null,
149 |       "metadata": {
150 |         "id": "9I1VTEjHzpfX"
151 |       },
152 |       "outputs": [],
153 |       "source": [
154 |         "user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)\n",
155 |         "user_ids_vocabulary.adapt(ratings.map(lambda x: x[\"user_id\"]))\n",
156 |         "\n",
157 |         "movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)\n",
158 |         "movie_titles_vocabulary.adapt(movies)"
159 |       ]
160 |     },
161 |     {
162 |       "cell_type": "markdown",
163 |       "metadata": {
164 |         "id": "Lrch6rVBOB9Q"
165 |       },
166 |       "source": [
167 |         "### Define a model\n",
168 |         "\n",
169 |         "We can define a TFRS model by inheriting from `tfrs.Model` and implementing the `compute_loss` method:"
170 |       ]
171 |     },
172 |     {
173 |       "cell_type": "code",
174 |       "execution_count": null,
175 |       "metadata": {
176 |         "id": "e5dNbDZwOIHR"
177 |       },
178 |       "outputs": [],
179 |       "source": [
180 |         "class MovieLensModel(tfrs.Model):\n",
181 |         "  # We derive from a custom base class to help reduce boilerplate. Under the hood,\n",
182 |         "  # these are still plain Keras Models.\n",
183 |         "\n",
184 |         "  def __init__(\n",
185 |         "      self,\n",
186 |         "      user_model: tf.keras.Model,\n",
187 |         "      movie_model: tf.keras.Model,\n",
188 |         "      task: tfrs.tasks.Retrieval):\n",
189 |         "    super().__init__()\n",
190 |         "\n",
191 |         "    # Set up user and movie representations.\n",
192 |         "    self.user_model = user_model\n",
193 |         "    self.movie_model = movie_model\n",
194 |         "\n",
195 |         "    # Set up a retrieval task.\n",
196 |         "    self.task = task\n",
197 |         "\n",
198 |         "  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -\u003e tf.Tensor:\n",
199 |         "    # Define how the loss is computed.\n",
200 |         "\n",
201 |         "    user_embeddings = self.user_model(features[\"user_id\"])\n",
202 |         "    movie_embeddings = self.movie_model(features[\"movie_title\"])\n",
203 |         "\n",
204 |         "    return self.task(user_embeddings, movie_embeddings)"
205 |       ]
206 |     },
207 |     {
208 |       "cell_type": "markdown",
209 |       "metadata": {
210 |         "id": "wdwtgUCEOI8y"
211 |       },
212 |       "source": [
213 |         "Define the two models and the retrieval task."
214 |       ]
215 |     },
216 |     {
217 |       "cell_type": "code",
218 |       "execution_count": null,
219 |       "metadata": {
220 |         "id": "EvtnUN6aUY4U"
221 |       },
222 |       "outputs": [],
223 |       "source": [
224 |         "# Define user and movie models.\n",
225 |         "user_model = tf.keras.Sequential([\n",
226 |         "    user_ids_vocabulary,\n",
227 |         "    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)\n",
228 |         "])\n",
229 |         "movie_model = tf.keras.Sequential([\n",
230 |         "    movie_titles_vocabulary,\n",
231 |         "    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)\n",
232 |         "])\n",
233 |         "\n",
234 |         "# Define your objectives.\n",
235 |         "task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(\n",
236 |         "    movies.batch(128).map(movie_model)\n",
237 |         "  )\n",
238 |         ")"
239 |       ]
240 |     },
241 |     {
242 |       "cell_type": "markdown",
243 |       "metadata": {
244 |         "id": "BMV0HpzmJGWk"
245 |       },
246 |       "source": [
247 |         "\n",
248 |         "### Fit and evaluate it.\n",
249 |         "\n",
250 |         "Create the model, train it, and generate predictions:\n",
251 |         "\n"
252 |       ]
253 |     },
254 |     {
255 |       "cell_type": "code",
256 |       "execution_count": null,
257 |       "metadata": {
258 |         "id": "H2tQDhqkOKf1"
259 |       },
260 |       "outputs": [],
261 |       "source": [
262 |         "# Create a retrieval model.\n",
263 |         "model = MovieLensModel(user_model, movie_model, task)\n",
264 |         "model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))\n",
265 |         "\n",
266 |         "# Train for 3 epochs.\n",
267 |         "model.fit(ratings.batch(4096), epochs=3)\n",
268 |         "\n",
269 |         "# Use brute-force search to set up retrieval using the trained representations.\n",
270 |         "index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)\n",
271 |         "index.index_from_dataset(\n",
272 |         "    movies.batch(100).map(lambda title: (title, model.movie_model(title))))\n",
273 |         "\n",
274 |         "# Get some recommendations.\n",
275 |         "_, titles = index(np.array([\"42\"]))\n",
276 |         "print(f\"Top 3 recommendations for user 42: {titles[0, :3]}\")"
277 |       ]
278 |     },
279 |     {
280 |       "cell_type": "code",
281 |       "execution_count": null,
282 |       "metadata": {
283 |         "id": "neJAJVwbReNd"
284 |       },
285 |       "outputs": [],
286 |       "source": [
287 |         ""
288 |       ]
289 |     }
290 |   ],
291 |   "metadata": {
292 |     "colab": {
293 |       "collapsed_sections": [],
294 |       "name": "quickstart.ipynb",
295 |       "private_outputs": true,
296 |       "provenance": [],
297 |       "toc_visible": true
298 |     },
299 |     "kernelspec": {
300 |       "display_name": "Python 3",
301 |       "language": "python",
302 |       "name": "python3"
303 |     }
304 |   },
305 |   "nbformat": 4,
306 |   "nbformat_minor": 0
307 | }
308 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 2.9.0; sys_platform != 'darwin'
2 | tensorflow-macos >= 2.9.0; sys_platform == 'darwin'
3 | absl-py >= 0.1.6
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """TensorFlow Recommenders, a TensorFlow library for recommender systems."""
16 | 
17 | import pathlib
18 | import setuptools
19 | 
VERSION = "0.7.3"

# Resolve data files relative to this script rather than the current working
# directory, so the build behaves the same wherever it is invoked from.
_HERE = pathlib.Path(__file__).parent

# Read as UTF-8 explicitly; the default encoding is locale dependent.
long_description = _HERE.joinpath("README.md").read_text(encoding="utf-8")

# One requirement per line; blank lines and comment lines are not requirements
# and are skipped.
install_requires = [
    line.strip()
    for line in _HERE.joinpath("requirements.txt")
    .read_text(encoding="utf-8")
    .splitlines()
    if line.strip() and not line.strip().startswith("#")
]

setuptools.setup(
    name="tensorflow-recommenders",
    version=VERSION,
    description="Tensorflow Recommenders, a TensorFlow library for recommender systems.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/tensorflow/recommenders",
    author="Google Inc.",
    author_email="packages@tensorflow.org",
    packages=setuptools.find_packages(),
    install_requires=install_requires,
    extras_require={
        "docs": ["fire", "annoy", "scann == 1.2.*", "tensorflow-ranking"],
    },
    # PyPI package information.
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Mathematics",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    license="Apache 2.0",
    keywords="tensorflow recommenders recommendations",
)
61 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/.flake8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/recommenders/b639fe3a15ce00acf765a005c78fe264d2df7931/tensorflow_recommenders/.flake8


--------------------------------------------------------------------------------
/tensorflow_recommenders/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """TensorFlow Recommenders is a library for building recommender system models.
16 | 
17 | It helps with the full workflow of building a recommender system: data
18 | preparation, model formulation, training, evaluation, and deployment.
19 | 
20 | It's built on Keras and aims to have a gentle learning curve while still giving
21 | you the flexibility to build complex models.
22 | """
23 | 
24 | __version__ = "v0.7.3"
25 | 
26 | from tensorflow_recommenders import examples
27 | from tensorflow_recommenders import experimental
28 | # Internal extension library import.
29 | from tensorflow_recommenders import layers
30 | from tensorflow_recommenders import metrics
31 | from tensorflow_recommenders import models
32 | from tensorflow_recommenders import tasks
33 | from tensorflow_recommenders import types
34 | 
35 | 
36 | Model = models.Model
37 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | # Building docs.
2 | fire
3 | git+https://github.com/tensorflow/docs
4 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/examples/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Lint-as: python3
16 | """Functions used in examples."""
17 | 
18 | from tensorflow_recommenders.examples import movielens
19 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/examples/movielens.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Functions supporting Movielens examples."""
 16 | 
 17 | import array
 18 | import collections
 19 | 
 20 | from typing import Dict, List, Optional, Text, Tuple
 21 | 
 22 | import numpy as np
 23 | import tensorflow as tf
 24 | 
 25 | 
 26 | def evaluate(user_model: tf.keras.Model,
 27 |              movie_model: tf.keras.Model,
 28 |              test: tf.data.Dataset,
 29 |              movies: tf.data.Dataset,
 30 |              train: Optional[tf.data.Dataset] = None,
 31 |              k: int = 10) -> Dict[Text, float]:
 32 |   """Evaluates a Movielens model on the supplied datasets.
 33 | 
 34 |   Args:
 35 |     user_model: User representation model.
 36 |     movie_model: Movie representation model.
 37 |     test: Test dataset.
 38 |     movies: Dataset of movies.
 39 |     train: Training dataset. If supplied, recommendations for training watches
 40 |       will be removed.
 41 |     k: The cutoff value at which to compute precision and recall.
 42 | 
 43 |   Returns:
 44 |    Dictionary of metrics.
 45 |   """
 46 | 
 47 |   movie_ids = np.concatenate(
 48 |       list(movies.batch(1000).map(lambda x: x["movie_id"]).as_numpy_iterator()))
 49 | 
 50 |   movie_vocabulary = dict(zip(movie_ids.tolist(), range(len(movie_ids))))
 51 | 
 52 |   train_user_to_movies = collections.defaultdict(lambda: array.array("i"))
 53 |   test_user_to_movies = collections.defaultdict(lambda: array.array("i"))
 54 | 
 55 |   if train is not None:
 56 |     for row in train.as_numpy_iterator():
 57 |       user_id = row["user_id"]
 58 |       movie_id = movie_vocabulary[row["movie_id"]]
 59 |       train_user_to_movies[user_id].append(movie_id)
 60 | 
 61 |   for row in test.as_numpy_iterator():
 62 |     user_id = row["user_id"]
 63 |     movie_id = movie_vocabulary[row["movie_id"]]
 64 |     test_user_to_movies[user_id].append(movie_id)
 65 | 
 66 |   movie_embeddings = np.concatenate(
 67 |       list(movies.batch(4096).map(
 68 |           lambda x: movie_model({"movie_id": x["movie_id"]})
 69 |       ).as_numpy_iterator()))
 70 | 
 71 |   precision_values = []
 72 |   recall_values = []
 73 | 
 74 |   for user_id, test_movies in test_user_to_movies.items():
 75 |     user_embedding = user_model({"user_id": np.array([user_id])}).numpy()
 76 |     scores = (user_embedding @ movie_embeddings.T).flatten()
 77 | 
 78 |     test_movies = np.frombuffer(test_movies, dtype=np.int32)
 79 | 
 80 |     if train is not None:
 81 |       train_movies = np.frombuffer(
 82 |           train_user_to_movies[user_id], dtype=np.int32)
 83 |       scores[train_movies] = -1e6
 84 | 
 85 |     top_movies = np.argsort(-scores)[:k]
 86 |     num_test_movies_in_k = sum(x in top_movies for x in test_movies)
 87 |     precision_values.append(num_test_movies_in_k / k)
 88 |     recall_values.append(num_test_movies_in_k / len(test_movies))
 89 | 
 90 |   return {
 91 |       "precision_at_k": np.mean(precision_values),
 92 |       "recall_at_k": np.mean(recall_values)
 93 |   }
 94 | 
 95 | 
 96 | def _create_feature_dict() -> Dict[Text, List[tf.Tensor]]:
 97 |   """Helper function for creating an empty feature dict for defaultdict."""
 98 |   return {"movie_title": [], "user_rating": []}
 99 | 
100 | 
def _sample_list(
    feature_lists: Dict[Text, List[tf.Tensor]],
    num_examples_per_list: int,
    random_state: Optional[np.random.RandomState] = None,
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Draws one list example, without replacement, from the feature lists.

  Args:
    feature_lists: Dict with the parallel lists "movie_title" and
      "user_rating".
    num_examples_per_list: Number of positions to draw.
    random_state: Random state used for the draw; a new unseeded
      `np.random.RandomState` is created when omitted.

  Returns:
    A `(movie_titles, ratings)` tuple, each stacked along axis 0 in the order
    the positions were drawn.
  """
  rng = np.random.RandomState() if random_state is None else random_state

  titles = feature_lists["movie_title"]
  ratings = feature_lists["user_rating"]

  picked_positions = rng.choice(
      range(len(titles)),
      size=num_examples_per_list,
      replace=False,
  )

  picked_titles = []
  picked_ratings = []
  for position in picked_positions:
    picked_titles.append(titles[position])
    picked_ratings.append(ratings[position])

  return tf.stack(picked_titles, 0), tf.stack(picked_ratings, 0)
127 | 
128 | 
def sample_listwise(
    rating_dataset: tf.data.Dataset,
    num_list_per_user: int = 10,
    num_examples_per_list: int = 10,
    seed: Optional[int] = None,
) -> tf.data.Dataset:
  """Function for converting the MovieLens 100K dataset to a listwise dataset.

  Args:
      rating_dataset:
        The MovieLens ratings dataset loaded from TFDS with features
        "movie_title", "user_id", and "user_rating".
      num_list_per_user:
        An integer representing the number of lists that should be sampled for
        each user in the training dataset.
      num_examples_per_list:
        An integer representing the number of movies to be sampled for each list
        from the list of movies rated by the user.
      seed:
        An integer for creating `np.random.RandomState`.

  Returns:
      A tf.data.Dataset containing list examples.

      Each example contains three keys: "user_id", "movie_title", and
      "user_rating". "user_id" holds the user id for the example.
      "movie_title" holds `num_examples_per_list` movie titles sampled without
      replacement from the movies rated by that user, stacked along axis 0.
      "user_rating" holds the rating of each of those movies, in the same
      order. Users with fewer than `num_examples_per_list` ratings contribute
      no examples.
  """
  random_state = np.random.RandomState(seed)

  example_lists_by_user = collections.defaultdict(_create_feature_dict)

  # Group every rating under the user who produced it.
  for example in rating_dataset:
    user_lists = example_lists_by_user[example["user_id"].numpy()]
    user_lists["movie_title"].append(example["movie_title"])
    user_lists["user_rating"].append(example["user_rating"])

  tensor_slices = {"user_id": [], "movie_title": [], "user_rating": []}

  for user_id, feature_lists in example_lists_by_user.items():
    # Drop the user if they don't have enough ratings. The check does not
    # depend on the list being sampled, so it is done once per user.
    if len(feature_lists["movie_title"]) < num_examples_per_list:
      continue

    for _ in range(num_list_per_user):
      sampled_movie_titles, sampled_ratings = _sample_list(
          feature_lists,
          num_examples_per_list,
          random_state=random_state,
      )
      tensor_slices["user_id"].append(user_id)
      tensor_slices["movie_title"].append(sampled_movie_titles)
      tensor_slices["user_rating"].append(sampled_ratings)

  return tf.data.Dataset.from_tensor_slices(tensor_slices)
193 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/examples/nbtool.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tools for cleaning and testing notebooks."""
 17 | 
 18 | import glob
 19 | import os
 20 | import subprocess
 21 | import tempfile
 22 | 
 23 | from typing import Text
 24 | 
 25 | import fire
 26 | import nbformat
 27 | 
 28 | 
 29 | def clean_cell(cell):
 30 |   """Cleans a cell."""
 31 |   metadata = cell.metadata
 32 | 
 33 |   for key in ("pinned", "imported_from", "executionInfo", "outputId"):
 34 |     if key in metadata:
 35 |       del metadata[key]
 36 | 
 37 |   if cell["cell_type"] == "code":
 38 |     cell["execution_count"] = 0
 39 | 
 40 | 
 41 | def clean_notebook(notebook):
 42 |   """Cleans a notebook."""
 43 |   colab = notebook["metadata"]["colab"]
 44 | 
 45 |   for key in ("defaultview", "views", "last_runtime", "provenance"):
 46 |     if key in colab:
 47 |       del colab[key]
 48 | 
 49 |   for cell in notebook.cells:
 50 |     clean_cell(cell)
 51 | 
 52 |   return notebook
 53 | 
 54 | 
 55 | class NBTool:
 56 |   """Tool for checking and cleaning notebooks."""
 57 | 
 58 |   def format(self, path):
 59 |     """Formats notebooks."""
 60 | 
 61 |     for notebook_path in glob.glob(os.path.join(path, "*ipynb")):
 62 |       print(f"Formatting {notebook_path}")
 63 | 
 64 |       with open(notebook_path, "r") as notebook_file:
 65 |         notebook = nbformat.read(notebook_file, as_version=4)
 66 | 
 67 |       with open(notebook_path, "w") as notebook_file:
 68 |         nbformat.write(notebook, notebook_file)
 69 | 
 70 |   def clean(self, path: Text):
 71 |     """Cleans notebooks."""
 72 |     for notebook_path in glob.glob(os.path.join(path, "*ipynb")):
 73 |       print(f"Cleaning {notebook_path}")
 74 | 
 75 |       with open(notebook_path, "r") as notebook_file:
 76 |         notebook = nbformat.read(notebook_file, as_version=4)
 77 | 
 78 |       notebook = clean_notebook(notebook)
 79 | 
 80 |       with open(notebook_path, "w") as notebook_file:
 81 |         nbformat.write(notebook, notebook_file)
 82 | 
 83 |   def check(self, path: Text):
 84 |     """Executes a notebook, checking for execution errors."""
 85 | 
 86 |     with tempfile.NamedTemporaryFile(mode="w", delete=True) as fle:
 87 |       fname = fle.name
 88 |       args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
 89 |               "--ExecutePreprocessor.timeout=600",
 90 |               "--ExecutePreprocessor.kernel_name=python3",
 91 |               "--output", fname, path]
 92 | 
 93 |       try:
 94 |         subprocess.check_output(args, stderr=subprocess.STDOUT)
 95 |       except subprocess.CalledProcessError as e:
 96 |         raise Exception(
 97 |             f"Execution of notebook {path} failed: {e.stdout, e.stderr}.")
 98 | 
 99 |   def check_all(self, path: Text):
100 |     """Runs all notebooks under path."""
101 |     for notebook_path in glob.glob(os.path.join(path, "*ipynb")):
102 |       print(f"Executing {notebook_path}")
103 | 
104 |       self.check(notebook_path)
105 | 
106 | 
# Command-line entry point: hands the NBTool class to `fire` under the name
# "nbtool" when this file is run as a script.
if __name__ == "__main__":
  fire.Fire(NBTool, name="nbtool")
109 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Experimental APIs."""
16 | 
17 | from tensorflow_recommenders.experimental import layers
18 | from tensorflow_recommenders.experimental import models
19 | from tensorflow_recommenders.experimental import optimizers
20 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Experimental layers APIs."""
16 | 
17 | from tensorflow_recommenders.experimental.layers import embedding
18 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/layers/embedding/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Experimental embedding layers."""
16 | 
17 | from tensorflow_recommenders.experimental.layers.embedding.partial_tpu_embedding import PartialTPUEmbedding
18 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/layers/embedding/partial_tpu_embedding.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Embedding layer for the Ranking model."""
 16 | 
 17 | from typing import Dict, Optional, Union
 18 | 
 19 | import tensorflow as tf
 20 | 
 21 | from tensorflow_recommenders.layers.embedding.tpu_embedding_layer import TPUEmbedding
 22 | 
# Type alias for the per-feature input values accepted by
# `PartialTPUEmbedding.call`: dense, sparse, or ragged tensors.
Tensor = Union[tf.Tensor, tf.SparseTensor, tf.RaggedTensor]
 24 | 
 25 | 
 26 | class PartialTPUEmbedding(tf.keras.layers.Layer):
 27 |   """Partial TPU Embedding layer.
 28 | 
 29 |   This layer is composed  of `tfrs.layers.embedding.TPUEmbedding` and
 30 |   `tf.keras.layers.Embedding` embedding layers. When training on TPUs, it is
 31 |   preferable to use TPU Embedding layers for large tables (as they are sharded
 32 |   accross TPU cores) and Keras embedding layer for small tables.
 33 |   For tables with vocab sizes less than `size_threshold` a Keras embedding
 34 |   layer will be used, above that threshold a TPU embedding layer will be used.
 35 | 
 36 |   This layer will be applied on a dictionary of feature_name, categorical_tensor
 37 |   pairs and return a dictionary of string-to-tensor of feature_name,
 38 |   embedded_value pairs.
 39 |   """
 40 | 
 41 |   def __init__(self,
 42 |                feature_config,
 43 |                optimizer: tf.keras.optimizers.Optimizer,
 44 |                pipeline_execution_with_tensor_core: bool = False,
 45 |                batch_size: Optional[int] = None,
 46 |                size_threshold: Optional[int] = 10_000) -> None:
 47 |     """Initializes the embedding layer.
 48 | 
 49 |     Args:
 50 |       feature_config: A nested structure of
 51 |         `tf.tpu.experimental.embedding.FeatureConfig` configs.
 52 |       optimizer: An optimizer used for TPU embeddings.
 53 |       pipeline_execution_with_tensor_core: If True, the TPU embedding
 54 |         computations will overlap with the TensorCore computations (and hence
 55 |         will be one step old with potential correctness drawbacks). Set to True
 56 |         for improved performance.
 57 |       batch_size: If set, this will be used as the global batch size and
 58 |         override the autodetection of the batch size from the layer's input.
 59 |         This is necesarry if all inputs to the layer's call are SparseTensors.
 60 |       size_threshold: A threshold for table sizes below which a Keras embedding
 61 |         layer is used, and above which a TPU embedding layer is used.
 62 |         Set `size_threshold=0` to use TPU embedding for all tables and
 63 |         `size_threshold=None` to use only Keras embeddings.
 64 |     """
 65 |     super().__init__()
 66 | 
 67 |     tpu_feature_config = {}
 68 |     table_to_keras_emb = {}
 69 |     self._keras_embedding_layers = {}
 70 | 
 71 |     for name, embedding_feature_config in feature_config.items():
 72 |       table_config = embedding_feature_config.table
 73 |       if size_threshold is not None and table_config.vocabulary_size > size_threshold:
 74 |          # TPUEmbedding layer.
 75 |         tpu_feature_config[name] = embedding_feature_config
 76 |         continue
 77 | 
 78 |       # Keras layer.
 79 |       # Multiple features can reuse the same table.
 80 |       if table_config not in table_to_keras_emb:
 81 |         table_to_keras_emb[table_config] = tf.keras.layers.Embedding(
 82 |             input_dim=table_config.vocabulary_size,
 83 |             output_dim=table_config.dim,
 84 |             embeddings_initializer=table_config.initializer or "uniform",
 85 |         )
 86 |       self._keras_embedding_layers[name] = table_to_keras_emb[table_config]
 87 | 
 88 |     self._tpu_embedding = None
 89 |     if tpu_feature_config:
 90 |       self._tpu_embedding = TPUEmbedding(
 91 |           tpu_feature_config, optimizer, pipeline_execution_with_tensor_core
 92 |       )
 93 | 
 94 |   def call(self, inputs: Dict[str, Tensor]) -> Dict[str, tf.Tensor]:
 95 |     """Computes the output of the embedding layer.
 96 | 
 97 |     It expects a string-to-tensor (or SparseTensor/RaggedTensor) dict as input,
 98 |     and outputs a dictionary of string-to-tensor of feature_name, embedded_value
 99 |     pairs. Note that SparseTensor/RaggedTensor are only supported for
100 |     TPUEmbedding and are not supported for Keras embeddings.
101 | 
102 |     Args:
103 |       inputs: A string-to-tensor (or SparseTensor/RaggedTensor) dictionary.
104 | 
105 |     Returns:
106 |       output: A dictionary of string-to-tensor of feature_name, embedded_value
107 |         pairs.
108 | 
109 |     Raises:
110 |       ValueError if no tf.Tensor is passed to a Keras embedding layer.
111 |     """
112 |     keras_emb_inputs = {
113 |         key: val for key, val in inputs.items()
114 |         if key in self._keras_embedding_layers
115 |     }
116 |     tpu_emb_inputs = {
117 |         key: val for key, val in inputs.items()
118 |         if key not in self._keras_embedding_layers
119 |     }
120 | 
121 |     output = {}
122 |     for key, val in keras_emb_inputs.items():
123 |       if not isinstance(val, tf.Tensor):
124 |         raise ValueError("Only tf.Tensor input is supported for Keras embedding"
125 |                          f" layers, but got: {type(val)}")
126 | 
127 |       output[key] = self._keras_embedding_layers[key](val)
128 | 
129 |     if self._tpu_embedding:
130 |       tpu_emb_output_dict = self._tpu_embedding(tpu_emb_inputs)  # pylint: disable=[not-callable]
131 |       output.update(tpu_emb_output_dict)
132 |     return output
133 | 
134 |   @property
135 |   def tpu_embedding(self) -> Optional[TPUEmbedding]:
136 |     """Returns TPUEmbedding or `None` if only Keras embeddings are used."""
137 |     return self._tpu_embedding
138 | 
139 |   @property
140 |   def keras_embedding_layers(self) -> Dict[str, tf.keras.layers.Embedding]:
141 |     """Returns a dictionary mapping feature names to Keras embedding layers."""
142 |     return self._keras_embedding_layers
143 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/layers/embedding/partial_tpu_embedding_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Tests for PartialTPUEmbedding."""
 16 | 
 17 | import math
 18 | from typing import Dict, List
 19 | 
 20 | import tensorflow as tf
 21 | 
 22 | import tensorflow_recommenders as tfrs
 23 | 
 24 | 
 25 | def _get_tpu_embedding_feature_config(
 26 |     vocab_sizes: List[int],
 27 |     embedding_dims: List[int]
 28 | ) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
 29 |   """Returns TPU embedding feature config.
 30 | 
 31 |   Args:
 32 |     vocab_sizes: List of sizes of categories/id's in the table.
 33 |     embedding_dims: Embedding dimensions.
 34 |   Returns:
 35 |     A dictionary of feature_name, FeatureConfig pairs.
 36 |   """
 37 |   assert len(vocab_sizes) == len(embedding_dims)
 38 |   feature_config = {}
 39 | 
 40 |   for i, vocab_size in enumerate(vocab_sizes):
 41 |     table_config = tf.tpu.experimental.embedding.TableConfig(
 42 |         vocabulary_size=vocab_size,
 43 |         dim=embedding_dims[i],
 44 |         combiner="mean",
 45 |         initializer=tf.initializers.TruncatedNormal(
 46 |             mean=0.0, stddev=1 / math.sqrt(embedding_dims[i])
 47 |         ),
 48 |         name=f"table_{i}"
 49 |     )
 50 |     feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
 51 |         table=table_config)
 52 | 
 53 |   return feature_config
 54 | 
 55 | 
 56 | class PartialTPUEmbeddingTest(tf.test.TestCase):
 57 | 
 58 |   def test_embedding_layer(self):
 59 |     feature_config = _get_tpu_embedding_feature_config(
 60 |         vocab_sizes=[5, 20, 8, 9, 15], embedding_dims=[2, 4, 6, 8, 10])
 61 | 
 62 |     embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
 63 |         feature_config=feature_config,
 64 |         optimizer=tf.keras.optimizers.legacy.Adam(),
 65 |         size_threshold=10)
 66 | 
 67 |     tpu_embedding_tables = embedding_layer.tpu_embedding.embedding_tables
 68 |     keras_embedding_layers = embedding_layer.keras_embedding_layers
 69 | 
 70 |     self.assertLen(tpu_embedding_tables, 2)
 71 |     self.assertLen(keras_embedding_layers, 3)
 72 | 
 73 |     for tbl_config, weight in tpu_embedding_tables.items():
 74 |       print(tbl_config, weight)
 75 |       if "1" in tbl_config.name:
 76 |         self.assertEqual(tbl_config.vocabulary_size, 20)
 77 |         self.assertEqual(tbl_config.dim, 4)
 78 |       else:
 79 |         self.assertEqual(tbl_config.vocabulary_size, 15)
 80 |         self.assertEqual(tbl_config.dim, 10)
 81 | 
 82 |     self.assertEqual(keras_embedding_layers["0"].input_dim, 5)
 83 |     self.assertEqual(keras_embedding_layers["0"].output_dim, 2)
 84 |     self.assertEqual(keras_embedding_layers["2"].input_dim, 8)
 85 |     self.assertEqual(keras_embedding_layers["2"].output_dim, 6)
 86 |     self.assertEqual(keras_embedding_layers["3"].input_dim, 9)
 87 |     self.assertEqual(keras_embedding_layers["3"].output_dim, 8)
 88 | 
 89 |     output = embedding_layer({"0": 4, "1": 10, "2": 6, "3": 8, "4": 0})
 90 |     for key, val in output.items():
 91 |       self.assertEqual(val.shape, feature_config[key].table.dim)
 92 | 
 93 |   def test_all_keras_embedding(self):
 94 |     feature_config = _get_tpu_embedding_feature_config(
 95 |         vocab_sizes=[5, 20, 8, 9, 15], embedding_dims=[2, 4, 6, 8, 10])
 96 | 
 97 |     embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
 98 |         feature_config=feature_config,
 99 |         optimizer=tf.keras.optimizers.legacy.Adam(),
100 |         size_threshold=None)
101 | 
102 |     self.assertIsNone(embedding_layer.tpu_embedding)
103 |     keras_embedding_layers = embedding_layer.keras_embedding_layers
104 | 
105 |     self.assertLen(keras_embedding_layers, 5)
106 | 
107 |     output = embedding_layer({"0": 4, "1": 10, "2": 6, "3": 8, "4": 0})
108 |     for key, val in output.items():
109 |       self.assertEqual(val.shape, feature_config[key].table.dim)
110 | 
111 |   def test_all_tpu_embedding(self):
112 |     feature_config = _get_tpu_embedding_feature_config(
113 |         vocab_sizes=[5, 20, 8, 9, 15], embedding_dims=[2, 4, 6, 8, 10])
114 |     embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
115 |         feature_config=feature_config,
116 |         optimizer=tf.keras.optimizers.legacy.Adam(),
117 |         size_threshold=0)
118 | 
119 |     self.assertLen(embedding_layer.tpu_embedding.embedding_tables, 5)
120 | 
121 |     output = embedding_layer({"0": 4, "1": 10, "2": 6, "3": 8, "4": 0})
122 |     for key, val in output.items():
123 |       self.assertEqual(val.shape, feature_config[key].table.dim)
124 | 
125 |   def test_all_tpu_embedding_with_pipelining(self):
126 |     feature_config = _get_tpu_embedding_feature_config(
127 |         vocab_sizes=[5, 20, 8, 9, 15], embedding_dims=[2, 4, 6, 8, 10])
128 |     embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
129 |         feature_config=feature_config,
130 |         optimizer=tf.keras.optimizers.legacy.Adam(),
131 |         pipeline_execution_with_tensor_core=True,
132 |         size_threshold=0)
133 | 
134 |     self.assertLen(embedding_layer.tpu_embedding.embedding_tables, 5)
135 | 
136 |     output = embedding_layer({"0": 4, "1": 10, "2": 6, "3": 8, "4": 0})
137 |     for key, val in output.items():
138 |       self.assertEqual(val.shape, feature_config[key].table.dim)
139 | 
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
  tf.test.main()
142 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Experimental Models."""
16 | 
17 | from tensorflow_recommenders.experimental.models.ranking import Ranking
18 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/models/ranking.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """A pre-built ranking model."""
 16 | 
 17 | from typing import cast, Dict, Optional, Sequence, Tuple, Union
 18 | 
 19 | import tensorflow as tf
 20 | 
 21 | from tensorflow_recommenders import layers
 22 | from tensorflow_recommenders import models
 23 | from tensorflow_recommenders import tasks
 24 | from tensorflow_recommenders.layers import feature_interaction as feature_interaction_lib
 25 | 
 26 | 
 27 | class Ranking(models.Model):
 28 |   """A configurable ranking model.
 29 | 
 30 |   This class represents a sensible and reasonably flexible configuration for a
 31 |   ranking model that can be used for tasks such as CTR prediction.
 32 | 
 33 |   It can be customized as needed, and its constituent blocks can be changed by
 34 |   passing user-defined alternatives.
 35 | 
 36 |   For example:
 37 |   - Pass
 38 |     `feature_interaction = tfrs.layers.feature_interaction.DotInteraction()`
 39 |     to train a DLRM model, or pass
 40 |     ```
 41 |     feature_interaction = tf.keras.Sequential([
 42 |       tf.keras.layers.Concatenate(),
 43 |       tfrs.layers.feature_interaction.Cross()
 44 |     ])
 45 |     ```
 46 |     to train a DCN model.
 47 |   - Pass `task = tfrs.tasks.Ranking(loss=tf.keras.losses.BinaryCrossentropy())`
 48 |     to train a CTR prediction model, and
 49 |     `tfrs.tasks.Ranking(loss=tf.keras.losses.MeanSquaredError())` to train
 50 |     a rating prediction model.
 51 | 
 52 |   Changing these should cover a broad range of models, but this class is not
 53 |   intended to cover all possible use cases.  For full flexibility, inherit
 54 |   from `tfrs.models.Model` and provide your own implementations of
 55 |   the `compute_loss` and `call` methods.
 56 |   """
 57 | 
 58 |   def __init__(
 59 |       self,
 60 |       embedding_layer: tf.keras.layers.Layer,
 61 |       bottom_stack: Optional[tf.keras.layers.Layer] = None,
 62 |       feature_interaction: Optional[tf.keras.layers.Layer] = None,
 63 |       top_stack: Optional[tf.keras.layers.Layer] = None,
 64 |       concat_dense: bool = True,
 65 |       task: Optional[tasks.Task] = None) -> None:
 66 |     """Initializes the model.
 67 | 
 68 |     Args:
 69 |       embedding_layer: The embedding layer is applied to categorical features.
 70 |         It expects a string-to-tensor (or SparseTensor/RaggedTensor) dict as
 71 |         an input, and outputs a dictionary of string-to-tensor of feature_name,
 72 |         embedded_value pairs.
 73 |         {feature_name_i: tensor_i} -> {feature_name_i: emb(tensor_i)}.
 74 |       bottom_stack: The `bottom_stack` layer is applied to dense features before
 75 |         feature interaction. If None, an MLP with layer sizes [256, 64, 16] is
 76 |         used. For DLRM model, the output of bottom_stack should be of shape
 77 |         (batch_size, embedding dimension).
 78 |       feature_interaction: Feature interaction layer is applied to the
 79 |         `bottom_stack` output and sparse feature embeddings. If it is None,
 80 |         DotInteraction layer is used.
 81 |       top_stack: The `top_stack` layer is applied to the `feature_interaction`
 82 |         output. The output of top_stack should be in the range [0, 1]. If it is
 83 |         None, MLP with layer sizes [512, 256, 1] is used.
 84 |       concat_dense: Weather to concatenate the interaction output with dense
 85 |         embedding vector again before feeding into the top stack
 86 |       task: The task which the model should optimize for. Defaults to a
 87 |         `tfrs.tasks.Ranking` task with a binary cross-entropy loss, suitable for
 88 |         tasks like click prediction.
 89 |     """
 90 | 
 91 |     super().__init__()
 92 | 
 93 |     self._embedding_layer = embedding_layer
 94 |     self._concat_dense = concat_dense
 95 |     self._bottom_stack = (
 96 |         bottom_stack
 97 |         if bottom_stack
 98 |         else layers.blocks.MLP(units=[256, 64, 16], final_activation="relu")
 99 |     )
100 |     self._top_stack = (
101 |         top_stack
102 |         if top_stack
103 |         else layers.blocks.MLP(units=[512, 256, 1], final_activation="sigmoid")
104 |     )
105 |     self._feature_interaction = (
106 |         feature_interaction
107 |         if feature_interaction
108 |         else feature_interaction_lib.DotInteraction()
109 |     )
110 | 
111 |     if task is not None:
112 |       self._task = task
113 |     else:
114 |       self._task = tasks.Ranking(
115 |           loss=tf.keras.losses.BinaryCrossentropy(
116 |               reduction=tf.keras.losses.Reduction.NONE
117 |           ),
118 |           metrics=[
119 |               tf.keras.metrics.AUC(name="auc"),
120 |               tf.keras.metrics.BinaryAccuracy(name="accuracy"),
121 |           ],
122 |           prediction_metrics=[
123 |               tf.keras.metrics.Mean("prediction_mean"),
124 |           ],
125 |           label_metrics=[
126 |               tf.keras.metrics.Mean("label_mean")
127 |           ]
128 |       )
129 | 
130 |   def compute_loss(self,
131 |                    inputs: Union[
132 |                        # Tuple of (features, labels).
133 |                        Tuple[
134 |                            Dict[str, tf.Tensor],
135 |                            tf.Tensor
136 |                        ],
137 |                        # Tuple of (features, labels, sample weights).
138 |                        Tuple[
139 |                            Dict[str, tf.Tensor],
140 |                            tf.Tensor,
141 |                            Optional[tf.Tensor]
142 |                        ]
143 |                    ],
144 |                    training: bool = False) -> tf.Tensor:
145 |     """Computes the loss and metrics of the model.
146 | 
147 |     Args:
148 |       inputs: A data structure of tensors of the following format:
149 |         ({"dense_features": dense_tensor,
150 |           "sparse_features": sparse_tensors},
151 |           label_tensor), or
152 |         ({"dense_features": dense_tensor,
153 |           "sparse_features": sparse_tensors},
154 |           label_tensor,
155 |           sample_weight tensor).
156 |       training: Whether the model is in training mode.
157 | 
158 |     Returns:
159 |       Loss tensor.
160 | 
161 |     Raises:
162 |       ValueError if the the shape of the inputs is invalid.
163 |     """
164 | 
165 |     # We need to work around a bug in mypy - tuple narrowing
166 |     # based on length checks doesn't work.
167 |     # See https://github.com/python/mypy/issues/1178 for details.
168 |     if len(inputs) == 2:
169 |       inputs = cast(
170 |           Tuple[
171 |               Dict[str, tf.Tensor],
172 |               tf.Tensor
173 |           ],
174 |           inputs
175 |       )
176 |       features, labels = inputs
177 |       sample_weight = None
178 |     elif len(inputs) == 3:
179 |       inputs = cast(
180 |           Tuple[
181 |               Dict[str, tf.Tensor],
182 |               tf.Tensor,
183 |               Optional[tf.Tensor],
184 |           ],
185 |           inputs
186 |       )
187 |       features, labels, sample_weight = inputs
188 |     else:
189 |       raise ValueError(
190 |           "Inputs should be either a tuple of (features, labels), "
191 |           "or a tuple of (features, labels, sample weights). "
192 |           "Got a length {len(inputs)} tuple instead: {inputs}."
193 |       )
194 | 
195 |     outputs = self(features, training=training)
196 | 
197 |     loss = self._task(labels, outputs, sample_weight=sample_weight)
198 |     loss = tf.reduce_mean(loss)
199 |     # Scales loss as the default gradients allreduce performs sum inside the
200 |     # optimizer.
201 |     return loss / tf.distribute.get_strategy().num_replicas_in_sync
202 | 
203 |   def call(self, inputs: Dict[str, tf.Tensor]) -> tf.Tensor:
204 |     """Executes forward and backward pass, returns loss.
205 | 
206 |     Args:
207 |       inputs: Model function inputs (features and labels).
208 | 
209 |     Returns:
210 |       loss: Scalar tensor.
211 |     """
212 |     dense_features = inputs["dense_features"]
213 |     sparse_features = inputs["sparse_features"]
214 | 
215 |     sparse_embeddings = self._embedding_layer(sparse_features)
216 |     # Combine a dictionary into a vector and squeeze dimension from
217 |     # (batch_size, 1, emb) to (batch_size, emb).
218 |     sparse_embeddings = tf.nest.flatten(sparse_embeddings)
219 | 
220 |     sparse_embedding_vecs = [
221 |         tf.squeeze(sparse_embedding) for sparse_embedding in sparse_embeddings
222 |     ]
223 |     dense_embedding_vec = self._bottom_stack(dense_features)
224 | 
225 |     interaction_args = sparse_embedding_vecs + [dense_embedding_vec]
226 |     interaction_output = self._feature_interaction(interaction_args)
227 |     if self._concat_dense:
228 |       feature_interaction_output = tf.concat(
229 |           [dense_embedding_vec, interaction_output], axis=1
230 |       )
231 |     else:
232 |       feature_interaction_output = interaction_output
233 | 
234 |     prediction = self._top_stack(feature_interaction_output)
235 | 
236 |     return tf.reshape(prediction, [-1])
237 | 
238 |   @property
239 |   def embedding_trainable_variables(self) -> Sequence[tf.Variable]:
240 |     """Returns trainable variables from embedding tables.
241 | 
242 |     When training a recommendation model with embedding tables, sometimes it's
243 |     preferable to use separate optimizers/learning rates for embedding
244 |     variables and dense variables.
245 |     `tfrs.experimental.optimizers.CompositeOptimizer` can be used to apply
246 |     different optimizers to embedding variables and the remaining variables.
247 |     """
248 |     return self._embedding_layer.trainable_variables
249 | 
250 |   @property
251 |   def dense_trainable_variables(self) -> Sequence[tf.Variable]:
252 |     """Returns all trainable variables that are not embeddings."""
253 |     dense_vars = []
254 |     for layer in self.layers:
255 |       if layer != self._embedding_layer:
256 |         dense_vars.extend(layer.trainable_variables)
257 |     return dense_vars
258 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/models/ranking_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # pylint: disable=g-long-lambda
 16 | """Tests for Ranking."""
 17 | 
 18 | import itertools
 19 | import math
 20 | 
 21 | from typing import List, Dict
 22 | 
 23 | from absl.testing import parameterized
 24 | 
 25 | import tensorflow as tf
 26 | 
 27 | import tensorflow_recommenders as tfrs
 28 | 
 29 | 
 30 | def _get_tpu_embedding_feature_config(
 31 |     vocab_sizes: List[int],
 32 |     embedding_dim: int,
 33 |     table_name_prefix: str = "embedding_table"
 34 | ) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
 35 |   """Returns TPU embedding feature config.
 36 | 
 37 |   Args:
 38 |     vocab_sizes: List of sizes of categories/id's in the table.
 39 |     embedding_dim: Embedding dimension.
 40 |     table_name_prefix: A prefix for embedding tables.
 41 |   Returns:
 42 |     A dictionary of feature_name, FeatureConfig pairs.
 43 |   """
 44 |   feature_config = {}
 45 | 
 46 |   for i, vocab_size in enumerate(vocab_sizes):
 47 |     table_config = tf.tpu.experimental.embedding.TableConfig(
 48 |         vocabulary_size=vocab_size,
 49 |         dim=embedding_dim,
 50 |         combiner="mean",
 51 |         initializer=tf.initializers.TruncatedNormal(
 52 |             mean=0.0, stddev=1 / math.sqrt(embedding_dim)
 53 |         ),
 54 |         name=f"{table_name_prefix}_{i}"
 55 |     )
 56 |     feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
 57 |         table=table_config)
 58 | 
 59 |   return feature_config
 60 | 
 61 | 
 62 | def _generate_synthetic_data(num_dense: int,
 63 |                              vocab_sizes: List[int],
 64 |                              dataset_size: int,
 65 |                              batch_size: int,
 66 |                              generate_weights: bool = False) -> tf.data.Dataset:
 67 |   dense_tensor = tf.random.uniform(
 68 |       shape=(dataset_size, num_dense), maxval=1.0, dtype=tf.float32)
 69 |   # The mean is in [0, 1] interval.
 70 |   dense_tensor_mean = tf.math.reduce_mean(dense_tensor, axis=1)
 71 | 
 72 |   sparse_tensors = []
 73 |   for size in vocab_sizes:
 74 |     sparse_tensors.append(
 75 |         tf.random.uniform(
 76 |             shape=(dataset_size,), maxval=int(size), dtype=tf.int32))
 77 | 
 78 |   sparse_tensor_elements = {
 79 |       str(i): sparse_tensors[i] for i in range(len(sparse_tensors))
 80 |   }
 81 | 
 82 |   sparse_tensors = tf.stack(sparse_tensors, axis=-1)
 83 |   sparse_tensors_mean = tf.math.reduce_sum(sparse_tensors, axis=1)
 84 |   # The mean is in [0, 1] interval.
 85 |   sparse_tensors_mean = tf.cast(sparse_tensors_mean, dtype=tf.float32)
 86 |   sparse_tensors_mean /= sum(vocab_sizes)
 87 |   # The label is in [0, 1] interval.
 88 |   label_tensor = (dense_tensor_mean + sparse_tensors_mean) / 2.0
 89 |   # Use the threshold 0.5 to convert to 0/1 labels.
 90 |   label_tensor = tf.cast(label_tensor + 0.5, tf.int32)
 91 | 
 92 |   if generate_weights:
 93 |     weights = tf.random.uniform(shape=(dataset_size, 1))
 94 | 
 95 |     input_elem = (
 96 |         {"dense_features": dense_tensor,
 97 |          "sparse_features": sparse_tensor_elements},
 98 |         label_tensor,
 99 |         weights
100 |     )
101 |   else:
102 |     input_elem = (
103 |         {"dense_features": dense_tensor,
104 |          "sparse_features": sparse_tensor_elements},
105 |         label_tensor,
106 |     )
107 | 
108 |   dataset = tf.data.Dataset.from_tensor_slices(input_elem)
109 | 
110 |   return dataset.batch(batch_size, drop_remainder=True)
111 | 
112 | 
class RankingTest(tf.test.TestCase, parameterized.TestCase):
  """End-to-end smoke tests for `tfrs.experimental.models.Ranking`."""

  @parameterized.parameters(
      itertools.product(
          # Feature interaction layer factories: dot interaction, and
          # concatenation followed by a cross layer.
          (
              tfrs.layers.feature_interaction.DotInteraction,
              lambda: tf.keras.Sequential([
                  tf.keras.layers.Concatenate(),
                  tfrs.layers.feature_interaction.Cross()
              ]),
          ),
          # Bottom stack factories (None selects the model's default).
          (lambda: None, lambda: tfrs.layers.blocks.MLP(units=[40, 16])),
          # Top stack factories (None selects the model's default).
          (lambda: None, lambda: tfrs.layers.blocks.MLP(
              units=[40, 20, 1], final_activation="sigmoid")),
          # Concat dense.
          (True, False),
          # Use sample weights.
          (True, False),
          # Size threshold: Keras-only, TPU-only and mixed embeddings.
          (None, -1, 20)))
  def test_ranking_model(self,
                         feature_interaction_layer,
                         bottom_stack,
                         top_stack,
                         concat_dense=True,
                         use_weights=False,
                         size_threshold=10):
    """Trains and evaluates a ranking model on synthetic data."""
    vocab_sizes = [30, 3, 26]

    feature_config = _get_tpu_embedding_feature_config(
        vocab_sizes=vocab_sizes, embedding_dim=16)
    adam = tf.keras.optimizers.legacy.Adam()

    # The factories are invoked inside the test so that every parameterized
    # case gets freshly constructed layers.
    embedding = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
        feature_config=feature_config,
        optimizer=adam,
        size_threshold=size_threshold)
    bottom = bottom_stack()
    interaction = feature_interaction_layer()
    top = top_stack()

    ranking_model = tfrs.experimental.models.Ranking(
        embedding_layer=embedding,
        bottom_stack=bottom,
        feature_interaction=interaction,
        top_stack=top,
        concat_dense=concat_dense)
    ranking_model.compile(optimizer=adam, steps_per_execution=5)

    synthetic_data = _generate_synthetic_data(
        num_dense=8,
        vocab_sizes=vocab_sizes,
        dataset_size=64,
        batch_size=16,
        generate_weights=use_weights)

    ranking_model.fit(
        synthetic_data.repeat(),
        validation_data=synthetic_data,
        epochs=1,
        steps_per_epoch=5)

    results = ranking_model.evaluate(synthetic_data, return_dict=True)

    for metric_name in ("loss", "accuracy"):
      self.assertIn(metric_name, results)
175 | 
176 | 
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
  tf.test.main()
179 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/optimizers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Experimental Optimizers."""
16 | 
17 | from tensorflow_recommenders.experimental.optimizers.clippy_adagrad import ClippyAdagrad
18 | from tensorflow_recommenders.experimental.optimizers.composite_optimizer import CompositeOptimizer
19 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/optimizers/composite_optimizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Composite Optimizer."""
 16 | 
 17 | import collections
 18 | from typing import Callable, List, Optional, Sequence, Tuple, Union
 19 | 
 20 | import tensorflow as tf
 21 | 
 22 | Tensor = Union[tf.Tensor, tf.SparseTensor, tf.RaggedTensor]
 23 | 
 24 | 
 25 | class CompositeOptimizer(tf.keras.optimizers.legacy.Optimizer):
 26 |   """An optimizer that composes multiple individual optimizers.
 27 | 
 28 |   It allows different optimizers to be applied to different subsets of the
 29 |   model's variables. For example, it makes it possible to apply one
 30 |   optimizer to the model's embeddings (sparse variables) and another
 31 |   optimizer to the rest of its variables.
 32 | 
 33 |   To specify which optimizer should apply to each variable, pass a list of
 34 |   pairs of (optimizer instance, function returning a list of variables the
 35 |   optimizer should apply to).
 36 | 
 37 |   For example:
 38 |   ```python
 39 |     optimizer = CompositeOptimizer([
 40 |         (tf.keras.optimizers.legacy.SGD(),
 41 |             lambda: model.sparse_trainable_variables),
 42 |         (tf.keras.optimizers.legacy.Adam(),
 43 |             lambda: model.dense_trainable_variables),
 44 |     ])
 45 |   ```
 46 |   """
 47 | 
 48 |   def __init__(self,
 49 |                optimizers_and_vars: Sequence[
 50 |                    Tuple[tf.keras.optimizers.legacy.Optimizer,
 51 |                          Callable[[], Sequence[tf.Variable]]]],
 52 |                name: str = "CompositeOptimizer") -> None:
 53 |     """Initializes an CompositeOptimizer instance.
 54 | 
 55 |     Args:
 56 |       optimizers_and_vars:  List of tuples of (optimizer instance, function
 57 |         returning variables that the optimizer should apply to).
 58 |       name: The optimizer name.
 59 |     """
 60 |     super().__init__(name=name)
 61 |     if not optimizers_and_vars:
 62 |       raise ValueError("`optimizers_and_vars` can't be empty")
 63 |     self._optimizers_and_vars = optimizers_and_vars
 64 |     for i, optimizer_and_var in enumerate(optimizers_and_vars):
 65 |       optimizer = optimizer_and_var[0]
 66 |       self._track_trackable(optimizer, name=f"Optimizer{i}")
 67 | 
 68 |   def apply_gradients(self, grads_and_vars: Sequence[Tuple[Tensor, Tensor]],
 69 |                       name: Optional[str] = None,
 70 |                       experimental_aggregate_gradients: bool = True) -> None:
 71 |     """See base class."""
 72 |     var_optimizer_dict = {}
 73 | 
 74 |     for optimizer, var_callable in self._optimizers_and_vars:
 75 |       for v in var_callable():
 76 |         if v.ref() in var_optimizer_dict:
 77 |           raise ValueError(
 78 |               f"The set of variables handled by each optimizer should be "
 79 |               f"disjoint, but variable {v} is handled both "
 80 |               f"by {var_optimizer_dict[v.ref()]} and {optimizer}.")
 81 |         var_optimizer_dict[v.ref()] = optimizer
 82 | 
 83 |     optimizer_grads_and_vars = collections.defaultdict(list)
 84 |     for g, v in grads_and_vars:
 85 |       if v.ref() in var_optimizer_dict:
 86 |         optimizer = var_optimizer_dict[v.ref()]
 87 |         optimizer_grads_and_vars[optimizer].append((g, v))
 88 |       else:
 89 |         raise ValueError(f"Variable {v} is not handled by any optimizer. "
 90 |                          f"This would cause it to be not trained.")
 91 | 
 92 |     for optimizer, opt_grads_and_vars in optimizer_grads_and_vars.items():
 93 |       optimizer.apply_gradients(
 94 |           opt_grads_and_vars,
 95 |           name=name,
 96 |           experimental_aggregate_gradients=experimental_aggregate_gradients)
 97 | 
 98 |   def get_config(self):
 99 |     raise NotImplementedError("CompositeOptimizer cannot be serialized because"
100 |                               " it uses callable to get variables.")
101 | 
102 |   @property
103 |   def iterations(self):
104 |     """See base class."""
105 |     # Returning iterations from the first optimizer.
106 |     return self._optimizers_and_vars[0][0].iterations
107 | 
108 |   @iterations.setter
109 |   def iterations(self, variable):
110 |     """See base class."""
111 |     for optimizer, _ in self._optimizers_and_vars:
112 |       optimizer.iterations = variable
113 | 
114 |   def variables(self):
115 |     """Returns the optimizer's variables."""
116 |     # OptimizerV2.variables() returns self._weights, so override that method.
117 |     return self.weights
118 | 
119 |   @property
120 |   def weights(self) -> List[tf.Variable]:
121 |     """Returns the optimizer's variables."""
122 |     weights = []
123 |     for optimizer, _ in self._optimizers_and_vars:
124 |       weights += optimizer.weights
125 |     return weights
126 | 
127 |   @property
128 |   def optimizers(self) -> List[tf.keras.optimizers.legacy.Optimizer]:
129 |     """Returns the optimizers in composite optimizer (in the original order)."""
130 |     return [optimizer for optimizer, _ in self._optimizers_and_vars]
131 | 
132 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/experimental/optimizers/composite_optimizer_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Tests for CompositeOptimizer."""
 16 | import os.path
 17 | import tempfile
 18 | 
 19 | from absl.testing import parameterized
 20 | 
 21 | import numpy as np
 22 | import tensorflow as tf
 23 | 
 24 | from tensorflow_recommenders.experimental.optimizers.composite_optimizer import CompositeOptimizer
 25 | 
 26 | 
class CompositeOptimizerTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for `CompositeOptimizer`."""

  @parameterized.parameters(
      ("sgd", "adam"),
      ("rmsprop", "sgd"),
      ("adam", "adagrad"),
      ("adagrad", "rmsprop"))
  def test_composite_optimizer(self, optimizer1_type, optimizer2_type):
    """Compares composite updates with the two optimizers applied separately.

    Args:
      optimizer1_type: Key into `optimizer_dict` for the optimizer of `var1`.
      optimizer2_type: Key into `optimizer_dict` for the optimizer of `var2`
        and `var3`.
    """
    values1 = [1.0, 2.0, 3.0]
    values2 = [0.5, 0.0, -2.0]
    values3 = [0.1, 0.0, -1.0]

    grad1_values = [0.1, 0.2, 1.0]
    grad2_values = [-0.1, 0.05, 2.0]
    grad3_values = [2.1, 0.0, 0.3]

    var1 = tf.Variable(values1)
    var2 = tf.Variable(values2)
    var3 = tf.Variable(values3)

    grads1 = tf.constant(grad1_values)
    grads2 = tf.constant(grad2_values)
    grads3 = tf.constant(grad3_values)

    optimizer_dict = {
        "sgd": tf.keras.optimizers.legacy.SGD,
        "adam": tf.keras.optimizers.legacy.Adam,
        "rmsprop": tf.keras.optimizers.legacy.RMSprop,
        "adagrad": tf.keras.optimizers.legacy.Adagrad,
    }

    # Optimizer instances wrapped by the composite optimizer.
    comp_optimizer1 = optimizer_dict[optimizer1_type]()
    comp_optimizer2 = optimizer_dict[optimizer2_type]()

    composite_optimizer = CompositeOptimizer([
        (comp_optimizer1, lambda: [var1]),
        (comp_optimizer2, lambda: [var2, var3]),
    ])

    self.assertSequenceEqual(composite_optimizer.optimizers,
                             [comp_optimizer1, comp_optimizer2])

    # Separate optimizer instances of the same types, used as the reference.
    optimizer1 = optimizer_dict[optimizer1_type]()
    optimizer2 = optimizer_dict[optimizer2_type]()

    # The reference optimizers update fresh variables that start from the same
    # values and receive the same gradients as var1, var2 and var3.
    grads_and_vars_1 = [(tf.constant(grad1_values), tf.Variable(values1))]
    grads_and_vars_2 = [(tf.constant(grad2_values), tf.Variable(values2)),
                        (tf.constant(grad3_values), tf.Variable(values3))]
    grads_and_vars = list(zip([grads1, grads2, grads3], [var1, var2, var3]))

    for _ in range(10):
      # Test that applying a composite optimizer has the same effect as
      # applying optimizer1 and optimizer2 separately on subset of gradients/
      # variables.
      composite_optimizer.apply_gradients(grads_and_vars)
      optimizer1.apply_gradients(grads_and_vars_1)
      optimizer2.apply_gradients(grads_and_vars_2)

      self.assertAllClose(grads_and_vars[:1], grads_and_vars_1)
      self.assertAllClose(grads_and_vars[1:], grads_and_vars_2)

  def test_incorrect_inputs(self):
    """Checks that overlapping or missing variables raise `ValueError`."""
    var1 = tf.Variable([0.1, 0.2, 1.0])
    var2 = tf.Variable([-5.1, 0.1, 0])
    # NOTE(review): `0/3` evaluates to 0.0; possibly a typo for `0.3`. The
    # value does not affect what this test asserts - confirm before changing.
    var3 = tf.Variable([-2.1, 1.3, 0/3])

    grads1 = tf.constant([0.1, 0.2, 1.0])
    grads2 = tf.constant([0.5, 0.0, -2.0])
    grads3 = tf.constant([-0.2, 0.0, -1.0])

    # Test same variable in two optimizers.
    composite_optimizer = CompositeOptimizer([
        (tf.keras.optimizers.legacy.Adam(), lambda: [var1]),
        (tf.keras.optimizers.legacy.Adagrad(), lambda: [var1, var2]),
    ])

    grads_and_vars = list(zip([grads1, grads2], [var1, var2]))

    with self.assertRaises(ValueError):
      composite_optimizer.apply_gradients(grads_and_vars)

    # Test missing variable (var3) in optimizers.
    composite_optimizer = CompositeOptimizer([
        (tf.keras.optimizers.legacy.Adam(), lambda: [var1]),
        (tf.keras.optimizers.legacy.Adagrad(), lambda: [var2]),
    ])

    grads_and_vars = list(zip([grads1, grads2, grads3], [var1, var2, var3]))

    with self.assertRaises(ValueError):
      composite_optimizer.apply_gradients(grads_and_vars)

  def test_checkpoint_save_restore_export(self):
    """Checks checkpoint write/read and SavedModel export of a trained model."""
    # Use a simple LinearModel to test checkpoint save/restore/export.
    def get_model() -> tf.keras.Model:
      """Returns a LinearModel compiled with a two-way CompositeOptimizer."""
      model = tf.keras.experimental.LinearModel(units=10)

      # The lambdas read `model.trainable_variables` lazily, each time they
      # are called.
      composite_optimizer = CompositeOptimizer([
          (tf.keras.optimizers.legacy.Adam(),
           lambda: model.trainable_variables[:1]),
          (tf.keras.optimizers.legacy.Adagrad(),
           lambda: model.trainable_variables[1:]),
      ])
      model.compile(optimizer=composite_optimizer,
                    loss=tf.keras.losses.MSE)
      return model

    batch_size = 16
    num_of_batches = 8
    rng = np.random.RandomState(42)

    x = rng.normal(size=(num_of_batches * batch_size, 5))
    y = rng.normal(size=(num_of_batches * batch_size, 1))
    training_dataset = tf.data.Dataset.from_tensor_slices((x, y))
    training_dataset = training_dataset.batch(batch_size)

    model = get_model()
    model.fit(training_dataset, epochs=1)

    # Check that optimizer iterations match dataset size.
    self.assertEqual(model.optimizer.iterations.numpy(), num_of_batches)
    # Check that it has state for all the model's variables
    self.assertLen(model.optimizer.variables(), 5)

    # Save checkpoint.
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint_path = self.get_temp_dir()
    checkpoint.write(checkpoint_path)

    # Restore to a fresh instance and check.
    new_model = get_model()
    # Train on a single batch only: if the restore fails, we can tell
    # by the number of iterations being 1 rather than `num_of_batches`.
    new_model.fit(training_dataset.take(1))

    checkpoint = tf.train.Checkpoint(model=new_model)
    checkpoint.read(checkpoint_path).assert_consumed()

    # After restoring the checkpoint, optimizer iterations should also be
    # restored to its original value.
    self.assertEqual(new_model.optimizer.iterations.numpy(), num_of_batches)
    # Same for the rest of its variables.
    self.assertAllClose(
        new_model.optimizer.variables(),
        model.optimizer.variables()
    )

    model_pred = new_model.predict(training_dataset)

    # Export without the optimizer and check that the reloaded model gives
    # the same predictions.
    with tempfile.TemporaryDirectory() as tmp:
      path = os.path.join(tmp, "model_with_composite_optimizer")
      new_model.save(
          path,
          include_optimizer=False,
          options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
      loaded_model = tf.keras.models.load_model(path)
      loaded_pred = loaded_model.predict(training_dataset)

    self.assertEqual(
        model.layers[0].get_config(), loaded_model.layers[0].get_config())
    self.assertAllEqual(model_pred, loaded_pred)
188 | 
189 | 
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
  tf.test.main()
192 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Lint-as: python3
16 | """Layers."""
17 | 
18 | from tensorflow_recommenders.layers import blocks
19 | from tensorflow_recommenders.layers import embedding
20 | from tensorflow_recommenders.layers import factorized_top_k
21 | from tensorflow_recommenders.layers import feature_interaction
22 | from tensorflow_recommenders.layers import loss
23 | from tensorflow_recommenders.layers.feature_interaction import dcn
24 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/blocks.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Convenience blocks for building models."""
16 | 
17 | from typing import List, Optional
18 | 
19 | import tensorflow as tf
20 | 
21 | from tensorflow_recommenders import types
22 | 
23 | 
class MLP(tf.keras.layers.Layer):
  """A stack of `Dense` layers applied one after another."""

  def __init__(
      self,
      units: List[int],
      use_bias: bool = True,
      activation: Optional[types.Activation] = "relu",
      final_activation: Optional[types.Activation] = None,
      **kwargs) -> None:
    """Creates one `Dense` sublayer per entry of `units`.

    Args:
      units: Layer sizes, in the order the layers are applied.
      use_bias: Whether each `Dense` sublayer has a bias term.
      activation: Activation for every sublayer except the last one.
      final_activation: Activation for the last sublayer.
      **kwargs: Forwarded to the Keras `Layer` base class.
    """

    super().__init__(**kwargs)

    # Every entry but the last becomes a hidden layer with `activation`.
    hidden_layers = [
        tf.keras.layers.Dense(
            width, activation=activation, use_bias=use_bias)
        for width in units[:-1]
    ]
    # The last entry becomes the output layer with `final_activation`.
    output_layer = tf.keras.layers.Dense(
        units[-1], activation=final_activation, use_bias=use_bias)

    self._sublayers = hidden_layers + [output_layer]

  def call(self, x: tf.Tensor) -> tf.Tensor:
    """Feeds `x` through each sublayer in turn and returns the result."""
    outputs = x
    for sublayer in self._sublayers:
      outputs = sublayer(outputs)

    return outputs
62 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/embedding/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Embedding layers."""
16 | 
17 | from tensorflow_recommenders.layers.embedding.tpu_embedding_layer import TPUEmbedding
18 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/factorized_top_k_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests for factorized top K layers."""
 17 | 
 18 | import itertools
 19 | import os
 20 | 
 21 | from typing import Any, Dict, Iterator
 22 | 
 23 | from absl.testing import parameterized
 24 | 
 25 | import numpy as np
 26 | import tensorflow as tf
 27 | 
 28 | from tensorflow_recommenders.layers import factorized_top_k
 29 | 
 30 | 
 31 | def test_cases(
 32 |     k=(5, 10),
 33 |     batch_size=(3, 16),
 34 |     num_queries=(3, 15, 16),
 35 |     num_candidates=(1024, 128),
 36 |     indices_dtype=(str, None),
 37 |     use_exclusions=(True, False)) -> Iterator[Dict[str, Any]]:
 38 |   """Generates test cases.
 39 | 
 40 |   Generates all possible combinations of input arguments as test cases.
 41 | 
 42 |   Args:
 43 |     k: The number of candidates to retrieve.
 44 |     batch_size: The query batch size.
 45 |     num_queries: Number of queries.
 46 |     num_candidates: Number of candidates.
 47 |     indices_dtype: The type of indices.
 48 |     use_exclusions: Whether to test exclusions.
 49 | 
 50 |   Yields:
 51 |     Keyword argument dicts.
 52 |   """
 53 | 
 54 |   keys = ("k", "batch_size", "num_queries", "num_candidates", "indices_dtype",
 55 |           "use_exclusions")
 56 | 
 57 |   for values in itertools.product(k, batch_size, num_queries, num_candidates,
 58 |                                   indices_dtype, use_exclusions):
 59 |     yield dict(zip(keys, values))
 60 | 
 61 | 
class FactorizedTopKTestBase(tf.test.TestCase, parameterized.TestCase):
  """Shared helpers for testing factorized top-K layers."""

  def run_save_and_restore_test(self, layer, query, num):
    """Checks that a saved and reloaded layer returns the same results.

    Args:
      layer: An already indexed layer; it is called on `query` and saved.
      query: Query input passed to the layer.
      num: How many times the layer is called before and after the round
        trip. Must be at least 1, since the compared results are only bound
        inside the loops.
    """
    for _ in range(num):
      pre_serialization_results = layer(query)

    path = os.path.join(self.get_temp_dir(), "layer")
    layer.save(
        path, options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
    restored = tf.keras.models.load_model(path)

    for _ in range(num):
      post_serialization_results = restored(tf.constant(query))

    # Only the results of the last call on each side are compared.
    self.assertAllEqual(post_serialization_results, pre_serialization_results)

  def run_top_k_test(self,
                     layer_class,
                     k,
                     batch_size,
                     num_queries,
                     num_candidates,
                     indices_dtype,
                     use_exclusions,
                     random_seed=42,
                     check_export=True):
    """Compares a layer's top-K output with a NumPy reference computation.

    Args:
      layer_class: Callable invoked as `layer_class(k=k)` to build the layer.
      k: The number of candidates to retrieve.
      batch_size: Batch size used for the candidate dataset.
      num_queries: Number of query rows to generate.
      num_candidates: Number of candidate rows to generate.
      indices_dtype: dtype the candidate identifiers are cast to. If None,
        int32 is used for the reference and no identifiers are indexed.
      use_exclusions: Whether to query through `query_with_exclusions`.
      random_seed: Seed for the NumPy random state.
      check_export: Whether to also save the layer, reload it and check the
        reloaded layer's indices.
    """

    layer = layer_class(k=k)

    rng = np.random.RandomState(random_seed)
    candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)

    candidate_indices = np.arange(num_candidates).astype(
        indices_dtype if indices_dtype is not None else np.int32)

    # Drawn unconditionally; only used when `use_exclusions` is set.
    exclude = rng.randint(0, num_candidates, size=(num_queries, 5))

    scores = np.dot(query, candidates.T)

    # Set scores of candidates chosen for exclusion to a low value.
    adjusted_scores = scores.copy()
    if use_exclusions:
      exclude_identifiers = candidate_indices[exclude]
      for row_idx, row in enumerate(exclude):
        # `set` ensures a candidate drawn twice in a row is penalized once.
        for col_idx in set(row):
          adjusted_scores[row_idx, col_idx] -= 1000.0
    else:
      exclude_identifiers = None

    # Get indices based on adjusted scores, but retain actual scores.
    indices = np.argsort(-adjusted_scores, axis=1)[:, :k]
    expected_top_scores = np.take_along_axis(scores, indices, 1)
    expected_top_indices = candidate_indices[indices]

    candidates = tf.data.Dataset.from_tensor_slices(candidates).batch(
        batch_size)

    # With an explicit dtype, index (identifiers, candidates) pairs instead of
    # candidates alone.
    if indices_dtype is not None:
      identifiers = tf.data.Dataset.from_tensor_slices(candidate_indices).batch(
          batch_size)
      candidates = tf.data.Dataset.zip((identifiers, candidates))

    # Call twice to ensure the results are repeatable.
    for _ in range(2):
      if use_exclusions:
        layer.index_from_dataset(candidates)
        top_scores, top_indices = layer.query_with_exclusions(
            query, exclude_identifiers)
      else:
        layer.index_from_dataset(candidates)
        top_scores, top_indices = layer(query)

    self.assertAllEqual(top_scores.shape, expected_top_scores.shape)
    self.assertAllEqual(top_indices.shape, expected_top_indices.shape)
    self.assertAllClose(top_scores, expected_top_scores, atol=1e-4)

    self.assertAllEqual(top_indices.numpy().astype(indices_dtype),
                        expected_top_indices)

    if not check_export:
      return

    # Save and restore to check export.
    path = os.path.join(self.get_temp_dir(), "layer")
    layer.save(
        path, options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
    restored = tf.keras.models.load_model(path)

    if use_exclusions:
      _, restored_top_indices = restored.query_with_exclusions(
          query, exclude_identifiers)
    else:
      _, restored_top_indices = restored(query)

    # Only the indices of the restored layer are checked, not its scores.
    self.assertAllEqual(restored_top_indices.numpy().astype(indices_dtype),
                        expected_top_indices)
159 | 
160 | 
class StreamingTest(FactorizedTopKTestBase):
  """Top-K checks for `factorized_top_k.Streaming`."""

  @parameterized.parameters(test_cases())
  def test_streaming(self, *args, **kwargs):
    """Runs the shared top-K test with the export check turned off."""
    layer_class = factorized_top_k.Streaming
    self.run_top_k_test(layer_class, *args, check_export=False, **kwargs)
167 | 
168 | 
class BruteForceTest(FactorizedTopKTestBase):
  """Top-K checks for `factorized_top_k.BruteForce`."""

  @parameterized.parameters(test_cases())
  def test_brute_force(self, *args, **kwargs):
    """Runs the shared top-K test, including the export check."""
    layer_class = factorized_top_k.BruteForce
    self.run_top_k_test(layer_class, *args, **kwargs)
174 | 
175 | 
class ScannTest(FactorizedTopKTestBase):
  """Tests for `factorized_top_k.ScaNN`."""

  @parameterized.parameters(str, np.float32, np.float64, np.int32, np.int64)
  def test_scann(self, identifier_dtype):
    """Checks save/restore of an index built via `index` with identifiers.

    Args:
      identifier_dtype: dtype the candidate identifiers are cast to.
    """

    num_candidates, num_queries = (1000, 4)

    rng = np.random.RandomState(42)
    candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)
    candidate_names = np.arange(num_candidates).astype(identifier_dtype)

    scann = factorized_top_k.ScaNN()
    scann.index(candidates, candidate_names)

    self.run_save_and_restore_test(scann, query, 100)

  def test_scann_dataset_arg_no_identifiers(self):
    """Checks save/restore of an index built from a candidates-only dataset."""

    num_candidates, num_queries = (100, 4)

    rng = np.random.RandomState(42)
    candidates = tf.data.Dataset.from_tensor_slices(
        rng.normal(size=(num_candidates, 4)).astype(np.float32))
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)

    scann = factorized_top_k.ScaNN()
    scann.index_from_dataset(candidates.batch(100))

    self.run_save_and_restore_test(scann, query, 100)

  def test_scann_dataset_arg_with_identifiers(self):
    """Checks save/restore of an index built from (identifier, candidate)."""

    num_candidates, num_queries = (100, 4)

    rng = np.random.RandomState(42)
    candidates = tf.data.Dataset.from_tensor_slices(
        rng.normal(size=(num_candidates, 4)).astype(np.float32))
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)
    identifiers = tf.data.Dataset.from_tensor_slices(np.arange(num_candidates))

    index = factorized_top_k.ScaNN()
    # `Dataset.zip` is a static method, so `identifiers.zip(candidates)` zips
    # only `candidates` and silently drops the identifiers. Pass both datasets
    # explicitly so the identifiers are actually indexed.
    index.index_from_dataset(
        tf.data.Dataset.zip((identifiers, candidates)).batch(100))

    self.run_save_and_restore_test(index, query, 100)

  @parameterized.parameters(factorized_top_k.ScaNN, factorized_top_k.BruteForce)
  def test_raise_on_incorrect_input_shape(
      self, layer_class: factorized_top_k.TopK):
    """Checks that mismatched identifiers and candidates raise `ValueError`.

    Args:
      layer_class: The layer class to instantiate with default arguments.
    """

    num_candidates = 100
    candidates = tf.data.Dataset.from_tensor_slices(
        np.random.normal(size=(num_candidates, 4)).astype(np.float32))
    # One identifier fewer than there are candidates, batched differently.
    identifiers = tf.data.Dataset.from_tensor_slices(
        np.arange(num_candidates - 1))

    with self.assertRaises(ValueError):
      index = layer_class()
      index.index_from_dataset(
          tf.data.Dataset.zip((identifiers.batch(20), candidates.batch(100)))
      )

  @parameterized.parameters(test_cases())
  def test_scann_top_k(self, k, batch_size, num_queries, num_candidates,
                       indices_dtype, use_exclusions):
    """Runs the shared top-K test against a single-leaf ScaNN layer."""

    def scann(k):
      """Returns brute-force-like ScaNN for testing."""
      return factorized_top_k.ScaNN(
          k=k,
          num_leaves=1,
          num_leaves_to_search=1,
          num_reordering_candidates=num_candidates)

    self.run_top_k_test(scann, k, batch_size, num_queries, num_candidates,
                        indices_dtype, use_exclusions)
252 | 
253 | 
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
  tf.test.main()
256 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Feature Interaction layers."""
16 | 
17 | from tensorflow_recommenders.layers.feature_interaction.dcn import Cross
18 | from tensorflow_recommenders.layers.feature_interaction.dot_interaction import DotInteraction
19 | from tensorflow_recommenders.layers.feature_interaction.multi_layer_dcn import MultiLayerDCN
20 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/dcn.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Implements `Cross` Layer, the cross layer in Deep & Cross Network (DCN)."""
 16 | 
 17 | from typing import Union, Text, Optional
 18 | 
 19 | import tensorflow as tf
 20 | 
 21 | 
 22 | @tf.keras.utils.register_keras_serializable()
 23 | class Cross(tf.keras.layers.Layer):
 24 |   """Cross Layer in Deep & Cross Network to learn explicit feature interactions.
 25 | 
 26 |     A layer that creates explicit and bounded-degree feature interactions
 27 |     efficiently. The `call` method accepts two tensors, `x0` and `x`. The
 28 |     first input `x0` is the base layer that contains the original features
 29 |     (usually the embedding layer); the second input `x` (`xi` below) is the
 30 |     output of the previous `Cross` layer in the stack, i.e., the i-th `Cross`
 31 |     layer. For the first `Cross` layer in the stack, x0 = xi.
 32 | 
 33 |     The output is x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi,
 34 |     where .* designates elementwise multiplication, W could be a full-rank
 35 |     matrix, or a low-rank matrix U*V to reduce the computational cost, and
 36 |     diag_scale increases the diagonal of W to improve training stability
 37 |     (especially for the low-rank case).
 38 | 
 39 |     References:
 40 |         1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf)
 41 |           See Eq. (1) for full-rank and Eq. (2) for low-rank version.
 42 |         2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf)
 43 | 
 44 |     Example:
 45 | 
 46 |         ```python
 47 |         # after embedding layer in a functional model:
 48 |         input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64)
 49 |         x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6)(input)
 50 |         x1 = Cross()(x0, x0)
 51 |         x2 = Cross()(x0, x1)
 52 |         logits = tf.keras.layers.Dense(units=10)(x2)
 53 |         model = tf.keras.Model(input, logits)
 54 |         ```
 55 | 
 56 |     Args:
 57 |         projection_dim: project dimension to reduce the computational cost.
 58 |           Default is `None` such that a full (`input_dim` by `input_dim`) matrix
 59 |           W is used. If enabled, a low-rank matrix W = U*V will be used, where U
 60 |           is of size `input_dim` by `projection_dim` and V is of size
 61 |           `projection_dim` by `input_dim`. `projection_dim` needs to be smaller
 62 |           than `input_dim`/2 to improve the model efficiency. In practice, we've
 63 |           observed that `projection_dim` = `input_dim`/4 consistently
 64 |           preserved the accuracy of a full-rank version.
 65 |         diag_scale: a non-negative float used to increase the diagonal of the
 66 |           kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an
 67 |           identity matrix.
 68 |         use_bias: whether to add a bias term for this layer. If set to False,
 69 |           no bias term will be used.
 70 |         preactivation: Activation applied to output matrix of the layer, before
 71 |           multiplication with the input. Can be used to control the scale of the
 72 |           layer's outputs and improve stability.
 73 |         kernel_initializer: Initializer to use on the kernel matrix.
 74 |         bias_initializer: Initializer to use on the bias vector.
 75 |         kernel_regularizer: Regularizer to use on the kernel matrix.
 76 |         bias_regularizer: Regularizer to use on bias vector.
 77 | 
 78 |     Input shape: `x0` and optionally `x`, each (batch_size, `input_dim`).
 79 |     Output shape: A single (batch_size, `input_dim`) dimensional output.
 80 |   """
 81 | 
 82 |   def __init__(
 83 |       self,
 84 |       projection_dim: Optional[int] = None,
 85 |       diag_scale: Optional[float] = 0.0,
 86 |       use_bias: bool = True,
 87 |       preactivation: Optional[Union[str, tf.keras.layers.Activation]] = None,
 88 |       kernel_initializer: Union[
 89 |           Text, tf.keras.initializers.Initializer] = "truncated_normal",
 90 |       bias_initializer: Union[Text,
 91 |                               tf.keras.initializers.Initializer] = "zeros",
 92 |       kernel_regularizer: Union[Text, None,
 93 |                                 tf.keras.regularizers.Regularizer] = None,
 94 |       bias_regularizer: Union[Text, None,
 95 |                               tf.keras.regularizers.Regularizer] = None,
 96 |       **kwargs):
 97 | 
 98 |     super(Cross, self).__init__(**kwargs)
 99 | 
100 |     self._projection_dim = projection_dim
101 |     self._diag_scale = diag_scale
102 |     self._use_bias = use_bias
103 |     self._preactivation = tf.keras.activations.get(preactivation)
104 |     self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
105 |     self._bias_initializer = tf.keras.initializers.get(bias_initializer)
106 |     self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
107 |     self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
108 |     self._input_dim = None
109 | 
110 |     self._supports_masking = True
111 | 
112 |     if self._diag_scale < 0:  # pytype: disable=unsupported-operands
113 |       raise ValueError(
114 |           "`diag_scale` should be non-negative. Got `diag_scale` = {}".format(
115 |               self._diag_scale))
116 | 
117 |   def build(self, input_shape):
118 |     last_dim = input_shape[-1]
119 | 
120 |     if self._projection_dim is None:
121 |       self._dense = tf.keras.layers.Dense(
122 |           last_dim,
123 |           kernel_initializer=_clone_initializer(self._kernel_initializer),
124 |           bias_initializer=self._bias_initializer,
125 |           kernel_regularizer=self._kernel_regularizer,
126 |           bias_regularizer=self._bias_regularizer,
127 |           use_bias=self._use_bias,
128 |           dtype=self.dtype,
129 |           activation=self._preactivation,
130 |       )
131 |     else:
132 |       self._dense_u = tf.keras.layers.Dense(
133 |           self._projection_dim,
134 |           kernel_initializer=_clone_initializer(self._kernel_initializer),
135 |           kernel_regularizer=self._kernel_regularizer,
136 |           use_bias=False,
137 |           dtype=self.dtype,
138 |       )
139 |       self._dense_v = tf.keras.layers.Dense(
140 |           last_dim,
141 |           kernel_initializer=_clone_initializer(self._kernel_initializer),
142 |           bias_initializer=self._bias_initializer,
143 |           kernel_regularizer=self._kernel_regularizer,
144 |           bias_regularizer=self._bias_regularizer,
145 |           use_bias=self._use_bias,
146 |           dtype=self.dtype,
147 |           activation=self._preactivation,
148 |       )
149 |     self.built = True
150 | 
151 |   def call(self, x0: tf.Tensor, x: Optional[tf.Tensor] = None) -> tf.Tensor:
152 |     """Computes the feature cross.
153 | 
154 |     Args:
155 |       x0: The base input tensor.
156 |       x: Optional second input tensor; when omitted, `x0` is crossed with
157 |         itself.
158 |     Returns:
159 |       Tensor of crosses.
160 |     Raises:
161 |       ValueError: If the last dimensions of `x0` and `x` differ.
162 |     """
163 | 
164 |     if not self.built:
165 |       self.build(x0.shape)
166 | 
167 |     if x is None:
168 |       x = x0
169 | 
170 |     if x0.shape[-1] != x.shape[-1]:
171 |       raise ValueError(
172 |           "`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x "
173 |           "dimension {}. This case is not supported yet.".format(
174 |               x0.shape[-1], x.shape[-1]))
175 | 
176 |     if self._projection_dim is None:
177 |       prod_output = self._dense(x)
178 |     else:
179 |       prod_output = self._dense_v(self._dense_u(x))
180 | 
181 |     prod_output = tf.cast(prod_output, self.compute_dtype)
182 | 
183 |     if self._diag_scale:
184 |       prod_output = prod_output + self._diag_scale * x
185 | 
186 |     return x0 * prod_output + x
187 | 
188 |   def get_config(self):
189 |     config = {
190 |         "projection_dim":
191 |             self._projection_dim,
192 |         "diag_scale":
193 |             self._diag_scale,
194 |         "use_bias":
195 |             self._use_bias,
196 |         "preactivation":
197 |             tf.keras.activations.serialize(self._preactivation),
198 |         "kernel_initializer":
199 |             tf.keras.initializers.serialize(self._kernel_initializer),
200 |         "bias_initializer":
201 |             tf.keras.initializers.serialize(self._bias_initializer),
202 |         "kernel_regularizer":
203 |             tf.keras.regularizers.serialize(self._kernel_regularizer),
204 |         "bias_regularizer":
205 |             tf.keras.regularizers.serialize(self._bias_regularizer),
206 |     }
207 |     base_config = super().get_config()
208 |     return dict(list(base_config.items()) + list(config.items()))
209 | 
210 | 
211 | def _clone_initializer(initializer):  # Fresh instance rebuilt from config.
212 |   return initializer.__class__.from_config(initializer.get_config())
213 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/dcn_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Tests for Cross layer."""
 16 | 
 17 | import os
 18 | import tempfile
 19 | 
 20 | import numpy as np
 21 | import tensorflow as tf
 22 | 
 23 | from tensorflow_recommenders.layers.feature_interaction.dcn import Cross
 24 | 
 25 | 
 26 | class CrossTest(tf.test.TestCase):
 27 |   # Do not use layer_test due to multiple inputs.
 28 | 
 29 |   def test_full_matrix(self):
 30 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 31 |     x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 32 |     layer = Cross(projection_dim=None, kernel_initializer="ones")
 33 |     output = layer(x0, x)
 34 |     self.evaluate(tf.compat.v1.global_variables_initializer())
 35 |     self.assertAllClose(np.asarray([[0.55, 0.8, 1.05]]), output)
 36 | 
 37 |   def test_low_rank_matrix(self):
 38 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 39 |     x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 40 |     layer = Cross(projection_dim=1, kernel_initializer="ones")
 41 |     output = layer(x0, x)
 42 |     self.evaluate(tf.compat.v1.global_variables_initializer())
 43 |     self.assertAllClose(np.asarray([[0.55, 0.8, 1.05]]), output)
 44 | 
 45 |   def test_one_input(self):
 46 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 47 |     layer = Cross(projection_dim=None, kernel_initializer="ones")
 48 |     output = layer(x0)
 49 |     self.evaluate(tf.compat.v1.global_variables_initializer())
 50 |     self.assertAllClose(np.asarray([[0.16, 0.32, 0.48]]), output)
 51 | 
 52 |   def test_unsupported_input_dim(self):
 53 |     with self.assertRaisesRegex(ValueError,
 54 |                                  r"dimension mismatch"):
 55 |       x0 = np.random.random((12, 5))
 56 |       x = np.random.random((12, 7))
 57 |       layer = Cross()
 58 |       layer(x0, x)
 59 | 
 60 |   def test_invalid_diag_scale(self):
 61 |     with self.assertRaisesRegex(ValueError,
 62 |                                  r"`diag_scale` should be non-negative"):
 63 |       x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 64 |       x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 65 |       layer = Cross(diag_scale=-1.)
 66 |       layer(x0, x)
 67 | 
 68 |   def test_bias(self):
 69 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 70 |     x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 71 |     layer = Cross(projection_dim=None, kernel_initializer="ones",
 72 |                   bias_initializer="ones")
 73 |     output = layer(x0, x)
 74 |     self.evaluate(tf.compat.v1.global_variables_initializer())
 75 |     self.assertAllClose(np.asarray([[0.65, 1., 1.35]]), output)
 76 | 
 77 |   def test_serialization(self):
 78 |     layer = Cross(projection_dim=None, preactivation="swish")
 79 |     serialized_layer = tf.keras.layers.serialize(layer)
 80 |     new_layer = tf.keras.layers.deserialize(serialized_layer)
 81 |     self.assertEqual(layer.get_config(), new_layer.get_config())
 82 | 
 83 |   def test_diag_scale(self):
 84 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 85 |     x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 86 |     layer = Cross(
 87 |         projection_dim=None, diag_scale=1., kernel_initializer="ones")
 88 |     output = layer(x0, x)
 89 |     self.evaluate(tf.compat.v1.global_variables_initializer())
 90 |     self.assertAllClose(np.asarray([[0.59, 0.9, 1.23]]), output)
 91 | 
 92 |   def test_preactivation(self):
 93 |     x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
 94 |     x = np.asarray([[0.4, 0.5, 0.6]]).astype(np.float32)
 95 |     layer = Cross(
 96 |         projection_dim=None,
 97 |         preactivation=tf.zeros_like
 98 |     )
 99 |     output = layer(x0, x)
100 |     self.evaluate(tf.compat.v1.global_variables_initializer())
101 |     self.assertAllClose(x, output)
102 | 
103 |   def test_save_model(self):
104 | 
105 |     def get_model():
106 |       x0 = tf.keras.layers.Input(shape=(13,))
107 |       x1 = Cross(projection_dim=None)(x0, x0)
108 |       x2 = Cross(projection_dim=None)(x0, x1)
109 |       logits = tf.keras.layers.Dense(units=1)(x2)
110 |       model = tf.keras.Model(x0, logits)
111 |       return model
112 | 
113 |     model = get_model()
114 |     random_input = np.random.uniform(size=(10, 13))
115 |     model_pred = model.predict(random_input)
116 | 
117 |     with tempfile.TemporaryDirectory() as tmp:
118 |       path = os.path.join(tmp, "dcn_model")
119 |       model.save(
120 |           path,
121 |           options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
122 |       loaded_model = tf.keras.models.load_model(path)
123 |       loaded_pred = loaded_model.predict(random_input)
124 |     for i in range(3):
125 |       assert model.layers[i].get_config() == loaded_model.layers[i].get_config()
126 |     self.assertAllEqual(model_pred, loaded_pred)
127 | 
128 | 
129 | if __name__ == "__main__":
130 |   tf.test.main()  # Run the tests when this file is executed directly.
131 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/dot_interaction.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Implements `Dot Interaction` Layer of DLRM model."""
 16 | 
 17 | from typing import List, Optional
 18 | 
 19 | import tensorflow as tf
 20 | 
 21 | 
 22 | class DotInteraction(tf.keras.layers.Layer):
 23 |   """Dot interaction layer.
 24 | 
 25 |   See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf,
 26 |   section 2.1.3. Sparse activations and dense activations are combined.
 27 |   Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the
 28 |   same dimension and the output is a batch of Tensors with all distinct pairwise
 29 |   dot products of the form dot(e_i, e_j) for i <= j if self self_interaction is
 30 |   True, otherwise dot(e_i, e_j) i < j.
 31 | 
 32 |   Attributes:
 33 |     self_interaction: Boolean indicating if features should self-interact.
 34 |       If it is True, then the diagonal enteries of the interaction matric are
 35 |       also taken.
 36 |     skip_gather: An optimization flag. If it's set then the upper triangle part
 37 |       of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting
 38 |       activations will be of dimension [num_features * num_features] from which
 39 |       half will be zeros. Otherwise activations will be only lower triangle part
 40 |       of the interaction matrix. The later saves space but is much slower.
 41 |     name: String name of the layer.
 42 |   """
 43 | 
 44 |   def __init__(self,
 45 |                self_interaction: bool = False,
 46 |                skip_gather: bool = False,
 47 |                name: Optional[str] = None,
 48 |                **kwargs) -> None:
 49 |     self._self_interaction = self_interaction
 50 |     self._skip_gather = skip_gather
 51 |     super().__init__(name=name, **kwargs)
 52 | 
 53 |   def call(self, inputs: List[tf.Tensor]) -> tf.Tensor:
 54 |     """Performs the interaction operation on the tensors in the list.
 55 | 
 56 |     The tensors represent as transformed dense features and embedded categorical
 57 |     features.
 58 |     Pre-condition: The tensors should all have the same shape.
 59 | 
 60 |     Args:
 61 |       inputs: List of features with shapes [batch_size, feature_dim].
 62 | 
 63 |     Returns:
 64 |       activations: Tensor representing interacted features. It has a dimension
 65 |       `num_features * num_features` if skip_gather is True, otherside
 66 |       `num_features * (num_features + 1) / 2` if self_interaction is True and
 67 |       `num_features * (num_features - 1) / 2` if self_interaction is False.
 68 |     """
 69 |     num_features = len(inputs)
 70 |     batch_size = tf.shape(inputs[0])[0]
 71 |     feature_dim = tf.shape(inputs[0])[1]
 72 |     # concat_features shape: batch_size, num_features, feature_dim
 73 |     try:
 74 |       concat_features = tf.concat(inputs, axis=-1)
 75 |       concat_features = tf.reshape(concat_features,
 76 |                                    [batch_size, -1, feature_dim])
 77 |     except (ValueError, tf.errors.InvalidArgumentError) as e:
 78 |       raise ValueError(f"Input tensors` dimensions must be equal, original"
 79 |                        f"error message: {e}")
 80 | 
 81 |     # Interact features, select lower-triangular portion, and re-shape.
 82 |     xactions = tf.matmul(concat_features, concat_features, transpose_b=True)
 83 |     ones = tf.ones_like(xactions)
 84 |     if self._self_interaction:
 85 |       # Selecting lower-triangular portion including the diagonal.
 86 |       lower_tri_mask = tf.linalg.band_part(ones, -1, 0)
 87 |       upper_tri_mask = ones - lower_tri_mask
 88 |       out_dim = num_features * (num_features + 1) // 2
 89 |     else:
 90 |       # Selecting lower-triangular portion not included the diagonal.
 91 |       upper_tri_mask = tf.linalg.band_part(ones, 0, -1)
 92 |       lower_tri_mask = ones - upper_tri_mask
 93 |       out_dim = num_features * (num_features - 1) // 2
 94 | 
 95 |     if self._skip_gather:
 96 |       # Setting upper tiangle part of the interaction matrix to zeros.
 97 |       activations = tf.where(condition=tf.cast(upper_tri_mask, tf.bool),
 98 |                              x=tf.zeros_like(xactions),
 99 |                              y=xactions)
100 |       out_dim = num_features * num_features
101 |     else:
102 |       activations = tf.boolean_mask(xactions, lower_tri_mask)
103 |     activations = tf.reshape(activations, (batch_size, out_dim))
104 |     return activations
105 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/dot_interaction_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Tests for DotInteraction layer."""
16 | 
17 | import numpy as np
18 | import tensorflow as tf
19 | 
20 | from tensorflow_recommenders.layers.feature_interaction.dot_interaction import DotInteraction
21 | 
22 | 
23 | class DotInteractionTest(tf.test.TestCase):
24 |   """Tests for the `DotInteraction` layer."""
25 |   def test_valid_input(self):
26 |     feature1 = np.asarray([[0.1, -4.3, 0.2, 1.1, 0.3]]).astype(np.float32)
27 |     feature2 = np.asarray([[2.0, 3.2, -1.0, 0.0, 1.0]]).astype(np.float32)
28 |     feature3 = np.asarray([[0.0, 1.0, -3.0, -2.2, -0.2]]).astype(np.float32)
29 |     layer = DotInteraction(self_interaction=True,
30 |                            skip_gather=False)
31 |     # Expected pairwise dot products between the three features.
32 |     f11 = np.dot(feature1[0], feature1[0])
33 |     f12 = np.dot(feature1[0], feature2[0])
34 |     f13 = np.dot(feature1[0], feature3[0])
35 |     f22 = np.dot(feature2[0], feature2[0])
36 |     f23 = np.dot(feature2[0], feature3[0])
37 |     f33 = np.dot(feature3[0], feature3[0])
38 | 
39 |     output = layer([feature1, feature2, feature3])
40 |     self.assertAllClose(np.asarray([[f11,
41 |                                      f12, f22,
42 |                                      f13, f23, f33]]), output)
43 |     # With skip_gather, the upper triangle is zeroed instead of dropped.
44 |     layer = DotInteraction(self_interaction=True,
45 |                            skip_gather=True)
46 |     output = layer([feature1, feature2, feature3])
47 | 
48 |     self.assertAllClose(np.asarray([[f11, 0, 0,
49 |                                      f12, f22, 0,
50 |                                      f13, f23, f33]]), output)
51 |     # Without self-interaction the diagonal is excluded.
52 |     layer = DotInteraction(self_interaction=False,
53 |                            skip_gather=False)
54 |     output = layer([feature1, feature2, feature3])
55 |     self.assertAllClose(np.asarray([[f12,
56 |                                      f13, f23]]), output)
57 |     # Without self-interaction and with skip_gather, the diagonal is zeroed.
58 |     layer = DotInteraction(self_interaction=False,
59 |                            skip_gather=True)
60 |     output = layer([feature1, feature2, feature3])
61 | 
62 |     self.assertAllClose(np.asarray([[0, 0, 0,
63 |                                      f12, 0, 0,
64 |                                      f13, f23, 0]]), output)
65 | 
66 |   def test_non_matching_dimensions(self):
67 |     with self.assertRaisesRegex(ValueError, r"dimensions must be equal"):
68 |       feature1 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
69 |       feature2 = np.asarray([[2.0, -1.0, 1.0]]).astype(np.float32)
70 |       feature3 = np.asarray([[0.0, 1.0]]).astype(np.float32)
71 |       layer = DotInteraction()
72 |       layer([feature1, feature2, feature3])
73 | 
74 | 
75 | if __name__ == "__main__":
76 |   tf.test.main()  # Run the tests when this file is executed directly.
77 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/multi_layer_dcn.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Copyright 2023 The TensorFlow Recommenders Authors.
 16 | #
 17 | # Licensed under the Apache License, Version 2.0 (the "License");
 18 | # you may not use this file except in compliance with the License.
 19 | # You may obtain a copy of the License at
 20 | #
 21 | #     http://www.apache.org/licenses/LICENSE-2.0
 22 | #
 23 | # Unless required by applicable law or agreed to in writing, software
 24 | # distributed under the License is distributed on an "AS IS" BASIS,
 25 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 26 | # See the License for the specific language governing permissions and
 27 | # limitations under the License.
 28 | 
 29 | """Implements `MultiLayerDCN`, a stack of low-rank DCN cross layers."""
 30 | 
 31 | from typing import Union, Text, Optional
 32 | 
 33 | import tensorflow as tf
 34 | 
 35 | 
 36 | @tf.keras.utils.register_keras_serializable()
 37 | class MultiLayerDCN(tf.keras.layers.Layer):
 38 |   """Stack of low-rank cross layers from Deep & Cross Network (DCN).
 39 | 
 40 |   A layer that creates explicit and bounded-degree feature interactions
 41 |   efficiently by applying `num_layers` cross layers in sequence. The `call`
 42 |   method accepts a single tensor `x0`, the base layer that contains the
 43 |   original features (usually the embedding layer). Each cross layer consumes
 44 |   `x0` and the output `xi` of the previous cross layer in the stack; for the
 45 |   first cross layer in the stack, xi = x0.
 46 |   The output of each cross layer is x_{i+1} = x0 .* (W * xi + bias) + xi,
 47 |   where .* designates elementwise multiplication and W is a low-rank matrix
 48 |   U*V, which reduces the computational cost relative to a full-rank matrix.
 49 |   This layer always uses the low-rank form and has no `diag_scale` or
 50 |   preactivation option.
 51 |   References:
 52 |       1. [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf)
 53 |         See Eq. (1) for full-rank and Eq. (2) for low-rank version.
 54 |       2. [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf)
 55 |   Example:
 56 |       ```python
 57 |       # after embedding layer in a functional model:
 58 |       input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64)
 59 |       x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6)(input)
 60 |       # three stacked low-rank cross layers in a single Keras layer:
 61 |       x3 = MultiLayerDCN(projection_dim=1, num_layers=3)(x0)
 62 |       logits = tf.keras.layers.Dense(units=10)(x3)
 63 |       model = tf.keras.Model(input, logits)
 64 |       ```
 65 |   Args:
 66 |       projection_dim: project dimension to reduce the computational cost. A
 67 |         low-rank matrix W = U*V will be used, where U is of size `input_dim` by
 68 |         `projection_dim` and V is of size `projection_dim` by `input_dim`.
 69 |         `projection_dim` needs to be smaller than `input_dim`/2 to improve the
 70 |         model efficiency. In practice, we've observed that `projection_dim` =
 71 |         input_dim/4 consistently preserved the accuracy of a full-rank version.
 72 |       num_layers: the number of stacked cross layers.
 73 |       use_bias: whether to add a bias term to each cross layer. If set to
 74 |         False, no bias term will be used.
 75 |       kernel_initializer: Initializer to use on the kernel matrix.
 76 |       bias_initializer: Initializer to use on the bias vector.
 77 |       kernel_regularizer: Regularizer to use on the kernel matrix.
 78 |       bias_regularizer: Regularizer to use on bias vector.
 79 | 
 80 |   Input shape: A single (batch_size, `input_dim`) dimensional input.
 81 |   Output shape: A single (batch_size, `input_dim`) dimensional output.
 82 |   """
 83 | 
 84 |   def __init__(
 85 |       self,
 86 |       projection_dim: Optional[int] = 1,
 87 |       num_layers: Optional[int] = 3,
 88 |       use_bias: bool = True,
 89 |       kernel_initializer: Union[
 90 |           Text, tf.keras.initializers.Initializer] = "truncated_normal",
 91 |       bias_initializer: Union[Text,
 92 |                               tf.keras.initializers.Initializer] = "zeros",
 93 |       kernel_regularizer: Union[Text, None,
 94 |                                 tf.keras.regularizers.Regularizer] = None,
 95 |       bias_regularizer: Union[Text, None,
 96 |                               tf.keras.regularizers.Regularizer] = None,
 97 |       **kwargs):
 98 | 
 99 |     super(MultiLayerDCN, self).__init__(**kwargs)
100 | 
101 |     self._projection_dim = projection_dim
102 |     self._num_layers = num_layers
103 |     self._use_bias = use_bias
104 |     self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
105 |     self._bias_initializer = tf.keras.initializers.get(bias_initializer)
106 |     self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
107 |     self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
108 |     self._input_dim = None
109 | 
110 |     self._supports_masking = True
111 | 
112 |   def build(self, input_shape):
113 |     last_dim = input_shape[-1]
114 |     self._dense_u_kernels, self._dense_v_kernels = [], []
115 | 
116 |     for _ in range(self._num_layers):
117 |       self._dense_u_kernels.append(tf.keras.layers.Dense(
118 |           self._projection_dim,
119 |           kernel_initializer=_clone_initializer(self._kernel_initializer),
120 |           kernel_regularizer=self._kernel_regularizer,
121 |           use_bias=False,
122 |           dtype=self.dtype,
123 |       ))
124 |       self._dense_v_kernels.append(tf.keras.layers.Dense(
125 |           last_dim,
126 |           kernel_initializer=_clone_initializer(self._kernel_initializer),
127 |           bias_initializer=self._bias_initializer,
128 |           kernel_regularizer=self._kernel_regularizer,
129 |           bias_regularizer=self._bias_regularizer,
130 |           use_bias=self._use_bias,
131 |           dtype=self.dtype,
132 |       ))
133 | 
134 |     self.built = True
135 | 
136 |   def call(self, x0: tf.Tensor) -> tf.Tensor:
137 |     """Computes the multi layer DCN feature cross.
138 | 
139 |     Args:
140 |       x0: The input tensor; it is also the base input of every cross layer.
141 |     Returns:
142 |       Tensor of crosses after `num_layers` cross layers.
143 |     """
144 |     if not self.built:
145 |       self.build(x0.shape)
146 | 
147 |     xl = x0
148 |     # Each layer computes xl <- x0 * V(U(xl)) + xl.
149 |     for i in range(self._num_layers):
150 |       prod_output = self._dense_v_kernels[i](self._dense_u_kernels[i](xl))
151 |       xl = x0 * prod_output + xl
152 | 
153 |     return xl
154 | 
155 |   def get_config(self):
156 |     config = {
157 |         "projection_dim":
158 |             self._projection_dim,
159 |         "num_layers":
160 |             self._num_layers,
161 |         "use_bias":
162 |             self._use_bias,
163 |         "kernel_initializer":
164 |             tf.keras.initializers.serialize(self._kernel_initializer),
165 |         "bias_initializer":
166 |             tf.keras.initializers.serialize(self._bias_initializer),
167 |         "kernel_regularizer":
168 |             tf.keras.regularizers.serialize(self._kernel_regularizer),
169 |         "bias_regularizer":
170 |             tf.keras.regularizers.serialize(self._bias_regularizer),
171 |     }
172 |     base_config = super().get_config()
173 |     return dict(list(base_config.items()) + list(config.items()))
174 | 
175 | 
176 | def _clone_initializer(initializer):
177 |   return initializer.__class__.from_config(initializer.get_config())
178 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/feature_interaction/multi_layer_dcn_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
 15 | """Tests for the MultiLayerDCN layer."""
16 | 
17 | import os
18 | import tempfile
19 | 
20 | import numpy as np
21 | import tensorflow as tf
22 | from tensorflow_recommenders.layers.feature_interaction.multi_layer_dcn import MultiLayerDCN
23 | 
24 | 
class MultiLayerDCNTest(tf.test.TestCase):
  """Tests for the `MultiLayerDCN` layer."""
  # Do not use layer_test due to multiple inputs.

  def test_full_matrix(self):
    """A single cross layer whose projection keeps the input dimension."""
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=3,
        num_layers=1,
        use_bias=False,
        kernel_initializer="ones",
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.28, 0.56, 0.84]]), output)

  def test_low_rank_matrix(self):
    """A single cross layer with a rank-1 projection."""
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=1,
        num_layers=1,
        use_bias=False,
        kernel_initializer="ones",
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.16, 0.32, 0.48]]), output)

  def test_bias(self):
    """Default depth and bias usage with all-ones kernels and biases."""
    x0 = np.asarray([[0.1, 0.2, 0.3]]).astype(np.float32)
    layer = MultiLayerDCN(
        projection_dim=1, kernel_initializer="ones", bias_initializer="ones"
    )
    output = layer(x0)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(np.asarray([[0.9256, 1.8512, 2.7768]]), output)

  def test_serialization(self):
    """The layer config survives a serialize/deserialize round trip."""
    layer = MultiLayerDCN(projection_dim=1)
    serialized_layer = tf.keras.layers.serialize(layer)
    new_layer = tf.keras.layers.deserialize(serialized_layer)
    self.assertEqual(layer.get_config(), new_layer.get_config())

  def test_save_model(self):
    """A model containing the layer can be saved, reloaded and re-run."""

    def get_model():
      x0 = tf.keras.layers.Input(shape=(13,))
      x1 = MultiLayerDCN(projection_dim=1)(x0)
      x2 = MultiLayerDCN(projection_dim=1)(x1)
      logits = tf.keras.layers.Dense(units=1)(x2)
      model = tf.keras.Model(x0, logits)
      return model

    model = get_model()
    random_input = np.random.uniform(size=(10, 13))
    model_pred = model.predict(random_input)

    with tempfile.TemporaryDirectory() as tmp:
      path = os.path.join(tmp, "multi_layer_dcn_model")
      model.save(
          path,
          options=tf.saved_model.SaveOptions(namespace_whitelist=["Addons"]))
      loaded_model = tf.keras.models.load_model(path)
      loaded_pred = loaded_model.predict(random_input)
    # Compare the input layer and both MultiLayerDCN layers. Framework
    # assertions are used so the check is not stripped under `python -O` and
    # a failure reports the differing configs.
    for i in range(3):
      self.assertEqual(
          model.layers[i].get_config(), loaded_model.layers[i].get_config())
    self.assertAllEqual(model_pred, loaded_pred)
91 | 
92 | 
93 | if __name__ == "__main__":
94 |   tf.test.main()
95 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/loss.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Layers related to loss computation."""
 17 | from typing import Tuple
 18 | 
 19 | import numpy as np
 20 | import tensorflow as tf
 21 | 
 22 | MAX_FLOAT = np.finfo(np.float32).max / 100.0
 23 | MIN_FLOAT = np.finfo(np.float32).min / 100.0
 24 | 
 25 | 
 26 | def _gather_elements_along_row(data: tf.Tensor,
 27 |                                column_indices: tf.Tensor) -> tf.Tensor:
 28 |   """Gathers elements from a 2D tensor given the column indices of each row.
 29 | 
 30 |   A more efficient way of gathering elements from 2D tensor than tf.gather_nd().
 31 |   First, gets the flat 1D indices to gather from. Then flattens the data to 1D
 32 |   and uses tf.gather() to generate 1D output and finnally reshapes the
 33 |   output back to 2D.
 34 | 
 35 |   Args:
 36 |     data: A [N, M] 2D `Tensor`.
 37 |     column_indices: A [N, K] 2D `Tensor` denoting for each row, the K column
 38 |       indices to gather elements from the data `Tensor`.
 39 | 
 40 |   Returns:
 41 |     A [N, K] `Tensor` including output elements gathered from data `Tensor`.
 42 | 
 43 |   Raises:
 44 |     ValueError: if the first dimensions of data and column_indices don't match.
 45 |   """
 46 |   with tf.control_dependencies(
 47 |       [tf.assert_equal(tf.shape(data)[0], tf.shape(column_indices)[0])]):
 48 |     num_row = tf.shape(data)[0]
 49 |     num_column = tf.shape(data)[1]
 50 |     num_gathered = tf.shape(column_indices)[1]
 51 |     row_indices = tf.tile(
 52 |         tf.expand_dims(tf.range(num_row), -1),
 53 |         [1, num_gathered])
 54 |     flat_data = tf.reshape(data, [-1])
 55 |     flat_indices = tf.reshape(
 56 |         row_indices * num_column + column_indices, [-1])
 57 |     return tf.reshape(
 58 |         tf.gather(flat_data, flat_indices), [num_row, num_gathered])
 59 | 
 60 | 
 61 | class HardNegativeMining(tf.keras.layers.Layer):
 62 |   """Transforms logits and labels to return hard negatives."""
 63 | 
 64 |   def __init__(self, num_hard_negatives: int) -> None:
 65 |     """Initializes the layer.
 66 | 
 67 |     Args:
 68 |       num_hard_negatives: How many hard negatives to return.
 69 |     """
 70 | 
 71 |     super(HardNegativeMining, self).__init__()
 72 |     self._num_hard_negatives = num_hard_negatives
 73 | 
 74 |   def call(self, logits: tf.Tensor,
 75 |            labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
 76 |     """Filters logits and labels with per-query hard negative mining.
 77 | 
 78 |     The result will include logits and labels for num_hard_negatives
 79 |     negatives as well as the positive candidate.
 80 | 
 81 |     Args:
 82 |       logits: [batch_size, number_of_candidates] tensor of logits.
 83 |       labels: [batch_size, number_of_candidates] one-hot tensor of labels.
 84 | 
 85 |     Returns:
 86 |       logits: [batch_size, num_hard_negatives + 1] tensor of logits.
 87 |       labels: [batch_size, num_hard_negatives + 1] one-hot tensor of labels.
 88 |     """
 89 | 
 90 |     # Number of sampled logits, i.e, the number of hard negatives to be
 91 |     # sampled (k) + number of true logit (1) per query, capped by batch size.
 92 |     num_sampled = tf.minimum(self._num_hard_negatives + 1, tf.shape(logits)[1])
 93 |     # To gather indices of top k negative logits per row (query) in
 94 |     # logits, true logits need to be excluded. First replace the true
 95 |     # logits (corresponding to positive labels) with a large score value
 96 |     # and then select the top k + 1 logits from each
 97 |     # row so that selected indices include the indices of true logit + top k
 98 |     # negative logits. This approach is to avoid using inefficient
 99 |     # tf.boolean_mask() when excluding true logits.
100 | 
101 |     # For each query, get the indices of the logits which have the highest
102 |     # k + 1 logit values, including the highest k negative logits and one true
103 |     # logit.
104 |     _, col_indices = tf.nn.top_k(
105 |         logits + labels * MAX_FLOAT, k=num_sampled, sorted=False)
106 | 
107 |     # Gather sampled logits and corresponding labels.
108 |     logits = _gather_elements_along_row(logits, col_indices)
109 |     labels = _gather_elements_along_row(labels, col_indices)
110 | 
111 |     return logits, labels
112 | 
113 | 
class RemoveAccidentalHits(tf.keras.layers.Layer):
  """Suppresses the logits of accidental negatives."""

  def call(self, labels: tf.Tensor, logits: tf.Tensor,
           candidate_ids: tf.Tensor) -> tf.Tensor:
    """Pushes the logits of accidental hits towards a very small value.

    For each row in the batch, adds MIN_FLOAT to the logits of negative
    candidates that have the same id as the positive candidate in that row.
    All other logits are returned unchanged.

    Args:
      labels: [batch_size, num_candidates] one-hot labels tensor.
      logits: [batch_size, num_candidates] logits tensor.
      candidate_ids: [num_candidates] candidate identifiers tensor

    Returns:
      logits: Modified logits.
    """
    # Masking inside softmax_cross_entropy_with_logits would be the more
    # principled solution. For ease of implementation it is approximated
    # here by giving accidental hits extremely small logits (MIN_FLOAT).

    # [num_candidates, 1] column of candidate ids.
    id_column = tf.expand_dims(candidate_ids, 1)

    # Id of the positive candidate of every row: [batch_size, 1].
    positive_ids = tf.gather(id_column, tf.math.argmax(labels, axis=1))

    # Compare each row's positive id with every candidate id.
    shares_positive_id = tf.equal(positive_ids, tf.transpose(id_column))

    # Subtracting the labels removes the positive candidate itself from the
    # mask, so that only the accidental negatives remain.
    accidental_hits = tf.cast(shares_positive_id, labels.dtype) - labels

    return logits + accidental_hits * MIN_FLOAT
148 | 
149 | 
class SamplingProbablityCorrection(tf.keras.layers.Layer):
  """Sampling probability correction."""

  def __call__(self, logits: tf.Tensor,
               candidate_sampling_probability: tf.Tensor) -> tf.Tensor:
    """Corrects the input logits to account for candidate sampling probability.

    The logarithm of the sampling probability is subtracted from the logits.
    Probabilities are clipped to the range [1e-6, 1] before the logarithm is
    taken.

    NOTE(review): this method is `__call__` rather than `call`, so it
    replaces the base layer's `__call__` - confirm this is intended.
    """

    clipped_probability = tf.clip_by_value(
        candidate_sampling_probability, 1e-6, 1.)
    return logits - tf.math.log(clipped_probability)
159 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/layers/loss_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests for loss layers."""
 17 | 
 18 | from absl.testing import parameterized
 19 | 
 20 | import numpy as np
 21 | import tensorflow as tf
 22 | 
 23 | from tensorflow_recommenders.layers import loss
 24 | 
 25 | 
 26 | class LossTest(tf.test.TestCase, parameterized.TestCase):
 27 |   """Loss layers tests."""
 28 | 
 29 |   @parameterized.parameters(42, 123, 8391, 12390, 1230)
 30 |   def test_hard_negative_mining(self, random_seed):
 31 |     """Test hard negative mining."""
 32 | 
 33 |     num_hard_negatives = 3
 34 |     # (num_queries, num_candidates)
 35 |     shape = (2, 20)
 36 |     rng = np.random.RandomState(random_seed)
 37 | 
 38 |     logits = rng.uniform(size=shape).astype(np.float32)
 39 |     labels = rng.permutation(np.eye(*shape).T).T.astype(np.float32)
 40 | 
 41 |     out_logits, out_labels = loss.HardNegativeMining(num_hard_negatives)(logits,
 42 |                                                                          labels)
 43 |     out_logits, out_labels = out_logits.numpy(), out_labels.numpy()
 44 | 
 45 |     self.assertEqual(out_logits.shape[-1], num_hard_negatives + 1)
 46 | 
 47 |     # Logits for positives are always returned.
 48 |     self.assertAllClose((out_logits * out_labels).sum(axis=1),
 49 |                         (logits * labels).sum(axis=1))
 50 | 
 51 |     # Set the logits for the labels to be highest to ignore
 52 |     # the effect of labels.
 53 |     logits = logits + labels * 1000.0
 54 | 
 55 |     out_logits, out_labels = loss.HardNegativeMining(num_hard_negatives)(logits,
 56 |                                                                          labels)
 57 |     out_logits, out_labels = out_logits.numpy(), out_labels.numpy()
 58 | 
 59 |     # Highest K logits are always returned.
 60 |     self.assertAllClose(
 61 |         np.sort(logits, axis=1)[:, -num_hard_negatives - 1:],
 62 |         np.sort(out_logits))
 63 | 
 64 |   @parameterized.parameters(42, 123, 8391, 12390, 1230)
 65 |   def test_remove_accidental_hits(self, random_seed):
 66 | 
 67 |     # (num_queries, num_candidates)
 68 |     shape = (2, 4)
 69 |     rng = np.random.RandomState(random_seed)
 70 | 
 71 |     logits = rng.uniform(size=shape).astype(np.float32)
 72 |     labels = rng.permutation(np.eye(*shape).T).T.astype(np.float32)
 73 |     candidate_ids = rng.randint(0, 3, size=shape[-1])
 74 | 
 75 |     out_logits = loss.RemoveAccidentalHits()(
 76 |         labels, logits, candidate_ids).numpy()
 77 | 
 78 |     # Logits of labels are unchanged.
 79 |     self.assertAllClose((out_logits * labels).sum(axis=1),
 80 |                         (logits * labels).sum(axis=1))
 81 | 
 82 |     for row_idx in range(shape[0]):
 83 | 
 84 |       row_positive_idx = np.argmax(labels[row_idx])
 85 |       positive_candidate_id = candidate_ids[row_positive_idx]
 86 | 
 87 |       for col_idx in range(shape[1]):
 88 | 
 89 |         same_candidate_as_positive = (
 90 |             positive_candidate_id == candidate_ids[col_idx])
 91 |         is_positive = col_idx == row_positive_idx
 92 | 
 93 |         if same_candidate_as_positive and not is_positive:
 94 |           # We zeroed the logits.
 95 |           self.assertAllClose(out_logits[row_idx, col_idx],
 96 |                               logits[row_idx, col_idx] + loss.MIN_FLOAT)
 97 |         else:
 98 |           # We left the logits unchanged.
 99 |           self.assertAllClose(out_logits[row_idx, col_idx], logits[row_idx,
100 |                                                                    col_idx])
101 | 
102 | 
class SamplingProbabilityCorrectionTest(
    tf.test.TestCase, parameterized.TestCase):
  """Sampling probability correction tests."""

  @parameterized.parameters(42, 123, 8391, 12390, 1230)
  def test_sampling_probability_correction(self, random_seed):
    """Corrected logits are strictly larger than the raw logits."""

    # (num_queries, num_candidates)
    shape = (10, 20)
    rng = np.random.RandomState(random_seed)

    logits = rng.uniform(size=shape).astype(np.float32)
    probs = rng.uniform(size=shape[1]).astype(np.float32)

    corrected = loss.SamplingProbablityCorrection()(logits, probs).numpy()

    np.testing.assert_array_less(logits, corrected)

    # Zero out a random subset of the probabilities.
    keep_mask = rng.choice([0., 1.], size=probs.shape)
    probs_with_zeros = probs * keep_mask

    corrected_with_zeros = loss.SamplingProbablityCorrection()(
        logits, probs_with_zeros).numpy()

    np.testing.assert_array_less(logits, corrected_with_zeros)
131 | 
132 | 
133 | if __name__ == "__main__":
134 |   tf.test.main()
135 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Metrics."""
16 | 
17 | from tensorflow_recommenders.metrics.factorized_top_k import Factorized
18 | from tensorflow_recommenders.metrics.factorized_top_k import FactorizedTopK
19 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/metrics/factorized_top_k.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # lint-as: python3
 16 | """Factorized retrieval top K metrics."""
 17 | 
 18 | import abc
 19 | 
 20 | from typing import List, Optional, Sequence, Union
 21 | 
 22 | import tensorflow as tf
 23 | 
 24 | from tensorflow_recommenders import layers
 25 | 
 26 | 
 27 | class Factorized(tf.keras.layers.Layer, abc.ABC):
 28 |   """Computes metrics across top K candidates surfaced by a retrieval model."""
 29 | 
 30 |   @abc.abstractmethod
 31 |   def update_state(
 32 |       self,
 33 |       query_embeddings: tf.Tensor,
 34 |       true_candidate_embeddings: tf.Tensor,
 35 |       true_candidate_ids: Optional[tf.Tensor] = None
 36 |   ) -> tf.Operation:
 37 | 
 38 |     raise NotImplementedError()
 39 | 
 40 |   def reset_states(self) -> None:
 41 |     """Resets the metrics."""
 42 | 
 43 |     for metric in self.metrics:
 44 |       metric.reset_states()
 45 | 
 46 |   def result(self) -> List[tf.Tensor]:
 47 |     """Returns a list of metric results."""
 48 | 
 49 |     return [metric.result() for metric in self.metrics]
 50 | 
 51 | 
 52 | class FactorizedTopK(Factorized):
 53 |   """Computes metrics for across top K candidates surfaced by a retrieval model.
 54 | 
 55 |   The default metric is top K categorical accuracy: how often the true candidate
 56 |    is in the top K candidates for a given query.
 57 |   """
 58 | 
 59 |   def __init__(
 60 |       self,
 61 |       candidates: Union[layers.factorized_top_k.TopK, tf.data.Dataset],
 62 |       ks: Sequence[int] = (1, 5, 10, 50, 100),
 63 |       name: str = "factorized_top_k",
 64 |   ) -> None:
 65 |     """Initializes the metric.
 66 | 
 67 |     Args:
 68 |       candidates: A layer for retrieving top candidates in response
 69 |         to a query, or a dataset of candidate embeddings from which
 70 |         candidates should be retrieved.
 71 |       ks: A sequence of values of `k` at which to perform retrieval evaluation.
 72 |       name: Optional name.
 73 |     """
 74 | 
 75 |     super().__init__(name=name)
 76 | 
 77 |     if isinstance(candidates, tf.data.Dataset):
 78 |       candidates = (
 79 |           layers.factorized_top_k.Streaming(k=max(ks))
 80 |           .index_from_dataset(candidates)
 81 |       )
 82 | 
 83 |     self._ks = ks
 84 |     self._candidates = candidates
 85 |     self._top_k_metrics = [
 86 |         tf.keras.metrics.Mean(
 87 |             name=f"{self.name}/top_{x}_categorical_accuracy"
 88 |         ) for x in ks
 89 |     ]
 90 | 
 91 |   def update_state(
 92 |       self,
 93 |       query_embeddings: tf.Tensor,
 94 |       true_candidate_embeddings: tf.Tensor,
 95 |       true_candidate_ids: Optional[tf.Tensor] = None,
 96 |       sample_weight: Optional[tf.Tensor] = None,
 97 |   ) -> tf.Operation:
 98 |     """Updates the metrics.
 99 | 
100 |     Args:
101 |       query_embeddings: [num_queries, embedding_dim] tensor of query embeddings.
102 |       true_candidate_embeddings: [num_queries, embedding_dim] tensor of
103 |         embeddings for candidates that were selected for the query.
104 |       true_candidate_ids: Ids of the true candidates. If supplied, evaluation
105 |         will be id-based: the supplied ids will be matched against the ids of
106 |         the top candidates returned from the retrieval index, which should have
107 |         been constructed with the appropriate identifiers.
108 | 
109 |         If not supplied, evaluation will be score-based: the score of the true
110 |         candidate will be computed and compared with the scores returned from
111 |         the index for the top candidates.
112 | 
113 |         Score-based evaluation is useful for when the true candidate is not
114 |         in the retrieval index. Id-based evaluation is useful for when scores
115 |         returned from the index are not directly comparable to scores computed
116 |         by multiplying the candidate and embedding vector. For example, scores
117 |         returned by ScaNN are quantized, and cannot be compared to
118 |         full-precision scores.
119 |       sample_weight: Optional weighting of each example. Defaults to 1.
120 | 
121 |     Returns:
122 |       Update op. Only used in graph mode.
123 |     """
124 | 
125 |     if true_candidate_ids is None and not self._candidates.is_exact():
126 |       raise ValueError(
127 |           f"The candidate generation layer ({self._candidates}) does not return "
128 |           "exact results. To perform evaluation using that layer, you must "
129 |           "supply `true_candidate_ids`, which will be checked against "
130 |           "the candidate ids returned from the candidate generation layer."
131 |       )
132 | 
133 |     positive_scores = tf.reduce_sum(
134 |         query_embeddings * true_candidate_embeddings, axis=1, keepdims=True)
135 | 
136 |     top_k_predictions, retrieved_ids = self._candidates(
137 |         query_embeddings, k=max(self._ks))
138 | 
139 |     update_ops = []
140 | 
141 |     if true_candidate_ids is not None:
142 |       # We're using ID-based evaluation.
143 |       if len(true_candidate_ids.shape) == 1:
144 |         true_candidate_ids = tf.expand_dims(true_candidate_ids, 1)
145 | 
146 |       # Deal with ScaNN using `NaN`-padding by converting its
147 |       # `NaN` scores into minimum scores.
148 |       nan_padding = tf.math.is_nan(top_k_predictions)
149 |       top_k_predictions = tf.where(
150 |           nan_padding,
151 |           tf.ones_like(top_k_predictions) * tf.float32.min,
152 |           top_k_predictions
153 |       )
154 | 
155 |       # Check sortedness.
156 |       is_sorted = (
157 |           top_k_predictions[:, :-1] - top_k_predictions[:, 1:]
158 |       )
159 |       tf.debugging.assert_non_negative(
160 |           is_sorted, message="Top-K predictions must be sorted."
161 |       )
162 | 
163 |       # Check whether the true candidates were retrieved, accounting
164 |       # for padding.
165 |       ids_match = tf.cast(
166 |           tf.math.logical_and(
167 |               tf.math.equal(true_candidate_ids, retrieved_ids),
168 |               tf.math.logical_not(nan_padding)
169 |           ),
170 |           tf.float32
171 |       )
172 | 
173 |       for k, metric in zip(self._ks, self._top_k_metrics):
174 |         # By slicing until :k we assume scores are sorted.
175 |         # Clip to only count multiple matches once.
176 |         match_found = tf.clip_by_value(
177 |             tf.reduce_sum(ids_match[:, :k], axis=1, keepdims=True),
178 |             0.0, 1.0
179 |         )
180 |         update_ops.append(metric.update_state(match_found, sample_weight))
181 |     else:
182 |       # Score-based evaluation.
183 |       y_pred = tf.concat([positive_scores, top_k_predictions], axis=1)
184 | 
185 |       for k, metric in zip(self._ks, self._top_k_metrics):
186 |         targets = tf.zeros(tf.shape(positive_scores)[0], dtype=tf.int32)
187 |         top_k_accuracy = tf.math.in_top_k(
188 |             targets=targets,
189 |             predictions=y_pred,
190 |             k=k
191 |         )
192 |         update_ops.append(metric.update_state(top_k_accuracy, sample_weight))
193 | 
194 |     return tf.group(update_ops)
195 |   


--------------------------------------------------------------------------------
/tensorflow_recommenders/metrics/factorized_top_k_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests factorized top K metrics."""
 17 | 
 18 | import itertools
 19 | 
 20 | from absl.testing import parameterized
 21 | 
 22 | import numpy as np
 23 | import tensorflow as tf
 24 | 
 25 | from tensorflow_recommenders import layers
 26 | from tensorflow_recommenders import metrics
 27 | 
 28 | 
 29 | class FactorizedTopKTest(tf.test.TestCase, parameterized.TestCase):
 30 | 
 31 |   @parameterized.parameters(
 32 |       itertools.product(
 33 |           (layers.factorized_top_k.Streaming,
 34 |            layers.factorized_top_k.BruteForce,
 35 |            None),
 36 |           (True, False)
 37 |       )
 38 |   )
 39 |   def test_factorized_top_k(self, top_k_layer, use_candidate_ids):
 40 | 
 41 |     rng = np.random.RandomState(42)
 42 | 
 43 |     num_candidates, num_queries, embedding_dim = (100, 10, 4)
 44 | 
 45 |     candidate_ids = np.arange(0, num_candidates).astype(str)
 46 |     candidates = rng.normal(size=(num_candidates,
 47 |                                   embedding_dim)).astype(np.float32)
 48 | 
 49 |     query = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32)
 50 |     sample_weight = rng.uniform(size=(num_queries, 1)).astype(np.float32)
 51 | 
 52 |     true_candidate_indexes = rng.randint(0, num_candidates, size=num_queries)
 53 |     true_candidate_embeddings = candidates[true_candidate_indexes]
 54 |     true_candidate_ids = candidate_ids[true_candidate_indexes]
 55 | 
 56 |     candidate_scores = query @ candidates.T
 57 | 
 58 |     ks = [1, 5, 10, 50]
 59 | 
 60 |     candidates = tf.data.Dataset.from_tensor_slices(
 61 |         (candidate_ids, candidates)).batch(32)
 62 | 
 63 |     if top_k_layer is not None:
 64 |       candidates = top_k_layer().index_from_dataset(candidates)
 65 | 
 66 |     metric = metrics.FactorizedTopK(
 67 |         candidates=candidates,
 68 |         ks=ks
 69 |     )
 70 |     metric.update_state(
 71 |         query_embeddings=query,
 72 |         true_candidate_embeddings=true_candidate_embeddings,
 73 |         true_candidate_ids=true_candidate_ids if use_candidate_ids else None,
 74 |         sample_weight=sample_weight,
 75 |     )
 76 | 
 77 |     for k, metric_value in zip(ks, metric.result()):
 78 |       in_top_k = tf.math.in_top_k(
 79 |           targets=true_candidate_indexes,
 80 |           predictions=candidate_scores,
 81 |           k=k)
 82 |       expected_val = np.average(
 83 |           in_top_k.numpy().astype(np.float32),
 84 |           weights=np.squeeze(sample_weight, 1),
 85 |       )
 86 |       self.assertAllClose(metric_value, expected_val)
 87 | 
 88 |   @parameterized.parameters(
 89 |       layers.factorized_top_k.Streaming,
 90 |       layers.factorized_top_k.BruteForce,
 91 |       layers.factorized_top_k.ScaNN
 92 |   )
 93 |   def test_id_based_evaluation(self, layer):
 94 | 
 95 |     rng = np.random.default_rng(42)
 96 | 
 97 |     k = 100
 98 |     num_candidates, num_queries, embedding_dim = (1280, 128, 128)
 99 |     candidates = rng.normal(size=(num_candidates,
100 |                                   embedding_dim)).astype(np.float32)
101 |     queries = rng.normal(size=(num_queries, embedding_dim)).astype(np.float32)
102 |     true_candidate_indices = rng.integers(
103 |         0, num_candidates, size=num_queries).astype(np.int32)
104 | 
105 |     index = layer(k=k).index_from_dataset(
106 |         tf.data.Dataset.from_tensor_slices(candidates).batch(32))
107 | 
108 |     metric = metrics.FactorizedTopK(
109 |         candidates=index,
110 |         ks=[k]
111 |     )
112 | 
113 |     in_top_k = 0
114 | 
115 |     for query, true_candidate_idx in zip(queries, true_candidate_indices):
116 | 
117 |       metric.update_state(
118 |           query.reshape(1, -1),
119 |           candidates[true_candidate_idx].reshape(1, -1),
120 |           np.array([true_candidate_idx])
121 |       )
122 | 
123 |       top_scores, top_indices = index(query.reshape(1, -1))
124 |       top_scores, top_indices = top_scores.numpy()[0], top_indices.numpy()[0]
125 | 
126 |       if true_candidate_idx in top_indices.tolist():
127 |         in_top_k += 1
128 | 
129 |     expected_metric = in_top_k / num_queries
130 | 
131 |     self.assertEqual(metric.result()[0], expected_metric)
132 | 
133 | 
134 | if __name__ == "__main__":  # Only when run as a script, not on import.
135 |   tf.test.main()
136 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Models."""
16 | 
17 | from tensorflow_recommenders.models.base import Model
18 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/models/base.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # lint-as: python3
 16 | """Base model."""
 17 | 
 18 | import tensorflow as tf
 19 | 
 20 | 
 21 | class Model(tf.keras.Model):
 22 |   """Base model for TFRS models.
 23 | 
 24 |   Many recommender models are relatively complex, and do not neatly fit into
 25 |   supervised or unsupervised paradigms. This base class makes it easy to
 26 |   define custom training and test losses for such complex models.
 27 | 
 28 |   This is done by asking the user to implement the following methods:
 29 |   - `__init__` to set up your model. Variable, task, loss, and metric
 30 |     initialization should go here.
 31 |   - `compute_loss` to define the training loss. The method takes as input the
 32 |     raw features passed into the model, and returns a loss tensor for training.
 33 |     As part of doing so, it should also update the model's metrics.
 34 |   - [Optional] `call` to define how the model computes its predictions. This
 35 |     is not always necessary: for example, two-tower retrieval models have two
 36 |     well-defined submodels whose `call` methods are normally used directly.
 37 | 
 38 |   Note that this base class is a thin conveniece wrapper for tf.keras.Model, and
 39 |   equivalent functionality can easily be achieved by overriding the `train_step`
 40 |   and `test_step` methods of a plain Keras model. Doing so also makes it easy
 41 |   to build even more complex training mechanisms, such as the use of
 42 |   different optimizers for different variables, or manipulating gradients.
 43 | 
 44 |   Keras has an excellent tutorial on how to
 45 |   do this [here](
 46 |   https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).
 47 |   """
 48 | 
 49 |   def compute_loss(self, inputs, training: bool = False) -> tf.Tensor:  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
 50 |     """Defines the loss function.
 51 | 
 52 |     Args:
 53 |       inputs: A data structure of tensors: raw inputs to the model. These will
 54 |         usually contain labels and weights as well as features.
 55 |       training: Whether the model is in training mode.
 56 | 
 57 |     Returns:
 58 |       Loss tensor.
 59 |     """
 60 | 
 61 |     raise NotImplementedError(
 62 |         "Implementers must implement the `compute_loss` method.")
 63 | 
 64 |   def train_step(self, inputs):
 65 |     """Custom train step using the `compute_loss` method."""
 66 | 
 67 |     with tf.GradientTape() as tape:
 68 |       loss = self.compute_loss(inputs, training=True)
 69 | 
 70 |       # Handle regularization losses as well.
 71 |       regularization_loss = tf.reduce_sum(
 72 |           [tf.reduce_sum(loss) for loss in self.losses]
 73 |       )
 74 | 
 75 |       total_loss = loss + regularization_loss
 76 | 
 77 |     gradients = tape.gradient(total_loss, self.trainable_variables)
 78 |     self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
 79 | 
 80 |     metrics = {metric.name: metric.result() for metric in self.metrics}
 81 |     metrics["loss"] = loss
 82 |     metrics["regularization_loss"] = regularization_loss
 83 |     metrics["total_loss"] = total_loss
 84 | 
 85 |     return metrics
 86 | 
 87 |   def test_step(self, inputs):
 88 |     """Custom test step using the `compute_loss` method."""
 89 | 
 90 |     loss = self.compute_loss(inputs, training=False)
 91 | 
 92 |     # Handle regularization losses as well.
 93 |     regularization_loss = tf.reduce_sum(
 94 |         [tf.reduce_sum(loss) for loss in self.losses]
 95 |     )
 96 | 
 97 |     total_loss = loss + regularization_loss
 98 | 
 99 |     metrics = {metric.name: metric.result() for metric in self.metrics}
100 |     metrics["loss"] = loss
101 |     metrics["regularization_loss"] = regularization_loss
102 |     metrics["total_loss"] = total_loss
103 | 
104 |     return metrics
105 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/models/base_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests base model."""
 17 | 
 18 | import numpy as np
 19 | import tensorflow as tf
 20 | 
 21 | from tensorflow_recommenders import metrics
 22 | from tensorflow_recommenders import models
 23 | from tensorflow_recommenders import tasks
 24 | 
 25 | 
 26 | class ModelTest(tf.test.TestCase):
 27 | 
 28 |   def test_ranking_model(self):
 29 |     """Tests a simple ranking model."""
 30 | 
 31 |     class Model(models.Model):
 32 | 
 33 |       def __init__(self):
 34 |         super().__init__()
 35 |         self._dense = tf.keras.layers.Dense(1)
 36 |         self.task = tasks.Ranking(
 37 |             loss=tf.keras.losses.BinaryCrossentropy(),
 38 |             metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")])
 39 | 
 40 |       def call(self, inputs):
 41 |         return self._dense(inputs)
 42 | 
 43 |       def compute_loss(self, inputs, training=False):
 44 |         features, labels = inputs
 45 | 
 46 |         predictions = self(features)
 47 | 
 48 |         return self.task(predictions=predictions, labels=labels)
 49 | 
 50 |     data = tf.data.Dataset.from_tensor_slices(
 51 |         (np.random.normal(size=(10, 3)), np.ones(10)))
 52 | 
 53 |     model = Model()
 54 |     model.compile()
 55 |     model.fit(data.batch(2))
 56 |     metrics_ = model.evaluate(data.batch(2), return_dict=True)
 57 | 
 58 |     self.assertIn("loss", metrics_)
 59 |     self.assertIn("accuracy", metrics_)
 60 | 
 61 |   def test_factorized_model(self):
 62 |     """Tests a simple factorized retrieval model."""
 63 | 
 64 |     class Model(models.Model):
 65 | 
 66 |       def __init__(self, candidate_dataset):
 67 |         super().__init__()
 68 | 
 69 |         self.query_model = tf.keras.layers.Dense(16)
 70 |         self.candidate_model = tf.keras.layers.Dense(16)
 71 | 
 72 |         self.task = tasks.Retrieval(
 73 |             metrics=metrics.FactorizedTopK(
 74 |                 candidates=candidate_dataset.map(self.candidate_model),
 75 |                 ks=[5],
 76 |             )
 77 |         )
 78 | 
 79 |       def compute_loss(self, inputs, training=False):
 80 |         query_features, candidate_features = inputs
 81 | 
 82 |         query_embeddings = self.query_model(query_features)
 83 |         candidate_embeddings = self.candidate_model(candidate_features)
 84 | 
 85 |         return self.task(
 86 |             query_embeddings=query_embeddings,
 87 |             candidate_embeddings=candidate_embeddings)
 88 | 
 89 |     candidate_dataset = tf.data.Dataset.from_tensor_slices(
 90 |         np.random.normal(size=(10, 3)))
 91 |     data = tf.data.Dataset.from_tensor_slices((
 92 |         np.random.normal(size=(10, 3)),
 93 |         np.random.normal(size=(10, 3)),
 94 |     ))
 95 | 
 96 |     model = Model(candidate_dataset.batch(10))
 97 |     model.compile()
 98 |     model.fit(data.batch(2))
 99 |     metrics_ = model.evaluate(data.batch(2), return_dict=True)
100 | 
101 |     self.assertIn("loss", metrics_)
102 |     self.assertIn("factorized_top_k/top_5_categorical_accuracy", metrics_)
103 | 
104 |   def test_multiask_model(self):
105 |     """Test a joint ranking-retrieval model."""
106 | 
107 |     class Model(models.Model):
108 | 
109 |       def __init__(self, candidate_dataset):
110 |         super().__init__()
111 | 
112 |         self.query_model = tf.keras.layers.Dense(16)
113 |         self.candidate_model = tf.keras.layers.Dense(16)
114 |         self.ctr_model = tf.keras.layers.Dense(1, activation="sigmoid")
115 | 
116 |         self.retrieval_task = tasks.Retrieval(
117 |             metrics=metrics.FactorizedTopK(
118 |                 candidates=candidate_dataset.map(self.candidate_model),
119 |                 ks=[5]
120 |             )
121 |         )
122 |         self.ctr_task = tasks.Ranking(
123 |             metrics=[tf.keras.metrics.AUC(name="ctr_auc")])
124 | 
125 |       def compute_loss(self, inputs, training):
126 |         query_features, candidate_features, clicks = inputs
127 | 
128 |         query_embeddings = self.query_model(query_features)
129 |         candidate_embeddings = self.candidate_model(candidate_features)
130 | 
131 |         pctr = self.ctr_model(
132 |             tf.concat([query_features, candidate_features], axis=1))
133 | 
134 |         retrieval_loss = self.retrieval_task(
135 |             query_embeddings=query_embeddings,
136 |             candidate_embeddings=candidate_embeddings)
137 |         ctr_loss = self.ctr_task(predictions=pctr, labels=clicks)
138 | 
139 |         return retrieval_loss + ctr_loss
140 | 
141 |     candidate_dataset = tf.data.Dataset.from_tensor_slices(
142 |         np.random.normal(size=(10, 3)))
143 |     data = tf.data.Dataset.from_tensor_slices((
144 |         np.random.normal(size=(10, 3)),
145 |         np.random.normal(size=(10, 3)),
146 |         np.random.randint(0, 2, size=10),
147 |     ))
148 | 
149 |     model = Model(candidate_dataset.batch(10))
150 |     model.compile()
151 |     model.fit(data.batch(2))
152 |     metrics_ = model.evaluate(data.batch(2), return_dict=True)
153 | 
154 |     self.assertIn("loss", metrics_)
155 |     self.assertIn("factorized_top_k/top_5_categorical_accuracy", metrics_)
156 |     self.assertIn("ctr_auc", metrics_)
157 | 
158 |   def test_regularization_losses(self):
159 | 
160 |     class Model(models.Model):
161 | 
162 |       def __init__(self):
163 |         super().__init__()
164 |         self._dense = tf.keras.layers.Dense(1)
165 |         self.task = tasks.Ranking(
166 |             loss=tf.keras.losses.BinaryCrossentropy(),
167 |             metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")])
168 | 
169 |       def call(self, inputs):
170 |         self.add_loss(1000.0)
171 |         return self._dense(inputs)
172 | 
173 |       def compute_loss(self, inputs, training=False):
174 |         features, labels = inputs
175 | 
176 |         predictions = self(features)
177 | 
178 |         return self.task(predictions=predictions, labels=labels)
179 | 
180 |     data = tf.data.Dataset.from_tensor_slices(
181 |         (np.random.normal(size=(10, 3)), np.ones(10)))
182 | 
183 |     model = Model()
184 |     model.compile()
185 |     model.fit(data.batch(2))
186 |     metrics_ = model.evaluate(data.batch(2), return_dict=True)
187 | 
188 |     self.assertIn("loss", metrics_)
189 |     self.assertIn("accuracy", metrics_)
190 | 
191 |     self.assertEqual(metrics_["regularization_loss"], 1000.0)
192 | 
193 | 
194 | if __name__ == "__main__":  # Only when run as a script, not on import.
195 |   tf.test.main()
196 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/public.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """TensorFlow Recommenders is a library for building recommender system models.
16 | 
17 | It helps with the full workflow of building a recommender system: data
18 | preparation, model formulation, training, evaluation, and deployment.
19 | 
20 | It's built on Keras and aims to have a gentle learning curve while still giving
21 | you the flexibility to build complex models.
22 | 
23 | This is the public version of the library and therefore does not include
24 | Google-internal components.
25 | """
26 | 
27 | __version__ = "v0.7.3"
28 | 
29 | # This public version does not import the internal Google TFRS library.
30 | from tensorflow_recommenders import examples
31 | from tensorflow_recommenders import experimental
32 | from tensorflow_recommenders import layers
33 | from tensorflow_recommenders import metrics
34 | from tensorflow_recommenders import models
35 | from tensorflow_recommenders import tasks
36 | from tensorflow_recommenders import types
37 | 
38 | 
39 | Model = models.Model  # Top-level alias for the base model class.
40 | 
41 | # Bind each imported module to a throwaway name so that the imports above
42 | # count as used and do not raise a lint error.
43 | __use_examples = examples
44 | __use_experimental = experimental
45 | __use_layers = layers
46 | __use_metrics = metrics
47 | __use_models = models
48 | __use_tasks = tasks
49 | __use_types = types
50 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/tasks/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Task libraries."""
16 | 
17 | from tensorflow_recommenders.tasks.base import Task
18 | from tensorflow_recommenders.tasks.ranking import Ranking
19 | from tensorflow_recommenders.tasks.retrieval import Retrieval
20 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/tasks/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Lint-as: python3
16 | """Base task class."""
17 | 
18 | 
19 | class Task:
20 |   """Task class.
21 | 
22 |   This is a marker class: inherit from this class if you'd like to make
23 |   your tasks distinguishable from plain Keras layers.
24 |   """
25 | 
26 |   pass
27 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/tasks/ranking.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """A ranking task."""
 17 | 
 18 | from typing import List, Optional, Text
 19 | 
 20 | import tensorflow as tf
 21 | 
 22 | 
 23 | from tensorflow_recommenders.tasks import base
 24 | 
 25 | 
 26 | class Ranking(tf.keras.layers.Layer, base.Task):
 27 |   """A ranking task.
 28 | 
 29 |   Recommender systems are often composed of two components:
 30 |   - a retrieval model, retrieving O(thousands) candidates from a corpus of
 31 |     O(millions) candidates.
 32 |   - a ranker model, scoring the candidates retrieved by the retrieval model to
 33 |     return a ranked shortlist of a few dozen candidates.
 34 | 
 35 |   This task helps with building ranker models. Usually, these will involve
 36 |   predicting signals such as clicks, cart additions, likes, ratings, and
 37 |   purchases.
 38 |   """
 39 | 
 40 |   def __init__(
 41 |       self,
 42 |       loss: Optional[tf.keras.losses.Loss] = None,
 43 |       metrics: Optional[List[tf.keras.metrics.Metric]] = None,
 44 |       prediction_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
 45 |       label_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
 46 |       loss_metrics: Optional[List[tf.keras.metrics.Metric]] = None,
 47 |       name: Optional[Text] = None) -> None:
 48 |     """Initializes the task.
 49 | 
 50 |     Args:
 51 |       loss: Loss function. Defaults to BinaryCrossentropy.
 52 |       metrics: List of Keras metrics to be evaluated.
 53 |       prediction_metrics: List of Keras metrics used to summarize the
 54 |         predictions.
 55 |       label_metrics: List of Keras metrics used to summarize the labels.
 56 |       loss_metrics: List of Keras metrics used to summarize the loss.
 57 |       name: Optional task name.
 58 |     """
 59 | 
 60 |     super().__init__(name=name)
 61 | 
 62 |     self._loss = (
 63 |         loss if loss is not None else tf.keras.losses.BinaryCrossentropy())
 64 |     self._ranking_metrics = metrics or []
 65 |     self._prediction_metrics = prediction_metrics or []
 66 |     self._label_metrics = label_metrics or []
 67 |     self._loss_metrics = loss_metrics or []
 68 | 
 69 |   def call(self,
 70 |            labels: tf.Tensor,
 71 |            predictions: tf.Tensor,
 72 |            sample_weight: Optional[tf.Tensor] = None,
 73 |            training: bool = False,
 74 |            compute_metrics: bool = True) -> tf.Tensor:
 75 |     """Computes the task loss and metrics.
 76 | 
 77 |     Args:
 78 |       labels: Tensor of labels.
 79 |       predictions: Tensor of predictions.
 80 |       sample_weight: Tensor of sample weights.
 81 |       training: Indicator whether training or test loss is being computed.
 82 |       compute_metrics: Whether to compute metrics. Set this to False
 83 |         during training for faster training.
 84 | 
 85 |     Returns:
 86 |       loss: Tensor of loss values.
 87 |     """
 88 | 
 89 |     loss = self._loss(
 90 |         y_true=labels, y_pred=predictions, sample_weight=sample_weight)
 91 | 
 92 |     if not compute_metrics:
 93 |       return loss
 94 | 
 95 |     update_ops = []
 96 | 
 97 |     for metric in self._ranking_metrics:
 98 |       update_ops.append(metric.update_state(
 99 |           y_true=labels, y_pred=predictions, sample_weight=sample_weight))
100 | 
101 |     for metric in self._prediction_metrics:
102 |       update_ops.append(
103 |           metric.update_state(predictions, sample_weight=sample_weight))
104 | 
105 |     for metric in self._label_metrics:
106 |       update_ops.append(
107 |           metric.update_state(labels, sample_weight=sample_weight))
108 | 
109 |     for metric in self._loss_metrics:
110 |       update_ops.append(
111 |           metric.update_state(loss)
112 |       )  # Loss is a scalar here which is already weighted sum
113 | 
114 |     # Custom metrics may not return update ops, unlike built-in
115 |     # Keras metrics.
116 |     update_ops = [x for x in update_ops if x is not None]
117 | 
118 |     with tf.control_dependencies(update_ops):
119 |       return tf.identity(loss)
120 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/tasks/ranking_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests ranking tasks."""
 17 | 
 18 | import math
 19 | 
 20 | from absl.testing import parameterized
 21 | 
 22 | import tensorflow as tf
 23 | 
 24 | from tensorflow_recommenders.tasks import ranking
 25 | 
 26 | 
 27 | class RankingTest(tf.test.TestCase, parameterized.TestCase):
 28 | 
 29 |   @parameterized.parameters((True,), (False,))
 30 |   def test_task(self, enable_sample_weight):
 31 |     task = ranking.Ranking(
 32 |         metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")],
 33 |         label_metrics=[tf.keras.metrics.Mean(name="label_mean")],
 34 |         prediction_metrics=[tf.keras.metrics.Mean(name="prediction_mean")],
 35 |         loss_metrics=[tf.keras.metrics.Mean(name="loss_mean")]
 36 |     )
 37 | 
 38 |     predictions = tf.constant([[1], [0.3]], dtype=tf.float32)
 39 |     labels = tf.constant([[1], [1]], dtype=tf.float32)
 40 |     sample_weight = None
 41 |     if enable_sample_weight:
 42 |       sample_weight = tf.constant([1.0, 1.0], dtype=tf.float32)
 43 | 
 44 |     # Standard log loss formula.
 45 |     expected_loss = -(math.log(1) + math.log(0.3)) / 2.0
 46 |     expected_metrics = {
 47 |         "accuracy": 0.5,
 48 |         "label_mean": 1.0,
 49 |         "prediction_mean": 0.65,
 50 |         "loss_mean": expected_loss
 51 |     }
 52 | 
 53 |     loss = task(
 54 |         predictions=predictions, labels=labels, sample_weight=sample_weight
 55 |     )
 56 |     metrics = {
 57 |         metric.name: metric.result().numpy() for metric in task.metrics
 58 |     }
 59 | 
 60 |     self.assertIsNotNone(loss)
 61 |     self.assertAllClose(expected_loss, loss)
 62 |     self.assertAllClose(expected_metrics, metrics)
 63 | 
 64 |   def test_task_graph(self):
 65 |     with tf.Graph().as_default():
 66 |       with tf.compat.v1.Session() as sess:
 67 |         task = ranking.Ranking(
 68 |             metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")],
 69 |             label_metrics=[tf.keras.metrics.Mean(name="label_mean")],
 70 |             prediction_metrics=[tf.keras.metrics.Mean(name="prediction_mean")],
 71 |             loss_metrics=[tf.keras.metrics.Mean(name="loss_mean")]
 72 |         )
 73 |         predictions = tf.constant([[1], [0.3]], dtype=tf.float32)
 74 |         labels = tf.constant([[1], [1]], dtype=tf.float32)
 75 | 
 76 |         # Standard log loss formula.
 77 |         expected_loss = -(math.log(1) + math.log(0.3)) / 2.0
 78 |         expected_metrics = {
 79 |             "accuracy": 0.5,
 80 |             "label_mean": 1.0,
 81 |             "prediction_mean": 0.65,
 82 |             "loss_mean": expected_loss
 83 |         }
 84 | 
 85 |         loss = task(predictions=predictions, labels=labels)
 86 | 
 87 |         sess.run([var.initializer for var in task.variables])
 88 |         for metric in task.metrics:
 89 |           sess.run([var.initializer for var in metric.variables])
 90 |         sess.run(loss)
 91 | 
 92 |         metrics = {
 93 |             metric.name: sess.run(metric.result()) for metric in task.metrics
 94 |         }
 95 | 
 96 |     self.assertAllClose(expected_metrics, metrics)
 97 | 
 98 | 
 99 | if __name__ == "__main__":  # Only when run as a script, not on import.
100 |   tf.test.main()
101 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/tasks/retrieval_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Lint-as: python3
 16 | """Tests retrieval tasks."""
 17 | 
 18 | import numpy as np
 19 | import tensorflow as tf
 20 | 
 21 | from tensorflow_recommenders import metrics
 22 | from tensorflow_recommenders.tasks import retrieval
 23 | 
 24 | 
 25 | def _sigmoid(x):
 26 |   return 1. / (1 + np.exp(-x))
 27 | 
 28 | 
 29 | class RetrievalTest(tf.test.TestCase):
 30 | 
 31 |   def test_task(self):
 32 | 
 33 |     query = tf.constant([[1, 2, 3], [2, 3, 4]], dtype=tf.float32)
 34 |     candidate = tf.constant([[1, 1, 1], [1, 1, 0]], dtype=tf.float32)
 35 |     candidate_dataset = tf.data.Dataset.from_tensor_slices(
 36 |         np.array([[0, 0, 0]] * 20, dtype=np.float32))
 37 | 
 38 |     task = retrieval.Retrieval(
 39 |         metrics=metrics.FactorizedTopK(
 40 |             candidates=candidate_dataset.batch(16), ks=[5]
 41 |         ),
 42 |         batch_metrics=[
 43 |             tf.keras.metrics.TopKCategoricalAccuracy(
 44 |                 k=1, name="batch_categorical_accuracy_at_1"
 45 |             )
 46 |         ],
 47 |         loss_metrics=[
 48 |             tf.keras.metrics.Mean(
 49 |                 name="batch_loss",
 50 |                 dtype=tf.float32,
 51 |             )
 52 |         ],
 53 |     )
 54 | 
 55 |     # All_pair_scores: [[6, 3], [9, 5]].
 56 |     # Normalized logits: [[3, 0], [4, 0]].
 57 |     expected_loss = -np.log(_sigmoid(3.0)) - np.log(1 - _sigmoid(4.0))
 58 | 
 59 |     expected_metrics = {
 60 |         "factorized_top_k/top_5_categorical_accuracy": 1.0,
 61 |         "batch_categorical_accuracy_at_1": 0.5,
 62 |         "batch_loss": expected_loss,
 63 |     }
 64 |     loss = task(query_embeddings=query, candidate_embeddings=candidate)
 65 |     metrics_ = {
 66 |         metric.name: metric.result().numpy() for metric in task.metrics
 67 |     }
 68 | 
 69 |     self.assertIsNotNone(loss)
 70 |     self.assertAllClose(expected_loss, loss)
 71 |     self.assertAllClose(expected_metrics, metrics_)
 72 | 
 73 |     # Test computation of batch metrics when skipping corpus metrics
 74 |     for metric in task.metrics:
 75 |       metric.reset_states()
 76 |     loss = task(query_embeddings=query,
 77 |                 candidate_embeddings=candidate,
 78 |                 compute_metrics=False)
 79 |     expected_metrics1 = {
 80 |         "factorized_top_k/top_5_categorical_accuracy": 0.0,
 81 |         "batch_categorical_accuracy_at_1": 0.5,
 82 |         "batch_loss": loss,
 83 |     }
 84 |     metrics1_ = {
 85 |         metric.name: metric.result().numpy() for metric in task.metrics
 86 |     }
 87 | 
 88 |     self.assertIsNotNone(loss)
 89 |     self.assertAllClose(expected_loss, loss)
 90 |     self.assertAllClose(expected_metrics1, metrics1_)
 91 | 
 92 |     # Test computation of corpus metrics when skipping batch metrics
 93 |     for metric in task.metrics:
 94 |       metric.reset_states()
 95 |     loss = task(
 96 |         query_embeddings=query,
 97 |         candidate_embeddings=candidate,
 98 |         compute_batch_metrics=False)
 99 |     expected_metrics2 = {
100 |         "factorized_top_k/top_5_categorical_accuracy": 1.0,
101 |         "batch_categorical_accuracy_at_1": 0.0,
102 |         "batch_loss": loss,
103 |     }
104 |     metrics2_ = {
105 |         metric.name: metric.result().numpy() for metric in task.metrics
106 |     }
107 | 
108 |     self.assertIsNotNone(loss)
109 |     self.assertAllClose(expected_loss, loss)
110 |     self.assertAllClose(expected_metrics2, metrics2_)
111 | 
112 |     # Test computation of metrics with sample_weight
113 |     for metric in task.metrics:
114 |       metric.reset_states()
115 |     loss = task(
116 |         query_embeddings=query,
117 |         candidate_embeddings=candidate,
118 |         sample_weight=tf.constant([0.7, 0.3], dtype=tf.float32),
119 |     )
120 | 
121 |     # All_pair_scores: [[6, 3], [9, 5]].
122 |     # Normalized logits: [[3, 0], [4, 0]].
123 |     expected_loss3 = -0.7 * np.log(_sigmoid(3.0)) - 0.3 * np.log(
124 |         1 - _sigmoid(4.0)
125 |     )
126 | 
127 |     expected_metrics3 = {
128 |         "factorized_top_k/top_5_categorical_accuracy": 1.0,
129 |         "batch_categorical_accuracy_at_1": 0.7,
130 |         "batch_loss": expected_loss3,
131 |     }
132 |     metrics3_ = {
133 |         metric.name: metric.result().numpy() for metric in task.metrics
134 |     }
135 |     self.assertIsNotNone(loss)
136 |     self.assertAllClose(expected_loss3, loss)
137 |     self.assertAllClose(expected_metrics3, metrics3_)
138 | 
139 |   def test_task_graph(self):
140 | 
141 |     with tf.Graph().as_default():
142 |       with tf.compat.v1.Session() as sess:
143 |         query = tf.constant([[1, 2, 3], [2, 3, 4]], dtype=tf.float32)
144 |         candidate = tf.constant([[1, 1, 1], [1, 1, 0]], dtype=tf.float32)
145 |         candidate_dataset = tf.data.Dataset.from_tensor_slices(
146 |             np.array([[0, 0, 0]] * 20, dtype=np.float32))
147 | 
148 |         task = retrieval.Retrieval(
149 |             metrics=metrics.FactorizedTopK(
150 |                 candidates=candidate_dataset.batch(16),
151 |                 ks=[5]
152 |             ),
153 |             batch_metrics=[
154 |                 tf.keras.metrics.TopKCategoricalAccuracy(
155 |                     k=1, name="batch_categorical_accuracy_at_1")
156 |             ])
157 | 
158 |         expected_metrics = {
159 |             "factorized_top_k/top_5_categorical_accuracy": 1.0,
160 |             "batch_categorical_accuracy_at_1": 0.5,
161 |         }
162 | 
163 |         loss = task(query_embeddings=query, candidate_embeddings=candidate)
164 | 
165 |         sess.run([var.initializer for var in task.variables])
166 |         for metric in task.metrics:
167 |           sess.run([var.initializer for var in metric.variables])
168 |         sess.run(loss)
169 | 
170 |         metrics_ = {
171 |             metric.name: sess.run(metric.result()) for metric in task.metrics
172 |         }
173 | 
174 |         self.assertAllClose(expected_metrics, metrics_)
175 | 
176 | 
177 | class RetrievalTestWithNegativeSamples(tf.test.TestCase):
178 | 
179 |   def test_task(self):
180 | 
181 |     query = tf.constant([[3, 2, 1], [2, 3, 4]], dtype=tf.float32)
182 |     candidate = tf.constant([[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=tf.float32)
183 |     candidate_dataset = tf.data.Dataset.from_tensor_slices(
184 |         np.array([[0, 0, 0]] * 20, dtype=np.float32))
185 | 
186 |     task = retrieval.Retrieval(
187 |         metrics=metrics.FactorizedTopK(
188 |             candidates=candidate_dataset.batch(16),
189 |             ks=[5]
190 |         ),
191 |         batch_metrics=[
192 |             tf.keras.metrics.TopKCategoricalAccuracy(
193 |                 k=1, name="batch_categorical_accuracy_at_1")
194 |         ])
195 | 
196 |     # Scores will have shape [num_queries, num_candidates]
197 |     # All_pair_scores:   [[2, 3, 5], [3, 7, 5]].
198 |     # Normalized logits: [[0, 1, 3], [0, 4, 2]].
199 |     expected_loss = (- np.log(1         / (1 + np.exp(1) + np.exp(3)))
200 |                      - np.log(np.exp(4) / (1 + np.exp(4) + np.exp(2))))
201 | 
202 |     expected_metrics = {
203 |         "factorized_top_k/top_5_categorical_accuracy": 1.0,
204 |         "batch_categorical_accuracy_at_1": 0.5,
205 |     }
206 |     loss = task(query_embeddings=query, candidate_embeddings=candidate)
207 |     metrics_ = {
208 |         metric.name: metric.result().numpy() for metric in task.metrics
209 |     }
210 | 
211 |     self.assertIsNotNone(loss)
212 |     self.assertAllClose(expected_loss, loss)
213 |     self.assertAllClose(expected_metrics, metrics_)
214 | 
215 |     # Test computation of batch metrics when skipping corpus metrics
216 |     for metric in task.metrics:
217 |       metric.reset_states()
218 |     loss = task(query_embeddings=query,
219 |                 candidate_embeddings=candidate,
220 |                 compute_metrics=False)
221 |     expected_metrics1 = {
222 |         "factorized_top_k/top_5_categorical_accuracy": 0.0,
223 |         "batch_categorical_accuracy_at_1": 0.5
224 |     }
225 |     metrics1_ = {
226 |         metric.name: metric.result().numpy() for metric in task.metrics
227 |     }
228 | 
229 |     self.assertIsNotNone(loss)
230 |     self.assertAllClose(expected_loss, loss)
231 |     self.assertAllClose(expected_metrics1, metrics1_)
232 | 
233 |     # Test computation of corpus metrics when skipping batch metrics
234 |     for metric in task.metrics:
235 |       metric.reset_states()
236 |     loss = task(
237 |         query_embeddings=query,
238 |         candidate_embeddings=candidate,
239 |         compute_batch_metrics=False)
240 |     expected_metrics2 = {
241 |         "factorized_top_k/top_5_categorical_accuracy": 1.0,
242 |         "batch_categorical_accuracy_at_1": 0.0
243 |     }
244 |     metrics2_ = {
245 |         metric.name: metric.result().numpy() for metric in task.metrics
246 |     }
247 | 
248 |     self.assertIsNotNone(loss)
249 |     self.assertAllClose(expected_loss, loss)
250 |     self.assertAllClose(expected_metrics2, metrics2_)
251 | 
252 | 
253 | class RetrievalTestWithMultipointQueries(tf.test.TestCase):
254 | 
255 |   def test_task(self):
256 | 
257 |     query = tf.constant(
258 |         [[[3, 2, 1], [1, 2, 3]], [[2, 3, 4], [4, 3, 2]]], dtype=tf.float32
259 |     )
260 |     candidate = tf.constant([[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=tf.float32)
261 |     candidate_dataset = tf.data.Dataset.from_tensor_slices(
262 |         np.array([[0, 0, 0]] * 20, dtype=np.float32)
263 |     )
264 | 
265 |     task = retrieval.Retrieval(
266 |         metrics=metrics.FactorizedTopK(
267 |             candidates=candidate_dataset.batch(16), ks=[5]
268 |         ),
269 |         batch_metrics=[
270 |             tf.keras.metrics.TopKCategoricalAccuracy(
271 |                 k=1, name="batch_categorical_accuracy_at_1"
272 |             )
273 |         ],
274 |     )
275 | 
276 |     # Scores will have shape [num_queries, num_candidates]
277 |     # All_pair_scores:   [[[2,2], [3,5], [5,3]], [[3, 3], [7,5], [5,7]]].
278 |     # Max-sim scores:    [[2, 5, 5], [3, 7, 7]].
279 |     # Normalized logits: [[0, 3, 3], [1, 5, 5]].
280 |     expected_loss = -np.log(1 / (1 + np.exp(3) + np.exp(3))) - np.log(
281 |         np.exp(5) / (np.exp(1) + np.exp(5) + np.exp(5))
282 |     )
283 | 
284 |     expected_metrics = {
285 |         "factorized_top_k/top_5_categorical_accuracy": (
286 |             0.0
287 |         ),  # not computed for multipoint queries
288 |         "batch_categorical_accuracy_at_1": 0.5,
289 |     }
290 |     loss = task(
291 |         query_embeddings=query,
292 |         candidate_embeddings=candidate,
293 |     )
294 |     metrics_ = {metric.name: metric.result().numpy() for metric in task.metrics}
295 | 
296 |     self.assertIsNotNone(loss)
297 |     self.assertAllClose(expected_loss, loss)
298 |     self.assertAllClose(expected_metrics, metrics_)
299 | 
300 | 
301 | if __name__ == "__main__":
302 |   tf.test.main()  # Runs the test cases defined in this module.
303 | 


--------------------------------------------------------------------------------
/tensorflow_recommenders/types.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 The TensorFlow Recommenders Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Type definitions."""
16 | 
17 | from typing import Callable, Union
18 | 
19 | 
20 | import tensorflow as tf
21 | 
22 | # An activation given either as a tensor-to-tensor callable or as a string.
23 | Activation = Union[Callable[[tf.Tensor], tf.Tensor], str]
24 | 


--------------------------------------------------------------------------------
/tools/build_api_docs.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 The TensorFlow Recommenders Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # lint-as: python3
 16 | r"""Tool to generate API docs.
 17 | 
 18 | # How to run
 19 | 
 20 | Install tensorflow_docs if needed:
 21 | 
 22 | ```
 23 | pip install git+https://github.com/tensorflow/docs
 24 | ```
 25 | 
 26 | Run the docs generator:
 27 | 
 28 | ```shell
 29 | python $(pwd)/tools/build_api_docs.py
 30 | ```
 31 | """
 32 | 
 33 | from typing import Text
 34 | 
 35 | import fire
 36 | 
 37 | import tensorflow as tf
 38 | 
 39 | from tensorflow_docs.api_generator import doc_controls
 40 | from tensorflow_docs.api_generator import generate_lib
 41 | from tensorflow_docs.api_generator import public_api
 42 | 
 43 | import tensorflow_recommenders as tfrs
 44 | 
 45 | # Default value of the `code_url_prefix` argument of `build_api_docs`.
 46 | GITHUB_CODE_PATH = (
 47 |     "https://github.com/tensorflow/recommenders/"
 48 |     "blob/main/tensorflow_recommenders/"
 49 | )
 50 | 
 51 | 
 52 | def _hide_layer_and_module_methods():
 53 |   """Hide methods and properties defined in the base classes of Keras layers.
 54 | 
 55 |   We hide all methods and properties of the base classes, except:
 56 |   - `__init__` is always documented.
 57 |   - `call` is always documented, as it can carry important information for
 58 |     complex layers.
 59 |   """
 60 | 
 61 |   module_contents = list(tf.Module.__dict__.items())
 62 |   model_contents = list(tf.keras.Model.__dict__.items())
 63 |   layer_contents = list(tf.keras.layers.Layer.__dict__.items())
 64 | 
 65 |   for name, obj in module_contents + layer_contents + model_contents:
 66 |     if name == "__init__":
 67 |       # Always document __init__.
 68 |       continue
 69 | 
 70 |     if name == "call":
 71 |       # Always document `call`.
 72 |       if hasattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS):  # pylint: disable=protected-access
 73 |         delattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS)  # pylint: disable=protected-access
 74 |       continue
 75 | 
 76 |     # Otherwise, exclude from documentation.
 77 |     if isinstance(obj, property):
 78 |       obj = obj.fget
 79 | 
 80 |     if isinstance(obj, (staticmethod, classmethod)):
 81 |       obj = obj.__func__
 82 | 
 83 |     try:
 84 |       doc_controls.do_not_doc_in_subclasses(obj)
 85 |     except AttributeError:
 86 |       pass
 87 | 
 88 | 
 89 | def build_api_docs(output_dir: Text = "/tmp/tensorflow_recommenders/api_docs",
 90 |                    code_url_prefix: Text = GITHUB_CODE_PATH,
 91 |                    search_hints: bool = True,
 92 |                    site_path: Text = "recommenders/api_docs/") -> None:
 93 |   """Builds the API docs."""
 94 | 
 95 |   _hide_layer_and_module_methods()
 96 | 
 97 |   print(f"Writing docs to {output_dir}")
 98 | 
 99 |   doc_generator = generate_lib.DocGenerator(
100 |       root_title="TensorFlow Recommenders",
101 |       py_modules=[("tfrs", tfrs)],
102 |       code_url_prefix=code_url_prefix,
103 |       search_hints=search_hints,
104 |       site_path=site_path,
105 |       callbacks=[
106 |           public_api.local_definitions_filter,
107 |           public_api.explicit_package_contents_filter
108 |       ])
109 |   doc_generator.build(output_dir=output_dir)
110 | 
111 | 
112 | if __name__ == "__main__":
113 |   fire.Fire(build_api_docs, name="build_api_docs")  # Command-line entry point.
114 | 


--------------------------------------------------------------------------------
/tools/build_scripts/pip_install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Installs TensorFlow and an editable TensorFlow Recommenders for python3.10.
 3 | set -x   # trace every command
 4 | set -e   # fail and exit on any command erroring
 5 | 
 6 | # Required env vars: `:?` aborts when one is unset or empty.
 7 | # NOTE(review): neither is referenced again below -- confirm this is intended.
 8 | : "${TF_VERSION:?}"
 9 | : "${PY_VERSION:?}"
10 | 
11 | # Import build functions.
12 | source ./tools/build_scripts/utils.sh
13 | 
14 | command -v python3.10
15 | python3.10 --version
16 | 
17 | # Install pip
18 | echo "Upgrading pip."
19 | python3.10 -m pip install --upgrade pip
20 | 
21 | # Install TensorFlow.
22 | echo "Installing TensorFlow..."
23 | python3.10 -m pip install tensorflow
24 | python3.10 -m pip install -q urllib3
25 | 
26 | # Install TensorFlow Recommenders. Quoted so `[docs]` is not glob-expanded.
27 | echo "Installing TensorFlow Recommenders..."
28 | python3.10 -m pip install -e '.[docs]'
29 | 
30 | # Test successful build.
31 | echo "Testing import..."
32 | python3.10 -c "import tensorflow_recommenders as tfrs"
33 | echo "Done."
34 | 


--------------------------------------------------------------------------------
/tools/build_scripts/release.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Builds, tests and optionally uploads a release. Usage: release.sh <commit>
 3 | set -e   # fail and exit on any command erroring
 4 | set -x
 5 | 
 6 | # NOTE(review): TF_VERSION is not referenced again in this script -- confirm.
 7 | TF_VERSION="2.9.0"
 8 | GIT_COMMIT_ID=${1:-""}
 9 | [[ -z "$GIT_COMMIT_ID" ]] && echo "Must provide a commit." && exit 1
10 | SETUP_ARGS=""
11 | if [ "$GIT_COMMIT_ID" = "nightly" ]
12 | then
13 |   echo "Nightly version building currently not implemented."
14 |   exit 1
15 | fi
16 | 
17 | # Import build functions.
18 | source ./tools/build_scripts/utils.sh
19 | 
20 | command -v python3.10
21 | python3.10 --version
22 | 
23 | # Install PyPI-related packages.
24 | python3.10 -m pip install -q wheel twine pyopenssl
25 | 
26 | # Quoted so the commit id reaches git as exactly one argument.
27 | echo "Checking out commit $GIT_COMMIT_ID..."
28 | git checkout "$GIT_COMMIT_ID"
29 | 
30 | # Build the sdist and the wheel (an empty, unquoted $SETUP_ARGS adds no args).
31 | echo "Building source distribution..."
32 | python3.10 setup.py sdist $SETUP_ARGS
33 | python3.10 setup.py bdist_wheel $SETUP_ARGS
34 | 
35 | # Check the built distributions with twine.
36 | twine check dist/*
37 | 
38 | # Install and test the distribution
39 | echo "Running tests..."
40 | python3.10 -m pip install dist/*.whl
41 | python3.10 -m pip install scann
42 | python3.10 -m pip install pytest
43 | python3.10 -m pytest -v .
44 | 
45 | # Publish to PyPI, but only after an explicit y/Y answer at the prompt.
46 | read -p "Publish? (y/n) " -r
47 | echo
48 | if [[ $REPLY =~ ^[Yy]$ ]]
49 | then
50 |   echo "Publishing to PyPI."
51 |   twine upload dist/*
52 | else
53 |   echo "Skipping upload."
54 | fi
55 | 
56 | echo "Done."
57 | 


--------------------------------------------------------------------------------
/tools/build_scripts/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Installs the package and runs the test suite with pytest under python3.10.
 3 | set -e   # fail and exit on any command erroring
 4 | 
 5 | # Install. The script is sourced, so it runs in this shell.
 6 | source ./tools/build_scripts/pip_install.sh
 7 | 
 8 | # Install test dependencies.
 9 | python3.10 -m pip install pytest
10 | 
11 | # Run tests, collected from the current directory.
12 | python3.10 -m pytest -v .
13 | 


--------------------------------------------------------------------------------
/tools/build_scripts/utils.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | function install_tf() {
 4 |   local version=$1
 5 |   if [[ "$version" == "tf-nightly"  ]]
 6 |   then
 7 |     pip install -q tf-nightly;
 8 |   else
 9 |     pip install -q "tensorflow==$version"
10 |   fi
11 |   pip install -q urllib3
12 | }
13 | 


--------------------------------------------------------------------------------