├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── allrank
│   ├── __init__.py
│   ├── click_models
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cascade_models.py
│   │   ├── click_utils.py
│   │   └── duplicate_aware.py
│   ├── config.py
│   ├── config_template.json
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset_loading.py
│   │   ├── dataset_saving.py
│   │   └── generate_dummy_data.py
│   ├── inference
│   │   ├── __init__.py
│   │   └── inference_utils.py
│   ├── main.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── approxNDCG.py
│   │   │   ├── bce.py
│   │   │   ├── binary_listNet.py
│   │   │   ├── lambdaLoss.py
│   │   │   ├── listMLE.py
│   │   │   ├── listNet.py
│   │   │   ├── loss_utils.py
│   │   │   ├── neuralNDCG.py
│   │   │   ├── ordinal.py
│   │   │   ├── pointwise.py
│   │   │   └── rankNet.py
│   │   ├── metrics.py
│   │   ├── model.py
│   │   ├── model_utils.py
│   │   ├── positional.py
│   │   └── transformer.py
│   ├── rank_and_click.py
│   ├── training
│   │   ├── __init__.py
│   │   ├── early_stop.py
│   │   └── train_utils.py
│   └── utils
│       ├── __init__.py
│       ├── args_utils.py
│       ├── command_executor.py
│       ├── config_utils.py
│       ├── experiments.py
│       ├── file_utils.py
│       ├── ltr_logging.py
│       ├── python_utils.py
│       └── tensorboard_utils.py
├── reproducibility
│   ├── HOWTO.md
│   ├── configs
│   │   ├── contextaware_web30k
│   │   │   ├── ndcgloss2pp.json
│   │   │   ├── ndcgloss2pp_mlp.json
│   │   │   ├── ordinal.json
│   │   │   └── ordinal_mlp.json
│   │   └── neuralndcg_web30k
│   │       ├── approxndcg.json
│   │       ├── lambdarank_atmax.json
│   │       └── neuralndcg_atmax.json
│   └── normalize_features.py
├── requirements.txt
├── scripts
│   ├── ci.sh
│   ├── local_config.json
│   ├── local_config_click_model.json
│   ├── run_example.sh
│   ├── run_in_docker.sh
│   ├── run_in_docker_click.sh
│   └── run_tests.sh
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── click_models
    │   ├── __init__.py
    │   ├── test_alternative_click_models.py
    │   ├── test_apply_click_model.py
    │   ├── test_base_cascade_model.py
    │   ├── test_diverse_clicks_model.py
    │   ├── test_duplicate_click_model.py
    │   ├── test_feature_click_model.py
    │   ├── test_fixed_click_model.py
    │   ├── test_masked_click_model.py
    │   └── test_random_click_model.py
    ├── losses
    │   ├── __init__.py
    │   ├── test_approxndcg.py
    │   ├── test_binary_listnet.py
    │   ├── test_lambdaloss.py
    │   ├── test_listmle.py
    │   ├── test_listnet.py
    │   ├── test_loss_ordinal.py
    │   ├── test_loss_pointwise.py
    │   ├── test_mrr.py
    │   ├── test_ndcg.py
    │   ├── test_neuralndcg.py
    │   ├── test_ranknet.py
    │   └── utils.py
    └── test_rank_slates.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | .DS_Store
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | .DS_Store
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # PyInstaller
25 | # Usually these files are written by a python script from a template
26 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
27 | *.manifest 28 | *.spec 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt 33 | 34 | # Unit test / coverage reports 35 | htmlcov/ 36 | .tox/ 37 | .coverage 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | .mypy_cache 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | *.sqlite3 54 | 55 | # Vagrant 56 | vagrant/.vagrant 57 | vagrant/Vagrantfile.local 58 | 59 | # MKDocs 60 | site/ 61 | 62 | # Static files 63 | bower_components/ 64 | node_modules/ 65 | 66 | # Editors 67 | .idea/ 68 | *~ 69 | 70 | # Project-specific files 71 | model_output 72 | allrank/config.json 73 | task-data 74 | dummy_data 75 | 76 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG arch_version 2 | 3 | FROM python:3.10 as base 4 | 5 | MAINTAINER MLR 6 | 7 | RUN mkdir /allrank 8 | COPY requirements.txt setup.py Makefile README.md /allrank/ 9 | 10 | RUN make -C /allrank install-reqs 11 | 12 | WORKDIR /allrank 13 | 14 | FROM base as CPU 15 | RUN python3 -m pip install torchvision==0.14.1 torch==1.13.1 --extra-index-url https://download.pytorch.org/whl/cpu 16 | 17 | FROM base as GPU 18 | RUN python3 -m pip install torchvision==0.14.1 torch==1.13.1 19 | 20 | FROM ${arch_version} as FINAL 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Allegro.pl sp. z o.o. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: ci
2 | ci: lint tests wheel egg
3 | 
4 | .PHONY: lint
5 | lint:
6 | 	flake8 allrank
7 | 	flake8 tests
8 | 	mypy allrank --ignore-missing-imports --check-untyped-defs
9 | 	mypy tests --ignore-missing-imports --check-untyped-defs
10 | 
11 | .PHONY: install-reqs
12 | install-reqs:
13 | 	pip install -r requirements.txt
14 | 	python setup.py install
15 | 
16 | .PHONY: tests
17 | tests: install-reqs unittests
18 | 
19 | .PHONY: unittests
20 | unittests:
21 | 	python -m pytest
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # allRank : Learning to Rank in PyTorch
2 | 
3 | ## About
4 | 
5 | allRank is a PyTorch-based framework for training neural Learning-to-Rank (LTR) models, featuring implementations of:
6 | * common pointwise, pairwise and listwise loss functions
7 | * fully connected and Transformer-like scoring functions
8 | * commonly used evaluation metrics like Normalized Discounted Cumulative Gain (NDCG) and Mean Reciprocal Rank (MRR)
9 | * click-models for experiments on simulated click-through data
10 | 
11 | ### Motivation
12 | 
13 | allRank provides an easy and flexible way to experiment with various LTR neural network models and loss functions.
14 | It is easy to add a custom loss, and to configure the model and the training procedure.
15 | We hope that allRank will facilitate both research in neural LTR and its industrial applications.
16 | 
17 | ## Features
18 | 
19 | ### Implemented loss functions:
20 | 1. ListNet (for binary and graded relevance)
21 | 2. ListMLE
22 | 3. RankNet
23 | 4. Ordinal loss
24 | 5. LambdaRank
25 | 6. LambdaLoss
26 | 7. ApproxNDCG
27 | 8. RMSE
28 | 9. NeuralNDCG (introduced in https://arxiv.org/pdf/2102.07831)
29 | 
30 | ### Getting started guide
31 | 
32 | To help you get started, we provide a ```run_example.sh``` script which generates dummy ranking data in libsvm format and trains
33 | a Transformer model on the data using the provided example ```config.json``` config file. Once you run the script, the dummy data can be found in the `dummy_data` directory
34 | and the results of the experiment in the `test_run` directory. To run the example, Docker is required.
35 | 
36 | ### Getting the right architecture version (GPU vs CPU-only)
37 | 
38 | Since torch binaries are different for GPU and CPU, and the GPU version doesn't work on CPU, one must select and build the appropriate Docker image version.
39 | 
40 | To do so, pass `gpu` or `cpu` as the `arch_version` build-arg in
41 | 
42 | ```docker build --build-arg arch_version=${ARCH_VERSION}```
43 | 
44 | When calling `run_example.sh`, you can select the proper version with the first command-line argument, e.g.
45 | 
46 | ```run_example.sh gpu ...```
47 | 
48 | with `cpu` being the default if not specified.
49 | 
50 | ### Configuring your model & training
51 | 
52 | To train your own model, configure your experiment in the ```config.json``` file and run
53 | 
54 | ```python allrank/main.py --config_file_name allrank/config.json --run_id <the_name_of_your_experiment> --job_dir <the_place_to_save_results>```
55 | 
56 | All the hyperparameters of the training procedure, i.e. model definition, data location, loss and metrics used, training hyperparameters, etc., are controlled
57 | by the ```config.json``` file. We provide a template file ```config_template.json``` where supported attributes, their meaning and possible values are explained.
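
For a quick sanity check, a parsed config can also be loaded and inspected programmatically via `allrank.config.Config` (a minimal sketch, assuming the package is installed and a config file exists at the given path):

```
from allrank.config import Config

config = Config.from_json("allrank/config.json")
print(config.loss.name)        # the configured loss, e.g. a name from the losses package
print(config.training.epochs)  # training hyperparameters are plain attributes
```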
58 | Note that following MSLR-WEB30K convention, your libsvm file with training data should be named `train.txt`. You can specify the name of the validation dataset
59 | (e.g. valid or test) in the config. Results will be saved under the path ```<job_dir>/results/<run_id>```
60 | 
61 | Google Cloud Storage is supported in allRank as a place for data and job results.
62 | 
63 | 
64 | ### Implementing custom loss functions
65 | 
66 | To experiment with your own custom loss, you need to implement a function that takes two tensors (model prediction and ground truth) as input
67 | and put it in the `losses` package, making sure it is exposed on a package level.
68 | To use it in training, simply pass the name (and args, if your loss method has some hyperparameters) of your function in the correct place in the config file:
69 | 
70 | ```
71 | "loss": {
72 |     "name": "yourLoss",
73 |     "args": {
74 |         "arg1": val1,
75 |         "arg2": val2
76 |     }
77 | }
78 | ```
79 | 
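For illustration, a minimal sketch of such a function (the name `yourLoss` and its args are hypothetical, matching the snippet above; real losses in allRank also mask padded documents via `PADDED_Y_VALUE`, omitted here for brevity):

```
import torch

def yourLoss(y_pred: torch.Tensor, y_true: torch.Tensor, arg1: float = 1.0, arg2: float = 0.0) -> torch.Tensor:
    # a toy pointwise squared-error surrogate; arg1/arg2 arrive from the config "args"
    return arg1 * torch.mean((y_pred - y_true) ** 2) + arg2
```
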
80 | ### Applying click-model
81 | 
82 | To apply a click model you need to first have an allRank model trained.
83 | Next, run:
84 | 
85 | ```python allrank/rank_and_click.py --input-model-path <path_to_the_model_weights_file> --roles <comma_separated_list_of_ds_roles_to_process e.g. train,valid> --config_file_name allrank/config.json --run_id <the_name_of_your_experiment> --job_dir <the_place_to_save_results>```
86 | 
87 | The model will be used to rank all slates from the dataset specified in the config. Next, a click model configured in the config will be applied and the resulting click-through dataset will be written under ```<job_dir>/results/<run_id>``` in libSVM format.
88 | The path to the results directory may then be used as an input for another allRank model training.
89 | 
90 | ## Continuous integration
91 | 
92 | You should run `scripts/ci.sh` to verify that the code passes style guidelines and unit tests.
93 | 
94 | ## Research
95 | 
96 | This framework was developed to support the research project [Context-Aware Learning to Rank with Self-Attention](https://arxiv.org/abs/2005.10084). If you use allRank in your research, please cite:
97 | ```
98 | @article{Pobrotyn2020ContextAwareLT,
99 |   title={Context-Aware Learning to Rank with Self-Attention},
100 |   author={Przemyslaw Pobrotyn and Tomasz Bartczak and Mikolaj Synowiec and Radoslaw Bialobrzeski and Jaroslaw Bojar},
101 |   journal={ArXiv},
102 |   year={2020},
103 |   volume={abs/2005.10084}
104 | }
105 | ```
106 | Additionally, if you use the NeuralNDCG loss function, please cite the corresponding work, [NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting](https://arxiv.org/abs/2102.07831):
107 | ```
108 | @article{Pobrotyn2021NeuralNDCG,
109 |   title={NeuralNDCG: Direct Optimisation of a Ranking Metric via Differentiable Relaxation of Sorting},
110 |   author={Przemyslaw Pobrotyn and Radoslaw Bialobrzeski},
111 |   journal={ArXiv},
112 |   year={2021},
113 |   volume={abs/2102.07831}
114 | }
115 | ```
116 | 
117 | ## License
118 | 
119 | Apache 2 License
120 | 
--------------------------------------------------------------------------------
/allrank/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allegro/allRank/c88475661cb72db292d13283fdbc4f2ae6498ee4/allrank/__init__.py
--------------------------------------------------------------------------------
/allrank/click_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allegro/allRank/c88475661cb72db292d13283fdbc4f2ae6498ee4/allrank/click_models/__init__.py
--------------------------------------------------------------------------------
/allrank/click_models/base.py:
--------------------------------------------------------------------------------
1 | import math
2 | from abc import ABC, abstractmethod
3 | from typing import List, Tuple, Callable, Optional
4 | 
5 | import numpy as np
6 | import torch
7 | 
8 | 
9 | class ClickModel(ABC):
10 |     """
11 |     Base class for all click models. Specifies the click model contract.
12 |     """
13 | 
14 |     @abstractmethod
15 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
16 |         """
17 |         Applies a click model and returns the click mask for documents.
18 | 
19 |         :rtype: np.ndarray [ number_of_documents ] -> a mask of the same length as the documents -
20 |             defining whether a document was clicked (1), not clicked (0) or is a padded element (-1)
21 | 
22 |         :param documents: Tuple of :
23 |             torch.Tensor [ number_of_documents, dimensionality_of_latent_vector ], representing features of documents
24 |             torch.Tensor [ number_of_documents ] representing relevancy of documents
25 |         """
26 |         pass
27 | 
28 | 
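# --- Editor's sketch (illustrative; not part of the original base.py). The
# contract above is satisfied by implementing `click` alone; e.g. a hypothetical
# model that clicks every document could look like this:
class ClickEverythingModel(ClickModel):
    def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
        X, y = documents
        return np.ones(len(y), dtype=bool)  # click mask: every document clicked
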
29 | class RandomClickModel(ClickModel):
30 |     """
31 |     This ClickModel clicks a configured number of times on random documents
32 |     """
33 | 
34 |     def __init__(self, n_clicks: int):
35 |         """
36 | 
37 |         :param n_clicks: number of documents that will be clicked
38 |         """
39 |         self.n_clicks = n_clicks
40 | 
41 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
42 |         X, y = documents
43 |         clicks = np.random.choice(range(len(y)), size=self.n_clicks, replace=False)
44 |         mask = np.zeros(len(y), dtype=bool)
45 |         mask[clicks] = 1
46 |         return mask
47 | 
48 | 
49 | class FixedClickModel(ClickModel):
50 |     """
51 |     This ClickModel clicks on documents at fixed positions
52 |     """
53 | 
54 |     def __init__(self, click_positions: List[int]):
55 |         """
56 | 
57 |         :param click_positions: list of indices of documents that will be clicked
58 |         """
59 |         self.click_positions = click_positions
60 | 
61 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
62 |         X, y = documents
63 |         clicks = np.zeros(len(y), dtype=bool)
64 |         clicks[self.click_positions] = 1
65 |         return clicks
66 | 
67 | 
68 | class MultipleClickModel(ClickModel):
69 |     """
70 |     This click model uses one of the given click models with the given probability
71 |     """
72 | 
73 |     def __init__(self, inner_click_models: List[ClickModel], probabilities: List[float]):
74 |         """
75 | 
76 |         :param inner_click_models: list of click models to choose from
77 |         :param probabilities: list of probabilities - must be of the same length as the list of click models and sum to 1.0
78 |         """
79 |         self.inner_click_models = inner_click_models
80 |         assert math.isclose(np.sum(probabilities), 1.0, abs_tol=1e-5), \
81 |             f"probabilities should sum to one, but got {probabilities} which sums to {np.sum(probabilities)}"
82 |         self.probabilities = np.array(probabilities).cumsum()
83 | 
84 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
85 |         index = np.argmax(np.random.rand() < self.probabilities)
86 |         result = self.inner_click_models[index].click(documents)  # type: ignore
87 |         return result
88 | 
89 | 
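# --- Editor's sketch (illustrative; not part of the original base.py): composing
# the models above, e.g. clicking the top two positions on ~70% of slates and
# two random documents otherwise:
def _example_mixed_click_model() -> ClickModel:
    return MultipleClickModel(
        [FixedClickModel(click_positions=[0, 1]), RandomClickModel(n_clicks=2)],
        probabilities=[0.7, 0.3],
    )
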
90 | class ConditionedClickModel(ClickModel):
91 |     """
92 |     This click model allows combining multiple click models with a logical function
93 |     """
94 | 
95 |     def __init__(self, inner_click_models: List[ClickModel], combiner: Callable):
96 |         """
97 | 
98 |         :param inner_click_models: list of click models to combine
99 |         :param combiner: a function applied to the result of clicks from click models - e.g. np.all or np.any
100 |         """
101 |         self.inner_click_models = inner_click_models
102 |         self.combiner = combiner
103 | 
104 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
105 |         clicks_from_click_models = [click_model.click(documents) for click_model in self.inner_click_models]
106 |         return self.combiner(clicks_from_click_models, 0)
107 | 
108 | 
109 | class MaxClicksModel(ClickModel):
110 |     """
111 |     This click model takes another click model and limits the number of clicks to a given value,
112 |     effectively keeping the top `max_clicks` clicks
113 |     """
114 | 
115 |     def __init__(self, inner_click_model: ClickModel, max_clicks: Optional[int]):
116 |         """
117 | 
118 |         :param inner_click_model: a click model to generate clicks
119 |         :param max_clicks: number of clicks that should be preserved
120 |         """
121 |         self.inner_click_model = inner_click_model
122 |         self.max_clicks = max_clicks
123 | 
124 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
125 |         underlying_clicks = self.inner_click_model.click(documents)
126 |         if self.max_clicks is not None:
127 |             max_clicks_mask = underlying_clicks.cumsum() <= self.max_clicks
128 |             return underlying_clicks * max_clicks_mask
129 |         return underlying_clicks
130 | 
131 | 
132 | class OnlyRelevantClickModel(ClickModel):
133 |     """
134 |     This ClickModel clicks on a document when its relevancy is greater than or equal to a predefined value
135 | 
136 |     """
137 | 
138 |     def __init__(self, relevancy_threshold: float):
139 |         """
140 |         :param relevancy_threshold: a minimum value of relevancy of a document to be clicked (inclusive)
141 |         """
142 |         self.relevancy_threshold = relevancy_threshold
143 | 
144 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
145 |         X, y = documents
146 |         return np.array(y) >= self.relevancy_threshold
147 | 
--------------------------------------------------------------------------------
/allrank/click_models/cascade_models.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | 
3 | import numpy as np
4 | import torch
5 | from scipy.spatial.distance import cdist
6 | 
7 | from allrank.click_models.base import ClickModel
8 | from allrank.click_models.duplicate_aware import EverythingButDuplicatesClickModel
9 | from allrank.data.dataset_loading import PADDED_Y_VALUE
10 | 
11 | 
12 | class BaseCascadeModel(ClickModel):
13 |     """
14 |     This ClickModel simulates a decaying probability of observing an item
15 |     and clicks on an observed item given its relevance is greater than or equal to a given threshold
16 | 
17 |     """
18 | 
19 |     def __init__(self, eta: float, threshold: float):
20 |         """
21 | 
22 |         :param eta: the power to be applied over a result of a decay function (specified as 1/position)
23 |             to decide whether a document was observed
24 |         :param threshold: a minimum value of relevancy of an observed document to be clicked (inclusive)
25 |         """
26 |         self.eta = eta
27 |         self.threshold = threshold
28 | 
29 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
30 |         X, y = documents
31 |         observed_mask = (1 / np.arange(1, len(y) + 1) ** self.eta) >= np.random.rand(len(y))
32 |         return (y * observed_mask >= self.threshold).numpy()
33 | 
34 | 
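# --- Editor's sketch (illustrative; not part of the original file): with eta=1.0
# the observation probability decays as 1/position (1.0, 0.5, 0.33, ...), and an
# observed document is clicked when its relevance reaches the threshold:
def _example_cascade_clicks() -> np.ndarray:
    model = BaseCascadeModel(eta=1.0, threshold=1.0)
    X = torch.rand(5, 8)                         # 5 documents, 8 features each
    y = torch.tensor([2.0, 0.0, 1.0, 2.0, 0.0])  # graded relevance labels
    return model.click((X, y))                   # boolean click mask per document
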
35 | class DiverseClicksModel(ClickModel):
36 |     """
37 |     A 'diverse-clicks' model from the Seq2Slate paper https://arxiv.org/abs/1810.02019
38 |     It clicks on documents from top to bottom if:
39 |     1. a delegate click model decides to click on the document (in the original paper - CascadeModel)
40 |     2. it is no closer than a defined percentile of distances to a previously clicked document
41 |     """
42 | 
43 |     def __init__(self, inner_click_model, q_percentile=0.5):
44 |         """
45 | 
46 |         :param inner_click_model: original, non-diversified click model
47 |         :param q_percentile: a percentile of pairwise distances that will be used as a distance threshold to tell if a pair is a duplicate
48 |         """
49 |         self.inner_click_model = inner_click_model
50 |         self.q_percentile = q_percentile
51 | 
52 |     def __pairwise_distances_list(self, X):
53 |         dist = cdist(X, X, metric='euclidean')
54 |         triu_indices = np.triu_indices(dist.shape[0] - 1)
55 |         return dist[:-1, 1:][triu_indices]
56 | 
57 |     def click(self, documents: Tuple[torch.Tensor, torch.Tensor]) -> np.ndarray:
58 |         X, y = documents
59 | 
60 |         real_docs_mask = (y != PADDED_Y_VALUE)
61 |         real_X = X[real_docs_mask, :]
62 | 
63 |         distances = self.__pairwise_distances_list(real_X)
64 |         if len(distances) == 0:
65 |             duplicate_margin = 0
66 |         else:
67 |             duplicate_margin = np.quantile(distances, q=self.q_percentile)
68 | 
69 |         def not_similar(x_vec, clicked_X):
70 |             cX = clicked_X.copy()
71 |             cX.append(x_vec)
72 |             cX = torch.stack(cX, dim=0)
73 |             cm = EverythingButDuplicatesClickModel(duplicate_margin)
74 |             clicks = cm.click((cX, np.ones(len(cX))))
75 |             last_element_clicked = clicks[-1]
76 |             return last_element_clicked == 1
77 | 
78 |         relevant_for_click = self.inner_click_model.click(documents)
79 | 
80 |         clicked_Xs = []  # type: ignore
81 |         indices_to_click = np.argwhere(relevant_for_click == 1)
82 |         for idx_to_click in indices_to_click:
83 |             idx_to_click = idx_to_click[0]
84 |             X_to_click = X[idx_to_click]
85 |             if not_similar(X_to_click, clicked_Xs):
86 |                 clicked_Xs.append(X_to_click)
87 |             else:
88 |                 relevant_for_click[idx_to_click] = 0
89 | 
90 |         return relevant_for_click
91 | 
--------------------------------------------------------------------------------
/allrank/click_models/click_utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, Union
2 | 
3 | import numpy as np
4 | import torch
5 | 
6 | from allrank.click_models.base import ClickModel
7 | from allrank.data.dataset_loading import PADDED_Y_VALUE
8 | 
9 | 
10 | def click_on_slates(slates: Union[Tuple[np.ndarray, np.ndarray], Tuple[torch.Tensor, torch.Tensor]],
11 |                     click_model: ClickModel, include_empty: bool) -> Tuple[List[Union[np.ndarray, torch.Tensor]], List[List[int]]]:
12 |     """
13 |     This method runs a click model on a list of slates and returns new slates with `y` taken from clicks
14 | 
15 |     :param slates: a Tuple of X, y:
16 |         X being a list of slates represented by document vectors
17 |         y being a list of slates represented by document relevancies
18 |     :param click_model: a click model to be applied to every slate
19 |     :param include_empty: if True - will return even slates that didn't get any click
20 |     :return: Tuple of X, clicks: X representing the same document vectors as the input X, clicks representing the click mask for every slate
21 |     """
22 |     X, y = slates
23 |     clicks = [MaskedRemainMasked(click_model).click(slate) for slate in zip(X, y)]
24 |     X_with_clicks = [[X, slate_clicks] for X, slate_clicks in list(zip(X, clicks)) if
25 |                      (np.sum(slate_clicks > 0) > 0 or include_empty)]
26 |     return_X, clicks = map(list, zip(*X_with_clicks))  # type: ignore
27 |     return return_X, clicks  # type: ignore
28 | 
29 | 
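# --- Editor's sketch (illustrative; not part of the original file): clicking two
# toy slates with a relevance-threshold model; slates without clicks are dropped.
def _example_click_on_slates():
    from allrank.click_models.base import OnlyRelevantClickModel  # sketch-only import
    X = np.random.rand(2, 3, 8)                       # 2 slates, 3 docs, 8 features
    y = np.array([[2.0, 0.0, 1.0], [0.0, 0.0, 2.0]])  # graded relevance per slate
    return click_on_slates((X, y), OnlyRelevantClickModel(2.0), include_empty=False)
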
30 | class MaskedRemainMasked(ClickModel):
31 |     """
32 |     This click model wraps another click model and:
33 |     1. ensures the inner click model does not get documents that were padded
34 |     2. ensures padded documents get '-1' in the 'clicked' vector
35 |     """
36 | 
37 |     def __init__(self, inner_click_model: ClickModel):
38 |         """
39 | 
40 |         :param inner_click_model: a click model that is run on the list of non-padded documents
41 |         """
42 |         self.inner_click_model = inner_click_model
43 | 
44 |     def click(self, documents: Union[Tuple[np.ndarray, np.ndarray], Tuple[torch.Tensor, torch.Tensor]]) -> np.ndarray:
45 |         X, y = documents
46 |         padded_values_mask = y == PADDED_Y_VALUE
47 |         real_X = X[~padded_values_mask]
48 |         real_y = y[~padded_values_mask]
49 |         clicks = self.inner_click_model.click((real_X, real_y))
50 |         final_clicks = np.zeros_like(y)
51 |         final_clicks[padded_values_mask] = PADDED_Y_VALUE
52 |         final_clicks[~padded_values_mask] = clicks
53 |         return final_clicks
54 | 
--------------------------------------------------------------------------------
/allrank/click_models/duplicate_aware.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Union
2 | 
3 | import numpy as np
4 | import torch
5 | from scipy.spatial.distance import cdist
6 | 
7 | from allrank.click_models.base import ClickModel
8 | 
9 | 
10 | class EverythingButDuplicatesClickModel(ClickModel):
11 |     """
12 |     This ClickModel clicks on every document that was not previously clicked,
13 |     if its distance to every previous document is larger than a given margin in a given metric
14 |     """
15 | 
16 |     def __init__(self, duplicate_margin: float = 0, metric: str = "euclidean"):
17 |         """
18 | 
19 |         :param duplicate_margin: a margin to tell whether a pair of documents is treated as a duplicate.
20 |             If the distance is less than or equal to this value, the pair is marked a duplicate
21 |         :param metric: a metric in which pairwise distances are calculated
22 |             (metric must be supported by `scipy.spatial.distance.cdist`)
23 |         """
24 |         self.duplicate_margin = duplicate_margin
25 |         self.metric = metric
26 | 
27 |     def click(self, documents: Tuple[torch.Tensor, Union[torch.Tensor, np.ndarray]]) -> np.ndarray:
28 |         X, y = documents
29 |         dist = cdist(X, X, metric=self.metric)
30 |         dist = np.triu(dist, k=1)
31 |         np.fill_diagonal(dist, np.inf)
32 |         indices = np.tril_indices(dist.shape[0])
33 |         dist[indices] = np.inf
34 |         return 1 * (dist > self.duplicate_margin).min(0)
35 | 
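
# --- Editor's sketch (illustrative; not part of the original file): documents 0
# and 1 below are identical, so only the first of the pair gets clicked.
def _example_duplicate_aware_clicks() -> np.ndarray:
    model = EverythingButDuplicatesClickModel(duplicate_margin=0.1)
    X = torch.tensor([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0]])  # doc 1 duplicates doc 0
    y = np.ones(3)
    return model.click((X, y))  # expected: array([1, 0, 1])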
--------------------------------------------------------------------------------
/allrank/config.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 | from typing import Dict, List, Optional
4 | 
5 | from attr import attrib, attrs
6 | 
7 | 
8 | @attrs
9 | class TransformerConfig:
10 |     N = attrib(type=int)
11 |     d_ff = attrib(type=int)
12 |     h = attrib(type=int)
13 |     positional_encoding = attrib(type=dict)
14 |     dropout = attrib(type=float)
15 | 
16 | 
17 | @attrs
18 | class FCConfig:
19 |     sizes = attrib(type=List[int])
20 |     input_norm = attrib(type=bool)
21 |     activation = attrib(type=str)
22 |     dropout = attrib(type=float)
23 | 
24 | 
25 | @attrs
26 | class PostModelConfig:
27 |     d_output = attrib(type=int)
28 |     output_activation = attrib(type=str)
29 | 
30 | 
31 | @attrs
32 | class ModelConfig:
33 |     fc_model = attrib(type=FCConfig)
34 |     transformer = attrib(type=TransformerConfig)
35 |     post_model = attrib(type=PostModelConfig)
36 | 
37 | 
38 | @attrs
39 | class PositionalEncoding:
40 |     strategy = attrib(type=str)
41 |     max_indices = attrib(type=int)
42 | 
43 | 
44 | @attrs
45 | class DataConfig:
46 |     path = attrib(type=str)
47 |     num_workers = attrib(type=int)
48 |     batch_size = attrib(type=int)
49 |     slate_length = attrib(type=int)
50 |     validation_ds_role = attrib(type=str)
51 | 
52 | 
53 | @attrs
54 | class TrainingConfig:
55 |     epochs = attrib(type=int)
56 |     gradient_clipping_norm = attrib(type=float)
57 |     early_stopping_patience = attrib(type=int, default=0)
58 | 
59 | 
60 | @attrs
61 | class NameArgsConfig:
62 |     name = attrib(type=str)
63 |     args = attrib(type=dict)
64 | 
65 | 
66 | @attrs
67 | class Config:
68 |     model = attrib(type=ModelConfig)
69 |     data = attrib(type=DataConfig)
70 |     optimizer = attrib(type=NameArgsConfig)
71 |     training = attrib(type=TrainingConfig)
72 |     loss = attrib(type=NameArgsConfig)
73 |     metrics = attrib(type=Dict[str, List[int]])
74 |     lr_scheduler = attrib(type=NameArgsConfig)
75 |     val_metric = attrib(type=str, default=None)
76 |     expected_metrics = attrib(type=Dict[str, Dict[str, float]], default={})
77 |     detect_anomaly = attrib(type=bool, default=False)
78 |     click_model = attrib(type=Optional[NameArgsConfig], default=None)
79 | 
80 |     @classmethod
81 |     def from_json(cls, config_path):
82 |         with open(config_path) as config_file:
83 |             config = json.load(config_file)
84 |             return Config.from_dict(config)
85 | 
86 |     @classmethod
87 |     def from_dict(cls, config):
88 |         config["model"] = ModelConfig(**config["model"])
89 |         if config["model"].transformer:
90 |             config["model"].transformer = TransformerConfig(**config["model"].transformer)
91 |             if config["model"].transformer.positional_encoding:
92 |                 config["model"].transformer.positional_encoding = PositionalEncoding(
93 |                     **config["model"].transformer.positional_encoding)
94 |         config["data"] = DataConfig(**config["data"])
95 |         config["optimizer"] = NameArgsConfig(**config["optimizer"])
96 |         config["training"] = TrainingConfig(**config["training"])
97 |         config["metrics"] = cls._parse_metrics(config["metrics"])
98 |         config["lr_scheduler"] = NameArgsConfig(**config["lr_scheduler"])
99 |         config["loss"] = NameArgsConfig(**config["loss"])
100 |         if "click_model" in config.keys():
101 |             config["click_model"] = NameArgsConfig(**config["click_model"])
102 |         return cls(**config)
103 | 
104 |     @staticmethod
105 |     def _parse_metrics(metrics):
106 |         metrics_dict = defaultdict(list)  # type: Dict[str, list]
107 |         for metric_string in metrics:
108 |             try:
109 |                 name, at = metric_string.split("_")
110 |                 metrics_dict[name].append(int(at))
111 |             except (ValueError, TypeError):
112 |                 raise MetricConfigError(
113 |                     metric_string,
114 |                     "Wrong formatting of metric in config. Expected format: <name>_<at> where <name> is a valid metric name and <at> is an int")
115 |         return metrics_dict
116 | 
117 | 
118 | class MetricConfigError(Exception):
119 |     pass
120 | 
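# --- Editor's sketch (illustrative; not part of the original file): metric
# strings of the form <name>_<at> are grouped by name.
def _example_parse_metrics() -> dict:
    # e.g. {"ndcg": [5, 10], "mrr": [10]} (as a defaultdict)
    return Config._parse_metrics(["ndcg_5", "ndcg_10", "mrr_10"])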
DataConfig(**config["data"]) 95 | config["optimizer"] = NameArgsConfig(**config["optimizer"]) 96 | config["training"] = TrainingConfig(**config["training"]) 97 | config["metrics"] = cls._parse_metrics(config["metrics"]) 98 | config["lr_scheduler"] = NameArgsConfig(**config["lr_scheduler"]) 99 | config["loss"] = NameArgsConfig(**config["loss"]) 100 | if "click_model" in config.keys(): 101 | config["click_model"] = NameArgsConfig(**config["click_model"]) 102 | return cls(**config) 103 | 104 | @staticmethod 105 | def _parse_metrics(metrics): 106 | metrics_dict = defaultdict(list) # type: Dict[str, list] 107 | for metric_string in metrics: 108 | try: 109 | name, at = metric_string.split("_") 110 | metrics_dict[name].append(int(at)) 111 | except (ValueError, TypeError): 112 | raise MetricConfigError( 113 | metric_string, 114 | "Wrong formatting of metric in config. Expected format: _ where name is valid metric name and at is and int") 115 | return metrics_dict 116 | 117 | 118 | class MetricConfigError(Exception): 119 | pass 120 | -------------------------------------------------------------------------------- /allrank/config_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": 3 | { 4 | "fc_model": 5 | { 6 | "sizes":