├── .gitignore ├── LICENSE ├── README.md ├── cwl-eval ├── cwl ├── .DS_Store ├── __init__.py ├── cwl_eval.py ├── ruler │ ├── __init__.py │ ├── cwl_ruler.py │ ├── measures │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── cwl_ap.py │ │ ├── cwl_bpm.py │ │ ├── cwl_dcg.py │ │ ├── cwl_ift.py │ │ ├── cwl_insq.py │ │ ├── cwl_inst.py │ │ ├── cwl_metrics.py │ │ ├── cwl_nerr.py │ │ ├── cwl_npv.py │ │ ├── cwl_precision.py │ │ ├── cwl_rbp.py │ │ ├── cwl_rr.py │ │ ├── cwl_set.py │ │ ├── cwl_tbg.py │ │ └── cwl_umeasure.py │ └── ranking.py ├── seeker │ ├── __init__.py │ ├── common_helpers.py │ ├── topic_document_file_handler.py │ ├── trec_qrel_handler.py │ └── trec_result_handler.py └── tests │ ├── __init__.py │ ├── big_gain_file │ ├── common_metric_test.py │ ├── cost_file │ ├── dcg_precision_metrics_file │ ├── gain_file │ ├── metrics_file │ ├── neg_gain_file │ ├── precision_metrics │ ├── qrel_file │ ├── ranking_test.py │ └── result_file ├── make-instructions.txt ├── make-requirements.txt ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ireval/cwl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # C/W/L Evaluation Script
2 | An evaluation script based on the C/W/L framework
3 | that is TREC-compatible and provides a replacement
4 | for INST_EVAL, RBP_EVAL, TBG_EVAL, UMeasure and TREC_EVAL.
5 | 
6 | 
7 | ## Install
8 | 
9 | Install either via `pip install cwl-eval` or `git clone https://github.com/ireval/cwl.git`.
10 | `cwl-eval` requires Python 3 and NumPy.
11 | 
12 | 
13 | ## Usage
14 | 
15 | Once you have installed the C/W/L Evaluation Framework using `pip install`, you should be able to use `cwl-eval` as shown below.
16 | If you have used `git clone` to install the framework, then you will need to run `cwl_eval.py` directly.
17 | 
18 |     Usage: cwl-eval <qrel_file> <result_file> -c <cost_file> -m <metrics_file> -b <bib_file>
19 | 
20 |     Usage: cwl-eval <qrel_file> <result_file> -c <cost_file> -m <metrics_file>
21 | 
22 |     Usage: cwl-eval <qrel_file> <result_file>
23 | 
24 |     Usage: cwl-eval -h
25 | 
26 | - <qrel_file>: A TREC formatted qrel file with relevance scores used as gains (float).
27 |   Four column tab/space sep file with fields: topic_id unused doc_id gain
28 | 
29 | - <cost_file>: Costs associated with each element type.
30 | 
31 |   If not specified, costs default to one for all elements.
32 |   Two column tab/space sep file with fields: element_type element_cost
33 | 
34 | - <result_file>: A TREC formatted result file.
35 |   Six column tab/space sep file with fields: topic_id element_type doc_id rank score run_id
36 | 
37 | - <metrics_file>: The list of metrics that are to be reported.
38 |   If not specified, a set of default metrics will be reported.
39 |   Tab/space sep file with fields: metric_name params
40 | 
41 | - <bib_file>: Specify this file if you would like the BibTeX associated with the measures specified to be
42 |   output to a file called <bib_file>.
43 | 
44 | - -n: Add the -n flag to output column names (e.g. Topic, Metric, EU, ETU, EC, ETC, ED)
45 | 
46 | - -r: Add the -r flag to also output residuals for each measurement.
47 | 
48 | - --max_depth <int>: Specify the depth of the calculation of the metrics (default=1000).
49 | 
50 | - --max_gain <float>: Specify the maximum value of the gain (default=1.0). Note some metrics have restrictions on the maximum allowable value. This is also used when computing the residuals.
51 | 
52 | - --min_gain <float>: Specify the minimum value of the gain (default=0.0). Note some metrics have restrictions on the minimum allowable value.
53 | 
54 | 
55 | 
56 | **Example without using a cost file**
57 | When no costs are specified, the cost per item is assumed to be 1.0, so EC will equal 1.0 and ETC will equal ED.
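For illustration, a minimal qrel_file in this format might contain lines such as the following (all topic, document and run identifiers in these snippets are made up):

    T1 0 D1 1.0
    T1 0 D4 0.5
    T1 0 D9 1.0

with a matching illustrative result_file:

    T1 doc D1 1 23.1 run1
    T1 doc D4 2 19.7 run1
    T1 doc D7 3 17.2 run1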
58 | 
59 |     cwl-eval qrel_file result_file
60 | 
61 | 
62 | **Example using a cost file**
63 | 
64 |     cwl-eval qrel_file result_file -c cost_file
65 | 
66 | 
67 | 
68 | **Output**
69 | A seven column tab/space separated file that contains:
70 | 
71 | - Topic ID
72 | - Metric Name
73 | - Expected Utility per Item (EU)
74 | - Expected Total Utility (ETU)
75 | - Expected Cost per Item (EC)
76 | - Expected Total Cost (ETC)
77 | - Expected Depth (ED)
78 | 
79 | If the `-r` flag is included, then another five columns will be included: ResEU, ResETU, ResEC, ResETC, ResED.
80 | These report the residual values for each of the measures (i.e. the difference between the best case and worst case for un-judged items).
81 | 
82 | 
83 | 
84 | CWL Citation
85 | ------------
86 | Please consider citing the following paper when using our code for your evaluations:
87 | 
88 |     @inproceedings{azzopardi2019cwl,
89 |     author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair},
90 |     title = {cwl\_eval: An Evaluation Tool for Information Retrieval},
91 |     booktitle = {Proc. of the 42nd International ACM SIGIR Conference},
92 |     series = {SIGIR '19},
93 |     year = {2019}
94 |     }
95 | 
96 | 
97 | 
98 | Metrics within CWL EVAL
99 | -----------------------
100 | For each of the metrics provided in cwl_eval.py, the user model for each
101 | measure has been extracted and encoded within the C/W/L framework.
102 | 
103 | All weightings have been converted to probabilities.
104 | 
105 | As a result, all metrics report a series of values (not a single value):
106 | - Expected Utility per item examined (EU),
107 | - Expected Total Utility (ETU),
108 | - Expected Cost per item examined (EC),
109 | - Expected Total Cost (ETC), and
110 | - Expected number of items to be examined, i.e. expected depth (ED).
111 | 
112 | All the values are related, such that:
113 | 
114 |     ETU = EU * ED
115 | 
116 | and
117 | 
118 |     ETC = EC * ED
119 | 
120 | If the cost per item is 1.0, then the expected cost per item (EC) is 1.0,
121 | and the expected total cost (ETC) will be equal to the expected depth (ED).
122 | 
123 | Costs can be specified in whatever unit is desired, e.g. seconds, characters, words, etc.
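For example, a cost_file charging two hypothetical element types different costs (here in seconds) would be:

    doc 2.0
    snippet 0.5

where each element type named in the second column of the result file is charged the corresponding cost.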
124 | 
125 | 
126 | **List of Metrics**
127 | 
128 | - RR - (Expected) Reciprocal Rank
129 | - P@k - Precision at k
130 | - AP - Average Precision
131 | - RBP - Rank Biased Precision
132 | - INST - with T, the desired amount of gain
133 | - INSQ - with T, the desired amount of gain
134 | - NDCG@k - Normalized Discounted Cumulative Gain at k
135 | - BPM-Static - Bejewelled Player Model - Static
136 | - BPM-Dynamic - Bejewelled Player Model - Dynamic
137 | - UMeasure - U-Measure
138 | - TBG - Time Biased Gain
139 | - IFT-C1 - Information Foraging Theory (Goal)
140 | - IFT-C2 - Information Foraging Theory (Rate)
141 | - IFT-C1-C2 - Information Foraging Theory (Goal and Rate)
142 | - NERREq8 - Not/Nearly ERR(Eq8)@k using gain based stopping with truncation k
143 | - NERREq9 - Not/Nearly ERR(Eq9)@k using gain based stopping and discount with truncation k
144 | - NERREq10 - Not/Nearly ERR(Eq10)@phi using gain based stopping and RBP patience (phi)
145 | - NERREq11 - Not/Nearly ERR(Eq11)@T using gain based stopping and INST Goal (T)
146 | 
147 | 
148 | 
149 | **Sample Output from cwl_eval.py where costs per item = 1.0**
150 | 
151 |     cwl-eval qrel_file result_file
152 | 
153 | | Topic | Metric                                             | EU    | ETU   | EC    | ETC    | ED     |
154 | |-------|----------------------------------------------------|-------|-------|-------|--------|--------|
155 | | T1 | P@20 | 0.150 | 3.000 | 1.000 | 20.000 | 20.000 |
156 | | T1 | P@10 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
157 | | T1 | P@5 | 0.360 | 1.800 | 1.000 | 5.000 | 5.000 |
158 | | T1 | P@1 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
159 | | T1 | RBP@0.5 | 0.566 | 1.132 | 1.000 | 2.000 | 2.000 |
160 | | T1 | RBP@0.9 | 0.214 | 2.136 | 1.000 | 10.000 | 10.000 |
161 | | T1 | SDCG-k@10 | 0.380 | 1.726 | 1.000 | 4.544 | 4.544 |
162 | | T1 | SDCG-k@5 | 0.461 | 1.358 | 1.000 | 2.948 | 2.948 |
163 | | T1 | RR | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
164 | | T1 | AP | 0.397 | 1.907 | 1.000 | 4.800 | 4.800 |
165 | | T1 | INST-T=2 | 0.401 | 1.303 | 1.000 | 3.242 | 3.247 |
166 | | T1 | INST-T=1 | 0.680 | 1.071 | 1.000 | 1.574 | 1.575 |
167 | | T1 | INSQ-T=2 | 0.316 | 1.428 | 1.000 | 4.509 | 4.525 |
168 | | T1 | INSQ-T=1 | 0.465 | 1.198 | 1.000 | 2.572 | 2.576 |
169 | | T1 | BPM-Static-T=1-K=1000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
170 | | T1 | BPM-Static-T=1000-K=10 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
171 | | T1 | BPM-Static-T=1.2-K=10 | 0.400 | 1.200 | 1.000 | 3.000 | 3.000 |
172 | | T1 | BPM-Dynamic-T=1-K=1000-hb=1.0-hc=1.0 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
173 | | T1 | BPM-Dynamic-T=1000-K=10-hb=1.0-hc=1.0 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
174 | | T1 | BPM-Dynamic-T=1.2-K=10-hb=1.0-hc=1.0 | 0.400 | 1.200 | 1.000 | 3.000 | 3.000 |
175 | | T1 | U-L@50 | 0.109 | 2.772 | 1.000 | 25.500 | 25.500 |
176 | | T1 | U-L@10 | 0.338 | 1.860 | 1.000 | 5.500 | 5.500 |
177 | | T1 | TBG-H@22 | 0.083 | 2.676 | 1.000 | 32.242 | 32.242 |
178 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@1 | 0.456 | 1.323 | 1.000 | 2.903 | 2.903 |
179 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@10 | 0.308 | 2.078 | 1.000 | 6.738 | 6.738 |
180 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@100 | 0.289 | 2.224 | 1.000 | 7.698 | 7.698 |
181 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@1 | 0.463 | 1.255 | 1.000 | 2.711 | 2.711 |
182 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@10 | 0.293 | 2.040 | 1.000 | 6.965 | 6.965 |
183 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@100 | 0.197 | 2.994 | 1.000 | 15.208 | 15.208 |
184 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@10-A@2.0-b2@0.9-R2@10 | 0.329 | 1.804 | 1.000 | 5.487 | 5.487 |
185 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@100-A@2.0-b2@0.9-R2@100 | 0.289 | 2.223 | 1.000 | 7.697 | 7.697 |
186 | 
187 | 
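Note that every row above satisfies the identities ETU = EU * ED and ETC = EC * ED; for example, for P@10: ETU = 0.300 * 10.000 = 3.000 and ETC = 1.000 * 10.000 = 10.000. The same identities hold in the costed output below.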
188 | **Sample Output from cwl-eval where costs are set based on cost_file**
189 | 
190 |     cwl-eval qrel_file result_file -c cost_file
191 | 
192 | | Topic | Metric                                             | EU    | ETU   | EC    | ETC    | ED     |
193 | |-------|----------------------------------------------------|-------|-------|-------|--------|--------|
194 | | T1 | P@20 | 0.150 | 3.000 | 1.650 | 33.000 | 20.000 |
195 | | T1 | P@10 | 0.300 | 3.000 | 2.300 | 23.000 | 10.000 |
196 | | T1 | P@5 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
197 | | T1 | P@1 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
198 | | T1 | RBP@0.5 | 0.566 | 1.132 | 1.951 | 3.902 | 2.000 |
199 | | T1 | RBP@0.9 | 0.214 | 2.136 | 1.776 | 17.765 | 10.000 |
200 | | T1 | SDCG-k@10 | 0.380 | 1.726 | 2.188 | 9.943 | 4.544 |
201 | | T1 | SDCG-k@5 | 0.461 | 1.358 | 2.224 | 6.557 | 2.948 |
202 | | T1 | RR | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
203 | | T1 | AP | 0.397 | 1.907 | 1.958 | 9.400 | 4.800 |
204 | | T1 | INST-T=2 | 0.401 | 1.303 | 1.884 | 6.113 | 3.247 |
205 | | T1 | INST-T=1 | 0.680 | 1.071 | 1.955 | 3.077 | 1.575 |
206 | | T1 | INSQ-T=2 | 0.316 | 1.428 | 1.799 | 8.125 | 4.525 |
207 | | T1 | INSQ-T=1 | 0.465 | 1.198 | 1.887 | 4.855 | 2.576 |
208 | | T1 | BPM-Static-T=1-K=1000 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
209 | | T1 | BPM-Static-T=1000-K=10 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
210 | | T1 | BPM-Static-T=1.2-K=10 | 0.400 | 1.200 | 1.667 | 5.000 | 3.000 |
211 | | T1 | BPM-Dynamic-T=1-K=1000-hb=1.0-hc=1.0 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
212 | | T1 | BPM-Dynamic-T=1000-K=10-hb=1.0-hc=1.0 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
213 | | T1 | BPM-Dynamic-T=1.2-K=10-hb=1.0-hc=1.0 | 0.400 | 1.200 | 1.667 | 5.000 | 3.000 |
214 | | T1 | U-L@50 | 0.162 | 2.552 | 1.654 | 26.000 | 15.720 |
215 | | T1 | U-L@10 | 0.444 | 1.420 | 2.094 | 6.700 | 3.200 |
216 | | T1 | TBG-H@22 | 0.143 | 2.339 | 2.046 | 33.508 | 16.375 |
217 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@1 | 0.456 | 1.323 | 1.971 | 5.723 | 2.903 |
218 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@10 | 0.308 | 2.078 | 2.080 | 14.017 | 6.738 |
219 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@100 | 0.289 | 2.224 | 2.068 | 15.922 | 7.698 |
220 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@1 | 0.516 | 1.180 | 1.958 | 4.481 | 2.289 |
221 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@10 | 0.404 | 1.368 | 2.011 | 6.802 | 3.382 |
222 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@100 | 0.360 | 1.786 | 2.388 | 11.832 | 4.954 |
223 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@10-A@2.0-b2@0.9-R2@10 | 0.413 | 1.361 | 1.990 | 6.552 | 3.293 |
224 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@100-A@2.0-b2@0.9-R2@100 | 0.360 | 1.786 | 2.388 | 11.832 | 4.954 |
225 | 
226 | 
227 | **Using the metrics_file to specify the metrics**
228 | 
229 |     cwl-eval qrel_file result_file -m metrics_file
230 | 
231 | If a metrics_file is not specified, CWL Eval will default to a set of metrics
232 | defined in `cwl/ruler/cwl_ruler.py`.
233 | 
234 | If the metrics_file is specified, CWL Eval will instantiate and use the metrics listed.
235 | An example test_metrics_file is provided, which includes the following:
236 | 
237 |     PrecisionCWLMetric(k=1)
238 |     PrecisionCWLMetric(k=5)
239 |     PrecisionCWLMetric(k=10)
240 |     PrecisionCWLMetric(k=20)
241 |     RBPCWLMetric(theta=0.9)
242 |     NDCGCWLMetric(k=10)
243 |     RRCWLMetric()
244 |     APCWLMetric()
245 |     INSTCWLMetric(T=1.0)
246 |     INSQCWLMetric(T=1.0)
247 |     BPMCWLMetric(T=1.0,K=20)
248 |     BPMCWLMetric(T=2.0,K=10)
249 |     BPMDCWLMetric(T=1.0,K=20)
250 |     BPMDCWLMetric(T=2.0,K=10)
251 |     UMeasureCWLMetric(L=50)
252 |     UMeasureCWLMetric(L=10)
253 |     TBGCWLMetric(halflife=22)
254 |     IFTGoalCWLMetric(T=2.0, b1=0.9, R1=10)
255 |     IFTGoalCWLMetric(T=2.0, b1=0.9, R1=100)
256 |     IFTRateCWLMetric(A=0.2, b2=0.9, R2=10)
257 |     IFTRateCWLMetric(A=0.2, b2=0.9, R2=100)
258 |     IFTGoalRateCWLMetric(T=2.0, b1=0.9, R1=10, A=0.2, b2=0.9, R2=10)
259 |     IFTGoalRateCWLMetric(T=2.0, b1=0.9, R1=100, A=0.2, b2=0.9, R2=100)
260 |     NERReq8CWLMetric(k=10)
261 |     NERReq9CWLMetric(k=10)
262 |     NERReq10CWLMetric(phi=0.8)
263 |     NERReq11CWLMetric(T=2.0)
264 | 
265 | To specify which metrics you desire, inspect the metric classes in `cwl/ruler/measures/`
266 | to see what metrics are available, and how to parameterize them.
267 | 
268 | For example, if you only wanted precision-based measures, then you can list them as follows:
269 | 
270 |     PrecisionCWLMetric(1)
271 |     PrecisionCWLMetric(2)
272 |     PrecisionCWLMetric(3)
273 |     PrecisionCWLMetric(4)
274 |     PrecisionCWLMetric(5)
275 |     PrecisionCWLMetric(6)
276 |     PrecisionCWLMetric(7)
277 |     PrecisionCWLMetric(8)
278 |     PrecisionCWLMetric(9)
279 |     PrecisionCWLMetric(10)
280 |     PrecisionCWLMetric(11)
281 |     PrecisionCWLMetric(12)
282 |     PrecisionCWLMetric(13)
283 |     PrecisionCWLMetric(14)
284 |     PrecisionCWLMetric(15)
285 |     PrecisionCWLMetric(16)
286 |     PrecisionCWLMetric(17)
287 |     PrecisionCWLMetric(18)
288 |     PrecisionCWLMetric(19)
289 |     PrecisionCWLMetric(20)
290 | 
291 | If you only wanted Rank Biased Precision measures, then you can vary the patience parameter:
292 | 
293 |     RBPCWLMetric(0.1)
294 |     RBPCWLMetric(0.2)
295 |     RBPCWLMetric(0.3)
296 |     RBPCWLMetric(0.4)
297 |     RBPCWLMetric(0.5)
298 |     RBPCWLMetric(0.6)
299 |     RBPCWLMetric(0.7)
300 |     RBPCWLMetric(0.8)
301 |     RBPCWLMetric(0.9)
302 |     RBPCWLMetric(0.95)
303 |     RBPCWLMetric(0.99)
304 | 
305 | 
306 | 
--------------------------------------------------------------------------------
/cwl-eval:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import os
4 | import sys
5 | from distutils.sysconfig import get_python_lib
6 | 
7 | if __name__ == '__main__':
8 |     # Work out the directory for the cwl scripts (in the site-packages directory)
9 |     site_packages_dir = get_python_lib()
10 |     cwl_dir = os.path.join(site_packages_dir, 'cwl')
11 |     #print(cwl_dir)
12 | 
13 |     # If in developer mode, we can work this out from the current directory.
14 |     current_dir = os.path.dirname(os.path.realpath(__file__))
15 |     scripts_dir = os.path.join(current_dir, 'cwl')
16 |     #print(scripts_dir)
17 | 
18 |     # Prepend the paths to the PYTHONPATH for this instance.
19 |     sys.path.insert(0, cwl_dir)
20 |     #sys.path.insert(0, os.path.join(cwl_dir,'cwl'))
21 | 
22 |     sys.path.insert(0, scripts_dir)
23 |     #sys.path.insert(0, os.path.join(scripts_dir,'cwl'))
24 |     #print(os.path.join(scripts_dir,'cwl'))
25 |     # Now we should be able to import CWL without issue
26 |     from cwl import cwl_eval
27 | 
28 |     # Parse the arguments, check that the files exist, and run!
29 |     args = cwl_eval.parse_args()
30 | 
31 |     cwl_eval.check_file_exists(args.result_file)
32 |     cwl_eval.check_file_exists(args.gain_file)
33 |     cwl_eval.check_file_exists(args.cost_file)
34 |     cwl_eval.check_file_exists(args.metrics_file)
35 | 
36 |     cwl_eval.main(args.result_file, args.gain_file, args.cost_file, args.metrics_file, args.bib_file,
37 |                   args.colnames, args.residuals, args.max_gain, args.min_gain, args.max_cost, args.min_cost, args.max_depth)
--------------------------------------------------------------------------------
/cwl/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/.DS_Store
--------------------------------------------------------------------------------
/cwl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/__init__.py
--------------------------------------------------------------------------------
/cwl/cwl_eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | cwl_eval tool for information retrieval evaluation of TREC formatted results
4 | """
5 | 
6 | __author__ = 'leifos'
7 | __credits__ = ['InProceedings{Azzopardi:2019:cwl,'
8 |                'author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair}, '
9 |                'title = {cwl_eval: An evaluation tool for information retrieval},'
10 |                'booktitle = {Proceedings of the International ACM SIGIR Conference},'
11 |                'year = {2019}}']
12 | __license__ = 'MIT'
13 | __version__ = '1.0.0'
14 | 
15 | import os
16 | import argparse
17 | import logging
18 | from cwl.seeker.trec_qrel_handler import TrecQrelHandler
19 | from cwl.ruler.cwl_ruler import CWLRuler
20 | from cwl.ruler.ranking import RankingMaker, Ranking
21 | 
22 | 
23 | def read_in_cost_file(cost_file):
24 |     """
25 |     Reads in the cost file and stores it in a dictionary for looking up the costs.
26 |     The element_type is to be denoted in the TREC Results File using the previously unused field (2nd Column).
27 |     :param cost_file: expects a space/tab separated file with element_type (string) and cost (float)
28 |     :return: returns a dictionary of element_type/costs
29 |     """
30 |     costs = dict()
31 |     with open(cost_file, "r") as cf:
32 |         while cf:
33 |             line = cf.readline()
34 |             if not line:
35 |                 break
36 |             (element_type, cost) = line.split()
37 |             element_type = element_type.strip()
38 |             costs[element_type] = float(cost)
39 |     return costs
40 | 
41 | 
42 | def check_file_exists(filename):
43 |     if filename and not os.path.exists(filename):
44 |         print("{0} Not Found".format(filename))
45 |         quit(1)
46 | 
47 | 
48 | def parse_args():
49 | 
50 |     arg_parser = argparse.ArgumentParser(description="CWL Evaluation Metrics")
51 |     arg_parser.add_argument("gain_file", help="A TREC Formatted Qrel File with "
52 |                             "relevance column assumed to be gain values."
53 |                             "Gain values should be between zero and one (unless otherwise specified)."
54 |                             "Four column tab/space sep file with fields: topic_id unused doc_id gain")
55 |     arg_parser.add_argument("result_file",
56 |                             help="TREC formatted results file. Six column tab/space sep file with fields:"
57 |                                  " topic_id element_type doc_id rank score run_id.")
58 |     arg_parser.add_argument("-c", "--cost_file",
59 |                             help="Costs associated with each element type specified in result file.",
60 |                             required=False, default=None)
61 |     arg_parser.add_argument("-m", "--metrics_file", help="The list of metrics that are to be reported. "
62 |                             "If not specified, a set of default metrics will be reported."
63 |                             " Tab/space sep file with fields: metric_name params",
64 |                             required=False, default=None)
65 |     arg_parser.add_argument("-b", "--bib_file", help="If specified, then the BibTeX for the measures used"
66 |                             " will be saved to the filename given.", required=False,
67 |                             default=None)
68 |     arg_parser.add_argument("-n", "--colnames", help="Includes headings in the output.",
69 |                             required=False, action="store_true")
70 |     arg_parser.add_argument("-r", "--residuals", help="Include residual calculations.",
71 |                             required=False, action="store_true")
72 |     arg_parser.add_argument("--max_gain", help="Maximum gain associated with an item used for computing residuals"
73 |                             " and checking gain input file. "
74 |                             "(default=1.0)", required=False, default=1.0, type=float)
75 |     arg_parser.add_argument("--min_gain", help="Minimum gain associated with an item used"
76 |                             " for checking gain input file. "
77 |                             "(default=0.0)", required=False, default=0.0, type=float)
78 |     arg_parser.add_argument("--max_cost", help="Maximum cost associated with an item. Used for computing residuals. "
79 |                             "(default=1.0)", required=False, default=1.0, type=float)
80 |     arg_parser.add_argument("--min_cost", help="Minimum cost associated with an item. Used for computing residuals. "
81 |                             "(default=1.0)", required=False, default=1.0, type=float)
82 |     arg_parser.add_argument("--max_depth", help="Maximum depth to compute metrics. 
" 83 | "(default=1000)", required=False, default=1000, type=int) 84 | 85 | p_args = arg_parser.parse_args() 86 | if p_args.colnames: 87 | p_args.colnames = True 88 | else: 89 | p_args.colnames = False 90 | 91 | if p_args.residuals: 92 | p_args.residuals = True 93 | else: 94 | p_args.residuals = False 95 | 96 | return p_args 97 | 98 | 99 | def main(results_file, gain_file, cost_file=None, metrics_file=None, bib_file=None, col_names=False, 100 | residuals=False, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 101 | 102 | logger = logging.getLogger('cwl') 103 | logger.setLevel(logging.DEBUG) 104 | logger.addHandler(logging.FileHandler('cwl.log')) 105 | logger.info("Processing: {} using gain: {} and costs: {}".format(results_file, gain_file, cost_file)) 106 | logger.info("max_gain={} min_gain={} max_cost={} min_cost={} max_n={}".format(max_gain, min_gain, max_cost, min_cost, max_n)) 107 | if residuals: 108 | logger.info("Residuals are being computed assuming max gain is: {}".format(max_gain)) 109 | qrh = TrecQrelHandler(gain_file) 110 | qrh.validate_gains(min_gain=min_gain, max_gain=max_gain) 111 | costs = None 112 | # read in cost file - if cost file exists 113 | if cost_file: 114 | costs = read_in_cost_file(cost_file) 115 | cwl_ruler = CWLRuler(metrics_file, residuals) 116 | 117 | curr_topic_id = None 118 | ranking_maker = None 119 | 120 | if col_names: 121 | if residuals: 122 | print("Topic\tMetric\tEU\tETU\tEC\tETC\tED\tResEU\tResETU\tResEC\tResETC\tResED") 123 | else: 124 | print("Topic\tMetric\tEU\tETU\tEC\tETC\tED") 125 | 126 | with open(results_file, "r") as rf: 127 | while rf: 128 | line = rf.readline() 129 | if not line: 130 | break 131 | (topic_id, element_type, doc_id, rank, score, run_id) = line.split() 132 | doc_id = doc_id.strip() 133 | 134 | if topic_id == curr_topic_id: 135 | # build vectors 136 | ranking_maker.add(doc_id, element_type) 137 | else: 138 | if curr_topic_id is not None: 139 | # Perform the measurements 140 | ranking = ranking_maker.get_ranking() 141 | # print(ranking._gains[0:10]) 142 | cwl_ruler.measure(ranking) 143 | cwl_ruler.report() 144 | 145 | # new topic 146 | curr_topic_id = topic_id 147 | 148 | # reset seen list 149 | ranking_maker = RankingMaker(curr_topic_id, qrh, costs, 150 | max_gain=max_gain, max_cost=max_cost, min_cost=min_cost, max_n=max_n) 151 | ranking_maker.add(doc_id, element_type) 152 | 153 | # Perform the Measurements on the last topic 154 | ranking = ranking_maker.get_ranking() 155 | # print(ranking._gains[0:10]) 156 | cwl_ruler.measure(ranking) 157 | cwl_ruler.report() 158 | 159 | if bib_file: 160 | cwl_ruler.save_bibtex(bib_file) 161 | 162 | 163 | if __name__ == "__main__": 164 | args = parse_args() 165 | 166 | check_file_exists(args.result_file) 167 | check_file_exists(args.gain_file) 168 | check_file_exists(args.cost_file) 169 | check_file_exists(args.metrics_file) 170 | 171 | main(args.result_file, args.gain_file, args.cost_file, args.metrics_file, args.bib_file, 172 | args.colnames, args.residuals, args.max_gain, args.min_gain, args.max_cost, args.min_cost, args.max_depth) 173 | -------------------------------------------------------------------------------- /cwl/ruler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/__init__.py -------------------------------------------------------------------------------- /cwl/ruler/cwl_ruler.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | import importlib 4 | 5 | from cwl.ruler.measures.cwl_metrics import * 6 | from cwl.ruler.measures.cwl_precision import * 7 | from cwl.ruler.measures.cwl_rbp import * 8 | from cwl.ruler.measures.cwl_rr import * 9 | from cwl.ruler.measures.cwl_ap import * 10 | from cwl.ruler.measures.cwl_dcg import * 11 | from cwl.ruler.measures.cwl_inst import * 12 | from cwl.ruler.measures.cwl_insq import * 13 | from cwl.ruler.measures.cwl_tbg import * 14 | from cwl.ruler.measures.cwl_bpm import * 15 | from cwl.ruler.measures.cwl_umeasure import * 16 | from cwl.ruler.measures.cwl_ift import * 17 | from cwl.ruler.measures.cwl_nerr import * 18 | from cwl.ruler.ranking import Ranking 19 | 20 | class CWLRuler(object): 21 | 22 | def __init__(self, metrics_file=None, residuals=False): 23 | self.metrics = [] 24 | #add the metrics to the list 25 | if metrics_file: 26 | # load up the metrics specified 27 | self.populate_list(metrics_file) 28 | else: 29 | # use the default set of metrics 30 | # ideally we will tune these to create a set of baselines. 31 | # however, depending on the costs used... the tuning will be different 32 | # for instance, U-measure costs are in characters, while TBG costs are in seconds 33 | # if costs are not specified, then the cost of each item is 1.0 34 | self.metrics = [ 35 | PrecisionCWLMetric(1), 36 | PrecisionCWLMetric(2), 37 | PrecisionCWLMetric(3), 38 | PrecisionCWLMetric(4), 39 | PrecisionCWLMetric(5), 40 | PrecisionCWLMetric(10), 41 | RBPCWLMetric(0.2), 42 | RBPCWLMetric(0.4), 43 | RBPCWLMetric(0.8), 44 | NDCGCWLMetric(5), 45 | NDCGCWLMetric(10), 46 | RRCWLMetric(), 47 | APCWLMetric(), 48 | INSTCWLMetric(1.0), 49 | INSTCWLMetric(2.0), 50 | INSTCWLMetric(3.0), 51 | ] 52 | 53 | for m in self.metrics: 54 | m.residuals = residuals 55 | 56 | def measure(self, ranking): 57 | for metric in self.metrics: 58 | metric.measure(ranking) 59 | 60 | def report(self): 61 | for metric in self.metrics: 62 | metric.report() 63 | 64 | def csv(self): 65 | out = "" 66 | for metric in self.metrics: 67 | out += (metric.csv() + ";") 68 | return out 69 | 70 | def populate_list(self, input_filename): 71 | """ 72 | Reads from the input filename -- should be like 73 | ClassName(param1, param2, ...) 74 | Then once each class has been instantiated, adds to the self.metrics list 75 | Thanks @maxwelld90 76 | """ 77 | f = open(input_filename, 'r') 78 | 79 | for line in f: 80 | # Process the input line 81 | line_split = line.strip().split('(') 82 | line_split[-1] = line_split[-1][:-1] # Removes the extra bracket at the end 83 | 84 | class_name = line_split[0] 85 | parameters = line_split[1].split(',') 86 | self.metrics.append(self.instantiate_class(class_name, *parameters)) 87 | 88 | f.close() 89 | 90 | def instantiate_class(self, requested_class_name, *args, **kwargs): 91 | """ 92 | Given a class name and one or more parameters, attempts to instantiate the requested class with the provided parameters. 93 | If successful, the instantiated class is returned. 94 | """ 95 | classes = self.get_class_list() 96 | ref = None 97 | casted_args = [] 98 | 99 | # Change the args to ints/floats. Assuming that that is all that is required. 100 | for i in range(0, len(args)): 101 | val = args[i] 102 | 103 | if val == '': 104 | continue 105 | 106 | if '.' 
in val:
107 |                 casted_args.append(float(val))
108 |             else:
109 |                 casted_args.append(int(val))
110 | 
111 |         for class_tuple in classes:
112 |             class_name = class_tuple[0]
113 |             class_ref = class_tuple[1]
114 | 
115 |             if class_name == requested_class_name:
116 |                 ref = class_ref(*casted_args)  # Instantiate the class with parameters!
117 |                 # If you want to use parameter names, try kwargs instead.
118 | 
119 |         # If ref is not set, the class was not located!
120 |         if ref is None:
121 |             raise NameError("The class {0} could not be found.".format(requested_class_name))
122 | 
123 |         return ref
124 | 
125 |     def get_class_list(self):
126 |         """
127 |         Looking inside the measures package, returns a list of all the classes that are available for instantiating.
128 |         This means that any class inside any .py file in the measures directory is returned in the list from this method.
129 |         """
130 |         modules = []
131 |         classes = []
132 |         path = os.path.dirname(os.path.abspath(__file__))
133 |         measures_path = os.path.join(path, 'measures')
134 |         package_path = 'ruler.measures'
135 | 
136 |         # List through the modules in the specified package, ignoring __init__.py, and append them to a list.
137 |         for f in os.listdir(measures_path):
138 |             if f.endswith('.py') and not f.startswith('__init__'):
139 |                 modules.append('{0}.{1}'.format(package_path, os.path.splitext(f)[0]))
140 | 
141 |         module_references = []
142 | 
143 |         # Attempt to import each module in turn so we can access its classes
144 |         for module in modules:
145 |             module_references.append(importlib.import_module(module))
146 | 
147 |         # Now loop through each module, looking at the classes within it -
148 |         # and then append each class to a list of valid classes.
149 |         for module in module_references:
150 |             for name, obj in inspect.getmembers(module):
151 |                 if inspect.isclass(obj):
152 |                     classes.append((obj.__name__, obj))
153 | 
154 |         return classes
155 | 
156 |     def print_list(self):
157 |         """
158 |         Proof that it works, iterates over each instantiated metric class and calls whoami().
159 |         """
160 |         print("Displaying each metric:")
161 |         print("======")
162 |         for metric in self.metrics:
163 |             metric.whoami()
164 |             print("======")
165 |         print(self.metrics)
166 |         print("END")
167 |         print()
168 | 
169 |     def save_bibtex(self, bib_file):
170 | 
171 |         eval_tool_bibtex = """
172 |         @inproceedings{azzopardi2019cwl,
173 |         author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair},
174 |         title = {cwl\_eval: An Evaluation Tool for Information Retrieval},
175 |         booktitle = {Proc. of the 42nd International ACM SIGIR Conference},
176 |         series = {SIGIR '19},
177 |         year = {2019}
178 |         }
179 |         """
180 | 
181 |         bib_list = [eval_tool_bibtex]
182 | 
183 |         for m in self.metrics:
184 |             if m.bibtex not in bib_list:
185 |                 bib_list.append(m.bibtex)
186 | 
187 |         with open(bib_file, "w") as bf:
188 |             for bib in bib_list:
189 |                 bf.write(bib)
190 |                 bf.write("\n")
191 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/measures/.DS_Store
--------------------------------------------------------------------------------
/cwl/ruler/measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/measures/__init__.py
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_ap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | """
6 | APCWLMetric implements:
7 | Average Precision (corrected to use R, the total number of relevant items, as the denominator) - Harman
8 | 
9 | TrAPCWLMetric implements:
10 | Average Precision which uses the number of relevant items retrieved as the denominator - Harman
11 | 
12 | 
13 | GrAPCWLMetric (TO BE IMPLEMENTED):
14 | @inproceedings{Robertson:2010:EAP:1835449.1835550,
15 | author = {Robertson, Stephen E. and Kanoulas, Evangelos and Yilmaz, Emine},
16 | title = {Extending Average Precision to Graded Relevance Judgments},
17 | booktitle = {Proceedings of the 33rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
18 | series = {SIGIR '10},
19 | year = {2010},
20 | location = {Geneva, Switzerland},
21 | pages = {603--610},
22 | url = {http://doi.acm.org/10.1145/1835449.1835550}
23 | }
24 | """
25 | 
26 | class APCWLMetric(CWLMetric):
27 |     def __init__(self):
28 |         super().__init__()
29 |         self.metric_name = "AP"
30 |         self.bibtex = """
31 |         @article{Harman:1992:ESIR,
32 |         author = {Donna Harman},
33 |         title = {Evaluation Issues in Information Retrieval},
34 |         journal = {Information Processing and Management},
35 |         volume = {28},
36 |         number = {4},
37 |         pages = {439--440},
38 |         year = {1992},
39 |         }
40 | 
41 |         """
42 | 
43 |     def name(self):
44 |         return self.metric_name
45 | 
46 |     def c_vector(self, ranking, worse_case=True):
47 |         gains = ranking.get_gain_vector(worse_case)
48 |         rels = 0
49 |         for g in gains:
50 |             if g > 0.0:
51 |                 rels += 1
52 | 
53 |         n = len(gains)
54 |         rii = []
55 |         cvec = []
56 |         for i in range(0, n):
57 |             rii.append(gains[i]/(i+1))
58 | 
59 |         for i in range(0, n-1):
60 |             bot = np.sum(rii[i:n])
61 |             top = np.sum(rii[i+1:n])
62 | 
63 |             if top > 0.0:
64 |                 cvec.append(top/bot)
65 |             else:
66 |                 cvec.append(0.0)
67 | 
68 |         cvec.append(0.0)
69 |         cvec = np.array(cvec)
70 |         return cvec
71 | 
72 | 
73 | class TrAPCWLMetric(CWLMetric):
74 |     """
75 |     According to Sanderson (http://www.marksanderson.org/publications/my_papers/FnTIR.pdf)
76 |     Harman was the first to publish the non-interpolated AP measure.
77 |     However, apparently Harman's paper had an error: the denominator was the number of relevant items retrieved
78 |     and not the total number of relevant items (known). This was later corrected.
79 |     """
80 |     def __init__(self):
81 |         super().__init__()
82 |         self.metric_name = "TrAP"
83 |         self.bibtex = """
84 |         @article{Harman:1992:ESIR,
85 |         author = {Donna Harman},
86 |         title = {Evaluation Issues in Information Retrieval},
87 |         journal = {Information Processing and Management},
88 |         volume = {28},
89 |         number = {4},
90 |         pages = {439--440},
91 |         year = {1992},
92 |         }
93 |         """
94 | 
95 |     def name(self):
96 |         return self.metric_name
97 | 
98 |     def c_vector(self, ranking, worse_case=True):
99 |         wvec = self.w_vector(ranking, worse_case)
100 | 
101 |         cvec = []
102 |         for i in range(0, len(wvec)-1):
103 |             if wvec[i] > 0.0:
104 |                 cvec.append(wvec[i+1] / wvec[i])
105 |             else:
106 |                 cvec.append(0.0)
107 | 
108 |         cvec.append(0.0)
109 |         cvec = np.array(cvec)
110 | 
111 |         return cvec
112 | 
113 |     def w_vector(self, ranking, worse_case=True):
114 |         wvec = []
115 |         c_costs = np.cumsum(ranking.get_cost_vector(worse_case))
116 |         c_gains = np.cumsum(ranking.get_gain_vector(worse_case))
117 | 
118 |         i = 0
119 |         while (c_gains[i] == 0) and (i < len(c_gains)-1):
120 |             c_gains[i] = 1.0
121 |             i += 1
122 | 
123 |         total_rels = ranking.get_total_rels(worse_case)
124 |         wvec = np.divide(c_gains, c_costs)
125 |         if total_rels > 0:
126 |             wvec = wvec / total_rels
127 | 
128 |         return np.array(wvec)
129 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_bpm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from cwl.ruler.measures.cwl_metrics import CWLMetric
3 | 
4 | """
5 | Bejewelled Player Model (BPM) by Zhang et al (2017)
6 | 
7 | Gains are assumed to be scaled to be between 0.0 and 1.0,
8 | thus rel_max is assumed to be 1.0.
9 | 
10 | In Zhang et al (2017), rel_max is an integer i.e. 0,1,2,3 (for 4 levels of grades)
11 | and the rel level is raised to the power of 2. To encode this within the C/W/L BPM,
12 | the rel levels would need to be re-scaled to be between zero and one.
13 | 
14 | Static: takes T (i.e. E_b) and K (i.e. E_c) in Zhang et al (2017)
15 | T is the total amount of gain desired - similar to T in INST and IFT
16 | 
17 | K is the total amount of cost willing to be spent, similar to k in precision,
18 | however K can be any unit of cost (depending on the costs file),
19 | while k in P@k is the number of documents.
20 | In Zhang et al (2017), K is k, the number of documents,
21 | but here we provide the generalized version,
22 | such that K can be set based on the costs specified for each doc (as per the cost file)
23 | 
24 | 
25 | Dynamic: also takes hb, hc and gain_med (i.e. rel_med in Zhang et al (2017)).
26 | hb and hc control how strongly T and K are adjusted after each item.
27 | 
28 | gain_med is the median gain (i.e. a value between 0 and 1.0):
29 | if the gain observed at position i is higher than gain_med,
30 | then T is increased, as is K
31 | 
32 | if the gain observed at position i is lower than gain_med,
33 | then T is decreased, as is K
34 | 
35 | The change in gain is: T <- T + hb * (gain[i] - gain_med)
36 | The change in cost is: K <- K + hc * (gain[i] - gain_med)
37 | 
38 | hb and hc are therefore scaling parameters.
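For example, with hb = 1.0 and gain_med = 0.5, an item with gain 1.0 raises T by 0.5,
while an item with gain 0.0 lowers T by 0.5; K is adjusted in the same way via hc.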
39 | 40 | """ 41 | 42 | class BPMCWLMetric(CWLMetric): 43 | 44 | def __init__(self, T=1.0, K=10): 45 | CWLMetric.__init__(self) 46 | # super(CWLMetric, self).__init__() 47 | self.metric_name = "BPM-Static-T={0}-K={1}".format(T, K) 48 | self.T = T # E_b the total amount of benefit desired 49 | self.K = K # E_c the total amount of cost or documents willing to be examined 50 | self.bibtex = """ 51 | @inproceedings{Zhang:2017:EWS:3077136.3080841, 52 | author = {Zhang, Fan and Liu, Yiqun and Li, Xin and Zhang, Min and Xu, Yinghui and Ma, Shaoping}, 53 | title = {Evaluating Web Search with a Bejeweled Player Model}, 54 | booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, 55 | series = {SIGIR '17}, 56 | year = {2017}, 57 | location = {Shinjuku, Tokyo, Japan}, 58 | pages = {425--434}, 59 | url = {http://doi.acm.org/10.1145/3077136.3080841}, 60 | } 61 | """ 62 | 63 | def name(self): 64 | return "BPM-Static-T={0}-K={1}".format(self.T,self.K) 65 | 66 | 67 | def c_vector(self, ranking, worse_case=True): 68 | gains = ranking.get_gain_vector(worse_case) 69 | costs = ranking.get_cost_vector(worse_case) 70 | 71 | c_gain = np.cumsum(gains) 72 | c_cost = np.cumsum(costs) 73 | 74 | # GAIN Constraint 75 | rr_cvec = np.zeros(len(gains)) 76 | i = 0 77 | # continue until the gain accumulated exceeds T 78 | while i < len(gains) and (c_gain[i] < self.T): 79 | rr_cvec[i] = 1.0 80 | i = i + 1 81 | 82 | # COST Constraint 83 | p_cvec = np.zeros(len(costs)) 84 | i = 0 85 | # continue until the costs accumulated exceeds K 86 | while i < len(costs) and (c_cost[i] < self.K): 87 | p_cvec[i] = 1.0 88 | i = i + 1 89 | 90 | # combine the two continuation vectors 91 | bpm_cvec = np.zeros(len(costs)) 92 | i = 0 93 | while i < len(costs): 94 | if (rr_cvec[i] == 1.0) and (p_cvec[i] == 1.0): 95 | bpm_cvec[i] = 1.0 96 | i = i + 1 97 | 98 | return bpm_cvec 99 | 100 | 101 | 102 | 103 | class BPMDCWLMetric(CWLMetric): 104 | 105 | def __init__(self, T=1, K=10, hb=1.0, hc=1.0, gain_med=0.5): 106 | super().__init__() 107 | self.metric_name = "BPM-Dynamic-T={0}-K={1}-hb={2}-hc={3}".format(T,K,hb,hc) 108 | self.T = T # E_b the total amount of benefit desired 109 | self.K = K # E_c the total amount of cost or documents willing to be examined 110 | self.hb = hb # the scaling factor to adjust the T constraint by 111 | self.hc = hc # the scaling factor to adjust the K constraint by 112 | self.gain_med = gain_med # i.e. 
rel_med to adjust the T and K by
113 |         self.bibtex = """
114 |         @inproceedings{Zhang:2017:EWS:3077136.3080841,
115 |         author = {Zhang, Fan and Liu, Yiqun and Li, Xin and Zhang, Min and Xu, Yinghui and Ma, Shaoping},
116 |         title = {Evaluating Web Search with a Bejeweled Player Model},
117 |         booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
118 |         series = {SIGIR '17},
119 |         year = {2017},
120 |         location = {Shinjuku, Tokyo, Japan},
121 |         pages = {425--434},
122 |         url = {http://doi.acm.org/10.1145/3077136.3080841},
123 |         }
124 |         """
125 | 
126 |     def name(self):
127 |         return "BPM-Dynamic-T={0}-K={1}-hb={2}-hc={3}".format(self.T, self.K, self.hb, self.hc)
128 | 
129 |     def c_vector(self, ranking, worse_case=True):
130 |         gains = ranking.get_gain_vector(worse_case)
131 |         costs = ranking.get_cost_vector(worse_case)
132 |         c_gain = np.cumsum(gains)
133 |         c_cost = np.cumsum(costs)
134 | 
135 |         # GAIN Constraint
136 |         rr_cvec = np.zeros(len(gains))
137 |         i = 0
138 |         T = self.T
139 |         # continue until the gain accumulated exceeds T
140 |         while i < len(gains) and (c_gain[i] < T):
141 |             rr_cvec[i] = 1.0
142 |             # Now update T, depending on gain[i]
143 |             T = T + self.hb * (gains[i] - self.gain_med)
144 | 
145 |             i = i + 1
146 |         # COST Constraint
147 |         p_cvec = np.zeros(len(costs))
148 |         i = 0
149 |         K = self.K
150 |         # continue until the costs accumulated exceeds K
151 |         while i < len(costs) and (c_cost[i] < K):
152 |             p_cvec[i] = 1.0
153 |             # Now update K, depending on gain[i]
154 |             K = K + self.hc * (gains[i] - self.gain_med)
155 |             i = i + 1
156 | 
157 |         # combine the two continuation vectors
158 |         bpm_cvec = np.zeros(len(costs))
159 |         i = 0
160 |         while i < len(costs):
161 |             if (rr_cvec[i] == 1.0) and (p_cvec[i] == 1.0):
162 |                 bpm_cvec[i] = 1.0
163 |             i = i + 1
164 | 
165 |         return bpm_cvec
166 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_dcg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | 
6 | """
7 | Discounted Cumulative Gain by Jarvelin and Kekalainen (2002)
8 | The discount is scaled so that it forms a proper probability distribution
9 | 
10 | k is the rank cut off, i.e. the number of items to be examined
11 | base is the base of the log for the discounting, which is set to 2 by default as per the original paper.
12 | """
13 | 
14 | class NDCGCWLMetric(CWLMetric):
15 |     def __init__(self, k):
16 |         super().__init__()
17 |         self.metric_name = "NDCG-k@{0}".format(k)
18 |         self.k = k
19 |         self.base = 2.0
20 |         self.bibtex = """
21 |         @article{Jarvelin:2002:CGE:582415.582418,
22 |         author = {J\"{a}rvelin, Kalervo and Kek\"{a}l\"{a}inen, Jaana},
23 |         title = {Cumulated Gain-based Evaluation of IR Techniques},
24 |         journal = {ACM Trans. Inf. Syst.},
25 |         volume = {20},
26 |         number = {4},
27 |         year = {2002},
28 |         pages = {422--446},
29 |         numpages = {25},
30 |         url = {http://doi.acm.org/10.1145/582415.582418},
31 |         }
32 |         """
33 | 
34 |     def name(self):
35 |         return "NDCG-k@{0}".format(self.k)
36 | 
37 |     def c_vector(self, ranking, worse_case=True):
38 | 
39 |         cvec = []
40 |         for i in range(1, ranking.n+1):
41 |             if i < self.k:
42 |                 cvec.append(math.log(i+1, self.base)/math.log(i+2, self.base))
43 |             else:
44 |                 cvec.append(0.0)
45 | 
46 |         cvec = np.array(cvec)
47 | 
48 |         return cvec
49 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_ift.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | """
6 | Information Foraging Based Measure by Azzopardi et al (2018)
7 | 
8 | T is the target gain (i.e. how much is desired)
9 | A is the average rate of gain that is expected
10 | b1/b2 are intercept parameters
11 | R1/R2 are 'rationality' parameters; as R1/R2 are increased to infinity, the searcher becomes increasingly rational,
12 | and will stop if T is met, or A is not met; but as R1/R2 are decreased to zero, the searcher will become ambivalent
13 | towards T or A respectively, and fall back to the default b1/b2 intercepts, i.e. T or A will not influence the decision
14 | to continue.
15 | 
16 | As a result, if R1/R2 are set to zero, then the metric becomes akin to RBP.
17 | If R1 is set to inf, then once T gain is acquired, the searcher will stop - which is akin to RR (where T would equal 1).
18 | If R1/R2 are set in between, then it suggests that as the user approaches T, they become more likely to stop, as they
19 | are getting closer to their goal, and once they reach their goal, they are still likely to continue (but to a lesser
20 | and lesser degree). Similarly, if the user is experiencing a rate of gain higher than A, then they are much more likely to continue,
21 | but as the rate of gain decreases and gets further from A, the user is less likely to continue.
22 | 
23 | IFTGoalCWLMetric implements the Goal only variant
24 | IFTRateCWLMetric implements the Rate only variant
25 | IFTGoalRateCWLMetric implements the Goal and Rate variant - which was shown to be the most accurate in Azzopardi et al.
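Concretely, the continuation probabilities implemented by the classes below are:

    C1(i) = 1 - (1 + b1 * e^((T - y_i) * R1))^-1      (goal variant)
    C2(i) = (1 + b2 * e^((A - y_i / k_i) * R2))^-1    (rate variant)

where y_i and k_i are the gain and cost accumulated up to rank i;
the Goal and Rate variant continues at rank i with probability C1(i) * C2(i).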
26 | """ 27 | 28 | 29 | class IFTGoalCWLMetric(CWLMetric): 30 | def __init__(self, T, b1, R1): 31 | super().__init__() 32 | self.metric_name = "IFT-C1-T={0}-b1={1}-R1={2}".format(T,b1,R1) 33 | self.b1 = b1 34 | self.T = T 35 | self.R1 = R1 36 | self.bibtex = "@inproceedings{Azzopardi:2018:MUS:3209978.3210027," \ 37 | "author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}," \ 38 | "title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}," \ 39 | "booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}," \ 40 | "series = {SIGIR '18}," \ 41 | "year = {2018}," \ 42 | "location = {Ann Arbor, MI, USA}," \ 43 | "pages = {605--614}," \ 44 | "numpages = {10}," \ 45 | "} " 46 | 47 | def name(self): 48 | return "IFT-C1-T={0}-b1={1}-R1={2}".format(self.T, self.b1, self.R1) 49 | 50 | def c_vector(self, ranking, worse_case=True): 51 | gains = ranking.get_gain_vector(worse_case) 52 | c_gains = np.cumsum(gains) 53 | cvec = [] 54 | for i in range(0, len(gains)): 55 | c1 = self.c1_func(c_gains[i]) 56 | cvec.append(c1) 57 | cvec = np.array(cvec) 58 | return cvec 59 | 60 | def c1_func(self, yi): 61 | ex = (1.0 + self.b1 * math.pow(math.e, ((self.T-yi) * self.R1))) 62 | return 1.0 - math.pow(ex, -1.0) 63 | 64 | 65 | class IFTRateCWLMetric(CWLMetric): 66 | def __init__(self, A, b2, R2): 67 | super().__init__() 68 | self.metric_name = "IFT-C2-A={0}-b2={1}-R2={2}".format(A, b2, R2) 69 | self.b2 = b2 70 | self.A = A 71 | self.R2 = R2 72 | self.bibtex = "@inproceedings{Azzopardi:2018:MUS:3209978.3210027," \ 73 | "author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}," \ 74 | "title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}," \ 75 | "booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}," \ 76 | "series = {SIGIR '18}," \ 77 | "year = {2018}," \ 78 | "location = {Ann Arbor, MI, USA}," \ 79 | "pages = {605--614}," \ 80 | "numpages = {10}," \ 81 | "} " 82 | 83 | def name(self): 84 | return "IFT-C2-A={0}-b2={1}-R2={2}".format(self.A, self.b2, self.R2) 85 | 86 | def c_vector(self, ranking, worse_case=True): 87 | gains = ranking.get_gain_vector(worse_case) 88 | costs = ranking.get_cost_vector(worse_case) 89 | 90 | c_gains = np.cumsum(gains) 91 | c_costs = np.cumsum(costs) 92 | cvec = [] 93 | for i in range(0, len(gains)): 94 | c2 = self.c2_func(c_gains[i], c_costs[i]) 95 | cvec.append(c2) 96 | 97 | cvec = np.array(cvec) 98 | 99 | return cvec 100 | 101 | def c2_func(self, yi, ki): 102 | ex = (1.0 + self.b2 * math.pow(math.e, ((self.A - (yi/ki)) * self.R2))) 103 | return math.pow(ex, -1.0) 104 | 105 | 106 | class IFTGoalRateCWLMetric(CWLMetric): 107 | def __init__(self, T, b1, R1, A, b2, R2): 108 | super().__init__() 109 | self.metric_name = "IFT-C1-C2-T={0}-b1={1}-R1={2}-A={3}-b2={4}-R2={5}".format(T, b1, R1, A, b2, R2) 110 | self.b1 = b1 111 | self.T = T 112 | self.R1 = R1 113 | self.b2 = b2 114 | self.A = A 115 | self.R2 = R2 116 | self.bibtex = """ 117 | @inproceedings{Azzopardi:2018:MUS:3209978.3210027, 118 | author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}, 119 | title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}, 120 | booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}, 121 | series = {SIGIR '18}, 122 | year = {2018}, 123 | location = {Ann Arbor, MI, USA}, 124 
| pages = {605--614}, 125 | numpages = {10}, 126 | } 127 | """ 128 | 129 | def name(self): 130 | return "IFT-C1-C2-T={0}-b1={1}-R1={2}-A={3}-b2={4}-R2={5}".format(self.T, self.b1, self.R1, self.A, self.b2, self.R2) 131 | 132 | def c_vector(self, ranking, worse_case=True): 133 | gains = ranking.get_gain_vector(worse_case) 134 | costs = ranking.get_cost_vector(worse_case) 135 | c_gains = np.cumsum(gains) 136 | c_costs = np.cumsum(costs) 137 | cvec = [] 138 | for i in range(0, len(gains)): 139 | 140 | c1 = self.c1_func(c_gains[i]) 141 | c2 = self.c2_func(c_gains[i], c_costs[i]) 142 | cvec.append(c1*c2) 143 | 144 | cvec = np.array(cvec) 145 | 146 | return cvec 147 | 148 | def c2_func(self, yi, ki): 149 | ex = (1.0 + self.b2 * math.pow(math.e, ((self.A - (yi/ki)) * self.R2))) 150 | return math.pow(ex, -1.0) 151 | 152 | def c1_func(self, yi): 153 | ex = (1.0 + self.b1 * math.pow(math.e, ((self.T-yi) * self.R1))) 154 | return 1.0 - math.pow(ex, -1.0) 155 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_insq.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | INSQ by Moffat et al (is a variant on INST) 7 | 8 | T denotes the desired amount of gain. 9 | """ 10 | 11 | 12 | class INSQCWLMetric(CWLMetric): 13 | 14 | def __init__(self, T=1.0): 15 | super().__init__() 16 | self.metric_name = "INSQ-T={0} ".format(T) 17 | self.T = T 18 | self.bibtex = """ 19 | @inproceedings{Moffat:2012:MMI:2407085.2407092, 20 | author = {Moffat, Alistair and Scholer, Falk and Thomas, Paul}, 21 | title = {Models and Metrics: IR Evaluation As a User Process}, 22 | booktitle = {Proceedings of the Seventeenth Australasian Document Computing Symposium}, 23 | series = {ADCS '12}, 24 | year = {2012}, 25 | location = {Dunedin, New Zealand}, 26 | pages = {47--54}, 27 | url = {http://doi.acm.org/10.1145/2407085.2407092}, 28 | } 29 | """ 30 | 31 | def name(self): 32 | return "INSQ-T={0}".format(self.T) 33 | 34 | def c_vector(self, ranking, worse_case=True): 35 | gains = ranking.get_gain_vector(worse_case) 36 | cg = np.cumsum(gains) 37 | cvec = [] 38 | for i in range(0, len(cg)): 39 | ci = (((i+1.0) + (2.0 * self.T)-1.0) / ((i+1.0) + (2.0 * self.T)))**2.0 40 | cvec.append(ci) 41 | 42 | cvec = np.array(cvec) 43 | return cvec 44 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_inst.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | INST is from Moffat et al., Australasian Document Computing Symposium 2015 7 | 8 | T: Is the desired amount of relevant items or gain, 9 | depending on whether gain is binary (0,1) or graded (0..1.0) 10 | """ 11 | 12 | class INSTCWLMetric(CWLMetric): 13 | 14 | # INST requires gains to be in range [0, 1] 15 | MINGAIN = 0.0 16 | MAXGAIN = 1.0 17 | 18 | def __init__(self, T=1.0): 19 | super().__init__() 20 | self.metric_name = "INST-T={0} ".format(T) 21 | self.T = T 22 | self.bibtex = """ 23 | @inproceedings{Moffat:2015:IAM:2838931.2838938, 24 | author = {Moffat, Alistair and Bailey, Peter and Scholer, Falk and Thomas, Paul}, 25 | title = {INST: An Adaptive Metric for Information Retrieval Evaluation}, 26 | booktitle = {Proceedings of the 20th Australasian Document Computing Symposium}, 27 | series = {ADCS '15}, 
28 |         year = {2015},
29 |         location = {Parramatta, NSW, Australia},
30 |         pages = {5:1--5:4},
31 |         articleno = {5},
32 |         numpages = {4},
33 |         url = {http://doi.acm.org/10.1145/2838931.2838938}
34 |         }
35 |         """
36 | 
37 |     def name(self):
38 |         return "INST-T={0}".format(self.T)
39 | 
40 |     def c_vector(self, ranking, worse_case=True):
41 |         gains = ranking.get_gain_vector(worse_case)
42 |         self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains)
43 |         c_gains = np.cumsum(gains)
44 |         cvec = []
45 |         for i in range(0, len(c_gains)):
46 |             Ti = self.T - c_gains[i]
47 |             ci = (((i+1.0)+self.T+Ti-1.0) / ((i+1.0)+self.T+Ti))**2.0
48 |             cvec.append(ci)
49 | 
50 |         cvec = np.array(cvec)
51 |         return cvec
52 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_metrics.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import logging
4 | 
5 | logger = logging.getLogger('cwl')
6 | 
7 | class CWLMetric(object):
8 | 
9 |     def __init__(self):
10 |         self.expected_utility = 0.0
11 |         self.expected_cost = 0.0
12 |         self.expected_total_utility = 0.0
13 |         self.expected_total_cost = 0.0
14 |         self.expected_items = 0.0
15 |         self.residual_expected_utility = None
16 |         self.residual_expected_cost = None
17 |         self.residual_expected_total_utility = None
18 |         self.residual_expected_total_cost = None
19 |         self.residual_expected_items = None
20 |         self.residuals = False
21 |         self.metric_name = "Undefined"
22 |         self.ranking = None
23 |         self.bibtex = ""
24 | 
25 |     def name(self):
26 |         return self.metric_name
27 | 
28 |     def c_vector(self, ranking, worse_case=True):
29 |         """
30 |         Create a vector of C probabilities (i.e. probability of continuing from position i to position i+1)
31 |         Note: when defining a metric, it is best/easiest to re-implement this function.
32 |         :param ranking: CWL Ranking object
33 |         :param worse_case: Boolean, to denote whether to estimate based on assuming the
34 |         worst case i.e. unjudged are considered to be zero gain, and max cost, or
35 |         best case i.e. worse_case=False, and unjudged are considered to be max gain, and min cost.
36 |         Note that the Ranking object handles what is returned in the gain and cost vectors.
37 |         :return: returns the C vector probabilities
38 |         """
39 |         cvec = np.ones(len(ranking.get_gain_vector(worse_case)))
40 |         return cvec
41 | 
42 |     def l_vector(self, ranking, worse_case=True):
43 |         """
44 |         Create a vector of L probabilities (i.e. the likelihoods of stopping at position i given the C vector)
45 |         :param ranking: CWL Ranking object
46 |         :param worse_case: Boolean, to denote whether to estimate based on assuming the worst case (see c_vector)
47 |         :return: returns the L vector probabilities
48 |         """
49 |         cvec = self.c_vector(ranking, worse_case)
50 |         logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "cvec", cvec[0:11]))
51 |         cshift = np.append(np.array([1.0]), cvec[0:-1])
52 |         lvec = np.cumprod(cshift)
53 |         lvec = np.multiply(lvec, (np.subtract(np.ones(len(cvec)), cvec)))
54 |         logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "lvec", lvec[0:11]))
55 |         return lvec
56 | 
57 |     def w_vector(self, ranking, worse_case=True):
58 |         """
59 |         Create a vector of W probabilities (i.e. the probability of examining item i)
60 |         Note: when defining a metric, it is best/easiest to re-implement this function.
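        (This default implementation computes W as the normalized cumulative product
        of the C probabilities: W(i) = prod_{j<i} C(j) / sum_k prod_{j<k} C(j).)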
61 | :param ranking: CWL Ranking object 62 | :param worse_case: Boolean, to denote whether to estimate based on assuming the worse case (True) or the best case (False) 63 | :return: returns the W vector probabilities 64 | """ 65 | cvec = self.c_vector(ranking, worse_case) 66 | cvec = cvec[0:-1] 67 | cvec_prod = np.cumprod(cvec) 68 | cvec_prod = np.pad(cvec_prod, (1, 0), 'constant', constant_values=1.0) 69 | w1 = np.divide(1.0, np.sum(cvec_prod)) 70 | w_tail = np.multiply(cvec_prod[1:len(cvec_prod)], w1) 71 | wvec = np.append(w1, w_tail) 72 | logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "wvec", wvec[0:11])) 73 | return wvec 74 | 75 | def measure(self, ranking): 76 | """ 77 | Given the ranking, measure estimates the various measurements given the CWL framework; 78 | if residuals are required, these are also computed. 79 | :param ranking: CWL Ranking object 80 | :return: the expected utility per item 81 | """ 82 | self.ranking = ranking 83 | # score based on worse case - lower bounds 84 | (eu, etu, ec, etc, ei) = self._do_score(ranking, True) 85 | 86 | self.expected_utility = eu 87 | self.expected_total_utility = etu 88 | self.expected_cost = ec 89 | self.expected_total_cost = etc 90 | self.expected_items = ei 91 | 92 | if self.residuals: 93 | # score based on best case - upper bounds 94 | (eu, etu, ec, etc, ei) = self._do_score(ranking, False) 95 | 96 | # compute the residual i.e. the difference between the upper and lower bounds 97 | self.residual_expected_utility = eu - self.expected_utility 98 | self.residual_expected_total_utility = etu - self.expected_total_utility 99 | self.residual_expected_cost = ec - self.expected_cost 100 | self.residual_expected_total_cost = etc - self.expected_total_cost 101 | self.residual_expected_items = ei - self.expected_items 102 | 103 | # return the rate of gain per document 104 | return self.expected_utility 105 | 106 | def _do_score(self, ranking, worse_case=True): 107 | """ 108 | An internal function that handles the scoring of a ranking given the CWL machinery. 109 | :param ranking: CWL Ranking object 110 | :return: a tuple of (expected utility per item, expected total utility, 111 | expected cost, expected total cost, expected items examined)
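For example (hypothetical values): with wvec = [0.5, 0.3, 0.2] and gains = [1.0, 0.0, 0.5], the expected utility is 0.5*1.0 + 0.3*0.0 + 0.2*0.5 = 0.6 units of gain per item examined.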
112 | """ 113 | wvec = self.w_vector(ranking, worse_case) 114 | lvec = self.l_vector(ranking, worse_case) 115 | gain_vec = ranking.get_gain_vector(worse_case) 116 | cost_vec = ranking.get_cost_vector(worse_case) 117 | cum_gains = np.cumsum(gain_vec) 118 | cum_costs = np.cumsum(cost_vec) 119 | expected_utility = np.sum(np.dot(wvec, gain_vec)) 120 | expected_total_utility = np.sum(np.dot(lvec, cum_gains)) 121 | expected_cost = np.sum(np.dot(wvec, cost_vec)) 122 | expected_total_cost = np.sum(np.dot(lvec, cum_costs)) 123 | expected_items = 1.0 / wvec[0] 124 | return expected_utility, expected_total_utility, expected_cost, expected_total_cost, expected_items 125 | 126 | def report(self): 127 | if self.residuals: 128 | print("{0}\t{1}\t{2:.4f}\t{3:.4f}\t{4:.4f}\t{5:.4f}\t{6:.4f}\t{7:.4f}\t{8:.4f}\t{9:.4f}\t{10:.4f}\t{11:.4f}".format( 129 | self.ranking.topic_id, self.name(), self.expected_utility, self.expected_total_utility, 130 | self.expected_cost, self.expected_total_cost, self.expected_items, 131 | self.residual_expected_utility, self.residual_expected_total_utility, 132 | self.residual_expected_cost, self.residual_expected_total_cost, self.residual_expected_items 133 | )) 134 | else: 135 | print("{0}\t{1}\t{2:.4f}\t{3:.4f}\t{4:.4f}\t{5:.4f}\t{6:.4f}".format( 136 | self.ranking.topic_id, self.name(), self.expected_utility, self.expected_total_utility, 137 | self.expected_cost, self.expected_total_cost, self.expected_items, 138 | )) 139 | 140 | def csv(self): 141 | return ("{0},{1:.3f},{2:.3f},{3:.3f},{4:.3f},{5:.3f}".format( 142 | self.name(), self.expected_utility, self.expected_total_utility, self.expected_cost, 143 | self.expected_total_cost, self.expected_items)) 144 | 145 | def get_scores(self): 146 | """ 147 | :return: list with values of each measurement for the previously measured ranking 148 | """ 149 | scores = [ 150 | self.expected_utility, 151 | self.expected_total_utility, 152 | self.expected_cost, 153 | self.expected_total_cost, 154 | self.expected_items] 155 | return scores 156 | 157 | def _pad_vector(self, vec1, n, val): 158 | """ 159 | Pads vector 1 up to size n, with the value val 160 | :param vec1: np array 161 | :param n: size of the desired array 162 | :param val: the value to be inserted if padding is required 163 | :return: the padded vector 164 | """ 165 | if len(vec1) < n: 166 | vec1 = np.pad(vec1, (0, n-len(vec1)), 'constant', constant_values=val) 167 | return vec1 168 | 169 | def validate_gain_range(self, min_allowed_gain, max_allowed_gain, gain_vec): 170 | """ 171 | Checks that the gain vector does not violate any metric assumptions 172 | These assumptions (about the min or max gain) should be provided by 173 | the calling metric class. 
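For example, INSTCWLMetric defines MINGAIN = 0.0 and MAXGAIN = 1.0 and calls self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) before computing its C vector.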
174 | """ 175 | if np.min(gain_vec) < min_allowed_gain: 176 | raise ValueError("Supplied gain values violate metric assumptions: Metric = {}.\n " 177 | "The minimum allowable gain for this metric is: {}.".format(self.name(), min_allowed_gain)) 178 | if np.max(gain_vec) > max_allowed_gain: 179 | raise ValueError("Supplied gain values ({}) violate metric assumptions: Metric = {}.\n " 180 | "The maximum allowable gain for this " 181 | "metric is: {}.".format(np.max(gain_vec), self.name(), max_allowed_gain)) 182 | 183 | 184 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_nerr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | ''' 5 | The suite of Not (but Nearly) ERR metrics (NERR) from Azzopardi et al. in 6 | ICTIR 2021 ("ERR is not C/W/L..."). 7 | 8 | There are four specific instances of NERR metrics which correspond to equations 9 | presented in the aforementioned research paper: NERReq{8, 9, 10, 11}. 10 | 11 | Note that NERReq8 and NERReq9 are designed to be truncated at k, whereas 12 | NERReq10 runs to full depth according to the parameter phi (akin to RBP) and 13 | NERReq11 runs to full depth according to the parameter T (akin to INST). 14 | ''' 15 | 16 | 17 | # Option One (Equation 8) 18 | class NERReq8CWLMetric(CWLMetric): 19 | 20 | # NERReq8 requires gains to be in range [0, 1] 21 | MINGAIN = 0.0 22 | MAXGAIN = 1.0 23 | 24 | def __init__(self, k): 25 | super().__init__() 26 | self.metric_name = "NERR-EQ8@k={0}".format(k) 27 | self.k = k 28 | self.bibtex = """ 29 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 30 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 31 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 32 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 33 | series = {ICTIR '21}, 34 | location = {Virtual Event, Canada}, 35 | url = {https://doi.org/10.1145/3471158.3472239}, 36 | doi = {10.1145/3471158.3472239}, 37 | } 38 | """ 39 | 40 | def c_vector(self, ranking, worse_case=True): 41 | gains = ranking.get_gain_vector(worse_case) 42 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 43 | cvec = np.zeros(len(gains)) 44 | i = 0 45 | while i < len(gains) and i < self.k - 1: 46 | cvec[i] = 1 - gains[i] 47 | i = i + 1 48 | return np.array(cvec) 49 | 50 | 51 | # Option Two (Equation 9) 52 | class NERReq9CWLMetric(CWLMetric): 53 | 54 | # NERReq9 requires gains to be in range [0, 1] 55 | MINGAIN = 0.0 56 | MAXGAIN = 1.0 57 | 58 | def __init__(self, k): 59 | super().__init__() 60 | self.metric_name = "NERR-EQ9@k={0}".format(k) 61 | self.k = k 62 | self.bibtex = """ 63 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 64 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 65 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 66 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 67 | series = {ICTIR '21}, 68 | location = {Virtual Event, Canada}, 69 | url = {https://doi.org/10.1145/3471158.3472239}, 70 | doi = {10.1145/3471158.3472239}, 71 | } 72 | """ 73 | 74 | def c_vector(self, ranking, worse_case=True): 75 | gains = ranking.get_gain_vector(worse_case) 76 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN,
gains) 77 | cvec = np.zeros(len(gains)) 78 | i = 0 79 | while i < len(gains) and i < self.k - 1: 80 | rank = i + 1 81 | cvec[i] = (1.0*rank/(rank+1.0)) * (1.0-gains[i]) 82 | i = i + 1 83 | return np.array(cvec) 84 | 85 | 86 | # Option Three (Equation 10) 87 | class NERReq10CWLMetric(CWLMetric): 88 | 89 | # NERReq10 requires gains to be in range [0, 1] 90 | MINGAIN = 0.0 91 | MAXGAIN = 1.0 92 | 93 | def __init__(self, phi=0.9): 94 | super().__init__() 95 | self.metric_name = "NERR-EQ10@phi={0}".format(phi) 96 | self.phi = phi 97 | self.bibtex = """ 98 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 99 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 100 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 101 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 102 | series = {ICTIR '21}, 103 | location = {Virtual Event, Canada}, 104 | url = {https://doi.org/10.1145/3471158.3472239}, 105 | doi = {10.1145/3471158.3472239}, 106 | } 107 | """ 108 | 109 | def c_vector(self, ranking, worse_case=True): 110 | gains = ranking.get_gain_vector(worse_case) 111 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 112 | cvec = np.zeros(len(gains)) 113 | i = 0 114 | while i < len(gains): 115 | cvec[i] = self.phi * (1 - gains[i]) 116 | i = i + 1 117 | return np.array(cvec) 118 | 119 | 120 | # Option Four (Equation 11) 121 | class NERReq11CWLMetric(CWLMetric): 122 | 123 | # NERReq11 requires gains to be in range [0, 1] 124 | MINGAIN = 0.0 125 | MAXGAIN = 1.0 126 | 127 | def __init__(self, T=1.0): 128 | super().__init__() 129 | self.metric_name = "NERR-EQ11@T={0}".format(T) 130 | self.T = T 131 | self.bibtex = """ 132 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 133 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 134 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 135 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 136 | series = {ICTIR '21}, 137 | location = {Virtual Event, Canada}, 138 | url = {https://doi.org/10.1145/3471158.3472239}, 139 | doi = {10.1145/3471158.3472239}, 140 | } 141 | """ 142 | 143 | def c_vector(self, ranking, worse_case=True): 144 | gains = ranking.get_gain_vector(worse_case) 145 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 146 | cvec = np.zeros(len(gains)) 147 | i = 0 148 | while i < len(gains): 149 | rank = i + 1 150 | cvec[i] = (((rank + (2.0 * self.T)-1.0) / (rank + (2.0 * self.T)))**2.0) * (1.0-gains[i]) 151 | i = i + 1 152 | return np.array(cvec) 153 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_npv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | An economic metric derived directly from computing the Net Present Value of a given list. 6 | r is the rate at which the user discounts future interaction. 7 | 8 | Note that NPV is equivalent to RBP where theta = 1/(1+rate). 9 | 10 | This means that, from an alternative perspective, patience (theta) can be expressed 11 | as how much searchers discount future value.
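For example (a quick sketch): a discount rate of 0.1 gives a constant continuation probability of 1/(1 + 0.1) ≈ 0.909, so NPV with rate = 0.1 scores a ranking identically to RBP with theta ≈ 0.909.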
12 | 13 | """ 14 | 15 | 16 | class NPVCWLMetric(CWLMetric): 17 | 18 | def __init__(self, rate=0.1): 19 | super().__init__() 20 | self.metric_name = "NPV-r@{0}".format(rate) 21 | self.rate = rate 22 | self.bibtex = """ 23 | @inproceedings{azzopardi2019cwl, 24 | author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair}, 25 | title = {cwl\_eval: An Evaluation Tool for Information Retrieval}, 26 | booktitle = {Proc. of the 42nd International ACM SIGIR Conference}, 27 | series = {SIGIR '19}, 28 | year = {2019} 29 | } 30 | """ 31 | 32 | def name(self): 33 | return "NPV-r@{0}".format(self.rate) 34 | 35 | def c_vector(self, ranking, worse_case=True): 36 | gains = ranking.get_gain_vector(worse_case) 37 | cvec = np.dot(np.ones(len(gains)), (1.0/(1.0+self.rate))) 38 | return cvec 39 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | ''' 5 | (Graded) Precision at k, where k is assumed to be the number of items to be examined. 6 | 7 | If the gains are set to (0 or 1) then binary precision is calculated; 8 | if the gains are set to 0..1.0 then graded precision is calculated. 9 | 10 | Note that CG@k / R@k and P@k are essentially related, 11 | where the EU/Doc is P@k, while the EU/Serp (ETU) is CG@k or R@k. 12 | 13 | Van Rijsbergen and Salton both mention calculating precision at k - though in the context of computing the PR curve. 14 | P@k was more widely used much later, in the 1990s, through TREC. 15 | ''' 16 | 17 | 18 | class PrecisionCWLMetric(CWLMetric): 19 | 20 | def __init__(self, k=10): 21 | super().__init__() 22 | self.metric_name = "P@{0}".format(k) 23 | self.k = k 24 | self.bibtex = """ 25 | @misc{rijsbergen:1979:ir, 26 | title={Information Retrieval.}, 27 | author={Van Rijsbergen, Cornelis J}, 28 | year={1979}, 29 | publisher={USA: Butterworth-Heinemann} 30 | } 31 | """ 32 | 33 | def name(self): 34 | return "P@{0}".format(self.k) 35 | 36 | def c_vector(self, ranking, worse_case=True): 37 | cvec = np.ones(self.k-1) 38 | cvec = self._pad_vector(cvec, ranking.n, 0.0) 39 | return cvec 40 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_rbp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | Rank Biased Precision by Moffat and Zobel 6 | 7 | theta denotes the patience of a user - higher thetas mean that the user is more likely to continue down the ranked list 8 | 9 | A very simple user model where theta is the continuation probability. 10 | 11 | RBP is directly related to Net Present Value (see cwl_npv.NPVCWLMetric) 12 | and RBP is also related to Time Biased Gain (see cwl_tbg.TBGCWLMetric) 13 | 14 | """ 15 | 16 | 17 | class RBPCWLMetric(CWLMetric): 18 | 19 | def __init__(self, theta=0.9): 20 | #CWLMetric.__init__(self) 21 | super().__init__() 22 | self.metric_name = "RBP@{0}".format(theta) 23 | self.theta = theta 24 | self.bibtex = """ 25 | @article{Moffat:2008:RPM:1416950.1416952, 26 | author = {Moffat, Alistair and Zobel, Justin}, 27 | title = {Rank-biased Precision for Measurement of Retrieval Effectiveness}, 28 | journal = {ACM Trans. Inf.
Syst.}, 29 | volume = {27}, 30 | number = {1}, 31 | year = {2008}, 32 | pages = {2:1--2:27}, 33 | articleno = {2}, 34 | numpages = {27}, 35 | url = {http://doi.acm.org/10.1145/1416950.1416952}, 36 | } 37 | """ 38 | 39 | def name(self): 40 | return "RBP@{0}".format(self.theta) 41 | 42 | def c_vector(self, ranking, worse_case=True): 43 | cvec = np.dot(np.ones(ranking.n), self.theta) 44 | return cvec 45 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_rr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | Reciprocal Rank (RR) - From TREC-5 in 1996 by Kantor and Voorhees 6 | """ 7 | 8 | 9 | class RRCWLMetric(CWLMetric): 10 | 11 | def __init__(self): 12 | super().__init__() 13 | self.metric_name = "RR" 14 | self.bibtex = """ 15 | @article{kantor2000trec, 16 | title={The TREC-5 Confusion Track}, 17 | author={Kantor, Paul and Voorhees, Ellen}, 18 | journal={Information Retrieval}, 19 | volume={2}, 20 | number={2-3}, 21 | pages={165--176}, 22 | year={2000} 23 | } 24 | """ 25 | 26 | def name(self): 27 | return "RR" 28 | 29 | def c_vector(self, ranking, worse_case=True): 30 | gains = ranking.get_gain_vector(worse_case) 31 | cvec = np.zeros(len(gains)) 32 | i = 0 33 | found_gain = False 34 | while i < len(gains) and not found_gain: 35 | if gains[i] > 0: 36 | found_gain = True 37 | else: 38 | cvec[i] = 1.0 39 | i = i + 1 40 | 41 | return cvec 42 | 43 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_set.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | Search Economic Metric based on Azzopardi (2014)'s economic model of search. 7 | 8 | Given the total gain function g(i) = i^beta 9 | where i is the rank of the item, and beta controls the amount of discount. 10 | There is no explicit reference to a relevance vector in the paper as it makes 11 | an assumption about how much, on average, a user would get by going to the next rank. 12 | So essentially, there is an implicit assumption that each item provides one unit of gain. 13 | But here the implementation will use the same discounting scheme - but with the observed relevance/gain vector. 14 | 15 | Note that for each k, the expected total utility (ETU from CWL) @k = g(k) when all items are relevant. 16 | 17 | 0 <= beta <= 1.0 - and is the amount of diminishing returns that the user experiences 18 | k = 1...n up to 1000 - is the cut-off at which the user will stop. 19 | 20 | Note that when beta = 1.0, the user model is equivalent to the P@k user model (for the same k).
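For example (a sketch following the implementation below): with beta = 0.5 the marginal gain of moving from rank i to rank i+1 is (i+1)^0.5 - i^0.5, so the continuation probability at rank 1 is (3^0.5 - 2^0.5)/(2^0.5 - 1) ≈ 0.768, rising towards one at deeper ranks until the cut-off k, where it drops to zero.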
21 | 22 | """ 23 | 24 | 25 | class SETCWLMetric(CWLMetric): 26 | 27 | def __init__(self, beta=0.5, k=10): 28 | super().__init__() 29 | self.k = k 30 | self.beta = beta 31 | self.metric_name = self.name() 32 | self.bibtex = """ 33 | @inproceedings{Azzopardi:2014:MIE:2600428.2609574, 34 | author = {Azzopardi, Leif}, 35 | title = {Modelling Interaction with Economic Models of Search}, 36 | booktitle = {Proceedings of the 37th International ACM SIGIR Conference 37 | on Research \& Development in Information Retrieval}, 38 | year = {2014}, 39 | location = {Gold Coast, Queensland, Australia}, 40 | pages = {3--12}, 41 | numpages = {10}, 42 | url = {http://doi.acm.org/10.1145/2600428.2609574}, 43 | } 44 | """ 45 | 46 | def name(self): 47 | return "SET-k@{0}-b@{1}".format(self.k, self.beta) 48 | 49 | def _weight(self, i): 50 | return math.pow(i + 1, self.beta) - math.pow(i, self.beta) 51 | 52 | def c_vector(self, ranking, worse_case=True): 53 | 54 | cvec = [] 55 | for i in range(1, ranking.n + 1): 56 | if i < self.k: 57 | cvec.append(self._weight(i+1)/self._weight(i)) 58 | else: 59 | cvec.append(0.0) 60 | 61 | cvec = np.array(cvec) 62 | 63 | return cvec 64 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_tbg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | 6 | """ 7 | Time Biased Gain by Smucker and Clarke 8 | 9 | H is the halflife which stipulates how quickly the gain decays over time 10 | 11 | TBG is equivalent to RBP when the cost of all items is equal. 12 | 13 | Note in the formulation below the weight is normalized so that a probability vector is formed for W (i.e. it sums to one). 14 | I.e. the weights are re-scaled. 15 | 16 | Also note that the cost vector should be pre-computed a priori with the cost of each element, 17 | and if no gain is to be assigned to duplicate/similar items, then the qrel file used should be pre-processed to zero out duplicate 18 | items (see the TODO below). 19 | 20 | TODO(): Consider implementing a duplicate-sensitive qrel handler that would be duplicate aware. 21 | 22 | """ 23 | 24 | class TBGCWLMetric(CWLMetric): 25 | def __init__(self, halflife=224): 26 | super().__init__() 27 | self.metric_name = "TBG-H@{0} ".format(halflife) 28 | self.halflife = halflife 29 | self.bibtex = """ 30 | @inproceedings{Smucker:2012:TCE:2348283.2348300, 31 | author = {Smucker, Mark D.
and Clarke, Charles L.A.}, 32 | title = {Time-based Calibration of Effectiveness Measures}, 33 | booktitle = {Proceedings of the 35th International ACM SIGIR Conference 34 | on Research and Development in Information Retrieval}, 35 | series = {SIGIR '12}, 36 | year = {2012}, 37 | location = {Portland, Oregon, USA}, 38 | pages = {95--104}, 39 | numpages = {10}, 40 | url = {http://doi.acm.org/10.1145/2348283.2348300}, 41 | } 42 | """ 43 | 44 | def name(self): 45 | return "TBG-H@{0} ".format(self.halflife) 46 | 47 | def c_vector(self, ranking, worse_case=True): 48 | wvec = self.w_vector(ranking, worse_case) 49 | cvec = [] 50 | for i in range(0, len(wvec)-1): 51 | if wvec[i] > 0.0: 52 | cvec.append(wvec[i+1] / wvec[i]) 53 | else: 54 | cvec.append(0.0) 55 | 56 | cvec.append(0.0) 57 | cvec = np.array(cvec) 58 | 59 | return cvec 60 | 61 | def w_vector(self, ranking, worse_case=True): 62 | costs = ranking.get_cost_vector(worse_case) 63 | wvec = [] 64 | c_costs = np.cumsum(costs) 65 | start = 0.0 66 | 67 | norm = self.integral_decay(0.0) 68 | wvec.append(norm) 69 | 70 | for i in range(0, len(c_costs)-1): 71 | weight_i = self.integral_decay(c_costs[i]) 72 | norm = norm + weight_i 73 | wvec.append(weight_i) 74 | 75 | wvec = np.divide(np.array(wvec), norm) 76 | return wvec 77 | 78 | def integral_decay(self, x): 79 | h = self.halflife 80 | return (h * (2.0 ** (-x/h))) / math.log(2.0, math.e) 81 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_umeasure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | 6 | """ 7 | U-Measure by Sakai and Dou (2013) 8 | 9 | The metric assumes that as searchers read more and more text they are less likely to continue. 10 | 11 | L expresses the amount of text the searcher is willing to read (the position-based decay reaches zero at L). A higher L means the searcher is more likely to continue. 12 | 13 | The cost used should be expressed in characters - but as this is proportional to time - time could be used as well. 14 | Note that if costs are in terms of characters, then EC and ETC will be in units based on characters (obviously). 15 | 16 | """ 17 | 18 | class UMeasureCWLMetric(CWLMetric): 19 | def __init__(self, L=1000): 20 | super().__init__() 21 | self.metric_name = "U-L@{0} ".format(L) 22 | self.L = L 23 | self.bibtex = """ 24 | @inproceedings{Sakai:2013:SRR:2484028.2484031, 25 | author = {Sakai, Tetsuya and Dou, Zhicheng}, 26 | title = {Summaries, Ranked Retrieval and Sessions: A Unified Framework for Information Access Evaluation}, 27 | booktitle = {Proceedings of the 36th International ACM SIGIR Conference on Research and Development in Information Retrieval}, 28 | series = {SIGIR '13}, 29 | year = {2013}, 30 | location = {Dublin, Ireland}, 31 | pages = {473--482}, 32 | numpages = {10}, 33 | url = {http://doi.acm.org/10.1145/2484028.2484031} 34 | } 35 | """ 36 | 37 | def name(self): 38 | return "U-L@{0} ".format(self.L) 39 | 40 | def c_vector(self, ranking, worse_case=True): 41 | wvec = self.w_vector(ranking, worse_case) 42 | cvec = [] 43 | for i in range(0, len(wvec)-1): 44 | if wvec[i] > 0.0: 45 | cvec.append(wvec[i+1] / wvec[i]) 46 | else: 47 | cvec.append(0.0) 48 | 49 | cvec.append(0.0) 50 | cvec = np.array(cvec) 51 | return cvec 52 | 53 | def w_vector(self, ranking, worse_case=True): 54 | wvec = [] 55 | # to get the positions, cumulative sum the costs.
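# e.g. (hypothetical values) costs of [100, 200, 300] characters give cumulative offsets [100, 300, 600]:
# item 1 is weighted at position 0, item 2 at position 100, item 3 at position 300,
# i.e. each item is discounted by the amount of text read before it starts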
56 | # costs are assumed to be the length of each document 57 | costs = ranking.get_cost_vector(worse_case) 58 | c_costs = np.cumsum(costs) 59 | start = 0 60 | norm = 0.0 61 | for i in range(0, len(c_costs)-1): 62 | weight_i = self.pos_decay(start) 63 | start = c_costs[i] 64 | wvec.append(weight_i) 65 | norm = norm + weight_i 66 | wvec.append(0.0) 67 | 68 | # now normalize the wvec to sum to one. 69 | wvec = np.divide(np.array(wvec), norm) 70 | return wvec 71 | 72 | 73 | def pos_decay(self, pos): 74 | return max(0.0, (1.0 - (pos / self.L))) 75 | -------------------------------------------------------------------------------- /cwl/ruler/ranking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Ranking(object): 5 | 6 | def __init__(self, topic_id, gains, costs, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 7 | """ 8 | The ranking object encapsulates the data about the items in the ranked list. 9 | The gains and costs vectors should only be accessed through the two getter methods 10 | as these will construct the list of gains and costs up to max_n and handle any unjudged items 11 | :param topic_id: a string to denote the topic 12 | :param gains: a vector of floats to represent the gain associated with each item in the list 13 | :param costs: a vector of floats to represent the cost of each item in the list 14 | :param max_gain: float that is greater than zero 15 | :param min_gain: float that is zero or greater 16 | :param max_cost: float that is greater than zero (and greater than or equal to min_cost) 17 | :param min_cost: float that is greater than zero (no free lunches) 18 | """ 19 | self.topic_id = topic_id 20 | self._gains = gains 21 | self._costs = costs 22 | self.total_qrel_gain = 0.0 23 | self.total_qrel_rels = 0.0 24 | self.max_gain = max_gain 25 | self.min_gain = min_gain 26 | self.max_cost = max_cost 27 | self.min_cost = min_cost 28 | self.n = max_n 29 | # Calculates a lower bound on the total gain and total relevant items 30 | # For metrics like AP to be computed accurately, these values need to be 31 | # manually set after creating the ranking i.e. set w.r.t the QRELs file 32 | # As the QRELs file has all the KNOWN relevant items.
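# e.g. gains of [1.0, 0.0, 0.5] give total_qrel_gain = 1.5 and total_qrel_rels = 2.0 below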
33 | for g in gains: 34 | if g > 0.0: 35 | self.total_qrel_gain += g 36 | self.total_qrel_rels += 1.0 37 | 38 | def get_gain_vector(self, worse_case=True): 39 | # pad out the vector to size n 40 | # convert all NaNs to min (worse case) or max (best case) 41 | if worse_case: 42 | gains = self._pad_trunc_vector(self._gains, self.n, self.min_gain) 43 | gains[np.isnan(gains)] = self.min_gain 44 | return gains 45 | else: 46 | gains = self._pad_trunc_vector(self._gains, self.n, self.max_gain) 47 | gains[np.isnan(gains)] = self.max_gain 48 | return gains 49 | 50 | def get_cost_vector(self, worse_case=True): 51 | # pad out the vector to size n 52 | # convert all NaNs to max (worse case) or min (best case) 53 | if worse_case: 54 | costs = self._pad_trunc_vector(self._costs, self.n, self.max_cost) 55 | costs[np.isnan(costs)] = self.max_cost 56 | return costs 57 | else: 58 | costs = self._pad_trunc_vector(self._costs, self.n, self.min_cost) 59 | costs[np.isnan(costs)] = self.min_cost 60 | return costs 61 | 62 | def get_total_gain(self, worse_case=True): 63 | if worse_case: 64 | return self.total_qrel_gain 65 | else: 66 | # return the max of self.total_qrel_gain 67 | return max(np.sum(self.get_gain_vector(worse_case)), self.total_qrel_gain) 68 | 69 | def get_total_cost(self, worse_case=True): 70 | return np.sum(self.get_cost_vector(worse_case)) 71 | 72 | def get_total_rels(self, worse_case=True): 73 | if worse_case: 74 | return self.total_qrel_rels 75 | else: 76 | # return the max of self.total_qrel_rels 77 | gains = np.array(self.get_gain_vector(worse_case)) 78 | # convert gain values to rel values 79 | gains[gains > 0.0] = 1.0 80 | return max(np.sum(gains), self.total_qrel_rels) 81 | 82 | def _pad_trunc_vector(self, vec1, n, val): 83 | """ 84 | Pads vector 1 up to size n, with the value val 85 | :param vec1: np array 86 | :param n: size of the desired array 87 | :param val: the value to be inserted if padding is required 88 | :return: the padded vector 89 | """ 90 | if len(vec1) < n: 91 | vec1 = np.pad(vec1, (0, n-len(vec1)), 'constant', constant_values=val) 92 | else: 93 | vec1 = vec1[0:n] 94 | return np.array(vec1) 95 | 96 | def report(self): 97 | # print the first ten gains and costs for this topic 98 | print("Topic: {0}".format(self.topic_id)) 99 | print(self.topic_id, self._gains[:10]) 100 | print(self.topic_id, self._costs[:10]) 101 | 102 | 103 | class RankingMaker(object): 104 | """ 105 | This helper class builds Rankings 106 | """ 107 | def __init__(self, topic_id, gain_handler, cost_dict=None, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 108 | """ 109 | Iteratively builds up the ranked list of items (via the add function) then returns the final ranking 110 | by calling get_ranking 111 | :param topic_id: (string) represents the topic id - should match the topic id in the results file 112 | :param gain_handler: seeker.trec_qrel_handler.TrecQrelHandler 113 | :param cost_dict: a dictionary containing the element_type (key) and cost (float, value).
114 | :param max_gain: if an item is unjudged, when worse_case=False, then set gain to max_gain 115 | :param max_cost: if an item is unjudged, when worse_case=True, then set cost to max_cost 116 | :param min_cost: if an item is unjudged, when worse_case=False, then set the cost to min_cost 117 | """ 118 | self.topic_id = topic_id 119 | self.gain_handler = gain_handler 120 | self.cost_lookup = cost_dict 121 | self.total_qrel_gain = 0.0 122 | self.total_qrel_rels = 0.0 123 | self._gains = [] 124 | self._costs = [] 125 | self.max_gain = max_gain 126 | self.min_gain = min_gain 127 | self.max_cost = max_cost 128 | self.min_cost = min_cost 129 | self.show_report = False 130 | self.max_n = max_n 131 | 132 | def add(self, doc_id, element_type): 133 | gain = self.gain_handler.get_value_if_exists(self.topic_id, doc_id) 134 | # if the item is not judged, then insert a NaN value for the gain 135 | # the Ranking object will resolve the NaN value as a min or max gain 136 | if gain is None: 137 | self._gains.append(np.nan) 138 | else: 139 | self._gains.append(gain) 140 | 141 | cost = self._get_cost(doc_id, element_type) 142 | self._costs.append(cost) 143 | 144 | def _get_cost(self, doc_id, element_type): 145 | """ 146 | For a given document and element type returns the cost given the cost dictionary (cost_lookup) 147 | if no cost lookup exists or if the element is not in the dictionary, a nan value is assigned. 148 | :param doc_id: string 149 | :param element_type: string 150 | :return: return a float or nan value 151 | """ 152 | if self.cost_lookup is None: 153 | return np.nan 154 | else: 155 | if element_type in self.cost_lookup: 156 | return self.cost_lookup[element_type] 157 | else: 158 | return np.nan 159 | 160 | def get_ranking(self): 161 | """ 162 | Creates and returns a Ranking given the gains and costs added to the ranked lists. 163 | :return: ruler.ranking.Ranking 164 | """ 165 | ranking = Ranking(self.topic_id, self._gains, self._costs, self.max_gain, self.min_gain, self.max_cost, self.min_cost, self.max_n) 166 | ranking.total_qrel_rels = self.gain_handler.get_total_rels(self.topic_id) 167 | ranking.total_qrel_gain = self.gain_handler.get_total_gains(self.topic_id) 168 | return ranking 169 | 170 | def report(self): 171 | if self.show_report: 172 | print("Topic: {0}".format(self.topic_id)) 173 | print(self.topic_id, self._gains[:10]) 174 | print(self.topic_id, self._costs[:10]) 175 | -------------------------------------------------------------------------------- /cwl/seeker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/seeker/__init__.py -------------------------------------------------------------------------------- /cwl/seeker/common_helpers.py: -------------------------------------------------------------------------------- 1 | # seekiir Framework - Common Files 2 | # Helper Functions and Classes 3 | 4 | def file_exists(filename): 5 | ''' 6 | Helper function which returns a boolean value indicating if the file specified by string parameter filename exists.
7 | Solution from http://stackoverflow.com/questions/82831/how-do-i-check-if-a-file-exists-using-python 8 | ''' 9 | try: 10 | with open(filename) as f: pass 11 | return True 12 | except IOError: 13 | return False 14 | 15 | 16 | 17 | class AutoVivification(dict): 18 | def __getitem__(self, item): 19 | try: 20 | return dict.__getitem__(self, item) 21 | except KeyError: 22 | value = self[item] = type(self)() 23 | return value 24 | -------------------------------------------------------------------------------- /cwl/seeker/topic_document_file_handler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.seeker.common_helpers import file_exists 3 | from cwl.seeker.common_helpers import AutoVivification 4 | 5 | 6 | class TopicDocumentFileHandler(object): 7 | def __init__(self, filename=None): 8 | self.data = AutoVivification() 9 | if filename: 10 | self.read_file(filename) 11 | 12 | 13 | def _put_in_line(self, line): 14 | # handles the specific format of the line (assumes 3 columns: topic document value ) 15 | parts = line.partition(' ') 16 | topic = parts[0] 17 | parts = parts[2].partition(' ') 18 | doc = parts[0] 19 | value = parts[2].strip() 20 | self.put_value(topic, doc, value) 21 | 22 | def _get_out_line(self, topic, doc): 23 | # outputs the topic document and value in a specific way. 24 | return "%s %s %d\n" % (topic, doc, self.data[topic][doc]) 25 | 26 | def read_file(self, filename): 27 | if file_exists(filename): 28 | infile = open(filename, "r") 29 | while infile: 30 | line = infile.readline() 31 | if not line: 32 | infile.close() 33 | break 34 | else: 35 | self._put_in_line(line) 36 | 37 | def save_file(self, filename, append=False): 38 | if append: 39 | outfile = open(filename, "a") 40 | else: 41 | outfile = open(filename, "w") 42 | 43 | for t in self.get_topic_list(): 44 | for d in self.get_doc_list(t): 45 | out_line = self._get_out_line(t, d) 46 | outfile.write(out_line) 47 | 48 | outfile.close() 49 | 50 | def put_value(self, topic, doc, value): 51 | if topic and doc: 52 | self.data[topic][doc] = float(value) 53 | 54 | def get_value(self, topic, doc): 55 | if topic not in self.data: 56 | return 0.0 57 | 58 | if self.data[topic][doc]: 59 | return self.data[topic][doc] 60 | else: 61 | return 0.0 62 | 63 | def get_value_if_exists(self, topic, doc): 64 | if topic not in self.data.keys(): 65 | return None 66 | 67 | if doc in self.data[topic].keys(): 68 | return float(self.data[topic][doc]) 69 | else: 70 | return None 71 | 72 | def get_doc_list(self, topic): 73 | if self.data[topic]: 74 | return self.data[topic] 75 | else: 76 | return [] 77 | 78 | def get_topic_list(self): 79 | tl = [] 80 | if self.data: 81 | for topic in self.data.keys(): 82 | tl.append(topic) 83 | 84 | return tl 85 | 86 | def get_topic_doc_dict(self): 87 | return self.data 88 | 89 | def add_topic_doc(self, topic, doc, value): 90 | self.data[topic][doc] = value 91 | 92 | def inc_topic_doc(self, topic, doc, value=1.0): 93 | if self.data[topic][doc]: 94 | self.data[topic][doc] = self.data[topic][doc] + value 95 | else: 96 | self.data[topic][doc] = value 97 | 98 | def __str__(self): 99 | return 'TOPICS READ IN: ' + str(len(self.data)) 100 | -------------------------------------------------------------------------------- /cwl/seeker/trec_qrel_handler.py: -------------------------------------------------------------------------------- 1 | from cwl.seeker.common_helpers import file_exists 2 | from cwl.seeker.common_helpers import AutoVivification 3 | from 
cwl.seeker.topic_document_file_handler import TopicDocumentFileHandler 4 | 5 | 6 | class TrecQrelHandler(TopicDocumentFileHandler): 7 | 8 | def __init__(self, filename=None): 9 | super(TrecQrelHandler, self).__init__(filename) 10 | 11 | def _put_in_line(self, line): 12 | """ 13 | For TREC QREL the Format is: 14 | Topic Iteration Document Judgement 15 | Iteration is not used. 16 | """ 17 | parts = line.split() 18 | topic = parts[0] 19 | doc = parts[2].strip() 20 | judgement = parts[3].strip() 21 | self.put_value(topic, doc, judgement) 22 | 23 | def _get_out_line(self, topic, doc): 24 | # outputs the topic document and value as the TREC QREL Format with iteration default to zero 25 | return "%s 0 %s %d\n" % (topic, doc, self.data[topic][doc]) 26 | 27 | def validate_gains(self, min_gain=0.0, max_gain=1.0): 28 | """ 29 | Iterates all gains and checks to ensure they are below the value of 30 | max_gain. 31 | """ 32 | all_gains = self.get_topic_doc_dict() 33 | for topic_id in all_gains: 34 | for gain in all_gains[topic_id].values(): 35 | if gain > max_gain: 36 | raise ValueError("Detected a gain value ({}) greater than the maximum ({}).\n" 37 | "Please check your input gain file".format(gain,max_gain)) 38 | if gain < min_gain: 39 | raise ValueError("Detected a gain value ({}) less than minimum ({}).\n " 40 | "Please check your input gain file.".format(gain,min_gain)) 41 | 42 | def get_total_gains(self, topic): 43 | 44 | doc_list = self.get_doc_list(topic) 45 | gain = 0.0 46 | for doc in doc_list: 47 | gain += self.get_value(topic, doc) 48 | return gain 49 | 50 | def get_total_rels(self, topic): 51 | doc_list = self.get_doc_list(topic) 52 | rels = 0.0 53 | for doc in doc_list: 54 | if self.get_value(topic, doc) > 0.0: 55 | rels += 1.0 56 | return rels 57 | -------------------------------------------------------------------------------- /cwl/seeker/trec_result_handler.py: -------------------------------------------------------------------------------- 1 | from cwl.seeker.common_helpers import file_exists 2 | from cwl.seeker.common_helpers import AutoVivification 3 | from cwl.seeker.topic_document_file_handler import TopicDocumentFileHandler 4 | 5 | 6 | def process_trec_line(line): 7 | # handles the specific format of the line - assumes 6 columns TREC Result format 8 | # topic QO document rank score EXP 9 | parts = line.partition(' ') 10 | topic = parts[0] 11 | parts = parts[2].partition(' ') 12 | parts = parts[2].partition(' ') 13 | docid = parts[0] 14 | parts = parts[2].partition(' ') 15 | rank = parts[0] 16 | parts = parts[2].partition(' ') 17 | score = parts[0] 18 | 19 | return (topic, docid, rank, score) 20 | 21 | 22 | class TrecResultHandler(TopicDocumentFileHandler): 23 | 24 | def __init__(self, filename=None): 25 | super(TrecResultHandler, self).__init__(filename) 26 | 27 | def _put_in_line(self, line): 28 | topic, docid, rank, score = process_trec_line(line) 29 | self.put_value(topic, docid, score) 30 | 31 | def _get_out_line(self, topic, doc, rank, score): 32 | # outputs in TREC Result format 33 | return "{0} Q0 {1} {2} {3} EXP\n".format(topic, doc.strip(), rank, score) 34 | 35 | def get_score(self, topic, doc): 36 | if self.data[topic][doc]: 37 | return self.data[topic][doc][1] 38 | else: 39 | return 0.0 40 | 41 | def update_score(self, topic, doc, score): 42 | if self.data[topic][doc]: 43 | self.data[topic][doc][1] = score 44 | return True 45 | return False 46 | 47 | 48 | def get_value(self, topic, doc): 49 | if self.data[topic][doc]: 50 | return self.data[topic][doc][0] 51 | else: 52 | 
return 0 53 | 54 | def get_rank(self, topic, doc): 55 | return self.get_value(topic, doc) 56 | 57 | 58 | def get_ranking(self, topic): 59 | ''' 60 | Returns an ordered list of tuples (doc,rank, score) 61 | ''' 62 | udl = self.get_doc_list(topic) 63 | dl = [] 64 | for d in udl: 65 | dl.append((d, self.get_score(topic,d))) 66 | odl = sorted(dl, key=lambda doc: doc[1],reverse=True) 67 | 68 | return odl 69 | 70 | def save_file(self, filename, append=False): 71 | ''' Saves the docs ordered by rank for each topic 72 | ''' 73 | if append: 74 | outfile = open(filename, "a") 75 | else: 76 | outfile = open(filename, "w") 77 | 78 | for t in self.get_topic_list(): 79 | odl = self.get_ranking(t) 80 | rank = 1 81 | for d in odl: 82 | out_line = self._get_out_line(t,d[0], rank, d[1]) 83 | rank += 1 84 | outfile.write (out_line) 85 | 86 | outfile.close() 87 | 88 | def clear(self): 89 | self.data = AutoVivification() 90 | -------------------------------------------------------------------------------- /cwl/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/tests/__init__.py -------------------------------------------------------------------------------- /cwl/tests/big_gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 2.0 2 | T1 00 D2 0.0 3 | T1 00 D3 2.0 4 | T1 00 D4 1.0 5 | T1 00 D7 2.0 6 | T1 00 D8 0.0 7 | T1 00 D9 1.0 8 | T1 00 D10 0.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 2.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 0.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 0.0 23 | T3 00 D10 2.0 -------------------------------------------------------------------------------- /cwl/tests/common_metric_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | #sys.path.insert(0, '../') 4 | 5 | from cwl.ruler.measures.cwl_precision import PrecisionCWLMetric 6 | from cwl.ruler.ranking import Ranking 7 | 8 | 9 | class TestPrecision(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.p1 = PrecisionCWLMetric(k=1) 13 | self.p5 = PrecisionCWLMetric(k=5) 14 | 15 | def test_patone_ranking1(self): 16 | """ 17 | Test that Precision at one is correct for each ranking. 18 | """ 19 | 20 | ranking1 = Ranking("T1", [1], [1]) 21 | self.p1.measure(ranking1) 22 | self.assertEqual(self.p1.expected_utility, 1.0) 23 | self.assertEqual(self.p1.expected_total_utility, 1.0) 24 | 25 | 26 | def test_patone_ranking2(self): 27 | """ 28 | Test that Precision at one is correct for each ranking. 29 | """ 30 | ranking2 = Ranking("T2", [1, 0], [1, 1]) 31 | self.p1.measure(ranking2) 32 | self.assertEqual(self.p1.expected_utility, 1.0) 33 | self.assertEqual(self.p1.expected_total_utility, 1.0) 34 | 35 | 36 | def test_patone_ranking3(self): 37 | """ 38 | Test that Precision at one is correct for each ranking. 39 | """ 40 | ranking3 = Ranking("T3", [0, 1], [1, 1]) 41 | self.p1.measure(ranking3) 42 | self.assertEqual(self.p1.expected_utility, 0.0) 43 | self.assertEqual(self.p1.expected_total_utility, 0.0) 44 | 45 | 46 | 47 | def test_padding(self): 48 | """ 49 | Test that Precision at one is correct for each ranking. 
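(More precisely: this checks that P@5 pads the C vector correctly when the ranking is shorter than k, giving EU = 1/5 = 0.2 and ETU = 1.0 for a single relevant item.)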
50 | """ 51 | ranking3 = Ranking("T3", [0, 1], [1, 1]) 52 | self.p5.measure(ranking3) 53 | self.assertEqual(self.p5.expected_utility, 0.2) 54 | self.assertEqual(self.p5.expected_total_utility, 1.0) 55 | 56 | 57 | 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | unittest.main() -------------------------------------------------------------------------------- /cwl/tests/cost_file: -------------------------------------------------------------------------------- 1 | E1 1.0 2 | E2 2.0 3 | E3 5.0 4 | E4 7.5 5 | -------------------------------------------------------------------------------- /cwl/tests/dcg_precision_metrics_file: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(5) 3 | PrecisionCWLMetric(10) 4 | PrecisionCWLMetric(20) 5 | NDCGCWLMetric(1) 6 | NDCGCWLMetric(5) 7 | NDCGCWLMetric(10) 8 | NDCGCWLMetric(20) 9 | -------------------------------------------------------------------------------- /cwl/tests/gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 1.0 2 | T1 00 D2 0.0 3 | T1 00 D3 1.0 4 | T1 00 D4 1.0 5 | T1 00 D7 1.0 6 | T1 00 D8 0.0 7 | T1 00 D9 1.0 8 | T1 00 D10 0.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 1.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 0.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 0.0 23 | T3 00 D10 1.0 -------------------------------------------------------------------------------- /cwl/tests/metrics_file: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(5) 3 | PrecisionCWLMetric(10) 4 | PrecisionCWLMetric(20) 5 | RBPCWLMetric(0.9) 6 | NDCGCWLMetric(10) 7 | RRCWLMetric() 8 | APCWLMetric() 9 | INSTCWLMetric(1) 10 | INSQCWLMetric(1) 11 | BPMCWLMetric(1,1000) 12 | BPMCWLMetric(1000,10) 13 | BPMCWLMetric(1.2,10) 14 | BPMDCWLMetric(1,1000) 15 | BPMDCWLMetric(1000,10) 16 | BPMDCWLMetric(1.2,10) 17 | UMeasureCWLMetric(50) 18 | UMeasureCWLMetric(10) 19 | TBGCWLMetric(22) 20 | IFTGoalCWLMetric(2.0, 0.9, 1) 21 | IFTGoalCWLMetric(2.0, 0.9, 10) 22 | IFTGoalCWLMetric(2.0, 0.9, 100) 23 | IFTRateCWLMetric(0.2, 0.9, 1) 24 | IFTRateCWLMetric(0.2, 0.9, 10) 25 | IFTRateCWLMetric(0.2, 0.9, 100) 26 | IFTGoalRateCWLMetric(2.0,0.9,10, 0.2, 0.9, 10) 27 | IFTGoalRateCWLMetric(2.0,0.9,100, 0.2, 0.9, 100) -------------------------------------------------------------------------------- /cwl/tests/neg_gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 1.0 2 | T1 00 D2 -1.0 3 | T1 00 D3 1.0 4 | T1 00 D4 1.0 5 | T1 00 D7 1.0 6 | T1 00 D8 -1.0 7 | T1 00 D9 1.0 8 | T1 00 D10 -1.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 1.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 -1.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 -1.0 23 | T3 00 D10 1.0 -------------------------------------------------------------------------------- /cwl/tests/precision_metrics: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(2) 3 | PrecisionCWLMetric(3) 4 | PrecisionCWLMetric(4) 5 | PrecisionCWLMetric(5) 6 | PrecisionCWLMetric(10) 7 | -------------------------------------------------------------------------------- /cwl/tests/qrel_file: 
-------------------------------------------------------------------------------- 1 | T1 00 D1 1 2 | T1 00 D2 0 3 | T1 00 D3 1 4 | T1 00 D4 1 5 | T1 00 D7 1 6 | T1 00 D8 0 7 | T1 00 D9 1 8 | T1 00 D10 0 9 | T2 00 D1 1 10 | T2 00 D2 1 11 | T2 00 D5 1 12 | T2 00 D6 0 13 | T2 00 D7 1 14 | T2 00 D8 1 15 | T2 00 D9 1 16 | T2 00 D10 0 17 | T3 00 D4 0 18 | T3 00 D5 0 19 | T3 00 D6 1 20 | T3 00 D7 1 21 | T3 00 D8 1 22 | T3 00 D9 0 23 | T3 00 D10 1 24 | -------------------------------------------------------------------------------- /cwl/tests/ranking_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import numpy as np 4 | sys.path.insert(0,'./') 5 | 6 | from cwl.ruler.ranking import Ranking 7 | from cwl.ruler.ranking import RankingMaker 8 | from cwl.seeker.trec_qrel_handler import TrecQrelHandler 9 | 10 | class TestRanking(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.ranking1 = Ranking("T1", [1., 0., 0.5, 1., 0.0], [1., 1., 1., 1., 1.]) 14 | self.ranking2 = Ranking("T2", [1., np.nan, 0.5, 1., np.nan], [1., 1., 1., 1., 1.]) 15 | self.ranking3 = Ranking("T3", 16 | [1., np.nan, 0.5, 1., np.nan, 0.0, 0.0, 0.0, 0.0, 0.0], 17 | [1., 1., 1., 1., 1., 2., 2., 2., 2., 2.], 18 | max_gain=2.0, max_cost=5.0) 19 | 20 | self.ranking4 = Ranking("T4", 21 | [1., np.nan, 0.5, 1., np.nan, 0.0, 0.0, 0.0, 0.0, 0.0], 22 | [], 23 | max_gain=2.0, max_cost=3.0, min_cost=2.0) 24 | 25 | self.ranking5 = Ranking("T5", 26 | [1., 0., 1., 1., 1., 0.0, 0.0, 0.0, 0.0, 0.0], 27 | []) 28 | 29 | def test_ranking1_total_rels(self): 30 | """ 31 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 32 | Assumes that MAX_N = 1000 33 | """ 34 | min_total = self.ranking1.get_total_rels() 35 | max_total = self.ranking1.get_total_rels(worse_case=False) 36 | 37 | self.assertEqual(min_total, 3.0) 38 | self.assertEqual(max_total, 998.0) 39 | 40 | 41 | def test_ranking1_total_gain(self): 42 | """ 43 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 44 | """ 45 | min_total = self.ranking1.get_total_gain() 46 | max_total = self.ranking1.get_total_gain(worse_case=False) 47 | # print(self.ranking1.get_gain_vector(worse_case=False)) 48 | # print(max_total) 49 | self.assertEqual(min_total, 2.5) 50 | self.assertEqual(max_total, 997.5) 51 | 52 | def test_ranking2_total_rels(self): 53 | """ 54 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 55 | and that the np.nans are converted to min and max gain. 56 | """ 57 | min_total = self.ranking2.get_total_rels() 58 | max_total = self.ranking2.get_total_rels(worse_case=False) 59 | # print(self.ranking1.get_gain_vector(worse_case=False)) 60 | # print(max_total) 61 | self.assertEqual(min_total, 3.0) 62 | self.assertEqual(max_total, 1000.0) 63 | 64 | def test_ranking2_total_gain(self): 65 | """ 66 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 67 | and that the np.nans are converted to min and max gain. 
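(Arithmetic check: worse case 2.5 = 1.0 + 0.5 + 1.0 with both NaNs at min gain; best case 999.5 = 4.5 with both NaNs at max gain plus 995 padded items at gain 1.0.)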
68 | """ 69 | min_total = self.ranking2.get_total_gain() 70 | max_total = self.ranking2.get_total_gain(worse_case=False) 71 | # print(self.ranking1.get_gain_vector(worse_case=False)) 72 | # print(max_total) 73 | self.assertEqual(min_total, 2.5) 74 | self.assertEqual(max_total, 999.5) 75 | 76 | def test_ranking3_total_rels(self): 77 | """ 78 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 79 | and that the np.nans are converted to min and max gain. 80 | """ 81 | min_total = self.ranking3.get_total_rels() 82 | max_total = self.ranking3.get_total_rels(worse_case=False) 83 | # print(self.ranking1.get_gain_vector(worse_case=False)) 84 | # print(max_total) 85 | self.assertEqual(min_total, 3.0) 86 | self.assertEqual(max_total, 995.0) 87 | 88 | def test_ranking3_total_gain(self): 89 | """ 90 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 91 | and that the np.nans are converted to min and max gain. 92 | """ 93 | min_total = self.ranking3.get_total_gain() 94 | max_total = self.ranking3.get_total_gain(worse_case=False) 95 | # print(self.ranking1.get_gain_vector(worse_case=False)) 96 | # print(max_total) 97 | self.assertEqual(min_total, 2.5) 98 | self.assertEqual(max_total, 1986.5) 99 | 100 | 101 | def test_ranking3_total_cost(self): 102 | """ 103 | Test whether the tail is filled with max_cost (worse case) 104 | and min cost (best case) 105 | """ 106 | max_total = np.sum(self.ranking3.get_cost_vector()) 107 | min_total = np.sum(self.ranking3.get_cost_vector(worse_case=False)) 108 | self.assertEqual(max_total, 4965.0) 109 | self.assertEqual(min_total, 1005.0) 110 | 111 | def test_ranking4_total_cost_when_no_cost_vector_is_supplied(self): 112 | """ 113 | Test whether the tail is filled with max_cost (worse case) 114 | and min cost (best case) 115 | Note this is the reverse 116 | """ 117 | max_total = self.ranking4.get_total_cost() 118 | min_total = self.ranking4.get_total_cost(worse_case=False) 119 | self.assertEqual(max_total, 3000.0) 120 | self.assertEqual(min_total, 2000.0) 121 | 122 | def test_ranking5_sum_over_top_ranks(self): 123 | min_gains = self.ranking5.get_gain_vector() 124 | max_gains = self.ranking5.get_gain_vector(worse_case=False) 125 | # print(min_gains[0:5]) 126 | self.assertEqual(np.sum(min_gains[0:5]), 4) 127 | self.assertEqual(np.sum(max_gains[0:5]), 4) 128 | 129 | class TestRankingMaker(unittest.TestCase): 130 | 131 | def setUp(self): 132 | gh = TrecQrelHandler("qrel_file") 133 | gh.put_value("T1", "D1", 1.0) 134 | gh.put_value("T1", "D2", 0.0) 135 | gh.put_value("T1", "D3", 1.0) 136 | gh.put_value("T1", "D4", 0.0) 137 | gh.put_value("T1", "D5", 1.0) 138 | gh.put_value("T1", "D6", 0.0) 139 | gh.put_value("T1", "D7", 1.0) 140 | gh.put_value("T1", "D8", 0.0) 141 | gh.put_value("T1", "D9", 0.0) 142 | gh.put_value("T1", "D10", 1.0) 143 | 144 | self.rm = RankingMaker(topic_id="T1", gain_handler=gh, cost_dict=None) 145 | docs = ["D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10"] 146 | for d in docs: 147 | self.rm.add(d, "") 148 | 149 | def test_ranking(self): 150 | ranking = self.rm.get_ranking() 151 | #print(ranking.) 
152 | min_gains = ranking.get_gain_vector() 153 | max_gains = ranking.get_gain_vector(worse_case=False) 154 | #print(min_gains[0:20]) 155 | # print(np.cumsum(gains)[0:20]) 156 | # print(gains[0:10]) 157 | self.assertEqual(np.sum(min_gains[0:20]), 5.0) 158 | self.assertEqual(np.sum(max_gains[0:20]), 15.0) 159 | 160 | 161 | if __name__ == '__main__': 162 | unittest.main() -------------------------------------------------------------------------------- /cwl/tests/result_file: -------------------------------------------------------------------------------- 1 | T1 E2 D1 1 4.3 R1 2 | T1 E2 D2 2 4.2 R1 3 | T1 E1 D3 3 4.1 R1 4 | T1 E2 D4 4 3.9 R1 5 | T1 E3 D5 5 3.8 R1 6 | T1 E1 D6 6 3.7 R1 7 | T1 E2 D7 7 3.6 R1 8 | T1 E1 D8 8 3.5 R1 9 | T1 E2 D9 9 3.4 R1 10 | T1 E3 D10 10 3.3 R1 11 | T2 E1 D1 1 4.3 R1 12 | T2 E1 D2 2 4.2 R1 13 | T2 E1 D3 3 4.1 R1 14 | T2 E2 D4 4 3.9 R1 15 | T2 E2 D5 5 3.8 R1 16 | T2 E1 D6 6 3.7 R1 17 | T2 E2 D7 7 3.6 R1 18 | T2 E1 D8 8 3.5 R1 19 | T2 E2 D9 9 3.4 R1 20 | T2 E3 D10 10 3.3 R1 21 | T3 E3 D1 1 4.3 R1 22 | T3 E2 D2 2 4.2 R1 23 | T3 E1 D3 3 4.1 R1 24 | T3 E2 D4 4 3.9 R1 25 | T3 E2 D5 5 3.8 R1 26 | T3 E1 D6 6 3.7 R1 27 | T3 E2 D7 7 3.6 R1 28 | T3 E1 D8 8 3.5 R1 29 | T3 E2 D9 9 3.4 R1 30 | T3 E3 D10 10 3.3 R1 31 | -------------------------------------------------------------------------------- /make-instructions.txt: -------------------------------------------------------------------------------- 1 | Update verision number and any requirements in: 2 | 3 | setup.py 4 | 5 | Create the source distribution: 6 | 7 | python setup.py sdist 8 | 9 | Make sure twine is installed (pip install twine) and then do the upload: 10 | 11 | twine upload dist/* 12 | 13 | You will need your username and password for PyPi. 14 | 15 | To see if the changes worked you can upgrade with: 16 | 17 | pip install cwl-eval --upgrade 18 | 19 | 20 | -------------------------------------------------------------------------------- /make-requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | bleach==3.3.0 3 | certifi==2019.3.9 4 | chardet==3.0.4 5 | distlib==0.3.1 6 | docutils==0.14 7 | filelock==3.0.12 8 | idna==2.8 9 | numpy==1.23.4 10 | packaging==20.9 11 | pkginfo==1.5.0.1 12 | Pygments==2.7.4 13 | pyparsing==2.4.7 14 | readme-renderer==24.0 15 | requests==2.22.0 16 | requests-toolbelt==0.9.1 17 | six==1.12.0 18 | tqdm==4.32.1 19 | twine==1.13.0 20 | urllib3==1.26.5 21 | virtualenv==20.4.2 22 | webencodings==0.5.1 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open('README.md', 'r') as f: 4 | long_description = f.read() 5 | 6 | setuptools.setup( 7 | name='cwl-eval', 8 | version='1.0.12', 9 | 10 | scripts=['cwl-eval'], 11 | 12 | author='Leif Azzopardi, Paul Thomas, Alistair Moffat', 13 | author_email='leifos@acm.org, pathom@microsoft.com, ammoffat@unimelb.edu.au', 14 | 15 | description='An information retrieval evaluation script based on the C/W/L framework ' 16 | 'that is TREC Compatible and provides a replacement for INST_EVAL, RBP_EVAL, ' 17 | 'TBG_EVAL, UMeasure and TREC_EVAL scripts. 
All measurements are reported in ' 18 | 'the same units making all metrics directly comparable.', 19 | 20 | long_description=long_description, 21 | long_description_content_type='text/markdown', 22 | 23 | url='https://github.com/ireval/cwl', 24 | 25 | packages=setuptools.find_packages(), 26 | 27 | python_requires='>=3', 28 | 29 | install_requires=[ 30 | 'numpy', 31 | ], 32 | 33 | classifiers=[ 34 | 'Intended Audience :: Science/Research', 35 | 'Programming Language :: Python :: 3 :: Only', 36 | 'Topic :: Scientific/Engineering :: Information Analysis', 37 | 'License :: OSI Approved :: MIT License', 38 | 'Development Status :: 3 - Alpha', 39 | 40 | ], 41 | 42 | ) 43 | --------------------------------------------------------------------------------