├── .gitignore ├── LICENSE ├── README.md ├── cwl-eval ├── cwl ├── .DS_Store ├── __init__.py ├── cwl_eval.py ├── ruler │ ├── __init__.py │ ├── cwl_ruler.py │ ├── measures │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── cwl_ap.py │ │ ├── cwl_bpm.py │ │ ├── cwl_dcg.py │ │ ├── cwl_ift.py │ │ ├── cwl_insq.py │ │ ├── cwl_inst.py │ │ ├── cwl_metrics.py │ │ ├── cwl_nerr.py │ │ ├── cwl_npv.py │ │ ├── cwl_precision.py │ │ ├── cwl_rbp.py │ │ ├── cwl_rr.py │ │ ├── cwl_set.py │ │ ├── cwl_tbg.py │ │ └── cwl_umeasure.py │ └── ranking.py ├── seeker │ ├── __init__.py │ ├── common_helpers.py │ ├── topic_document_file_handler.py │ ├── trec_qrel_handler.py │ └── trec_result_handler.py └── tests │ ├── __init__.py │ ├── big_gain_file │ ├── common_metric_test.py │ ├── cost_file │ ├── dcg_precision_metrics_file │ ├── gain_file │ ├── metrics_file │ ├── neg_gain_file │ ├── precision_metrics │ ├── qrel_file │ ├── ranking_test.py │ └── result_file ├── make-instructions.txt ├── make-requirements.txt ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ireval/cwl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # C/W/L Evaluation Script
2 | An evaluation script based on the C/W/L framework
3 | that is TREC-compatible and provides a replacement
4 | for INST_EVAL, RBP_EVAL, TBG_EVAL, UMeasure and TREC_EVAL.
5 | 
6 | 
7 | ## Install
8 | 
9 | Install either via `pip install cwl-eval` or `git clone https://github.com/ireval/cwl.git`.
10 | `cwl-eval` requires Python 3 and NumPy.
11 | 
12 | 
13 | ## Usage
14 | 
15 | Once you have installed the C/W/L Evaluation Framework using `pip install`, you should be able to use `cwl-eval` as shown below.
16 | If you have used `git clone` to install the framework, then you will need to run `cwl_eval.py` directly.
17 | 
18 |     Usage: cwl-eval <qrel_file> <result_file> -c <cost_file> -m <metrics_file> -b <bib_file>
19 | 
20 |     Usage: cwl-eval <qrel_file> <result_file> -c <cost_file> -m <metrics_file>
21 | 
22 |     Usage: cwl-eval <qrel_file> <result_file>
23 | 
24 |     Usage: cwl-eval -h
25 | 
26 | - <qrel_file>: A TREC formatted qrel file with relevance scores used as gains (float).
27 |   Four column tab/space sep file with fields: topic_id unused doc_id gain
28 | 
29 | - <cost_file>: Costs associated with each element type.
30 | 
31 |   If not specified, costs default to one for all elements.
32 |   Two column tab/space sep file with fields: element_type element_cost
33 | 
34 | - <result_file>: A TREC formatted result file.
35 |   Six column tab/space sep file with fields: topic_id element_type doc_id rank score run_id
36 | 
37 | - <metrics_file>: The list of metrics that are to be reported.
38 |   If not specified, a set of default metrics will be reported.
39 |   Tab/space sep file with fields: metric_name params
40 | 
41 | - <bib_file>: Specify this file if you would like the BibTeX associated with the measures specified to be
42 |   output to a file called <bib_file>.
43 | 
44 | - -n: Add the -n flag to output column names (e.g. Topic, Metric, EU, ETU, EC, ETC, ED)
45 | 
46 | - -r: Add the -r flag to also output residuals for each measurement.
47 | 
48 | - --max_depth <int>: Specify the depth of the calculation of the metrics (default=1000).
49 | 
50 | - --max_gain <float>: Specify the maximum value of the gain (default=1.0). Note some metrics have restrictions on the maximum allowable value. This is also used when computing the residuals.
51 | 
52 | - --min_gain <float>: Specify the minimum value of the gain (default=0.0). Note some metrics have restrictions on the minimum allowable value.
53 | 
54 | 
55 | 
56 | **Example without using a cost file**
57 | When no costs are specified, the cost per item is assumed to be 1.0, so EC will equal 1.0 and ETC will equal ED.
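For illustration, a minimal qrel_file in this format might contain lines such as the following (all topic, document and run identifiers in these snippets are made up):

    T1 0 D1 1.0
    T1 0 D4 0.5
    T1 0 D9 1.0

with a matching illustrative result_file:

    T1 doc D1 1 23.1 run1
    T1 doc D4 2 19.7 run1
    T1 doc D7 3 17.2 run1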
58 | 
59 |     cwl-eval qrel_file result_file
60 | 
61 | 
62 | **Example using a cost file**
63 | 
64 |     cwl-eval qrel_file result_file -c cost_file
65 | 
66 | 
67 | 
68 | **Output**
69 | A seven column tab/space separated file that contains:
70 | 
71 | - Topic ID
72 | - Metric Name
73 | - Expected Utility per Item (EU)
74 | - Expected Total Utility (ETU)
75 | - Expected Cost per Item (EC)
76 | - Expected Total Cost (ETC)
77 | - Expected Depth (ED)
78 | 
79 | If the `-r` flag is included, then another five columns will be included: ResEU, ResETU, ResEC, ResETC, ResED.
80 | These report the residual values for each of the measures (i.e. the difference between the best case and worst case for un-judged items).
81 | 
82 | 
83 | 
84 | CWL Citation
85 | ------------
86 | Please consider citing the following paper when using our code for your evaluations:
87 | 
88 |     @inproceedings{azzopardi2019cwl,
89 |     author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair},
90 |     title = {cwl\_eval: An Evaluation Tool for Information Retrieval},
91 |     booktitle = {Proc. of the 42nd International ACM SIGIR Conference},
92 |     series = {SIGIR '19},
93 |     year = {2019}
94 |     }
95 | 
96 | 
97 | 
98 | Metrics within CWL EVAL
99 | -----------------------
100 | For each of the metrics provided in cwl_eval.py, the user model for each
101 | measure has been extracted and encoded within the C/W/L framework.
102 | 
103 | All weightings have been converted to probabilities.
104 | 
105 | As a result, all metrics report a series of values (not a single value):
106 | - Expected Utility per item examined (EU),
107 | - Expected Total Utility (ETU),
108 | - Expected Cost per item examined (EC),
109 | - Expected Total Cost (ETC), and
110 | - Expected number of items to be examined, i.e. expected depth (ED).
111 | 
112 | All the values are related, such that:
113 | 
114 |     ETU = EU * ED
115 | 
116 | and
117 | 
118 |     ETC = EC * ED
119 | 
120 | If the cost per item is 1.0, then the expected cost per item (EC) is 1.0,
121 | and the expected total cost (ETC) will be equal to the expected depth (ED).
122 | 
123 | Costs can be specified in whatever unit is desired, e.g. seconds, characters, words, etc.
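For example, a cost_file charging two hypothetical element types different costs (here in seconds) would be:

    doc 2.0
    snippet 0.5

where each element type named in the second column of the result file is charged the corresponding cost.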
124 | 
125 | 
126 | **List of Metrics**
127 | 
128 | - RR - (Expected) Reciprocal Rank
129 | - P@k - Precision at k
130 | - AP - Average Precision
131 | - RBP - Rank Biased Precision
132 | - INST - with T, the desired amount of gain
133 | - INSQ - with T, the desired amount of gain
134 | - NDCG@k - Normalized Discounted Cumulative Gain at k
135 | - BPM-Static - Bejewelled Player Model - Static
136 | - BPM-Dynamic - Bejewelled Player Model - Dynamic
137 | - UMeasure - U-Measure
138 | - TBG - Time Biased Gain
139 | - IFT-C1 - Information Foraging Theory (Goal)
140 | - IFT-C2 - Information Foraging Theory (Rate)
141 | - IFT-C1-C2 - Information Foraging Theory (Goal and Rate)
142 | - NERREq8 - Not/Nearly ERR(Eq8)@k using gain based stopping with truncation k
143 | - NERREq9 - Not/Nearly ERR(Eq9)@k using gain based stopping and discount with truncation k
144 | - NERREq10 - Not/Nearly ERR(Eq10)@phi using gain based stopping and RBP patience (phi)
145 | - NERREq11 - Not/Nearly ERR(Eq11)@T using gain based stopping and INST Goal (T)
146 | 
147 | 
148 | 
149 | **Sample Output from cwl_eval.py where costs per item = 1.0**
150 | 
151 |     cwl-eval qrel_file result_file
152 | 
153 | | Topic | Metric                                             | EU    | ETU   | EC    | ETC    | ED     |
154 | |-------|----------------------------------------------------|-------|-------|-------|--------|--------|
155 | | T1 | P@20 | 0.150 | 3.000 | 1.000 | 20.000 | 20.000 |
156 | | T1 | P@10 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
157 | | T1 | P@5 | 0.360 | 1.800 | 1.000 | 5.000 | 5.000 |
158 | | T1 | P@1 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
159 | | T1 | RBP@0.5 | 0.566 | 1.132 | 1.000 | 2.000 | 2.000 |
160 | | T1 | RBP@0.9 | 0.214 | 2.136 | 1.000 | 10.000 | 10.000 |
161 | | T1 | SDCG-k@10 | 0.380 | 1.726 | 1.000 | 4.544 | 4.544 |
162 | | T1 | SDCG-k@5 | 0.461 | 1.358 | 1.000 | 2.948 | 2.948 |
163 | | T1 | RR | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
164 | | T1 | AP | 0.397 | 1.907 | 1.000 | 4.800 | 4.800 |
165 | | T1 | INST-T=2 | 0.401 | 1.303 | 1.000 | 3.242 | 3.247 |
166 | | T1 | INST-T=1 | 0.680 | 1.071 | 1.000 | 1.574 | 1.575 |
167 | | T1 | INSQ-T=2 | 0.316 | 1.428 | 1.000 | 4.509 | 4.525 |
168 | | T1 | INSQ-T=1 | 0.465 | 1.198 | 1.000 | 2.572 | 2.576 |
169 | | T1 | BPM-Static-T=1-K=1000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
170 | | T1 | BPM-Static-T=1000-K=10 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
171 | | T1 | BPM-Static-T=1.2-K=10 | 0.400 | 1.200 | 1.000 | 3.000 | 3.000 |
172 | | T1 | BPM-Dynamic-T=1-K=1000-hb=1.0-hc=1.0 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
173 | | T1 | BPM-Dynamic-T=1000-K=10-hb=1.0-hc=1.0 | 0.300 | 3.000 | 1.000 | 10.000 | 10.000 |
174 | | T1 | BPM-Dynamic-T=1.2-K=10-hb=1.0-hc=1.0 | 0.400 | 1.200 | 1.000 | 3.000 | 3.000 |
175 | | T1 | U-L@50 | 0.109 | 2.772 | 1.000 | 25.500 | 25.500 |
176 | | T1 | U-L@10 | 0.338 | 1.860 | 1.000 | 5.500 | 5.500 |
177 | | T1 | TBG-H@22 | 0.083 | 2.676 | 1.000 | 32.242 | 32.242 |
178 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@1 | 0.456 | 1.323 | 1.000 | 2.903 | 2.903 |
179 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@10 | 0.308 | 2.078 | 1.000 | 6.738 | 6.738 |
180 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@100 | 0.289 | 2.224 | 1.000 | 7.698 | 7.698 |
181 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@1 | 0.463 | 1.255 | 1.000 | 2.711 | 2.711 |
182 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@10 | 0.293 | 2.040 | 1.000 | 6.965 | 6.965 |
183 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@100 | 0.197 | 2.994 | 1.000 | 15.208 | 15.208 |
184 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@10-A@2.0-b2@0.9-R2@10 | 0.329 | 1.804 | 1.000 | 5.487 | 5.487 |
185 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@100-A@2.0-b2@0.9-R2@100 | 0.289 | 2.223 | 1.000 | 7.697 | 7.697 |
186 | 
187 | 
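Note that every row above satisfies the identities ETU = EU * ED and ETC = EC * ED; for example, for P@10: ETU = 0.300 * 10.000 = 3.000 and ETC = 1.000 * 10.000 = 10.000. The same identities hold in the costed output below.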
188 | **Sample Output from cwl-eval where costs are set based on cost_file**
189 | 
190 |     cwl-eval qrel_file result_file -c cost_file
191 | 
192 | | Topic | Metric                                             | EU    | ETU   | EC    | ETC    | ED     |
193 | |-------|----------------------------------------------------|-------|-------|-------|--------|--------|
194 | | T1 | P@20 | 0.150 | 3.000 | 1.650 | 33.000 | 20.000 |
195 | | T1 | P@10 | 0.300 | 3.000 | 2.300 | 23.000 | 10.000 |
196 | | T1 | P@5 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
197 | | T1 | P@1 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
198 | | T1 | RBP@0.5 | 0.566 | 1.132 | 1.951 | 3.902 | 2.000 |
199 | | T1 | RBP@0.9 | 0.214 | 2.136 | 1.776 | 17.765 | 10.000 |
200 | | T1 | SDCG-k@10 | 0.380 | 1.726 | 2.188 | 9.943 | 4.544 |
201 | | T1 | SDCG-k@5 | 0.461 | 1.358 | 2.224 | 6.557 | 2.948 |
202 | | T1 | RR | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
203 | | T1 | AP | 0.397 | 1.907 | 1.958 | 9.400 | 4.800 |
204 | | T1 | INST-T=2 | 0.401 | 1.303 | 1.884 | 6.113 | 3.247 |
205 | | T1 | INST-T=1 | 0.680 | 1.071 | 1.955 | 3.077 | 1.575 |
206 | | T1 | INSQ-T=2 | 0.316 | 1.428 | 1.799 | 8.125 | 4.525 |
207 | | T1 | INSQ-T=1 | 0.465 | 1.198 | 1.887 | 4.855 | 2.576 |
208 | | T1 | BPM-Static-T=1-K=1000 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
209 | | T1 | BPM-Static-T=1000-K=10 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
210 | | T1 | BPM-Static-T=1.2-K=10 | 0.400 | 1.200 | 1.667 | 5.000 | 3.000 |
211 | | T1 | BPM-Dynamic-T=1-K=1000-hb=1.0-hc=1.0 | 1.000 | 1.000 | 2.000 | 2.000 | 1.000 |
212 | | T1 | BPM-Dynamic-T=1000-K=10-hb=1.0-hc=1.0 | 0.360 | 1.800 | 2.400 | 12.000 | 5.000 |
213 | | T1 | BPM-Dynamic-T=1.2-K=10-hb=1.0-hc=1.0 | 0.400 | 1.200 | 1.667 | 5.000 | 3.000 |
214 | | T1 | U-L@50 | 0.162 | 2.552 | 1.654 | 26.000 | 15.720 |
215 | | T1 | U-L@10 | 0.444 | 1.420 | 2.094 | 6.700 | 3.200 |
216 | | T1 | TBG-H@22 | 0.143 | 2.339 | 2.046 | 33.508 | 16.375 |
217 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@1 | 0.456 | 1.323 | 1.971 | 5.723 | 2.903 |
218 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@10 | 0.308 | 2.078 | 2.080 | 14.017 | 6.738 |
219 | | T1 | IFT-C1-T@2.0-b1@0.9-R1@100 | 0.289 | 2.224 | 2.068 | 15.922 | 7.698 |
220 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@1 | 0.516 | 1.180 | 1.958 | 4.481 | 2.289 |
221 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@10 | 0.404 | 1.368 | 2.011 | 6.802 | 3.382 |
222 | | T1 | IFT-C2-A@0.2-b2@0.9-R2@100 | 0.360 | 1.786 | 2.388 | 11.832 | 4.954 |
223 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@10-A@2.0-b2@0.9-R2@10 | 0.413 | 1.361 | 1.990 | 6.552 | 3.293 |
224 | | T1 | IFT-C1-C2-T@2.0-b1@0.9-R1@100-A@2.0-b2@0.9-R2@100 | 0.360 | 1.786 | 2.388 | 11.832 | 4.954 |
225 | 
226 | 
227 | **Using the metrics_file to specify the metrics**
228 | 
229 |     cwl-eval qrel_file result_file -m metrics_file
230 | 
231 | If a metrics_file is not specified, CWL Eval will default to a set of metrics
232 | defined in `cwl/ruler/cwl_ruler.py`.
233 | 
234 | If the metrics_file is specified, CWL Eval will instantiate and use the metrics listed.
235 | An example test_metrics_file is provided, which includes the following:
236 | 
237 |     PrecisionCWLMetric(k=1)
238 |     PrecisionCWLMetric(k=5)
239 |     PrecisionCWLMetric(k=10)
240 |     PrecisionCWLMetric(k=20)
241 |     RBPCWLMetric(theta=0.9)
242 |     NDCGCWLMetric(k=10)
243 |     RRCWLMetric()
244 |     APCWLMetric()
245 |     INSTCWLMetric(T=1.0)
246 |     INSQCWLMetric(T=1.0)
247 |     BPMCWLMetric(T=1.0,K=20)
248 |     BPMCWLMetric(T=2.0,K=10)
249 |     BPMDCWLMetric(T=1.0,K=20)
250 |     BPMDCWLMetric(T=2.0,K=10)
251 |     UMeasureCWLMetric(L=50)
252 |     UMeasureCWLMetric(L=10)
253 |     TBGCWLMetric(halflife=22)
254 |     IFTGoalCWLMetric(T=2.0, b1=0.9, R1=10)
255 |     IFTGoalCWLMetric(T=2.0, b1=0.9, R1=100)
256 |     IFTRateCWLMetric(A=0.2, b2=0.9, R2=10)
257 |     IFTRateCWLMetric(A=0.2, b2=0.9, R2=100)
258 |     IFTGoalRateCWLMetric(T=2.0, b1=0.9, R1=10, A=0.2, b2=0.9, R2=10)
259 |     IFTGoalRateCWLMetric(T=2.0, b1=0.9, R1=100, A=0.2, b2=0.9, R2=100)
260 |     NERReq8CWLMetric(k=10)
261 |     NERReq9CWLMetric(k=10)
262 |     NERReq10CWLMetric(phi=0.8)
263 |     NERReq11CWLMetric(T=2.0)
264 | 
265 | To specify which metrics you desire, inspect the metric classes in `cwl/ruler/measures/`
266 | to see what metrics are available, and how to parameterize them.
267 | 
268 | For example, if you only wanted precision-based measures, then you can list them as follows:
269 | 
270 |     PrecisionCWLMetric(1)
271 |     PrecisionCWLMetric(2)
272 |     PrecisionCWLMetric(3)
273 |     PrecisionCWLMetric(4)
274 |     PrecisionCWLMetric(5)
275 |     PrecisionCWLMetric(6)
276 |     PrecisionCWLMetric(7)
277 |     PrecisionCWLMetric(8)
278 |     PrecisionCWLMetric(9)
279 |     PrecisionCWLMetric(10)
280 |     PrecisionCWLMetric(11)
281 |     PrecisionCWLMetric(12)
282 |     PrecisionCWLMetric(13)
283 |     PrecisionCWLMetric(14)
284 |     PrecisionCWLMetric(15)
285 |     PrecisionCWLMetric(16)
286 |     PrecisionCWLMetric(17)
287 |     PrecisionCWLMetric(18)
288 |     PrecisionCWLMetric(19)
289 |     PrecisionCWLMetric(20)
290 | 
291 | If you only wanted Rank Biased Precision measures, then you can vary the patience parameter:
292 | 
293 |     RBPCWLMetric(0.1)
294 |     RBPCWLMetric(0.2)
295 |     RBPCWLMetric(0.3)
296 |     RBPCWLMetric(0.4)
297 |     RBPCWLMetric(0.5)
298 |     RBPCWLMetric(0.6)
299 |     RBPCWLMetric(0.7)
300 |     RBPCWLMetric(0.8)
301 |     RBPCWLMetric(0.9)
302 |     RBPCWLMetric(0.95)
303 |     RBPCWLMetric(0.99)
304 | 
305 | 
306 | 
--------------------------------------------------------------------------------
/cwl-eval:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import os
4 | import sys
5 | from distutils.sysconfig import get_python_lib
6 | 
7 | if __name__ == '__main__':
8 |     # Work out the directory for the cwl scripts (in the site-packages directory)
9 |     site_packages_dir = get_python_lib()
10 |     cwl_dir = os.path.join(site_packages_dir, 'cwl')
11 |     #print(cwl_dir)
12 | 
13 |     # If in developer mode, we can work this out from the current directory.
14 |     current_dir = os.path.dirname(os.path.realpath(__file__))
15 |     scripts_dir = os.path.join(current_dir, 'cwl')
16 |     #print(scripts_dir)
17 | 
18 |     # Prepend the paths to the PYTHONPATH for this instance.
19 |     sys.path.insert(0, cwl_dir)
20 |     #sys.path.insert(0, os.path.join(cwl_dir,'cwl'))
21 | 
22 |     sys.path.insert(0, scripts_dir)
23 |     #sys.path.insert(0, os.path.join(scripts_dir,'cwl'))
24 |     #print(os.path.join(scripts_dir,'cwl'))
25 |     # Now we should be able to import CWL without issue
26 |     from cwl import cwl_eval
27 | 
28 |     # Parse the arguments, check that the files exist, and run!
29 |     args = cwl_eval.parse_args()
30 | 
31 |     cwl_eval.check_file_exists(args.result_file)
32 |     cwl_eval.check_file_exists(args.gain_file)
33 |     cwl_eval.check_file_exists(args.cost_file)
34 |     cwl_eval.check_file_exists(args.metrics_file)
35 | 
36 |     cwl_eval.main(args.result_file, args.gain_file, args.cost_file, args.metrics_file, args.bib_file,
37 |                   args.colnames, args.residuals, args.max_gain, args.min_gain, args.max_cost, args.min_cost, args.max_depth)
--------------------------------------------------------------------------------
/cwl/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/.DS_Store
--------------------------------------------------------------------------------
/cwl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/__init__.py
--------------------------------------------------------------------------------
/cwl/cwl_eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | cwl_eval tool for information retrieval evaluation of TREC formatted results
4 | """
5 | 
6 | __author__ = 'leifos'
7 | __credits__ = ['InProceedings{Azzopardi:2019:cwl,'
8 |                'author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair}, '
9 |                'title = {cwl_eval: An evaluation tool for information retrieval},'
10 |                'booktitle = {Proceedings of the International ACM SIGIR Conference},'
11 |                'year = {2019}}']
12 | __license__ = 'MIT'
13 | __version__ = '1.0.0'
14 | 
15 | import os
16 | import argparse
17 | import logging
18 | from cwl.seeker.trec_qrel_handler import TrecQrelHandler
19 | from cwl.ruler.cwl_ruler import CWLRuler
20 | from cwl.ruler.ranking import RankingMaker, Ranking
21 | 
22 | 
23 | def read_in_cost_file(cost_file):
24 |     """
25 |     Reads in the cost file and stores it in a dictionary for looking up the costs.
26 |     The element_type is to be denoted in the TREC Results File using the previously unused field (2nd Column).
27 |     :param cost_file: expects a space/tab separated file with element_type (string) and cost (float)
28 |     :return: returns a dictionary of element_type/costs
29 |     """
30 |     costs = dict()
31 |     with open(cost_file, "r") as cf:
32 |         while cf:
33 |             line = cf.readline()
34 |             if not line:
35 |                 break
36 |             (element_type, cost) = line.split()
37 |             element_type = element_type.strip()
38 |             costs[element_type] = float(cost)
39 |     return costs
40 | 
41 | 
42 | def check_file_exists(filename):
43 |     if filename and not os.path.exists(filename):
44 |         print("{0} Not Found".format(filename))
45 |         quit(1)
46 | 
47 | 
48 | def parse_args():
49 | 
50 |     arg_parser = argparse.ArgumentParser(description="CWL Evaluation Metrics")
51 |     arg_parser.add_argument("gain_file", help="A TREC Formatted Qrel File with "
52 |                             "relevance column assumed to be gain values."
53 |                             "Gain values should be between zero and one (unless otherwise specified)."
54 |                             "Four column tab/space sep file with fields: topic_id unused doc_id gain")
55 |     arg_parser.add_argument("result_file",
56 |                             help="TREC formatted results file. Six column tab/space sep file with fields:"
57 |                                  " topic_id element_type doc_id rank score run_id.")
58 |     arg_parser.add_argument("-c", "--cost_file",
59 |                             help="Costs associated with each element type specified in result file.",
60 |                             required=False, default=None)
61 |     arg_parser.add_argument("-m", "--metrics_file", help="The list of metrics that are to be reported. "
62 |                             "If not specified, a set of default metrics will be reported."
63 |                             " Tab/space sep file with fields: metric_name params",
64 |                             required=False, default=None)
65 |     arg_parser.add_argument("-b", "--bib_file", help="If specified, then the BibTeX for the measures used"
66 |                             " will be saved to the filename given.", required=False,
67 |                             default=None)
68 |     arg_parser.add_argument("-n", "--colnames", help="Includes headings in the output.",
69 |                             required=False, action="store_true")
70 |     arg_parser.add_argument("-r", "--residuals", help="Include residual calculations.",
71 |                             required=False, action="store_true")
72 |     arg_parser.add_argument("--max_gain", help="Maximum gain associated with an item used for computing residuals"
73 |                             " and checking gain input file. "
74 |                             "(default=1.0)", required=False, default=1.0, type=float)
75 |     arg_parser.add_argument("--min_gain", help="Minimum gain associated with an item used"
76 |                             " for checking gain input file. "
77 |                             "(default=0.0)", required=False, default=0.0, type=float)
78 |     arg_parser.add_argument("--max_cost", help="Maximum cost associated with an item. Used for computing residuals. "
79 |                             "(default=1.0)", required=False, default=1.0, type=float)
80 |     arg_parser.add_argument("--min_cost", help="Minimum cost associated with an item. Used for computing residuals. "
81 |                             "(default=1.0)", required=False, default=1.0, type=float)
82 |     arg_parser.add_argument("--max_depth", help="Maximum depth to compute metrics. 
" 83 | "(default=1000)", required=False, default=1000, type=int) 84 | 85 | p_args = arg_parser.parse_args() 86 | if p_args.colnames: 87 | p_args.colnames = True 88 | else: 89 | p_args.colnames = False 90 | 91 | if p_args.residuals: 92 | p_args.residuals = True 93 | else: 94 | p_args.residuals = False 95 | 96 | return p_args 97 | 98 | 99 | def main(results_file, gain_file, cost_file=None, metrics_file=None, bib_file=None, col_names=False, 100 | residuals=False, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 101 | 102 | logger = logging.getLogger('cwl') 103 | logger.setLevel(logging.DEBUG) 104 | logger.addHandler(logging.FileHandler('cwl.log')) 105 | logger.info("Processing: {} using gain: {} and costs: {}".format(results_file, gain_file, cost_file)) 106 | logger.info("max_gain={} min_gain={} max_cost={} min_cost={} max_n={}".format(max_gain, min_gain, max_cost, min_cost, max_n)) 107 | if residuals: 108 | logger.info("Residuals are being computed assuming max gain is: {}".format(max_gain)) 109 | qrh = TrecQrelHandler(gain_file) 110 | qrh.validate_gains(min_gain=min_gain, max_gain=max_gain) 111 | costs = None 112 | # read in cost file - if cost file exists 113 | if cost_file: 114 | costs = read_in_cost_file(cost_file) 115 | cwl_ruler = CWLRuler(metrics_file, residuals) 116 | 117 | curr_topic_id = None 118 | ranking_maker = None 119 | 120 | if col_names: 121 | if residuals: 122 | print("Topic\tMetric\tEU\tETU\tEC\tETC\tED\tResEU\tResETU\tResEC\tResETC\tResED") 123 | else: 124 | print("Topic\tMetric\tEU\tETU\tEC\tETC\tED") 125 | 126 | with open(results_file, "r") as rf: 127 | while rf: 128 | line = rf.readline() 129 | if not line: 130 | break 131 | (topic_id, element_type, doc_id, rank, score, run_id) = line.split() 132 | doc_id = doc_id.strip() 133 | 134 | if topic_id == curr_topic_id: 135 | # build vectors 136 | ranking_maker.add(doc_id, element_type) 137 | else: 138 | if curr_topic_id is not None: 139 | # Perform the measurements 140 | ranking = ranking_maker.get_ranking() 141 | # print(ranking._gains[0:10]) 142 | cwl_ruler.measure(ranking) 143 | cwl_ruler.report() 144 | 145 | # new topic 146 | curr_topic_id = topic_id 147 | 148 | # reset seen list 149 | ranking_maker = RankingMaker(curr_topic_id, qrh, costs, 150 | max_gain=max_gain, max_cost=max_cost, min_cost=min_cost, max_n=max_n) 151 | ranking_maker.add(doc_id, element_type) 152 | 153 | # Perform the Measurements on the last topic 154 | ranking = ranking_maker.get_ranking() 155 | # print(ranking._gains[0:10]) 156 | cwl_ruler.measure(ranking) 157 | cwl_ruler.report() 158 | 159 | if bib_file: 160 | cwl_ruler.save_bibtex(bib_file) 161 | 162 | 163 | if __name__ == "__main__": 164 | args = parse_args() 165 | 166 | check_file_exists(args.result_file) 167 | check_file_exists(args.gain_file) 168 | check_file_exists(args.cost_file) 169 | check_file_exists(args.metrics_file) 170 | 171 | main(args.result_file, args.gain_file, args.cost_file, args.metrics_file, args.bib_file, 172 | args.colnames, args.residuals, args.max_gain, args.min_gain, args.max_cost, args.min_cost, args.max_depth) 173 | -------------------------------------------------------------------------------- /cwl/ruler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/__init__.py -------------------------------------------------------------------------------- /cwl/ruler/cwl_ruler.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | import importlib 4 | 5 | from cwl.ruler.measures.cwl_metrics import * 6 | from cwl.ruler.measures.cwl_precision import * 7 | from cwl.ruler.measures.cwl_rbp import * 8 | from cwl.ruler.measures.cwl_rr import * 9 | from cwl.ruler.measures.cwl_ap import * 10 | from cwl.ruler.measures.cwl_dcg import * 11 | from cwl.ruler.measures.cwl_inst import * 12 | from cwl.ruler.measures.cwl_insq import * 13 | from cwl.ruler.measures.cwl_tbg import * 14 | from cwl.ruler.measures.cwl_bpm import * 15 | from cwl.ruler.measures.cwl_umeasure import * 16 | from cwl.ruler.measures.cwl_ift import * 17 | from cwl.ruler.measures.cwl_nerr import * 18 | from cwl.ruler.ranking import Ranking 19 | 20 | class CWLRuler(object): 21 | 22 | def __init__(self, metrics_file=None, residuals=False): 23 | self.metrics = [] 24 | #add the metrics to the list 25 | if metrics_file: 26 | # load up the metrics specified 27 | self.populate_list(metrics_file) 28 | else: 29 | # use the default set of metrics 30 | # ideally we will tune these to create a set of baselines. 31 | # however, depending on the costs used... the tuning will be different 32 | # for instance, U-measure costs are in characters, while TBG costs are in seconds 33 | # if costs are not specified, then the cost of each item is 1.0 34 | self.metrics = [ 35 | PrecisionCWLMetric(1), 36 | PrecisionCWLMetric(2), 37 | PrecisionCWLMetric(3), 38 | PrecisionCWLMetric(4), 39 | PrecisionCWLMetric(5), 40 | PrecisionCWLMetric(10), 41 | RBPCWLMetric(0.2), 42 | RBPCWLMetric(0.4), 43 | RBPCWLMetric(0.8), 44 | NDCGCWLMetric(5), 45 | NDCGCWLMetric(10), 46 | RRCWLMetric(), 47 | APCWLMetric(), 48 | INSTCWLMetric(1.0), 49 | INSTCWLMetric(2.0), 50 | INSTCWLMetric(3.0), 51 | ] 52 | 53 | for m in self.metrics: 54 | m.residuals = residuals 55 | 56 | def measure(self, ranking): 57 | for metric in self.metrics: 58 | metric.measure(ranking) 59 | 60 | def report(self): 61 | for metric in self.metrics: 62 | metric.report() 63 | 64 | def csv(self): 65 | out = "" 66 | for metric in self.metrics: 67 | out += (metric.csv() + ";") 68 | return out 69 | 70 | def populate_list(self, input_filename): 71 | """ 72 | Reads from the input filename -- should be like 73 | ClassName(param1, param2, ...) 74 | Then once each class has been instantiated, adds to the self.metrics list 75 | Thanks @maxwelld90 76 | """ 77 | f = open(input_filename, 'r') 78 | 79 | for line in f: 80 | # Process the input line 81 | line_split = line.strip().split('(') 82 | line_split[-1] = line_split[-1][:-1] # Removes the extra bracket at the end 83 | 84 | class_name = line_split[0] 85 | parameters = line_split[1].split(',') 86 | self.metrics.append(self.instantiate_class(class_name, *parameters)) 87 | 88 | f.close() 89 | 90 | def instantiate_class(self, requested_class_name, *args, **kwargs): 91 | """ 92 | Given a class name and one or more parameters, attempts to instantiate the requested class with the provided parameters. 93 | If successful, the instantiated class is returned. 94 | """ 95 | classes = self.get_class_list() 96 | ref = None 97 | casted_args = [] 98 | 99 | # Change the args to ints/floats. Assuming that that is all that is required. 100 | for i in range(0, len(args)): 101 | val = args[i] 102 | 103 | if val == '': 104 | continue 105 | 106 | if '.' 
in val:
107 |                 casted_args.append(float(val))
108 |             else:
109 |                 casted_args.append(int(val))
110 | 
111 |         for class_tuple in classes:
112 |             class_name = class_tuple[0]
113 |             class_ref = class_tuple[1]
114 | 
115 |             if class_name == requested_class_name:
116 |                 ref = class_ref(*casted_args)  # Instantiate the class with parameters!
117 |                 # If you want to use parameter names, try kwargs instead.
118 | 
119 |         # If ref is not set, the class was not located!
120 |         if ref is None:
121 |             raise NameError("The class {0} could not be found.".format(requested_class_name))
122 | 
123 |         return ref
124 | 
125 |     def get_class_list(self):
126 |         """
127 |         Looking inside the measures package, returns a list of all the classes that are available for instantiating.
128 |         This means that any class inside any .py file in the measures directory is returned in the list from this method.
129 |         """
130 |         modules = []
131 |         classes = []
132 |         path = os.path.dirname(os.path.abspath(__file__))
133 |         measures_path = os.path.join(path, 'measures')
134 |         package_path = 'ruler.measures'
135 | 
136 |         # List through the modules in the specified package, ignoring __init__.py, and append them to a list.
137 |         for f in os.listdir(measures_path):
138 |             if f.endswith('.py') and not f.startswith('__init__'):
139 |                 modules.append('{0}.{1}'.format(package_path, os.path.splitext(f)[0]))
140 | 
141 |         module_references = []
142 | 
143 |         # Attempt to import each module in turn so we can access its classes
144 |         for module in modules:
145 |             module_references.append(importlib.import_module(module))
146 | 
147 |         # Now loop through each module, looking at the classes within it -
148 |         # and then append each class to a list of valid classes.
149 |         for module in module_references:
150 |             for name, obj in inspect.getmembers(module):
151 |                 if inspect.isclass(obj):
152 |                     classes.append((obj.__name__, obj))
153 | 
154 |         return classes
155 | 
156 |     def print_list(self):
157 |         """
158 |         Proof that it works, iterates over each instantiated metric class and calls whoami().
159 |         """
160 |         print("Displaying each metric:")
161 |         print("======")
162 |         for metric in self.metrics:
163 |             metric.whoami()
164 |             print("======")
165 |         print(self.metrics)
166 |         print("END")
167 |         print()
168 | 
169 |     def save_bibtex(self, bib_file):
170 | 
171 |         eval_tool_bibtex = """
172 |         @inproceedings{azzopardi2019cwl,
173 |         author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair},
174 |         title = {cwl\_eval: An Evaluation Tool for Information Retrieval},
175 |         booktitle = {Proc. of the 42nd International ACM SIGIR Conference},
176 |         series = {SIGIR '19},
177 |         year = {2019}
178 |         }
179 |         """
180 | 
181 |         bib_list = [eval_tool_bibtex]
182 | 
183 |         for m in self.metrics:
184 |             if m.bibtex not in bib_list:
185 |                 bib_list.append(m.bibtex)
186 | 
187 |         with open(bib_file, "w") as bf:
188 |             for bib in bib_list:
189 |                 bf.write(bib)
190 |                 bf.write("\n")
191 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/measures/.DS_Store
--------------------------------------------------------------------------------
/cwl/ruler/measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/ruler/measures/__init__.py
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_ap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | """
6 | APCWLMetric implements:
7 | Average Precision (corrected to use R, the total number of relevant items, as the denominator) - Harman
8 | 
9 | TrAPCWLMetric implements:
10 | Average Precision which uses the number of relevant items retrieved as the denominator - Harman
11 | 
12 | 
13 | GrAPCWLMetric (TO BE IMPLEMENTED):
14 | @inproceedings{Robertson:2010:EAP:1835449.1835550,
15 | author = {Robertson, Stephen E. and Kanoulas, Evangelos and Yilmaz, Emine},
16 | title = {Extending Average Precision to Graded Relevance Judgments},
17 | booktitle = {Proceedings of the 33rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
18 | series = {SIGIR '10},
19 | year = {2010},
20 | location = {Geneva, Switzerland},
21 | pages = {603--610},
22 | url = {http://doi.acm.org/10.1145/1835449.1835550}
23 | }
24 | """
25 | 
26 | class APCWLMetric(CWLMetric):
27 |     def __init__(self):
28 |         super().__init__()
29 |         self.metric_name = "AP"
30 |         self.bibtex = """
31 |         @article{Harman:1992:ESIR,
32 |         author = {Donna Harman},
33 |         title = {Evaluation Issues in Information Retrieval},
34 |         journal = {Information Processing and Management},
35 |         volume = {28},
36 |         number = {4},
37 |         pages = {439--440},
38 |         year = {1992},
39 |         }
40 | 
41 |         """
42 | 
43 |     def name(self):
44 |         return self.metric_name
45 | 
46 |     def c_vector(self, ranking, worse_case=True):
47 |         gains = ranking.get_gain_vector(worse_case)
48 |         rels = 0
49 |         for g in gains:
50 |             if g > 0.0:
51 |                 rels += 1
52 | 
53 |         n = len(gains)
54 |         rii = []
55 |         cvec = []
56 |         for i in range(0, n):
57 |             rii.append(gains[i]/(i+1))
58 | 
59 |         for i in range(0, n-1):
60 |             bot = np.sum(rii[i:n])
61 |             top = np.sum(rii[i+1:n])
62 | 
63 |             if top > 0.0:
64 |                 cvec.append(top/bot)
65 |             else:
66 |                 cvec.append(0.0)
67 | 
68 |         cvec.append(0.0)
69 |         cvec = np.array(cvec)
70 |         return cvec
71 | 
72 | 
73 | class TrAPCWLMetric(CWLMetric):
74 |     """
75 |     According to Sanderson (http://www.marksanderson.org/publications/my_papers/FnTIR.pdf)
76 |     Harman was the first to publish the non-interpolated AP measure.
77 |     However, apparently Harman's paper had an error: the denominator was the number of relevant items retrieved
78 |     and not the total number of relevant items (known). This was later corrected.
79 |     """
80 |     def __init__(self):
81 |         super().__init__()
82 |         self.metric_name = "TrAP"
83 |         self.bibtex = """
84 |         @article{Harman:1992:ESIR,
85 |         author = {Donna Harman},
86 |         title = {Evaluation Issues in Information Retrieval},
87 |         journal = {Information Processing and Management},
88 |         volume = {28},
89 |         number = {4},
90 |         pages = {439--440},
91 |         year = {1992},
92 |         }
93 |         """
94 | 
95 |     def name(self):
96 |         return self.metric_name
97 | 
98 |     def c_vector(self, ranking, worse_case=True):
99 |         wvec = self.w_vector(ranking, worse_case)
100 | 
101 |         cvec = []
102 |         for i in range(0, len(wvec)-1):
103 |             if wvec[i] > 0.0:
104 |                 cvec.append(wvec[i+1] / wvec[i])
105 |             else:
106 |                 cvec.append(0.0)
107 | 
108 |         cvec.append(0.0)
109 |         cvec = np.array(cvec)
110 | 
111 |         return cvec
112 | 
113 |     def w_vector(self, ranking, worse_case=True):
114 |         wvec = []
115 |         c_costs = np.cumsum(ranking.get_cost_vector(worse_case))
116 |         c_gains = np.cumsum(ranking.get_gain_vector(worse_case))
117 | 
118 |         i = 0
119 |         while (c_gains[i] == 0) and (i < len(c_gains)-1):
120 |             c_gains[i] = 1.0
121 |             i += 1
122 | 
123 |         total_rels = ranking.get_total_rels(worse_case)
124 |         wvec = np.divide(c_gains, c_costs)
125 |         if total_rels > 0:
126 |             wvec = wvec / total_rels
127 | 
128 |         return np.array(wvec)
129 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_bpm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from cwl.ruler.measures.cwl_metrics import CWLMetric
3 | 
4 | """
5 | Bejewelled Player Model (BPM) by Zhang et al (2017)
6 | 
7 | Gains are assumed to be scaled to be between 0.0 and 1.0,
8 | thus rel_max is assumed to be 1.0.
9 | 
10 | In Zhang et al (2017), rel_max is an integer i.e. 0,1,2,3 (for 4 levels of grades)
11 | and the rel level is raised to the power of 2. To encode this within the C/W/L BPM,
12 | the rel levels would need to be re-scaled to be between zero and one.
13 | 
14 | Static: takes T (i.e. E_b) and K (i.e. E_c) in Zhang et al (2017)
15 | T is the total amount of gain desired - similar to T in INST and IFT
16 | 
17 | K is the total amount of cost willing to be spent, similar to k in precision,
18 | however K can be any unit of cost (depending on the costs file),
19 | while k in P@k is the number of documents.
20 | In Zhang et al (2017), K is k, the number of documents,
21 | but here we provide the generalized version,
22 | such that K can be set based on the costs specified for each doc (as per the cost file)
23 | 
24 | 
25 | Dynamic: also takes hb, hc and gain_med (i.e. rel_med in Zhang et al (2017)).
26 | hb and hc control how strongly T and K are adjusted after each item.
27 | 
28 | gain_med is the median gain (i.e. a value between 0 and 1.0):
29 | if the gain observed at position i is higher than gain_med,
30 | then T is increased, as is K
31 | 
32 | if the gain observed at position i is lower than gain_med,
33 | then T is decreased, as is K
34 | 
35 | The change in gain is: T <- T + hb * (gain[i] - gain_med)
36 | The change in cost is: K <- K + hc * (gain[i] - gain_med)
37 | 
38 | hb and hc are therefore scaling parameters.
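For example, with hb = 1.0 and gain_med = 0.5, an item with gain 1.0 raises T by 0.5,
while an item with gain 0.0 lowers T by 0.5; K is adjusted in the same way via hc.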
39 | 40 | """ 41 | 42 | class BPMCWLMetric(CWLMetric): 43 | 44 | def __init__(self, T=1.0, K=10): 45 | CWLMetric.__init__(self) 46 | # super(CWLMetric, self).__init__() 47 | self.metric_name = "BPM-Static-T={0}-K={1}".format(T, K) 48 | self.T = T # E_b the total amount of benefit desired 49 | self.K = K # E_c the total amount of cost or documents willing to be examined 50 | self.bibtex = """ 51 | @inproceedings{Zhang:2017:EWS:3077136.3080841, 52 | author = {Zhang, Fan and Liu, Yiqun and Li, Xin and Zhang, Min and Xu, Yinghui and Ma, Shaoping}, 53 | title = {Evaluating Web Search with a Bejeweled Player Model}, 54 | booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval}, 55 | series = {SIGIR '17}, 56 | year = {2017}, 57 | location = {Shinjuku, Tokyo, Japan}, 58 | pages = {425--434}, 59 | url = {http://doi.acm.org/10.1145/3077136.3080841}, 60 | } 61 | """ 62 | 63 | def name(self): 64 | return "BPM-Static-T={0}-K={1}".format(self.T,self.K) 65 | 66 | 67 | def c_vector(self, ranking, worse_case=True): 68 | gains = ranking.get_gain_vector(worse_case) 69 | costs = ranking.get_cost_vector(worse_case) 70 | 71 | c_gain = np.cumsum(gains) 72 | c_cost = np.cumsum(costs) 73 | 74 | # GAIN Constraint 75 | rr_cvec = np.zeros(len(gains)) 76 | i = 0 77 | # continue until the gain accumulated exceeds T 78 | while i < len(gains) and (c_gain[i] < self.T): 79 | rr_cvec[i] = 1.0 80 | i = i + 1 81 | 82 | # COST Constraint 83 | p_cvec = np.zeros(len(costs)) 84 | i = 0 85 | # continue until the costs accumulated exceeds K 86 | while i < len(costs) and (c_cost[i] < self.K): 87 | p_cvec[i] = 1.0 88 | i = i + 1 89 | 90 | # combine the two continuation vectors 91 | bpm_cvec = np.zeros(len(costs)) 92 | i = 0 93 | while i < len(costs): 94 | if (rr_cvec[i] == 1.0) and (p_cvec[i] == 1.0): 95 | bpm_cvec[i] = 1.0 96 | i = i + 1 97 | 98 | return bpm_cvec 99 | 100 | 101 | 102 | 103 | class BPMDCWLMetric(CWLMetric): 104 | 105 | def __init__(self, T=1, K=10, hb=1.0, hc=1.0, gain_med=0.5): 106 | super().__init__() 107 | self.metric_name = "BPM-Dynamic-T={0}-K={1}-hb={2}-hc={3}".format(T,K,hb,hc) 108 | self.T = T # E_b the total amount of benefit desired 109 | self.K = K # E_c the total amount of cost or documents willing to be examined 110 | self.hb = hb # the scaling factor to adjust the T constraint by 111 | self.hc = hc # the scaling factor to adjust the K constraint by 112 | self.gain_med = gain_med # i.e. 
rel_med to adjust the T and K by
113 |         self.bibtex = """
114 |         @inproceedings{Zhang:2017:EWS:3077136.3080841,
115 |         author = {Zhang, Fan and Liu, Yiqun and Li, Xin and Zhang, Min and Xu, Yinghui and Ma, Shaoping},
116 |         title = {Evaluating Web Search with a Bejeweled Player Model},
117 |         booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
118 |         series = {SIGIR '17},
119 |         year = {2017},
120 |         location = {Shinjuku, Tokyo, Japan},
121 |         pages = {425--434},
122 |         url = {http://doi.acm.org/10.1145/3077136.3080841},
123 |         }
124 |         """
125 | 
126 |     def name(self):
127 |         return "BPM-Dynamic-T={0}-K={1}-hb={2}-hc={3}".format(self.T, self.K, self.hb, self.hc)
128 | 
129 |     def c_vector(self, ranking, worse_case=True):
130 |         gains = ranking.get_gain_vector(worse_case)
131 |         costs = ranking.get_cost_vector(worse_case)
132 |         c_gain = np.cumsum(gains)
133 |         c_cost = np.cumsum(costs)
134 | 
135 |         # GAIN Constraint
136 |         rr_cvec = np.zeros(len(gains))
137 |         i = 0
138 |         T = self.T
139 |         # continue until the gain accumulated exceeds T
140 |         while i < len(gains) and (c_gain[i] < T):
141 |             rr_cvec[i] = 1.0
142 |             # Now update T, depending on gain[i]
143 |             T = T + self.hb * (gains[i] - self.gain_med)
144 | 
145 |             i = i + 1
146 |         # COST Constraint
147 |         p_cvec = np.zeros(len(costs))
148 |         i = 0
149 |         K = self.K
150 |         # continue until the costs accumulated exceeds K
151 |         while i < len(costs) and (c_cost[i] < K):
152 |             p_cvec[i] = 1.0
153 |             # Now update K, depending on gain[i]
154 |             K = K + self.hc * (gains[i] - self.gain_med)
155 |             i = i + 1
156 | 
157 |         # combine the two continuation vectors
158 |         bpm_cvec = np.zeros(len(costs))
159 |         i = 0
160 |         while i < len(costs):
161 |             if (rr_cvec[i] == 1.0) and (p_cvec[i] == 1.0):
162 |                 bpm_cvec[i] = 1.0
163 |             i = i + 1
164 | 
165 |         return bpm_cvec
166 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_dcg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | 
6 | """
7 | Discounted Cumulative Gain by Jarvelin and Kekalainen (2002)
8 | The discount is scaled so that it forms a proper probability distribution
9 | 
10 | k is the rank cut off, i.e. the number of items to be examined
11 | base is the base of the log for the discounting, which is set to 2 by default as per the original paper.
12 | """
13 | 
14 | class NDCGCWLMetric(CWLMetric):
15 |     def __init__(self, k):
16 |         super().__init__()
17 |         self.metric_name = "NDCG-k@{0}".format(k)
18 |         self.k = k
19 |         self.base = 2.0
20 |         self.bibtex = """
21 |         @article{Jarvelin:2002:CGE:582415.582418,
22 |         author = {J\"{a}rvelin, Kalervo and Kek\"{a}l\"{a}inen, Jaana},
23 |         title = {Cumulated Gain-based Evaluation of IR Techniques},
24 |         journal = {ACM Trans. Inf. Syst.},
25 |         volume = {20},
26 |         number = {4},
27 |         year = {2002},
28 |         pages = {422--446},
29 |         numpages = {25},
30 |         url = {http://doi.acm.org/10.1145/582415.582418},
31 |         }
32 |         """
33 | 
34 |     def name(self):
35 |         return "NDCG-k@{0}".format(self.k)
36 | 
37 |     def c_vector(self, ranking, worse_case=True):
38 | 
39 |         cvec = []
40 |         for i in range(1, ranking.n+1):
41 |             if i < self.k:
42 |                 cvec.append(math.log(i+1, self.base)/math.log(i+2, self.base))
43 |             else:
44 |                 cvec.append(0.0)
45 | 
46 |         cvec = np.array(cvec)
47 | 
48 |         return cvec
49 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_ift.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from cwl.ruler.measures.cwl_metrics import CWLMetric
4 | 
5 | """
6 | Information Foraging Based Measure by Azzopardi et al (2018)
7 | 
8 | T is the target gain (i.e. how much is desired)
9 | A is the average rate of gain that is expected
10 | b1/b2 are intercept parameters
11 | R1/R2 are 'rationality' parameters; as R1/R2 are increased to infinity, the searcher becomes increasingly rational,
12 | and will stop if T is met, or A is not met; but as R1/R2 are decreased to zero, the searcher will become ambivalent
13 | towards T or A respectively, and fall back to the default b1/b2 intercepts, i.e. T or A will not influence the decision
14 | to continue.
15 | 
16 | As a result, if R1/R2 are set to zero, then the metric becomes akin to RBP.
17 | If R1 is set to inf, then once T gain is acquired, the searcher will stop - which is akin to RR (where T would equal 1).
18 | If R1/R2 are set in between, then it suggests that as the user approaches T, they become more likely to stop, as they
19 | are getting closer to their goal, and once they reach their goal, they are still likely to continue (but to a lesser
20 | and lesser degree). Similarly, if the user is experiencing a rate of gain higher than A, then they are much more likely to continue,
21 | but as the rate of gain decreases and gets further from A, the user is less likely to continue.
22 | 
23 | IFTGoalCWLMetric implements the Goal only variant
24 | IFTRateCWLMetric implements the Rate only variant
25 | IFTGoalRateCWLMetric implements the Goal and Rate variant - which was shown to be the most accurate in Azzopardi et al.
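Concretely, the continuation probabilities implemented by the classes below are:

    C1(i) = 1 - (1 + b1 * e^((T - y_i) * R1))^-1      (goal variant)
    C2(i) = (1 + b2 * e^((A - y_i / k_i) * R2))^-1    (rate variant)

where y_i and k_i are the gain and cost accumulated up to rank i;
the Goal and Rate variant continues at rank i with probability C1(i) * C2(i).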
26 | """ 27 | 28 | 29 | class IFTGoalCWLMetric(CWLMetric): 30 | def __init__(self, T, b1, R1): 31 | super().__init__() 32 | self.metric_name = "IFT-C1-T={0}-b1={1}-R1={2}".format(T,b1,R1) 33 | self.b1 = b1 34 | self.T = T 35 | self.R1 = R1 36 | self.bibtex = "@inproceedings{Azzopardi:2018:MUS:3209978.3210027," \ 37 | "author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}," \ 38 | "title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}," \ 39 | "booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}," \ 40 | "series = {SIGIR '18}," \ 41 | "year = {2018}," \ 42 | "location = {Ann Arbor, MI, USA}," \ 43 | "pages = {605--614}," \ 44 | "numpages = {10}," \ 45 | "} " 46 | 47 | def name(self): 48 | return "IFT-C1-T={0}-b1={1}-R1={2}".format(self.T, self.b1, self.R1) 49 | 50 | def c_vector(self, ranking, worse_case=True): 51 | gains = ranking.get_gain_vector(worse_case) 52 | c_gains = np.cumsum(gains) 53 | cvec = [] 54 | for i in range(0, len(gains)): 55 | c1 = self.c1_func(c_gains[i]) 56 | cvec.append(c1) 57 | cvec = np.array(cvec) 58 | return cvec 59 | 60 | def c1_func(self, yi): 61 | ex = (1.0 + self.b1 * math.pow(math.e, ((self.T-yi) * self.R1))) 62 | return 1.0 - math.pow(ex, -1.0) 63 | 64 | 65 | class IFTRateCWLMetric(CWLMetric): 66 | def __init__(self, A, b2, R2): 67 | super().__init__() 68 | self.metric_name = "IFT-C2-A={0}-b2={1}-R2={2}".format(A, b2, R2) 69 | self.b2 = b2 70 | self.A = A 71 | self.R2 = R2 72 | self.bibtex = "@inproceedings{Azzopardi:2018:MUS:3209978.3210027," \ 73 | "author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}," \ 74 | "title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}," \ 75 | "booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}," \ 76 | "series = {SIGIR '18}," \ 77 | "year = {2018}," \ 78 | "location = {Ann Arbor, MI, USA}," \ 79 | "pages = {605--614}," \ 80 | "numpages = {10}," \ 81 | "} " 82 | 83 | def name(self): 84 | return "IFT-C2-A={0}-b2={1}-R2={2}".format(self.A, self.b2, self.R2) 85 | 86 | def c_vector(self, ranking, worse_case=True): 87 | gains = ranking.get_gain_vector(worse_case) 88 | costs = ranking.get_cost_vector(worse_case) 89 | 90 | c_gains = np.cumsum(gains) 91 | c_costs = np.cumsum(costs) 92 | cvec = [] 93 | for i in range(0, len(gains)): 94 | c2 = self.c2_func(c_gains[i], c_costs[i]) 95 | cvec.append(c2) 96 | 97 | cvec = np.array(cvec) 98 | 99 | return cvec 100 | 101 | def c2_func(self, yi, ki): 102 | ex = (1.0 + self.b2 * math.pow(math.e, ((self.A - (yi/ki)) * self.R2))) 103 | return math.pow(ex, -1.0) 104 | 105 | 106 | class IFTGoalRateCWLMetric(CWLMetric): 107 | def __init__(self, T, b1, R1, A, b2, R2): 108 | super().__init__() 109 | self.metric_name = "IFT-C1-C2-T={0}-b1={1}-R1={2}-A={3}-b2={4}-R2={5}".format(T, b1, R1, A, b2, R2) 110 | self.b1 = b1 111 | self.T = T 112 | self.R1 = R1 113 | self.b2 = b2 114 | self.A = A 115 | self.R2 = R2 116 | self.bibtex = """ 117 | @inproceedings{Azzopardi:2018:MUS:3209978.3210027, 118 | author = {Azzopardi, Leif and Thomas, Paul and Craswell, Nick}, 119 | title = {Measuring the Utility of Search Engine Result Pages: An Information Foraging Based Measure}, 120 | booktitle = {The 41st International ACM SIGIR Conference on Research \&\#38; Development in Information Retrieval}, 121 | series = {SIGIR '18}, 122 | year = {2018}, 123 | location = {Ann Arbor, MI, USA}, 124 
| pages = {605--614}, 125 | numpages = {10}, 126 | } 127 | """ 128 | 129 | def name(self): 130 | return "IFT-C1-C2-T={0}-b1={1}-R1={2}-A={3}-b2={4}-R2={5}".format(self.T, self.b1, self.R1, self.A, self.b2, self.R2) 131 | 132 | def c_vector(self, ranking, worse_case=True): 133 | gains = ranking.get_gain_vector(worse_case) 134 | costs = ranking.get_cost_vector(worse_case) 135 | c_gains = np.cumsum(gains) 136 | c_costs = np.cumsum(costs) 137 | cvec = [] 138 | for i in range(0, len(gains)): 139 | 140 | c1 = self.c1_func(c_gains[i]) 141 | c2 = self.c2_func(c_gains[i], c_costs[i]) 142 | cvec.append(c1*c2) 143 | 144 | cvec = np.array(cvec) 145 | 146 | return cvec 147 | 148 | def c2_func(self, yi, ki): 149 | ex = (1.0 + self.b2 * math.pow(math.e, ((self.A - (yi/ki)) * self.R2))) 150 | return math.pow(ex, -1.0) 151 | 152 | def c1_func(self, yi): 153 | ex = (1.0 + self.b1 * math.pow(math.e, ((self.T-yi) * self.R1))) 154 | return 1.0 - math.pow(ex, -1.0) 155 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_insq.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | INSQ by Moffat et al (is a variant on INST) 7 | 8 | T denotes the desired amount of gain. 9 | """ 10 | 11 | 12 | class INSQCWLMetric(CWLMetric): 13 | 14 | def __init__(self, T=1.0): 15 | super().__init__() 16 | self.metric_name = "INSQ-T={0} ".format(T) 17 | self.T = T 18 | self.bibtex = """ 19 | @inproceedings{Moffat:2012:MMI:2407085.2407092, 20 | author = {Moffat, Alistair and Scholer, Falk and Thomas, Paul}, 21 | title = {Models and Metrics: IR Evaluation As a User Process}, 22 | booktitle = {Proceedings of the Seventeenth Australasian Document Computing Symposium}, 23 | series = {ADCS '12}, 24 | year = {2012}, 25 | location = {Dunedin, New Zealand}, 26 | pages = {47--54}, 27 | url = {http://doi.acm.org/10.1145/2407085.2407092}, 28 | } 29 | """ 30 | 31 | def name(self): 32 | return "INSQ-T={0}".format(self.T) 33 | 34 | def c_vector(self, ranking, worse_case=True): 35 | gains = ranking.get_gain_vector(worse_case) 36 | cg = np.cumsum(gains) 37 | cvec = [] 38 | for i in range(0, len(cg)): 39 | ci = (((i+1.0) + (2.0 * self.T)-1.0) / ((i+1.0) + (2.0 * self.T)))**2.0 40 | cvec.append(ci) 41 | 42 | cvec = np.array(cvec) 43 | return cvec 44 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_inst.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | INST is from Moffat et al., Australasian Document Computing Symposium 2015 7 | 8 | T: Is the desired amount of relevant items or gain, 9 | depending on whether gain is binary (0,1) or graded (0..1.0) 10 | """ 11 | 12 | class INSTCWLMetric(CWLMetric): 13 | 14 | # INST requires gains to be in range [0, 1] 15 | MINGAIN = 0.0 16 | MAXGAIN = 1.0 17 | 18 | def __init__(self, T=1.0): 19 | super().__init__() 20 | self.metric_name = "INST-T={0} ".format(T) 21 | self.T = T 22 | self.bibtex = """ 23 | @inproceedings{Moffat:2015:IAM:2838931.2838938, 24 | author = {Moffat, Alistair and Bailey, Peter and Scholer, Falk and Thomas, Paul}, 25 | title = {INST: An Adaptive Metric for Information Retrieval Evaluation}, 26 | booktitle = {Proceedings of the 20th Australasian Document Computing Symposium}, 27 | series = {ADCS '15}, 
28 |         year = {2015},
29 |         location = {Parramatta, NSW, Australia},
30 |         pages = {5:1--5:4},
31 |         articleno = {5},
32 |         numpages = {4},
33 |         url = {http://doi.acm.org/10.1145/2838931.2838938}
34 |         }
35 |         """
36 | 
37 |     def name(self):
38 |         return "INST-T={0}".format(self.T)
39 | 
40 |     def c_vector(self, ranking, worse_case=True):
41 |         gains = ranking.get_gain_vector(worse_case)
42 |         self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains)
43 |         c_gains = np.cumsum(gains)
44 |         cvec = []
45 |         for i in range(0, len(c_gains)):
46 |             Ti = self.T - c_gains[i]
47 |             ci = (((i+1.0)+self.T+Ti-1.0) / ((i+1.0)+self.T+Ti))**2.0
48 |             cvec.append(ci)
49 | 
50 |         cvec = np.array(cvec)
51 |         return cvec
52 | 
--------------------------------------------------------------------------------
/cwl/ruler/measures/cwl_metrics.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import logging
4 | 
5 | logger = logging.getLogger('cwl')
6 | 
7 | class CWLMetric(object):
8 | 
9 |     def __init__(self):
10 |         self.expected_utility = 0.0
11 |         self.expected_cost = 0.0
12 |         self.expected_total_utility = 0.0
13 |         self.expected_total_cost = 0.0
14 |         self.expected_items = 0.0
15 |         self.residual_expected_utility = None
16 |         self.residual_expected_cost = None
17 |         self.residual_expected_total_utility = None
18 |         self.residual_expected_total_cost = None
19 |         self.residual_expected_items = None
20 |         self.residuals = False
21 |         self.metric_name = "Undefined"
22 |         self.ranking = None
23 |         self.bibtex = ""
24 | 
25 |     def name(self):
26 |         return self.metric_name
27 | 
28 |     def c_vector(self, ranking, worse_case=True):
29 |         """
30 |         Create a vector of C probabilities (i.e. probability of continuing from position i to position i+1)
31 |         Note: when defining a metric, it is best/easiest to re-implement this function.
32 |         :param ranking: CWL Ranking object
33 |         :param worse_case: Boolean, to denote whether to estimate based on assuming the
34 |         worst case i.e. unjudged are considered to be zero gain, and max cost, or
35 |         best case i.e. worse_case=False, and unjudged are considered to be max gain, and min cost.
36 |         Note that the Ranking object handles what is returned in the gain and cost vectors.
37 |         :return: returns the C vector probabilities
38 |         """
39 |         cvec = np.ones(len(ranking.get_gain_vector(worse_case)))
40 |         return cvec
41 | 
42 |     def l_vector(self, ranking, worse_case=True):
43 |         """
44 |         Create a vector of L probabilities (i.e. the likelihoods of stopping at position i given the C vector)
45 |         :param ranking: CWL Ranking object
46 |         :param worse_case: Boolean, to denote whether to estimate based on assuming the worst case (see c_vector)
47 |         :return: returns the L vector probabilities
48 |         """
49 |         cvec = self.c_vector(ranking, worse_case)
50 |         logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "cvec", cvec[0:11]))
51 |         cshift = np.append(np.array([1.0]), cvec[0:-1])
52 |         lvec = np.cumprod(cshift)
53 |         lvec = np.multiply(lvec, (np.subtract(np.ones(len(cvec)), cvec)))
54 |         logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "lvec", lvec[0:11]))
55 |         return lvec
56 | 
57 |     def w_vector(self, ranking, worse_case=True):
58 |         """
59 |         Create a vector of W probabilities (i.e. the probability of examining item i)
60 |         Note: when defining a metric, it is best/easiest to re-implement this function.
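        (This default implementation computes W as the normalized cumulative product
        of the C probabilities: W(i) = prod_{j<i} C(j) / sum_k prod_{j<k} C(j).)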
61 | :param ranking: CWL Ranking object 62 | :param worse_case: Boolean, to denote whether to estimate based on assuming the worse case (True) or the best case (False) 63 | :return: returns the W vector probabilities 64 | """ 65 | cvec = self.c_vector(ranking, worse_case) 66 | cvec = cvec[0:-1] 67 | cvec_prod = np.cumprod(cvec) 68 | cvec_prod = np.pad(cvec_prod, (1, 0), 'constant', constant_values=1.0) 69 | w1 = np.divide(1.0, np.sum(cvec_prod)) 70 | w_tail = np.multiply(cvec_prod[1:len(cvec_prod)], w1) 71 | wvec = np.append(w1, w_tail) 72 | logger.debug("{0} {1} {2} {3}".format(ranking.topic_id, self.name(), "wvec", wvec[0:11])) 73 | return wvec 74 | 75 | def measure(self, ranking): 76 | """ 77 | Given the ranking, measure estimates the various measurements given the CWL framework; 78 | if residuals are required, these are also computed. 79 | :param ranking: CWL Ranking object 80 | :return: the expected utility per item 81 | """ 82 | self.ranking = ranking 83 | # score based on worse case - lower bounds 84 | (eu, etu, ec, etc, ei) = self._do_score(ranking, True) 85 | 86 | self.expected_utility = eu 87 | self.expected_total_utility = etu 88 | self.expected_cost = ec 89 | self.expected_total_cost = etc 90 | self.expected_items = ei 91 | 92 | if self.residuals: 93 | # score based on best case - upper bounds 94 | (eu, etu, ec, etc, ei) = self._do_score(ranking, False) 95 | 96 | # compute the residual i.e. the difference between the upper and lower bounds 97 | self.residual_expected_utility = eu - self.expected_utility 98 | self.residual_expected_total_utility = etu - self.expected_total_utility 99 | self.residual_expected_cost = ec - self.expected_cost 100 | self.residual_expected_total_cost = etc - self.expected_total_cost 101 | self.residual_expected_items = ei - self.expected_items 102 | 103 | # return the rate of gain per document 104 | return self.expected_utility 105 | 106 | def _do_score(self, ranking, worse_case=True): 107 | """ 108 | An internal function that handles the scoring of a ranking given the CWL machinery. 109 | :param ranking: CWL Ranking object 110 | :return: a tuple of (expected utility per item, expected total utility, 111 | expected cost, expected total cost, expected items examined)
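For example (hypothetical values): with wvec = [0.5, 0.3, 0.2] and gains = [1.0, 0.0, 0.5], the expected utility is 0.5*1.0 + 0.3*0.0 + 0.2*0.5 = 0.6 units of gain per item examined.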
112 | """ 113 | wvec = self.w_vector(ranking, worse_case) 114 | lvec = self.l_vector(ranking, worse_case) 115 | gain_vec = ranking.get_gain_vector(worse_case) 116 | cost_vec = ranking.get_cost_vector(worse_case) 117 | cum_gains = np.cumsum(gain_vec) 118 | cum_costs = np.cumsum(cost_vec) 119 | expected_utility = np.sum(np.dot(wvec, gain_vec)) 120 | expected_total_utility = np.sum(np.dot(lvec, cum_gains)) 121 | expected_cost = np.sum(np.dot(wvec, cost_vec)) 122 | expected_total_cost = np.sum(np.dot(lvec, cum_costs)) 123 | expected_items = 1.0 / wvec[0] 124 | return expected_utility, expected_total_utility, expected_cost, expected_total_cost, expected_items 125 | 126 | def report(self): 127 | if self.residuals: 128 | print("{0}\t{1}\t{2:.4f}\t{3:.4f}\t{4:.4f}\t{5:.4f}\t{6:.4f}\t{7:.4f}\t{8:.4f}\t{9:.4f}\t{10:.4f}\t{11:.4f}".format( 129 | self.ranking.topic_id, self.name(), self.expected_utility, self.expected_total_utility, 130 | self.expected_cost, self.expected_total_cost, self.expected_items, 131 | self.residual_expected_utility, self.residual_expected_total_utility, 132 | self.residual_expected_cost, self.residual_expected_total_cost, self.residual_expected_items 133 | )) 134 | else: 135 | print("{0}\t{1}\t{2:.4f}\t{3:.4f}\t{4:.4f}\t{5:.4f}\t{6:.4f}".format( 136 | self.ranking.topic_id, self.name(), self.expected_utility, self.expected_total_utility, 137 | self.expected_cost, self.expected_total_cost, self.expected_items, 138 | )) 139 | 140 | def csv(self): 141 | return ("{0},{1:.3f},{2:.3f},{3:.3f},{4:.3f},{5:.3f}".format( 142 | self.name(), self.expected_utility, self.expected_total_utility, self.expected_cost, 143 | self.expected_total_cost, self.expected_items)) 144 | 145 | def get_scores(self): 146 | """ 147 | :return: list with values of each measurement for the previously measured ranking 148 | """ 149 | scores = [ 150 | self.expected_utility, 151 | self.expected_total_utility, 152 | self.expected_cost, 153 | self.expected_total_cost, 154 | self.expected_items] 155 | return scores 156 | 157 | def _pad_vector(self, vec1, n, val): 158 | """ 159 | Pads vector 1 up to size n, with the value val 160 | :param vec1: np array 161 | :param n: size of the desired array 162 | :param val: the value to be inserted if padding is required 163 | :return: the padded vector 164 | """ 165 | if len(vec1) < n: 166 | vec1 = np.pad(vec1, (0, n-len(vec1)), 'constant', constant_values=val) 167 | return vec1 168 | 169 | def validate_gain_range(self, min_allowed_gain, max_allowed_gain, gain_vec): 170 | """ 171 | Checks that the gain vector does not violate any metric assumptions 172 | These assumptions (about the min or max gain) should be provided by 173 | the calling metric class. 
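For example, INSTCWLMetric defines MINGAIN = 0.0 and MAXGAIN = 1.0 and calls self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) before computing its C vector.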
174 | """ 175 | if np.min(gain_vec) < min_allowed_gain: 176 | raise ValueError("Supplied gain values violate metric assumptions: Metric = {}.\n " 177 | "The minimum allowable gain for this metric is: {}.".format(self.name(), min_allowed_gain)) 178 | if np.max(gain_vec) > max_allowed_gain: 179 | raise ValueError("Supplied gain values ({}) violate metric assumptions: Metric = {}.\n " 180 | "The maximum allowable gain for this " 181 | "metric is: {}.".format(np.max(gain_vec), self.name(), max_allowed_gain)) 182 | 183 | 184 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_nerr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | ''' 5 | The suite of Not (but Nearly) ERR metrics (NERR) from Azzopardi et al. in 6 | ICTIR 2021 ("ERR is not C/W/L..."). 7 | 8 | There are four specific instances of NERR metrics which correspond to equations 9 | presented in the aforementioned research paper: NERReq{8, 9, 10, 11}. 10 | 11 | Note that NERReq8 and NERReq9 are designed to be truncated at k, whereas 12 | NERReq10 runs to full depth according to the parameter phi (akin to RBP) and 13 | NERReq11 runs to full depth according to the parameter T (akin to INST). 14 | ''' 15 | 16 | 17 | # Option One (Equation 8) 18 | class NERReq8CWLMetric(CWLMetric): 19 | 20 | # NERReq8 requires gains to be in range [0, 1] 21 | MINGAIN = 0.0 22 | MAXGAIN = 1.0 23 | 24 | def __init__(self, k): 25 | super().__init__() 26 | self.metric_name = "NERR-EQ8@k={0}".format(k) 27 | self.k = k 28 | self.bibtex = """ 29 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 30 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 31 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 32 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 33 | series = {ICTIR '21}, 34 | location = {Virtual Event, Canada}, 35 | url = {https://doi.org/10.1145/3471158.3472239}, 36 | doi = {10.1145/3471158.3472239}, 37 | } 38 | """ 39 | 40 | def c_vector(self, ranking, worse_case=True): 41 | gains = ranking.get_gain_vector(worse_case) 42 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 43 | cvec = np.zeros(len(gains)) 44 | i = 0 45 | while i < len(gains) and i < self.k - 1: 46 | cvec[i] = 1 - gains[i] 47 | i = i + 1 48 | return np.array(cvec) 49 | 50 | 51 | # Option Two (Equation 9) 52 | class NERReq9CWLMetric(CWLMetric): 53 | 54 | # NERReq9 requires gains to be in range [0, 1] 55 | MINGAIN = 0.0 56 | MAXGAIN = 1.0 57 | 58 | def __init__(self, k): 59 | super().__init__() 60 | self.metric_name = "NERR-EQ9@k={0}".format(k) 61 | self.k = k 62 | self.bibtex = """ 63 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 64 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 65 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 66 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 67 | series = {ICTIR '21}, 68 | location = {Virtual Event, Canada}, 69 | url = {https://doi.org/10.1145/3471158.3472239}, 70 | doi = {10.1145/3471158.3472239}, 71 | } 72 | """ 73 | 74 | def c_vector(self, ranking, worse_case=True): 75 | gains = ranking.get_gain_vector(worse_case) 76 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN,
gains) 77 | cvec = np.zeros(len(gains)) 78 | i = 0 79 | while i < len(gains) and i < self.k - 1: 80 | rank = i + 1 81 | cvec[i] = (1.0*rank/(rank+1.0)) * (1.0-gains[i]) 82 | i = i + 1 83 | return np.array(cvec) 84 | 85 | 86 | # Option Three (Equation 10) 87 | class NERReq10CWLMetric(CWLMetric): 88 | 89 | # NERReq10 requires gains to be in range [0, 1] 90 | MINGAIN = 0.0 91 | MAXGAIN = 1.0 92 | 93 | def __init__(self, phi=0.9): 94 | super().__init__() 95 | self.metric_name = "NERR-EQ10@phi={0}".format(phi) 96 | self.phi = phi 97 | self.bibtex = """ 98 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 99 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 100 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 101 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 102 | series = {ICTIR '21}, 103 | location = {Virtual Event, Canada}, 104 | url = {https://doi.org/10.1145/3471158.3472239}, 105 | doi = {10.1145/3471158.3472239}, 106 | } 107 | """ 108 | 109 | def c_vector(self, ranking, worse_case=True): 110 | gains = ranking.get_gain_vector(worse_case) 111 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 112 | cvec = np.zeros(len(gains)) 113 | i = 0 114 | while i < len(gains): 115 | cvec[i] = self.phi * (1 - gains[i]) 116 | i = i + 1 117 | return np.array(cvec) 118 | 119 | 120 | # Option Four (Equation 11) 121 | class NERReq11CWLMetric(CWLMetric): 122 | 123 | # NERReq11 requires gains to be in range [0, 1] 124 | MINGAIN = 0.0 125 | MAXGAIN = 1.0 126 | 127 | def __init__(self, T=1.0): 128 | super().__init__() 129 | self.metric_name = "NERR-EQ11@T={0}".format(T) 130 | self.T = T 131 | self.bibtex = """ 132 | @inproceedings{Azzopardi:2021:ECE:3471158.3472239, 133 | author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair}, 134 | title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics}, 135 | booktitle = {Proceedings of the 2021 ACM SIGIR on International Conference on Theory of Information Retrieval}, 136 | series = {ICTIR '21}, 137 | location = {Virtual Event, Canada}, 138 | url = {https://doi.org/10.1145/3471158.3472239}, 139 | doi = {10.1145/3471158.3472239}, 140 | } 141 | """ 142 | 143 | def c_vector(self, ranking, worse_case=True): 144 | gains = ranking.get_gain_vector(worse_case) 145 | self.validate_gain_range(self.MINGAIN, self.MAXGAIN, gains) 146 | cvec = np.zeros(len(gains)) 147 | i = 0 148 | while i < len(gains): 149 | rank = i + 1 150 | cvec[i] = (((rank + (2.0 * self.T)-1.0) / (rank + (2.0 * self.T)))**2.0) * (1.0-gains[i]) 151 | i = i + 1 152 | return np.array(cvec) 153 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_npv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | An economic metric derived directly from computing the Net Present Value of a given list. 6 | r is the rate at which the user discounts future interaction. 7 | 8 | Note that NPV is equivalent to RBP where theta = 1/(1+rate). 9 | 10 | This means that, from an alternative perspective, patience (theta) can be expressed 11 | as how much searchers discount future value.
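For example (a quick sketch): a discount rate of 0.1 gives a constant continuation probability of 1/(1 + 0.1) ≈ 0.909, so NPV with rate = 0.1 scores a ranking identically to RBP with theta ≈ 0.909.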
12 | 13 | """ 14 | 15 | 16 | class NPVCWLMetric(CWLMetric): 17 | 18 | def __init__(self, rate=0.1): 19 | super().__init__() 20 | self.metric_name = "NPV-r@{0}".format(rate) 21 | self.rate = rate 22 | self.bibtex = """ 23 | @inproceedings{azzopardi2019cwl, 24 | author = {Azzopardi, Leif and Thomas, Paul and Moffat, Alistair}, 25 | title = {cwl\_eval: An Evaluation Tool for Information Retrieval}, 26 | booktitle = {Proc. of the 42nd International ACM SIGIR Conference}, 27 | series = {SIGIR '19}, 28 | year = {2019} 29 | } 30 | """ 31 | 32 | def name(self): 33 | return "NPV-r@{0}".format(self.rate) 34 | 35 | def c_vector(self, ranking, worse_case=True): 36 | gains = ranking.get_gain_vector(worse_case) 37 | cvec = np.dot(np.ones(len(gains)), (1.0/(1.0+self.rate))) 38 | return cvec 39 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | ''' 5 | (Graded) Precision at k, where k is assumed to be the number of items to be examined. 6 | 7 | If the gains are set to (0 or 1) then binary precision is calculated; 8 | if the gains are set to 0..1.0 then graded precision is calculated. 9 | 10 | Note that CG@k / R@k and P@k are essentially related, 11 | where the EU/Doc is P@k, while the EU/Serp (ETU) is CG@k or R@k. 12 | 13 | Van Rijsbergen and Salton both mention calculating precision at k - though in the context of computing the PR curve. 14 | P@k was more widely used much later, in the 1990s, through TREC. 15 | ''' 16 | 17 | 18 | class PrecisionCWLMetric(CWLMetric): 19 | 20 | def __init__(self, k=10): 21 | super().__init__() 22 | self.metric_name = "P@{0}".format(k) 23 | self.k = k 24 | self.bibtex = """ 25 | @misc{rijsbergen:1979:ir, 26 | title={Information Retrieval.}, 27 | author={Van Rijsbergen, Cornelis J}, 28 | year={1979}, 29 | publisher={USA: Butterworth-Heinemann} 30 | } 31 | """ 32 | 33 | def name(self): 34 | return "P@{0}".format(self.k) 35 | 36 | def c_vector(self, ranking, worse_case=True): 37 | cvec = np.ones(self.k-1) 38 | cvec = self._pad_vector(cvec, ranking.n, 0.0) 39 | return cvec 40 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_rbp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | Rank Biased Precision by Moffat and Zobel 6 | 7 | theta denotes the patience of a user - higher thetas mean that the user is more likely to continue down the ranked list 8 | 9 | A very simple user model where theta is the continuation probability. 10 | 11 | RBP is directly related to Net Present Value (see cwl_npv.NPVCWLMetric) 12 | and RBP is also related to Time Biased Gain (see cwl_tbg.TBGCWLMetric) 13 | 14 | """ 15 | 16 | 17 | class RBPCWLMetric(CWLMetric): 18 | 19 | def __init__(self, theta=0.9): 20 | #CWLMetric.__init__(self) 21 | super().__init__() 22 | self.metric_name = "RBP@{0}".format(theta) 23 | self.theta = theta 24 | self.bibtex = """ 25 | @article{Moffat:2008:RPM:1416950.1416952, 26 | author = {Moffat, Alistair and Zobel, Justin}, 27 | title = {Rank-biased Precision for Measurement of Retrieval Effectiveness}, 28 | journal = {ACM Trans. Inf.
Syst.}, 29 | volume = {27}, 30 | number = {1}, 31 | year = {2008}, 32 | pages = {2:1--2:27}, 33 | articleno = {2}, 34 | numpages = {27}, 35 | url = {http://doi.acm.org/10.1145/1416950.1416952}, 36 | } 37 | """ 38 | 39 | def name(self): 40 | return "RBP@{0}".format(self.theta) 41 | 42 | def c_vector(self, ranking, worse_case=True): 43 | cvec = np.dot(np.ones(ranking.n), self.theta) 44 | return cvec 45 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_rr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.ruler.measures.cwl_metrics import CWLMetric 3 | 4 | """ 5 | Reciprocal Rank (RR) - From TREC-5 in 1996 by Kantor and Voorhees 6 | """ 7 | 8 | 9 | class RRCWLMetric(CWLMetric): 10 | 11 | def __init__(self): 12 | super().__init__() 13 | self.metric_name = "RR" 14 | self.bibtex = """ 15 | @article{kantor2000trec, 16 | title={The TREC-5 Confusion Track}, 17 | author={Kantor, Paul and Voorhees, Ellen}, 18 | journal={Information Retrieval}, 19 | volume={2}, 20 | number={2-3}, 21 | pages={165--176}, 22 | year={2000} 23 | } 24 | """ 25 | 26 | def name(self): 27 | return "RR" 28 | 29 | def c_vector(self, ranking, worse_case=True): 30 | gains = ranking.get_gain_vector(worse_case) 31 | cvec = np.zeros(len(gains)) 32 | i = 0 33 | found_gain = False 34 | while i < len(gains) and not found_gain: 35 | if gains[i] > 0: 36 | found_gain = True 37 | else: 38 | cvec[i] = 1.0 39 | i = i + 1 40 | 41 | return cvec 42 | 43 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_set.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | """ 6 | Search Economic Metric based on Azzopardi (2014)'s economic model of search. 7 | 8 | Given the total gain function g(i) = i^beta 9 | where i is the rank of the item, and beta controls the amount of discount. 10 | There is no explicit reference to a relevance vector in the paper as it makes 11 | an assumption about how much, on average, a user would get by going to the next rank. 12 | So essentially, there is an implicit assumption that each item provides one unit of gain. 13 | But here the implementation will use the same discounting scheme - but with the observed relevance/gain vector. 14 | 15 | Note that for each k, the expected total utility (ETU from CWL) @k = g(k) when all items are relevant. 16 | 17 | 0 <= beta <= 1.0 - and is the amount of diminishing returns that the user experiences 18 | k = 1...n up to 1000 - is the cut-off at which the user will stop. 19 | 20 | Note that when beta = 1.0, the user model is equivalent to the P@k user model (for the same k).
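For example (a sketch following the implementation below): with beta = 0.5 the marginal gain of moving from rank i to rank i+1 is (i+1)^0.5 - i^0.5, so the continuation probability at rank 1 is (3^0.5 - 2^0.5)/(2^0.5 - 1) ≈ 0.768, rising towards one at deeper ranks until the cut-off k, where it drops to zero.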
21 | 22 | """ 23 | 24 | 25 | class SETCWLMetric(CWLMetric): 26 | 27 | def __init__(self, beta=0.5, k=10): 28 | super().__init__() 29 | self.k = k 30 | self.beta = beta 31 | self.metric_name = self.name() 32 | self.bibtex = """ 33 | @inproceedings{Azzopardi:2014:MIE:2600428.2609574, 34 | author = {Azzopardi, Leif}, 35 | title = {Modelling Interaction with Economic Models of Search}, 36 | booktitle = {Proceedings of the 37th International ACM SIGIR Conference 37 | on Research \& Development in Information Retrieval}, 38 | year = {2014}, 39 | location = {Gold Coast, Queensland, Australia}, 40 | pages = {3--12}, 41 | numpages = {10}, 42 | url = {http://doi.acm.org/10.1145/2600428.2609574}, 43 | } 44 | """ 45 | 46 | def name(self): 47 | return "SET-k@{0}-b@{1}".format(self.k, self.beta) 48 | 49 | def _weight(self, i): 50 | return math.pow(i + 1, self.beta) - math.pow(i, self.beta) 51 | 52 | def c_vector(self, ranking, worse_case=True): 53 | 54 | cvec = [] 55 | for i in range(1, ranking.n + 1): 56 | if i < self.k: 57 | cvec.append(self._weight(i+1)/self._weight(i)) 58 | else: 59 | cvec.append(0.0) 60 | 61 | cvec = np.array(cvec) 62 | 63 | return cvec 64 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_tbg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | 6 | """ 7 | Time Biased Gain by Smucker and Clarke 8 | 9 | H is the halflife which stipulates how quickly the gain decays over time 10 | 11 | TBG is equivalent to RBP when the cost of all items is equal. 12 | 13 | Note in the formulation below the weight is normalized so that a probability vector is formed for W (i.e. it sums to one). 14 | I.e. the weights are re-scaled. 15 | 16 | Also note that the cost vector should be pre-computed a priori with the cost of each element, 17 | and if no gain is to be assigned to duplicate/similar items, then the qrel file used should be pre-processed to zero out duplicate 18 | items (see the TODO below). 19 | 20 | TODO(): Consider implementing a duplicate-sensitive qrel handler that would be duplicate aware. 21 | 22 | """ 23 | 24 | class TBGCWLMetric(CWLMetric): 25 | def __init__(self, halflife=224): 26 | super().__init__() 27 | self.metric_name = "TBG-H@{0} ".format(halflife) 28 | self.halflife = halflife 29 | self.bibtex = """ 30 | @inproceedings{Smucker:2012:TCE:2348283.2348300, 31 | author = {Smucker, Mark D.
and Clarke, Charles L.A.}, 32 | title = {Time-based Calibration of Effectiveness Measures}, 33 | booktitle = {Proceedings of the 35th International ACM SIGIR Conference 34 | on Research and Development in Information Retrieval}, 35 | series = {SIGIR '12}, 36 | year = {2012}, 37 | location = {Portland, Oregon, USA}, 38 | pages = {95--104}, 39 | numpages = {10}, 40 | url = {http://doi.acm.org/10.1145/2348283.2348300}, 41 | } 42 | """ 43 | 44 | def name(self): 45 | return "TBG-H@{0} ".format(self.halflife) 46 | 47 | def c_vector(self, ranking, worse_case=True): 48 | wvec = self.w_vector(ranking, worse_case) 49 | cvec = [] 50 | for i in range(0, len(wvec)-1): 51 | if wvec[i] > 0.0: 52 | cvec.append(wvec[i+1] / wvec[i]) 53 | else: 54 | cvec.append(0.0) 55 | 56 | cvec.append(0.0) 57 | cvec = np.array(cvec) 58 | 59 | return cvec 60 | 61 | def w_vector(self, ranking, worse_case=True): 62 | costs = ranking.get_cost_vector(worse_case) 63 | wvec = [] 64 | c_costs = np.cumsum(costs) 65 | start = 0.0 66 | 67 | norm = self.integral_decay(0.0) 68 | wvec.append(norm) 69 | 70 | for i in range(0, len(c_costs)-1): 71 | weight_i = self.integral_decay(c_costs[i]) 72 | norm = norm + weight_i 73 | wvec.append(weight_i) 74 | 75 | wvec = np.divide(np.array(wvec), norm) 76 | return wvec 77 | 78 | def integral_decay(self, x): 79 | h = self.halflife 80 | return (h * (2.0 ** (-x/h))) / math.log(2.0, math.e) 81 | -------------------------------------------------------------------------------- /cwl/ruler/measures/cwl_umeasure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from cwl.ruler.measures.cwl_metrics import CWLMetric 4 | 5 | 6 | """ 7 | U-Measure by Sakai and Dou (2013) 8 | 9 | The metric assumes that as searchers read more and more text they are less likely to continue. 10 | 11 | L expresses the amount of text the searcher is willing to read (the position-based decay reaches zero at L). A higher L means the searcher is more likely to continue. 12 | 13 | The cost used should be expressed in characters - but as this is proportional to time - time could be used as well. 14 | Note that if costs are in terms of characters, then EC and ETC will be in units based on characters (obviously). 15 | 16 | """ 17 | 18 | class UMeasureCWLMetric(CWLMetric): 19 | def __init__(self, L=1000): 20 | super().__init__() 21 | self.metric_name = "U-L@{0} ".format(L) 22 | self.L = L 23 | self.bibtex = """ 24 | @inproceedings{Sakai:2013:SRR:2484028.2484031, 25 | author = {Sakai, Tetsuya and Dou, Zhicheng}, 26 | title = {Summaries, Ranked Retrieval and Sessions: A Unified Framework for Information Access Evaluation}, 27 | booktitle = {Proceedings of the 36th International ACM SIGIR Conference on Research and Development in Information Retrieval}, 28 | series = {SIGIR '13}, 29 | year = {2013}, 30 | location = {Dublin, Ireland}, 31 | pages = {473--482}, 32 | numpages = {10}, 33 | url = {http://doi.acm.org/10.1145/2484028.2484031} 34 | } 35 | """ 36 | 37 | def name(self): 38 | return "U-L@{0} ".format(self.L) 39 | 40 | def c_vector(self, ranking, worse_case=True): 41 | wvec = self.w_vector(ranking, worse_case) 42 | cvec = [] 43 | for i in range(0, len(wvec)-1): 44 | if wvec[i] > 0.0: 45 | cvec.append(wvec[i+1] / wvec[i]) 46 | else: 47 | cvec.append(0.0) 48 | 49 | cvec.append(0.0) 50 | cvec = np.array(cvec) 51 | return cvec 52 | 53 | def w_vector(self, ranking, worse_case=True): 54 | wvec = [] 55 | # to get the positions, cumulative sum the costs.
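# e.g. (hypothetical values) costs of [100, 200, 300] characters give cumulative offsets [100, 300, 600]:
# item 1 is weighted at position 0, item 2 at position 100, item 3 at position 300,
# i.e. each item is discounted by the amount of text read before it starts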
56 | # costs are assumed to be the length of each document 57 | costs = ranking.get_cost_vector(worse_case) 58 | c_costs = np.cumsum(costs) 59 | start = 0 60 | norm = 0.0 61 | for i in range(0, len(c_costs)-1): 62 | weight_i = self.pos_decay(start) 63 | start = c_costs[i] 64 | wvec.append(weight_i) 65 | norm = norm + weight_i 66 | wvec.append(0.0) 67 | 68 | # now normalize the wvec to sum to one. 69 | wvec = np.divide(np.array(wvec), norm) 70 | return wvec 71 | 72 | 73 | def pos_decay(self, pos): 74 | return max(0.0, (1.0 - (pos / self.L))) 75 | -------------------------------------------------------------------------------- /cwl/ruler/ranking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Ranking(object): 5 | 6 | def __init__(self, topic_id, gains, costs, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 7 | """ 8 | The ranking object encapsulates the data about the items in the ranked list. 9 | The gains and costs vectors should only be accessed through the two getter methods 10 | as these will construct the list of gains and costs up to max_n and handle any unjudged items 11 | :param topic_id: a string to denote the topic 12 | :param gains: a vector of floats to represent the gain associated with each item in the list 13 | :param costs: a vector of floats to represent the cost of each item in the list 14 | :param max_gain: float that is greater than zero 15 | :param min_gain: float that is zero or greater 16 | :param max_cost: float that is greater than zero (and greater than or equal to min_cost) 17 | :param min_cost: float that is greater than zero (no free lunches) 18 | """ 19 | self.topic_id = topic_id 20 | self._gains = gains 21 | self._costs = costs 22 | self.total_qrel_gain = 0.0 23 | self.total_qrel_rels = 0.0 24 | self.max_gain = max_gain 25 | self.min_gain = min_gain 26 | self.max_cost = max_cost 27 | self.min_cost = min_cost 28 | self.n = max_n 29 | # Calculates a lower bound on the total gain and total relevant items 30 | # For metrics like AP to be computed accurately, these values need to be 31 | # manually set after creating the ranking i.e. set w.r.t the QRELs file 32 | # As the QRELs file has all the KNOWN relevant items.
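# e.g. gains of [1.0, 0.0, 0.5] give total_qrel_gain = 1.5 and total_qrel_rels = 2.0 below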
33 | for g in gains: 34 | if g > 0.0: 35 | self.total_qrel_gain += g 36 | self.total_qrel_rels += 1.0 37 | 38 | def get_gain_vector(self, worse_case=True): 39 | # pad out the vector to size n 40 | # convert all NaNs to min (worse case) or max (best case) 41 | if worse_case: 42 | gains = self._pad_trunc_vector(self._gains, self.n, self.min_gain) 43 | gains[np.isnan(gains)] = self.min_gain 44 | return gains 45 | else: 46 | gains = self._pad_trunc_vector(self._gains, self.n, self.max_gain) 47 | gains[np.isnan(gains)] = self.max_gain 48 | return gains 49 | 50 | def get_cost_vector(self, worse_case=True): 51 | # pad out the vector to size n 52 | # convert all NaNs to max (worse case) or min (best case) 53 | if worse_case: 54 | costs = self._pad_trunc_vector(self._costs, self.n, self.max_cost) 55 | costs[np.isnan(costs)] = self.max_cost 56 | return costs 57 | else: 58 | costs = self._pad_trunc_vector(self._costs, self.n, self.min_cost) 59 | costs[np.isnan(costs)] = self.min_cost 60 | return costs 61 | 62 | def get_total_gain(self, worse_case=True): 63 | if worse_case: 64 | return self.total_qrel_gain 65 | else: 66 | # return the max of self.total_qrel_gain 67 | return max(np.sum(self.get_gain_vector(worse_case)), self.total_qrel_gain) 68 | 69 | def get_total_cost(self, worse_case=True): 70 | return np.sum(self.get_cost_vector(worse_case)) 71 | 72 | def get_total_rels(self, worse_case=True): 73 | if worse_case: 74 | return self.total_qrel_rels 75 | else: 76 | # return the max of self.total_qrel_rels 77 | gains = np.array(self.get_gain_vector(worse_case)) 78 | # convert gain values to rel values 79 | gains[gains > 0.0] = 1.0 80 | return max(np.sum(gains), self.total_qrel_rels) 81 | 82 | def _pad_trunc_vector(self, vec1, n, val): 83 | """ 84 | Pads vector 1 up to size n, with the value val 85 | :param vec1: np array 86 | :param n: size of the desired array 87 | :param val: the value to be inserted if padding is required 88 | :return: the padded vector 89 | """ 90 | if len(vec1) < n: 91 | vec1 = np.pad(vec1, (0, n-len(vec1)), 'constant', constant_values=val) 92 | else: 93 | vec1 = vec1[0:n] 94 | return np.array(vec1) 95 | 96 | def report(self): 97 | # print the first ten gains and costs for this topic 98 | print("Topic: {0}".format(self.topic_id)) 99 | print(self.topic_id, self._gains[:10]) 100 | print(self.topic_id, self._costs[:10]) 101 | 102 | 103 | class RankingMaker(object): 104 | """ 105 | This helper class builds Rankings 106 | """ 107 | def __init__(self, topic_id, gain_handler, cost_dict=None, max_gain=1.0, min_gain=0.0, max_cost=1.0, min_cost=1.0, max_n=1000): 108 | """ 109 | Iteratively builds up the ranked list of items (via the add function) then returns the final ranking 110 | by calling get_ranking 111 | :param topic_id: (string) represents the topic id - should match the topic id in the results file 112 | :param gain_handler: seeker.trec_qrel_handler.TrecQrelHandler 113 | :param cost_dict: a dictionary containing the element_type (key) and cost (float, value).
114 | :param max_gain: if an item is unjudged, when worse_case=False, then set gain to max_gain 115 | :param max_cost: if an item is unjudged, when worse_case=True, then set cost to max_cost 116 | :param min_cost: if an item is unjudged, when worse_case=False, then set the cost to min_cost 117 | """ 118 | self.topic_id = topic_id 119 | self.gain_handler = gain_handler 120 | self.cost_lookup = cost_dict 121 | self.total_qrel_gain = 0.0 122 | self.total_qrel_rels = 0.0 123 | self._gains = [] 124 | self._costs = [] 125 | self.max_gain = max_gain 126 | self.min_gain = min_gain 127 | self.max_cost = max_cost 128 | self.min_cost = min_cost 129 | self.show_report = False 130 | self.max_n = max_n 131 | 132 | def add(self, doc_id, element_type): 133 | gain = self.gain_handler.get_value_if_exists(self.topic_id, doc_id) 134 | # if the item is not judged, then insert a NaN value for the gain 135 | # the Ranking object will resolve the NaN value as a min or max gain 136 | if gain is None: 137 | self._gains.append(np.nan) 138 | else: 139 | self._gains.append(gain) 140 | 141 | cost = self._get_cost(doc_id, element_type) 142 | self._costs.append(cost) 143 | 144 | def _get_cost(self, doc_id, element_type): 145 | """ 146 | For a given document and element type returns the cost given the cost dictionary (cost_lookup) 147 | if no cost lookup exists or if the element is not in the dictionary, a nan value is assigned. 148 | :param doc_id: string 149 | :param element_type: string 150 | :return: return a float or nan value 151 | """ 152 | if self.cost_lookup is None: 153 | return np.nan 154 | else: 155 | if element_type in self.cost_lookup: 156 | return self.cost_lookup[element_type] 157 | else: 158 | return np.nan 159 | 160 | def get_ranking(self): 161 | """ 162 | Creates and returns a Ranking given the gains and costs added to the ranked lists. 163 | :return: ruler.ranking.Ranking 164 | """ 165 | ranking = Ranking(self.topic_id, self._gains, self._costs, self.max_gain, self.min_gain, self.max_cost, self.min_cost, self.max_n) 166 | ranking.total_qrel_rels = self.gain_handler.get_total_rels(self.topic_id) 167 | ranking.total_qrel_gain = self.gain_handler.get_total_gains(self.topic_id) 168 | return ranking 169 | 170 | def report(self): 171 | if self.show_report: 172 | print("Topic: {0}".format(self.topic_id)) 173 | print(self.topic_id, self._gains[:10]) 174 | print(self.topic_id, self._costs[:10]) 175 | -------------------------------------------------------------------------------- /cwl/seeker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/seeker/__init__.py -------------------------------------------------------------------------------- /cwl/seeker/common_helpers.py: -------------------------------------------------------------------------------- 1 | # seekiir Framework - Common Files 2 | # Helper Functions and Classes 3 | 4 | def file_exists(filename): 5 | ''' 6 | Helper function which returns a boolean value indicating if the file specified by string parameter filename exists.
7 | Solution from http://stackoverflow.com/questions/82831/how-do-i-check-if-a-file-exists-using-python 8 | ''' 9 | try: 10 | with open(filename) as f: pass 11 | return True 12 | except IOError: 13 | return False 14 | 15 | 16 | 17 | class AutoVivification(dict): 18 | def __getitem__(self, item): 19 | try: 20 | return dict.__getitem__(self, item) 21 | except KeyError: 22 | value = self[item] = type(self)() 23 | return value 24 | -------------------------------------------------------------------------------- /cwl/seeker/topic_document_file_handler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cwl.seeker.common_helpers import file_exists 3 | from cwl.seeker.common_helpers import AutoVivification 4 | 5 | 6 | class TopicDocumentFileHandler(object): 7 | def __init__(self, filename=None): 8 | self.data = AutoVivification() 9 | if filename: 10 | self.read_file(filename) 11 | 12 | 13 | def _put_in_line(self, line): 14 | # handles the specific format of the line (assumes 3 columns: topic document value ) 15 | parts = line.partition(' ') 16 | topic = parts[0] 17 | parts = parts[2].partition(' ') 18 | doc = parts[0] 19 | value = parts[2].strip() 20 | self.put_value(topic, doc, value) 21 | 22 | def _get_out_line(self, topic, doc): 23 | # outputs the topic document and value in a specific way. 24 | return "%s %s %d\n" % (topic, doc, self.data[topic][doc]) 25 | 26 | def read_file(self, filename): 27 | if file_exists(filename): 28 | infile = open(filename, "r") 29 | while infile: 30 | line = infile.readline() 31 | if not line: 32 | infile.close() 33 | break 34 | else: 35 | self._put_in_line(line) 36 | 37 | def save_file(self, filename, append=False): 38 | if append: 39 | outfile = open(filename, "a") 40 | else: 41 | outfile = open(filename, "w") 42 | 43 | for t in self.get_topic_list(): 44 | for d in self.get_doc_list(t): 45 | out_line = self._get_out_line(t, d) 46 | outfile.write(out_line) 47 | 48 | outfile.close() 49 | 50 | def put_value(self, topic, doc, value): 51 | if topic and doc: 52 | self.data[topic][doc] = float(value) 53 | 54 | def get_value(self, topic, doc): 55 | if topic not in self.data: 56 | return 0.0 57 | 58 | if self.data[topic][doc]: 59 | return self.data[topic][doc] 60 | else: 61 | return 0.0 62 | 63 | def get_value_if_exists(self, topic, doc): 64 | if topic not in self.data.keys(): 65 | return None 66 | 67 | if doc in self.data[topic].keys(): 68 | return float(self.data[topic][doc]) 69 | else: 70 | return None 71 | 72 | def get_doc_list(self, topic): 73 | if self.data[topic]: 74 | return self.data[topic] 75 | else: 76 | return [] 77 | 78 | def get_topic_list(self): 79 | tl = [] 80 | if self.data: 81 | for topic in self.data.keys(): 82 | tl.append(topic) 83 | 84 | return tl 85 | 86 | def get_topic_doc_dict(self): 87 | return self.data 88 | 89 | def add_topic_doc(self, topic, doc, value): 90 | self.data[topic][doc] = value 91 | 92 | def inc_topic_doc(self, topic, doc, value=1.0): 93 | if self.data[topic][doc]: 94 | self.data[topic][doc] = self.data[topic][doc] + value 95 | else: 96 | self.data[topic][doc] = value 97 | 98 | def __str__(self): 99 | return 'TOPICS READ IN: ' + str(len(self.data)) 100 | -------------------------------------------------------------------------------- /cwl/seeker/trec_qrel_handler.py: -------------------------------------------------------------------------------- 1 | from cwl.seeker.common_helpers import file_exists 2 | from cwl.seeker.common_helpers import AutoVivification 3 | from 
cwl.seeker.topic_document_file_handler import TopicDocumentFileHandler 4 | 5 | 6 | class TrecQrelHandler(TopicDocumentFileHandler): 7 | 8 | def __init__(self, filename=None): 9 | super(TrecQrelHandler, self).__init__(filename) 10 | 11 | def _put_in_line(self, line): 12 | """ 13 | For TREC QREL the Format is: 14 | Topic Iteration Document Judgement 15 | Iteration is not used. 16 | """ 17 | parts = line.split() 18 | topic = parts[0] 19 | doc = parts[2].strip() 20 | judgement = parts[3].strip() 21 | self.put_value(topic, doc, judgement) 22 | 23 | def _get_out_line(self, topic, doc): 24 | # outputs the topic document and value as the TREC QREL Format with iteration default to zero 25 | return "%s 0 %s %d\n" % (topic, doc, self.data[topic][doc]) 26 | 27 | def validate_gains(self, min_gain=0.0, max_gain=1.0): 28 | """ 29 | Iterates all gains and checks to ensure they are below the value of 30 | max_gain. 31 | """ 32 | all_gains = self.get_topic_doc_dict() 33 | for topic_id in all_gains: 34 | for gain in all_gains[topic_id].values(): 35 | if gain > max_gain: 36 | raise ValueError("Detected a gain value ({}) greater than the maximum ({}).\n" 37 | "Please check your input gain file".format(gain,max_gain)) 38 | if gain < min_gain: 39 | raise ValueError("Detected a gain value ({}) less than minimum ({}).\n " 40 | "Please check your input gain file.".format(gain,min_gain)) 41 | 42 | def get_total_gains(self, topic): 43 | 44 | doc_list = self.get_doc_list(topic) 45 | gain = 0.0 46 | for doc in doc_list: 47 | gain += self.get_value(topic, doc) 48 | return gain 49 | 50 | def get_total_rels(self, topic): 51 | doc_list = self.get_doc_list(topic) 52 | rels = 0.0 53 | for doc in doc_list: 54 | if self.get_value(topic, doc) > 0.0: 55 | rels += 1.0 56 | return rels 57 | -------------------------------------------------------------------------------- /cwl/seeker/trec_result_handler.py: -------------------------------------------------------------------------------- 1 | from cwl.seeker.common_helpers import file_exists 2 | from cwl.seeker.common_helpers import AutoVivification 3 | from cwl.seeker.topic_document_file_handler import TopicDocumentFileHandler 4 | 5 | 6 | def process_trec_line(line): 7 | # handles the specific format of the line - assumes 6 columns TREC Result format 8 | # topic QO document rank score EXP 9 | parts = line.partition(' ') 10 | topic = parts[0] 11 | parts = parts[2].partition(' ') 12 | parts = parts[2].partition(' ') 13 | docid = parts[0] 14 | parts = parts[2].partition(' ') 15 | rank = parts[0] 16 | parts = parts[2].partition(' ') 17 | score = parts[0] 18 | 19 | return (topic, docid, rank, score) 20 | 21 | 22 | class TrecResultHandler(TopicDocumentFileHandler): 23 | 24 | def __init__(self, filename=None): 25 | super(TrecResultHandler, self).__init__(filename) 26 | 27 | def _put_in_line(self, line): 28 | topic, docid, rank, score = process_trec_line(line) 29 | self.put_value(topic, docid, score) 30 | 31 | def _get_out_line(self, topic, doc, rank, score): 32 | # outputs in TREC Result format 33 | return "{0} Q0 {1} {2} {3} EXP\n".format(topic, doc.strip(), rank, score) 34 | 35 | def get_score(self, topic, doc): 36 | if self.data[topic][doc]: 37 | return self.data[topic][doc][1] 38 | else: 39 | return 0.0 40 | 41 | def update_score(self, topic, doc, score): 42 | if self.data[topic][doc]: 43 | self.data[topic][doc][1] = score 44 | return True 45 | return False 46 | 47 | 48 | def get_value(self, topic, doc): 49 | if self.data[topic][doc]: 50 | return self.data[topic][doc][0] 51 | else: 52 | 
return 0 53 | 54 | def get_rank(self, topic, doc): 55 | return self.get_value(topic, doc) 56 | 57 | 58 | def get_ranking(self, topic): 59 | ''' 60 | Returns an ordered list of tuples (doc,rank, score) 61 | ''' 62 | udl = self.get_doc_list(topic) 63 | dl = [] 64 | for d in udl: 65 | dl.append((d, self.get_score(topic,d))) 66 | odl = sorted(dl, key=lambda doc: doc[1],reverse=True) 67 | 68 | return odl 69 | 70 | def save_file(self, filename, append=False): 71 | ''' Saves the docs ordered by rank for each topic 72 | ''' 73 | if append: 74 | outfile = open(filename, "a") 75 | else: 76 | outfile = open(filename, "w") 77 | 78 | for t in self.get_topic_list(): 79 | odl = self.get_ranking(t) 80 | rank = 1 81 | for d in odl: 82 | out_line = self._get_out_line(t,d[0], rank, d[1]) 83 | rank += 1 84 | outfile.write (out_line) 85 | 86 | outfile.close() 87 | 88 | def clear(self): 89 | self.data = AutoVivification() 90 | -------------------------------------------------------------------------------- /cwl/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ireval/cwl/4c3ea6f282c2fe6246e22afd674293152f48dfb6/cwl/tests/__init__.py -------------------------------------------------------------------------------- /cwl/tests/big_gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 2.0 2 | T1 00 D2 0.0 3 | T1 00 D3 2.0 4 | T1 00 D4 1.0 5 | T1 00 D7 2.0 6 | T1 00 D8 0.0 7 | T1 00 D9 1.0 8 | T1 00 D10 0.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 2.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 0.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 0.0 23 | T3 00 D10 2.0 -------------------------------------------------------------------------------- /cwl/tests/common_metric_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | #sys.path.insert(0, '../') 4 | 5 | from cwl.ruler.measures.cwl_precision import PrecisionCWLMetric 6 | from cwl.ruler.ranking import Ranking 7 | 8 | 9 | class TestPrecision(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.p1 = PrecisionCWLMetric(k=1) 13 | self.p5 = PrecisionCWLMetric(k=5) 14 | 15 | def test_patone_ranking1(self): 16 | """ 17 | Test that Precision at one is correct for each ranking. 18 | """ 19 | 20 | ranking1 = Ranking("T1", [1], [1]) 21 | self.p1.measure(ranking1) 22 | self.assertEqual(self.p1.expected_utility, 1.0) 23 | self.assertEqual(self.p1.expected_total_utility, 1.0) 24 | 25 | 26 | def test_patone_ranking2(self): 27 | """ 28 | Test that Precision at one is correct for each ranking. 29 | """ 30 | ranking2 = Ranking("T2", [1, 0], [1, 1]) 31 | self.p1.measure(ranking2) 32 | self.assertEqual(self.p1.expected_utility, 1.0) 33 | self.assertEqual(self.p1.expected_total_utility, 1.0) 34 | 35 | 36 | def test_patone_ranking3(self): 37 | """ 38 | Test that Precision at one is correct for each ranking. 39 | """ 40 | ranking3 = Ranking("T3", [0, 1], [1, 1]) 41 | self.p1.measure(ranking3) 42 | self.assertEqual(self.p1.expected_utility, 0.0) 43 | self.assertEqual(self.p1.expected_total_utility, 0.0) 44 | 45 | 46 | 47 | def test_padding(self): 48 | """ 49 | Test that Precision at one is correct for each ranking. 
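(More precisely: this checks that P@5 pads the C vector correctly when the ranking is shorter than k, giving EU = 1/5 = 0.2 and ETU = 1.0 for a single relevant item.)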
50 | """ 51 | ranking3 = Ranking("T3", [0, 1], [1, 1]) 52 | self.p5.measure(ranking3) 53 | self.assertEqual(self.p5.expected_utility, 0.2) 54 | self.assertEqual(self.p5.expected_total_utility, 1.0) 55 | 56 | 57 | 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | unittest.main() -------------------------------------------------------------------------------- /cwl/tests/cost_file: -------------------------------------------------------------------------------- 1 | E1 1.0 2 | E2 2.0 3 | E3 5.0 4 | E4 7.5 5 | -------------------------------------------------------------------------------- /cwl/tests/dcg_precision_metrics_file: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(5) 3 | PrecisionCWLMetric(10) 4 | PrecisionCWLMetric(20) 5 | NDCGCWLMetric(1) 6 | NDCGCWLMetric(5) 7 | NDCGCWLMetric(10) 8 | NDCGCWLMetric(20) 9 | -------------------------------------------------------------------------------- /cwl/tests/gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 1.0 2 | T1 00 D2 0.0 3 | T1 00 D3 1.0 4 | T1 00 D4 1.0 5 | T1 00 D7 1.0 6 | T1 00 D8 0.0 7 | T1 00 D9 1.0 8 | T1 00 D10 0.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 1.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 0.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 0.0 23 | T3 00 D10 1.0 -------------------------------------------------------------------------------- /cwl/tests/metrics_file: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(5) 3 | PrecisionCWLMetric(10) 4 | PrecisionCWLMetric(20) 5 | RBPCWLMetric(0.9) 6 | NDCGCWLMetric(10) 7 | RRCWLMetric() 8 | APCWLMetric() 9 | INSTCWLMetric(1) 10 | INSQCWLMetric(1) 11 | BPMCWLMetric(1,1000) 12 | BPMCWLMetric(1000,10) 13 | BPMCWLMetric(1.2,10) 14 | BPMDCWLMetric(1,1000) 15 | BPMDCWLMetric(1000,10) 16 | BPMDCWLMetric(1.2,10) 17 | UMeasureCWLMetric(50) 18 | UMeasureCWLMetric(10) 19 | TBGCWLMetric(22) 20 | IFTGoalCWLMetric(2.0, 0.9, 1) 21 | IFTGoalCWLMetric(2.0, 0.9, 10) 22 | IFTGoalCWLMetric(2.0, 0.9, 100) 23 | IFTRateCWLMetric(0.2, 0.9, 1) 24 | IFTRateCWLMetric(0.2, 0.9, 10) 25 | IFTRateCWLMetric(0.2, 0.9, 100) 26 | IFTGoalRateCWLMetric(2.0,0.9,10, 0.2, 0.9, 10) 27 | IFTGoalRateCWLMetric(2.0,0.9,100, 0.2, 0.9, 100) -------------------------------------------------------------------------------- /cwl/tests/neg_gain_file: -------------------------------------------------------------------------------- 1 | T1 00 D1 1.0 2 | T1 00 D2 -1.0 3 | T1 00 D3 1.0 4 | T1 00 D4 1.0 5 | T1 00 D7 1.0 6 | T1 00 D8 -1.0 7 | T1 00 D9 1.0 8 | T1 00 D10 -1.0 9 | T2 00 D1 0.2 10 | T2 00 D2 0.4 11 | T2 00 D5 1.0 12 | T2 00 D6 0.0 13 | T2 00 D7 0.2 14 | T2 00 D8 1.0 15 | T2 00 D9 0.4 16 | T2 00 D10 0.0 17 | T3 00 D4 0.0 18 | T3 00 D5 -1.0 19 | T3 00 D6 0.8 20 | T3 00 D7 0.2 21 | T3 00 D8 0.4 22 | T3 00 D9 -1.0 23 | T3 00 D10 1.0 -------------------------------------------------------------------------------- /cwl/tests/precision_metrics: -------------------------------------------------------------------------------- 1 | PrecisionCWLMetric(1) 2 | PrecisionCWLMetric(2) 3 | PrecisionCWLMetric(3) 4 | PrecisionCWLMetric(4) 5 | PrecisionCWLMetric(5) 6 | PrecisionCWLMetric(10) 7 | -------------------------------------------------------------------------------- /cwl/tests/qrel_file: 
-------------------------------------------------------------------------------- 1 | T1 00 D1 1 2 | T1 00 D2 0 3 | T1 00 D3 1 4 | T1 00 D4 1 5 | T1 00 D7 1 6 | T1 00 D8 0 7 | T1 00 D9 1 8 | T1 00 D10 0 9 | T2 00 D1 1 10 | T2 00 D2 1 11 | T2 00 D5 1 12 | T2 00 D6 0 13 | T2 00 D7 1 14 | T2 00 D8 1 15 | T2 00 D9 1 16 | T2 00 D10 0 17 | T3 00 D4 0 18 | T3 00 D5 0 19 | T3 00 D6 1 20 | T3 00 D7 1 21 | T3 00 D8 1 22 | T3 00 D9 0 23 | T3 00 D10 1 24 | -------------------------------------------------------------------------------- /cwl/tests/ranking_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import numpy as np 4 | sys.path.insert(0,'./') 5 | 6 | from cwl.ruler.ranking import Ranking 7 | from cwl.ruler.ranking import RankingMaker 8 | from cwl.seeker.trec_qrel_handler import TrecQrelHandler 9 | 10 | class TestRanking(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.ranking1 = Ranking("T1", [1., 0., 0.5, 1., 0.0], [1., 1., 1., 1., 1.]) 14 | self.ranking2 = Ranking("T2", [1., np.nan, 0.5, 1., np.nan], [1., 1., 1., 1., 1.]) 15 | self.ranking3 = Ranking("T3", 16 | [1., np.nan, 0.5, 1., np.nan, 0.0, 0.0, 0.0, 0.0, 0.0], 17 | [1., 1., 1., 1., 1., 2., 2., 2., 2., 2.], 18 | max_gain=2.0, max_cost=5.0) 19 | 20 | self.ranking4 = Ranking("T4", 21 | [1., np.nan, 0.5, 1., np.nan, 0.0, 0.0, 0.0, 0.0, 0.0], 22 | [], 23 | max_gain=2.0, max_cost=3.0, min_cost=2.0) 24 | 25 | self.ranking5 = Ranking("T5", 26 | [1., 0., 1., 1., 1., 0.0, 0.0, 0.0, 0.0, 0.0], 27 | []) 28 | 29 | def test_ranking1_total_rels(self): 30 | """ 31 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 32 | Assumes that MAX_N = 1000 33 | """ 34 | min_total = self.ranking1.get_total_rels() 35 | max_total = self.ranking1.get_total_rels(worse_case=False) 36 | 37 | self.assertEqual(min_total, 3.0) 38 | self.assertEqual(max_total, 998.0) 39 | 40 | 41 | def test_ranking1_total_gain(self): 42 | """ 43 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 44 | """ 45 | min_total = self.ranking1.get_total_gain() 46 | max_total = self.ranking1.get_total_gain(worse_case=False) 47 | # print(self.ranking1.get_gain_vector(worse_case=False)) 48 | # print(max_total) 49 | self.assertEqual(min_total, 2.5) 50 | self.assertEqual(max_total, 997.5) 51 | 52 | def test_ranking2_total_rels(self): 53 | """ 54 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 55 | and that the np.nans are converted to min and max gain. 56 | """ 57 | min_total = self.ranking2.get_total_rels() 58 | max_total = self.ranking2.get_total_rels(worse_case=False) 59 | # print(self.ranking1.get_gain_vector(worse_case=False)) 60 | # print(max_total) 61 | self.assertEqual(min_total, 3.0) 62 | self.assertEqual(max_total, 1000.0) 63 | 64 | def test_ranking2_total_gain(self): 65 | """ 66 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 67 | and that the np.nans are converted to min and max gain. 
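(Arithmetic check: worse case 2.5 = 1.0 + 0.5 + 1.0 with both NaNs at min gain; best case 999.5 = 4.5 with both NaNs at max gain plus 995 padded items at gain 1.0.)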
68 | """ 69 | min_total = self.ranking2.get_total_gain() 70 | max_total = self.ranking2.get_total_gain(worse_case=False) 71 | # print(self.ranking1.get_gain_vector(worse_case=False)) 72 | # print(max_total) 73 | self.assertEqual(min_total, 2.5) 74 | self.assertEqual(max_total, 999.5) 75 | 76 | def test_ranking3_total_rels(self): 77 | """ 78 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 79 | and that the np.nans are converted to min and max gain. 80 | """ 81 | min_total = self.ranking3.get_total_rels() 82 | max_total = self.ranking3.get_total_rels(worse_case=False) 83 | # print(self.ranking1.get_gain_vector(worse_case=False)) 84 | # print(max_total) 85 | self.assertEqual(min_total, 3.0) 86 | self.assertEqual(max_total, 995.0) 87 | 88 | def test_ranking3_total_gain(self): 89 | """ 90 | Test whether the tail is filled with min gain (worse case), and max gain (best case) 91 | and that the np.nans are converted to min and max gain. 92 | """ 93 | min_total = self.ranking3.get_total_gain() 94 | max_total = self.ranking3.get_total_gain(worse_case=False) 95 | # print(self.ranking1.get_gain_vector(worse_case=False)) 96 | # print(max_total) 97 | self.assertEqual(min_total, 2.5) 98 | self.assertEqual(max_total, 1986.5) 99 | 100 | 101 | def test_ranking3_total_cost(self): 102 | """ 103 | Test whether the tail is filled with max_cost (worse case) 104 | and min cost (best case) 105 | """ 106 | max_total = np.sum(self.ranking3.get_cost_vector()) 107 | min_total = np.sum(self.ranking3.get_cost_vector(worse_case=False)) 108 | self.assertEqual(max_total, 4965.0) 109 | self.assertEqual(min_total, 1005.0) 110 | 111 | def test_ranking4_total_cost_when_no_cost_vector_is_supplied(self): 112 | """ 113 | Test whether the tail is filled with max_cost (worse case) 114 | and min cost (best case) 115 | Note this is the reverse 116 | """ 117 | max_total = self.ranking4.get_total_cost() 118 | min_total = self.ranking4.get_total_cost(worse_case=False) 119 | self.assertEqual(max_total, 3000.0) 120 | self.assertEqual(min_total, 2000.0) 121 | 122 | def test_ranking5_sum_over_top_ranks(self): 123 | min_gains = self.ranking5.get_gain_vector() 124 | max_gains = self.ranking5.get_gain_vector(worse_case=False) 125 | # print(min_gains[0:5]) 126 | self.assertEqual(np.sum(min_gains[0:5]), 4) 127 | self.assertEqual(np.sum(max_gains[0:5]), 4) 128 | 129 | class TestRankingMaker(unittest.TestCase): 130 | 131 | def setUp(self): 132 | gh = TrecQrelHandler("qrel_file") 133 | gh.put_value("T1", "D1", 1.0) 134 | gh.put_value("T1", "D2", 0.0) 135 | gh.put_value("T1", "D3", 1.0) 136 | gh.put_value("T1", "D4", 0.0) 137 | gh.put_value("T1", "D5", 1.0) 138 | gh.put_value("T1", "D6", 0.0) 139 | gh.put_value("T1", "D7", 1.0) 140 | gh.put_value("T1", "D8", 0.0) 141 | gh.put_value("T1", "D9", 0.0) 142 | gh.put_value("T1", "D10", 1.0) 143 | 144 | self.rm = RankingMaker(topic_id="T1", gain_handler=gh, cost_dict=None) 145 | docs = ["D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10"] 146 | for d in docs: 147 | self.rm.add(d, "") 148 | 149 | def test_ranking(self): 150 | ranking = self.rm.get_ranking() 151 | #print(ranking.) 
152 | min_gains = ranking.get_gain_vector() 153 | max_gains = ranking.get_gain_vector(worse_case=False) 154 | #print(min_gains[0:20]) 155 | # print(np.cumsum(gains)[0:20]) 156 | # print(gains[0:10]) 157 | self.assertEqual(np.sum(min_gains[0:20]), 5.0) 158 | self.assertEqual(np.sum(max_gains[0:20]), 15.0) 159 | 160 | 161 | if __name__ == '__main__': 162 | unittest.main() -------------------------------------------------------------------------------- /cwl/tests/result_file: -------------------------------------------------------------------------------- 1 | T1 E2 D1 1 4.3 R1 2 | T1 E2 D2 2 4.2 R1 3 | T1 E1 D3 3 4.1 R1 4 | T1 E2 D4 4 3.9 R1 5 | T1 E3 D5 5 3.8 R1 6 | T1 E1 D6 6 3.7 R1 7 | T1 E2 D7 7 3.6 R1 8 | T1 E1 D8 8 3.5 R1 9 | T1 E2 D9 9 3.4 R1 10 | T1 E3 D10 10 3.3 R1 11 | T2 E1 D1 1 4.3 R1 12 | T2 E1 D2 2 4.2 R1 13 | T2 E1 D3 3 4.1 R1 14 | T2 E2 D4 4 3.9 R1 15 | T2 E2 D5 5 3.8 R1 16 | T2 E1 D6 6 3.7 R1 17 | T2 E2 D7 7 3.6 R1 18 | T2 E1 D8 8 3.5 R1 19 | T2 E2 D9 9 3.4 R1 20 | T2 E3 D10 10 3.3 R1 21 | T3 E3 D1 1 4.3 R1 22 | T3 E2 D2 2 4.2 R1 23 | T3 E1 D3 3 4.1 R1 24 | T3 E2 D4 4 3.9 R1 25 | T3 E2 D5 5 3.8 R1 26 | T3 E1 D6 6 3.7 R1 27 | T3 E2 D7 7 3.6 R1 28 | T3 E1 D8 8 3.5 R1 29 | T3 E2 D9 9 3.4 R1 30 | T3 E3 D10 10 3.3 R1 31 | -------------------------------------------------------------------------------- /make-instructions.txt: -------------------------------------------------------------------------------- 1 | Update verision number and any requirements in: 2 | 3 | setup.py 4 | 5 | Create the source distribution: 6 | 7 | python setup.py sdist 8 | 9 | Make sure twine is installed (pip install twine) and then do the upload: 10 | 11 | twine upload dist/* 12 | 13 | You will need your username and password for PyPi. 14 | 15 | To see if the changes worked you can upgrade with: 16 | 17 | pip install cwl-eval --upgrade 18 | 19 | 20 | -------------------------------------------------------------------------------- /make-requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | bleach==3.3.0 3 | certifi==2019.3.9 4 | chardet==3.0.4 5 | distlib==0.3.1 6 | docutils==0.14 7 | filelock==3.0.12 8 | idna==2.8 9 | numpy==1.23.4 10 | packaging==20.9 11 | pkginfo==1.5.0.1 12 | Pygments==2.7.4 13 | pyparsing==2.4.7 14 | readme-renderer==24.0 15 | requests==2.22.0 16 | requests-toolbelt==0.9.1 17 | six==1.12.0 18 | tqdm==4.32.1 19 | twine==1.13.0 20 | urllib3==1.26.5 21 | virtualenv==20.4.2 22 | webencodings==0.5.1 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.4 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open('README.md', 'r') as f: 4 | long_description = f.read() 5 | 6 | setuptools.setup( 7 | name='cwl-eval', 8 | version='1.0.12', 9 | 10 | scripts=['cwl-eval'], 11 | 12 | author='Leif Azzopardi, Paul Thomas, Alistair Moffat', 13 | author_email='leifos@acm.org, pathom@microsoft.com, ammoffat@unimelb.edu.au', 14 | 15 | description='An information retrieval evaluation script based on the C/W/L framework ' 16 | 'that is TREC Compatible and provides a replacement for INST_EVAL, RBP_EVAL, ' 17 | 'TBG_EVAL, UMeasure and TREC_EVAL scripts. 
All measurements are reported in ' 18 | 'the same units making all metrics directly comparable.', 19 | 20 | long_description=long_description, 21 | long_description_content_type='text/markdown', 22 | 23 | url='https://github.com/ireval/cwl', 24 | 25 | packages=setuptools.find_packages(), 26 | 27 | python_requires='>=3', 28 | 29 | install_requires=[ 30 | 'numpy', 31 | ], 32 | 33 | classifiers=[ 34 | 'Intended Audience :: Science/Research', 35 | 'Programming Language :: Python :: 3 :: Only', 36 | 'Topic :: Scientific/Engineering :: Information Analysis', 37 | 'License :: OSI Approved :: MIT License', 38 | 'Development Status :: 3 - Alpha', 39 | 40 | ], 41 | 42 | ) 43 | --------------------------------------------------------------------------------