├── .gitignore ├── LICENSE ├── README.md ├── affiliation ├── __init__.py ├── _affiliation_zone.py ├── _integral_interval.py ├── _single_ground_truth_event.py ├── generics.py └── metrics.py ├── data ├── machinetemp_adversary.gz ├── machinetemp_greenhouse.gz ├── machinetemp_groundtruth.gz ├── machinetemp_lstmad.gz ├── machinetemp_luminol.gz ├── machinetemp_trivial.gz ├── nyctaxi_adversary.gz ├── nyctaxi_greenhouse.gz ├── nyctaxi_groundtruth.gz ├── nyctaxi_lstmad.gz ├── nyctaxi_luminol.gz ├── nyctaxi_trivial.gz ├── swat_adversary.gz ├── swat_groundtruth.gz ├── swat_iforest.gz ├── swat_ocsvm.gz ├── swat_seq2seq.gz ├── swat_trivial.gz ├── twitteraapl_adversary.gz ├── twitteraapl_greenhouse.gz ├── twitteraapl_groundtruth.gz ├── twitteraapl_lstmad.gz ├── twitteraapl_luminol.gz └── twitteraapl_trivial.gz ├── setup.py └── tests ├── __init__.py ├── test_affiliation_zone.py ├── test_data.py ├── test_generics.py ├── test_integral_interval.py ├── test_metrics.py └── test_single_ground_truth_event.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Alexis Huet and others 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # affiliation-metrics-py 2 | 3 | Python 3 implementation of the affiliation metrics and tests for reproducing the experiments described in *Local Evaluation of Time Series Anomaly Detection Algorithms*, accepted in KDD 2022 Research Track: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 4 | 5 | ### Installation 6 | 7 | Type `pip install .` to install the *affiliation* 8 | package. Only the [standard Python library](https://docs.python.org/3/library/index.html) is needed, there is no dependency to external libraries. 9 | 10 | ### Usage 11 | 12 | In a Python session, the following lines give an example for computing 13 | the affiliation metrics from prediction and ground truth vectors: 14 | 15 | ``` 16 | from affiliation.generics import convert_vector_to_events 17 | from affiliation.metrics import pr_from_events 18 | 19 | vector_pred = [0, 0, 0, 0, 1, 0, 0, 0, 1, 0] 20 | vector_gt = [0, 0, 0, 1, 0, 0, 0, 1, 1, 1] 21 | 22 | events_pred = convert_vector_to_events(vector_pred) # [(4, 5), (8, 9)] 23 | events_gt = convert_vector_to_events(vector_gt) # [(3, 4), (7, 10)] 24 | Trange = (0, len(vector_pred)) 25 | 26 | pr_from_events(events_pred, events_gt, Trange) 27 | ``` 28 | 29 | which gives as output: 30 | ``` 31 | {'precision': 0.82, 32 | 'recall': 0.84, 33 | 'individual_precision_probabilities': [0.63, 1.0], 34 | 'individual_recall_probabilities': [0.82, 0.87], 35 | 'individual_precision_distances': [0.5, 0.0], 36 | 'individual_recall_distances': [0.5, 0.33]} 37 | ``` 38 | 39 | ### Testing and reproducibility 40 | 41 | The unit tests can be run by typing: 42 | 43 | ``` 44 | python -m unittest discover 45 | ``` 46 | 47 | The results from the paper are also tested. 48 | The specific tests of the results are located at `tests/test_data.py` and tested 49 | against data located in the folder `data/`. 50 | -------------------------------------------------------------------------------- /affiliation/__init__.py: -------------------------------------------------------------------------------- 1 | from affiliation.metrics import pr_from_events 2 | -------------------------------------------------------------------------------- /affiliation/_affiliation_zone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from affiliation._integral_interval import interval_intersection 4 | 5 | def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 6 | """ 7 | Helper for `E_gt_func` 8 | 9 | :param j: index from 0 to len(Js) (included) on which to get the start 10 | :param Js: ground truth events, as a list of couples 11 | :param Trange: range of the series where Js is included 12 | :return: generalized start such that the middle of t_start and t_stop 13 | always gives the affiliation zone 14 | """ 15 | b = max(Trange) 16 | n = len(Js) 17 | if j == n: 18 | return(2*b - t_stop(n-1, Js, Trange)) 19 | else: 20 | return(Js[j][0]) 21 | 22 | def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 23 | """ 24 | Helper for `E_gt_func` 25 | 26 | :param j: index from 0 to len(Js) (included) on which to get the stop 27 | :param Js: ground truth events, as a list of couples 28 | :param Trange: range of the series where Js is included 29 | :return: generalized stop such that the middle of t_start and t_stop 30 | always gives the affiliation zone 31 | """ 32 | if j == -1: 33 | a = min(Trange) 34 | return(2*a - t_start(0, Js, Trange)) 35 | else: 36 | return(Js[j][1]) 37 | 38 | def E_gt_func(j, Js, Trange): 39 | """ 40 | Get the affiliation zone of element j of the ground truth 41 | 42 | :param j: index from 0 to len(Js) (excluded) on which to get the zone 43 | :param Js: ground truth events, as a list of couples 44 | :param Trange: range of the series where Js is included, can 45 | be (-math.inf, math.inf) for distance measures 46 | :return: affiliation zone of element j of the ground truth represented 47 | as a couple 48 | """ 49 | range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2 50 | range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2 51 | return((range_left, range_right)) 52 | 53 | def get_all_E_gt_func(Js, Trange): 54 | """ 55 | Get the affiliation partition from the ground truth point of view 56 | 57 | :param Js: ground truth events, as a list of couples 58 | :param Trange: range of the series where Js is included, can 59 | be (-math.inf, math.inf) for distance measures 60 | :return: affiliation partition of the events 61 | """ 62 | # E_gt is the limit of affiliation/attraction for each ground truth event 63 | E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))] 64 | return(E_gt) 65 | 66 | def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]): 67 | """ 68 | Cut the events into the affiliation zones 69 | The presentation given here is from the ground truth point of view, 70 | but it is also used in the reversed direction in the main function. 71 | 72 | :param Is: events as a list of couples 73 | :param E_gt: range of the affiliation zones 74 | :return: a list of list of intervals (each interval represented by either 75 | a couple or None for empty interval). The outer list is indexed by each 76 | affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`. 77 | """ 78 | out = [None] * len(E_gt) 79 | for j in range(len(E_gt)): 80 | E_gt_j = E_gt[j] 81 | discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E 82 | discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E 83 | kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)] 84 | Is_j = [x for x, y in zip(Is, kept_index)] 85 | out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j] 86 | return(out) 87 | -------------------------------------------------------------------------------- /affiliation/_integral_interval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import math 4 | from affiliation.generics import _sum_wo_nan 5 | """ 6 | In order to shorten the length of the variables, 7 | the general convention in this file is to let: 8 | - I for a predicted event (start, stop), 9 | - Is for a list of predicted events, 10 | - J for a ground truth event, 11 | - Js for a list of ground truth events. 12 | """ 13 | 14 | def interval_length(J = (1,2)): 15 | """ 16 | Length of an interval 17 | 18 | :param J: couple representating the start and stop of an interval, or None 19 | :return: length of the interval, and 0 for a None interval 20 | """ 21 | if J is None: 22 | return(0) 23 | return(J[1] - J[0]) 24 | 25 | def sum_interval_lengths(Is = [(1,2),(3,4),(5,6)]): 26 | """ 27 | Sum of length of the intervals 28 | 29 | :param Is: list of intervals represented by starts and stops 30 | :return: sum of the interval length 31 | """ 32 | return(sum([interval_length(I) for I in Is])) 33 | 34 | def interval_intersection(I = (1, 3), J = (2, 4)): 35 | """ 36 | Intersection between two intervals I and J 37 | I and J should be either empty or represent a positive interval (no point) 38 | 39 | :param I: an interval represented by start and stop 40 | :param J: a second interval of the same form 41 | :return: an interval representing the start and stop of the intersection (or None if empty) 42 | """ 43 | if I is None: 44 | return(None) 45 | if J is None: 46 | return(None) 47 | 48 | I_inter_J = (max(I[0], J[0]), min(I[1], J[1])) 49 | if I_inter_J[0] >= I_inter_J[1]: 50 | return(None) 51 | else: 52 | return(I_inter_J) 53 | 54 | def interval_subset(I = (1, 3), J = (0, 6)): 55 | """ 56 | Checks whether I is a subset of J 57 | 58 | :param I: an non empty interval represented by start and stop 59 | :param J: a second non empty interval of the same form 60 | :return: True if I is a subset of J 61 | """ 62 | if (I[0] >= J[0]) and (I[1] <= J[1]): 63 | return True 64 | else: 65 | return False 66 | 67 | def cut_into_three_func(I, J): 68 | """ 69 | Cut an interval I into a partition of 3 subsets: 70 | the elements before J, 71 | the elements belonging to J, 72 | and the elements after J 73 | 74 | :param I: an interval represented by start and stop, or None for an empty one 75 | :param J: a non empty interval 76 | :return: a triplet of three intervals, each represented by either (start, stop) or None 77 | """ 78 | if I is None: 79 | return((None, None, None)) 80 | 81 | I_inter_J = interval_intersection(I, J) 82 | if I == I_inter_J: 83 | I_before = None 84 | I_after = None 85 | elif I[1] <= J[0]: 86 | I_before = I 87 | I_after = None 88 | elif I[0] >= J[1]: 89 | I_before = None 90 | I_after = I 91 | elif (I[0] <= J[0]) and (I[1] >= J[1]): 92 | I_before = (I[0], I_inter_J[0]) 93 | I_after = (I_inter_J[1], I[1]) 94 | elif I[0] <= J[0]: 95 | I_before = (I[0], I_inter_J[0]) 96 | I_after = None 97 | elif I[1] >= J[1]: 98 | I_before = None 99 | I_after = (I_inter_J[1], I[1]) 100 | else: 101 | raise ValueError('unexpected unconsidered case') 102 | return(I_before, I_inter_J, I_after) 103 | 104 | def get_pivot_j(I, J): 105 | """ 106 | Get the single point of J that is the closest to I, called 'pivot' here, 107 | with the requirement that I should be outside J 108 | 109 | :param I: a non empty interval (start, stop) 110 | :param J: another non empty interval, with empty intersection with I 111 | :return: the element j of J that is the closest to I 112 | """ 113 | if interval_intersection(I, J) is not None: 114 | raise ValueError('I and J should have a void intersection') 115 | 116 | j_pivot = None # j_pivot is a border of J 117 | if max(I) <= min(J): 118 | j_pivot = min(J) 119 | elif min(I) >= max(J): 120 | j_pivot = max(J) 121 | else: 122 | raise ValueError('I should be outside J') 123 | return(j_pivot) 124 | 125 | def integral_mini_interval(I, J): 126 | """ 127 | In the specific case where interval I is located outside J, 128 | integral of distance from x to J over the interval x \in I. 129 | This is the *integral* i.e. the sum. 130 | It's not the mean (not divided by the length of I yet) 131 | 132 | :param I: a interval (start, stop), or None 133 | :param J: a non empty interval, with empty intersection with I 134 | :return: the integral of distances d(x, J) over x \in I 135 | """ 136 | if I is None: 137 | return(0) 138 | 139 | j_pivot = get_pivot_j(I, J) 140 | a = min(I) 141 | b = max(I) 142 | return((b-a)*abs((j_pivot - (a+b)/2))) 143 | 144 | def integral_interval_distance(I, J): 145 | """ 146 | For any non empty intervals I, J, compute the 147 | integral of distance from x to J over the interval x \in I. 148 | This is the *integral* i.e. the sum. 149 | It's not the mean (not divided by the length of I yet) 150 | The interval I can intersect J or not 151 | 152 | :param I: a interval (start, stop), or None 153 | :param J: a non empty interval 154 | :return: the integral of distances d(x, J) over x \in I 155 | """ 156 | # I and J are single intervals (not generic sets) 157 | # I is a predicted interval in the range of affiliation of J 158 | 159 | def f(I_cut): 160 | return(integral_mini_interval(I_cut, J)) 161 | # If I_middle is fully included into J, it is 162 | # the distance to J is always 0 163 | def f0(I_middle): 164 | return(0) 165 | 166 | cut_into_three = cut_into_three_func(I, J) 167 | # Distance for now, not the mean: 168 | # Distance left: Between cut_into_three[0] and the point min(J) 169 | d_left = f(cut_into_three[0]) 170 | # Distance middle: Between cut_into_three[1] = I inter J, and J 171 | d_middle = f0(cut_into_three[1]) 172 | # Distance right: Between cut_into_three[2] and the point max(J) 173 | d_right = f(cut_into_three[2]) 174 | # It's an integral so summable 175 | return(d_left + d_middle + d_right) 176 | 177 | def integral_mini_interval_P_CDFmethod__min_piece(I, J, E): 178 | """ 179 | Helper of `integral_mini_interval_Pprecision_CDFmethod` 180 | In the specific case where interval I is located outside J, 181 | compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with: 182 | - m the smallest distance from J to E, 183 | - d_min the smallest distance d(x, J) from x \in I to J 184 | - d_max the largest distance d(x, J) from x \in I to J 185 | 186 | :param I: a single predicted interval, a non empty interval (start, stop) 187 | :param J: ground truth interval, a non empty interval, with empty intersection with I 188 | :param E: the affiliation/influence zone for J, represented as a couple (start, stop) 189 | :return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$ 190 | """ 191 | if interval_intersection(I, J) is not None: 192 | raise ValueError('I and J should have a void intersection') 193 | if not interval_subset(J, E): 194 | raise ValueError('J should be included in E') 195 | if not interval_subset(I, E): 196 | raise ValueError('I should be included in E') 197 | 198 | e_min = min(E) 199 | j_min = min(J) 200 | j_max = max(J) 201 | e_max = max(E) 202 | i_min = min(I) 203 | i_max = max(I) 204 | 205 | d_min = max(i_min - j_max, j_min - i_max) 206 | d_max = max(i_max - j_max, j_min - i_min) 207 | m = min(j_min - e_min, e_max - j_max) 208 | A = min(d_max, m)**2 - min(d_min, m)**2 209 | B = max(d_max, m) - max(d_min, m) 210 | C = (1/2)*A + m*B 211 | return(C) 212 | 213 | def integral_mini_interval_Pprecision_CDFmethod(I, J, E): 214 | """ 215 | Integral of the probability of distances over the interval I. 216 | In the specific case where interval I is located outside J, 217 | compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. 218 | This is the *integral* i.e. the sum (not the mean) 219 | 220 | :param I: a single predicted interval, a non empty interval (start, stop) 221 | :param J: ground truth interval, a non empty interval, with empty intersection with I 222 | :param E: the affiliation/influence zone for J, represented as a couple (start, stop) 223 | :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ 224 | """ 225 | integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E) 226 | 227 | e_min = min(E) 228 | j_min = min(J) 229 | j_max = max(J) 230 | e_max = max(E) 231 | i_min = min(I) 232 | i_max = max(I) 233 | d_min = max(i_min - j_max, j_min - i_max) 234 | d_max = max(i_max - j_max, j_min - i_min) 235 | integral_linear_piece = (1/2)*(d_max**2 - d_min**2) 236 | integral_remaining_piece = (j_max - j_min)*(i_max - i_min) 237 | 238 | DeltaI = i_max - i_min 239 | DeltaE = e_max - e_min 240 | 241 | output = DeltaI - (1/DeltaE)*(integral_min_piece + integral_linear_piece + integral_remaining_piece) 242 | return(output) 243 | 244 | def integral_interval_probaCDF_precision(I, J, E): 245 | """ 246 | Integral of the probability of distances over the interval I. 247 | Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. 248 | This is the *integral* i.e. the sum (not the mean) 249 | 250 | :param I: a single (non empty) predicted interval in the zone of affiliation of J 251 | :param J: ground truth interval 252 | :param E: affiliation/influence zone for J 253 | :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ 254 | """ 255 | # I and J are single intervals (not generic sets) 256 | def f(I_cut): 257 | if I_cut is None: 258 | return(0) 259 | else: 260 | return(integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E)) 261 | 262 | # If I_middle is fully included into J, it is 263 | # integral of 1 on the interval I_middle, so it's |I_middle| 264 | def f0(I_middle): 265 | if I_middle is None: 266 | return(0) 267 | else: 268 | return(max(I_middle) - min(I_middle)) 269 | 270 | cut_into_three = cut_into_three_func(I, J) 271 | # Distance for now, not the mean: 272 | # Distance left: Between cut_into_three[0] and the point min(J) 273 | d_left = f(cut_into_three[0]) 274 | # Distance middle: Between cut_into_three[1] = I inter J, and J 275 | d_middle = f0(cut_into_three[1]) 276 | # Distance right: Between cut_into_three[2] and the point max(J) 277 | d_right = f(cut_into_three[2]) 278 | # It's an integral so summable 279 | return(d_left + d_middle + d_right) 280 | 281 | def cut_J_based_on_mean_func(J, e_mean): 282 | """ 283 | Helper function for the recall. 284 | Partition J into two intervals: before and after e_mean 285 | (e_mean represents the center element of E the zone of affiliation) 286 | 287 | :param J: ground truth interval 288 | :param e_mean: a float number (center value of E) 289 | :return: a couple partitionning J into (J_before, J_after) 290 | """ 291 | if J is None: 292 | J_before = None 293 | J_after = None 294 | elif e_mean >= max(J): 295 | J_before = J 296 | J_after = None 297 | elif e_mean <= min(J): 298 | J_before = None 299 | J_after = J 300 | else: # e_mean is across J 301 | J_before = (min(J), e_mean) 302 | J_after = (e_mean, max(J)) 303 | 304 | return((J_before, J_after)) 305 | 306 | def integral_mini_interval_Precall_CDFmethod(I, J, E): 307 | """ 308 | Integral of the probability of distances over the interval J. 309 | In the specific case where interval J is located outside I, 310 | compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. 311 | This is the *integral* i.e. the sum (not the mean) 312 | 313 | :param I: a single (non empty) predicted interval 314 | :param J: ground truth (non empty) interval, with empty intersection with I 315 | :param E: the affiliation/influence zone for J, represented as a couple (start, stop) 316 | :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ 317 | """ 318 | # The interval J should be located outside I 319 | # (so it's either the left piece or the right piece w.r.t I) 320 | i_pivot = get_pivot_j(J, I) 321 | e_min = min(E) 322 | e_max = max(E) 323 | e_mean = (e_min + e_max) / 2 324 | 325 | # If i_pivot is outside E (it's possible), then 326 | # the distance is worst that any random element within E, 327 | # so we set the recall to 0 328 | if i_pivot <= min(E): 329 | return(0) 330 | elif i_pivot >= max(E): 331 | return(0) 332 | # Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d 333 | 334 | cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean) 335 | J_before = cut_J_based_on_e_mean[0] 336 | J_after = cut_J_based_on_e_mean[1] 337 | 338 | iemin_mean = (e_min + i_pivot)/2 339 | cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean) 340 | J_before_closeE = cut_Jbefore_based_on_iemin_mean[0] # before e_mean and closer to e_min than i_pivot ~ J_before_before 341 | J_before_closeI = cut_Jbefore_based_on_iemin_mean[1] # before e_mean and closer to i_pivot than e_min ~ J_before_after 342 | 343 | iemax_mean = (e_max + i_pivot)/2 344 | cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean) 345 | J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before 346 | J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after 347 | 348 | if J_before_closeE is not None: 349 | j_before_before_min = min(J_before_closeE) # == min(J) 350 | j_before_before_max = max(J_before_closeE) 351 | else: 352 | j_before_before_min = math.nan 353 | j_before_before_max = math.nan 354 | 355 | if J_before_closeI is not None: 356 | j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing 357 | j_before_after_max = max(J_before_closeI) # == max(J_before) 358 | else: 359 | j_before_after_min = math.nan 360 | j_before_after_max = math.nan 361 | 362 | if J_after_closeI is not None: 363 | j_after_before_min = min(J_after_closeI) # == min(J_after) 364 | j_after_before_max = max(J_after_closeI) 365 | else: 366 | j_after_before_min = math.nan 367 | j_after_before_max = math.nan 368 | 369 | if J_after_closeE is not None: 370 | j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing 371 | j_after_after_max = max(J_after_closeE) # == max(J) 372 | else: 373 | j_after_after_min = math.nan 374 | j_after_after_max = math.nan 375 | 376 | # <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE --> 377 | # j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max 378 | # (with `b` for before and `a` for after in the previous variable names) 379 | 380 | # vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt 381 | # Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA) 382 | # Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) 383 | # Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) 384 | # Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) 385 | # 386 | # Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) 387 | # Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) 388 | # Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) 389 | # Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA) 390 | 391 | if i_pivot >= max(J): 392 | part1_before_closeE = (i_pivot-e_min)*(j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min 393 | part2_before_closeI = 2*i_pivot*(j_before_after_max-j_before_after_min) - (j_before_after_max**2 - j_before_after_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min 394 | part3_after_closeI = 2*i_pivot*(j_after_before_max-j_after_before_min) - (j_after_before_max**2 - j_after_before_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min 395 | part4_after_closeE = (e_max+i_pivot)*(j_after_after_max-j_after_after_min) - (j_after_after_max**2 - j_after_after_min**2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min 396 | out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] 397 | elif i_pivot <= min(J): 398 | part1_before_closeE = (j_before_before_max**2 - j_before_before_min**2) - (e_min+i_pivot)*(j_before_before_max-j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min 399 | part2_before_closeI = (j_before_after_max**2 - j_before_after_min**2) - 2*i_pivot*(j_before_after_max-j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min 400 | part3_after_closeI = (j_after_before_max**2 - j_after_before_min**2) - 2*i_pivot*(j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min 401 | part4_after_closeE = (e_max-i_pivot)*(j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min 402 | out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] 403 | else: 404 | raise ValueError('The i_pivot should be outside J') 405 | 406 | out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts 407 | 408 | # We have for each point t of J: 409 | # \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d) 410 | # Since t is a single-point here, and we are in the case where i_pivot is inside E. 411 | # The integral is then given by: 412 | # C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt 413 | # = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt 414 | # = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt] 415 | # = |J| - (1/|E|) * out_integral_min_dm_plus_d 416 | DeltaJ = max(J) - min(J) 417 | DeltaE = max(E) - min(E) 418 | C = DeltaJ - (1/DeltaE) * out_integral_min_dm_plus_d 419 | 420 | return(C) 421 | 422 | def integral_interval_probaCDF_recall(I, J, E): 423 | """ 424 | Integral of the probability of distances over the interval J. 425 | Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. 426 | This is the *integral* i.e. the sum (not the mean) 427 | 428 | :param I: a single (non empty) predicted interval 429 | :param J: ground truth (non empty) interval 430 | :param E: the affiliation/influence zone for J 431 | :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ 432 | """ 433 | # I and J are single intervals (not generic sets) 434 | # E is the outside affiliation interval of J (even for recall!) 435 | # (in particular J \subset E) 436 | # 437 | # J is the portion of the ground truth affiliated to I 438 | # I is a predicted interval (can be outside E possibly since it's recall) 439 | def f(J_cut): 440 | if J_cut is None: 441 | return(0) 442 | else: 443 | return integral_mini_interval_Precall_CDFmethod(I, J_cut, E) 444 | 445 | # If J_middle is fully included into I, it is 446 | # integral of 1 on the interval J_middle, so it's |J_middle| 447 | def f0(J_middle): 448 | if J_middle is None: 449 | return(0) 450 | else: 451 | return(max(J_middle) - min(J_middle)) 452 | 453 | cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I 454 | # since we integrate over J this time. 455 | # 456 | # Distance for now, not the mean: 457 | # Distance left: Between cut_into_three[0] and the point min(I) 458 | d_left = f(cut_into_three[0]) 459 | # Distance middle: Between cut_into_three[1] = J inter I, and I 460 | d_middle = f0(cut_into_three[1]) 461 | # Distance right: Between cut_into_three[2] and the point max(I) 462 | d_right = f(cut_into_three[2]) 463 | # It's an integral so summable 464 | return(d_left + d_middle + d_right) 465 | -------------------------------------------------------------------------------- /affiliation/_single_ground_truth_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import math 4 | from affiliation._affiliation_zone import ( 5 | get_all_E_gt_func, 6 | affiliation_partition) 7 | from affiliation._integral_interval import ( 8 | integral_interval_distance, 9 | integral_interval_probaCDF_precision, 10 | integral_interval_probaCDF_recall, 11 | interval_length, 12 | sum_interval_lengths) 13 | 14 | def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 15 | """ 16 | Compute the individual average distance from Is to a single ground truth J 17 | 18 | :param Is: list of predicted events within the affiliation zone of J 19 | :param J: couple representating the start and stop of a ground truth interval 20 | :return: individual average precision directed distance number 21 | """ 22 | if all([I is None for I in Is]): # no prediction in the current area 23 | return(math.nan) # undefined 24 | return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is)) 25 | 26 | def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 27 | """ 28 | Compute the individual precision probability from Is to a single ground truth J 29 | 30 | :param Is: list of predicted events within the affiliation zone of J 31 | :param J: couple representating the start and stop of a ground truth interval 32 | :param E: couple representing the start and stop of the zone of affiliation of J 33 | :return: individual precision probability in [0, 1], or math.nan if undefined 34 | """ 35 | if all([I is None for I in Is]): # no prediction in the current area 36 | return(math.nan) # undefined 37 | return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is)) 38 | 39 | def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 40 | """ 41 | Compute the individual average distance from a single J to the predictions Is 42 | 43 | :param Is: list of predicted events within the affiliation zone of J 44 | :param J: couple representating the start and stop of a ground truth interval 45 | :return: individual average recall directed distance number 46 | """ 47 | Is = [I for I in Is if I is not None] # filter possible None in Is 48 | if len(Is) == 0: # there is no prediction in the current area 49 | return(math.inf) 50 | E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions 51 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 52 | return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J)) 53 | 54 | def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 55 | """ 56 | Compute the individual recall probability from a single ground truth J to Is 57 | 58 | :param Is: list of predicted events within the affiliation zone of J 59 | :param J: couple representating the start and stop of a ground truth interval 60 | :param E: couple representing the start and stop of the zone of affiliation of J 61 | :return: individual recall probability in [0, 1] 62 | """ 63 | Is = [I for I in Is if I is not None] # filter possible None in Is 64 | if len(Is) == 0: # there is no prediction in the current area 65 | return(0) 66 | E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions 67 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 68 | return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J)) 69 | -------------------------------------------------------------------------------- /affiliation/generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from itertools import groupby 4 | from operator import itemgetter 5 | import math 6 | import gzip 7 | import glob 8 | import os 9 | 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]): 11 | """ 12 | Convert a binary vector (indicating 1 for the anomalous instances) 13 | to a list of events. The events are considered as durations, 14 | i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1). 15 | 16 | :param vector: a list of elements belonging to {0, 1} 17 | :return: a list of couples, each couple representing the start and stop of 18 | each event 19 | """ 20 | positive_indexes = [idx for idx, val in enumerate(vector) if val > 0] 21 | events = [] 22 | for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]): 23 | cur_cut = list(map(itemgetter(1), g)) 24 | events.append((cur_cut[0], cur_cut[-1])) 25 | 26 | # Consistent conversion in case of range anomalies (for indexes): 27 | # A positive index i is considered as the interval [i, i+1), 28 | # so the last index should be moved by 1 29 | events = [(x, y+1) for (x,y) in events] 30 | 31 | return(events) 32 | 33 | def infer_Trange(events_pred, events_gt): 34 | """ 35 | Given the list of events events_pred and events_gt, get the 36 | smallest possible Trange corresponding to the start and stop indexes 37 | of the whole series. 38 | Trange will not influence the measure of distances, but will impact the 39 | measures of probabilities. 40 | 41 | :param events_pred: a list of couples corresponding to predicted events 42 | :param events_gt: a list of couples corresponding to ground truth events 43 | :return: a couple corresponding to the smallest range containing the events 44 | """ 45 | if len(events_gt) == 0: 46 | raise ValueError('The gt events should contain at least one event') 47 | if len(events_pred) == 0: 48 | # empty prediction, base Trange only on events_gt (which is non empty) 49 | return(infer_Trange(events_gt, events_gt)) 50 | 51 | min_pred = min([x[0] for x in events_pred]) 52 | min_gt = min([x[0] for x in events_gt]) 53 | max_pred = max([x[1] for x in events_pred]) 54 | max_gt = max([x[1] for x in events_gt]) 55 | Trange = (min(min_pred, min_gt), max(max_pred, max_gt)) 56 | return(Trange) 57 | 58 | def has_point_anomalies(events): 59 | """ 60 | Checking whether events contain point anomalies, i.e. 61 | events starting and stopping at the same time. 62 | 63 | :param events: a list of couples corresponding to predicted events 64 | :return: True is the events have any point anomalies, False otherwise 65 | """ 66 | if len(events) == 0: 67 | return(False) 68 | return(min([x[1] - x[0] for x in events]) == 0) 69 | 70 | def _sum_wo_nan(vec): 71 | """ 72 | Sum of elements, ignoring math.isnan ones 73 | 74 | :param vec: vector of floating numbers 75 | :return: sum of the elements, ignoring math.isnan ones 76 | """ 77 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 78 | return(sum(vec_wo_nan)) 79 | 80 | def _len_wo_nan(vec): 81 | """ 82 | Count of elements, ignoring math.isnan ones 83 | 84 | :param vec: vector of floating numbers 85 | :return: count of the elements, ignoring math.isnan ones 86 | """ 87 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 88 | return(len(vec_wo_nan)) 89 | 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'): 91 | """ 92 | Load a file compressed with gz, such that each line of the 93 | file is either 0 (representing a normal instance) or 1 (representing) 94 | an anomalous instance. 95 | :param filename: file path to the gz compressed file 96 | :return: list of integers with either 0 or 1 97 | """ 98 | with gzip.open(filename, 'rb') as f: 99 | content = f.read().splitlines() 100 | content = [int(x) for x in content] 101 | return(content) 102 | 103 | def read_all_as_events(): 104 | """ 105 | Load the files contained in the folder `data/` and convert 106 | to events. The length of the series is kept. 107 | The convention for the file name is: `dataset_algorithm.gz` 108 | :return: two dictionaries: 109 | - the first containing the list of events for each dataset and algorithm, 110 | - the second containing the range of the series for each dataset 111 | """ 112 | filepaths = glob.glob('data/*.gz') 113 | datasets = dict() 114 | Tranges = dict() 115 | for filepath in filepaths: 116 | vector = read_gz_data(filepath) 117 | events = convert_vector_to_events(vector) 118 | # ad hoc cut for those files 119 | cut_filepath = (os.path.split(filepath)[1]).split('_') 120 | data_name = cut_filepath[0] 121 | algo_name = (cut_filepath[1]).split('.')[0] 122 | if not data_name in datasets: 123 | datasets[data_name] = dict() 124 | Tranges[data_name] = (0, len(vector)) 125 | datasets[data_name][algo_name] = events 126 | return(datasets, Tranges) 127 | 128 | def f1_func(p, r): 129 | """ 130 | Compute the f1 function 131 | :param p: precision numeric value 132 | :param r: recall numeric value 133 | :return: f1 numeric value 134 | """ 135 | return(2*p*r/(p+r)) 136 | -------------------------------------------------------------------------------- /affiliation/metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from affiliation.generics import ( 4 | infer_Trange, 5 | has_point_anomalies, 6 | _len_wo_nan, 7 | _sum_wo_nan, 8 | read_all_as_events) 9 | from affiliation._affiliation_zone import ( 10 | get_all_E_gt_func, 11 | affiliation_partition) 12 | from affiliation._single_ground_truth_event import ( 13 | affiliation_precision_distance, 14 | affiliation_recall_distance, 15 | affiliation_precision_proba, 16 | affiliation_recall_proba) 17 | 18 | def test_events(events): 19 | """ 20 | Verify the validity of the input events 21 | :param events: list of events, each represented by a couple (start, stop) 22 | :return: None. Raise an error for incorrect formed or non ordered events 23 | """ 24 | if type(events) is not list: 25 | raise TypeError('Input `events` should be a list of couples') 26 | if not all([type(x) is tuple for x in events]): 27 | raise TypeError('Input `events` should be a list of tuples') 28 | if not all([len(x) == 2 for x in events]): 29 | raise ValueError('Input `events` should be a list of couples (start, stop)') 30 | if not all([x[0] <= x[1] for x in events]): 31 | raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop') 32 | if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]): 33 | raise ValueError('Couples of input `events` should be disjoint and ordered') 34 | 35 | def pr_from_events(events_pred, events_gt, Trange): 36 | """ 37 | Compute the affiliation metrics including the precision/recall in [0,1], 38 | along with the individual precision/recall distances and probabilities 39 | 40 | :param events_pred: list of predicted events, each represented by a couple 41 | indicating the start and the stop of the event 42 | :param events_gt: list of ground truth events, each represented by a couple 43 | indicating the start and the stop of the event 44 | :param Trange: range of the series where events_pred and events_gt are included, 45 | represented as a couple (start, stop) 46 | :return: dictionary with precision, recall, and the individual metrics 47 | """ 48 | # testing the inputs 49 | test_events(events_pred) 50 | test_events(events_gt) 51 | 52 | # other tests 53 | minimal_Trange = infer_Trange(events_pred, events_gt) 54 | if not Trange[0] <= minimal_Trange[0]: 55 | raise ValueError('`Trange` should include all the events') 56 | if not minimal_Trange[1] <= Trange[1]: 57 | raise ValueError('`Trange` should include all the events') 58 | 59 | if len(events_gt) == 0: 60 | raise ValueError('Input `events_gt` should have at least one event') 61 | 62 | if has_point_anomalies(events_pred) or has_point_anomalies(events_gt): 63 | raise ValueError('Cannot manage point anomalies currently') 64 | 65 | if Trange is None: 66 | # Set as default, but Trange should be indicated if probabilities are used 67 | raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function') 68 | 69 | E_gt = get_all_E_gt_func(events_gt, Trange) 70 | aff_partition = affiliation_partition(events_pred, E_gt) 71 | 72 | # Computing precision distance 73 | d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 74 | 75 | # Computing recall distance 76 | d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 77 | 78 | # Computing precision 79 | p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 80 | 81 | # Computing recall 82 | p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 83 | 84 | if _len_wo_nan(p_precision) > 0: 85 | p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision) 86 | else: 87 | p_precision_average = p_precision[0] # math.nan 88 | p_recall_average = sum(p_recall) / len(p_recall) 89 | 90 | dict_out = dict({'precision': p_precision_average, 91 | 'recall': p_recall_average, 92 | 'individual_precision_probabilities': p_precision, 93 | 'individual_recall_probabilities': p_recall, 94 | 'individual_precision_distances': d_precision, 95 | 'individual_recall_distances': d_recall}) 96 | return(dict_out) 97 | 98 | def produce_all_results(): 99 | """ 100 | Produce the affiliation precision/recall for all files 101 | contained in the `data` repository 102 | :return: a dictionary indexed by data names, each containing a dictionary 103 | indexed by algorithm names, each containing the results of the affiliation 104 | metrics (precision, recall, individual probabilities and distances) 105 | """ 106 | datasets, Tranges = read_all_as_events() # read all the events in folder `data` 107 | results = dict() 108 | for data_name in datasets.keys(): 109 | results_data = dict() 110 | for algo_name in datasets[data_name].keys(): 111 | if algo_name != 'groundtruth': 112 | results_data[algo_name] = pr_from_events(datasets[data_name][algo_name], 113 | datasets[data_name]['groundtruth'], 114 | Tranges[data_name]) 115 | results[data_name] = results_data 116 | return(results) 117 | -------------------------------------------------------------------------------- /data/machinetemp_adversary.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_adversary.gz -------------------------------------------------------------------------------- /data/machinetemp_greenhouse.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_greenhouse.gz -------------------------------------------------------------------------------- /data/machinetemp_groundtruth.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_groundtruth.gz -------------------------------------------------------------------------------- /data/machinetemp_lstmad.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_lstmad.gz -------------------------------------------------------------------------------- /data/machinetemp_luminol.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_luminol.gz -------------------------------------------------------------------------------- /data/machinetemp_trivial.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_trivial.gz -------------------------------------------------------------------------------- /data/nyctaxi_adversary.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_adversary.gz -------------------------------------------------------------------------------- /data/nyctaxi_greenhouse.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_greenhouse.gz -------------------------------------------------------------------------------- /data/nyctaxi_groundtruth.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_groundtruth.gz -------------------------------------------------------------------------------- /data/nyctaxi_lstmad.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_lstmad.gz -------------------------------------------------------------------------------- /data/nyctaxi_luminol.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_luminol.gz -------------------------------------------------------------------------------- /data/nyctaxi_trivial.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_trivial.gz -------------------------------------------------------------------------------- /data/swat_adversary.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_adversary.gz -------------------------------------------------------------------------------- /data/swat_groundtruth.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_groundtruth.gz -------------------------------------------------------------------------------- /data/swat_iforest.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_iforest.gz -------------------------------------------------------------------------------- /data/swat_ocsvm.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_ocsvm.gz -------------------------------------------------------------------------------- /data/swat_seq2seq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_seq2seq.gz -------------------------------------------------------------------------------- /data/swat_trivial.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_trivial.gz -------------------------------------------------------------------------------- /data/twitteraapl_adversary.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_adversary.gz -------------------------------------------------------------------------------- /data/twitteraapl_greenhouse.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_greenhouse.gz -------------------------------------------------------------------------------- /data/twitteraapl_groundtruth.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_groundtruth.gz -------------------------------------------------------------------------------- /data/twitteraapl_lstmad.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_lstmad.gz -------------------------------------------------------------------------------- /data/twitteraapl_luminol.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_luminol.gz -------------------------------------------------------------------------------- /data/twitteraapl_trivial.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_trivial.gz -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | setup(name='affiliation', 3 | version='1.0', 4 | description='Compute the affiliation metrics', 5 | author='Alexis Huet and others', 6 | author_email='alexis.huet@huawei.com', 7 | platforms=['any'], 8 | license='MIT', 9 | url='https://github.com/ahstat/affiliation-metrics-py', 10 | packages=find_packages(), 11 | ) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test_affiliation_zone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | import math 6 | from affiliation._affiliation_zone import ( 7 | E_gt_func, 8 | get_all_E_gt_func, 9 | affiliation_partition) 10 | 11 | """ 12 | Function `E_gt_func` is correct, even for the borders 13 | """ 14 | class Test_E_gt_func(unittest.TestCase): 15 | def test_generic(self): 16 | Trange = (1, 30) 17 | events_gt = [(3,7), (10,18), (20,21)] 18 | j = 0 19 | self.assertEqual(E_gt_func(j, events_gt, Trange)[0], min(Trange)) 20 | self.assertEqual(E_gt_func(j, events_gt, Trange)[1], (10+7)/2) 21 | j = 1 22 | self.assertEqual(E_gt_func(j, events_gt, Trange)[0], (10+7)/2) 23 | self.assertEqual(E_gt_func(j, events_gt, Trange)[1], (18+20)/2) 24 | j = 2 25 | self.assertEqual(E_gt_func(j, events_gt, Trange)[0], (18+20)/2) 26 | self.assertEqual(E_gt_func(j, events_gt, Trange)[1], max(Trange)) 27 | 28 | # Case j = 1 29 | Trange = (1, 30) 30 | events_gt = [(3,20)] 31 | j = 0 32 | self.assertEqual(E_gt_func(j, events_gt, Trange)[0], min(Trange)) 33 | self.assertEqual(E_gt_func(j, events_gt, Trange)[1], max(Trange)) 34 | 35 | """ 36 | Function `get_all_E_gt_func` is correct 37 | """ 38 | class Test_get_all_E_gt_func(unittest.TestCase): 39 | def test_generic(self): 40 | Trange = (1, 30) 41 | events_gt = [(3,7), (10,18), (20,21)] 42 | cut_aff2 = get_all_E_gt_func(events_gt, Trange) 43 | self.assertEqual(cut_aff2[0], (min(Trange), (10+7)/2)) 44 | self.assertEqual(cut_aff2[1], ((10+7)/2, (18+20)/2)) 45 | self.assertEqual(cut_aff2[2], ((18+20)/2, max(Trange))) 46 | 47 | """ 48 | Function `affiliation_partition` is correct 49 | """ 50 | class Test_affiliation_partition(unittest.TestCase): 51 | def test_precision_direction(self): 52 | """ 53 | Test of the function in the 'precision' direction I --> J in one example 54 | """ 55 | events_pred = [(1,3), (6,18), (25,26)] 56 | events_gt = [(1,8), (16,17), (25,28), (29,31)] 57 | Trange = (-math.inf, math.inf) 58 | E_gt = get_all_E_gt_func(events_gt, Trange) 59 | M = affiliation_partition(events_pred, E_gt) 60 | 61 | # Check of dimension of the lists 62 | self.assertEqual(len(M), len(events_gt)) 63 | self.assertEqual(len(M[0]), len(events_pred)) 64 | 65 | # First element, related to the first affiliation zone (-inf, 12) 66 | self.assertEqual(M[0][0], (1, 3)) # zone1, first prediction 67 | self.assertEqual(M[0][1], (6, 12)) # zone1, 2nd prediction 68 | self.assertEqual(M[0][2], None) # zone1, 3rd prediction 69 | 70 | # Second element, related to the second affiliation zone (12, 21) 71 | self.assertEqual(M[1][0], None) # zone2, first prediction 72 | self.assertEqual(M[1][1], (12, 18)) # zone2, 2nd prediction 73 | self.assertEqual(M[1][2], None) # zone2, 3rd prediction 74 | 75 | # Third element, related to the third affiliation zone (25, 28) 76 | self.assertEqual(M[2][0], None) # zone3, first prediction 77 | self.assertEqual(M[2][1], None) # zone3, 2nd prediction 78 | self.assertEqual(M[2][2], (25, 26)) # zone3, 3rd prediction 79 | 80 | # Fourth element, related to the fourth affiliation zone (29, 31) 81 | self.assertEqual(M[3][0], None) # zone4, first prediction 82 | self.assertEqual(M[3][1], None) # zone4, 2nd prediction 83 | self.assertEqual(M[3][2], None) # zone4, 3rd prediction 84 | 85 | def test_single_gt_and_pred(self): 86 | """ 87 | Test of shape of the output of the function with only 88 | one prediction and one ground truth intervals 89 | """ 90 | Trange = (-math.inf, math.inf) 91 | 92 | events_pred = [(1,3), (5,10)] 93 | events_gt = [(1,8), (16,17)] 94 | E_gt = get_all_E_gt_func(events_gt, Trange) 95 | M0 = affiliation_partition(events_pred, E_gt) 96 | 97 | # One pred, more than one gt 98 | events_pred = [(1,3)] 99 | events_gt = [(1,8), (16,17)] 100 | E_gt = get_all_E_gt_func(events_gt, Trange) 101 | M1 = affiliation_partition(events_pred, E_gt) 102 | 103 | # One gt, more than one pred 104 | events_pred = [(1,3), (5,10)] 105 | events_gt = [(1,8)] 106 | E_gt = get_all_E_gt_func(events_gt, Trange) 107 | M2 = affiliation_partition(events_pred, E_gt) 108 | 109 | events_pred = [(2,3)] 110 | events_gt = [(1,8)] 111 | E_gt = get_all_E_gt_func(events_gt, Trange) 112 | M3 = affiliation_partition(events_pred, E_gt) 113 | 114 | # Check of dimension of the lists (here like gt) 115 | self.assertEqual(len(M0), 2) 116 | self.assertEqual(len(M1), 2) 117 | self.assertEqual(len(M2), 1) 118 | self.assertEqual(len(M3), 1) 119 | # Check of dimension of the lists (here like pred) 120 | self.assertEqual(len(M0[0]), 2) 121 | self.assertEqual(len(M1[0]), 1) 122 | self.assertEqual(len(M2[0]), 2) 123 | self.assertEqual(len(M3[0]), 1) 124 | 125 | def test_zero_pred(self): 126 | """ 127 | Test of shape of the output of the function with no 128 | prediction 129 | """ 130 | Trange = (-math.inf, math.inf) 131 | 132 | events_pred = [] 133 | events_gt = [(1,8), (16,17)] 134 | E_gt = get_all_E_gt_func(events_gt, Trange) 135 | M = affiliation_partition(events_pred, E_gt) 136 | 137 | # Check of dimension of the lists 138 | self.assertEqual(len(M), len(events_gt)) 139 | self.assertEqual(len(M[0]), len(events_pred)) # here with len(events_pred) == 0 140 | 141 | if __name__ == '__main__': 142 | unittest.main() 143 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | import glob 6 | import re 7 | import math 8 | from affiliation.generics import ( 9 | read_gz_data, 10 | convert_vector_to_events, 11 | f1_func) 12 | from affiliation.metrics import produce_all_results 13 | 14 | """ 15 | Check reproducibility of the results 16 | """ 17 | class Test_data(unittest.TestCase): 18 | def test_description_data(self): 19 | """ 20 | Check description of the data sets: 21 | - number of instances, 22 | - proportion of anomalous instances, 23 | - number of events in the ground truth 24 | """ 25 | filepaths = glob.glob('data/*.gz') 26 | for filepath in filepaths: 27 | vector = read_gz_data(filepath) 28 | if re.search('machinetemp_', filepath): 29 | self.assertEqual(len(vector), 17682) 30 | if re.search('_groundtruth', filepath): 31 | self.assertAlmostEqual(sum(vector)/len(vector), 0.0641, places=3) 32 | events = convert_vector_to_events(vector) 33 | self.assertEqual(len(events), 2) 34 | if re.search('nyctaxi_', filepath): 35 | self.assertEqual(len(vector), 2307) 36 | if re.search('_groundtruth', filepath): 37 | self.assertAlmostEqual(sum(vector)/len(vector), 0.2691, places=3) 38 | events = convert_vector_to_events(vector) 39 | self.assertEqual(len(events), 3) 40 | if re.search('twitteraapl_', filepath): 41 | self.assertEqual(len(vector), 11889) 42 | if re.search('_groundtruth', filepath): 43 | self.assertAlmostEqual(sum(vector)/len(vector), 0.0667, places=3) 44 | events = convert_vector_to_events(vector) 45 | self.assertEqual(len(events), 2) 46 | if re.search('swat_', filepath): 47 | self.assertEqual(len(vector), 449919) 48 | if re.search('_groundtruth', filepath): 49 | self.assertAlmostEqual(sum(vector)/len(vector), 0.1214, places=3) 50 | events = convert_vector_to_events(vector) 51 | self.assertEqual(len(events), 35) 52 | 53 | def test_cells(self): 54 | """ 55 | Check each cell by applying the affiliation metrics 56 | """ 57 | # table from the article to be checked 58 | table_in_article = {'machinetemp': {}, 59 | 'nyctaxi': {}, 60 | 'twitteraapl': {}, 61 | 'swat': {}} 62 | 63 | table_in_article['machinetemp']['trivial'] = '1.00/0.50/0.66' 64 | table_in_article['machinetemp']['adversary'] = '0.49/1.00/0.66' 65 | table_in_article['machinetemp']['greenhouse'] = '0.71/0.99/0.83' 66 | table_in_article['machinetemp']['lstmad'] = '0.50/1.00/0.67' 67 | table_in_article['machinetemp']['luminol'] = '0.54/0.99/0.70' 68 | 69 | table_in_article['nyctaxi']['trivial'] = '1.00/0.30/0.46' 70 | table_in_article['nyctaxi']['adversary'] = '0.54/1.00/0.70' 71 | table_in_article['nyctaxi']['greenhouse'] = '0.51/0.99/0.67' 72 | table_in_article['nyctaxi']['lstmad'] = '0.51/1.00/0.67' 73 | table_in_article['nyctaxi']['luminol'] = '0.38/0.79/0.51' 74 | 75 | table_in_article['twitteraapl']['trivial'] = '1.00/0.49/0.66' 76 | table_in_article['twitteraapl']['adversary'] = '0.50/1.00/0.67' 77 | table_in_article['twitteraapl']['greenhouse'] = '0.78/0.98/0.87' 78 | table_in_article['twitteraapl']['lstmad'] = '0.66/0.99/0.79' 79 | table_in_article['twitteraapl']['luminol'] = '0.73/0.98/0.83' 80 | 81 | table_in_article['swat']['trivial'] = '1.00/0.03/0.06' 82 | table_in_article['swat']['adversary'] = '0.53/1.00/0.69' 83 | table_in_article['swat']['iforest'] = '0.52/0.84/0.64' 84 | table_in_article['swat']['ocsvm'] = '0.65/0.70/0.68' 85 | table_in_article['swat']['seq2seq'] = '0.86/0.79/0.83' 86 | 87 | # checking the table 88 | results = produce_all_results() # produce results 89 | 90 | # Check results related to `best_algos` and `pr_of_best_algo` 91 | for data_name in results.keys(): 92 | for algo_name in results[data_name].keys(): 93 | p = results[data_name][algo_name]['precision'] 94 | r = results[data_name][algo_name]['recall'] 95 | f1 = f1_func(p, r) 96 | # convert to a string with two decimals 97 | p, r, f1 = ['%.2f' % x for x in [p, r, f1]] 98 | cell_obtained_from_results = str(p) + '/' + str(r) + '/' + str(f1) 99 | self.assertEqual(cell_obtained_from_results, table_in_article[data_name][algo_name]) 100 | 101 | def test_single_events_results(self): 102 | """ 103 | Check single events results for swat with iforest/seq2seq 104 | """ 105 | results = produce_all_results() # produce results 106 | p_precis = dict() 107 | p_recall = dict() 108 | p_f1 = dict() 109 | for algo_name in ['iforest', 'seq2seq']: 110 | p_precis_raw = results['swat'][algo_name]['individual_precision_probabilities'] 111 | p_recall_raw = results['swat'][algo_name]['individual_recall_probabilities'] 112 | p_precis[algo_name] = [round(x, 2) for x in p_precis_raw] 113 | p_recall[algo_name] = [round(x, 2) for x in p_recall_raw] 114 | p_f1[algo_name] = [round(f1_func(x[0], x[1]), 2) for x in zip(p_precis_raw, p_recall_raw)] 115 | 116 | p_out = dict() 117 | for algo_name in ['iforest', 'seq2seq']: 118 | p_out[algo_name] = ['%.2f' % x[0] + '/' + '%.2f' % x[1] + '/' + '%.2f' % x[2] for x in zip(p_precis[algo_name], p_recall[algo_name], p_f1[algo_name])] 119 | 120 | self.assertEqual(p_out['iforest'][0:6], 121 | ['0.37/0.53/0.44', 122 | '1.00/0.91/0.95', 123 | '0.76/0.99/0.86', 124 | 'nan/0.00/nan', 125 | '0.38/0.60/0.46', 126 | '0.09/0.21/0.12']) 127 | self.assertEqual(p_out['seq2seq'][0:6], 128 | ['0.96/1.00/0.98', 129 | '0.86/1.00/0.93', 130 | '0.73/0.78/0.75', 131 | '0.39/0.71/0.50', 132 | '0.71/0.97/0.82', 133 | '0.88/1.00/0.94']) 134 | 135 | """ 136 | Check the number of events for which seq2seq is better than iforest 137 | """ 138 | nb_events = len(p_precis['seq2seq']) 139 | 140 | nb_seq2seq_nan_pred = sum([math.isnan(x) for x in p_precis['seq2seq']]) 141 | nb_iforest_nan_pred = sum([math.isnan(x) for x in p_precis['iforest']]) 142 | 143 | idx_nan1 = [idx for idx, val in enumerate(p_precis['seq2seq']) if math.isnan(val)] 144 | idx_nan2 = [idx for idx, val in enumerate(p_precis['iforest']) if math.isnan(val)] 145 | idx_nan = idx_nan1 + idx_nan2 146 | p_precis_seq2seq_not_nan = [val for idx, val in enumerate(p_precis['seq2seq']) if idx not in idx_nan] 147 | p_precis_iforest_not_nan = [val for idx, val in enumerate(p_precis['iforest']) if idx not in idx_nan] 148 | p_recall_seq2seq_not_nan = [val for idx, val in enumerate(p_recall['seq2seq']) if idx not in idx_nan] 149 | p_recall_iforest_not_nan = [val for idx, val in enumerate(p_recall['iforest']) if idx not in idx_nan] 150 | 151 | nb_both_better = sum([(p_seq > p_ifo) and (r_seq > r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)]) 152 | nb_better_precision_only = sum([(p_seq > p_ifo) and (r_seq == r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)]) 153 | 154 | nb_better_precision_worse_recall = sum([(p_seq > p_ifo) and (r_seq < r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)]) 155 | nb_better_recall_worse_precision = sum([(p_seq < p_ifo) and (r_seq > r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)]) 156 | nb_equivocal = nb_better_precision_worse_recall + nb_better_recall_worse_precision 157 | 158 | nb_both_worse = sum([(p_seq < p_ifo) and (r_seq < r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)]) 159 | 160 | # check that number of events is 35 161 | self.assertEqual(nb_events, 35) 162 | # check the number of zone without predictions are 6 for seq2seq and 2 for iforest 163 | self.assertEqual(nb_seq2seq_nan_pred, 6) 164 | self.assertEqual(nb_iforest_nan_pred, 2) 165 | # check that we covered all the possibilities for those specific results 166 | self.assertEqual(nb_seq2seq_nan_pred + nb_iforest_nan_pred + nb_both_better + nb_better_precision_only + nb_equivocal + nb_both_worse, nb_events) 167 | # check the number of better elements 168 | self.assertEqual(nb_both_better + nb_better_precision_only, 21) 169 | self.assertEqual(nb_both_better, 13) 170 | self.assertEqual(nb_better_precision_only, 8) 171 | # check the number of equivocal results 172 | self.assertEqual(nb_equivocal, 4) 173 | # check the number of worse results 174 | self.assertEqual(nb_both_worse, 2) 175 | 176 | if __name__ == '__main__': 177 | unittest.main() -------------------------------------------------------------------------------- /tests/test_generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | import math 6 | from affiliation.generics import ( 7 | convert_vector_to_events, 8 | infer_Trange, 9 | has_point_anomalies, 10 | _sum_wo_nan, 11 | _len_wo_nan) 12 | 13 | """ 14 | Function `convert_vector_to_events` 15 | """ 16 | class Test_convert_vector_to_events(unittest.TestCase): 17 | def test_empty(self): 18 | """ 19 | Empty vector gives empty events 20 | """ 21 | tested = convert_vector_to_events([]) 22 | expected = [] 23 | self.assertEqual(tested, expected) 24 | 25 | def test_zeros(self): 26 | """ 27 | Zeros vector gives empty events 28 | """ 29 | tested = convert_vector_to_events([0]) 30 | expected = [] 31 | self.assertEqual(tested, expected) 32 | 33 | tested = convert_vector_to_events([0, 0, 0, 0, 0]) 34 | expected = [] 35 | self.assertEqual(tested, expected) 36 | 37 | def test_ones(self): 38 | """ 39 | Ones vector gives single long event 40 | """ 41 | tested = convert_vector_to_events([1]) 42 | expected = [(0, 1)] # of length of one index by convention 43 | self.assertEqual(tested, expected) 44 | 45 | tested = convert_vector_to_events([1, 1, 1, 1, 1]) 46 | expected = [(0, 5)] 47 | self.assertEqual(tested, expected) 48 | 49 | def test_border(self): 50 | """ 51 | Test with elements on the border 52 | """ 53 | tested = convert_vector_to_events([1, 1, 0, 0, 0]) 54 | expected = [(0, 2)] 55 | self.assertEqual(tested, expected) 56 | 57 | tested = convert_vector_to_events([0, 0, 1, 1, 1]) 58 | expected = [(2, 5)] 59 | self.assertEqual(tested, expected) 60 | 61 | tested = convert_vector_to_events([1, 0, 1, 1, 1]) 62 | expected = [(0, 1), (2, 5)] 63 | self.assertEqual(tested, expected) 64 | 65 | tested = convert_vector_to_events([1, 1, 0, 1, 0, 1, 1, 1]) 66 | expected = [(0, 2), (3, 4), (5, 8)] 67 | self.assertEqual(tested, expected) 68 | 69 | def test_generic(self): 70 | """ 71 | Test without elements on the border, generic case 72 | """ 73 | tested = convert_vector_to_events([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 74 | 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 | 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) 80 | expected = [(6, 12), (17, 20), (60, 63)] 81 | self.assertEqual(tested, expected) 82 | 83 | """ 84 | Function `infer_Trange` 85 | """ 86 | class Test_infer_Trange(unittest.TestCase): 87 | def test_empty_predictions(self): 88 | """ 89 | Infer Trange with empty predictions 90 | """ 91 | tested = infer_Trange([], [(-1,2), (3,4), (6,20)]) 92 | expected = (-1, 20) 93 | self.assertEqual(tested, expected) 94 | 95 | def test_empty_groundtruth(self): 96 | """ 97 | Raise error for empty ground truth 98 | """ 99 | with self.assertRaises(ValueError): 100 | infer_Trange([(-1,2), (3,4), (6,20)], []) 101 | 102 | def test_generic(self): 103 | """ 104 | Infer Trange with generic predictions/groundtruth 105 | """ 106 | tested = infer_Trange([(-3, 4), (5, 6)], [(-1,2), (3,4), (6,20)]) 107 | expected = (-3, 20) 108 | self.assertEqual(tested, expected) 109 | 110 | tested = infer_Trange([(-3, 4), (5, 6)], [(-1,2), (3,4)]) 111 | expected = (-3, 6) 112 | self.assertEqual(tested, expected) 113 | 114 | tested = infer_Trange([(0, 4), (5, 6)], [(-1,2), (3,4), (6,20)]) 115 | expected = (-1, 20) 116 | self.assertEqual(tested, expected) 117 | 118 | tested = infer_Trange([(-1,2), (3,4), (6,20)], [(-3, 4), (5, 6)]) 119 | expected = (-3, 20) 120 | self.assertEqual(tested, expected) 121 | 122 | """ 123 | Function `has_point_anomalies` 124 | """ 125 | class Test_has_point_anomalies(unittest.TestCase): 126 | def test_empty_event(self): 127 | """ 128 | Test with an empty event 129 | """ 130 | tested = has_point_anomalies([]) 131 | expected = False # no event, so no point anomaly too 132 | self.assertEqual(tested, expected) 133 | 134 | def test_generic(self): 135 | """ 136 | Check whether contain point anomalies with generic events 137 | """ 138 | tested = has_point_anomalies([(1, 2)]) 139 | expected = False 140 | self.assertEqual(tested, expected) 141 | 142 | tested = has_point_anomalies([(1, 2), (3, 4), (8, 10)]) 143 | expected = False 144 | self.assertEqual(tested, expected) 145 | 146 | tested = has_point_anomalies([(1, 2), (3, 3), (8, 10)]) 147 | expected = True 148 | self.assertEqual(tested, expected) 149 | 150 | tested = has_point_anomalies([(1, 1), (3, 3), (8, 8)]) 151 | expected = True 152 | self.assertEqual(tested, expected) 153 | 154 | tested = has_point_anomalies([(1, 1)]) 155 | expected = True 156 | self.assertEqual(tested, expected) 157 | 158 | """ 159 | Functions `_sum_wo_nan` and `_len_wo_nan` 160 | """ 161 | class Test_sum_len_wo_nan(unittest.TestCase): 162 | def test_empty_event(self): 163 | """ 164 | Test with an empty event 165 | """ 166 | tested = _sum_wo_nan([]) 167 | expected = 0 168 | self.assertEqual(tested, expected) 169 | 170 | tested = _len_wo_nan([]) 171 | expected = 0 172 | self.assertEqual(tested, expected) 173 | 174 | def test_generic(self): 175 | """ 176 | Check with either math.nan or not 177 | """ 178 | vec = [1, 4, 3] 179 | tested = _sum_wo_nan(vec) 180 | expected = 8 181 | self.assertEqual(tested, expected) 182 | tested = _len_wo_nan(vec) 183 | expected = 3 184 | self.assertEqual(tested, expected) 185 | 186 | vec = [1, math.nan, 3] 187 | tested = _sum_wo_nan(vec) 188 | expected = 4 189 | self.assertEqual(tested, expected) 190 | tested = _len_wo_nan(vec) 191 | expected = 2 192 | self.assertEqual(tested, expected) 193 | 194 | vec = [math.nan, math.nan, 3] 195 | tested = _sum_wo_nan(vec) 196 | expected = 3 197 | self.assertEqual(tested, expected) 198 | tested = _len_wo_nan(vec) 199 | expected = 1 200 | self.assertEqual(tested, expected) 201 | 202 | vec = [math.nan, math.nan, math.nan] # like an empty vec after removing math.nan 203 | tested = _sum_wo_nan(vec) 204 | expected = 0 205 | self.assertEqual(tested, expected) 206 | tested = _len_wo_nan(vec) 207 | expected = 0 208 | self.assertEqual(tested, expected) 209 | 210 | if __name__ == '__main__': 211 | unittest.main() 212 | -------------------------------------------------------------------------------- /tests/test_integral_interval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | from affiliation._integral_interval import ( 6 | interval_length, 7 | sum_interval_lengths, 8 | interval_intersection, 9 | interval_subset, 10 | cut_into_three_func, 11 | get_pivot_j, 12 | integral_mini_interval, 13 | integral_interval_distance, 14 | integral_mini_interval_P_CDFmethod__min_piece, 15 | integral_mini_interval_Pprecision_CDFmethod, 16 | integral_interval_probaCDF_precision, 17 | cut_J_based_on_mean_func, 18 | integral_interval_probaCDF_recall) 19 | 20 | """ 21 | Function `interval_length` 22 | """ 23 | class Test_interval_length(unittest.TestCase): 24 | def test_empty(self): 25 | """ 26 | None interval gives 0 length 27 | """ 28 | tested = interval_length(None) 29 | expected = 0 30 | self.assertEqual(tested, expected) 31 | 32 | def test_generic(self): 33 | """ 34 | Test correct length of the interval 35 | """ 36 | tested = interval_length((1, 2)) 37 | expected = 1 38 | self.assertEqual(tested, expected) 39 | 40 | tested = interval_length((-1, 3.5)) 41 | expected = 4.5 42 | self.assertEqual(tested, expected) 43 | 44 | """ 45 | Function `sum_interval_lengths` 46 | """ 47 | class Test_sum_interval_lengths(unittest.TestCase): 48 | def test_empty(self): 49 | """ 50 | Empty event gives 0 length 51 | """ 52 | tested = sum_interval_lengths([]) 53 | expected = 0 54 | self.assertEqual(tested, expected) 55 | 56 | def test_generic(self): 57 | """ 58 | Test correct sum of length of the intervals 59 | """ 60 | tested = sum_interval_lengths([(1, 2)]) 61 | expected = 1 62 | self.assertEqual(tested, expected) 63 | 64 | tested = sum_interval_lengths([(1, 2), (3.5, 4)]) 65 | expected = 1+0.5 66 | self.assertEqual(tested, expected) 67 | 68 | """ 69 | Function `interval_intersection` 70 | """ 71 | class Test_interval_intersection(unittest.TestCase): 72 | def test_empty(self): 73 | """ 74 | None when one or more interval is None 75 | """ 76 | tested = interval_intersection(None, None) 77 | expected = None 78 | self.assertEqual(tested, expected) 79 | 80 | tested = interval_intersection(None, (1,2)) 81 | expected = None 82 | self.assertEqual(tested, expected) 83 | 84 | tested = interval_intersection((1,2), None) 85 | expected = None 86 | self.assertEqual(tested, expected) 87 | 88 | def test_generic(self): 89 | """ 90 | Test correct intersection of the intervals 91 | """ 92 | tested = interval_intersection((1, 2), (1, 2)) 93 | expected = (1, 2) 94 | self.assertEqual(tested, expected) 95 | 96 | tested = interval_intersection((1, 2), (2, 3)) 97 | expected = None # because intersection or [1, 2) and [2, 3) 98 | self.assertEqual(tested, expected) 99 | 100 | tested = interval_intersection((1, 2), (3, 4)) 101 | expected = None 102 | self.assertEqual(tested, expected) 103 | 104 | tested = interval_intersection((1, 3), (2, 4)) 105 | expected = (2, 3) 106 | self.assertEqual(tested, expected) 107 | 108 | tested = interval_intersection((2, 4), (1, 3)) 109 | expected = (2, 3) 110 | self.assertEqual(tested, expected) 111 | 112 | tested = interval_intersection((-1, 5), (1, 3)) 113 | expected = (1, 3) 114 | self.assertEqual(tested, expected) 115 | 116 | tested = interval_intersection((1, 3), (-1, 5)) 117 | expected = (1, 3) 118 | self.assertEqual(tested, expected) 119 | 120 | tested = interval_intersection((1, 10), (0, 5)) 121 | expected = (1, 5) 122 | self.assertEqual(tested, expected) 123 | 124 | """ 125 | Function `interval_subset` 126 | """ 127 | class Test_interval_subset(unittest.TestCase): 128 | def test_empty(self): 129 | """ 130 | Error for empty interval 131 | """ 132 | with self.assertRaises(TypeError): 133 | interval_subset(None, None) 134 | 135 | with self.assertRaises(TypeError): 136 | interval_subset(None, (1,2)) 137 | 138 | with self.assertRaises(TypeError): 139 | interval_subset((1,2), None) 140 | 141 | def test_generic(self): 142 | """ 143 | Test correct check of subset 144 | """ 145 | tested = interval_subset((1, 2), (1, 2)) 146 | expected = True 147 | self.assertEqual(tested, expected) 148 | 149 | tested = interval_subset((1, 2), (1, 3)) 150 | expected = True 151 | self.assertEqual(tested, expected) 152 | 153 | tested = interval_subset((1, 2), (0, 3)) 154 | expected = True 155 | self.assertEqual(tested, expected) 156 | 157 | tested = interval_subset((1, 3), (2, 3)) 158 | expected = False 159 | self.assertEqual(tested, expected) 160 | 161 | tested = interval_subset((1, 3), (-1, 2)) 162 | expected = False 163 | self.assertEqual(tested, expected) 164 | 165 | tested = interval_subset((1, 3), (-1, 0)) 166 | expected = False 167 | self.assertEqual(tested, expected) 168 | 169 | """ 170 | Function `cut_into_three_func` 171 | """ 172 | class Test_cut_into_three_func(unittest.TestCase): 173 | def test_examples(self): 174 | """ 175 | Example 1 176 | """ 177 | I = (0, 1.5) 178 | J = (1, 2) 179 | tested = cut_into_three_func(I, J) 180 | self.assertEqual(len(tested), 3) 181 | self.assertEqual(tested[0], (0, 1)) 182 | self.assertEqual(tested[1], (1, 1.5)) 183 | self.assertEqual(tested[2], None) 184 | 185 | """ 186 | Example 2 with elements both before and after 187 | """ 188 | I = (-1, 10) 189 | J = (1.4, 2.4) 190 | tested = cut_into_three_func(I, J) 191 | self.assertEqual(len(tested), 3) 192 | self.assertEqual(tested[0], (-1, 1.4)) 193 | self.assertEqual(tested[1], (1.4, 2.4)) 194 | self.assertEqual(tested[2], (2.4, 10)) 195 | 196 | """ 197 | Example 3 with only elements before 198 | """ 199 | I = (-1, 1) 200 | J = (1.4, 2.4) 201 | tested = cut_into_three_func(I, J) 202 | self.assertEqual(len(tested), 3) 203 | self.assertEqual(tested[0], (-1, 1)) 204 | self.assertEqual(tested[1], None) 205 | self.assertEqual(tested[2], None) 206 | 207 | """ 208 | Example 4 with only elements at middle 209 | """ 210 | I = (1.6, 2) 211 | J = (1.4, 2.4) 212 | tested = cut_into_three_func(I, J) 213 | self.assertEqual(len(tested), 3) 214 | self.assertEqual(tested[0], None) 215 | self.assertEqual(tested[1], (1.6, 2)) 216 | self.assertEqual(tested[2], None) 217 | 218 | """ 219 | Example 5 with only elements after 220 | """ 221 | I = (4, 5) 222 | J = (1.4, 2.4) 223 | tested = cut_into_three_func(I, J) 224 | self.assertEqual(len(tested), 3) 225 | self.assertEqual(tested[0], None) 226 | self.assertEqual(tested[1], None) 227 | self.assertEqual(tested[2], (4,5)) 228 | 229 | """ 230 | Function `get_pivot_j` 231 | """ 232 | class Test_get_pivot_j(unittest.TestCase): 233 | def test_examples(self): 234 | """ 235 | Examples 236 | """ 237 | I = (4, 5) 238 | J = (1.4, 2.4) 239 | self.assertEqual(get_pivot_j(I, J), 2.4) # max(J) 240 | 241 | I = (0, 1) 242 | J = (1.4, 2.4) 243 | self.assertEqual(get_pivot_j(I, J), 1.4) # min(J) 244 | 245 | I = (0, 1.5) 246 | J = (1.4, 2.4) 247 | with self.assertRaises(ValueError): 248 | # intersection I inter J is not void 249 | get_pivot_j(I, J) 250 | 251 | """ 252 | Function `integral_mini_interval` 253 | """ 254 | class Test_integral_mini_interval(unittest.TestCase): 255 | def test_examples(self): 256 | """ 257 | Examples 258 | """ 259 | I = (4, 5) 260 | J = (1.4, 2.4) 261 | # We look at sum distance between every element of [4,5] to 2.4 the closest element of J 262 | # Distance is going from 4-2.4 to 5-2.4 i.e. 1.6 to 2.6, and increases linearly 263 | # There is 1.6 with a time duration of 1, and in addition the triangle 1/2 (integral from 0 to 1 of tdt) 264 | # Globally 1.6+1/2 265 | self.assertEqual(integral_mini_interval(I, J), 1.6 + 1/2) 266 | 267 | I = (0.1, 1.2) 268 | J = (1.4, 2.4) 269 | # We look at sum distance between every element of [0.1,1.2] to 1.4 the closest element of J 270 | # Distance is going from 1.3 to 0.2 and decreases linearly 271 | # There is 0.2 with a time duration of deltaI=1.1, and in addition 272 | # a decreases from 1.1 to 0 during 1.1 (integral from 0 to 1.1 of tdt) which is 1.1^2/2 273 | # Globally 0.2*1.1+1.1^2/2 274 | self.assertAlmostEqual(integral_mini_interval(I, J), 0.2*1.1+1.1**2/2) 275 | 276 | I = (0, 1.5) 277 | J = (1.4, 2.4) 278 | with self.assertRaises(ValueError): 279 | # intersection I inter J is not void 280 | integral_mini_interval(I, J) 281 | 282 | """ 283 | Function `integral_interval` 284 | """ 285 | class Test_integral_interval(unittest.TestCase): 286 | def test_examples(self): 287 | """ 288 | Function integral_interval *for distance* verifies some tests 289 | """ 290 | ## For I included in J, it's 0 291 | I = (0, 1.5) 292 | J = (-1, 2.4) 293 | self.assertEqual(integral_interval_distance(I, J), 0) 294 | 295 | J = (-1, 2.4) 296 | I = J 297 | self.assertEqual(integral_interval_distance(I, J), 0) 298 | 299 | ## The integral is same from I or I\J 300 | I = (-10, 1.5) 301 | J = (-1, 2.4) 302 | I_minus_J = (-10, -1) # I \ J 303 | self.assertEqual(integral_interval_distance(I, J), 304 | integral_interval_distance(I_minus_J, J)) 305 | 306 | # previous test 307 | I = (-10, 20) 308 | J = (-1, 2.4) 309 | self.assertEqual(integral_interval_distance(I, J), 310 | 195.38) 311 | 312 | """ 313 | Function `integral_mini_interval_P_CDFmethod__min_piece` 314 | """ 315 | class Test_integral_mini_interval_P_CDFmethod__min_piece(unittest.TestCase): 316 | def test_examples(self): 317 | """ 318 | Check this component of `integral_mini_interval_Pprecision_CDFmethod` 319 | in three cases, by recomputing the formulas 320 | 321 | It is three cases with I totally outside J 322 | I = (i_min, i_max) 323 | J = (j_min, j_max) 324 | E = (e_min, e_max) 325 | """ 326 | # Closed-form for $C = \int_{d_min}^{d_max} \min(m, x) dx$ 327 | # Case 1: $d_max <= m$: 328 | # C = \int_{d_min}^{d_max} x dx = (1/2)*(d_max^2 - d_min^2) 329 | # Case 2: $d_min < m < d_max$: 330 | # C = \int_{d_min}^{m} x dx + \int_{m}^{d_max} m dx 331 | # = (1/2)*(m^2 - d_min^2) + m (d_max - m) 332 | # Case 3: $m <= d_min$: 333 | # C = \int_{d_min}^{d_max} m dx = m (d_max - d_min) 334 | # 335 | # For the combinated of all three cases, we see first that: 336 | # A = min(d_max,m)^2 - min(d_min, m)^2 is: d_max^2 - d_min^2 (case1); m^2 - d_min^2 (case2); 0 (case3) 337 | # and then that: 338 | # B = max(d_max, m) - max(d_min, m) is: 0 (case1); d_max - m (case2); d_max-d_min (case3) 339 | # so that: 340 | # C = (1/2)*A + m*B 341 | # It is checked below, for each case C1, C2, C3: 342 | 343 | """ Case 1 """ 344 | e_min = 0.7176185 345 | j_min = 1.570739 346 | j_max = 1.903998 347 | e_max = 2.722883 348 | i_min = 0.924204 349 | i_max = 1.376826 350 | d_min = max(i_min - j_max, j_min - i_max) # 0.1939125 351 | d_max = max(i_max - j_max, j_min - i_min) # 0.6465346 352 | m = min(j_min - e_min, e_max - j_max) # 0.8188856 353 | C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.1902024 354 | C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # 0.17535 355 | C_case3 = (d_max - d_min)*m # 0.3706457 356 | A = min(d_max, m)**2 - min(d_min, m)**2 # 0.3804049 357 | B = max(d_max, m) - max(d_min, m) # 0 358 | C = (1/2)*A + m*B # 0.1902024 359 | # Actual test 360 | I = (i_min, i_max) 361 | J = (j_min, j_max) 362 | E = (e_min, e_max) 363 | self.assertTrue(d_max <= m) # it is the case 1 364 | self.assertEqual(C_case1, C) 365 | self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C) 366 | 367 | """ Case 2 """ 368 | e_min = 0.3253522 369 | j_min = 0.5569796 370 | j_max = 0.8238064 371 | e_max = 1.403741 372 | i_min = 0.8751017 373 | i_max = 1.116294 374 | d_min = max(i_min - j_max, j_min - i_max) # 0.05129532 375 | d_max = max(i_max - j_max, j_min - i_min) # 0.2924877 376 | m = min(j_min - e_min, e_max - j_max) # 0.2316275 377 | C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.04145893 378 | C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # 0.03960695 379 | C_case3 = (d_max - d_min)*m # 0.05586679 380 | A = min(d_max, m)**2 - min(d_min, m)**2 # 0.05102008 381 | B = max(d_max, m) - max(d_min, m) # 0.06086027 382 | C = (1/2)*A + m*B # 0.03960695 383 | # Actual test 384 | I = (i_min, i_max) 385 | J = (j_min, j_max) 386 | E = (e_min, e_max) 387 | self.assertTrue(d_min < m) # it is the case 2 388 | self.assertTrue(m < d_max) # it is the case 2 389 | self.assertEqual(C_case2, C) 390 | self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C) 391 | 392 | """ Case 3 """ 393 | e_min = 0.6516738 394 | j_min = 1.523338 395 | j_max = 1.958426 396 | e_max = 2.435003 397 | i_min = 0.767282 398 | i_max = 0.7753016 399 | d_min = max(i_min - j_max, j_min - i_max) # 0.7480365 400 | d_max = max(i_max - j_max, j_min - i_min) # 0.7560561 401 | m = min(j_min - e_min, e_max - j_max) # 0.4765765 402 | C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.006031113 403 | C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # -0.03302331 404 | C_case3 = (d_max - d_min)*m # 0.003821954 405 | A = min(d_max, m)**2 - min(d_min, m)**2 # 0 406 | B = max(d_max, m) - max(d_min, m) # 0.008019604 407 | C = (1/2)*A + m*B # 0.003821954 408 | # Actual test 409 | I = (i_min, i_max) 410 | J = (j_min, j_max) 411 | E = (e_min, e_max) 412 | self.assertTrue(m <= d_min) # it is the case 3 413 | self.assertEqual(C_case3, C) 414 | self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C) 415 | 416 | """ 417 | Function `integral_mini_interval_Pprecision_CDFmethod` 418 | """ 419 | class Test_integral_mini_interval_Pprecision_CDFmethod(unittest.TestCase): 420 | def test_symmetric(self): 421 | """ 422 | Check function `integral_mini_interval_Pprecision_CDFmethod` 423 | in the symmetric case i.e. J is centered on E, 424 | and when I goes from the min of E to the max of E. 425 | 426 | Check in three cases 427 | """ 428 | lists = [dict({'e_min': 0.2655087, 429 | 'j_min': 0.9202326, 430 | 'j_max': 1.187741, 431 | 'e_max': 1.842465}), 432 | dict({'e_min': 0.3721239, 433 | 'j_min': 0.7253212, 434 | 'j_max': 0.9439665, 435 | 'e_max': 1.297164}), 436 | dict({'e_min': 0.5728534, 437 | 'j_min': 0.8431135, 438 | 'j_max': 1.35991, 439 | 'e_max': 1.63017})] 440 | 441 | for my_dict in lists: 442 | e_min = my_dict['e_min'] 443 | j_min = my_dict['j_min'] 444 | j_max = my_dict['j_max'] 445 | e_max = my_dict['e_max'] 446 | 447 | # on the left 448 | i_min_left = e_min 449 | i_max_left = j_min 450 | # on the right 451 | i_min_right = j_max 452 | i_max_right = e_max 453 | m = min(j_min - e_min, e_max - j_max) 454 | M = max(j_min - e_min, e_max - j_max) # same because symmetric 455 | 456 | # Actual test 457 | I_left = (i_min_left, i_max_left) 458 | I_right = (i_min_right, i_max_right) 459 | J = (j_min, j_max) 460 | E = (e_min, e_max) 461 | integral_left = integral_mini_interval_Pprecision_CDFmethod(I_left, J, E) 462 | integral_middle = max(J) - min(J) 463 | integral_right = integral_mini_interval_Pprecision_CDFmethod(I_right, J, E) 464 | m = min(J) - min(E) 465 | M = max(E) - max(J) 466 | DeltaJ = max(J) - min(J) 467 | DeltaE = max(E) - min(E) 468 | self.assertAlmostEqual((1-DeltaJ/DeltaE)*m/2, integral_left) 469 | self.assertAlmostEqual(DeltaJ, integral_middle) 470 | self.assertAlmostEqual((1-DeltaJ/DeltaE)*M/2, integral_right) 471 | # Explanation: 472 | # In case of symmetry the value is 1 for elements in J, 473 | # outside, it goes from (1 - DeltaJ/DeltaE) the closer to J, 474 | # until 0 at min(E) and max(E). 475 | # Since it's symmetric it decreases always linearly, on both side 476 | # It is (1 - DeltaJ/DeltaE) and not 1 as the border of J because 477 | # there is already DeltaJ/DeltaE of the probability took on the interval J 478 | # 479 | # So e.g. on the left, it's a triangle of height (1 - DeltaJ/DeltaE) and length 480 | # m (or M, it's the same since it's symmetric), so the answer. 481 | 482 | def test_almost_point(self): 483 | """ 484 | Check a property of the function 485 | `integral_mini_interval_Pprecision_CDFmethod` in the almost point 486 | case, i.e. J of duration 1e-9 487 | Check in three cases 488 | """ 489 | lists = [dict({'e_min': 0.2655087, 490 | 'j_min': 0.9202326, 491 | 'e_max': 1.842465}), 492 | dict({'e_min': 0.3721239, 493 | 'j_min': 0.7253212, 494 | 'e_max': 1.297164}), 495 | dict({'e_min': 0.5728534, 496 | 'j_min': 0.8431135, 497 | 'e_max': 1.63017})] 498 | 499 | for my_dict in lists: 500 | e_min = my_dict['e_min'] 501 | j_min = my_dict['j_min'] 502 | j_max = j_min + 1e-9 # almost point case 503 | e_max = my_dict['e_max'] 504 | 505 | # on the left 506 | i_min_left = e_min 507 | i_max_left = j_min 508 | # on the right 509 | i_min_right = j_max 510 | i_max_right = e_max 511 | 512 | # Actual test 513 | I_left = (i_min_left, i_max_left) 514 | I_right = (i_min_right, i_max_right) 515 | J = (j_min, j_max) 516 | E = (e_min, e_max) 517 | integral_left = integral_mini_interval_Pprecision_CDFmethod(I_left, J, E) 518 | # integral_middle = max(J) - min(J) 519 | integral_right = integral_mini_interval_Pprecision_CDFmethod(I_right, J, E) 520 | DeltaE = max(E) - min(E) 521 | self.assertAlmostEqual((integral_left + integral_right)/DeltaE, 1/2) 522 | # Explanation: for point anomaly, the mean value should be 1/2 523 | 524 | """ 525 | Function `integral_interval_probaCDF_precision` 526 | """ 527 | class Test_integral_interval_probaCDF_precision(unittest.TestCase): 528 | def test_basics(self): 529 | """ 530 | Some tests *for proba_CDF precision* integral 531 | """ 532 | ## For I close to the border of E, it's close to 0% 533 | # (after taking the mean i.e. dividing by |I|) 534 | E = (-3, 3) 535 | J = (-1, 2.4) 536 | x = -2.5 537 | I1 = (-3, x) 538 | DeltaI1 = max(I1) - min(I1) 539 | self.assertTrue(integral_interval_probaCDF_precision(I1, J, E) / DeltaI1 < 0.05) 540 | 541 | x = -2.8 542 | I2 = (-3, x) 543 | DeltaI2 = max(I2) - min(I2) 544 | self.assertTrue(integral_interval_probaCDF_precision(I2, J, E) / DeltaI2 < integral_interval_probaCDF_precision(I1, J, E) / DeltaI1) 545 | 546 | x = -2.99 547 | I3 = (-3, x) 548 | DeltaI3 = max(I3) - min(I3) 549 | self.assertTrue(integral_interval_probaCDF_precision(I3, J, E) / DeltaI3 < integral_interval_probaCDF_precision(I2, J, E) / DeltaI2) 550 | 551 | def test_closed(self): 552 | """ 553 | proba_CDF precision integral verifies closed form integral when I=E 554 | """ 555 | def closed_form_for_I_equals_to_E_proba_CDF(J, E): 556 | # The total integral (when I is the whole interval E) is given by the sum: 557 | # I = (1-DeltaJ/DeltaE)*m/2 + (1-DeltaJ/DeltaE)*M/2 + DeltaJ 558 | # and M+m = DeltaE - DeltaJ so 559 | # I = (1-DeltaJ/DeltaE)*(DeltaE - DeltaJ)/2 + DeltaJ 560 | # = (DeltaE - DeltaJ - DeltaJ + DeltaJ^2/DeltaE + 2*DeltaJ)/2 (*) 561 | # = (DeltaE + DeltaJ^2/DeltaE)/2 562 | DeltaE = max(E) - min(E) 563 | DeltaJ = max(J) - min(J) 564 | return((DeltaE + DeltaJ**2/DeltaE)/2) 565 | 566 | E = (-3, 3) 567 | J = (-1, 2.4) 568 | I = E 569 | self.assertAlmostEqual(integral_interval_probaCDF_precision(I, J, E), 570 | closed_form_for_I_equals_to_E_proba_CDF(J, E)) 571 | 572 | E = (-10, 3) 573 | J = (0, 2.9) 574 | I = E 575 | self.assertAlmostEqual(integral_interval_probaCDF_precision(I, J, E), 576 | closed_form_for_I_equals_to_E_proba_CDF(J, E)) 577 | 578 | """ 579 | Function `cut_J_based_on_mean_func` 580 | """ 581 | class Test_cut_J_based_on_mean_func(unittest.TestCase): 582 | def test_generic(self): 583 | J = None 584 | e_mean = 1.5 585 | tested = cut_J_based_on_mean_func(J, e_mean) 586 | expected = (None, None) 587 | self.assertEqual(tested, expected) 588 | 589 | J = (2, 3) 590 | e_mean = 1.5 591 | tested = cut_J_based_on_mean_func(J, e_mean) 592 | expected = (None, J) 593 | self.assertEqual(tested, expected) 594 | 595 | J = (0, 1) 596 | e_mean = 1.5 597 | tested = cut_J_based_on_mean_func(J, e_mean) 598 | expected = (J, None) 599 | self.assertEqual(tested, expected) 600 | 601 | J = (0, 5) 602 | e_mean = 1.5 603 | tested = cut_J_based_on_mean_func(J, e_mean) 604 | expected = ((0, 1.5), (1.5, 5)) 605 | self.assertEqual(tested, expected) 606 | 607 | J = (0, 1.5) 608 | e_mean = 1.5 609 | tested = cut_J_based_on_mean_func(J, e_mean) 610 | expected = (J, None) 611 | self.assertEqual(tested, expected) 612 | 613 | J = (1.5, 2) 614 | e_mean = 1.5 615 | tested = cut_J_based_on_mean_func(J, e_mean) 616 | expected = (None, J) 617 | self.assertEqual(tested, expected) 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | """ 645 | Functions `integral_interval_probaCDF_recall` and `integral_mini_interval_Precall_CDFmethod` 646 | """ 647 | class Test_integral_interval_probaCDF_recall(unittest.TestCase): 648 | def test_almost_point(self): 649 | """ 650 | Check a property of the function 651 | `integral_interval_probaCDF_recall` in the almost point 652 | case, i.e. when both I and J are both almost-point anomalies 653 | Check in three cases 654 | """ 655 | size_event = 1e-9 # almost a point anomaly 656 | # J is an interval of length 2*size_event 657 | # I is also an interval of 2*length size_event 658 | # E is a (longer) interval 659 | # The recall of J from I should be 1 when I is close to J, then decrease to 0 when I is closer and closer to E, 660 | # and keep to be 0 outside E. 661 | 662 | ## We take J at the middle of E 663 | E = (1, 3) 664 | J = (2-size_event, 2+size_event) 665 | DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean 666 | 667 | # a. I is at position J, so the recall should be 1 668 | I = (2-size_event, 2+size_event) 669 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 1) 670 | 671 | # b. I is close to J, so the recall should be high 672 | I = (1.98-size_event, 1.98+size_event) 673 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.95) 674 | 675 | # c. I is at middle between max(E) and min(J), so the recall should be 0.5 676 | I = (1.5-size_event, 1.5+size_event) 677 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5) 678 | # c'. Same for I at the other side 679 | I = (2.5-size_event, 2.5+size_event) 680 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5) 681 | 682 | # d. I is close to the edge of E, the recall should be low 683 | I = (1.01-size_event, 1.01+size_event) 684 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1) 685 | I = (2.99-size_event, 2.99+size_event) 686 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1) 687 | 688 | # e. I is at the edge of E, the recall should be 0 689 | I = (1-size_event, 1+size_event) 690 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 691 | I = (3-size_event, 3+size_event) 692 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 693 | 694 | # f. I is outside E, the recall should be 0 695 | I = (-4-size_event, -4+size_event) 696 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 697 | I = (10-size_event, 10+size_event) 698 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 699 | 700 | def test_partially_almost_point(self): 701 | """ 702 | Check the recall probability when J is almost-point anomaly 703 | and I is growing 704 | """ 705 | size_event = 1e-9 # almost a point anomaly 706 | # J is an interval of length 2*size_event 707 | # E is a (longer) interval 708 | # The recall of J from I should be 1 when I is close to J, then decrease to 0 when I is closer and closer to E, 709 | # and keep to be 0 outside E. 710 | 711 | # In the following, the pivot is has in the previous test, so it does not change anything 712 | # to have I not a point anomaly for the recall 713 | 714 | ## We take J at the middle of E 715 | E = (1, 3) 716 | J = (2-size_event, 2+size_event) 717 | DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean 718 | 719 | # J is included in I, so the recall should be 1 720 | I = (1-size_event, 3+size_event) 721 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 1) 722 | 723 | # I is close to J, so the recall should be high 724 | I = (1-size_event, 1.98+size_event) 725 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.95) 726 | 727 | # c. I is at middle between max(E) and min(J), so the recall should be 0.5 728 | I = (1-size_event, 1.5+size_event) 729 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5) 730 | # c'. Same for I at the other side 731 | I = (2.5-size_event, 3+size_event) 732 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5) 733 | 734 | # d. I is close to the edge of E, the recall should be low 735 | I = (1-size_event, 1.01+size_event) 736 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1) 737 | I = (2.99-size_event, 3+size_event) 738 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1) 739 | 740 | # e. I is at the edge of E, the recall should be 0 741 | I = (0-size_event, 1+size_event) 742 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 743 | I = (3-size_event, 5+size_event) 744 | self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0) 745 | 746 | def test_special_cases(self): 747 | """ 748 | Check that when J is as large as E and I is a point-anomaly 749 | at the middle of J, the recall is 0.625, 750 | and when I is at the border, the recall is 0.25 751 | (p = 1 in the article) 752 | """ 753 | size_event = 1e-9 # almost a point anomaly 754 | x_center = 2 755 | delta = 1 756 | E = (x_center - delta, x_center + delta) 757 | J = (x_center - delta, x_center + delta) 758 | DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean 759 | 760 | I = (x_center-size_event, x_center+size_event) 761 | p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ # it's the constant 5/8 when |J|=|E| 762 | self.assertAlmostEqual(p_recall, 0.625) # 5/8 == 0.625 763 | 764 | I = (x_center - delta-size_event, x_center - delta+size_event) 765 | p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ 766 | self.assertAlmostEqual(p_recall, 0.25) 767 | 768 | I = (x_center + delta - size_event, x_center + delta +size_event) 769 | p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ 770 | self.assertAlmostEqual(p_recall, 0.25) 771 | 772 | def test_behavior_when_I_increases(self): 773 | """ 774 | Check that recall is better and better for a prediction I 775 | centered in the middle of J that grows symmetrically 776 | """ 777 | size_event = 1e-9 778 | E = (-5, 5) 779 | J = (-3, 3) 780 | DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean 781 | I = (0-size_event, 0+size_event) 782 | # 0.708 in that case, because more possibility for a random pred to miss the gt event 783 | # compared to the 0.625 constant when |E|=|J| 784 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.625) 785 | 786 | I2 = (-2, 2) # 0.9666 in that case 787 | self.assertTrue(integral_interval_probaCDF_recall(I2, J, E) / DeltaJ > 0.625) 788 | 789 | I1 = (-1, 1) # 0.8666 in that case 790 | self.assertTrue(integral_interval_probaCDF_recall(I1, J, E) / DeltaJ > 0.625) 791 | 792 | # Better recall for I2 compared to I1 793 | self.assertTrue(integral_interval_probaCDF_recall(I2, J, E) / DeltaJ > integral_interval_probaCDF_recall(I1, J, E) / DeltaJ) 794 | 795 | # Better recall for I29 compared to I2 796 | I29 = (-2.9, 2.9) # 0.999666 797 | self.assertTrue(integral_interval_probaCDF_recall(I29, J, E) / DeltaJ > integral_interval_probaCDF_recall(I2, J, E) / DeltaJ) 798 | 799 | def test_behavior_when_E_increases(self): 800 | """ 801 | Check that recall goes to 1 when |E| increases to the right 802 | without chaning I 803 | """ 804 | size_event = 1e-9 805 | J = (-3, 3) 806 | DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean 807 | I = (10, 10+size_event) 808 | 809 | # |E| is growing to the right until infinity, recall should be better and better 810 | E10 = (-10, 10) 811 | integral_interval_probaCDF_recall(I, J, E10) / DeltaJ # 0 812 | E12 = (-10, 12) 813 | integral_interval_probaCDF_recall(I, J, E12) / DeltaJ # 0.1590909 814 | E18 = (-10, 18) 815 | integral_interval_probaCDF_recall(I, J, E18) / DeltaJ # 0.3392857 816 | E30 = (-10, 30) 817 | integral_interval_probaCDF_recall(I, J, E30) / DeltaJ # 0.5375 818 | E100 = (-10, 100) 819 | integral_interval_probaCDF_recall(I, J, E100) / DeltaJ # 0.8318182 820 | E10000 = (-10, 10000) 821 | integral_interval_probaCDF_recall(I, J, E10000) / DeltaJ # 0.9981518 822 | 823 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E10) / DeltaJ < integral_interval_probaCDF_recall(I, J, E12) / DeltaJ) 824 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E12) / DeltaJ < integral_interval_probaCDF_recall(I, J, E18) / DeltaJ) 825 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E18) / DeltaJ < integral_interval_probaCDF_recall(I, J, E30) / DeltaJ) 826 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E30) / DeltaJ < integral_interval_probaCDF_recall(I, J, E100) / DeltaJ) 827 | self.assertTrue(integral_interval_probaCDF_recall(I, J, E100) / DeltaJ < integral_interval_probaCDF_recall(I, J, E10000) / DeltaJ) 828 | 829 | if __name__ == '__main__': 830 | unittest.main() 831 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | import math 6 | from affiliation.metrics import ( 7 | test_events, 8 | pr_from_events) 9 | 10 | """ 11 | Function `test_events` prevents some mistakes as input 12 | """ 13 | class Test_test_events(unittest.TestCase): 14 | def test_generic(self): 15 | with self.assertRaises(TypeError): 16 | events = (1,3) 17 | test_events(events) 18 | with self.assertRaises(TypeError): 19 | events = [[1,3],[4,5]] 20 | test_events(events) 21 | with self.assertRaises(ValueError): 22 | events = [(1,3),(4,5,6)] 23 | test_events(events) 24 | with self.assertRaises(ValueError): 25 | events = [(1,3),(5,4)] 26 | test_events(events) 27 | with self.assertRaises(ValueError): 28 | events = [(4,6),(1,2)] 29 | test_events(events) 30 | with self.assertRaises(ValueError): 31 | events = [(4,6),(6,7)] # borders are not disjoint 32 | test_events(events) 33 | 34 | """ 35 | Function `pr_from_events` 36 | """ 37 | class Test_pr_from_events(unittest.TestCase): 38 | def test_empty(self): 39 | """ 40 | With empty entries for predictions, 41 | the recall should be 0, and the predictions undefined 42 | (corresponding to resp. infinite and undefined distances) 43 | 44 | Note: It is not allowed to have events_gt empty 45 | """ 46 | events_pred = [] 47 | events_gt = [(1,10)] 48 | Trange = (1,10) 49 | results = pr_from_events(events_pred, events_gt, Trange) 50 | self.assertTrue(math.isnan(results['precision'])) 51 | self.assertEqual(results['recall'], 0) 52 | 53 | self.assertEqual(len(results['individual_precision_probabilities']), 1) 54 | self.assertEqual(len(results['individual_recall_probabilities']), 1) 55 | self.assertEqual(len(results['individual_precision_distances']), 1) 56 | self.assertEqual(len(results['individual_recall_distances']), 1) 57 | 58 | self.assertEqual(results['individual_recall_distances'][0], math.inf) 59 | self.assertEqual(results['individual_recall_probabilities'][0], 0) 60 | self.assertTrue(math.isnan(results['individual_precision_distances'][0])) 61 | self.assertTrue(math.isnan(results['individual_precision_probabilities'][0])) 62 | 63 | def test_generic_precision_distance(self): 64 | """ Example 1 for precision distance """ 65 | events_pred = [(1,3), (6,18), (25,26)] 66 | events_gt = [(1,8), (16,17), (25,28), (29,31)] 67 | Trange = (1, 31) 68 | results = pr_from_events(events_pred, events_gt, Trange) 69 | self.assertEqual(results['individual_precision_distances'], 70 | [8/8, 8.5/6, 0/1, math.nan]) 71 | # Explanation 72 | # For first ground truth, we group the elements of the first column 73 | # for all predictions: 74 | # * Prediction 1: [1,3), which is fully inside [1,8) so the distance is 0 75 | # * Prediction 2: pred=[6, 12) vs gt1=[1,8) so distance of 8 76 | # * Prediction 3: not affiliated with gt1 77 | # In total, total distance is 8, for a total interval of 2+6=8, so the mean precision for gt1 is 8/8=1 78 | # 79 | # For second gt: 80 | # * Prediction 1: not affiliated with gt2 81 | # * Prediction 2: pred=[12, 18) vs gt=[16,17) so distance of 8.5 82 | # * Prediction 3: not affiliated with gt2 83 | # In total, total distance is 8.5, for a total interval of 6, so the mean precision for gt1 is 8.5/6 84 | # 85 | # For third gt: 86 | # Only third prediction [25,26) is affiliated to the gt3=[25,28), and distance is 0 over a predicted 87 | # interval of 1, so the distance is 0/1 88 | # 89 | # For last gt: 90 | # No prediction on the affiliated interval, so distance is 0/0 = NaN 91 | 92 | """ Example 2 for precision distance with one gt and one pred only """ 93 | events_pred = [(1,3), (5,10)] 94 | events_gt = [(1,8), (16,17)] 95 | Trange = (1, 31) 96 | results0 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances'] 97 | 98 | events_pred = [(1,3)] 99 | events_gt = [(1,8), (16,17)] 100 | results1 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances'] 101 | 102 | events_pred = [(1,3), (5,10)] 103 | events_gt = [(1,8)] 104 | results2 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances'] 105 | 106 | events_pred = [(2,3)] 107 | events_gt = [(1,8)] 108 | results3 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances'] 109 | 110 | self.assertEqual(results0, [(0+2**2/2)/7, math.nan]) 111 | self.assertEqual(results1, [0, math.nan]) 112 | self.assertEqual(results2, [results0[0]]) # answer is still a list, of length 1 113 | self.assertEqual(results3, [results1[0]]) 114 | 115 | def test_generic_recall_distance(self): 116 | """ Example 1 for recall distance """ 117 | events_pred = [(1,3), (6,18), (25,26)] 118 | events_gt = [(1,8), (16,17), (25,28), (29,31)] 119 | Trange = (1, 31) 120 | results = pr_from_events(events_pred, events_gt, Trange) 121 | self.assertEqual(results['individual_recall_distances'], 122 | [2.25/7, 0/1, 2/3, math.inf]) 123 | # Explanation 124 | # 125 | # For the first gt: 126 | # * Recall regarding prediction 1: gt1@aff_pred1=[1,(3+6)/2), and pred1=[1,3), so the distance is (4.5-3)^2/2 127 | # * Recall regarding prediction 2: gt1@aff_pred2=[(3+6)/2,8), and pred2=[6,18), so the distance is (6-4.5)^2/2 128 | # * gt1 is not affiliated to the other predictions 129 | # In total, total distance is (4.5-3)^2/2+(6-4.5)^2/2 = 2.25 130 | # And the length of gt1 is 7, hence the result. 131 | # 132 | # For the second gt: 133 | # * Recall regarding prediction 2: gt2@aff_pred2=gt2=[16,17), and pred2=[6,18), so the recall distance is 0 134 | # * gt2 is not affiliated to the other predictions 135 | # In total, total distance is 0 136 | # And the length of gt2 is 1, hence the result. 137 | # 138 | # For the third gt: 139 | # * Recall regarding prediction 3: gt3@aff_pred3=gt3=[25,28), and pred3=[25,26), so the recall distance is 2^2/2 140 | # * gt3 is not affiliated to the other predictions 141 | # In total, total distance is 2 142 | # And the length of gt3 is 3, hence the result. 143 | # 144 | # For the last gt, there is no affiliated prediction, so the recall distance 145 | # is infinite. 146 | 147 | """ Example 2 for recall distance with one gt and one pred only """ 148 | events_pred = [(1,3), (5,10)] 149 | events_gt = [(1,8), (16,17)] 150 | Trange = (1, 31) 151 | results0 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances'] 152 | 153 | events_pred = [(1,3)] 154 | events_gt = [(1,8), (16,17)] 155 | results1 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances'] 156 | 157 | events_pred = [(1,3), (5,10)] 158 | events_gt = [(1,8)] 159 | results2 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances'] 160 | 161 | events_pred = [(1,3)] 162 | events_gt = [(1,8)] 163 | results3 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances'] 164 | 165 | self.assertEqual(results0, [(1**2/2+1**2/2)/7, math.inf]) 166 | self.assertEqual(results1, [(5**2/2)/7, math.inf]) 167 | self.assertEqual(results2, [results0[0]]) # answer is still a list, of length 1 168 | self.assertEqual(results3, [results1[0]]) 169 | # Explanation: 170 | # for results0: 171 | # gt1: 4 is the cut of affiliation of gt1 between pred1 and pred2, on both side distance from 0 to 1, for a gt1 of length 7 172 | # gt2: distance is outside the zone of affiliation, hence infinite 173 | # 174 | # for results1: 175 | # gt1: on [3,8), distance from 0 to 5, so 5^2/2, over a total length of gt1 of 7 176 | # gt2: infinite too 177 | # 178 | # for results2: 179 | # gt1: like first part of results0 180 | # 181 | # for results3: 182 | # gt1: like first part of results1 183 | 184 | def test_check_coherence(self): 185 | """ 186 | Check coherence of the results in one example 187 | """ 188 | events_pred = [(1,3), (6,18), (25,26)] 189 | events_gt = [(1,8), (16,17), (25,28), (29,31)] 190 | Trange = (1,40) 191 | results = pr_from_events(events_pred, events_gt, Trange) 192 | 193 | # around the third gt (25,28), only (25,26) is affiliated, with a 194 | # which is fully included, hence a precision probability of 1 195 | self.assertEqual(results['individual_precision_probabilities'][2], 1) 196 | 197 | # around the fourth gt (29,31), there is no prediction 198 | # hence a precision probability which is undefined 199 | # and also gives a recall probability of 0 (and a distance of math.inf) 200 | self.assertTrue(math.isnan(results['individual_precision_probabilities'][3])) 201 | self.assertEqual(results['individual_recall_probabilities'][3], 0) 202 | self.assertEqual(results['individual_recall_distances'][3], math.inf) 203 | 204 | # The second gt (16,17) is fully recalled by (6,18), so the recall is 1 205 | # and the corresponding distance is 0 206 | self.assertEqual(results['individual_recall_probabilities'][1], 1) 207 | self.assertEqual(results['individual_recall_distances'][1], 0) 208 | 209 | def test_paper(self): 210 | """ 211 | Example of the paper 212 | """ 213 | events_gt = [(0, 10*60), (50*60, 70*60), (170*60, 175*60)] 214 | events_pred = [(5*60,6*60), (7*60,10*60), (11*60,12*60), 215 | (40*60, 60*60), (115*60, 130*60), (135*60, 140*60), 216 | (165*60,170*60)] 217 | Trange = (0, 180*60) 218 | results = pr_from_events(events_pred, events_gt, Trange) 219 | self.assertAlmostEqual(results['individual_precision_distances'], 220 | [18, 60*11.5, 60*31.25]) 221 | self.assertAlmostEqual(results['individual_recall_distances'], 222 | [76.5, 60*2.5, 60*2.5]) 223 | self.assertAlmostEqual(results['individual_precision_probabilities'][1], 224 | 0.672222222) 225 | self.assertAlmostEqual(results['individual_recall_probabilities'][1], 226 | 0.944444444) 227 | 228 | if __name__ == '__main__': 229 | unittest.main() 230 | -------------------------------------------------------------------------------- /tests/test_single_ground_truth_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import unittest 4 | 5 | import math 6 | from affiliation._single_ground_truth_event import ( 7 | affiliation_precision_distance, 8 | affiliation_recall_distance, 9 | affiliation_precision_proba, 10 | affiliation_recall_proba) 11 | 12 | """ 13 | Function `affiliation_precision_distance` is correct 14 | """ 15 | class Test_affiliation_precision_distance(unittest.TestCase): 16 | def test_generic(self): 17 | J = (1, 8) 18 | self.assertEqual(affiliation_precision_distance([(1,3)], J), 0) 19 | self.assertEqual(affiliation_precision_distance([(1,8)], J), 0) 20 | self.assertEqual(affiliation_precision_distance([(1,9)], J), (1/2)/8) 21 | self.assertEqual(affiliation_precision_distance([(0,9)], J), 1/9) 22 | self.assertEqual(affiliation_precision_distance([(1,2), (3,4)], J), 0) 23 | self.assertEqual(affiliation_precision_distance([(7,9)], J), 1/4) 24 | self.assertEqual(affiliation_precision_distance([(8,9)], J), 1/2) 25 | self.assertEqual(affiliation_precision_distance([(9,10)], J), 3/2) 26 | self.assertEqual(affiliation_precision_distance([(1,2),(9,10)], J), 3/4) 27 | 28 | # previous tests 29 | # with pred=[6, 12) vs gt=[1,8): 0 at first, then int tdt from 0 to 4 (on [8, 12)), which is 8, 30 | # then divided by the length 12-6=6 31 | self.assertEqual(affiliation_precision_distance([(6,12)], (1,8)), 8/6) 32 | # with pred=[12, 18) vs gt=[16,17): 0 at the middle, on the left (4^2)/2, on the right (1^2)/2, sum is 8.5 33 | # then divided by the length 6 34 | self.assertEqual(affiliation_precision_distance([(12,18)], (16,17)), 8.5/6) 35 | 36 | def test_empty(self): 37 | """ 38 | With empty or None entries, return undefined (represented with math.nan) 39 | """ 40 | J = (1, 8) 41 | self.assertTrue(math.isnan(affiliation_precision_distance([], J))) 42 | self.assertTrue(math.isnan(affiliation_precision_distance([None, None], J))) 43 | 44 | def test_paper(self): 45 | """ 46 | Example of the paper 47 | """ 48 | J = (0, 10*60) 49 | Is = [(5*60,6*60), (7*60,10*60), (11*60,12*60)] 50 | self.assertEqual(affiliation_precision_distance(Is, J), 18) 51 | 52 | J = (50*60, 70*60) 53 | Is = [(40*60, 60*60), (115*60,120*60)] 54 | self.assertEqual(affiliation_precision_distance(Is, J), 60*11.5) 55 | 56 | J = (170*60, 175*60) 57 | Is = [(120*60, 130*60), (135*60, 140*60), (165*60,170*60)] 58 | self.assertEqual(affiliation_precision_distance(Is, J), 60*31.25) 59 | 60 | """ 61 | Function `affiliation_recall_distance` is correct 62 | """ 63 | class Test_affiliation_recall_distance(unittest.TestCase): 64 | def test_generic(self): 65 | J = (1, 8) 66 | self.assertEqual(affiliation_recall_distance([(1,3)], J), 0*(2/7) + 2.5*(5/7)) 67 | self.assertEqual(affiliation_recall_distance([(1,8)], J), 0) 68 | self.assertEqual(affiliation_recall_distance([(1,9)], J), 0) 69 | self.assertEqual(affiliation_recall_distance([(0,9)], J), 0) 70 | 71 | def test_empty(self): 72 | """ 73 | With empty or None entries, return +inf (recall is always defined) 74 | but here there is no prediction in the zone, meaning that the recall 75 | is bad 76 | """ 77 | J = (1, 8) 78 | self.assertEqual(affiliation_recall_distance([], J), math.inf) 79 | self.assertEqual(affiliation_recall_distance([None, None], J), math.inf) 80 | 81 | def test_paper(self): 82 | """ 83 | Example of the paper 84 | """ 85 | J = (0, 10*60) 86 | Is = [(5*60,6*60), (7*60,10*60), (11*60,12*60)] 87 | self.assertEqual(affiliation_recall_distance(Is, J), 76.5) 88 | 89 | J = (50*60, 70*60) 90 | Is = [(40*60, 60*60), (115*60,120*60)] 91 | self.assertEqual(affiliation_recall_distance(Is, J), 60*2.5) 92 | 93 | J = (170*60, 175*60) 94 | Is = [(120*60, 130*60), (135*60, 140*60), (165*60,170*60)] 95 | self.assertEqual(affiliation_recall_distance(Is, J), 60*2.5) 96 | 97 | """ 98 | Function `affiliation_precision_proba` 99 | """ 100 | class Test_affiliation_precision_proba(unittest.TestCase): 101 | def test_empty(self): 102 | """ 103 | With empty or None entries, return undefined (represented with math.nan) 104 | """ 105 | J = (1, 8) 106 | E = J 107 | self.assertTrue(math.isnan(affiliation_precision_proba([], J, E))) 108 | self.assertTrue(math.isnan(affiliation_precision_proba([None, None], J, E))) 109 | 110 | def test_paper(self): 111 | """ 112 | Example of the paper 113 | """ 114 | J = (50*60, 70*60) 115 | Is = [(40*60, 60*60), (115*60,120*60)] 116 | E = (30*60, 120*60) 117 | self.assertAlmostEqual(affiliation_precision_proba(Is, J, E), 0.672222222) 118 | 119 | """ 120 | Function `affiliation_recall_proba` 121 | """ 122 | class Test_affiliation_recall_proba(unittest.TestCase): 123 | def test_empty(self): 124 | """ 125 | With empty or None entries, return 0 126 | """ 127 | J = (1, 8) 128 | E = J 129 | self.assertEqual(affiliation_recall_proba([], J, E), 0) 130 | self.assertEqual(affiliation_recall_proba([None, None], J, E), 0) 131 | 132 | def test_paper(self): 133 | """ 134 | Example of the paper 135 | """ 136 | J = (50*60, 70*60) 137 | Is = [(40*60, 60*60), (115*60,120*60)] 138 | E = (30*60, 120*60) 139 | self.assertAlmostEqual(affiliation_recall_proba(Is, J, E), 0.944444444) 140 | 141 | """ 142 | Misc 143 | """ 144 | class Test_single_ground_truth_event_misc(unittest.TestCase): 145 | def test_generic(self): 146 | """ 147 | Check accordance with previous values 148 | """ 149 | E = (1, 90+1) 150 | Is = [(11,30+1),(86,90+1)] 151 | J = (21, 40+1) 152 | self.assertAlmostEqual(affiliation_recall_distance(Is, J), 2.5) 153 | self.assertAlmostEqual(affiliation_precision_distance(Is, J), 11.5) 154 | self.assertAlmostEqual(affiliation_recall_proba(Is, J, E), 0.944444444) 155 | self.assertAlmostEqual(affiliation_precision_proba(Is, J, E), 0.672222222) 156 | 157 | if __name__ == '__main__': 158 | unittest.main() 159 | --------------------------------------------------------------------------------