├── .gitignore
├── LICENSE
├── README.md
├── affiliation
    ├── __init__.py
    ├── _affiliation_zone.py
    ├── _integral_interval.py
    ├── _single_ground_truth_event.py
    ├── generics.py
    └── metrics.py
├── data
    ├── machinetemp_adversary.gz
    ├── machinetemp_greenhouse.gz
    ├── machinetemp_groundtruth.gz
    ├── machinetemp_lstmad.gz
    ├── machinetemp_luminol.gz
    ├── machinetemp_trivial.gz
    ├── nyctaxi_adversary.gz
    ├── nyctaxi_greenhouse.gz
    ├── nyctaxi_groundtruth.gz
    ├── nyctaxi_lstmad.gz
    ├── nyctaxi_luminol.gz
    ├── nyctaxi_trivial.gz
    ├── swat_adversary.gz
    ├── swat_groundtruth.gz
    ├── swat_iforest.gz
    ├── swat_ocsvm.gz
    ├── swat_seq2seq.gz
    ├── swat_trivial.gz
    ├── twitteraapl_adversary.gz
    ├── twitteraapl_greenhouse.gz
    ├── twitteraapl_groundtruth.gz
    ├── twitteraapl_lstmad.gz
    ├── twitteraapl_luminol.gz
    └── twitteraapl_trivial.gz
├── setup.py
└── tests
    ├── __init__.py
    ├── test_affiliation_zone.py
    ├── test_data.py
    ├── test_generics.py
    ├── test_integral_interval.py
    ├── test_metrics.py
    └── test_single_ground_truth_event.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Alexis Huet and others
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # affiliation-metrics-py
 2 | 
 3 | Python 3 implementation of the affiliation metrics and tests for reproducing the experiments described in *Local Evaluation of Time Series Anomaly Detection Algorithms*, accepted in KDD 2022 Research Track: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining.
 4 | 
 5 | ### Installation
 6 | 
 7 | Type `pip install .` to install the *affiliation*
 8 | package. Only the [standard Python library](https://docs.python.org/3/library/index.html) is needed, there is no dependency to external libraries.
 9 | 
10 | ### Usage
11 | 
12 | In a Python session, the following lines give an example for computing 
13 | the affiliation metrics from prediction and ground truth vectors:
14 | 
15 | ```
16 | from affiliation.generics import convert_vector_to_events
17 | from affiliation.metrics import pr_from_events
18 | 
19 | vector_pred = [0, 0, 0, 0, 1, 0, 0, 0, 1, 0]
20 | vector_gt   = [0, 0, 0, 1, 0, 0, 0, 1, 1, 1]
21 | 
22 | events_pred = convert_vector_to_events(vector_pred) # [(4, 5), (8, 9)]
23 | events_gt = convert_vector_to_events(vector_gt)     # [(3, 4), (7, 10)]
24 | Trange = (0, len(vector_pred))
25 | 
26 | pr_from_events(events_pred, events_gt, Trange)
27 | ```
28 | 
29 | which gives as output:
30 | ```
31 |    {'precision': 0.82,
32 |     'recall': 0.84,
33 |     'individual_precision_probabilities': [0.63, 1.0],
34 |     'individual_recall_probabilities': [0.82, 0.87],
35 |     'individual_precision_distances': [0.5, 0.0],
36 |     'individual_recall_distances': [0.5, 0.33]}
37 | ```
38 | 
39 | ### Testing and reproducibility
40 | 
41 | The unit tests can be run by typing:
42 | 
43 | ```
44 |     python -m unittest discover
45 | ```
46 | 
47 | The results from the paper are also tested. 
48 | The specific tests of the results are located at `tests/test_data.py` and tested
49 | against data located in the folder `data/`. 
50 | 


--------------------------------------------------------------------------------
/affiliation/__init__.py:
--------------------------------------------------------------------------------
1 | from affiliation.metrics import pr_from_events
2 | 


--------------------------------------------------------------------------------
/affiliation/_affiliation_zone.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | from affiliation._integral_interval import interval_intersection
 4 | 
 5 | def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
 6 |     """
 7 |     Helper for `E_gt_func`
 8 |     
 9 |     :param j: index from 0 to len(Js) (included) on which to get the start
10 |     :param Js: ground truth events, as a list of couples
11 |     :param Trange: range of the series where Js is included
12 |     :return: generalized start such that the middle of t_start and t_stop 
13 |     always gives the affiliation zone
14 |     """
15 |     b = max(Trange)
16 |     n = len(Js)
17 |     if j == n:
18 |         return(2*b - t_stop(n-1, Js, Trange))
19 |     else:
20 |         return(Js[j][0])
21 | 
22 | def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
23 |     """
24 |     Helper for `E_gt_func`
25 |     
26 |     :param j: index from 0 to len(Js) (included) on which to get the stop
27 |     :param Js: ground truth events, as a list of couples
28 |     :param Trange: range of the series where Js is included
29 |     :return: generalized stop such that the middle of t_start and t_stop 
30 |     always gives the affiliation zone
31 |     """
32 |     if j == -1:
33 |         a = min(Trange)
34 |         return(2*a - t_start(0, Js, Trange))
35 |     else:
36 |         return(Js[j][1])
37 | 
38 | def E_gt_func(j, Js, Trange):
39 |     """
40 |     Get the affiliation zone of element j of the ground truth
41 |     
42 |     :param j: index from 0 to len(Js) (excluded) on which to get the zone
43 |     :param Js: ground truth events, as a list of couples
44 |     :param Trange: range of the series where Js is included, can 
45 |     be (-math.inf, math.inf) for distance measures
46 |     :return: affiliation zone of element j of the ground truth represented
47 |     as a couple
48 |     """
49 |     range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2
50 |     range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2
51 |     return((range_left, range_right))
52 | 
53 | def get_all_E_gt_func(Js, Trange):
54 |     """
55 |     Get the affiliation partition from the ground truth point of view
56 |     
57 |     :param Js: ground truth events, as a list of couples
58 |     :param Trange: range of the series where Js is included, can 
59 |     be (-math.inf, math.inf) for distance measures
60 |     :return: affiliation partition of the events
61 |     """
62 |     # E_gt is the limit of affiliation/attraction for each ground truth event
63 |     E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
64 |     return(E_gt)
65 | 
66 | def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]):
67 |     """
68 |     Cut the events into the affiliation zones
69 |     The presentation given here is from the ground truth point of view,
70 |     but it is also used in the reversed direction in the main function.
71 |     
72 |     :param Is: events as a list of couples
73 |     :param E_gt: range of the affiliation zones
74 |     :return: a list of list of intervals (each interval represented by either 
75 |     a couple or None for empty interval). The outer list is indexed by each
76 |     affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`.
77 |     """
78 |     out = [None] * len(E_gt)
79 |     for j in range(len(E_gt)):
80 |         E_gt_j = E_gt[j]
81 |         discarded_idx_before = [I[1] < E_gt_j[0] for I in Is]  # end point of predicted I is before the begin of E
82 |         discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
83 |         kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
84 |         Is_j = [x for x, y in zip(Is, kept_index)]
85 |         out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
86 |     return(out)
87 | 


--------------------------------------------------------------------------------
/affiliation/_integral_interval.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import math
  4 | from affiliation.generics import _sum_wo_nan
  5 | """
  6 | In order to shorten the length of the variables,
  7 | the general convention in this file is to let:
  8 |     - I for a predicted event (start, stop),
  9 |     - Is for a list of predicted events,
 10 |     - J for a ground truth event,
 11 |     - Js for a list of ground truth events.
 12 | """
 13 | 
 14 | def interval_length(J = (1,2)):
 15 |     """
 16 |     Length of an interval
 17 |     
 18 |     :param J: couple representating the start and stop of an interval, or None
 19 |     :return: length of the interval, and 0 for a None interval
 20 |     """
 21 |     if J is None:
 22 |         return(0)
 23 |     return(J[1] - J[0])
 24 | 
 25 | def sum_interval_lengths(Is = [(1,2),(3,4),(5,6)]):
 26 |     """
 27 |     Sum of length of the intervals
 28 |     
 29 |     :param Is: list of intervals represented by starts and stops
 30 |     :return: sum of the interval length
 31 |     """
 32 |     return(sum([interval_length(I) for I in Is]))
 33 | 
 34 | def interval_intersection(I = (1, 3), J = (2, 4)): 
 35 |     """
 36 |     Intersection between two intervals I and J
 37 |     I and J should be either empty or represent a positive interval (no point)
 38 |     
 39 |     :param I: an interval represented by start and stop
 40 |     :param J: a second interval of the same form
 41 |     :return: an interval representing the start and stop of the intersection (or None if empty)
 42 |     """
 43 |     if I is None:
 44 |         return(None)
 45 |     if J is None:
 46 |         return(None)
 47 |         
 48 |     I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
 49 |     if I_inter_J[0] >= I_inter_J[1]:
 50 |         return(None)
 51 |     else:
 52 |         return(I_inter_J)
 53 | 
 54 | def interval_subset(I = (1, 3), J = (0, 6)):
 55 |     """
 56 |     Checks whether I is a subset of J
 57 |     
 58 |     :param I: an non empty interval represented by start and stop
 59 |     :param J: a second non empty interval of the same form
 60 |     :return: True if I is a subset of J
 61 |     """
 62 |     if (I[0] >= J[0]) and (I[1] <= J[1]):
 63 |         return True
 64 |     else:
 65 |         return False
 66 | 
 67 | def cut_into_three_func(I, J):
 68 |     """
 69 |     Cut an interval I into a partition of 3 subsets:
 70 |         the elements before J,
 71 |         the elements belonging to J,
 72 |         and the elements after J
 73 |     
 74 |     :param I: an interval represented by start and stop, or None for an empty one
 75 |     :param J: a non empty interval
 76 |     :return: a triplet of three intervals, each represented by either (start, stop) or None
 77 |     """
 78 |     if I is None:
 79 |         return((None, None, None))
 80 |     
 81 |     I_inter_J = interval_intersection(I, J)
 82 |     if I == I_inter_J:
 83 |         I_before = None
 84 |         I_after = None
 85 |     elif I[1] <= J[0]:
 86 |         I_before = I
 87 |         I_after = None
 88 |     elif I[0] >= J[1]:
 89 |         I_before = None
 90 |         I_after = I
 91 |     elif (I[0] <= J[0]) and (I[1] >= J[1]):
 92 |         I_before = (I[0], I_inter_J[0])
 93 |         I_after = (I_inter_J[1], I[1])
 94 |     elif I[0] <= J[0]:
 95 |         I_before = (I[0], I_inter_J[0])
 96 |         I_after = None
 97 |     elif I[1] >= J[1]:
 98 |         I_before = None
 99 |         I_after = (I_inter_J[1], I[1])
100 |     else:
101 |         raise ValueError('unexpected unconsidered case')
102 |     return(I_before, I_inter_J, I_after)
103 |   
104 | def get_pivot_j(I, J):
105 |     """
106 |     Get the single point of J that is the closest to I, called 'pivot' here,
107 |     with the requirement that I should be outside J
108 |     
109 |     :param I: a non empty interval (start, stop)
110 |     :param J: another non empty interval, with empty intersection with I
111 |     :return: the element j of J that is the closest to I
112 |     """
113 |     if interval_intersection(I, J) is not None:
114 |         raise ValueError('I and J should have a void intersection')
115 | 
116 |     j_pivot = None # j_pivot is a border of J
117 |     if max(I) <= min(J):
118 |         j_pivot = min(J)
119 |     elif min(I) >= max(J):
120 |         j_pivot = max(J)
121 |     else:
122 |         raise ValueError('I should be outside J')
123 |     return(j_pivot)
124 | 
125 | def integral_mini_interval(I, J):
126 |     """
127 |     In the specific case where interval I is located outside J,
128 |     integral of distance from x to J over the interval x \in I.
129 |     This is the *integral* i.e. the sum.
130 |     It's not the mean (not divided by the length of I yet)
131 |     
132 |     :param I: a interval (start, stop), or None
133 |     :param J: a non empty interval, with empty intersection with I
134 |     :return: the integral of distances d(x, J) over x \in I
135 |     """
136 |     if I is None:
137 |         return(0)
138 | 
139 |     j_pivot = get_pivot_j(I, J)
140 |     a = min(I)
141 |     b = max(I)
142 |     return((b-a)*abs((j_pivot - (a+b)/2)))
143 | 
144 | def integral_interval_distance(I, J):
145 |     """
146 |     For any non empty intervals I, J, compute the
147 |     integral of distance from x to J over the interval x \in I.
148 |     This is the *integral* i.e. the sum. 
149 |     It's not the mean (not divided by the length of I yet)
150 |     The interval I can intersect J or not
151 |     
152 |     :param I: a interval (start, stop), or None
153 |     :param J: a non empty interval
154 |     :return: the integral of distances d(x, J) over x \in I
155 |     """
156 |     # I and J are single intervals (not generic sets)
157 |     # I is a predicted interval in the range of affiliation of J
158 |     
159 |     def f(I_cut):
160 |         return(integral_mini_interval(I_cut, J))
161 |     # If I_middle is fully included into J, it is
162 |     # the distance to J is always 0
163 |     def f0(I_middle):
164 |         return(0)
165 | 
166 |     cut_into_three = cut_into_three_func(I, J)
167 |     # Distance for now, not the mean:
168 |     # Distance left: Between cut_into_three[0] and the point min(J)
169 |     d_left = f(cut_into_three[0])
170 |     # Distance middle: Between cut_into_three[1] = I inter J, and J
171 |     d_middle = f0(cut_into_three[1])
172 |     # Distance right: Between cut_into_three[2] and the point max(J)
173 |     d_right = f(cut_into_three[2])
174 |     # It's an integral so summable
175 |     return(d_left + d_middle + d_right)
176 | 
177 | def integral_mini_interval_P_CDFmethod__min_piece(I, J, E):
178 |     """
179 |     Helper of `integral_mini_interval_Pprecision_CDFmethod`
180 |     In the specific case where interval I is located outside J,
181 |     compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with:
182 |     - m the smallest distance from J to E,
183 |     - d_min the smallest distance d(x, J) from x \in I to J
184 |     - d_max the largest distance d(x, J) from x \in I to J
185 |     
186 |     :param I: a single predicted interval, a non empty interval (start, stop)
187 |     :param J: ground truth interval, a non empty interval, with empty intersection with I
188 |     :param E: the affiliation/influence zone for J, represented as a couple (start, stop)
189 |     :return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$
190 |     """
191 |     if interval_intersection(I, J) is not None:
192 |         raise ValueError('I and J should have a void intersection')
193 |     if not interval_subset(J, E):
194 |         raise ValueError('J should be included in E')
195 |     if not interval_subset(I, E):
196 |         raise ValueError('I should be included in E')
197 | 
198 |     e_min = min(E)
199 |     j_min = min(J)
200 |     j_max = max(J)
201 |     e_max = max(E)
202 |     i_min = min(I)
203 |     i_max = max(I)
204 |   
205 |     d_min = max(i_min - j_max, j_min - i_max)
206 |     d_max = max(i_max - j_max, j_min - i_min)
207 |     m = min(j_min - e_min, e_max - j_max)
208 |     A = min(d_max, m)**2 - min(d_min, m)**2
209 |     B = max(d_max, m) - max(d_min, m)
210 |     C = (1/2)*A + m*B
211 |     return(C)
212 | 
213 | def integral_mini_interval_Pprecision_CDFmethod(I, J, E):
214 |     """
215 |     Integral of the probability of distances over the interval I.
216 |     In the specific case where interval I is located outside J,
217 |     compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
218 |     This is the *integral* i.e. the sum (not the mean)
219 |     
220 |     :param I: a single predicted interval, a non empty interval (start, stop)
221 |     :param J: ground truth interval, a non empty interval, with empty intersection with I
222 |     :param E: the affiliation/influence zone for J, represented as a couple (start, stop)
223 |     :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
224 |     """
225 |     integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E)
226 |   
227 |     e_min = min(E)
228 |     j_min = min(J)
229 |     j_max = max(J)
230 |     e_max = max(E)
231 |     i_min = min(I)
232 |     i_max = max(I)
233 |     d_min = max(i_min - j_max, j_min - i_max)
234 |     d_max = max(i_max - j_max, j_min - i_min)
235 |     integral_linear_piece = (1/2)*(d_max**2 - d_min**2)
236 |     integral_remaining_piece = (j_max - j_min)*(i_max - i_min)
237 |     
238 |     DeltaI = i_max - i_min
239 |     DeltaE = e_max - e_min
240 |     
241 |     output = DeltaI - (1/DeltaE)*(integral_min_piece + integral_linear_piece + integral_remaining_piece)
242 |     return(output)
243 | 
244 | def integral_interval_probaCDF_precision(I, J, E):
245 |     """
246 |     Integral of the probability of distances over the interval I.
247 |     Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
248 |     This is the *integral* i.e. the sum (not the mean)
249 |     
250 |     :param I: a single (non empty) predicted interval in the zone of affiliation of J
251 |     :param J: ground truth interval
252 |     :param E: affiliation/influence zone for J
253 |     :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
254 |     """
255 |     # I and J are single intervals (not generic sets)
256 |     def f(I_cut):
257 |         if I_cut is None:
258 |             return(0)
259 |         else:
260 |             return(integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E))
261 |             
262 |     # If I_middle is fully included into J, it is
263 |     # integral of 1 on the interval I_middle, so it's |I_middle|
264 |     def f0(I_middle):
265 |         if I_middle is None:
266 |             return(0)
267 |         else:
268 |             return(max(I_middle) - min(I_middle))
269 |     
270 |     cut_into_three = cut_into_three_func(I, J)
271 |     # Distance for now, not the mean:
272 |     # Distance left: Between cut_into_three[0] and the point min(J)
273 |     d_left = f(cut_into_three[0])
274 |     # Distance middle: Between cut_into_three[1] = I inter J, and J
275 |     d_middle = f0(cut_into_three[1])
276 |     # Distance right: Between cut_into_three[2] and the point max(J)
277 |     d_right = f(cut_into_three[2])
278 |     # It's an integral so summable
279 |     return(d_left + d_middle + d_right)
280 | 
281 | def cut_J_based_on_mean_func(J, e_mean):
282 |     """
283 |     Helper function for the recall.
284 |     Partition J into two intervals: before and after e_mean
285 |     (e_mean represents the center element of E the zone of affiliation)
286 |     
287 |     :param J: ground truth interval
288 |     :param e_mean: a float number (center value of E)
289 |     :return: a couple partitionning J into (J_before, J_after)
290 |     """
291 |     if J is None:
292 |         J_before = None
293 |         J_after = None
294 |     elif e_mean >= max(J):
295 |         J_before = J
296 |         J_after = None
297 |     elif e_mean <= min(J):
298 |         J_before = None
299 |         J_after = J
300 |     else: # e_mean is across J
301 |         J_before = (min(J), e_mean)
302 |         J_after = (e_mean, max(J))
303 |         
304 |     return((J_before, J_after))
305 | 
306 | def integral_mini_interval_Precall_CDFmethod(I, J, E):
307 |     """
308 |     Integral of the probability of distances over the interval J.
309 |     In the specific case where interval J is located outside I,
310 |     compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
311 |     This is the *integral* i.e. the sum (not the mean)
312 |     
313 |     :param I: a single (non empty) predicted interval
314 |     :param J: ground truth (non empty) interval, with empty intersection with I
315 |     :param E: the affiliation/influence zone for J, represented as a couple (start, stop)
316 |     :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
317 |     """
318 |     # The interval J should be located outside I 
319 |     # (so it's either the left piece or the right piece w.r.t I)
320 |     i_pivot = get_pivot_j(J, I)
321 |     e_min = min(E)
322 |     e_max = max(E)
323 |     e_mean = (e_min + e_max) / 2
324 |     
325 |     # If i_pivot is outside E (it's possible), then
326 |     # the distance is worst that any random element within E,
327 |     # so we set the recall to 0
328 |     if i_pivot <= min(E):
329 |         return(0)
330 |     elif i_pivot >= max(E):
331 |         return(0)
332 |     # Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d
333 |     
334 |     cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean)
335 |     J_before = cut_J_based_on_e_mean[0]
336 |     J_after = cut_J_based_on_e_mean[1]
337 |   
338 |     iemin_mean = (e_min + i_pivot)/2
339 |     cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean)
340 |     J_before_closeE = cut_Jbefore_based_on_iemin_mean[0] # before e_mean and closer to e_min than i_pivot ~ J_before_before
341 |     J_before_closeI = cut_Jbefore_based_on_iemin_mean[1] # before e_mean and closer to i_pivot than e_min ~ J_before_after
342 |     
343 |     iemax_mean = (e_max + i_pivot)/2
344 |     cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean)
345 |     J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before
346 |     J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after
347 |     
348 |     if J_before_closeE is not None:
349 |         j_before_before_min = min(J_before_closeE) # == min(J)
350 |         j_before_before_max = max(J_before_closeE)
351 |     else:
352 |         j_before_before_min = math.nan
353 |         j_before_before_max = math.nan
354 |   
355 |     if J_before_closeI is not None:
356 |         j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing
357 |         j_before_after_max = max(J_before_closeI) # == max(J_before)
358 |     else:
359 |         j_before_after_min = math.nan
360 |         j_before_after_max = math.nan
361 |    
362 |     if J_after_closeI is not None:
363 |         j_after_before_min = min(J_after_closeI) # == min(J_after)
364 |         j_after_before_max = max(J_after_closeI) 
365 |     else:
366 |         j_after_before_min = math.nan
367 |         j_after_before_max = math.nan
368 |     
369 |     if J_after_closeE is not None:
370 |         j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing
371 |         j_after_after_max = max(J_after_closeE) # == max(J)
372 |     else:
373 |         j_after_after_min = math.nan
374 |         j_after_after_max = math.nan
375 |   
376 |     # <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE -->
377 |     # j_bb_min       j_bb_max j_ba_min       j_ba_max j_ab_min      j_ab_max j_aa_min      j_aa_max
378 |     # (with `b` for before and `a` for after in the previous variable names)
379 |     
380 |     #                                          vs e_mean  m = min(t-e_min, e_max-t)  d=|i_pivot-t|   min(d,m)                            \int min(d,m)dt   \int d dt        \int_(min(d,m)+d)dt                                    \int_{t \in J}(min(d,m)+d)dt
381 |     # Case J_before_closeE & i_pivot after J   before     t-e_min                    i_pivot-t       min(i_pivot-t,t-e_min) = t-e_min    t^2/2-e_min*t     i_pivot*t-t^2/2  t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t      (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA)
382 |     # Case J_before_closeI & i_pivot after J   before     t-e_min                    i_pivot-t       min(i_pivot-t,t-e_min) = i_pivot-t  i_pivot*t-t^2/2   i_pivot*t-t^2/2  i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2      2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
383 |     # Case J_after_closeI & i_pivot after J    after      e_max-t                    i_pivot-t       min(i_pivot-t,e_max-t) = i_pivot-t  i_pivot*t-t^2/2   i_pivot*t-t^2/2  i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2      2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
384 |     # Case J_after_closeE & i_pivot after J    after      e_max-t                    i_pivot-t       min(i_pivot-t,e_max-t) = e_max-t    e_max*t-t^2/2     i_pivot*t-t^2/2  e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2  (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2)
385 |     #
386 |     # Case J_before_closeE & i_pivot before J  before     t-e_min                    t-i_pivot       min(t-i_pivot,t-e_min) = t-e_min    t^2/2-e_min*t     t^2/2-i_pivot*t  t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t  tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA)
387 |     # Case J_before_closeI & i_pivot before J  before     t-e_min                    t-i_pivot       min(t-i_pivot,t-e_min) = t-i_pivot  t^2/2-i_pivot*t   t^2/2-i_pivot*t  t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t      tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
388 |     # Case J_after_closeI & i_pivot before J   after      e_max-t                    t-i_pivot       min(t-i_pivot,e_max-t) = t-i_pivot  t^2/2-i_pivot*t   t^2/2-i_pivot*t  t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t      tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
389 |     # Case J_after_closeE & i_pivot before J   after      e_max-t                    t-i_pivot       min(t-i_pivot,e_max-t) = e_max-t    e_max*t-t^2/2     t^2/2-i_pivot*t  e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t      (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA)
390 |     
391 |     if i_pivot >= max(J):
392 |         part1_before_closeE = (i_pivot-e_min)*(j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min
393 |         part2_before_closeI = 2*i_pivot*(j_before_after_max-j_before_after_min) - (j_before_after_max**2 - j_before_after_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min
394 |         part3_after_closeI = 2*i_pivot*(j_after_before_max-j_after_before_min) - (j_after_before_max**2 - j_after_before_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min  
395 |         part4_after_closeE = (e_max+i_pivot)*(j_after_after_max-j_after_after_min) - (j_after_after_max**2 - j_after_after_min**2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min
396 |         out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
397 |     elif i_pivot <= min(J):
398 |         part1_before_closeE = (j_before_before_max**2 - j_before_before_min**2) - (e_min+i_pivot)*(j_before_before_max-j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min
399 |         part2_before_closeI = (j_before_after_max**2 - j_before_after_min**2) - 2*i_pivot*(j_before_after_max-j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min
400 |         part3_after_closeI = (j_after_before_max**2 - j_after_before_min**2) - 2*i_pivot*(j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min
401 |         part4_after_closeE = (e_max-i_pivot)*(j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min
402 |         out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
403 |     else:
404 |         raise ValueError('The i_pivot should be outside J')
405 |     
406 |     out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts
407 | 
408 |     # We have for each point t of J:
409 |     # \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d)
410 |     # Since t is a single-point here, and we are in the case where i_pivot is inside E.
411 |     # The integral is then given by:
412 |     # C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt
413 |     #   = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt
414 |     #   = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt]
415 |     #   = |J| - (1/|E|) * out_integral_min_dm_plus_d    
416 |     DeltaJ = max(J) - min(J)
417 |     DeltaE = max(E) - min(E)
418 |     C = DeltaJ - (1/DeltaE) * out_integral_min_dm_plus_d
419 |     
420 |     return(C)
421 | 
422 | def integral_interval_probaCDF_recall(I, J, E):
423 |     """
424 |     Integral of the probability of distances over the interval J.
425 |     Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
426 |     This is the *integral* i.e. the sum (not the mean)
427 | 
428 |     :param I: a single (non empty) predicted interval
429 |     :param J: ground truth (non empty) interval
430 |     :param E: the affiliation/influence zone for J
431 |     :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
432 |     """
433 |     # I and J are single intervals (not generic sets)
434 |     # E is the outside affiliation interval of J (even for recall!)
435 |     # (in particular J \subset E)
436 |     #
437 |     # J is the portion of the ground truth affiliated to I
438 |     # I is a predicted interval (can be outside E possibly since it's recall)
439 |     def f(J_cut):
440 |         if J_cut is None:
441 |             return(0)
442 |         else:
443 |             return integral_mini_interval_Precall_CDFmethod(I, J_cut, E)
444 | 
445 |     # If J_middle is fully included into I, it is
446 |     # integral of 1 on the interval J_middle, so it's |J_middle|
447 |     def f0(J_middle):
448 |         if J_middle is None:
449 |             return(0)
450 |         else:
451 |             return(max(J_middle) - min(J_middle))
452 |     
453 |     cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I
454 |     # since we integrate over J this time.
455 |     #
456 |     # Distance for now, not the mean:
457 |     # Distance left: Between cut_into_three[0] and the point min(I)
458 |     d_left = f(cut_into_three[0])
459 |     # Distance middle: Between cut_into_three[1] = J inter I, and I
460 |     d_middle = f0(cut_into_three[1])
461 |     # Distance right: Between cut_into_three[2] and the point max(I)
462 |     d_right = f(cut_into_three[2])
463 |     # It's an integral so summable
464 |     return(d_left + d_middle + d_right)
465 | 


--------------------------------------------------------------------------------
/affiliation/_single_ground_truth_event.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | import math
 4 | from affiliation._affiliation_zone import (
 5 |         get_all_E_gt_func, 
 6 |         affiliation_partition)
 7 | from affiliation._integral_interval import (
 8 |         integral_interval_distance,
 9 |         integral_interval_probaCDF_precision, 
10 |         integral_interval_probaCDF_recall, 
11 |         interval_length,
12 |         sum_interval_lengths)
13 | 
14 | def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)):
15 |     """
16 |     Compute the individual average distance from Is to a single ground truth J
17 |     
18 |     :param Is: list of predicted events within the affiliation zone of J
19 |     :param J: couple representating the start and stop of a ground truth interval
20 |     :return: individual average precision directed distance number
21 |     """
22 |     if all([I is None for I in Is]): # no prediction in the current area
23 |         return(math.nan) # undefined
24 |     return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is))
25 | 
26 | def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
27 |     """
28 |     Compute the individual precision probability from Is to a single ground truth J
29 |     
30 |     :param Is: list of predicted events within the affiliation zone of J
31 |     :param J: couple representating the start and stop of a ground truth interval
32 |     :param E: couple representing the start and stop of the zone of affiliation of J
33 |     :return: individual precision probability in [0, 1], or math.nan if undefined
34 |     """
35 |     if all([I is None for I in Is]): # no prediction in the current area
36 |         return(math.nan) # undefined
37 |     return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
38 | 
39 | def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)):
40 |     """
41 |     Compute the individual average distance from a single J to the predictions Is
42 |     
43 |     :param Is: list of predicted events within the affiliation zone of J
44 |     :param J: couple representating the start and stop of a ground truth interval
45 |     :return: individual average recall directed distance number
46 |     """
47 |     Is = [I for I in Is if I is not None] # filter possible None in Is
48 |     if len(Is) == 0: # there is no prediction in the current area
49 |         return(math.inf)
50 |     E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf))  # here from the point of view of the predictions
51 |     Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
52 |     return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J))
53 | 
54 | def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
55 |     """
56 |     Compute the individual recall probability from a single ground truth J to Is
57 |     
58 |     :param Is: list of predicted events within the affiliation zone of J
59 |     :param J: couple representating the start and stop of a ground truth interval
60 |     :param E: couple representing the start and stop of the zone of affiliation of J
61 |     :return: individual recall probability in [0, 1]
62 |     """
63 |     Is = [I for I in Is if I is not None] # filter possible None in Is
64 |     if len(Is) == 0: # there is no prediction in the current area
65 |         return(0)
66 |     E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
67 |     Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
68 |     return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
69 | 


--------------------------------------------------------------------------------
/affiliation/generics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | from itertools import groupby
  4 | from operator import itemgetter
  5 | import math
  6 | import gzip
  7 | import glob
  8 | import os
  9 | 
 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]):
 11 |     """
 12 |     Convert a binary vector (indicating 1 for the anomalous instances)
 13 |     to a list of events. The events are considered as durations,
 14 |     i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
 15 |     
 16 |     :param vector: a list of elements belonging to {0, 1}
 17 |     :return: a list of couples, each couple representing the start and stop of
 18 |     each event
 19 |     """
 20 |     positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
 21 |     events = []
 22 |     for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]):
 23 |         cur_cut = list(map(itemgetter(1), g))
 24 |         events.append((cur_cut[0], cur_cut[-1]))
 25 |     
 26 |     # Consistent conversion in case of range anomalies (for indexes):
 27 |     # A positive index i is considered as the interval [i, i+1),
 28 |     # so the last index should be moved by 1
 29 |     events = [(x, y+1) for (x,y) in events]
 30 |         
 31 |     return(events)
 32 | 
 33 | def infer_Trange(events_pred, events_gt):
 34 |     """
 35 |     Given the list of events events_pred and events_gt, get the
 36 |     smallest possible Trange corresponding to the start and stop indexes 
 37 |     of the whole series.
 38 |     Trange will not influence the measure of distances, but will impact the
 39 |     measures of probabilities.
 40 |     
 41 |     :param events_pred: a list of couples corresponding to predicted events
 42 |     :param events_gt: a list of couples corresponding to ground truth events
 43 |     :return: a couple corresponding to the smallest range containing the events
 44 |     """
 45 |     if len(events_gt) == 0:
 46 |         raise ValueError('The gt events should contain at least one event')
 47 |     if len(events_pred) == 0:
 48 |         # empty prediction, base Trange only on events_gt (which is non empty)
 49 |         return(infer_Trange(events_gt, events_gt))
 50 |         
 51 |     min_pred = min([x[0] for x in events_pred])
 52 |     min_gt = min([x[0] for x in events_gt])
 53 |     max_pred = max([x[1] for x in events_pred])
 54 |     max_gt = max([x[1] for x in events_gt])
 55 |     Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
 56 |     return(Trange)
 57 | 
 58 | def has_point_anomalies(events):
 59 |     """
 60 |     Checking whether events contain point anomalies, i.e.
 61 |     events starting and stopping at the same time.
 62 |     
 63 |     :param events: a list of couples corresponding to predicted events
 64 |     :return: True is the events have any point anomalies, False otherwise
 65 |     """
 66 |     if len(events) == 0:
 67 |         return(False)
 68 |     return(min([x[1] - x[0] for x in events]) == 0)
 69 | 
 70 | def _sum_wo_nan(vec):
 71 |     """
 72 |     Sum of elements, ignoring math.isnan ones
 73 |     
 74 |     :param vec: vector of floating numbers
 75 |     :return: sum of the elements, ignoring math.isnan ones
 76 |     """
 77 |     vec_wo_nan = [e for e in vec if not math.isnan(e)]
 78 |     return(sum(vec_wo_nan))
 79 |     
 80 | def _len_wo_nan(vec):
 81 |     """
 82 |     Count of elements, ignoring math.isnan ones
 83 |     
 84 |     :param vec: vector of floating numbers
 85 |     :return: count of the elements, ignoring math.isnan ones
 86 |     """
 87 |     vec_wo_nan = [e for e in vec if not math.isnan(e)]
 88 |     return(len(vec_wo_nan))
 89 | 
 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'):
 91 |     """
 92 |     Load a file compressed with gz, such that each line of the
 93 |     file is either 0 (representing a normal instance) or 1 (representing)
 94 |     an anomalous instance.
 95 |     :param filename: file path to the gz compressed file
 96 |     :return: list of integers with either 0 or 1
 97 |     """
 98 |     with gzip.open(filename, 'rb') as f:
 99 |         content = f.read().splitlines()
100 |     content = [int(x) for x in content]
101 |     return(content)
102 | 
103 | def read_all_as_events():
104 |     """
105 |     Load the files contained in the folder `data/` and convert
106 |     to events. The length of the series is kept.
107 |     The convention for the file name is: `dataset_algorithm.gz`
108 |     :return: two dictionaries:
109 |         - the first containing the list of events for each dataset and algorithm,
110 |         - the second containing the range of the series for each dataset
111 |     """
112 |     filepaths = glob.glob('data/*.gz')
113 |     datasets = dict()
114 |     Tranges = dict()
115 |     for filepath in filepaths:
116 |         vector = read_gz_data(filepath)
117 |         events = convert_vector_to_events(vector)
118 |         # ad hoc cut for those files
119 |         cut_filepath = (os.path.split(filepath)[1]).split('_')
120 |         data_name = cut_filepath[0]
121 |         algo_name = (cut_filepath[1]).split('.')[0]
122 |         if not data_name in datasets:
123 |             datasets[data_name] = dict()
124 |             Tranges[data_name] = (0, len(vector))
125 |         datasets[data_name][algo_name] = events
126 |     return(datasets, Tranges)
127 | 
128 | def f1_func(p, r):
129 |     """
130 |     Compute the f1 function
131 |     :param p: precision numeric value
132 |     :param r: recall numeric value
133 |     :return: f1 numeric value
134 |     """
135 |     return(2*p*r/(p+r))
136 | 


--------------------------------------------------------------------------------
/affiliation/metrics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | from affiliation.generics import (
  4 |         infer_Trange,
  5 |         has_point_anomalies, 
  6 |         _len_wo_nan, 
  7 |         _sum_wo_nan,
  8 |         read_all_as_events)
  9 | from affiliation._affiliation_zone import (
 10 |         get_all_E_gt_func, 
 11 |         affiliation_partition)
 12 | from affiliation._single_ground_truth_event import (
 13 |         affiliation_precision_distance,
 14 |         affiliation_recall_distance,
 15 |         affiliation_precision_proba,
 16 |         affiliation_recall_proba)
 17 | 
 18 | def test_events(events):
 19 |     """
 20 |     Verify the validity of the input events
 21 |     :param events: list of events, each represented by a couple (start, stop)
 22 |     :return: None. Raise an error for incorrect formed or non ordered events
 23 |     """
 24 |     if type(events) is not list:
 25 |         raise TypeError('Input `events` should be a list of couples')
 26 |     if not all([type(x) is tuple for x in events]):
 27 |         raise TypeError('Input `events` should be a list of tuples')
 28 |     if not all([len(x) == 2 for x in events]):
 29 |         raise ValueError('Input `events` should be a list of couples (start, stop)')
 30 |     if not all([x[0] <= x[1] for x in events]):
 31 |         raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
 32 |     if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]):
 33 |         raise ValueError('Couples of input `events` should be disjoint and ordered')
 34 | 
 35 | def pr_from_events(events_pred, events_gt, Trange):
 36 |     """
 37 |     Compute the affiliation metrics including the precision/recall in [0,1],
 38 |     along with the individual precision/recall distances and probabilities
 39 |     
 40 |     :param events_pred: list of predicted events, each represented by a couple
 41 |     indicating the start and the stop of the event
 42 |     :param events_gt: list of ground truth events, each represented by a couple
 43 |     indicating the start and the stop of the event
 44 |     :param Trange: range of the series where events_pred and events_gt are included,
 45 |     represented as a couple (start, stop)
 46 |     :return: dictionary with precision, recall, and the individual metrics
 47 |     """
 48 |     # testing the inputs
 49 |     test_events(events_pred)
 50 |     test_events(events_gt)
 51 |     
 52 |     # other tests
 53 |     minimal_Trange = infer_Trange(events_pred, events_gt)
 54 |     if not Trange[0] <= minimal_Trange[0]:
 55 |         raise ValueError('`Trange` should include all the events')
 56 |     if not minimal_Trange[1] <= Trange[1]:
 57 |         raise ValueError('`Trange` should include all the events')
 58 |     
 59 |     if len(events_gt) == 0:
 60 |         raise ValueError('Input `events_gt` should have at least one event')
 61 | 
 62 |     if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
 63 |         raise ValueError('Cannot manage point anomalies currently')
 64 | 
 65 |     if Trange is None:
 66 |         # Set as default, but Trange should be indicated if probabilities are used
 67 |         raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function')
 68 | 
 69 |     E_gt = get_all_E_gt_func(events_gt, Trange)
 70 |     aff_partition = affiliation_partition(events_pred, E_gt)
 71 | 
 72 |     # Computing precision distance
 73 |     d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
 74 |     
 75 |     # Computing recall distance
 76 |     d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
 77 | 
 78 |     # Computing precision
 79 |     p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
 80 | 
 81 |     # Computing recall
 82 |     p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
 83 | 
 84 |     if _len_wo_nan(p_precision) > 0:
 85 |         p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
 86 |     else:
 87 |         p_precision_average = p_precision[0] # math.nan
 88 |     p_recall_average = sum(p_recall) / len(p_recall)
 89 | 
 90 |     dict_out = dict({'precision': p_precision_average,
 91 |                      'recall': p_recall_average,
 92 |                      'individual_precision_probabilities': p_precision,
 93 |                      'individual_recall_probabilities': p_recall,
 94 |                      'individual_precision_distances': d_precision,
 95 |                      'individual_recall_distances': d_recall})
 96 |     return(dict_out)
 97 | 
 98 | def produce_all_results():
 99 |     """
100 |     Produce the affiliation precision/recall for all files
101 |     contained in the `data` repository
102 |     :return: a dictionary indexed by data names, each containing a dictionary
103 |     indexed by algorithm names, each containing the results of the affiliation
104 |     metrics (precision, recall, individual probabilities and distances)
105 |     """
106 |     datasets, Tranges = read_all_as_events() # read all the events in folder `data`
107 |     results = dict()
108 |     for data_name in datasets.keys():
109 |         results_data = dict()
110 |         for algo_name in datasets[data_name].keys():
111 |             if algo_name != 'groundtruth':
112 |                 results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
113 |                                                          datasets[data_name]['groundtruth'],
114 |                                                          Tranges[data_name])
115 |         results[data_name] = results_data
116 |     return(results)
117 | 


--------------------------------------------------------------------------------
/data/machinetemp_adversary.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_adversary.gz


--------------------------------------------------------------------------------
/data/machinetemp_greenhouse.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_greenhouse.gz


--------------------------------------------------------------------------------
/data/machinetemp_groundtruth.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_groundtruth.gz


--------------------------------------------------------------------------------
/data/machinetemp_lstmad.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_lstmad.gz


--------------------------------------------------------------------------------
/data/machinetemp_luminol.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_luminol.gz


--------------------------------------------------------------------------------
/data/machinetemp_trivial.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/machinetemp_trivial.gz


--------------------------------------------------------------------------------
/data/nyctaxi_adversary.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_adversary.gz


--------------------------------------------------------------------------------
/data/nyctaxi_greenhouse.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_greenhouse.gz


--------------------------------------------------------------------------------
/data/nyctaxi_groundtruth.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_groundtruth.gz


--------------------------------------------------------------------------------
/data/nyctaxi_lstmad.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_lstmad.gz


--------------------------------------------------------------------------------
/data/nyctaxi_luminol.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_luminol.gz


--------------------------------------------------------------------------------
/data/nyctaxi_trivial.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/nyctaxi_trivial.gz


--------------------------------------------------------------------------------
/data/swat_adversary.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_adversary.gz


--------------------------------------------------------------------------------
/data/swat_groundtruth.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_groundtruth.gz


--------------------------------------------------------------------------------
/data/swat_iforest.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_iforest.gz


--------------------------------------------------------------------------------
/data/swat_ocsvm.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_ocsvm.gz


--------------------------------------------------------------------------------
/data/swat_seq2seq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_seq2seq.gz


--------------------------------------------------------------------------------
/data/swat_trivial.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/swat_trivial.gz


--------------------------------------------------------------------------------
/data/twitteraapl_adversary.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_adversary.gz


--------------------------------------------------------------------------------
/data/twitteraapl_greenhouse.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_greenhouse.gz


--------------------------------------------------------------------------------
/data/twitteraapl_groundtruth.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_groundtruth.gz


--------------------------------------------------------------------------------
/data/twitteraapl_lstmad.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_lstmad.gz


--------------------------------------------------------------------------------
/data/twitteraapl_luminol.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_luminol.gz


--------------------------------------------------------------------------------
/data/twitteraapl_trivial.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahstat/affiliation-metrics-py/8d8449858096bbade6a6e70848d05c9cc9b846fe/data/twitteraapl_trivial.gz


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | setup(name='affiliation',
 3 |       version='1.0',
 4 |       description='Compute the affiliation metrics',
 5 |       author='Alexis Huet and others',
 6 |       author_email='alexis.huet@huawei.com',
 7 |       platforms=['any'],
 8 |       license='MIT',
 9 |       url='https://github.com/ahstat/affiliation-metrics-py',
10 |       packages=find_packages(),
11 |       )


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/test_affiliation_zone.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | import math
  6 | from affiliation._affiliation_zone import (
  7 |         E_gt_func,
  8 |         get_all_E_gt_func,
  9 |         affiliation_partition)
 10 | 
 11 | """
 12 | Function `E_gt_func` is correct, even for the borders
 13 | """
 14 | class Test_E_gt_func(unittest.TestCase):
 15 |     def test_generic(self):
 16 |         Trange = (1, 30)
 17 |         events_gt = [(3,7), (10,18), (20,21)]
 18 |         j = 0
 19 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[0], min(Trange))
 20 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[1], (10+7)/2)
 21 |         j = 1
 22 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[0], (10+7)/2)
 23 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[1], (18+20)/2)
 24 |         j = 2
 25 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[0], (18+20)/2)
 26 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[1], max(Trange))
 27 |       
 28 |         # Case j = 1
 29 |         Trange = (1, 30)
 30 |         events_gt = [(3,20)]
 31 |         j = 0
 32 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[0], min(Trange))
 33 |         self.assertEqual(E_gt_func(j, events_gt, Trange)[1], max(Trange))
 34 | 
 35 | """
 36 | Function `get_all_E_gt_func` is correct
 37 | """
 38 | class Test_get_all_E_gt_func(unittest.TestCase):
 39 |     def test_generic(self):
 40 |         Trange = (1, 30)
 41 |         events_gt = [(3,7), (10,18), (20,21)]
 42 |         cut_aff2 = get_all_E_gt_func(events_gt, Trange)
 43 |         self.assertEqual(cut_aff2[0], (min(Trange), (10+7)/2))
 44 |         self.assertEqual(cut_aff2[1], ((10+7)/2, (18+20)/2))
 45 |         self.assertEqual(cut_aff2[2], ((18+20)/2, max(Trange)))
 46 | 
 47 | """
 48 | Function `affiliation_partition` is correct
 49 | """
 50 | class Test_affiliation_partition(unittest.TestCase):
 51 |     def test_precision_direction(self):
 52 |         """
 53 |         Test of the function in the 'precision' direction I --> J  in one example
 54 |         """
 55 |         events_pred = [(1,3), (6,18), (25,26)]
 56 |         events_gt = [(1,8), (16,17), (25,28), (29,31)]
 57 |         Trange = (-math.inf, math.inf)
 58 |         E_gt = get_all_E_gt_func(events_gt, Trange)
 59 |         M = affiliation_partition(events_pred, E_gt)
 60 |         
 61 |         # Check of dimension of the lists
 62 |         self.assertEqual(len(M), len(events_gt))
 63 |         self.assertEqual(len(M[0]), len(events_pred))
 64 |         
 65 |         # First element, related to the first affiliation zone (-inf, 12)
 66 |         self.assertEqual(M[0][0], (1, 3)) # zone1, first prediction
 67 |         self.assertEqual(M[0][1], (6, 12)) # zone1, 2nd prediction
 68 |         self.assertEqual(M[0][2], None) # zone1, 3rd prediction
 69 |         
 70 |         # Second element, related to the second affiliation zone (12, 21)
 71 |         self.assertEqual(M[1][0], None) # zone2, first prediction
 72 |         self.assertEqual(M[1][1], (12, 18)) # zone2, 2nd prediction
 73 |         self.assertEqual(M[1][2], None) # zone2, 3rd prediction
 74 | 
 75 |         # Third element, related to the third affiliation zone (25, 28)
 76 |         self.assertEqual(M[2][0], None) # zone3, first prediction
 77 |         self.assertEqual(M[2][1], None) # zone3, 2nd prediction
 78 |         self.assertEqual(M[2][2], (25, 26)) # zone3, 3rd prediction
 79 |         
 80 |         # Fourth element, related to the fourth affiliation zone (29, 31)
 81 |         self.assertEqual(M[3][0], None) # zone4, first prediction
 82 |         self.assertEqual(M[3][1], None) # zone4, 2nd prediction
 83 |         self.assertEqual(M[3][2], None) # zone4, 3rd prediction
 84 | 
 85 |     def test_single_gt_and_pred(self):
 86 |         """
 87 |         Test of shape of the output of the function with only 
 88 |         one prediction and one ground truth intervals
 89 |         """
 90 |         Trange = (-math.inf, math.inf)
 91 |         
 92 |         events_pred = [(1,3), (5,10)]
 93 |         events_gt = [(1,8), (16,17)]
 94 |         E_gt = get_all_E_gt_func(events_gt, Trange)
 95 |         M0 = affiliation_partition(events_pred, E_gt)
 96 |         
 97 |         # One pred, more than one gt
 98 |         events_pred = [(1,3)]
 99 |         events_gt = [(1,8), (16,17)]
100 |         E_gt = get_all_E_gt_func(events_gt, Trange)
101 |         M1 = affiliation_partition(events_pred, E_gt)
102 |         
103 |         # One gt, more than one pred
104 |         events_pred = [(1,3), (5,10)]
105 |         events_gt = [(1,8)]
106 |         E_gt = get_all_E_gt_func(events_gt, Trange)
107 |         M2 = affiliation_partition(events_pred, E_gt)
108 | 
109 |         events_pred = [(2,3)]
110 |         events_gt = [(1,8)]
111 |         E_gt = get_all_E_gt_func(events_gt, Trange)
112 |         M3 = affiliation_partition(events_pred, E_gt)
113 |         
114 |         # Check of dimension of the lists (here like gt)
115 |         self.assertEqual(len(M0), 2)
116 |         self.assertEqual(len(M1), 2)
117 |         self.assertEqual(len(M2), 1)
118 |         self.assertEqual(len(M3), 1)
119 |         # Check of dimension of the lists (here like pred)
120 |         self.assertEqual(len(M0[0]), 2)
121 |         self.assertEqual(len(M1[0]), 1)
122 |         self.assertEqual(len(M2[0]), 2)
123 |         self.assertEqual(len(M3[0]), 1)
124 |         
125 |     def test_zero_pred(self):
126 |         """
127 |         Test of shape of the output of the function with no
128 |         prediction
129 |         """
130 |         Trange = (-math.inf, math.inf)
131 |         
132 |         events_pred = []
133 |         events_gt = [(1,8), (16,17)]
134 |         E_gt = get_all_E_gt_func(events_gt, Trange)
135 |         M = affiliation_partition(events_pred, E_gt)
136 |         
137 |         # Check of dimension of the lists
138 |         self.assertEqual(len(M), len(events_gt))
139 |         self.assertEqual(len(M[0]), len(events_pred)) # here with len(events_pred) == 0
140 | 
141 | if __name__ == '__main__':
142 |     unittest.main()
143 | 


--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | import glob
  6 | import re
  7 | import math
  8 | from affiliation.generics import (
  9 |         read_gz_data,
 10 |         convert_vector_to_events,
 11 |         f1_func)
 12 | from affiliation.metrics import produce_all_results
 13 | 
 14 | """
 15 | Check reproducibility of the results
 16 | """
 17 | class Test_data(unittest.TestCase):       
 18 |     def test_description_data(self):
 19 |         """
 20 |         Check description of the data sets:
 21 |         - number of instances,
 22 |         - proportion of anomalous instances,
 23 |         - number of events in the ground truth
 24 |         """
 25 |         filepaths = glob.glob('data/*.gz')
 26 |         for filepath in filepaths:
 27 |             vector = read_gz_data(filepath)
 28 |             if re.search('machinetemp_', filepath):
 29 |                 self.assertEqual(len(vector), 17682)
 30 |                 if re.search('_groundtruth', filepath):
 31 |                     self.assertAlmostEqual(sum(vector)/len(vector), 0.0641, places=3)
 32 |                     events = convert_vector_to_events(vector)
 33 |                     self.assertEqual(len(events), 2)
 34 |             if re.search('nyctaxi_', filepath):
 35 |                 self.assertEqual(len(vector), 2307)
 36 |                 if re.search('_groundtruth', filepath):
 37 |                     self.assertAlmostEqual(sum(vector)/len(vector), 0.2691, places=3)
 38 |                     events = convert_vector_to_events(vector)
 39 |                     self.assertEqual(len(events), 3)
 40 |             if re.search('twitteraapl_', filepath):
 41 |                 self.assertEqual(len(vector), 11889)
 42 |                 if re.search('_groundtruth', filepath):
 43 |                     self.assertAlmostEqual(sum(vector)/len(vector), 0.0667, places=3)
 44 |                     events = convert_vector_to_events(vector)
 45 |                     self.assertEqual(len(events), 2)
 46 |             if re.search('swat_', filepath):
 47 |                 self.assertEqual(len(vector), 449919)
 48 |                 if re.search('_groundtruth', filepath):
 49 |                     self.assertAlmostEqual(sum(vector)/len(vector), 0.1214, places=3)
 50 |                     events = convert_vector_to_events(vector)
 51 |                     self.assertEqual(len(events), 35)
 52 | 
 53 |     def test_cells(self):
 54 |         """
 55 |         Check each cell by applying the affiliation metrics
 56 |         """
 57 |         # table from the article to be checked
 58 |         table_in_article = {'machinetemp': {},
 59 |                             'nyctaxi': {},
 60 |                             'twitteraapl': {},
 61 |                             'swat': {}}
 62 |         
 63 |         table_in_article['machinetemp']['trivial']    = '1.00/0.50/0.66'
 64 |         table_in_article['machinetemp']['adversary']  = '0.49/1.00/0.66'
 65 |         table_in_article['machinetemp']['greenhouse'] = '0.71/0.99/0.83'
 66 |         table_in_article['machinetemp']['lstmad']     = '0.50/1.00/0.67'
 67 |         table_in_article['machinetemp']['luminol']    = '0.54/0.99/0.70'
 68 | 
 69 |         table_in_article['nyctaxi']['trivial']        = '1.00/0.30/0.46'
 70 |         table_in_article['nyctaxi']['adversary']      = '0.54/1.00/0.70'
 71 |         table_in_article['nyctaxi']['greenhouse']     = '0.51/0.99/0.67'
 72 |         table_in_article['nyctaxi']['lstmad']         = '0.51/1.00/0.67'
 73 |         table_in_article['nyctaxi']['luminol']        = '0.38/0.79/0.51'
 74 | 
 75 |         table_in_article['twitteraapl']['trivial']    = '1.00/0.49/0.66'
 76 |         table_in_article['twitteraapl']['adversary']  = '0.50/1.00/0.67'
 77 |         table_in_article['twitteraapl']['greenhouse'] = '0.78/0.98/0.87'
 78 |         table_in_article['twitteraapl']['lstmad']     = '0.66/0.99/0.79'
 79 |         table_in_article['twitteraapl']['luminol']    = '0.73/0.98/0.83'
 80 | 
 81 |         table_in_article['swat']['trivial']           = '1.00/0.03/0.06'
 82 |         table_in_article['swat']['adversary']         = '0.53/1.00/0.69'
 83 |         table_in_article['swat']['iforest']           = '0.52/0.84/0.64'
 84 |         table_in_article['swat']['ocsvm']             = '0.65/0.70/0.68'
 85 |         table_in_article['swat']['seq2seq']           = '0.86/0.79/0.83'
 86 | 
 87 |         # checking the table
 88 |         results = produce_all_results() # produce results
 89 |         
 90 |         # Check results related to `best_algos` and `pr_of_best_algo`
 91 |         for data_name in results.keys():
 92 |             for algo_name in results[data_name].keys():
 93 |                 p = results[data_name][algo_name]['precision']
 94 |                 r = results[data_name][algo_name]['recall']
 95 |                 f1 = f1_func(p, r)
 96 |                 # convert to a string with two decimals
 97 |                 p, r, f1 = ['%.2f' % x for x in [p, r, f1]]
 98 |                 cell_obtained_from_results = str(p) + '/' + str(r) + '/' + str(f1)
 99 |                 self.assertEqual(cell_obtained_from_results, table_in_article[data_name][algo_name])
100 | 
101 |     def test_single_events_results(self):
102 |         """
103 |         Check single events results for swat with iforest/seq2seq
104 |         """        
105 |         results = produce_all_results() # produce results
106 |         p_precis = dict()
107 |         p_recall = dict()
108 |         p_f1 = dict()
109 |         for algo_name in ['iforest', 'seq2seq']:
110 |             p_precis_raw = results['swat'][algo_name]['individual_precision_probabilities']
111 |             p_recall_raw = results['swat'][algo_name]['individual_recall_probabilities']
112 |             p_precis[algo_name] = [round(x, 2) for x in p_precis_raw]
113 |             p_recall[algo_name] = [round(x, 2) for x in p_recall_raw]
114 |             p_f1[algo_name] = [round(f1_func(x[0], x[1]), 2) for x in zip(p_precis_raw, p_recall_raw)]
115 | 
116 |         p_out = dict()
117 |         for algo_name in ['iforest', 'seq2seq']:
118 |             p_out[algo_name] = ['%.2f' % x[0] + '/' + '%.2f' % x[1] + '/' + '%.2f' % x[2] for x in zip(p_precis[algo_name], p_recall[algo_name], p_f1[algo_name])]
119 | 
120 |         self.assertEqual(p_out['iforest'][0:6],
121 |                          ['0.37/0.53/0.44',
122 |                           '1.00/0.91/0.95',
123 |                           '0.76/0.99/0.86',
124 |                           'nan/0.00/nan',
125 |                           '0.38/0.60/0.46',
126 |                           '0.09/0.21/0.12'])
127 |         self.assertEqual(p_out['seq2seq'][0:6],
128 |                          ['0.96/1.00/0.98', 
129 |                           '0.86/1.00/0.93', 
130 |                           '0.73/0.78/0.75', 
131 |                           '0.39/0.71/0.50', 
132 |                           '0.71/0.97/0.82', 
133 |                           '0.88/1.00/0.94'])
134 |         
135 |         """
136 |         Check the number of events for which seq2seq is better than iforest
137 |         """
138 |         nb_events = len(p_precis['seq2seq'])
139 |         
140 |         nb_seq2seq_nan_pred = sum([math.isnan(x) for x in p_precis['seq2seq']])
141 |         nb_iforest_nan_pred = sum([math.isnan(x) for x in p_precis['iforest']])
142 |         
143 |         idx_nan1 = [idx for idx, val in enumerate(p_precis['seq2seq']) if math.isnan(val)]
144 |         idx_nan2 = [idx for idx, val in enumerate(p_precis['iforest']) if math.isnan(val)]
145 |         idx_nan = idx_nan1 + idx_nan2
146 |         p_precis_seq2seq_not_nan = [val for idx, val in enumerate(p_precis['seq2seq']) if idx not in idx_nan]
147 |         p_precis_iforest_not_nan = [val for idx, val in enumerate(p_precis['iforest']) if idx not in idx_nan]
148 |         p_recall_seq2seq_not_nan = [val for idx, val in enumerate(p_recall['seq2seq']) if idx not in idx_nan]
149 |         p_recall_iforest_not_nan = [val for idx, val in enumerate(p_recall['iforest']) if idx not in idx_nan]
150 | 
151 |         nb_both_better = sum([(p_seq > p_ifo) and (r_seq > r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)])
152 |         nb_better_precision_only = sum([(p_seq > p_ifo) and (r_seq == r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)])
153 | 
154 |         nb_better_precision_worse_recall = sum([(p_seq > p_ifo) and (r_seq < r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)])
155 |         nb_better_recall_worse_precision = sum([(p_seq < p_ifo) and (r_seq > r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)])
156 |         nb_equivocal = nb_better_precision_worse_recall + nb_better_recall_worse_precision
157 |         
158 |         nb_both_worse = sum([(p_seq < p_ifo) and (r_seq < r_ifo) for p_seq, p_ifo, r_seq, r_ifo in zip(p_precis_seq2seq_not_nan, p_precis_iforest_not_nan, p_recall_seq2seq_not_nan, p_recall_iforest_not_nan)])
159 | 
160 |         # check that number of events is 35
161 |         self.assertEqual(nb_events, 35)
162 |         # check the number of zone without predictions are 6 for seq2seq and 2 for iforest
163 |         self.assertEqual(nb_seq2seq_nan_pred, 6)
164 |         self.assertEqual(nb_iforest_nan_pred, 2)  
165 |         # check that we covered all the possibilities for those specific results
166 |         self.assertEqual(nb_seq2seq_nan_pred + nb_iforest_nan_pred + nb_both_better + nb_better_precision_only + nb_equivocal + nb_both_worse, nb_events)
167 |         # check the number of better elements
168 |         self.assertEqual(nb_both_better + nb_better_precision_only, 21)
169 |         self.assertEqual(nb_both_better, 13)
170 |         self.assertEqual(nb_better_precision_only, 8)
171 |         # check the number of equivocal results
172 |         self.assertEqual(nb_equivocal, 4)
173 |         # check the number of worse results
174 |         self.assertEqual(nb_both_worse, 2)
175 |         
176 | if __name__ == '__main__':
177 |     unittest.main()


--------------------------------------------------------------------------------
/tests/test_generics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | import math
  6 | from affiliation.generics import (
  7 |         convert_vector_to_events,
  8 |         infer_Trange,
  9 |         has_point_anomalies,
 10 |         _sum_wo_nan,
 11 |         _len_wo_nan)
 12 | 
 13 | """
 14 | Function `convert_vector_to_events`
 15 | """
 16 | class Test_convert_vector_to_events(unittest.TestCase):
 17 |     def test_empty(self):
 18 |         """
 19 |         Empty vector gives empty events
 20 |         """
 21 |         tested = convert_vector_to_events([])
 22 |         expected = []
 23 |         self.assertEqual(tested, expected)
 24 |         
 25 |     def test_zeros(self):
 26 |         """
 27 |         Zeros vector gives empty events
 28 |         """
 29 |         tested = convert_vector_to_events([0])
 30 |         expected = []
 31 |         self.assertEqual(tested, expected)
 32 |         
 33 |         tested = convert_vector_to_events([0, 0, 0, 0, 0])
 34 |         expected = []
 35 |         self.assertEqual(tested, expected)
 36 | 
 37 |     def test_ones(self):
 38 |         """
 39 |         Ones vector gives single long event
 40 |         """
 41 |         tested = convert_vector_to_events([1])
 42 |         expected = [(0, 1)] # of length of one index by convention
 43 |         self.assertEqual(tested, expected)
 44 |         
 45 |         tested = convert_vector_to_events([1, 1, 1, 1, 1])
 46 |         expected = [(0, 5)]
 47 |         self.assertEqual(tested, expected)
 48 |         
 49 |     def test_border(self):
 50 |         """
 51 |         Test with elements on the border
 52 |         """
 53 |         tested = convert_vector_to_events([1, 1, 0, 0, 0])
 54 |         expected = [(0, 2)]
 55 |         self.assertEqual(tested, expected)
 56 |         
 57 |         tested = convert_vector_to_events([0, 0, 1, 1, 1])
 58 |         expected = [(2, 5)]
 59 |         self.assertEqual(tested, expected)
 60 |         
 61 |         tested = convert_vector_to_events([1, 0, 1, 1, 1])
 62 |         expected = [(0, 1), (2, 5)]
 63 |         self.assertEqual(tested, expected)
 64 |         
 65 |         tested = convert_vector_to_events([1, 1, 0, 1, 0, 1, 1, 1])
 66 |         expected = [(0, 2), (3, 4), (5, 8)]
 67 |         self.assertEqual(tested, expected)
 68 | 
 69 |     def test_generic(self):
 70 |         """
 71 |         Test without elements on the border, generic case
 72 |         """
 73 |         tested = convert_vector_to_events([0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
 74 |                                            1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
 75 |                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 76 |                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 77 |                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 78 |                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 79 |                                            1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
 80 |         expected = [(6, 12), (17, 20), (60, 63)]
 81 |         self.assertEqual(tested, expected)
 82 | 
 83 | """
 84 | Function `infer_Trange`
 85 | """
 86 | class Test_infer_Trange(unittest.TestCase):
 87 |     def test_empty_predictions(self):
 88 |         """
 89 |         Infer Trange with empty predictions
 90 |         """
 91 |         tested = infer_Trange([], [(-1,2), (3,4), (6,20)])
 92 |         expected = (-1, 20)
 93 |         self.assertEqual(tested, expected)
 94 | 
 95 |     def test_empty_groundtruth(self):
 96 |         """
 97 |         Raise error for empty ground truth
 98 |         """
 99 |         with self.assertRaises(ValueError):
100 |             infer_Trange([(-1,2), (3,4), (6,20)], [])
101 | 
102 |     def test_generic(self):
103 |         """
104 |         Infer Trange with generic predictions/groundtruth
105 |         """
106 |         tested = infer_Trange([(-3, 4), (5, 6)], [(-1,2), (3,4), (6,20)])
107 |         expected = (-3, 20)
108 |         self.assertEqual(tested, expected)
109 |         
110 |         tested = infer_Trange([(-3, 4), (5, 6)], [(-1,2), (3,4)])
111 |         expected = (-3, 6)
112 |         self.assertEqual(tested, expected)
113 |         
114 |         tested = infer_Trange([(0, 4), (5, 6)], [(-1,2), (3,4), (6,20)])
115 |         expected = (-1, 20)
116 |         self.assertEqual(tested, expected)
117 |         
118 |         tested = infer_Trange([(-1,2), (3,4), (6,20)], [(-3, 4), (5, 6)])
119 |         expected = (-3, 20)
120 |         self.assertEqual(tested, expected)
121 | 
122 | """
123 | Function `has_point_anomalies`
124 | """
125 | class Test_has_point_anomalies(unittest.TestCase):
126 |     def test_empty_event(self):
127 |         """
128 |         Test with an empty event
129 |         """
130 |         tested = has_point_anomalies([])
131 |         expected = False # no event, so no point anomaly too
132 |         self.assertEqual(tested, expected)
133 | 
134 |     def test_generic(self):
135 |         """
136 |         Check whether contain point anomalies with generic events
137 |         """
138 |         tested = has_point_anomalies([(1, 2)])
139 |         expected = False
140 |         self.assertEqual(tested, expected)
141 |         
142 |         tested = has_point_anomalies([(1, 2), (3, 4), (8, 10)])
143 |         expected = False
144 |         self.assertEqual(tested, expected)
145 |         
146 |         tested = has_point_anomalies([(1, 2), (3, 3), (8, 10)])
147 |         expected = True
148 |         self.assertEqual(tested, expected)
149 |         
150 |         tested = has_point_anomalies([(1, 1), (3, 3), (8, 8)])
151 |         expected = True
152 |         self.assertEqual(tested, expected)
153 |         
154 |         tested = has_point_anomalies([(1, 1)])
155 |         expected = True
156 |         self.assertEqual(tested, expected)
157 | 
158 | """
159 | Functions `_sum_wo_nan` and `_len_wo_nan`
160 | """
161 | class Test_sum_len_wo_nan(unittest.TestCase):
162 |     def test_empty_event(self):
163 |         """
164 |         Test with an empty event
165 |         """
166 |         tested = _sum_wo_nan([])
167 |         expected = 0
168 |         self.assertEqual(tested, expected)
169 |         
170 |         tested = _len_wo_nan([])
171 |         expected = 0
172 |         self.assertEqual(tested, expected)
173 | 
174 |     def test_generic(self):
175 |         """
176 |         Check with either math.nan or not
177 |         """
178 |         vec = [1, 4, 3]
179 |         tested = _sum_wo_nan(vec)
180 |         expected = 8
181 |         self.assertEqual(tested, expected)
182 |         tested = _len_wo_nan(vec)
183 |         expected = 3
184 |         self.assertEqual(tested, expected)
185 |         
186 |         vec = [1, math.nan, 3]
187 |         tested = _sum_wo_nan(vec)
188 |         expected = 4
189 |         self.assertEqual(tested, expected)
190 |         tested = _len_wo_nan(vec)
191 |         expected = 2
192 |         self.assertEqual(tested, expected)
193 |         
194 |         vec = [math.nan, math.nan, 3]
195 |         tested = _sum_wo_nan(vec)
196 |         expected = 3
197 |         self.assertEqual(tested, expected)
198 |         tested = _len_wo_nan(vec)
199 |         expected = 1
200 |         self.assertEqual(tested, expected)
201 |         
202 |         vec = [math.nan, math.nan, math.nan] # like an empty vec after removing math.nan
203 |         tested = _sum_wo_nan(vec)
204 |         expected = 0
205 |         self.assertEqual(tested, expected)
206 |         tested = _len_wo_nan(vec)
207 |         expected = 0
208 |         self.assertEqual(tested, expected)
209 |         
210 | if __name__ == '__main__':
211 |     unittest.main()
212 | 


--------------------------------------------------------------------------------
/tests/test_integral_interval.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | from affiliation._integral_interval import (
  6 |         interval_length,
  7 |         sum_interval_lengths,
  8 |         interval_intersection,
  9 |         interval_subset,
 10 |         cut_into_three_func,
 11 |         get_pivot_j,
 12 |         integral_mini_interval,
 13 |         integral_interval_distance,
 14 |         integral_mini_interval_P_CDFmethod__min_piece,
 15 |         integral_mini_interval_Pprecision_CDFmethod,
 16 |         integral_interval_probaCDF_precision,
 17 |         cut_J_based_on_mean_func,
 18 |         integral_interval_probaCDF_recall)
 19 | 
 20 | """
 21 | Function `interval_length`
 22 | """
 23 | class Test_interval_length(unittest.TestCase):
 24 |     def test_empty(self):
 25 |         """
 26 |         None interval gives 0 length
 27 |         """
 28 |         tested = interval_length(None)
 29 |         expected = 0
 30 |         self.assertEqual(tested, expected)
 31 |         
 32 |     def test_generic(self):
 33 |         """
 34 |         Test correct length of the interval
 35 |         """
 36 |         tested = interval_length((1, 2))
 37 |         expected = 1
 38 |         self.assertEqual(tested, expected)
 39 |         
 40 |         tested = interval_length((-1, 3.5))
 41 |         expected = 4.5
 42 |         self.assertEqual(tested, expected)
 43 | 
 44 | """
 45 | Function `sum_interval_lengths`
 46 | """
 47 | class Test_sum_interval_lengths(unittest.TestCase):
 48 |     def test_empty(self):
 49 |         """
 50 |         Empty event gives 0 length
 51 |         """
 52 |         tested = sum_interval_lengths([])
 53 |         expected = 0
 54 |         self.assertEqual(tested, expected)
 55 |         
 56 |     def test_generic(self):
 57 |         """
 58 |         Test correct sum of length of the intervals
 59 |         """
 60 |         tested = sum_interval_lengths([(1, 2)])
 61 |         expected = 1
 62 |         self.assertEqual(tested, expected)
 63 |         
 64 |         tested = sum_interval_lengths([(1, 2), (3.5, 4)])
 65 |         expected = 1+0.5
 66 |         self.assertEqual(tested, expected)
 67 | 
 68 | """
 69 | Function `interval_intersection`
 70 | """
 71 | class Test_interval_intersection(unittest.TestCase):
 72 |     def test_empty(self):
 73 |         """
 74 |         None when one or more interval is None
 75 |         """
 76 |         tested = interval_intersection(None, None)
 77 |         expected = None
 78 |         self.assertEqual(tested, expected)
 79 |         
 80 |         tested = interval_intersection(None, (1,2))
 81 |         expected = None
 82 |         self.assertEqual(tested, expected)
 83 |         
 84 |         tested = interval_intersection((1,2), None)
 85 |         expected = None
 86 |         self.assertEqual(tested, expected)
 87 |         
 88 |     def test_generic(self):
 89 |         """
 90 |         Test correct intersection of the intervals
 91 |         """
 92 |         tested = interval_intersection((1, 2), (1, 2))
 93 |         expected = (1, 2)
 94 |         self.assertEqual(tested, expected)
 95 |         
 96 |         tested = interval_intersection((1, 2), (2, 3))
 97 |         expected = None # because intersection or [1, 2) and [2, 3)
 98 |         self.assertEqual(tested, expected)
 99 |         
100 |         tested = interval_intersection((1, 2), (3, 4))
101 |         expected = None
102 |         self.assertEqual(tested, expected)
103 |         
104 |         tested = interval_intersection((1, 3), (2, 4))
105 |         expected = (2, 3)
106 |         self.assertEqual(tested, expected)
107 |         
108 |         tested = interval_intersection((2, 4), (1, 3))
109 |         expected = (2, 3)
110 |         self.assertEqual(tested, expected)
111 |         
112 |         tested = interval_intersection((-1, 5), (1, 3))
113 |         expected = (1, 3)
114 |         self.assertEqual(tested, expected)
115 |                 
116 |         tested = interval_intersection((1, 3), (-1, 5))
117 |         expected = (1, 3)
118 |         self.assertEqual(tested, expected)
119 |         
120 |         tested = interval_intersection((1, 10), (0, 5))
121 |         expected = (1, 5)
122 |         self.assertEqual(tested, expected)
123 | 
124 | """
125 | Function `interval_subset`
126 | """
127 | class Test_interval_subset(unittest.TestCase):
128 |     def test_empty(self):
129 |         """
130 |         Error for empty interval
131 |         """
132 |         with self.assertRaises(TypeError):
133 |             interval_subset(None, None)
134 |         
135 |         with self.assertRaises(TypeError):
136 |             interval_subset(None, (1,2))
137 |             
138 |         with self.assertRaises(TypeError):
139 |             interval_subset((1,2), None)
140 |         
141 |     def test_generic(self):
142 |         """
143 |         Test correct check of subset
144 |         """
145 |         tested = interval_subset((1, 2), (1, 2))
146 |         expected = True
147 |         self.assertEqual(tested, expected)
148 |         
149 |         tested = interval_subset((1, 2), (1, 3))
150 |         expected = True
151 |         self.assertEqual(tested, expected)
152 | 
153 |         tested = interval_subset((1, 2), (0, 3))
154 |         expected = True
155 |         self.assertEqual(tested, expected)
156 |         
157 |         tested = interval_subset((1, 3), (2, 3))
158 |         expected = False
159 |         self.assertEqual(tested, expected)
160 | 
161 |         tested = interval_subset((1, 3), (-1, 2))
162 |         expected = False
163 |         self.assertEqual(tested, expected)
164 |         
165 |         tested = interval_subset((1, 3), (-1, 0))
166 |         expected = False
167 |         self.assertEqual(tested, expected)
168 |         
169 | """
170 | Function `cut_into_three_func`
171 | """
172 | class Test_cut_into_three_func(unittest.TestCase):
173 |     def test_examples(self):
174 |         """
175 |         Example 1
176 |         """
177 |         I = (0, 1.5)
178 |         J = (1, 2)
179 |         tested = cut_into_three_func(I, J)
180 |         self.assertEqual(len(tested), 3)
181 |         self.assertEqual(tested[0], (0, 1))
182 |         self.assertEqual(tested[1], (1, 1.5))
183 |         self.assertEqual(tested[2], None)
184 |         
185 |         """
186 |         Example 2 with elements both before and after
187 |         """
188 |         I = (-1, 10)
189 |         J = (1.4, 2.4)
190 |         tested = cut_into_three_func(I, J)
191 |         self.assertEqual(len(tested), 3)
192 |         self.assertEqual(tested[0], (-1, 1.4))
193 |         self.assertEqual(tested[1], (1.4, 2.4))
194 |         self.assertEqual(tested[2], (2.4, 10))
195 |                
196 |         """
197 |         Example 3 with only elements before
198 |         """
199 |         I = (-1, 1)
200 |         J = (1.4, 2.4)
201 |         tested = cut_into_three_func(I, J)
202 |         self.assertEqual(len(tested), 3)
203 |         self.assertEqual(tested[0], (-1, 1))
204 |         self.assertEqual(tested[1], None)
205 |         self.assertEqual(tested[2], None)
206 |         
207 |         """
208 |         Example 4 with only elements at middle
209 |         """
210 |         I = (1.6, 2)
211 |         J = (1.4, 2.4)
212 |         tested = cut_into_three_func(I, J)
213 |         self.assertEqual(len(tested), 3)
214 |         self.assertEqual(tested[0], None)
215 |         self.assertEqual(tested[1], (1.6, 2))
216 |         self.assertEqual(tested[2], None)
217 | 
218 |         """
219 |         Example 5 with only elements after
220 |         """
221 |         I = (4, 5)
222 |         J = (1.4, 2.4)
223 |         tested = cut_into_three_func(I, J)
224 |         self.assertEqual(len(tested), 3)
225 |         self.assertEqual(tested[0], None)
226 |         self.assertEqual(tested[1], None)
227 |         self.assertEqual(tested[2], (4,5))
228 | 
229 | """
230 | Function `get_pivot_j`
231 | """
232 | class Test_get_pivot_j(unittest.TestCase):
233 |     def test_examples(self):
234 |         """
235 |         Examples
236 |         """
237 |         I = (4, 5)
238 |         J = (1.4, 2.4)
239 |         self.assertEqual(get_pivot_j(I, J), 2.4) # max(J)
240 |         
241 |         I = (0, 1)
242 |         J = (1.4, 2.4)
243 |         self.assertEqual(get_pivot_j(I, J), 1.4) # min(J)
244 |         
245 |         I = (0, 1.5)
246 |         J = (1.4, 2.4)
247 |         with self.assertRaises(ValueError):
248 |             # intersection I inter J is not void
249 |             get_pivot_j(I, J)
250 | 
251 | """
252 | Function `integral_mini_interval`
253 | """
254 | class Test_integral_mini_interval(unittest.TestCase):
255 |     def test_examples(self):
256 |         """
257 |         Examples
258 |         """
259 |         I = (4, 5)
260 |         J = (1.4, 2.4)
261 |         # We look at sum distance between every element of [4,5] to 2.4 the closest element of J
262 |         # Distance is going from 4-2.4 to 5-2.4 i.e. 1.6 to 2.6, and increases linearly
263 |         # There is 1.6 with a time duration of 1, and in addition the triangle 1/2 (integral from 0 to 1 of tdt)
264 |         # Globally 1.6+1/2
265 |         self.assertEqual(integral_mini_interval(I, J), 1.6 + 1/2)
266 |         
267 |         I = (0.1, 1.2)
268 |         J = (1.4, 2.4)
269 |         # We look at sum distance between every element of [0.1,1.2] to 1.4 the closest element of J
270 |         # Distance is going from 1.3 to 0.2 and decreases linearly
271 |         # There is 0.2 with a time duration of deltaI=1.1, and in addition
272 |         # a decreases from 1.1 to 0 during 1.1 (integral from 0 to 1.1 of tdt) which is 1.1^2/2
273 |         # Globally 0.2*1.1+1.1^2/2
274 |         self.assertAlmostEqual(integral_mini_interval(I, J), 0.2*1.1+1.1**2/2)
275 |         
276 |         I = (0, 1.5)
277 |         J = (1.4, 2.4)
278 |         with self.assertRaises(ValueError):
279 |             # intersection I inter J is not void
280 |             integral_mini_interval(I, J)
281 | 
282 | """
283 | Function `integral_interval`
284 | """
285 | class Test_integral_interval(unittest.TestCase):
286 |     def test_examples(self):
287 |         """
288 |         Function integral_interval *for distance* verifies some tests
289 |         """
290 |         ## For I included in J, it's 0
291 |         I = (0, 1.5)
292 |         J = (-1, 2.4)
293 |         self.assertEqual(integral_interval_distance(I, J), 0)
294 |         
295 |         J = (-1, 2.4)
296 |         I = J
297 |         self.assertEqual(integral_interval_distance(I, J), 0)
298 |         
299 |         ## The integral is same from I or I\J
300 |         I = (-10, 1.5)
301 |         J = (-1, 2.4)
302 |         I_minus_J = (-10, -1) # I \ J
303 |         self.assertEqual(integral_interval_distance(I, J),
304 |                          integral_interval_distance(I_minus_J, J))
305 |         
306 |         # previous test
307 |         I = (-10, 20)
308 |         J = (-1, 2.4)
309 |         self.assertEqual(integral_interval_distance(I, J),
310 |                          195.38)
311 | 
312 | """
313 | Function `integral_mini_interval_P_CDFmethod__min_piece`
314 | """
315 | class Test_integral_mini_interval_P_CDFmethod__min_piece(unittest.TestCase):
316 |     def test_examples(self):
317 |         """
318 |         Check this component of `integral_mini_interval_Pprecision_CDFmethod`
319 |         in three cases, by recomputing the formulas
320 |         
321 |         It is three cases with I totally outside J
322 |         I = (i_min, i_max)
323 |         J = (j_min, j_max)
324 |         E = (e_min, e_max)
325 |         """        
326 |         # Closed-form for $C = \int_{d_min}^{d_max} \min(m, x) dx$
327 |         # Case 1: $d_max <= m$:
328 |         # C = \int_{d_min}^{d_max} x dx = (1/2)*(d_max^2 - d_min^2)
329 |         # Case 2: $d_min < m < d_max$:
330 |         # C = \int_{d_min}^{m} x dx + \int_{m}^{d_max} m dx
331 |         #   = (1/2)*(m^2 - d_min^2) + m (d_max - m)
332 |         # Case 3: $m <= d_min$:
333 |         # C = \int_{d_min}^{d_max} m dx = m (d_max - d_min)
334 |         #
335 |         # For the combinated of all three cases, we see first that:
336 |         # A = min(d_max,m)^2 - min(d_min, m)^2 is: d_max^2 - d_min^2 (case1); m^2 - d_min^2 (case2); 0           (case3)
337 |         # and then that:
338 |         # B = max(d_max, m) - max(d_min, m)    is: 0 (case1);                 d_max - m (case2);     d_max-d_min (case3)
339 |         # so that:
340 |         # C = (1/2)*A + m*B
341 |         # It is checked below, for each case C1, C2, C3:
342 | 
343 |         """ Case 1 """
344 |         e_min = 0.7176185
345 |         j_min = 1.570739
346 |         j_max = 1.903998
347 |         e_max = 2.722883
348 |         i_min = 0.924204
349 |         i_max = 1.376826
350 |         d_min = max(i_min - j_max, j_min - i_max) # 0.1939125
351 |         d_max = max(i_max - j_max, j_min - i_min) # 0.6465346
352 |         m = min(j_min - e_min, e_max - j_max) # 0.8188856
353 |         C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.1902024
354 |         C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # 0.17535
355 |         C_case3 = (d_max - d_min)*m # 0.3706457
356 |         A = min(d_max, m)**2 - min(d_min, m)**2 # 0.3804049
357 |         B = max(d_max, m) - max(d_min, m) # 0
358 |         C = (1/2)*A + m*B # 0.1902024
359 |         # Actual test
360 |         I = (i_min, i_max)
361 |         J = (j_min, j_max)
362 |         E = (e_min, e_max)
363 |         self.assertTrue(d_max <= m) # it is the case 1
364 |         self.assertEqual(C_case1, C)
365 |         self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C)
366 |   
367 |         """ Case 2 """
368 |         e_min = 0.3253522
369 |         j_min = 0.5569796
370 |         j_max = 0.8238064
371 |         e_max = 1.403741
372 |         i_min = 0.8751017
373 |         i_max = 1.116294
374 |         d_min = max(i_min - j_max, j_min - i_max) # 0.05129532
375 |         d_max = max(i_max - j_max, j_min - i_min) # 0.2924877
376 |         m = min(j_min - e_min, e_max - j_max) # 0.2316275
377 |         C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.04145893
378 |         C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # 0.03960695
379 |         C_case3 = (d_max - d_min)*m # 0.05586679
380 |         A = min(d_max, m)**2 - min(d_min, m)**2 # 0.05102008
381 |         B = max(d_max, m) - max(d_min, m) # 0.06086027
382 |         C = (1/2)*A + m*B # 0.03960695
383 |         # Actual test
384 |         I = (i_min, i_max)
385 |         J = (j_min, j_max)
386 |         E = (e_min, e_max)
387 |         self.assertTrue(d_min < m) # it is the case 2
388 |         self.assertTrue(m < d_max) # it is the case 2
389 |         self.assertEqual(C_case2, C)
390 |         self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C)
391 | 
392 |         """ Case 3 """
393 |         e_min = 0.6516738
394 |         j_min = 1.523338
395 |         j_max = 1.958426
396 |         e_max = 2.435003
397 |         i_min = 0.767282
398 |         i_max = 0.7753016
399 |         d_min = max(i_min - j_max, j_min - i_max) # 0.7480365
400 |         d_max = max(i_max - j_max, j_min - i_min) # 0.7560561
401 |         m = min(j_min - e_min, e_max - j_max) # 0.4765765
402 |         C_case1 = (1/2)*(d_max - d_min)*(d_max + d_min) # 0.006031113
403 |         C_case2 = (1/2)*(m**2 - d_min**2) + m * (d_max - m) # -0.03302331
404 |         C_case3 = (d_max - d_min)*m # 0.003821954
405 |         A = min(d_max, m)**2 - min(d_min, m)**2 # 0
406 |         B = max(d_max, m) - max(d_min, m) # 0.008019604
407 |         C = (1/2)*A + m*B # 0.003821954
408 |         # Actual test
409 |         I = (i_min, i_max)
410 |         J = (j_min, j_max)
411 |         E = (e_min, e_max)
412 |         self.assertTrue(m <= d_min) # it is the case 3
413 |         self.assertEqual(C_case3, C)
414 |         self.assertEqual(integral_mini_interval_P_CDFmethod__min_piece(I, J, E), C)
415 | 
416 | """
417 | Function `integral_mini_interval_Pprecision_CDFmethod`
418 | """
419 | class Test_integral_mini_interval_Pprecision_CDFmethod(unittest.TestCase):
420 |     def test_symmetric(self):
421 |         """
422 |         Check function `integral_mini_interval_Pprecision_CDFmethod`
423 |         in the symmetric case i.e. J is centered on E,
424 |         and when I goes from the min of E to the max of E.
425 |         
426 |         Check in three cases
427 |         """    
428 |         lists = [dict({'e_min': 0.2655087,
429 |                         'j_min': 0.9202326,
430 |                         'j_max': 1.187741,
431 |                         'e_max': 1.842465}),
432 |                  dict({'e_min': 0.3721239,
433 |                         'j_min': 0.7253212,
434 |                         'j_max': 0.9439665,
435 |                         'e_max': 1.297164}),
436 |                  dict({'e_min': 0.5728534,
437 |                         'j_min': 0.8431135,
438 |                         'j_max': 1.35991,
439 |                         'e_max': 1.63017})]
440 | 
441 |         for my_dict in lists:
442 |             e_min = my_dict['e_min']
443 |             j_min = my_dict['j_min']
444 |             j_max = my_dict['j_max']
445 |             e_max = my_dict['e_max']
446 |             
447 |             # on the left
448 |             i_min_left = e_min
449 |             i_max_left = j_min
450 |             # on the right
451 |             i_min_right = j_max
452 |             i_max_right = e_max        
453 |             m = min(j_min - e_min, e_max - j_max)
454 |             M = max(j_min - e_min, e_max - j_max) # same because symmetric
455 |     
456 |             # Actual test
457 |             I_left = (i_min_left, i_max_left)
458 |             I_right = (i_min_right, i_max_right)
459 |             J = (j_min, j_max)
460 |             E = (e_min, e_max)
461 |             integral_left = integral_mini_interval_Pprecision_CDFmethod(I_left, J, E)
462 |             integral_middle = max(J) - min(J)
463 |             integral_right = integral_mini_interval_Pprecision_CDFmethod(I_right, J, E)
464 |             m = min(J) - min(E)
465 |             M = max(E) - max(J)
466 |             DeltaJ = max(J) - min(J)
467 |             DeltaE = max(E) - min(E)
468 |             self.assertAlmostEqual((1-DeltaJ/DeltaE)*m/2, integral_left)
469 |             self.assertAlmostEqual(DeltaJ, integral_middle)
470 |             self.assertAlmostEqual((1-DeltaJ/DeltaE)*M/2, integral_right)
471 |             # Explanation:
472 |             # In case of symmetry the value is 1 for elements in J,
473 |             # outside, it goes from (1 - DeltaJ/DeltaE) the closer to J,
474 |             # until 0 at min(E) and max(E).
475 |             # Since it's symmetric it decreases always linearly, on both side
476 |             # It is (1 - DeltaJ/DeltaE) and not 1 as the border of J because
477 |             # there is already DeltaJ/DeltaE of the probability took on the interval J
478 |             #
479 |             # So e.g. on the left, it's a triangle of height (1 - DeltaJ/DeltaE) and length
480 |             # m (or M, it's the same since it's symmetric), so the answer.
481 |         
482 |     def test_almost_point(self):
483 |         """
484 |         Check a property of the function 
485 |         `integral_mini_interval_Pprecision_CDFmethod` in the almost point 
486 |         case, i.e. J of duration 1e-9
487 |         Check in three cases
488 |         """    
489 |         lists = [dict({'e_min': 0.2655087,
490 |                         'j_min': 0.9202326,
491 |                         'e_max': 1.842465}),
492 |                  dict({'e_min': 0.3721239,
493 |                         'j_min': 0.7253212,
494 |                         'e_max': 1.297164}),
495 |                  dict({'e_min': 0.5728534,
496 |                         'j_min': 0.8431135,
497 |                         'e_max': 1.63017})]
498 | 
499 |         for my_dict in lists:
500 |             e_min = my_dict['e_min']
501 |             j_min = my_dict['j_min']
502 |             j_max = j_min + 1e-9 # almost point case
503 |             e_max = my_dict['e_max']
504 |             
505 |             # on the left
506 |             i_min_left = e_min
507 |             i_max_left = j_min
508 |             # on the right
509 |             i_min_right = j_max
510 |             i_max_right = e_max        
511 | 
512 |             # Actual test
513 |             I_left = (i_min_left, i_max_left)
514 |             I_right = (i_min_right, i_max_right)
515 |             J = (j_min, j_max)
516 |             E = (e_min, e_max)
517 |             integral_left = integral_mini_interval_Pprecision_CDFmethod(I_left, J, E)
518 |             # integral_middle = max(J) - min(J)
519 |             integral_right = integral_mini_interval_Pprecision_CDFmethod(I_right, J, E)
520 |             DeltaE = max(E) - min(E)
521 |             self.assertAlmostEqual((integral_left + integral_right)/DeltaE, 1/2)
522 |             # Explanation: for point anomaly, the mean value should be 1/2
523 | 
524 | """
525 | Function `integral_interval_probaCDF_precision`
526 | """
527 | class Test_integral_interval_probaCDF_precision(unittest.TestCase):
528 |     def test_basics(self):
529 |         """
530 |         Some tests *for proba_CDF precision* integral
531 |         """
532 |         ## For I close to the border of E, it's close to 0%
533 |         # (after taking the mean i.e. dividing by |I|)
534 |         E = (-3, 3)
535 |         J = (-1, 2.4)
536 |         x = -2.5
537 |         I1 = (-3, x)
538 |         DeltaI1 = max(I1) - min(I1)
539 |         self.assertTrue(integral_interval_probaCDF_precision(I1, J, E) / DeltaI1 < 0.05)
540 | 
541 |         x = -2.8
542 |         I2 = (-3, x)
543 |         DeltaI2 = max(I2) - min(I2)
544 |         self.assertTrue(integral_interval_probaCDF_precision(I2, J, E) / DeltaI2 < integral_interval_probaCDF_precision(I1, J, E) / DeltaI1)
545 | 
546 |         x = -2.99
547 |         I3 = (-3, x)
548 |         DeltaI3 = max(I3) - min(I3)
549 |         self.assertTrue(integral_interval_probaCDF_precision(I3, J, E) / DeltaI3 < integral_interval_probaCDF_precision(I2, J, E) / DeltaI2)
550 |   
551 |     def test_closed(self):
552 |         """
553 |         proba_CDF precision integral verifies closed form integral when I=E
554 |         """
555 |         def closed_form_for_I_equals_to_E_proba_CDF(J, E):
556 |             # The total integral (when I is the whole interval E) is given by the sum:
557 |             # I = (1-DeltaJ/DeltaE)*m/2 + (1-DeltaJ/DeltaE)*M/2 + DeltaJ
558 |             # and M+m = DeltaE - DeltaJ so
559 |             # I = (1-DeltaJ/DeltaE)*(DeltaE - DeltaJ)/2 + DeltaJ
560 |             #   = (DeltaE - DeltaJ - DeltaJ + DeltaJ^2/DeltaE + 2*DeltaJ)/2         (*)
561 |             #   = (DeltaE + DeltaJ^2/DeltaE)/2
562 |             DeltaE = max(E) - min(E)
563 |             DeltaJ = max(J) - min(J)
564 |             return((DeltaE + DeltaJ**2/DeltaE)/2)
565 | 
566 |         E = (-3, 3)
567 |         J = (-1, 2.4)
568 |         I = E
569 |         self.assertAlmostEqual(integral_interval_probaCDF_precision(I, J, E),
570 |                                closed_form_for_I_equals_to_E_proba_CDF(J, E))
571 | 
572 |         E = (-10, 3)
573 |         J = (0, 2.9)
574 |         I = E
575 |         self.assertAlmostEqual(integral_interval_probaCDF_precision(I, J, E),
576 |                                closed_form_for_I_equals_to_E_proba_CDF(J, E))
577 | 
578 | """
579 | Function `cut_J_based_on_mean_func`
580 | """
581 | class Test_cut_J_based_on_mean_func(unittest.TestCase):
582 |     def test_generic(self):
583 |         J = None
584 |         e_mean = 1.5
585 |         tested = cut_J_based_on_mean_func(J, e_mean)
586 |         expected = (None, None)
587 |         self.assertEqual(tested, expected)
588 |         
589 |         J = (2, 3)
590 |         e_mean = 1.5
591 |         tested = cut_J_based_on_mean_func(J, e_mean)
592 |         expected = (None, J)
593 |         self.assertEqual(tested, expected)
594 |         
595 |         J = (0, 1)
596 |         e_mean = 1.5
597 |         tested = cut_J_based_on_mean_func(J, e_mean)
598 |         expected = (J, None)
599 |         self.assertEqual(tested, expected)
600 |         
601 |         J = (0, 5)
602 |         e_mean = 1.5
603 |         tested = cut_J_based_on_mean_func(J, e_mean)
604 |         expected = ((0, 1.5), (1.5, 5))
605 |         self.assertEqual(tested, expected)
606 |         
607 |         J = (0, 1.5)
608 |         e_mean = 1.5
609 |         tested = cut_J_based_on_mean_func(J, e_mean)
610 |         expected = (J, None)
611 |         self.assertEqual(tested, expected)
612 |         
613 |         J = (1.5, 2)
614 |         e_mean = 1.5
615 |         tested = cut_J_based_on_mean_func(J, e_mean)
616 |         expected = (None, J)
617 |         self.assertEqual(tested, expected)
618 | 
619 | 
620 | 
621 | 
622 | 
623 | 
624 | 
625 | 
626 | 
627 | 
628 | 
629 | 
630 | 
631 | 
632 | 
633 | 
634 | 
635 | 
636 | 
637 | 
638 | 
639 | 
640 | 
641 | 
642 | 
643 | 
644 | """
645 | Functions `integral_interval_probaCDF_recall` and `integral_mini_interval_Precall_CDFmethod`
646 | """
647 | class Test_integral_interval_probaCDF_recall(unittest.TestCase):
648 |     def test_almost_point(self):
649 |         """
650 |         Check a property of the function 
651 |         `integral_interval_probaCDF_recall` in the almost point 
652 |         case, i.e. when both I and J are both almost-point anomalies
653 |         Check in three cases
654 |         """    
655 |         size_event = 1e-9 # almost a point anomaly
656 |         # J is an interval of length 2*size_event
657 |         # I is also an interval of 2*length size_event
658 |         # E is a (longer) interval
659 |         # The recall of J from I should be 1 when I is close to J, then decrease to 0 when I is closer and closer to E,
660 |         # and keep to be 0 outside E.
661 |         
662 |         ## We take J at the middle of E
663 |         E = (1, 3)
664 |         J = (2-size_event, 2+size_event)
665 |         DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean
666 |       
667 |         # a. I is at position J, so the recall should be 1
668 |         I = (2-size_event, 2+size_event)
669 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 1)
670 |       
671 |         # b. I is close to J, so the recall should be high
672 |         I = (1.98-size_event, 1.98+size_event)
673 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.95)
674 |       
675 |         # c. I is at middle between max(E) and min(J), so the recall should be 0.5
676 |         I = (1.5-size_event, 1.5+size_event)
677 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5)
678 |         # c'. Same for I at the other side
679 |         I = (2.5-size_event, 2.5+size_event)
680 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5)
681 |       
682 |         # d. I is close to the edge of E, the recall should be low
683 |         I = (1.01-size_event, 1.01+size_event)
684 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1)
685 |         I = (2.99-size_event, 2.99+size_event)
686 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1)
687 |       
688 |         # e. I is at the edge of E, the recall should be 0
689 |         I = (1-size_event, 1+size_event)
690 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
691 |         I = (3-size_event, 3+size_event)
692 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
693 |       
694 |         # f. I is outside E, the recall should be 0
695 |         I = (-4-size_event, -4+size_event)
696 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
697 |         I = (10-size_event, 10+size_event)
698 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
699 |       
700 |     def test_partially_almost_point(self):
701 |         """
702 |         Check the recall probability when J is almost-point anomaly 
703 |         and I is growing
704 |         """    
705 |         size_event = 1e-9 # almost a point anomaly
706 |         # J is an interval of length 2*size_event
707 |         # E is a (longer) interval
708 |         # The recall of J from I should be 1 when I is close to J, then decrease to 0 when I is closer and closer to E,
709 |         # and keep to be 0 outside E.
710 |       
711 |         # In the following, the pivot is has in the previous test, so it does not change anything
712 |         # to have I not a point anomaly for the recall
713 |       
714 |         ## We take J at the middle of E
715 |         E = (1, 3)
716 |         J = (2-size_event, 2+size_event)
717 |         DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean
718 |       
719 |         # J is included in I, so the recall should be 1
720 |         I = (1-size_event, 3+size_event)
721 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 1)
722 |       
723 |         #  I is close to J, so the recall should be high
724 |         I = (1-size_event, 1.98+size_event)
725 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.95)
726 |       
727 |         # c. I is at middle between max(E) and min(J), so the recall should be 0.5
728 |         I = (1-size_event, 1.5+size_event)
729 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5)
730 |         # c'. Same for I at the other side
731 |         I = (2.5-size_event, 3+size_event)
732 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0.5)
733 |       
734 |         # d. I is close to the edge of E, the recall should be low
735 |         I = (1-size_event, 1.01+size_event)
736 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1)
737 |         I = (2.99-size_event, 3+size_event)
738 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ < 0.1)
739 |       
740 |         # e. I is at the edge of E, the recall should be 0
741 |         I = (0-size_event, 1+size_event)
742 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
743 |         I = (3-size_event, 5+size_event)
744 |         self.assertAlmostEqual(integral_interval_probaCDF_recall(I, J, E) / DeltaJ, 0)
745 |       
746 |     def test_special_cases(self):
747 |         """
748 |         Check that when J is as large as E and I is a point-anomaly 
749 |         at the middle of J, the recall is 0.625, 
750 |         and when I is at the border, the recall is 0.25
751 |         (p = 1 in the article)
752 |         """    
753 |         size_event = 1e-9 # almost a point anomaly
754 |         x_center = 2
755 |         delta = 1
756 |         E = (x_center - delta, x_center + delta)
757 |         J = (x_center - delta, x_center + delta)
758 |         DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean
759 |       
760 |         I = (x_center-size_event, x_center+size_event)
761 |         p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ # it's the constant 5/8 when |J|=|E|
762 |         self.assertAlmostEqual(p_recall, 0.625) # 5/8 == 0.625
763 |       
764 |         I = (x_center - delta-size_event, x_center - delta+size_event)
765 |         p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ
766 |         self.assertAlmostEqual(p_recall, 0.25)
767 |       
768 |         I = (x_center + delta - size_event, x_center + delta +size_event)
769 |         p_recall = integral_interval_probaCDF_recall(I, J, E) / DeltaJ
770 |         self.assertAlmostEqual(p_recall, 0.25)
771 | 
772 |     def test_behavior_when_I_increases(self):
773 |         """
774 |         Check that recall is better and better for a prediction I 
775 |         centered in the middle of J that grows symmetrically
776 |         """    
777 |         size_event = 1e-9
778 |         E = (-5, 5)
779 |         J = (-3, 3)
780 |         DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean
781 |         I = (0-size_event, 0+size_event)
782 |         # 0.708 in that case, because more possibility for a random pred to miss the gt event
783 |         # compared to the 0.625 constant when |E|=|J|
784 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E) / DeltaJ > 0.625)
785 |       
786 |         I2 = (-2, 2) # 0.9666 in that case
787 |         self.assertTrue(integral_interval_probaCDF_recall(I2, J, E) / DeltaJ > 0.625)
788 |       
789 |         I1 = (-1, 1) # 0.8666 in that case
790 |         self.assertTrue(integral_interval_probaCDF_recall(I1, J, E) / DeltaJ > 0.625)
791 |       
792 |         # Better recall for I2 compared to I1
793 |         self.assertTrue(integral_interval_probaCDF_recall(I2, J, E) / DeltaJ > integral_interval_probaCDF_recall(I1, J, E) / DeltaJ)
794 |       
795 |         # Better recall for I29 compared to I2
796 |         I29 = (-2.9, 2.9) # 0.999666
797 |         self.assertTrue(integral_interval_probaCDF_recall(I29, J, E) / DeltaJ > integral_interval_probaCDF_recall(I2, J, E) / DeltaJ)
798 |         
799 |     def test_behavior_when_E_increases(self):
800 |         """
801 |         Check that recall goes to 1 when |E| increases to the right 
802 |         without chaning I
803 |         """    
804 |         size_event = 1e-9
805 |         J = (-3, 3)
806 |         DeltaJ = max(J) - min(J) # divide by J the size to obtain the mean
807 |         I = (10, 10+size_event)
808 |       
809 |         # |E| is growing to the right until infinity, recall should be better and better
810 |         E10 = (-10, 10)
811 |         integral_interval_probaCDF_recall(I, J, E10) / DeltaJ # 0
812 |         E12 = (-10, 12)
813 |         integral_interval_probaCDF_recall(I, J, E12) / DeltaJ # 0.1590909
814 |         E18 = (-10, 18)
815 |         integral_interval_probaCDF_recall(I, J, E18) / DeltaJ # 0.3392857
816 |         E30 = (-10, 30)
817 |         integral_interval_probaCDF_recall(I, J, E30) / DeltaJ # 0.5375
818 |         E100 = (-10, 100)
819 |         integral_interval_probaCDF_recall(I, J, E100) / DeltaJ # 0.8318182
820 |         E10000 = (-10, 10000)
821 |         integral_interval_probaCDF_recall(I, J, E10000) / DeltaJ # 0.9981518
822 |       
823 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E10) / DeltaJ < integral_interval_probaCDF_recall(I, J, E12) / DeltaJ)
824 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E12) / DeltaJ < integral_interval_probaCDF_recall(I, J, E18) / DeltaJ)
825 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E18) / DeltaJ < integral_interval_probaCDF_recall(I, J, E30) / DeltaJ)
826 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E30) / DeltaJ < integral_interval_probaCDF_recall(I, J, E100) / DeltaJ)
827 |         self.assertTrue(integral_interval_probaCDF_recall(I, J, E100) / DeltaJ < integral_interval_probaCDF_recall(I, J, E10000) / DeltaJ)
828 | 
829 | if __name__ == '__main__':
830 |     unittest.main()
831 | 


--------------------------------------------------------------------------------
/tests/test_metrics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | import math
  6 | from affiliation.metrics import (
  7 |         test_events,
  8 |         pr_from_events)
  9 | 
 10 | """
 11 | Function `test_events` prevents some mistakes as input
 12 | """
 13 | class Test_test_events(unittest.TestCase):
 14 |     def test_generic(self):
 15 |         with self.assertRaises(TypeError):
 16 |             events = (1,3)
 17 |             test_events(events)
 18 |         with self.assertRaises(TypeError):
 19 |             events = [[1,3],[4,5]]
 20 |             test_events(events)
 21 |         with self.assertRaises(ValueError):
 22 |             events = [(1,3),(4,5,6)]
 23 |             test_events(events)
 24 |         with self.assertRaises(ValueError):
 25 |             events = [(1,3),(5,4)]
 26 |             test_events(events)
 27 |         with self.assertRaises(ValueError):
 28 |             events = [(4,6),(1,2)]
 29 |             test_events(events)
 30 |         with self.assertRaises(ValueError):
 31 |             events = [(4,6),(6,7)] # borders are not disjoint
 32 |             test_events(events)
 33 | 
 34 | """
 35 | Function `pr_from_events`
 36 | """
 37 | class Test_pr_from_events(unittest.TestCase):
 38 |     def test_empty(self):
 39 |         """
 40 |         With empty entries for predictions,
 41 |         the recall should be 0, and the predictions undefined
 42 |         (corresponding to resp. infinite and undefined distances)
 43 |         
 44 |         Note: It is not allowed to have events_gt empty
 45 |         """
 46 |         events_pred = []
 47 |         events_gt = [(1,10)]
 48 |         Trange = (1,10)
 49 |         results = pr_from_events(events_pred, events_gt, Trange)
 50 |         self.assertTrue(math.isnan(results['precision']))
 51 |         self.assertEqual(results['recall'], 0)
 52 |         
 53 |         self.assertEqual(len(results['individual_precision_probabilities']), 1)
 54 |         self.assertEqual(len(results['individual_recall_probabilities']), 1)
 55 |         self.assertEqual(len(results['individual_precision_distances']), 1)
 56 |         self.assertEqual(len(results['individual_recall_distances']), 1)
 57 |         
 58 |         self.assertEqual(results['individual_recall_distances'][0], math.inf)
 59 |         self.assertEqual(results['individual_recall_probabilities'][0], 0)
 60 |         self.assertTrue(math.isnan(results['individual_precision_distances'][0]))
 61 |         self.assertTrue(math.isnan(results['individual_precision_probabilities'][0]))
 62 | 
 63 |     def test_generic_precision_distance(self):
 64 |         """ Example 1 for precision distance """
 65 |         events_pred = [(1,3), (6,18), (25,26)]
 66 |         events_gt = [(1,8), (16,17), (25,28), (29,31)]
 67 |         Trange = (1, 31)
 68 |         results = pr_from_events(events_pred, events_gt, Trange)
 69 |         self.assertEqual(results['individual_precision_distances'],
 70 |                         [8/8, 8.5/6, 0/1, math.nan])
 71 |         # Explanation
 72 |         # For first ground truth, we group the elements of the first column 
 73 |         # for all predictions:
 74 |         # * Prediction 1: [1,3), which is fully inside [1,8) so the distance is 0
 75 |         # * Prediction 2: pred=[6, 12) vs gt1=[1,8) so distance of 8
 76 |         # * Prediction 3: not affiliated with gt1
 77 |         # In total, total distance is 8, for a total interval of 2+6=8, so the mean precision for gt1 is 8/8=1
 78 |         #
 79 |         # For second gt:
 80 |         # * Prediction 1: not affiliated with gt2
 81 |         # * Prediction 2: pred=[12, 18) vs gt=[16,17) so distance of 8.5
 82 |         # * Prediction 3: not affiliated with gt2
 83 |         # In total, total distance is 8.5, for a total interval of 6, so the mean precision for gt1 is 8.5/6
 84 |         # 
 85 |         # For third gt:
 86 |         # Only third prediction [25,26) is affiliated to the gt3=[25,28), and distance is 0 over a predicted
 87 |         # interval of 1, so the distance is 0/1
 88 |         #
 89 |         # For last gt:
 90 |         # No prediction on the affiliated interval, so distance is 0/0 = NaN
 91 | 
 92 |         """ Example 2 for precision distance with one gt and one pred only """
 93 |         events_pred = [(1,3), (5,10)]
 94 |         events_gt = [(1,8), (16,17)]
 95 |         Trange = (1, 31)
 96 |         results0 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances']
 97 |         
 98 |         events_pred = [(1,3)]
 99 |         events_gt = [(1,8), (16,17)]
100 |         results1 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances']
101 |         
102 |         events_pred = [(1,3), (5,10)]
103 |         events_gt = [(1,8)]
104 |         results2 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances']
105 |         
106 |         events_pred = [(2,3)]
107 |         events_gt = [(1,8)]
108 |         results3 = pr_from_events(events_pred, events_gt, Trange)['individual_precision_distances']
109 | 
110 |         self.assertEqual(results0, [(0+2**2/2)/7, math.nan])
111 |         self.assertEqual(results1, [0, math.nan])
112 |         self.assertEqual(results2, [results0[0]]) # answer is still a list, of length 1
113 |         self.assertEqual(results3, [results1[0]])
114 | 
115 |     def test_generic_recall_distance(self):
116 |         """ Example 1 for recall distance """
117 |         events_pred = [(1,3), (6,18), (25,26)]
118 |         events_gt = [(1,8), (16,17), (25,28), (29,31)]
119 |         Trange = (1, 31)
120 |         results = pr_from_events(events_pred, events_gt, Trange)
121 |         self.assertEqual(results['individual_recall_distances'],
122 |                         [2.25/7, 0/1, 2/3, math.inf])
123 |         # Explanation
124 |         #
125 |         # For the first gt:
126 |         # * Recall regarding prediction 1: gt1@aff_pred1=[1,(3+6)/2), and pred1=[1,3), so the distance is (4.5-3)^2/2
127 |         # * Recall regarding prediction 2: gt1@aff_pred2=[(3+6)/2,8), and pred2=[6,18), so the distance is (6-4.5)^2/2
128 |         # * gt1 is not affiliated to the other predictions
129 |         # In total, total distance is (4.5-3)^2/2+(6-4.5)^2/2 = 2.25
130 |         # And the length of gt1 is 7, hence the result.
131 |         #
132 |         # For the second gt:
133 |         # * Recall regarding prediction 2: gt2@aff_pred2=gt2=[16,17), and pred2=[6,18), so the recall distance is 0
134 |         # * gt2 is not affiliated to the other predictions
135 |         # In total, total distance is 0
136 |         # And the length of gt2 is 1, hence the result.
137 |         #
138 |         # For the third gt:
139 |         # * Recall regarding prediction 3: gt3@aff_pred3=gt3=[25,28), and pred3=[25,26), so the recall distance is 2^2/2
140 |         # * gt3 is not affiliated to the other predictions
141 |         # In total, total distance is 2
142 |         # And the length of gt3 is 3, hence the result.
143 |         #
144 |         # For the last gt, there is no affiliated prediction, so the recall distance
145 |         # is infinite.
146 | 
147 |         """ Example 2 for recall distance with one gt and one pred only """
148 |         events_pred = [(1,3), (5,10)]
149 |         events_gt = [(1,8), (16,17)]
150 |         Trange = (1, 31)
151 |         results0 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances']
152 |         
153 |         events_pred = [(1,3)]
154 |         events_gt = [(1,8), (16,17)]
155 |         results1 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances']
156 |         
157 |         events_pred = [(1,3), (5,10)]
158 |         events_gt = [(1,8)]
159 |         results2 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances']
160 |         
161 |         events_pred = [(1,3)]
162 |         events_gt = [(1,8)]
163 |         results3 = pr_from_events(events_pred, events_gt, Trange)['individual_recall_distances']
164 | 
165 |         self.assertEqual(results0, [(1**2/2+1**2/2)/7, math.inf])
166 |         self.assertEqual(results1, [(5**2/2)/7, math.inf])
167 |         self.assertEqual(results2, [results0[0]]) # answer is still a list, of length 1
168 |         self.assertEqual(results3, [results1[0]])
169 |         # Explanation:
170 |         #   for results0:
171 |         # gt1: 4 is the cut of affiliation of gt1 between pred1 and pred2, on both side distance from 0 to 1, for a gt1 of length 7
172 |         # gt2: distance is outside the zone of affiliation, hence infinite
173 |         # 
174 |         #   for results1:
175 |         # gt1: on [3,8), distance from 0 to 5, so 5^2/2, over a total length of gt1 of 7
176 |         # gt2: infinite too
177 |         #
178 |         #   for results2:
179 |         # gt1: like first part of results0
180 |         #
181 |         #   for results3:
182 |         # gt1: like first part of results1
183 |     
184 |     def test_check_coherence(self):
185 |         """
186 |         Check coherence of the results in one example
187 |         """
188 |         events_pred = [(1,3), (6,18), (25,26)]
189 |         events_gt = [(1,8), (16,17), (25,28), (29,31)]
190 |         Trange = (1,40)
191 |         results = pr_from_events(events_pred, events_gt, Trange)
192 |         
193 |         # around the third gt (25,28), only (25,26) is affiliated, with a
194 |         # which is fully included, hence a precision probability of 1
195 |         self.assertEqual(results['individual_precision_probabilities'][2], 1)
196 | 
197 |         # around the fourth gt (29,31), there is no prediction
198 |         # hence a precision probability which is undefined
199 |         # and also gives a recall probability of 0 (and a distance of math.inf)
200 |         self.assertTrue(math.isnan(results['individual_precision_probabilities'][3]))
201 |         self.assertEqual(results['individual_recall_probabilities'][3], 0)
202 |         self.assertEqual(results['individual_recall_distances'][3], math.inf)
203 |         
204 |         # The second gt (16,17) is fully recalled by (6,18), so the recall is 1
205 |         # and the corresponding distance is 0
206 |         self.assertEqual(results['individual_recall_probabilities'][1], 1)
207 |         self.assertEqual(results['individual_recall_distances'][1], 0)
208 |         
209 |     def test_paper(self):
210 |         """
211 |         Example of the paper
212 |         """
213 |         events_gt = [(0, 10*60), (50*60, 70*60), (170*60, 175*60)]
214 |         events_pred = [(5*60,6*60), (7*60,10*60), (11*60,12*60), 
215 |                        (40*60, 60*60), (115*60, 130*60), (135*60, 140*60), 
216 |                        (165*60,170*60)]
217 |         Trange = (0, 180*60)
218 |         results = pr_from_events(events_pred, events_gt, Trange)
219 |         self.assertAlmostEqual(results['individual_precision_distances'],
220 |                                [18, 60*11.5, 60*31.25])
221 |         self.assertAlmostEqual(results['individual_recall_distances'],
222 |                                [76.5, 60*2.5, 60*2.5])
223 |         self.assertAlmostEqual(results['individual_precision_probabilities'][1],
224 |                                0.672222222)
225 |         self.assertAlmostEqual(results['individual_recall_probabilities'][1],
226 |                                0.944444444)
227 | 
228 | if __name__ == '__main__':
229 |     unittest.main()
230 | 


--------------------------------------------------------------------------------
/tests/test_single_ground_truth_event.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import unittest
  4 | 
  5 | import math
  6 | from affiliation._single_ground_truth_event import (
  7 |         affiliation_precision_distance,
  8 |         affiliation_recall_distance,
  9 |         affiliation_precision_proba,
 10 |         affiliation_recall_proba)
 11 | 
 12 | """
 13 | Function `affiliation_precision_distance` is correct
 14 | """
 15 | class Test_affiliation_precision_distance(unittest.TestCase):
 16 |     def test_generic(self):
 17 |         J = (1, 8)        
 18 |         self.assertEqual(affiliation_precision_distance([(1,3)], J), 0)
 19 |         self.assertEqual(affiliation_precision_distance([(1,8)], J), 0)
 20 |         self.assertEqual(affiliation_precision_distance([(1,9)], J), (1/2)/8)
 21 |         self.assertEqual(affiliation_precision_distance([(0,9)], J), 1/9)
 22 |         self.assertEqual(affiliation_precision_distance([(1,2), (3,4)], J), 0)
 23 |         self.assertEqual(affiliation_precision_distance([(7,9)], J), 1/4)
 24 |         self.assertEqual(affiliation_precision_distance([(8,9)], J), 1/2)
 25 |         self.assertEqual(affiliation_precision_distance([(9,10)], J), 3/2)        
 26 |         self.assertEqual(affiliation_precision_distance([(1,2),(9,10)], J), 3/4) 
 27 |        
 28 |         # previous tests
 29 |         # with pred=[6, 12) vs gt=[1,8): 0 at first, then int tdt from 0 to 4 (on [8, 12)), which is 8,
 30 |         # then divided by the length 12-6=6
 31 |         self.assertEqual(affiliation_precision_distance([(6,12)], (1,8)), 8/6)
 32 |         # with pred=[12, 18) vs gt=[16,17): 0 at the middle, on the left (4^2)/2, on the right (1^2)/2, sum is 8.5 
 33 |         # then divided by the length 6
 34 |         self.assertEqual(affiliation_precision_distance([(12,18)], (16,17)), 8.5/6)
 35 |     
 36 |     def test_empty(self):
 37 |         """
 38 |         With empty or None entries, return undefined (represented with math.nan)
 39 |         """
 40 |         J = (1, 8)
 41 |         self.assertTrue(math.isnan(affiliation_precision_distance([], J)))
 42 |         self.assertTrue(math.isnan(affiliation_precision_distance([None, None], J)))
 43 | 
 44 |     def test_paper(self):
 45 |         """
 46 |         Example of the paper
 47 |         """
 48 |         J = (0, 10*60)
 49 |         Is = [(5*60,6*60), (7*60,10*60), (11*60,12*60)]
 50 |         self.assertEqual(affiliation_precision_distance(Is, J), 18)
 51 | 
 52 |         J = (50*60, 70*60)
 53 |         Is = [(40*60, 60*60), (115*60,120*60)]
 54 |         self.assertEqual(affiliation_precision_distance(Is, J), 60*11.5)
 55 | 
 56 |         J = (170*60, 175*60)
 57 |         Is = [(120*60, 130*60), (135*60, 140*60), (165*60,170*60)]
 58 |         self.assertEqual(affiliation_precision_distance(Is, J), 60*31.25)
 59 | 
 60 | """
 61 | Function `affiliation_recall_distance` is correct
 62 | """
 63 | class Test_affiliation_recall_distance(unittest.TestCase):
 64 |     def test_generic(self):
 65 |         J = (1, 8)
 66 |         self.assertEqual(affiliation_recall_distance([(1,3)], J), 0*(2/7) + 2.5*(5/7))
 67 |         self.assertEqual(affiliation_recall_distance([(1,8)], J), 0)
 68 |         self.assertEqual(affiliation_recall_distance([(1,9)], J), 0)
 69 |         self.assertEqual(affiliation_recall_distance([(0,9)], J), 0)
 70 |     
 71 |     def test_empty(self):
 72 |         """
 73 |         With empty or None entries, return +inf (recall is always defined)
 74 |         but here there is no prediction in the zone, meaning that the recall
 75 |         is bad
 76 |         """
 77 |         J = (1, 8)
 78 |         self.assertEqual(affiliation_recall_distance([], J), math.inf)
 79 |         self.assertEqual(affiliation_recall_distance([None, None], J), math.inf)
 80 | 
 81 |     def test_paper(self):
 82 |         """
 83 |         Example of the paper
 84 |         """
 85 |         J = (0, 10*60)
 86 |         Is = [(5*60,6*60), (7*60,10*60), (11*60,12*60)]
 87 |         self.assertEqual(affiliation_recall_distance(Is, J), 76.5)
 88 | 
 89 |         J = (50*60, 70*60)
 90 |         Is = [(40*60, 60*60), (115*60,120*60)]
 91 |         self.assertEqual(affiliation_recall_distance(Is, J), 60*2.5)
 92 | 
 93 |         J = (170*60, 175*60)
 94 |         Is = [(120*60, 130*60), (135*60, 140*60), (165*60,170*60)]
 95 |         self.assertEqual(affiliation_recall_distance(Is, J), 60*2.5)
 96 | 
 97 | """
 98 | Function `affiliation_precision_proba`
 99 | """
100 | class Test_affiliation_precision_proba(unittest.TestCase):
101 |     def test_empty(self):
102 |         """
103 |         With empty or None entries, return undefined (represented with math.nan)
104 |         """
105 |         J = (1, 8)
106 |         E = J
107 |         self.assertTrue(math.isnan(affiliation_precision_proba([], J, E)))
108 |         self.assertTrue(math.isnan(affiliation_precision_proba([None, None], J, E)))
109 | 
110 |     def test_paper(self):
111 |         """
112 |         Example of the paper
113 |         """
114 |         J = (50*60, 70*60)
115 |         Is = [(40*60, 60*60), (115*60,120*60)]
116 |         E = (30*60, 120*60)
117 |         self.assertAlmostEqual(affiliation_precision_proba(Is, J, E), 0.672222222)
118 | 
119 | """
120 | Function `affiliation_recall_proba`
121 | """
122 | class Test_affiliation_recall_proba(unittest.TestCase):
123 |     def test_empty(self):
124 |         """
125 |         With empty or None entries, return 0
126 |         """
127 |         J = (1, 8)
128 |         E = J
129 |         self.assertEqual(affiliation_recall_proba([], J, E), 0)
130 |         self.assertEqual(affiliation_recall_proba([None, None], J, E), 0)
131 | 
132 |     def test_paper(self):
133 |         """
134 |         Example of the paper
135 |         """
136 |         J = (50*60, 70*60)
137 |         Is = [(40*60, 60*60), (115*60,120*60)]
138 |         E = (30*60, 120*60)
139 |         self.assertAlmostEqual(affiliation_recall_proba(Is, J, E), 0.944444444)
140 | 
141 | """
142 | Misc
143 | """
144 | class Test_single_ground_truth_event_misc(unittest.TestCase):
145 |     def test_generic(self):
146 |         """
147 |         Check accordance with previous values
148 |         """
149 |         E = (1, 90+1)
150 |         Is = [(11,30+1),(86,90+1)]
151 |         J = (21, 40+1)
152 |         self.assertAlmostEqual(affiliation_recall_distance(Is, J), 2.5)
153 |         self.assertAlmostEqual(affiliation_precision_distance(Is, J), 11.5)
154 |         self.assertAlmostEqual(affiliation_recall_proba(Is, J, E), 0.944444444)
155 |         self.assertAlmostEqual(affiliation_precision_proba(Is, J, E), 0.672222222)
156 | 
157 | if __name__ == '__main__':
158 |     unittest.main()
159 | 


--------------------------------------------------------------------------------