├── EVT
    ├── __init__.py
    ├── __pycache__
    │   ├── spot.cpython-36.pyc
    │   ├── spot.cpython-37.pyc
    │   └── __init__.cpython-37.pyc
    └── spot.py
├── shape
    ├── __init__.py
    ├── __pycache__
    │   ├── RMDF.cpython-36.pyc
    │   ├── RMDF.cpython-37.pyc
    │   └── __init__.cpython-37.pyc
    └── RMDF.py
├── generator
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── pattern.cpython-36.pyc
    │   ├── pattern.cpython-37.pyc
    │   ├── noise_generator.cpython-36.pyc
    │   ├── noise_generator.cpython-37.pyc
    │   ├── season_generator.cpython-36.pyc
    │   ├── season_generator.cpython-37.pyc
    │   ├── trend_generator.cpython-36.pyc
    │   ├── trend_generator.cpython-37.pyc
    │   ├── abstract_generator.cpython-36.pyc
    │   ├── abstract_generator.cpython-37.pyc
    │   ├── anomaly_generator.cpython-37.pyc
    │   ├── additive_anomaly_generator.cpython-36.pyc
    │   └── additive_anomaly_generator.cpython-37.pyc
    ├── trend_generator.py
    ├── pattern.py
    ├── pearson.py
    ├── abstract_generator.py
    ├── season_generator.py
    ├── noise_generator.py
    ├── additive_anomaly_generator.py
    └── test.py
├── tsagen_visual
    ├── __init__.py
    ├── __pycache__
    │   ├── visual.cpython-36.pyc
    │   ├── visual.cpython-37.pyc
    │   └── __init__.cpython-37.pyc
    └── visual.py
├── requirements.txt
├── setup.py
├── gen_data_for_correlation_analysis.py
├── meta_features.yaml
├── computational_weight_alalysis.py
├── README.md
├── TSAGen.py
├── gen.py
└── Assembler.py


/EVT/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/shape/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/generator/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tsagen_visual/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/requirements.txt


--------------------------------------------------------------------------------
/EVT/__pycache__/spot.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/EVT/__pycache__/spot.cpython-36.pyc


--------------------------------------------------------------------------------
/EVT/__pycache__/spot.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/EVT/__pycache__/spot.cpython-37.pyc


--------------------------------------------------------------------------------
/shape/__pycache__/RMDF.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/shape/__pycache__/RMDF.cpython-36.pyc


--------------------------------------------------------------------------------
/shape/__pycache__/RMDF.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/shape/__pycache__/RMDF.cpython-37.pyc


--------------------------------------------------------------------------------
/EVT/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/EVT/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/shape/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/shape/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/pattern.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/pattern.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/pattern.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/pattern.cpython-37.pyc


--------------------------------------------------------------------------------
/tsagen_visual/__pycache__/visual.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/tsagen_visual/__pycache__/visual.cpython-36.pyc


--------------------------------------------------------------------------------
/tsagen_visual/__pycache__/visual.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/tsagen_visual/__pycache__/visual.cpython-37.pyc


--------------------------------------------------------------------------------
/tsagen_visual/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/tsagen_visual/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/noise_generator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/noise_generator.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/noise_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/noise_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/season_generator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/season_generator.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/season_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/season_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/trend_generator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/trend_generator.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/trend_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/trend_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/abstract_generator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/abstract_generator.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/abstract_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/abstract_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/anomaly_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/anomaly_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/additive_anomaly_generator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/additive_anomaly_generator.cpython-36.pyc


--------------------------------------------------------------------------------
/generator/__pycache__/additive_anomaly_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AprilCal/TSAGen/HEAD/generator/__pycache__/additive_anomaly_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/generator/trend_generator.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/5/14.
 4 | # Trend generator.
 5 | 
 6 | import numpy as np
 7 | import math
 8 | 
 9 | class TrendGenerator():
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def _expression(self,zeta,ba,x):
14 |         k = math.tan(zeta)
15 |         b = ba
16 |         return k*x+b
17 | 
18 |     def _inject(self):
19 |         pass
20 | 
21 |     def gen(self,ba,zeta,size):
22 |         x = np.arange(size)
23 |         trend = []
24 |         for i in x:
25 |             trend.append(self._expression(zeta,ba,i))
26 |         label = np.zeros(size,dtype=np.int)
27 |         return (np.array(trend),label)
28 | 


--------------------------------------------------------------------------------
/generator/pattern.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/6/6.
 4 | # Additive anomaly patterns.
 5 | 
 6 | import numpy as np
 7 | 
 8 | def typeI(w1,w2,h):
 9 |     return list(map(lambda x: exprForTypeII(w1,w2,h,x), np.arange(w1+w2+1)))
10 | 
11 | def typeII(w,h1,h2):
12 |     k = (h2-h1)/(w-1)
13 |     return list(map(lambda x: linear(k,h1,x), np.arange(w+1)))
14 | 
15 | def a(w1,h,x):
16 |     return h*(np.e**((-np.log(1/1000)/w1)*(x-w1)))
17 | 
18 | def b(w1,w2,h,x):
19 |     return h*(np.e**((np.log(1/1000)/w2)*(x-w1)))
20 | 
21 | # function expressions.
22 | def linear(k,b,x):
23 |     return k*x+b
24 | 
25 | def exprForTypeII(w1,w2,h,x):
26 |     if(x<=w1):
27 |         return a(w1,h,x)
28 |     else:
29 |         return b(w1,w2,h,x)


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/5/14。
 4 | # Usage: python setup.py install
 5 | 
 6 | from distutils.core import setup
 7 | setup(name="shape", version="1.0", description="RMDF", author="Chengyu", py_modules=['shape.RMDF'])
 8 | setup(name="generator", version="1.0", description="some generators", author="Chengyu", py_modules=['generator.additive_anomaly_generator','generator.pattern','generator.abstract_generator','generator.noise_generator','generator.trend_generator','generator.season_generator','generator.anomaly_generator'])
 9 | setup(name="EVT", version="1.0", description="Extreme Value Theory", author="Chengyu", py_modules=['EVT.spot'])
10 | setup(name="visual", version="1.0", description="visualization tools", author="Chengyu", py_modules=['tsagen_visual.visual'])


--------------------------------------------------------------------------------
/generator/pearson.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/5/15.
 4 | # Pearson Distribution System.
 5 | 
 6 | # Usage:
 7 | # p = Pearson()
 8 | # p.pearsrnd(nu,sigma,skew,kurt,size)
 9 | # pearsrnd returns a np.array
10 | 
11 | import matlab
12 | import matlab.engine
13 | import numpy as np
14 | 
15 | engine = matlab.engine.start_matlab()
16 | 
17 | class Pearson:
18 |     def __init__(self):
19 |         self.engine = engine # Start MATLAB process
20 |         # engine = matlab.engine.start_matlab("-desktop") # Start MATLAB process with graphic UI
21 |     def pearsrnd(self,mu,sigma,skew,kurt,size):
22 |         result = self.engine.pearsrnd(matlab.double([mu]),
23 |                                     matlab.double([sigma]),
24 |                                     matlab.double([skew]),
25 |                                     matlab.double([kurt]),
26 |                                     matlab.double([1]),
27 |                                     matlab.double([size]))[0]
28 |         return np.array(result)


--------------------------------------------------------------------------------
/gen_data_for_correlation_analysis.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2021/3/8.
 4 | # Generating data for correlation analysis.
 5 | 
 6 | import numpy as np
 7 | import Assembler as assem
 8 | import generator.trend_generator as tg
 9 | import matplotlib.pyplot as plt
10 | import generator.noise_generator as ng
11 | import generator.season_generator as sg
12 | import matplotlib.pyplot as plt
13 | 
14 | # season_generator = sg.SeasonGeneratorWithShapeDeformation(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
15 | season_generator = sg.NormalSeasonGenerator(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
16 | noise_generator = ng.Gaussian()
17 | # noise_generator = ng.GaussianWithChangePoints()
18 | trend_generator = tg.TrendGenerator()
19 | 
20 | season = [season_generator.gen_season() for x in range(1)]
21 | length = len(season[0][0])
22 | noise = noise_generator.gen(0,0.5,length)
23 | trend = trend_generator.gen(15,0,length)
24 | 
25 | # assembler = assem.AbstractAssembler(season,noise,trend,'season')
26 | # assembler = assem.AbstractAssembler(season,noise,trend,'season')
27 | assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'season', q=10e-7,a_type='type2')
28 | assembler.assemble()
29 | assembler.save(path='output/TSACorr')


--------------------------------------------------------------------------------
/meta_features.yaml:
--------------------------------------------------------------------------------
 1 | # Output path
 2 | OUT_PATH: './test_path'
 3 | # Total number of time series.
 4 | TOTAL_NUM: 5
 5 | 
 6 | # A list of 3 elements (e.g., [a, b, c], a is start value, b is end value, c is step length)
 7 | # means generatring this variable in variable-control manner.
 8 | # Only one controlled variable is allowed in a configuration file, and meanwhile, 
 9 | # other variables must be precise values (not meta feature).
10 | 
11 | # A list of 5 elements (e.g., [a, b, c, d, e], i.e., [min, lower_quartile, mid, upper_quartile, max])
12 | # means meta feature. There can be multiple meta features in a configuration file.
13 | 
14 | # We give some examples below.
15 | 
16 | # Use meta features.
17 | FEATURES:
18 | #TREND:
19 |   theta1: [4,5,6,7,8] # level
20 |   theta2: 0.01 # trend slope
21 | 
22 | #SEASON:
23 |   theta3: [2,4,5,6,10]  # amplitude
24 |   theta4: 1000 # cycle length, i.e., 1/frequency.
25 |   theta5: 10 # num_of_cycle
26 |   d: 10 # recursion depth
27 |   d_hat: 8 # forking depth
28 |   k1: 0.2
29 |   k2: 0.2
30 | 
31 | #NOISE:
32 |   theta6: 0 # mean
33 |   theta7: [0.1,0.2,0.3,0.4,1] # std
34 |   theta8: 0 # skew
35 |   theta9: 3 # kurt
36 | 
37 | ANOMALY:
38 |   type: spike # spike/deformation/vanish/typeI/typeII
39 |   severity: 10
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/tsagen_visual/visual.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/6/26
 4 | # visualization tools.
 5 | 
 6 | import matplotlib.pyplot as plt
 7 | import numpy as np
 8 | 
 9 | air_force_blue = '#5D8AA8'
10 | 
11 | def show(values,labels,title='default',a_color='red',dilated=True, figure_size=(8,6)):
12 |     plt.rcParams['pdf.fonttype'] = 42
13 |     plt.rcParams['ps.fonttype'] = 42
14 |     # check length
15 |     if len(values)!=len(labels):
16 |         print('length of values must equal length of labels')
17 |     i = 0
18 |     length = len(values)
19 |     # adjecent label of value 1 will be grouped into the same group
20 |     groups = []
21 |     while i < length:
22 |         if labels[i] == 0:
23 |             i += 1
24 |             continue
25 |         else:
26 |             start = i
27 |             while i<length and labels[i]==1:
28 |                 i += 1
29 |             end = i-1
30 |             groups.append([start,end])
31 |     sub1, ax = plt.subplots(figsize=figure_size)
32 |     ax.plot(np.arange(length),values,color=air_force_blue)
33 |     for p in groups:
34 |         ax.plot(np.arange(p[0],p[1]+1),values[p[0]:p[1]+1],color=a_color)
35 |         ax.plot(np.arange(p[0]-1,p[1]+2),values[p[0]-1:p[1]+2],color=a_color)
36 |     ax.set_xlabel('TIME',size='20')
37 |     ax.set_ylabel('KPI VALUE',size='20')
38 |     plt.title(title)
39 |     return sub1


--------------------------------------------------------------------------------
/computational_weight_alalysis.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import Assembler as assem
 3 | import generator.trend_generator as tg
 4 | import matplotlib.pyplot as plt
 5 | import generator.noise_generator as ng
 6 | import generator.season_generator as sg
 7 | import matplotlib.pyplot as plt
 8 | import time
 9 | 
10 | #====================================================================================
11 | # Group1 
12 | # spike, beat, type1, type2, control noise level.
13 | seas_time_list = []
14 | noise_time_list = []
15 | trend_time_list = []
16 | struc_time_list = []
17 | 
18 | for cycle_num in [10,20,30,40,50,60,70,80,90,100]:
19 |     season_generator = sg.NormalSeasonGenerator(cycle_num,10,1000,drift_a=0,drift_f=0,forking_depth=0)
20 |     noise_generator = ng.Gaussian()
21 |     trend_generator = tg.TrendGenerator()
22 | 
23 |     seas_start = time.time()
24 |     season = season_generator.gen_season()
25 |     seas_end = time.time()
26 |     # print(seas_end-seas_start)
27 | 
28 |     length = len(season[0])
29 | 
30 |     noise_start = time.time()
31 |     noise = [noise_generator.gen(0,2,length)]
32 |     noise_end = time.time()
33 |     # print(noise_end-noise_start)
34 | 
35 |     trend_start = time.time()
36 |     trend = trend_generator.gen(0,0,length)
37 |     trend_end = time.time()
38 |     # print(trend_end-trend_start)
39 | 
40 |     struc_start = time.time()
41 |     assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='beat')
42 | 
43 |     assembler.assemble()
44 |     assembler.save(path='output2/spk')
45 |     struc_end = time.time()
46 |     # print(struc_end-struc_start)
47 | 
48 |     time_seas = seas_end-seas_start
49 |     time_noise = noise_end-noise_start
50 |     time_trend = trend_end-trend_start
51 |     time_struc = struc_end-struc_start
52 |     print(time_seas,time_noise,time_trend,time_struc)
53 |     seas_time_list.append(time_seas)
54 |     noise_time_list.append(time_noise)
55 |     trend_time_list.append(time_trend)
56 |     struc_time_list.append(time_struc)
57 | 
58 | import matplotlib.pyplot as plt
59 | plt.clf()
60 | plt.plot(seas_time_list,label='seas')
61 | plt.plot(noise_time_list,label='noise')
62 | plt.plot(trend_time_list,label='trend')
63 | plt.plot(struc_time_list,label='ano')
64 | plt.legend()
65 | plt.show()
66 | 
67 | print(seas_time_list)
68 | print(noise_time_list)
69 | print(trend_time_list)
70 | print(struc_time_list)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TSAGen
 2 | Repository for paper ''TSAGen: Synthetic Time Series Generation for KPI Anomaly Detection''
 3 | 
 4 | TSAGen is undergoing a refactor and some features are currently unavailable. We are sorry for this inconvenience.
 5 | 
 6 | This repository will be maintained for a long time. Fell free to submit a issue or contact us if you encounter any issue when using TSAGen.
 7 | 
 8 | # Installation
 9 | ```bash
10 | git clone https://github.com/AprilCal/TSAGen.git
11 | cd ./TSAGen
12 | pip install requirements.txt
13 | python setup.py install
14 | ```
15 | 
16 | ## Command Usage
17 | You can use TSAGen in command line.
18 | ```bash
19 | python TSAGen.py -h
20 | python TSAGen.py --mode=m --meta='./meta_features.yaml'
21 | ```
22 | ## API Usage
23 | You can directly use the API provided by TSAGen. The source file ''gen.py'' contains many examples for the API usage.
24 | For example:
25 | ```python
26 | # Choose the generator for season, noise, and trend component, respectively.
27 | season_generator = sg.NormalSeasonGenerator(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
28 | noise_generator = ng.Gaussian()
29 | trend_generator = tg.TrendGenerator()
30 | 
31 | # Control the noise component as a variable.
32 | season = season_generator.gen_season()
33 | length = len(season[0])
34 | noise = [noise_generator.gen(0,sigma,length) for sigma in np.linspace(0.5,2,100)]
35 | trend = trend_generator.gen(0,0,length)
36 | 
37 | # Assemble components.
38 | assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='beat')
39 | assembler.assemble()
40 | ```
41 | ## Define a new type of anomaly
42 | There are hooks reserved for extension. You can easily define a new type of anomaly by extending the AbstractGenerator.
43 | For example, if you want to define a noise anomaly, you can just extend the class 'Pearson' and implement the \_inject() function, just like:
44 | ```python
45 | class PearsonWithChangePoints(Pearson):
46 |     def _inject(self):
47 |     	# anomaly position
48 |         pos_list = [0.5,0.8]
49 |         # anomaly length
50 |         a_len = 20
51 |         for pos in pos_list:
52 |             position = int(pos*len(self.noise))
53 |             # Calculate anomaly noise segment.
54 |             a_segment = np.random.normal(self.mu, self.sigma*10, a_len)
55 |             # Inject anomaly and change the label.
56 |             self.noise[position:position+a_len] = a_segment
57 |             self.label[position:position+a_len] = np.ones(len(a_segment),dtype=np.int)
58 | ```
59 | 
60 | # Notes
61 | Note that, to enable the pearson distribution, the installation of MATLAB and other configurations are required.
62 | Please install Matlab and run the following commands.
63 | ```bash
64 | cd matlabroot/extern/engines/python
65 | python setup.py install
66 | ```
67 | 
68 | 


--------------------------------------------------------------------------------
/generator/abstract_generator.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/5/13.
 4 | # Abstract generators.
 5 | 
 6 | import numpy as np
 7 | import math
 8 | import matplotlib.pyplot as plt
 9 | import pandas as pd
10 | from shape.RMDF import RMDF
11 | 
12 | def sine_p(size):
13 |     # print(np.pi/np.arange(len))
14 |     # print(np.linspace(10,100,91))
15 |     return np.sin(np.linspace(0,np.pi,size))
16 | 
17 | def sine(size):
18 |     return np.sin(np.linspace(0,2*np.pi,size))
19 | 
20 | # std_size is the standard size of a cycle.
21 | # cycle_num is the number of cycles.
22 | # The overall length of returned seasonal component is
23 | # std_size * cycle_number, in the absense of drift.
24 | # drift_a, drift_f are drift factors of amplitude and
25 | # frequency, respectively.
26 | def sine_p_season(std_size, cycle_num, drift_a, drift_f):
27 |     sines = [sine_p(std_size) for x in range(cycle_num)]
28 |     return np.concatenate(sines)
29 | 
30 | def sine_season(std_size, cycle_num, drift_a, drift_f):
31 |     sines = sine(std_size)
32 |     sines = [sine(std_size)[1:] for x in range(cycle_num)]
33 |     return np.concatenate(sines)
34 | 
35 | # Normal season generator
36 | # template method.
37 | # implement hook to custimize anomaly.
38 | class AbstractSeasonGenerator():
39 |     def __init__(self,cycle_num,amplitude,cycle_len,drift_a=0,drift_f=0,forking_depth=0):
40 |         self.cycle_list = []
41 |         self.label_list = []
42 | 
43 |         self.cycle_num = int(cycle_num)
44 |         self.amplitude = amplitude
45 |         self.cycle_len = int(cycle_len)
46 |         self.drift_a = drift_a
47 |         self.drift_f = drift_f
48 |         self.forking_depth = forking_depth
49 | 
50 |         self.drift_a_for_every_cycle = np.random.uniform(1,1+self.drift_a,self.cycle_num)
51 |         self.drift_f_for_every_cycle = np.random.uniform(1,1+self.drift_f,self.cycle_num)
52 | 
53 |         self.cycle_generator = RMDF(depth=10)
54 |         self.cycle_generator.gen_anchor()
55 | 
56 |     def _gen(self):
57 |         amplitude = self.amplitude
58 |         for drift_amp, length_d in zip(self.drift_a_for_every_cycle, self.drift_f_for_every_cycle):
59 |             # apply drift
60 |             length = int(length_d*self.cycle_len)
61 |             season = amplitude*drift_amp*self.cycle_generator.gen(self.forking_depth, length)
62 |             self.cycle_list.append(season)
63 |             self.label_list.append(np.zeros(len(season),dtype=np.int))
64 | 
65 |     # hook
66 |     def _inject(self):
67 |         pass
68 | 
69 |     def gen_season(self):
70 |         self._gen()
71 |         self._inject()
72 |         season = np.concatenate(self.cycle_list)
73 |         label = np.concatenate(self.label_list)
74 |         self.cycle_list = []
75 |         self.label_list = []
76 |         self.cycle_generator.clear_all()
77 |         return (season,label)


--------------------------------------------------------------------------------
/generator/season_generator.py:
--------------------------------------------------------------------------------
 1 | from generator.abstract_generator import AbstractSeasonGenerator
 2 | import matplotlib.pyplot as plt
 3 | import numpy as np
 4 | 
 5 | class NormalSeasonGenerator(AbstractSeasonGenerator):
 6 |     def _inject(self):
 7 |         # do nothing
 8 |         pass
 9 | 
10 | class SeasonGeneratorWithShapeDeformation(AbstractSeasonGenerator):
11 |     def _inject(self):
12 |         pos_list = [5]
13 |         forking_depth = 9
14 |         for pos in pos_list:
15 |             amplitude = self.amplitude
16 |             drift_a = self.drift_a_for_every_cycle[pos]
17 |             length_d = self.drift_f_for_every_cycle[pos]
18 |             anomaly_cycle = drift_a*amplitude*self.cycle_generator.gen(forking_depth, int(length_d*self.cycle_len))
19 |             self.cycle_list[pos] = anomaly_cycle
20 |             self.label_list[pos] = np.ones(len(anomaly_cycle),dtype=np.int)
21 | 
22 | class SeasonGeneratorWithCycleVanish(AbstractSeasonGenerator):
23 |     def _inject(self):
24 |         pos_list = [5]
25 |         forking_depth = 9
26 |         for pos in pos_list:
27 |             amplitude = self.drift_a_for_every_cycle[pos]
28 |             length_d = self.drift_f_for_every_cycle[pos]
29 |             # anomaly_cycle = amplitude*self.cycle_generator.gen(forking_depth, int(length_d*self.cycle_len))
30 |             anomaly_cycle = np.zeros(int(length_d*self.cycle_len))
31 |             self.cycle_list[pos] = anomaly_cycle
32 |             self.label_list[pos] = np.ones(len(anomaly_cycle),dtype=np.int)
33 |             
34 | class example_season_generator(AbstractSeasonGenerator):
35 |     def _inject(self):
36 |         pos_list = [5,6]
37 |         forking_depth = 10
38 |         for pos in pos_list:
39 |             amplitude = self.drift_a_for_every_cycle[pos]
40 |             length_d = self.drift_f_for_every_cycle[pos]
41 |             anomaly_cycle = amplitude*self.cycle_generator.gen(forking_depth, int(length_d*self.cycle_len))
42 |             self.cycle_list[pos] = anomaly_cycle
43 |             self.label_list[pos] = np.ones(len(anomaly_cycle),dtype=np.int)
44 | 
45 | 
46 | class SeasonGeneratorFactory():
47 | 
48 |     def __init__(self,cycle_num,amplitude,cycle_len,drift_a=0,drift_f=0,forking_depth=0):
49 |         self.cycle_num = cycle_num
50 |         self.amplitude = amplitude
51 |         self.cycle_len = cycle_len
52 |         self.drift_a = drift_a
53 |         self.drift_f = drift_f
54 |         self.forking_depth = forking_depth
55 |         
56 |     def get_generator(self,anomaly_type=None):
57 |         if anomaly_type is None:
58 |             return AbstractSeasonGenerator(self.cycle_num,self.amplitude,self.cycle_len,self.drift_a,self.drift_f,self.forking_depth)
59 |         elif anomaly_type == 'deformation':
60 |             return SeasonGeneratorWithShapeDeformation(self.cycle_num,self.amplitude,self.cycle_len,self.drift_a,self.drift_f,self.forking_depth)
61 |         elif anomaly_type == 'vanish':
62 |             return SeasonGeneratorWithCycleVanish(self.cycle_num,self.amplitude,self.cycle_len,self.drift_a,self.drift_f,self.forking_depth)


--------------------------------------------------------------------------------
/generator/noise_generator.py:
--------------------------------------------------------------------------------
 1 | #!python3.6
 2 | 
 3 | # Created by Chengyu on 2020/5/14。
 4 | # Noise Generator.
 5 | 
 6 | import numpy as np
 7 | import matlab
 8 | import matlab.engine
 9 | 
10 | class Gaussian():
11 |     def __init__(self):
12 |         self.noise = []
13 |         self.label = []
14 |         self.sigma = 0
15 |         pass
16 |     
17 |     def _inject(self):
18 |         pass
19 | 
20 |     def gen(self, mu, sigma, size):
21 |         self.noise = np.random.normal(mu, sigma, size)
22 |         self.label = np.zeros(size, dtype=np.int)
23 |         self.mu = mu
24 |         self.sigma = sigma
25 |         self._inject()
26 |         return (self.noise, self.label)
27 | 
28 | class GaussianWithChangePoints(Gaussian):
29 |     def _inject(self):
30 |         pos_list = [0.5,0.8]
31 |         a_len = 20
32 |         for pos in pos_list:
33 |             position = int(pos*len(self.noise))
34 |             a_segment = np.random.normal(self.mu, self.sigma*10, a_len)
35 |             self.noise[position:position+a_len] = a_segment
36 |             self.label[position:position+a_len] = np.ones(len(a_segment),dtype=np.int)
37 | 
38 | print("starting matlab.")
39 | engine = matlab.engine.start_matlab()
40 | 
41 | class Pearson:
42 |     def __init__(self):
43 |         # print("starting matlab.")
44 |         self.engine = engine # Start MATLAB process
45 |         # engine = matlab.engine.start_matlab("-desktop") # Start MATLAB process with graphic UI
46 | 
47 |         self.noise = []
48 |         self.label = []
49 | 
50 |     def gen(self,mu,sigma,skew,kurt,size):
51 |         
52 |         self.mu = mu
53 |         self.sigma = sigma
54 |         self.skew = skew
55 |         self.kurt = kurt
56 | 
57 |         self.noise = self.engine.pearsrnd(matlab.double([mu]),
58 |                                     matlab.double([sigma]),
59 |                                     matlab.double([skew]),
60 |                                     matlab.double([kurt]),
61 |                                     matlab.double([1]),
62 |                                     matlab.double([size]))[0]
63 |                                     
64 |         self.label = np.zeros(size, dtype=np.int)
65 |         self._inject()
66 |         return (np.array(self.noise), np.array(self.label))
67 |     
68 |     def _inject(self):
69 |         pass
70 | 
71 | class PearsonWithChangePoints(Pearson):
72 |     def _inject(self):
73 |         pos_list = [0.5,0.8]
74 |         a_len = 20
75 |         for pos in pos_list:
76 |             position = int(pos*len(self.noise))
77 |             a_segment = np.random.normal(self.mu, self.sigma*10, a_len)
78 |             self.noise[position:position+a_len] = a_segment
79 |             self.label[position:position+a_len] = np.ones(len(a_segment),dtype=np.int)
80 | 
81 | class NoiseGeneratorFactory():
82 | 
83 |     def __init__(self):
84 |         pass
85 | 
86 |     def get_generator(self,anomaly_type=None):
87 |         if anomaly_type is None:
88 |             return Pearson()
89 |         elif anomaly_type == 'change_points':
90 |             return PearsonWithChangePoints()


--------------------------------------------------------------------------------
/TSAGen.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created by Chengyu on 2021/7/18.
  3 | '''
  4 | 
  5 | import argparse
  6 | from numpy.core.fromnumeric import size
  7 | from numpy.lib.function_base import iterable
  8 | import yaml
  9 | import os
 10 | import numpy as np
 11 | import Assembler as assem
 12 | import generator.trend_generator as tg
 13 | import generator.noise_generator as ng
 14 | import generator.season_generator as sg
 15 | 
 16 | def r(zeta):
 17 |     pos = np.random.choice([1,2,3,4],1,p=[1/4,1/4,1/4,1/4])
 18 |     if pos == 1:
 19 |         return np.random.uniform(zeta[0],zeta[1])
 20 |     elif pos == 2:
 21 |         return np.random.uniform(zeta[1],zeta[2])
 22 |     elif pos == 3:
 23 |         return np.random.uniform(zeta[2],zeta[3])
 24 |     else:
 25 |         return np.random.uniform(zeta[3],zeta[4])
 26 | 
 27 | def gen_theta(theta,size):
 28 |     theta_list = [r(theta) for x in range(0,size)]
 29 |     return theta_list
 30 | 
 31 | def pre_process_config(config):
 32 |     # Deal with season features.
 33 |     for feature in config:
 34 |         # print(config['SEASON'][feature])
 35 |         if not isinstance(config[feature], list):
 36 |             # padding
 37 |             config[feature]=[config[feature] for x in range(0,5)]
 38 |     return config
 39 | 
 40 | def gen_m(config):
 41 |     num = config['TOTAL_NUM']
 42 |     features = pre_process_config(config['FEATURES'])
 43 |     # print(features)
 44 |     theta1_list = gen_theta(features['theta1'],num)
 45 |     theta2_list = gen_theta(features['theta2'],num)
 46 |     theta3_list = gen_theta(features['theta3'],num)
 47 |     theta4_list = gen_theta(features['theta4'],num)
 48 |     theta5_list = gen_theta(features['theta5'],num)
 49 |     theta6_list = gen_theta(features['theta6'],num)
 50 |     theta7_list = gen_theta(features['theta7'],num)
 51 |     theta8_list = gen_theta(features['theta8'],num)
 52 |     theta9_list = gen_theta(features['theta9'],num)
 53 |     k1_list = gen_theta(features['k1'],num)
 54 |     k2_list = gen_theta(features['k2'],num)
 55 |     season_list = []
 56 |     noise_list = []
 57 |     trend_list = []
 58 |     for i in range(0,num):
 59 |         noise_generator = ng.NoiseGeneratorFactory().get_generator(None)
 60 |         trend_generator = tg.TrendGenerator()
 61 |         season_generator = sg.SeasonGeneratorFactory(theta5_list[i],theta3_list[i],theta4_list[i],k1_list[i],k2_list[i],forking_depth=7).get_generator(None)
 62 | 
 63 |         season = season_generator.gen_season()
 64 |         noise = noise_generator.gen(theta6_list[i],theta7_list[i],theta8_list[i],theta9_list[i],len(season[0]))
 65 |         trend = trend_generator.gen(theta1_list[i],theta2_list[i],len(season[0]))
 66 |         season_list.append(season)
 67 |         noise_list.append(noise)
 68 |         trend_list.append(trend)
 69 |         
 70 | 
 71 |         # print(noise[1].shape,trend[1].shape,season[1].shape)
 72 |         # print(noise[0].shape,trend[0].shape,season[0].shape)
 73 |         print(noise)
 74 |         print(trend)
 75 |         print(season)
 76 |     assembler = assem.AbstractAssembler(season_list,noise_list,trend_list)
 77 |     assembler.assemble()
 78 |     assembler.save(path=out_path)
 79 | 
 80 | def gen_v(config):
 81 |     config
 82 |     # season_generator = sg.SeasonGeneratorFactory(10,10,200,drift_a=0,drift_f=0,forking_depth=7,'vanish')
 83 |     noise_generator = ng.Gaussian()
 84 |     trend_generator = tg.TrendGenerator()
 85 | 
 86 | if __name__ == "__main__":
 87 |     parser = argparse.ArgumentParser(description='TSAGen help document.')
 88 |     parser.add_argument('--mode', type=str, choices=['m', 'v'], help='generation mode. m: meta manner; v: variable-control manner.')
 89 |     parser.add_argument('--meta', type=str, help='meta features path')
 90 | 
 91 |     args = parser.parse_args()
 92 | 
 93 |     meta_path = args.meta
 94 |     
 95 |     with open(meta_path) as f:
 96 |         config = yaml.load(f,Loader=yaml.FullLoader)
 97 |         out_path = config['OUT_PATH']
 98 |         if not os.path.exists(out_path):
 99 |             os.mkdir(out_path)
100 | 
101 |         if args.mode=='m':
102 |             gen_m(config)
103 |         else:
104 |             gen_v(config)


--------------------------------------------------------------------------------
/gen.py:
--------------------------------------------------------------------------------
  1 | #!python3.6
  2 | 
  3 | # Created by Chengyu on 2021/1/2.
  4 | # examples.
  5 | 
  6 | import numpy as np
  7 | import Assembler as assem
  8 | import generator.trend_generator as tg
  9 | import generator.noise_generator as ng
 10 | import generator.season_generator as sg
 11 | import matplotlib.pyplot as plt
 12 | 
 13 | #====================================================================================
 14 | # Group1 
 15 | # spike, beat, type1, type2, control noise level.
 16 | season_generator = sg.NormalSeasonGenerator(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
 17 | noise_generator = ng.Gaussian()
 18 | trend_generator = tg.TrendGenerator()
 19 | 
 20 | season = season_generator.gen_season()
 21 | length = len(season[0])
 22 | noise = [noise_generator.gen(0,sigma,length) for sigma in np.linspace(0.5,2,100)]
 23 | trend = trend_generator.gen(0,0,length)
 24 | 
 25 | # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='spike')
 26 | assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='beat')
 27 | # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='type1')
 28 | # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'noise',10e-7,0.2,a_type='type2')
 29 | assembler.assemble()
 30 | 
 31 | # assembler.save(path='output/TSABen/group_2/spike')
 32 | # assembler.save(path='output/TSABen/group_1/beat_noise')
 33 | # assembler.save(path='output/TSABen/group_1/type1_noise')
 34 | # assembler.save(path='output/TSABen/group_1/type2_noise')
 35 | 
 36 | #====================================================================================
 37 | # Group2
 38 | # spike, beat, type1, type2
 39 | # season_generator = sg.NormalSeasonGenerator(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
 40 | # noise_generator = ng.Gaussian()
 41 | # trend_generator = tg.TrendGenerator()
 42 | 
 43 | # season = [season_generator.gen_season() for x in range(100)]
 44 | # length = len(season[0][0])
 45 | # noise = noise_generator.gen(0,0.5,length)
 46 | # trend = trend_generator.gen(0,0,length)
 47 | 
 48 | # # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'season',10e-7,0.2,a_type='spike')
 49 | # # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'season',10e-7,0.2,a_type='beat')
 50 | # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'season',10e-7,0.2,a_type='type1')
 51 | # # assembler = assem.AssemblerWithAdditiveAnomalyInjector_v1(season,noise,trend,'season',10e-7,0.2,a_type='type2')
 52 | # assembler.assemble()
 53 | 
 54 | # # assembler.save(path='output/TSABen/group_2/spike')
 55 | # # assembler.save(path='output/TSABen/group_2/beat')
 56 | # assembler.save(path='output/TSABen/group_2/type1')
 57 | # # assembler.save(path='output/TSABen/group_2/type2')
 58 | 
 59 | #====================================================================================
 60 | # Group2
 61 | # cycle deformation.
 62 | # season_generator = sg.SeasonGeneratorWithShapeDeformation(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
 63 | # noise_generator = ng.Gaussian()
 64 | # trend_generator = tg.TrendGenerator()
 65 | 
 66 | # season = [season_generator.gen_season() for x in range(100)]
 67 | # length = len(season[0][0])
 68 | # noise = noise_generator.gen(0,0.5,length)
 69 | # trend = trend_generator.gen(15,0,length)
 70 | 
 71 | # assembler = assem.AbstractAssembler(season,noise,trend,'season')
 72 | # assembler.assemble()
 73 | # # assembler.save(path='output/TSABen/group_2/cycle_deformation')
 74 | 
 75 | # cycle vanish.
 76 | # season_generator = sg.SeasonGeneratorWithCycleVanish(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
 77 | # noise_generator = ng.Gaussian()
 78 | # trend_generator = tg.TrendGenerator()
 79 | 
 80 | # season = [season_generator.gen_season() for x in range(100)]
 81 | # length = len(season[0][0])
 82 | # noise = noise_generator.gen(0,0.5,length)
 83 | # trend = trend_generator.gen(15,0,length)
 84 | 
 85 | # assembler = assem.AbstractAssembler(season,noise,trend,'season')
 86 | # assembler.assemble()
 87 | # assembler.save(path='output/TSABen/group_2/cycle_vanish')
 88 | 
 89 | # change point
 90 | # season_generator = sg.NormalSeasonGenerator(10,10,200,drift_a=0,drift_f=0,forking_depth=7)
 91 | # noise_generator = ng.GaussianWithChangePoints()
 92 | # trend_generator = tg.TrendGenerator()
 93 | 
 94 | # season = [season_generator.gen_season() for x in range(100)]
 95 | # length = len(season[0][0])
 96 | # noise = noise_generator.gen(0,1,length)
 97 | # trend = trend_generator.gen(15,0,length)
 98 | 
 99 | # assembler = assem.AbstractAssembler(season,noise,trend,'season')
100 | # assembler.assemble()
101 | # assembler.save(path='output/TSABen/group_2/change_point')


--------------------------------------------------------------------------------
/generator/additive_anomaly_generator.py:
--------------------------------------------------------------------------------
  1 | #!python3.6
  2 | 
  3 | # Created by Chengyu on 2020/6/6.
  4 | # Additive anomaly generator.
  5 | 
  6 | import numpy as np
  7 | import generator.pattern as pt
  8 | import random
  9 | 
 10 | def partition(length, seg_num):
 11 |     if seg_num >= length:
 12 |         print("error:segment num > length.")
 13 |     sublen = int(length/seg_num)
 14 |     pos = 0
 15 |     pt = []
 16 |     for i in range(0,seg_num-1):
 17 |         pt.append([pos,pos+sublen])
 18 |         pos += sublen
 19 |     pt.append([pos,length])
 20 |     return pt
 21 | 
 22 | # insert spike anomaly
 23 | def insert_spike_anomaly(kpi,label,upt,dwt,pos_list):
 24 |     kpi = kpi.copy()
 25 |     length = len(kpi)
 26 |     for pos in pos_list:
 27 |         position = int(pos*length)
 28 |         degree = [upt[position]-kpi[position]]
 29 |         a = degree
 30 |         for i in np.arange(len(a)):
 31 |             kpi[position+i] = a[i]+kpi[position+i]
 32 |             label[position+i] = 1
 33 |     return (kpi, label)
 34 | 
 35 | def insert_beat_anomaly(kpi,label,upt,dwt,pos_list):
 36 |     kpi = kpi.copy()
 37 |     length = len(kpi)
 38 |     for pos in pos_list:
 39 |         position = int(pos*length)
 40 |         direction = 1 #np.random.choice([-1,1],1,p=[0.5,0.5])
 41 |         degree1 = upt[position]-kpi[position]
 42 |         degree2 = kpi[position+1]-upt[position+1]
 43 |         a = [degree1,degree2]
 44 |         for i in np.arange(len(a)):
 45 |             kpi[position+i] = a[i]+kpi[position+i]
 46 |             label[position+i] = 1
 47 |     return (kpi,label)
 48 | 
 49 | def insert_type1_anomaly(kpi, label, upt, dwt, pos_list):
 50 |     kpi = kpi.copy()
 51 |     length = len(kpi)
 52 |     for pos in pos_list:
 53 |         position = int(pos*length)
 54 |         # direction = 1 #np.random.choice([-1,1],1,p=[0.5,0.5])
 55 |         degree = upt[position]-kpi[position]
 56 |         a = pt.typeI(10,20,degree)
 57 |         for i in np.arange(len(a)):
 58 |             kpi[position+i] = a[i]+kpi[position+i]
 59 |             label[position+i] = 1
 60 |     return (kpi, label)
 61 | 
 62 | def insert_type2_anomaly(kpi, label, upt, dwt, pos_list):
 63 |     kpi = kpi.copy()
 64 |     length = len(kpi)
 65 |     for pos in pos_list:
 66 |         position = int(pos*length)
 67 |         direction = 1 #np.random.choice([-1,1],1,p=[0.5,0.5])
 68 |         a_l = 20
 69 |         degree1 = upt[position]-kpi[position]
 70 |         degree2 = upt[position]-kpi[position+a_l]
 71 |         a = pt.typeII(20,degree1,degree2)
 72 |         for i in np.arange(len(a)):
 73 |             kpi[position+i] = a[i]+kpi[position+i]
 74 |             label[position+i] = 1
 75 |     return (kpi, label)
 76 | 
 77 | # def insert_fluctuate_anomaly(noise,label,degree,num,moms):
 78 | #     noise = noise.copy()
 79 | #     mu = moms[0]
 80 | #     sigma = moms[1]
 81 | #     skew = -10*moms[2]
 82 | #     kurt = moms[3]
 83 | 
 84 | #     pt = partition(len(noise),num)
 85 | #     for k in pt:
 86 | #         a_length = int(random.uniform(50,150))
 87 | #         pos = int(random.uniform(k[0],k[1]-a_length))
 88 | 
 89 | #         noiseGenerator = ng.NoiseGenerator()
 90 | #         a_noise = noiseGenerator.genNoise(moms[0],moms[1],moms[2],moms[3],150)
 91 | #         print('a_noise',a_noise)
 92 | #         for i in range(0,a_length):
 93 | #             noise[pos+i] = a_noise[i]
 94 | #             label[pos+i] = 1
 95 | #     return noise,label
 96 | 
 97 | # # insert null point.
 98 | # def insert_null_anomaly(kpi,label,num):
 99 | #     kpi = kpi.copy()
100 | #     pt = partition(len(kpi),num)
101 | #     for k in pt:
102 | #         pos = int(random.uniform(k[0],k[1]-1))
103 | #         kpi[pos]=0
104 | #         label[pos-1]=1
105 | #         label[pos]=1
106 | #         label[pos+1]=1
107 | #     return kpi,label
108 | 
109 | # # insert dip anomaly
110 | # def insert_dip_anomaly(kpi,label,num, upt, dwt):
111 | #     kpi = kpi.copy()
112 | #     pt = partition(len(kpi),num)
113 | #     for k in pt:
114 | #         pos = int(random.uniform(k[0],k[1]-1))
115 | #         label[pos-1]=1
116 | #         label[pos]=1
117 | #         label[pos+1]=1
118 | #         degree = kpi[pos]-dwt[pos]
119 | #         kpi[pos]+=degree
120 | #     return kpi,label
121 | 
122 | # this type is defined in the paper of Microsoft.
123 | def insert_point_anomaly(kpi,label,num):
124 |     pt = partition(len(kpi),num)
125 |     for k in pt:
126 |         pos = int(random.uniform(k[0],k[1]-1))
127 | 
128 |         local_mean = np.mean(kpi[:pos])
129 |         mean = np.mean(kpi[pos-50:pos+50])
130 |         var = np.var(kpi[pos-50:pos+50])
131 |         r = np.random.normal(0,1,1)
132 |         print(local_mean,mean,var,r)
133 |         x = (local_mean+mean)*(1+var)*r+kpi[pos]
134 | 
135 |         kpi[pos] = x
136 |         label[pos-1]=1
137 |         label[pos]=1
138 |         label[pos+1]=1
139 |     return kpi,label


--------------------------------------------------------------------------------
/shape/RMDF.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import math
  3 | import copy
  4 | 
  5 | class RMDF():
  6 |     def __init__(self,depth=10,ascent_rate=20,start=np.array([0,0]),end=np.array([1,0])):
  7 |         # self.control_points = [[],[],[],[],[],[],[],[],[],[],[]]
  8 |         # self.control_points_copy = [[],[],[],[],[],[],[],[],[],[],[]]
  9 |         # self.anchor = [[],[],[],[],[],[],[],[],[],[],[]]
 10 |         self.control_points = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
 11 |         self.control_points_copy = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
 12 |         self.anchor = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
 13 |         self.depth = depth
 14 |         self.start = start
 15 |         self.end = end
 16 |         self.ascent_rate = ascent_rate
 17 | 
 18 |     def gen_anchor(self):
 19 |         start = self.start
 20 |         end = self.end
 21 |         self.control_points[0].append([[start[0],end[0]],start,end])
 22 |         for d in range(self.depth):
 23 |             for e in self.control_points[d]:
 24 |                 start = e[1]
 25 |                 end = e[2]
 26 |                 l = self.__length(start,end)
 27 |                 pmid = self.__mid(start,end)
 28 |                 h = np.random.normal(0,l/self.ascent_rate)
 29 | 
 30 |                 zeta = math.atan(h/(l/2))
 31 |                 l2 = math.sqrt(h*h+(l/2)*(l/2))
 32 |                 T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
 33 |                 a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
 34 |                 b = np.matmul(T,a)*(l2/l*2)
 35 |                 p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
 36 | 
 37 |                 self.control_points[d+1].append([[start[0],p[0]],start,p])
 38 |                 self.control_points[d+1].append([[p[0],end[0]],p,end])
 39 |             # ll = ll/2
 40 |         self.anchor = self.control_points.copy()
 41 |         # self.__std_anchor()
 42 |     
 43 |     def clear_all(self):
 44 |         self.__clear(self.depth+1)
 45 |         self.gen_anchor()
 46 | 
 47 |     def gen(self,forking_depth,length):
 48 |         self.__clear(forking_depth)
 49 |         self.__forking(forking_depth)
 50 |         self.__std()
 51 |         x_ = np.arange(0,1,1/length)
 52 |         y = np.array([self.__expression(x,10) for x in x_])
 53 |         # y2 = np.array([self.__expression(x,2) for x in x_])
 54 |         # y3 = np.array([self.__expression(x,3) for x in x_])
 55 |         # y4 = np.array([self.__expression(x,4) for x in x_])
 56 |         # y5 = np.array([self.__expression(x,5) for x in x_])
 57 |         # y6 = np.array([self.__expression(x,6) for x in x_])
 58 |         # y7 = np.array([self.__expression(x,7) for x in x_])
 59 |         # y8 = np.array([self.__expression(x,8) for x in x_])
 60 |         # y9 = np.array([self.__expression(x,9) for x in x_])
 61 |         # return y,y2,y3,y4,y5,y6,y7,y8,y9
 62 |         return y
 63 | 
 64 |     def __std_anchor(self):
 65 |         point_list = list(map(lambda x:x[2],self.anchor[10]))
 66 |         y_value_list = list(map(lambda x:x[1], point_list))
 67 |         max_y = np.max(y_value_list)
 68 |         min_y = np.min(y_value_list)
 69 |         height = max_y-min_y
 70 |         for i in range(len(self.anchor[10])):
 71 |             self.anchor[10][i][2][1]=self.anchor[10][i][2][1]/height
 72 | 
 73 |     def __std(self):
 74 |         point_list = list(map(lambda x:x[2],self.control_points_copy[10]))
 75 |         y_value_list = list(map(lambda x:x[1], point_list))
 76 |         max_y = np.max(y_value_list)
 77 |         min_y = np.min(y_value_list)
 78 |         height = max_y-min_y
 79 |         for i in range(len(self.control_points_copy[10])):
 80 |             self.control_points_copy[10][i][2][1] = self.control_points_copy[10][i][2][1]/height
 81 | 
 82 |     def __expression(self,x,depth):
 83 |         expression = self.control_points_copy[depth]
 84 |         for e in expression:
 85 |             if x>=e[0][0] and x<=e[0][1]:
 86 |                 p1 = e[1]
 87 |                 p2 = e[2]
 88 |                 k = (p2[1]-p1[1])/(p2[0]-p1[0])
 89 |                 b = p1[1]-k*p1[0]
 90 |                 return k*x+b
 91 | 
 92 |     def __forking(self,forking_depth):
 93 |         shared_depth = self.depth - forking_depth
 94 |         for d in range(shared_depth,self.depth):
 95 |             for e in self.control_points[d]:
 96 |                 start = e[1]
 97 |                 end = e[2]
 98 |                 l = self.__length(start,end)
 99 |                 pmid = self.__mid(start,end)
100 |                 h = np.random.normal(0,l/self.ascent_rate)
101 | 
102 |                 zeta = math.atan(h/(l/2))
103 |                 l2 = math.sqrt(h*h+(l/2)*(l/2))
104 |                 T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
105 |                 a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
106 |                 b = np.matmul(T,a)*(l2/l*2)
107 |                 p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
108 | 
109 |                 self.control_points[d+1].append([[start[0],p[0]],start,p])
110 |                 self.control_points[d+1].append([[p[0],end[0]],p,end])
111 |         self.control_points_copy = copy.deepcopy(self.control_points)
112 | 
113 |     def __clear(self,forking_depth):
114 |         # clear the latest forking_depth layer.
115 |         shared_depth = self.depth - forking_depth
116 |         for i in range(shared_depth,self.depth):
117 |             self.control_points[i+1]=[]
118 | 
119 |     def __length(self,p1,p2):
120 |         # length of line (p1,p2)
121 |         # p = [x,y]
122 |         # L2-norm
123 |         return np.linalg.norm(p1-p2)
124 |     
125 |     def __mid(self,p1,p2):
126 |         # mid point of line(p1,p2)
127 |         x = (p2[0]+p1[0])/2
128 |         y = (p2[1]+p1[1])/2
129 |         return np.array([x,y])


--------------------------------------------------------------------------------
/Assembler.py:
--------------------------------------------------------------------------------
  1 | #!python3.6
  2 | 
  3 | # Created by Chengyu on 2020/12/9.
  4 | # Assembler.
  5 | 
  6 | import numpy as np
  7 | import os
  8 | import matplotlib.pyplot as plt
  9 | import pandas as pd
 10 | import generator.pattern as pattern
 11 | import generator.additive_anomaly_generator as ag
 12 | from tqdm import tqdm
 13 | from tsagen_visual.visual import show
 14 | from EVT.spot import bidSPOT
 15 | 
 16 | # This is an abstract calss.
 17 | # method assemble() is template method.
 18 | # inject() should be rewritten according to your need.
 19 | class AbstractAssembler():
 20 |     def __init__(self, season, noise, trend, control=None):
 21 |         self.season = season
 22 |         self.trend = trend
 23 |         self.noise = noise
 24 |         self.label = 0
 25 |         self.additive = 0
 26 |         self.results = []
 27 |         self.control = control
 28 |     
 29 |     # template method.
 30 |     def assemble(self):
 31 |         # invoke hook
 32 |         self._inject()
 33 |         # print(self.control)
 34 |         if self.control == 'season':
 35 |             for s,l in self.season:
 36 |                 label = np.bitwise_or(l,self.noise[1])
 37 |                 label = np.bitwise_or(label,self.trend[1])
 38 |                 self.results.append((s+self.noise[0]+self.trend[0],label))
 39 |         elif self.control == 'noise':
 40 |             for n,l in self.noise:
 41 |                 label = np.bitwise_or(l,self.season[1])
 42 |                 label = np.bitwise_or(label,self.trend[1])
 43 |                 self.results.append((n+self.season[0]+self.trend[0],label))
 44 |         elif self.control == 'trend':
 45 |             for t,l in self.season:
 46 |                 label = np.bitwise_or(l,self.noise[1])
 47 |                 label = np.bitwise_or(label,self.season[1])
 48 |                 self.results.append((t+self.noise[0]+self.season[0],label))
 49 |         elif self.control == 'drift_f':
 50 |             pass
 51 |         else:
 52 |             for i in range(0,len(self.season)):
 53 |                 label = np.bitwise_or(self.noise[i][1],self.season[i][1])
 54 |                 label = np.bitwise_or(label,self.trend[i][1])
 55 |                 self.results.append((self.noise[i][0]+self.season[i][0]+self.trend[i][0],label))
 56 |         self._post_inject()
 57 | 
 58 |     # hook.
 59 |     def _inject(self):
 60 |         pass
 61 | 
 62 |     # post hook.
 63 |     def _post_inject(self):
 64 |         pass
 65 | 
 66 |     def save(self,path='output',preifix='synthetic',plot=True,fig_size=(16,4)):
 67 |         idx = 0
 68 |         for r, l in tqdm(self.results):
 69 |             df = pd.DataFrame()
 70 |             df['timestamp']=np.arange(len(r))
 71 |             df['value']=r
 72 |             df['label']=l
 73 | 
 74 |             if not os.path.exists(path+'/data'):
 75 |                 os.makedirs(path+'/data')
 76 |             if not os.path.exists(path+'/fig'):
 77 |                 os.makedirs(path+'/fig')
 78 |             filename = path+'/data/'+preifix+'_'+str(idx)
 79 |             figname = path+'/fig/'+preifix + '_' + str(idx)
 80 |             df.to_csv(filename + '.csv',index=None)
 81 |             if plot:
 82 |                 sub = show(df['value'],df['label'],title=filename,figure_size=fig_size)
 83 |                 sub.savefig(figname + '.jpg')
 84 |                 # plt.show()
 85 |                 plt.close()
 86 |             idx += 1
 87 | 
 88 | # Assmebler with additive anomaly injector.
 89 | class AssemblerWithAdditiveAnomalyInjector(AbstractAssembler):
 90 |     def __init__(self, season, noise, trend, control=None, q=10e-5, init_portion=0.2):
 91 |         AbstractAssembler.__init__(self, season, noise, trend, control)
 92 |         self.q = q
 93 |         self.init_portion = init_portion
 94 | 
 95 |     def _post_inject(self):
 96 |         # establish low probablity boundary.
 97 |         q = self.q
 98 |         d = 10
 99 |         init_portion = self.init_portion
100 |         idx = 0
101 |         for result, label in self.results:
102 |             length = len(result)
103 |             init_data = result[:int(length*init_portion)]
104 |             s = bidSPOT(q,d)
105 |             s.fit(init_data, result)
106 |             s.initialize()
107 |             r = s.run()
108 |             s.plot(r)
109 |             # plt.show()
110 |             upper_thresholds = r['upper_thresholds']
111 |             lower_thresholds = r['lower_thresholds']
112 | 
113 |             # r,l = insert_type2_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7])
114 |             r,l = ag.insert_spike_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7]) #insert_type2_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7])
115 |             self.results[idx]=(r,l)
116 | 
117 | # Assmebler with additive anomaly injector.
118 | # control noise level.
119 | class AssemblerWithAdditiveAnomalyInjector_v1(AbstractAssembler):
120 |     def __init__(self, season, noise, trend, control=None, q=10e-7, init_portion=0.2, a_type='spike'):
121 |         AbstractAssembler.__init__(self, season, noise, trend, control)
122 |         self.q = q
123 |         self.a_type = a_type
124 |         self.init_portion = init_portion
125 | 
126 |     def _post_inject(self):
127 |         # establish low probablity boundary.
128 |         q = self.q
129 |         d = 10
130 |         init_portion = self.init_portion
131 |         idx = 0
132 |         for result, label in self.results:
133 |             length = len(result)
134 |             init_data = result[:int(length*init_portion)]
135 |             s = bidSPOT(q,d)
136 |             s.fit(init_data, result)
137 |             s.initialize()
138 |             r = s.run()
139 |             s.plot(r)
140 |             # plt.show()
141 |             upper_thresholds = r['upper_thresholds']
142 |             lower_thresholds = r['lower_thresholds']
143 |             if self.a_type == 'spike':
144 |                 r,l = ag.insert_spike_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7])
145 |             elif self.a_type == 'beat':
146 |                 r,l = ag.insert_beat_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7])
147 |             elif self.a_type == 'type1':
148 |                 r,l = ag.insert_type1_anomaly(result,label,upper_thresholds,lower_thresholds,[0.3,0.4,0.5,0.6,0.7])
149 |             elif self.a_type == 'type2':
150 |                 r,l = ag.insert_type2_anomaly(result,label,upper_thresholds,lower_thresholds,[0.6,0.9])
151 |             else:
152 |                 print('a_type does not exist.')
153 |             self.results[idx]=(r,l)
154 |             idx += 1
155 | 


--------------------------------------------------------------------------------
/generator/test.py:
--------------------------------------------------------------------------------
  1 | #!python3.6
  2 | 
  3 | # Created by Chengyu on 2020/5/13.
  4 | # season generator.
  5 | 
  6 | import numpy as np
  7 | import math
  8 | import matplotlib.pyplot as plt
  9 | import pandas as pd
 10 | from tqdm import tqdm
 11 | from sklearn.metrics import mean_squared_error
 12 | 
 13 | def sine_p(size):
 14 |     # print(np.pi/np.arange(len))
 15 |     # print(np.linspace(10,100,91))
 16 |     return np.sin(np.linspace(0,np.pi,size))
 17 | 
 18 | def sine(size):
 19 |     return np.sin(np.linspace(0,2*np.pi,size))
 20 | 
 21 | # std_size is the standard size of a cycle.
 22 | # cycle_num is the number of cycles.
 23 | # The overall length of returned seasonal component is
 24 | # std_size * cycle_number, in the absense of drift.
 25 | # drift_a, drift_f are drift factors of amplitude and
 26 | # frequency, respectively.
 27 | def sine_p_season(std_size, cycle_num, drift_a, drift_f):
 28 |     sines = [sine_p(std_size) for x in range(cycle_num)]
 29 |     return np.concatenate(sines)
 30 | 
 31 | def sine_season(std_size, cycle_num, drift_a, drift_f):
 32 |     sines = sine(std_size)
 33 |     sines = [sine(std_size)[1:] for x in range(cycle_num)]
 34 |     return np.concatenate(sines)
 35 | 
 36 | # length of line (p1,p2)
 37 | # p = [x,y]
 38 | def length(p1,p2):
 39 |     # 2-norm
 40 |     return np.linalg.norm(p1-p2)
 41 | 
 42 | # mid of line(p1,p2)
 43 | def mid(p1,p2):
 44 |     x = (p2[0]+p1[0])/2
 45 |     y = (p2[1]+p1[1])/2
 46 |     return np.array([x,y])
 47 | 
 48 | def std():
 49 |     point_list = list(map(lambda x:x[2],expression))
 50 |     # print(point_list)
 51 |     y_value_list = list(map(lambda x:x[1], point_list))
 52 |     max_y = np.max(y_value_list)
 53 |     min_y = np.min(y_value_list)
 54 |     height = max_y-min_y
 55 |     for i in range(len(expression)):
 56 |         expression[i][2][1]=expression[i][2][1]/height
 57 | 
 58 | def std10():
 59 |     point_list = list(map(lambda x:x[2],expression_[10]))
 60 |     # print(point_list)
 61 |     y_value_list = list(map(lambda x:x[1], point_list))
 62 |     max_y = np.max(y_value_list)
 63 |     min_y = np.min(y_value_list)
 64 |     height = max_y-min_y
 65 |     for i in range(len(expression_[10])):
 66 |         expression_[10][i][2][1]=expression_[10][i][2][1]/height
 67 | 
 68 | # plot curve of depth d
 69 | # d start from 0
 70 | def func_of_d(x,depth):
 71 |     expression = expression_[depth]
 72 |     for e in expression:
 73 |         if x>=e[0][0] and x<=e[0][1]:
 74 |             p1 = e[1]
 75 |             p2 = e[2]
 76 |             k = (p2[1]-p1[1])/(p2[0]-p1[0])
 77 |             b = p1[1]-k*p1[0]
 78 |             return k*x+b
 79 | 
 80 | expression_ = [[],[],[],[],[],[],[],[],[],[],[]]
 81 | 
 82 | # RMDF loop version.
 83 | def RMDF_loop(H,sigma,max_depth):
 84 |     start = np.array([0,0])
 85 |     end = np.array([1,0])
 86 |     expression_[0].append([[start[0],end[0]],start,end])
 87 |     for d in range(max_depth):
 88 |         for e in expression_[d]:
 89 |             start = e[1]
 90 |             end = e[2]
 91 |             l = length(start,end)
 92 |             pmid = mid(start,end)
 93 |             h = np.random.normal(0,l/8)
 94 | 
 95 |             zeta = math.atan(h/(l/2))
 96 |             l2 = math.sqrt(h*h+(l/2)*(l/2))
 97 |             T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
 98 |             a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
 99 |             b = np.matmul(T,a)*(l2/l*2)
100 |             p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
101 | 
102 |             expression_[d+1].append([[start[0],p[0]],start,p])
103 |             expression_[d+1].append([[p[0],end[0]],p,end])
104 | 
105 | # RMDF_loop(0.3,0.2,10)
106 | 
107 | # RMDF recursive version.
108 | def RMDF(start,end,depth,H,sigma,max_depth):
109 |     if depth >= max_depth:
110 |         expression_[depth].append([[start[0],end[0]],start,end])
111 |         return
112 |     else:
113 |         expression_[depth].append([[start[0],end[0]],start,end])
114 | 
115 |     l = length(start,end)
116 |     pmid = mid(start,end)
117 |     h = np.random.normal(0,l/8)
118 | 
119 |     zeta = math.atan(h/(l/2))
120 |     l2 = math.sqrt(h*h+(l/2)*(l/2))
121 |     T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
122 |     a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
123 |     b = np.matmul(T,a)*(l2/l*2)
124 |     p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
125 |     RMDF(start,p,depth+1,H,sigma,max_depth)
126 |     RMDF(p,end,depth+1,H,sigma,max_depth)
127 | 
128 | # experiment code
129 | # RMDF(np.array([0,0]),np.array([1,0]),0,0.3,0.2,5)
130 | 
131 | def draww():
132 |     plt.rcParams['pdf.fonttype'] = 42
133 |     plt.rcParams['ps.fonttype'] = 42
134 |     air_force_blue = '#5D8AA8'
135 |     sub1 = plt.subplot(151)
136 |     sub2 = plt.subplot(152)
137 |     sub3 = plt.subplot(153)
138 |     plt.subplots_adjust(wspace =0, hspace =0)
139 |     x = np.arange(0,1,1/1000)
140 | 
141 |     # std()3
142 |     y = [func_of_d(x,3) for x in np.arange(0,1,1/1000)]
143 |     sub1.plot(x,y,linewidth=2,color='#5D8AA8')
144 |     y = [func_of_d(x,4) for x in np.arange(0,1,1/1000)]
145 |     sub2.plot(x,y,linewidth=2,color='#5D8AA8')
146 |     y = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
147 |     sub3.plot(x,y,linewidth=2,color='#5D8AA8')
148 | 
149 |     sub1.set_title("d = 3", y=-0.3,fontsize=25)
150 |     sub1.set_yticks([])
151 |     sub2.set_title("d = 4", y=-0.3,fontsize=25)
152 |     sub2.set_yticks([])
153 |     sub3.set_title("d = 6", y=-0.3,fontsize=25)
154 |     sub3.set_yticks([])
155 |     plt.show()
156 | 
157 | # draww()
158 | 
159 | # RMDF loop version.
160 | def RMDF_shared(shared_depth):
161 |     start = np.array([0,0])
162 |     end = np.array([1,0])
163 |     expression_[0].append([[start[0],end[0]],start,end])
164 |     for d in range(shared_depth):
165 |         for e in expression_[d]:
166 |             start = e[1]
167 |             end = e[2]
168 |             l = length(start,end)
169 |             pmid = mid(start,end)
170 |             h = np.random.normal(0,l/8)
171 | 
172 |             zeta = math.atan(h/(l/2))
173 |             l2 = math.sqrt(h*h+(l/2)*(l/2))
174 |             T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
175 |             a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
176 |             b = np.matmul(T,a)*(l2/l*2)
177 |             p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
178 | 
179 |             expression_[d+1].append([[start[0],p[0]],start,p])
180 |             expression_[d+1].append([[p[0],end[0]],p,end])
181 | 
182 | def clear(shared_depth,max_depth):
183 |     for i in range(shared_depth,max_depth):
184 |         expression_[i+1]=[]
185 | 
186 | def RMDF_diverge(shared_depth,max_depth):
187 |     for d in range(shared_depth,max_depth):
188 |         for e in expression_[d]:
189 |             start = e[1]
190 |             end = e[2]
191 |             l = length(start,end)
192 |             pmid = mid(start,end)
193 |             h = np.random.normal(0,l/8)
194 | 
195 |             zeta = math.atan(h/(l/2))
196 |             l2 = math.sqrt(h*h+(l/2)*(l/2))
197 |             T = np.matrix([[math.cos(zeta),-math.sin(zeta)],[math.sin(zeta),math.cos(zeta)]])
198 |             a = np.matrix([[pmid[0]-start[0]],[pmid[1]-start[1]]])
199 |             b = np.matmul(T,a)*(l2/l*2)
200 |             p = np.array([start[0]+b[0,0],start[1]+b[1,0]])
201 | 
202 |             expression_[d+1].append([[start[0],p[0]],start,p])
203 |             expression_[d+1].append([[p[0],end[0]],p,end])
204 | 
205 | RMDF_shared(10)
206 | 
207 | 
208 | def dtw_distance(ts_a, ts_b, d=lambda x,y: abs(x-y), mww=10000):
209 |     """Computes dtw distance between two time series
210 |     
211 |     Args:
212 |         ts_a: time series a
213 |         ts_b: time series b
214 |         d: distance function
215 |         mww: max warping window, int, optional (default = infinity)
216 |         
217 |     Returns:
218 |         dtw distance
219 |     """
220 |     
221 |     # Create cost matrix via broadcasting with large int
222 |     ts_a, ts_b = np.array(ts_a), np.array(ts_b)
223 |     M, N = len(ts_a), len(ts_b)
224 |     cost = np.ones((M, N))
225 | 
226 |     # Initialize the first row and column
227 |     cost[0, 0] = d(ts_a[0], ts_b[0])
228 |     for i in range(1, M):
229 |         cost[i, 0] = cost[i-1, 0] + d(ts_a[i], ts_b[0])
230 | 
231 |     for j in range(1, N):
232 |         cost[0, j] = cost[0, j-1] + d(ts_a[0], ts_b[j])
233 | 
234 |     # Populate rest of cost matrix within window
235 |     for i in range(1, M):
236 |         for j in range(max(1, i - mww), min(N, i + mww)):
237 |             choices = cost[i-1, j-1], cost[i, j-1], cost[i-1, j]
238 |             cost[i, j] = min(choices) + d(ts_a[i], ts_b[j])
239 | 
240 |     # Return DTW distance given window 
241 |     return cost[-1, -1]
242 | 
243 | 
244 | y = []
245 | def gen_1000curves_and_save():
246 |     x_ = np.arange(0,1,1/1000)
247 |     for i in tqdm(range(100)):
248 |         clear(0,10)
249 |         RMDF_shared(10)
250 |         std10()
251 |         y1 = [func_of_d(x,10) for x in x_]
252 |         clear(9,10)
253 |         RMDF_diverge(9,10)
254 |         std10()
255 |         y2 = [func_of_d(x,10) for x in x_]
256 |         clear(8,10)
257 |         RMDF_diverge(8,10)
258 |         std10()
259 |         y3 = [func_of_d(x,10) for x in x_]
260 |         clear(7,10)
261 |         RMDF_diverge(7,10)
262 |         std10()
263 |         y4 = [func_of_d(x,10) for x in x_]
264 |         clear(6,10)
265 |         RMDF_diverge(6,10)
266 |         std10()
267 |         y5 = [func_of_d(x,10) for x in x_]
268 |         clear(5,10)
269 |         RMDF_diverge(5,10)
270 |         std10()
271 |         y6 = [func_of_d(x,10) for x in x_]
272 |         clear(4,10)
273 |         RMDF_diverge(4,10)
274 |         std10()
275 |         y7 = [func_of_d(x,10) for x in x_]
276 |         clear(3,10)
277 |         RMDF_diverge(3,10)
278 |         std10()
279 |         y8 = [func_of_d(x,10) for x in x_]
280 |         clear(2,10)
281 |         RMDF_diverge(2,10)
282 |         std10()
283 |         y9 = [func_of_d(x,10) for x in x_]
284 |         clear(1,10)
285 |         RMDF_diverge(1,10)
286 |         std10()
287 |         y10 = [func_of_d(x,10) for x in x_]
288 |         y.append((y1,y2,y3,y4,y5,y6,y7,y8,y9,y10))
289 |     # save list
290 |     a = np.array(y)
291 |     np.save('exp_data_in_paper/curves.npy',a)
292 | 
293 | distance_list = [[],[],[],[],[],[],[],[],[]]
294 | mse_list = [[],[],[],[],[],[],[],[],[]]
295 | rmse_list = [[],[],[],[],[],[],[],[],[]]
296 | 
297 | def calculate_mse_and_save():
298 |     curves=np.load('exp_data_in_paper/curves.npy')
299 |     curves=curves.tolist()
300 |     for curve in tqdm(curves):
301 |         anchor = curve[0]
302 |         for sample,idx in zip(curve[1:],range(9)):
303 |             mse = mean_squared_error(anchor,sample)
304 |             mse_list[idx].append(mse)
305 |     mse = np.array(mse_list)
306 |     np.save('exp_data_in_paper/mse.npy',mse)
307 | 
308 | def calculate_rmse_and_save():
309 |     curves=np.load('exp_data_in_paper/curves.npy')
310 |     curves=curves.tolist()
311 |     for curve in tqdm(curves):
312 |         anchor = curve[0]
313 |         for sample,idx in zip(curve[1:],range(9)):
314 |             rmse = np.sqrt(mean_squared_error(anchor,sample))
315 |             rmse_list[idx].append(rmse)
316 |     rmse = np.array(rmse_list)
317 |     np.save('exp_data_in_paper/rmse.npy',rmse)
318 | 
319 | def calculate_DTW_and_save():
320 |     curves=np.load('exp_data_in_paper/curves.npy')
321 |     curves=curves.tolist()
322 |     for curve in tqdm(curves):
323 |         anchor = curve[0]
324 |         for sample,idx in zip(curve[1:],range(9)):
325 |             distance = dtw_distance(anchor,sample,mww=100)
326 |             distance_list[idx].append(distance)
327 |     dtw = np.array(distance_list)
328 |     np.save('exp_data_in_paper/dtw.npy',dtw)
329 | 
330 | def draw_boxplot_of_DTW():
331 |     distance_list=np.load('exp_data_in_paper/dtw.npy')
332 |     distance_list=distance_list.tolist()
333 |     # remove outlier
334 |     new_distance_list = []
335 |     for dist in distance_list:
336 |         new_distance_list.append(np.sort(dist)[:-10])
337 |     f = plt.figure(figsize=(16, 6))
338 |     plt.rcParams['pdf.fonttype'] = 42
339 |     plt.rcParams['ps.fonttype'] = 42
340 |     plt.boxplot(new_distance_list,labels=['1','2','3','4','5','6','7','8','9'],whis=1.5,sym = '.',showmeans=True)
341 |     plt.xlabel('forking depth',size='20')
342 |     plt.ylabel('DTW cost',size='20')
343 |     plt.show()
344 | 
345 | def draw_boxplot_of_MSE():
346 |     distance_list=np.load('exp_data_in_paper/mse.npy')
347 |     distance_list=distance_list.tolist()
348 |     plt.rcParams['pdf.fonttype'] = 42
349 |     plt.rcParams['ps.fonttype'] = 42
350 |     plt.boxplot(distance_list,labels=['1','2','3','4','5','6','7','8','9'],whis=1.5,sym = '.',showmeans=True)
351 |     plt.xlabel('forking depth',size='20')
352 |     plt.ylabel('MSE',size='20')
353 |     plt.show()
354 | 
355 | def draw_boxplot_of_RMSE():
356 |     distance_list=np.load('exp_data_in_paper/rmse.npy')
357 |     distance_list=distance_list.tolist()
358 |     # remove outlier
359 |     new_distance_list = []
360 |     for dist in distance_list:
361 |         new_distance_list.append(np.sort(dist)[:-10])
362 |     f = plt.figure(figsize=(16, 6))
363 |     plt.rcParams['pdf.fonttype'] = 42
364 |     plt.rcParams['ps.fonttype'] = 42
365 |     plt.boxplot(new_distance_list,labels=['1','2','3','4','5','6','7','8','9'],whis=1.5,sym = '*',showmeans=True)
366 |     plt.xlabel('forking depth',size='20')
367 |     plt.ylabel('RMSE',size='20')
368 |     plt.show()
369 | 
370 | # # gen
371 | # gen_1000curves_and_save()
372 | 
373 | # # calculate distance
374 | # calculate_DTW_and_save()
375 | # calculate_mse_and_save()
376 | # calculate_rmse_and_save()
377 | 
378 | # # draw
379 | # draw_boxplot_of_MSE()
380 | # draw_boxplot_of_RMSE()
381 | # draw_boxplot_of_DTW()
382 | 
383 | 
384 | 
385 | 
386 | 
387 | 
388 | # # DO NOT TOUCH THIS!!!
389 | plt.rcParams['pdf.fonttype'] = 42
390 | plt.rcParams['ps.fonttype'] = 42
391 | air_force_blue = '#5D8AA8'
392 | sub1 = plt.subplot(151)
393 | sub2 = plt.subplot(152)
394 | sub3 = plt.subplot(153)
395 | sub4 = plt.subplot(154)
396 | sub5 = plt.subplot(155)
397 | 
398 | plt.subplots_adjust(wspace =0, hspace =0)
399 | x = np.arange(0,1,1/1000)
400 | 
401 | std10()
402 | y1 = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
403 | 
404 | clear(6,10)
405 | RMDF_diverge(6,10)
406 | std10()
407 | y2 = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
408 | 
409 | clear(3,10)
410 | RMDF_diverge(3,10)
411 | std10()
412 | y3 = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
413 | 
414 | clear(2,10)
415 | RMDF_diverge(2,10)
416 | std10()
417 | y4 = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
418 | 
419 | clear(1,10)
420 | RMDF_diverge(1,10)
421 | std10()
422 | y5 = [func_of_d(x,10) for x in np.arange(0,1,1/1000)]
423 | 
424 | sub1.plot(x,y1,linewidth=2,color='#5D8AA8')
425 | sub2.plot(x,y2,linewidth=2,color='#5D8AA8')
426 | sub3.plot(x,y3,linewidth=2,color='#5D8AA8')
427 | sub4.plot(x,y4,linewidth=2,color='#5D8AA8')
428 | sub5.plot(x,y5,linewidth=2,color='#5D8AA8')
429 | 
430 | sub1.set_title("contrast", y=-0.3,fontsize=25)
431 | sub2.set_title(r'$\hat{d} = 2$', y=-0.3,fontsize=25)
432 | sub3.set_title(r'$\hat{d} = 6$', y=-0.3,fontsize=25)
433 | sub4.set_title(r'$\hat{d} = 8$', y=-0.3,fontsize=25)
434 | sub5.set_title(r'$\hat{d} = 9$', y=-0.3,fontsize=25)
435 | 
436 | sub1.set_yticks([])
437 | sub2.set_yticks([])
438 | sub3.set_yticks([])
439 | sub4.set_yticks([])
440 | sub5.set_yticks([])
441 | sub1.set_xticks([])
442 | sub2.set_xticks([])
443 | sub3.set_xticks([])
444 | sub4.set_xticks([])
445 | sub5.set_xticks([])
446 | 
447 | plt.show()


--------------------------------------------------------------------------------
/EVT/spot.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python3
   2 | # -*- coding: utf-8 -*-
   3 | """
   4 | Created on Mon Dec 12 10:08:16 2016
   5 | 
   6 | @author: Alban Siffer 
   7 | @company: Amossys
   8 | @license: GNU GPLv3
   9 | """
  10 | 
  11 | from scipy.optimize import minimize
  12 | from math import log,floor
  13 | import numpy as np
  14 | import pandas as pd
  15 | import matplotlib.pyplot as plt
  16 | import tqdm
  17 | 
  18 | # colors for plot
  19 | deep_saffron = '#FF9933'
  20 | air_force_blue = '#5D8AA8'
  21 | 
  22 | 
  23 | """
  24 | ================================= MAIN CLASS ==================================
  25 | """
  26 | 
  27 | class SPOT:
  28 |     """
  29 |     This class allows to run SPOT algorithm on univariate dataset (upper-bound)
  30 |     
  31 |     Attributes
  32 |     ----------
  33 |     proba : float
  34 |         Detection level (risk), chosen by the user
  35 |         
  36 |     extreme_quantile : float
  37 |         current threshold (bound between normal and abnormal events)
  38 |         
  39 |     data : numpy.array
  40 |         stream
  41 |     
  42 |     init_data : numpy.array
  43 |         initial batch of observations (for the calibration/initialization step)
  44 |     
  45 |     init_threshold : float
  46 |         initial threshold computed during the calibration step
  47 |     
  48 |     peaks : numpy.array
  49 |         array of peaks (excesses above the initial threshold)
  50 |     
  51 |     n : int
  52 |         number of observed values
  53 |     
  54 |     Nt : int
  55 |         number of observed peaks
  56 |     """
  57 |     
  58 |     def __init__(self, q = 1e-4):
  59 |         """
  60 |         Constructor
  61 | 
  62 | 	    Parameters
  63 | 	    ----------
  64 | 	    q
  65 | 		    Detection level (risk)
  66 | 	
  67 | 	    Returns
  68 | 	    ----------
  69 |     	SPOT object
  70 |         """
  71 |         self.proba = q
  72 |         self.extreme_quantile = None
  73 |         self.data = None
  74 |         self.init_data = None
  75 |         self.init_threshold = None
  76 |         self.peaks = None
  77 |         self.n = 0
  78 |         self.Nt = 0
  79 |         
  80 |     def __str__(self):
  81 |         s = ''
  82 |         s += 'Streaming Peaks-Over-Threshold Object\n'
  83 |         s += 'Detection level q = %s\n' % self.proba
  84 |         if self.data is not None:
  85 |             s += 'Data imported : Yes\n'
  86 |             s += '\t initialization  : %s values\n' % self.init_data.size
  87 |             s += '\t stream : %s values\n' % self.data.size
  88 |         else:
  89 |             s += 'Data imported : No\n'
  90 |             return s
  91 |             
  92 |         if self.n == 0:
  93 |             s += 'Algorithm initialized : No\n'
  94 |         else:
  95 |             s += 'Algorithm initialized : Yes\n'
  96 |             s += '\t initial threshold : %s\n' % self.init_threshold
  97 |             
  98 |             r = self.n-self.init_data.size
  99 |             if r > 0:
 100 |                 s += 'Algorithm run : Yes\n'
 101 |                 s += '\t number of observations : %s (%.2f %%)\n' % (r,100*r/self.n)
 102 |             else:
 103 |                 s += '\t number of peaks  : %s\n' % self.Nt
 104 |                 s += '\t extreme quantile : %s\n' % self.extreme_quantile
 105 |                 s += 'Algorithm run : No\n'
 106 |         return s
 107 |     
 108 |     
 109 |     def fit(self,init_data,data):
 110 |         """
 111 |         Import data to SPOT object
 112 |         
 113 |         Parameters
 114 | 	    ----------
 115 | 	    init_data : list, numpy.array or pandas.Series
 116 | 		    initial batch to calibrate the algorithm
 117 |             
 118 |         data : numpy.array
 119 | 		    data for the run (list, np.array or pd.series)
 120 | 	
 121 |         """
 122 |         if isinstance(data,list):
 123 |             self.data = np.array(data)
 124 |         elif isinstance(data,np.ndarray):
 125 |             self.data = data
 126 |         elif isinstance(data,pd.Series):
 127 |             self.data = data.values
 128 |         else:
 129 |             print('This data format (%s) is not supported' % type(data))
 130 |             return
 131 |             
 132 |         if isinstance(init_data,list):
 133 |             self.init_data = np.array(init_data)
 134 |         elif isinstance(init_data,np.ndarray):
 135 |             self.init_data = init_data
 136 |         elif isinstance(init_data,pd.Series):
 137 |             self.init_data = init_data.values
 138 |         elif isinstance(init_data,int):
 139 |             self.init_data = self.data[:init_data]
 140 |             self.data = self.data[init_data:]
 141 |         elif isinstance(init_data,float) & (init_data<1) & (init_data>0):
 142 |             r = int(init_data*data.size)
 143 |             self.init_data = self.data[:r]
 144 |             self.data = self.data[r:]
 145 |         else:
 146 |             print('The initial data cannot be set')
 147 |             return
 148 |         
 149 |     def add(self,data):
 150 |         """
 151 |         This function allows to append data to the already fitted data
 152 |         
 153 |         Parameters
 154 | 	    ----------
 155 | 	    data : list, numpy.array, pandas.Series
 156 | 		    data to append
 157 |         """
 158 |         if isinstance(data,list):
 159 |             data = np.array(data)
 160 |         elif isinstance(data,np.ndarray):
 161 |             data = data
 162 |         elif isinstance(data,pd.Series):
 163 |             data = data.values
 164 |         else:
 165 |             print('This data format (%s) is not supported' % type(data))
 166 |             return
 167 |         
 168 |         self.data = np.append(self.data,data)
 169 |         return
 170 |     
 171 |     def initialize(self, level = 0.98, verbose = True):
 172 |         """
 173 |         Run the calibration (initialization) step
 174 |         
 175 |         Parameters
 176 | 	    ----------
 177 |         level : float
 178 |             (default 0.98) Probability associated with the initial threshold t 
 179 | 	    verbose : bool
 180 | 		    (default = True) If True, gives details about the batch initialization
 181 |         """
 182 |         level = level-floor(level)
 183 |         
 184 |         n_init = self.init_data.size
 185 |         
 186 |         S = np.sort(self.init_data)     # we sort X to get the empirical quantile
 187 |         self.init_threshold = S[int(level*n_init)] # t is fixed for the whole algorithm
 188 | 
 189 |         # initial peaks
 190 |         self.peaks = self.init_data[self.init_data>self.init_threshold]-self.init_threshold 
 191 |         self.Nt = self.peaks.size
 192 |         self.n = n_init
 193 |         
 194 |         if verbose:
 195 |             print('Initial threshold : %s' % self.init_threshold)
 196 |             print('Number of peaks : %s' % self.Nt)
 197 |             print('Grimshaw maximum log-likelihood estimation ... ', end = '')
 198 |             
 199 |         g,s,l = self._grimshaw()
 200 |         self.extreme_quantile = self._quantile(g,s)
 201 |         
 202 |         if verbose:
 203 |             print('[done]')
 204 |             print('\t'+chr(0x03B3) + ' = ' + str(g))
 205 |             print('\t'+chr(0x03C3) + ' = ' + str(s))
 206 |             print('\tL = ' + str(l))
 207 |             print('Extreme quantile (probability = %s): %s' % (self.proba,self.extreme_quantile))
 208 |         
 209 |         return 
 210 |     
 211 |     
 212 |     
 213 |     
 214 |     def _rootsFinder(fun,jac,bounds,npoints,method):
 215 |         """
 216 |         Find possible roots of a scalar function
 217 |         
 218 |         Parameters
 219 |         ----------
 220 |         fun : function
 221 | 		    scalar function 
 222 |         jac : function
 223 |             first order derivative of the function  
 224 |         bounds : tuple
 225 |             (min,max) interval for the roots search    
 226 |         npoints : int
 227 |             maximum number of roots to output      
 228 |         method : str
 229 |             'regular' : regular sample of the search interval, 'random' : uniform (distribution) sample of the search interval
 230 |         
 231 |         Returns
 232 |         ----------
 233 |         numpy.array
 234 |             possible roots of the function
 235 |         """
 236 |         if method == 'regular':
 237 |             step = (bounds[1]-bounds[0])/(npoints+1)
 238 |             X0 = np.arange(bounds[0]+step,bounds[1],step)
 239 |         elif method == 'random':
 240 |             X0 = np.random.uniform(bounds[0],bounds[1],npoints)
 241 |         
 242 |         def objFun(X,f,jac):
 243 |             g = 0
 244 |             j = np.zeros(X.shape)
 245 |             i = 0
 246 |             for x in X:
 247 |                 fx = f(x)
 248 |                 g = g+fx**2
 249 |                 j[i] = 2*fx*jac(x)
 250 |                 i = i+1
 251 |             return g,j
 252 |         
 253 |         opt = minimize(lambda X:objFun(X,fun,jac), X0, 
 254 |                        method='L-BFGS-B', 
 255 |                        jac=True, bounds=[bounds]*len(X0))
 256 |         
 257 |         X = opt.x
 258 |         np.round(X,decimals = 5)
 259 |         return np.unique(X)
 260 |     
 261 |     
 262 |     def _log_likelihood(Y,gamma,sigma):
 263 |         """
 264 |         Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)
 265 |         
 266 |         Parameters
 267 |         ----------
 268 |         Y : numpy.array
 269 | 		    observations
 270 |         gamma : float
 271 |             GPD index parameter
 272 |         sigma : float
 273 |             GPD scale parameter (>0)   
 274 | 
 275 |         Returns
 276 |         ----------
 277 |         float
 278 |             log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0)
 279 |         """
 280 |         n = Y.size
 281 |         if gamma != 0:
 282 |             tau = gamma/sigma
 283 |             L = -n * log(sigma) - ( 1 + (1/gamma) ) * ( np.log(1+tau*Y) ).sum()
 284 |         else:
 285 |             L = n * ( 1 + log(Y.mean()) )
 286 |         return L
 287 | 
 288 | 
 289 |     def _grimshaw(self,epsilon = 1e-8, n_points = 10):
 290 |         """
 291 |         Compute the GPD parameters estimation with the Grimshaw's trick
 292 |         
 293 |         Parameters
 294 |         ----------
 295 |         epsilon : float
 296 | 		    numerical parameter to perform (default : 1e-8)
 297 |         n_points : int
 298 |             maximum number of candidates for maximum likelihood (default : 10)
 299 | 
 300 |         Returns
 301 |         ----------
 302 |         gamma_best,sigma_best,ll_best
 303 |             gamma estimates, sigma estimates and corresponding log-likelihood
 304 |         """
 305 |         def u(s):
 306 |             return 1 + np.log(s).mean()
 307 |             
 308 |         def v(s):
 309 |             return np.mean(1/s)
 310 |         
 311 |         def w(Y,t):
 312 |             s = 1+t*Y
 313 |             us = u(s)
 314 |             vs = v(s)
 315 |             return us*vs-1
 316 |         
 317 |         def jac_w(Y,t):
 318 |             s = 1+t*Y
 319 |             us = u(s)
 320 |             vs = v(s)
 321 |             jac_us = (1/t)*(1-vs)
 322 |             jac_vs = (1/t)*(-vs+np.mean(1/s**2))
 323 |             return us*jac_vs+vs*jac_us
 324 |             
 325 |     
 326 |         Ym = self.peaks.min()
 327 |         YM = self.peaks.max()
 328 |         Ymean = self.peaks.mean()
 329 |         
 330 |         
 331 |         a = -1/YM
 332 |         if abs(a)<2*epsilon:
 333 |             epsilon = abs(a)/n_points
 334 |         
 335 |         a = a + epsilon
 336 |         b = 2*(Ymean-Ym)/(Ymean*Ym)
 337 |         c = 2*(Ymean-Ym)/(Ym**2)
 338 |     
 339 |         # We look for possible roots
 340 |         left_zeros = SPOT._rootsFinder(lambda t: w(self.peaks,t),
 341 |                                  lambda t: jac_w(self.peaks,t),
 342 |                                  (a+epsilon,-epsilon),
 343 |                                  n_points,'regular')
 344 |         
 345 |         right_zeros = SPOT._rootsFinder(lambda t: w(self.peaks,t),
 346 |                                   lambda t: jac_w(self.peaks,t),
 347 |                                   (b,c),
 348 |                                   n_points,'regular')
 349 |     
 350 |         # all the possible roots
 351 |         zeros = np.concatenate((left_zeros,right_zeros))
 352 |         
 353 |         # 0 is always a solution so we initialize with it
 354 |         gamma_best = 0
 355 |         sigma_best = Ymean
 356 |         ll_best = SPOT._log_likelihood(self.peaks,gamma_best,sigma_best)
 357 |         
 358 |         # we look for better candidates
 359 |         for z in zeros:
 360 |             gamma = u(1+z*self.peaks)-1
 361 |             sigma = gamma/z
 362 |             ll = SPOT._log_likelihood(self.peaks,gamma,sigma)
 363 |             if ll>ll_best:
 364 |                 gamma_best = gamma
 365 |                 sigma_best = sigma
 366 |                 ll_best = ll
 367 |     
 368 |         return gamma_best,sigma_best,ll_best
 369 | 
 370 |     
 371 | 
 372 |     def _quantile(self,gamma,sigma):
 373 |         """
 374 |         Compute the quantile at level 1-q
 375 |         
 376 |         Parameters
 377 |         ----------
 378 |         gamma : float
 379 | 		    GPD parameter
 380 |         sigma : float
 381 |             GPD parameter
 382 | 
 383 |         Returns
 384 |         ----------
 385 |         float
 386 |             quantile at level 1-q for the GPD(γ,σ,μ=0)
 387 |         """
 388 |         r = self.n * self.proba / self.Nt
 389 |         if gamma != 0:
 390 |             return self.init_threshold + (sigma/gamma)*(pow(r,-gamma)-1)
 391 |         else:
 392 |             return self.init_threshold - sigma*log(r)
 393 | 
 394 |         
 395 |     def run(self, with_alarm = True):
 396 |         """
 397 |         Run SPOT on the stream
 398 |         
 399 |         Parameters
 400 |         ----------
 401 |         with_alarm : bool
 402 | 		    (default = True) If False, SPOT will adapt the threshold assuming \
 403 |             there is no abnormal values
 404 | 
 405 | 
 406 |         Returns
 407 |         ----------
 408 |         dict
 409 |             keys : 'thresholds' and 'alarms'
 410 |             
 411 |             'thresholds' contains the extreme quantiles and 'alarms' contains \
 412 |             the indexes of the values which have triggered alarms
 413 |             
 414 |         """
 415 |         if (self.n>self.init_data.size):
 416 |             print('Warning : the algorithm seems to have already been run, you \
 417 |             should initialize before running again')
 418 |             return {}
 419 |         
 420 |         # list of the thresholds
 421 |         th = []
 422 |         alarm = []
 423 |         # Loop over the stream
 424 |         for i in tqdm.tqdm(range(self.data.size)):
 425 |     
 426 |             # If the observed value exceeds the current threshold (alarm case)
 427 |             if self.data[i]>self.extreme_quantile:
 428 |                 # if we want to alarm, we put it in the alarm list
 429 |                 if with_alarm:
 430 |                     alarm.append(i)
 431 |                 # otherwise we add it in the peaks
 432 |                 else:
 433 |                     self.peaks = np.append(self.peaks,self.data[i]-self.init_threshold)
 434 |                     self.Nt += 1
 435 |                     self.n += 1
 436 |                     # and we update the thresholds
 437 | 
 438 |                     g,s,l = self._grimshaw()
 439 |                     self.extreme_quantile = self._quantile(g,s)
 440 | 
 441 |             # case where the value exceeds the initial threshold but not the alarm ones
 442 |             elif self.data[i]>self.init_threshold:
 443 |                     # we add it in the peaks
 444 |                     self.peaks = np.append(self.peaks,self.data[i]-self.init_threshold)
 445 |                     self.Nt += 1
 446 |                     self.n += 1
 447 |                     # and we update the thresholds
 448 | 
 449 |                     g,s,l = self._grimshaw()
 450 |                     self.extreme_quantile = self._quantile(g,s)
 451 |             else:
 452 |                 self.n += 1
 453 | 
 454 |                 
 455 |             th.append(self.extreme_quantile) # thresholds record
 456 |         
 457 |         return {'thresholds' : th, 'alarms': alarm}
 458 |     
 459 | 
 460 |     def plot(self,run_results,with_alarm = True):
 461 |         """
 462 |         Plot the results of given by the run
 463 |         
 464 |         Parameters
 465 |         ----------
 466 |         run_results : dict
 467 |             results given by the 'run' method
 468 |         with_alarm : bool
 469 | 		    (default = True) If True, alarms are plotted.
 470 | 
 471 | 
 472 |         Returns
 473 |         ----------
 474 |         list
 475 |             list of the plots
 476 |             
 477 |         """
 478 |         x = range(self.data.size)
 479 |         K = run_results.keys()
 480 |         
 481 |         ts_fig, = plt.plot(x,self.data,color=air_force_blue)
 482 |         fig = [ts_fig]
 483 |         
 484 |         if 'thresholds' in K:
 485 |             th = run_results['thresholds']
 486 |             th_fig, = plt.plot(x,th,color=deep_saffron,lw=2,ls='dashed')
 487 |             fig.append(th_fig)
 488 |         
 489 |         if with_alarm and ('alarms' in K):
 490 |             alarm = run_results['alarms']
 491 |             al_fig = plt.scatter(alarm,self.data[alarm],color='red')
 492 |             fig.append(al_fig)
 493 |             
 494 |         plt.xlim((0,self.data.size))
 495 | 
 496 |         
 497 |         return fig
 498 |             
 499 | 
 500 | 
 501 | 
 502 | 
 503 | 
 504 | 
 505 | 
 506 | 
 507 | 
 508 | """
 509 | ============================ UPPER & LOWER BOUNDS =============================
 510 | """
 511 | 
 512 | 
 513 | 
 514 | 
 515 | class biSPOT:
 516 |     """
 517 |     This class allows to run biSPOT algorithm on univariate dataset (upper and lower bounds)
 518 |     
 519 |     Attributes
 520 |     ----------
 521 |     proba : float
 522 |         Detection level (risk), chosen by the user
 523 |         
 524 |     extreme_quantile : float
 525 |         current threshold (bound between normal and abnormal events)
 526 |         
 527 |     data : numpy.array
 528 |         stream
 529 |     
 530 |     init_data : numpy.array
 531 |         initial batch of observations (for the calibration/initialization step)
 532 |     
 533 |     init_threshold : float
 534 |         initial threshold computed during the calibration step
 535 |     
 536 |     peaks : numpy.array
 537 |         array of peaks (excesses above the initial threshold)
 538 |     
 539 |     n : int
 540 |         number of observed values
 541 |     
 542 |     Nt : int
 543 |         number of observed peaks
 544 |     """
 545 |     def __init__(self, q = 1e-4):
 546 |         """
 547 |         Constructor
 548 | 
 549 | 	    Parameters
 550 | 	    ----------
 551 | 	    q
 552 | 		    Detection level (risk)
 553 | 	
 554 | 	    Returns
 555 | 	    ----------
 556 |         biSPOT object
 557 |         """
 558 |         self.proba = q
 559 |         self.data = None
 560 |         self.init_data = None
 561 |         self.n = 0
 562 |         nonedict =  {'up':None,'down':None}
 563 |         
 564 |         self.extreme_quantile = dict.copy(nonedict)
 565 |         self.init_threshold = dict.copy(nonedict)
 566 |         self.peaks = dict.copy(nonedict)
 567 |         self.gamma = dict.copy(nonedict)
 568 |         self.sigma = dict.copy(nonedict)
 569 |         self.Nt = {'up':0,'down':0}
 570 |         
 571 |         
 572 |     def __str__(self):
 573 |         s = ''
 574 |         s += 'Streaming Peaks-Over-Threshold Object\n'
 575 |         s += 'Detection level q = %s\n' % self.proba
 576 |         if self.data is not None:
 577 |             s += 'Data imported : Yes\n'
 578 |             s += '\t initialization  : %s values\n' % self.init_data.size
 579 |             s += '\t stream : %s values\n' % self.data.size
 580 |         else:
 581 |             s += 'Data imported : No\n'
 582 |             return s
 583 |             
 584 |         if self.n == 0:
 585 |             s += 'Algorithm initialized : No\n'
 586 |         else:
 587 |             s += 'Algorithm initialized : Yes\n'
 588 |             s += '\t initial threshold : %s\n' % self.init_threshold
 589 |             
 590 |             r = self.n-self.init_data.size
 591 |             if r > 0:
 592 |                 s += 'Algorithm run : Yes\n'
 593 |                 s += '\t number of observations : %s (%.2f %%)\n' % (r,100*r/self.n)
 594 |                 s += '\t triggered alarms : %s (%.2f %%)\n' % (len(self.alarm),100*len(self.alarm)/self.n)
 595 |             else:
 596 |                 s += '\t number of peaks  : %s\n' % self.Nt
 597 |                 s += '\t upper extreme quantile : %s\n' % self.extreme_quantile['up']
 598 |                 s += '\t lower extreme quantile : %s\n' % self.extreme_quantile['down']
 599 |                 s += 'Algorithm run : No\n'
 600 |         return s
 601 |     
 602 |     
 603 |     def fit(self,init_data,data):
 604 |         """
 605 |         Import data to biSPOT object
 606 |         
 607 |         Parameters
 608 | 	    ----------
 609 | 	    init_data : list, numpy.array or pandas.Series
 610 | 		    initial batch to calibrate the algorithm ()
 611 |             
 612 |         data : numpy.array
 613 | 		    data for the run (list, np.array or pd.series)
 614 | 	
 615 |         """
 616 |         if isinstance(data,list):
 617 |             self.data = np.array(data)
 618 |         elif isinstance(data,np.ndarray):
 619 |             self.data = data
 620 |         elif isinstance(data,pd.Series):
 621 |             self.data = data.values
 622 |         else:
 623 |             print('This data format (%s) is not supported' % type(data))
 624 |             return
 625 |             
 626 |         if isinstance(init_data,list):
 627 |             self.init_data = np.array(init_data)
 628 |         elif isinstance(init_data,np.ndarray):
 629 |             self.init_data = init_data
 630 |         elif isinstance(init_data,pd.Series):
 631 |             self.init_data = init_data.values
 632 |         elif isinstance(init_data,int):
 633 |             self.init_data = self.data[:init_data]
 634 |             self.data = self.data[init_data:]
 635 |         elif isinstance(init_data,float) & (init_data<1) & (init_data>0):
 636 |             r = int(init_data*data.size)
 637 |             self.init_data = self.data[:r]
 638 |             self.data = self.data[r:]
 639 |         else:
 640 |             print('The initial data cannot be set')
 641 |             return
 642 |         
 643 |     def add(self,data):
 644 |         """
 645 |         This function allows to append data to the already fitted data
 646 |         
 647 |         Parameters
 648 | 	    ----------
 649 | 	    data : list, numpy.array, pandas.Series
 650 | 		    data to append
 651 |         """
 652 |         if isinstance(data,list):
 653 |             data = np.array(data)
 654 |         elif isinstance(data,np.ndarray):
 655 |             data = data
 656 |         elif isinstance(data,pd.Series):
 657 |             data = data.values
 658 |         else:
 659 |             print('This data format (%s) is not supported' % type(data))
 660 |             return
 661 |         
 662 |         self.data = np.append(self.data,data)
 663 |         return
 664 | 
 665 |     def initialize(self, verbose = True):
 666 |         """
 667 |         Run the calibration (initialization) step
 668 |         
 669 |         Parameters
 670 | 	    ----------
 671 | 	    verbose : bool
 672 | 		    (default = True) If True, gives details about the batch initialization
 673 |         """
 674 |         n_init = self.init_data.size
 675 |         
 676 |         S = np.sort(self.init_data)     # we sort X to get the empirical quantile
 677 |         self.init_threshold['up'] = S[int(0.98*n_init)] # t is fixed for the whole algorithm
 678 |         self.init_threshold['down'] = S[int(0.02*n_init)] # t is fixed for the whole algorithm
 679 | 
 680 |         # initial peaks
 681 |         self.peaks['up'] = self.init_data[self.init_data>self.init_threshold['up']]-self.init_threshold['up']
 682 |         self.peaks['down'] = -(self.init_data[self.init_data<self.init_threshold['down']]-self.init_threshold['down'])
 683 |         self.Nt['up'] = self.peaks['up'].size
 684 |         self.Nt['down'] = self.peaks['down'].size
 685 |         self.n = n_init
 686 |         
 687 |         if verbose:
 688 |             print('Initial threshold : %s' % self.init_threshold)
 689 |             print('Number of peaks : %s' % self.Nt)
 690 |             print('Grimshaw maximum log-likelihood estimation ... ', end = '')
 691 |             
 692 |         l = {'up':None,'down':None}
 693 |         for side in ['up','down']:
 694 |             g,s,l[side] = self._grimshaw(side)
 695 |             self.extreme_quantile[side] = self._quantile(side,g,s)
 696 |             self.gamma[side] = g
 697 |             self.sigma[side] = s
 698 |         
 699 |         ltab = 20
 700 |         form = ('\t'+'%20s' + '%20.2f' + '%20.2f')
 701 |         if verbose:
 702 |             print('[done]')
 703 |             print('\t' + 'Parameters'.rjust(ltab) + 'Upper'.rjust(ltab) + 'Lower'.rjust(ltab))
 704 |             print('\t' + '-'*ltab*3)
 705 |             print(form % (chr(0x03B3),self.gamma['up'],self.gamma['down']))
 706 |             print(form % (chr(0x03C3),self.sigma['up'],self.sigma['down']))
 707 |             print(form % ('likelihood',l['up'],l['down']))
 708 |             print(form % ('Extreme quantile',self.extreme_quantile['up'],self.extreme_quantile['down']))
 709 |             print('\t' + '-'*ltab*3)
 710 |         return 
 711 |     
 712 |     
 713 |     
 714 |     
 715 |     def _rootsFinder(fun,jac,bounds,npoints,method):
 716 |         """
 717 |         Find possible roots of a scalar function
 718 |         
 719 |         Parameters
 720 |         ----------
 721 |         fun : function
 722 | 		    scalar function 
 723 |         jac : function
 724 |             first order derivative of the function  
 725 |         bounds : tuple
 726 |             (min,max) interval for the roots search    
 727 |         npoints : int
 728 |             maximum number of roots to output      
 729 |         method : str
 730 |             'regular' : regular sample of the search interval, 'random' : uniform (distribution) sample of the search interval
 731 |         
 732 |         Returns
 733 |         ----------
 734 |         numpy.array
 735 |             possible roots of the function
 736 |         """
 737 |         if method == 'regular':
 738 |             step = (bounds[1]-bounds[0])/(npoints+1)
 739 |             X0 = np.arange(bounds[0]+step,bounds[1],step)
 740 |         elif method == 'random':
 741 |             X0 = np.random.uniform(bounds[0],bounds[1],npoints)
 742 |         
 743 |         def objFun(X,f,jac):
 744 |             g = 0
 745 |             j = np.zeros(X.shape)
 746 |             i = 0
 747 |             for x in X:
 748 |                 fx = f(x)
 749 |                 g = g+fx**2
 750 |                 j[i] = 2*fx*jac(x)
 751 |                 i = i+1
 752 |             return g,j
 753 |         opt = minimize(lambda X:objFun(X,fun,jac), X0, 
 754 |                        method='L-BFGS-B', 
 755 |                        jac=True, bounds=[bounds]*len(X0))
 756 |         
 757 |         X = opt.x
 758 |         np.round(X,decimals = 5)
 759 |         return np.unique(X)
 760 |     
 761 |     
 762 |     def _log_likelihood(Y,gamma,sigma):
 763 |         """
 764 |         Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)
 765 |         
 766 |         Parameters
 767 |         ----------
 768 |         Y : numpy.array
 769 | 		    observations
 770 |         gamma : float
 771 |             GPD index parameter
 772 |         sigma : float
 773 |             GPD scale parameter (>0)   
 774 | 
 775 |         Returns
 776 |         ----------
 777 |         float
 778 |             log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0)
 779 |         """
 780 |         n = Y.size
 781 |         if gamma != 0:
 782 |             tau = gamma/sigma
 783 |             L = -n * log(sigma) - ( 1 + (1/gamma) ) * ( np.log(1+tau*Y) ).sum()
 784 |         else:
 785 |             L = n * ( 1 + log(Y.mean()) )
 786 |         return L
 787 | 
 788 | 
 789 |     def _grimshaw(self,side,epsilon = 1e-8, n_points = 10):
 790 |         """
 791 |         Compute the GPD parameters estimation with the Grimshaw's trick
 792 |         
 793 |         Parameters
 794 |         ----------
 795 |         epsilon : float
 796 | 		    numerical parameter to perform (default : 1e-8)
 797 |         n_points : int
 798 |             maximum number of candidates for maximum likelihood (default : 10)
 799 | 
 800 |         Returns
 801 |         ----------
 802 |         gamma_best,sigma_best,ll_best
 803 |             gamma estimates, sigma estimates and corresponding log-likelihood
 804 |         """
 805 |         def u(s):
 806 |             return 1 + np.log(s).mean()
 807 |             
 808 |         def v(s):
 809 |             return np.mean(1/s)
 810 |         
 811 |         def w(Y,t):
 812 |             s = 1+t*Y
 813 |             us = u(s)
 814 |             vs = v(s)
 815 |             return us*vs-1
 816 |         
 817 |         def jac_w(Y,t):
 818 |             s = 1+t*Y
 819 |             us = u(s)
 820 |             vs = v(s)
 821 |             jac_us = (1/t)*(1-vs)
 822 |             jac_vs = (1/t)*(-vs+np.mean(1/s**2))
 823 |             return us*jac_vs+vs*jac_us
 824 |             
 825 |     
 826 |         Ym = self.peaks[side].min()
 827 |         YM = self.peaks[side].max()
 828 |         Ymean = self.peaks[side].mean()
 829 |         
 830 |         
 831 |         a = -1/YM
 832 |         if abs(a)<2*epsilon:
 833 |             epsilon = abs(a)/n_points
 834 |         
 835 |         a = a + epsilon
 836 |         b = 2*(Ymean-Ym)/(Ymean*Ym)
 837 |         c = 2*(Ymean-Ym)/(Ym**2)
 838 |     
 839 |         # We look for possible roots
 840 |         left_zeros = biSPOT._rootsFinder(lambda t: w(self.peaks[side],t),
 841 |                                  lambda t: jac_w(self.peaks[side],t),
 842 |                                  (a+epsilon,-epsilon),
 843 |                                  n_points,'regular')
 844 |         
 845 |         right_zeros = biSPOT._rootsFinder(lambda t: w(self.peaks[side],t),
 846 |                                   lambda t: jac_w(self.peaks[side],t),
 847 |                                   (b,c),
 848 |                                   n_points,'regular')
 849 |     
 850 |         # all the possible roots
 851 |         zeros = np.concatenate((left_zeros,right_zeros))
 852 |         
 853 |         # 0 is always a solution so we initialize with it
 854 |         gamma_best = 0
 855 |         sigma_best = Ymean
 856 |         ll_best = biSPOT._log_likelihood(self.peaks[side],gamma_best,sigma_best)
 857 |         
 858 |         # we look for better candidates
 859 |         for z in zeros:
 860 |             gamma = u(1+z*self.peaks[side])-1
 861 |             sigma = gamma/z
 862 |             ll = biSPOT._log_likelihood(self.peaks[side],gamma,sigma)
 863 |             if ll>ll_best:
 864 |                 gamma_best = gamma
 865 |                 sigma_best = sigma
 866 |                 ll_best = ll
 867 |     
 868 |         return gamma_best,sigma_best,ll_best
 869 | 
 870 |     
 871 | 
 872 |     def _quantile(self,side,gamma,sigma):
 873 |         """
 874 |         Compute the quantile at level 1-q for a given side
 875 |         
 876 |         Parameters
 877 |         ----------
 878 |         side : str
 879 |             'up' or 'down'
 880 |         gamma : float
 881 | 		    GPD parameter
 882 |         sigma : float
 883 |             GPD parameter
 884 | 
 885 |         Returns
 886 |         ----------
 887 |         float
 888 |             quantile at level 1-q for the GPD(γ,σ,μ=0)
 889 |         """
 890 |         if side == 'up':
 891 |             r = self.n * self.proba / self.Nt[side]
 892 |             if gamma != 0:
 893 |                 return self.init_threshold['up'] + (sigma/gamma)*(pow(r,-gamma)-1)
 894 |             else:
 895 |                 return self.init_threshold['up'] - sigma*log(r)
 896 |         elif side == 'down':
 897 |             r = self.n * self.proba / self.Nt[side]
 898 |             if gamma != 0:
 899 |                 return self.init_threshold['down'] - (sigma/gamma)*(pow(r,-gamma)-1)
 900 |             else:
 901 |                 return self.init_threshold['down'] + sigma*log(r)
 902 |         else:
 903 |             print('error : the side is not right')
 904 | 
 905 |         
 906 |     def run(self, with_alarm = True):
 907 |         """
 908 |         Run biSPOT on the stream
 909 |         
 910 |         Parameters
 911 |         ----------
 912 |         with_alarm : bool
 913 | 		    (default = True) If False, SPOT will adapt the threshold assuming \
 914 |             there is no abnormal values
 915 | 
 916 | 
 917 |         Returns
 918 |         ----------
 919 |         dict
 920 |             keys : 'upper_thresholds', 'lower_thresholds' and 'alarms'
 921 |             
 922 |             '***-thresholds' contains the extreme quantiles and 'alarms' contains \
 923 |             the indexes of the values which have triggered alarms
 924 |             
 925 |         """
 926 |         if (self.n>self.init_data.size):
 927 |             print('Warning : the algorithm seems to have already been run, you \
 928 |             should initialize before running again')
 929 |             return {}
 930 |         
 931 |         # list of the thresholds
 932 |         thup = []
 933 |         thdown = []
 934 |         alarm = []
 935 |         # Loop over the stream
 936 |         for i in tqdm.tqdm(range(self.data.size)):
 937 |     
 938 |             # If the observed value exceeds the current threshold (alarm case)
 939 |             if self.data[i]>self.extreme_quantile['up'] :
 940 |                 # if we want to alarm, we put it in the alarm list
 941 |                 if with_alarm:
 942 |                     alarm.append(i)
 943 |                 # otherwise we add it in the peaks
 944 |                 else:
 945 |                     self.peaks['up'] = np.append(self.peaks['up'],self.data[i]-self.init_threshold['up'])
 946 |                     self.Nt['up'] += 1
 947 |                     self.n += 1
 948 |                     # and we update the thresholds
 949 | 
 950 |                     g,s,l = self._grimshaw('up')
 951 |                     self.extreme_quantile['up'] = self._quantile('up',g,s)
 952 | 
 953 |             # case where the value exceeds the initial threshold but not the alarm ones
 954 |             elif self.data[i]>self.init_threshold['up']:
 955 |                     # we add it in the peaks
 956 |                     self.peaks['up'] = np.append(self.peaks['up'],self.data[i]-self.init_threshold['up'])
 957 |                     self.Nt['up'] += 1
 958 |                     self.n += 1
 959 |                     # and we update the thresholds
 960 | 
 961 |                     g,s,l = self._grimshaw('up')
 962 |                     self.extreme_quantile['up'] = self._quantile('up',g,s)
 963 |                     
 964 |             elif self.data[i]<self.extreme_quantile['down'] :
 965 |                 # if we want to alarm, we put it in the alarm list
 966 |                 if with_alarm:
 967 |                     alarm.append(i)
 968 |                 # otherwise we add it in the peaks
 969 |                 else:
 970 |                     self.peaks['down'] = np.append(self.peaks['down'],-(self.data[i]-self.init_threshold['down']))
 971 |                     self.Nt['down'] += 1
 972 |                     self.n += 1
 973 |                     # and we update the thresholds
 974 | 
 975 |                     g,s,l = self._grimshaw('down')
 976 |                     self.extreme_quantile['down'] = self._quantile('down',g,s)
 977 | 
 978 |             # case where the value exceeds the initial threshold but not the alarm ones
 979 |             elif self.data[i]<self.init_threshold['down']:
 980 |                     # we add it in the peaks
 981 |                     self.peaks['down'] = np.append(self.peaks['down'],-(self.data[i]-self.init_threshold['down']))
 982 |                     self.Nt['down'] += 1
 983 |                     self.n += 1
 984 |                     # and we update the thresholds
 985 | 
 986 |                     g,s,l = self._grimshaw('down')
 987 |                     self.extreme_quantile['down'] = self._quantile('down',g,s)
 988 |             else:
 989 |                 self.n += 1
 990 | 
 991 |                 
 992 |             thup.append(self.extreme_quantile['up']) # thresholds record
 993 |             thdown.append(self.extreme_quantile['down']) # thresholds record
 994 |         
 995 |         return {'upper_thresholds' : thup,'lower_thresholds' : thdown, 'alarms': alarm}
 996 |     
 997 |     def plot(self,run_results,with_alarm = True):
 998 |         """
 999 |         Plot the results of given by the run
1000 |         
1001 |         Parameters
1002 |         ----------
1003 |         run_results : dict
1004 |             results given by the 'run' method
1005 |         with_alarm : bool
1006 | 		    (default = True) If True, alarms are plotted.
1007 | 
1008 | 
1009 |         Returns
1010 |         ----------
1011 |         list
1012 |             list of the plots
1013 |             
1014 |         """
1015 |         x = range(self.data.size)
1016 |         K = run_results.keys()
1017 |         
1018 |         ts_fig, = plt.plot(x,self.data,color=air_force_blue)
1019 |         fig = [ts_fig]
1020 |         
1021 |         if 'upper_thresholds' in K:
1022 |             thup = run_results['upper_thresholds']
1023 |             uth_fig, = plt.plot(x,thup,color=deep_saffron,lw=2,ls='dashed')
1024 |             fig.append(uth_fig)
1025 |             
1026 |         if 'lower_thresholds' in K:
1027 |             thdown = run_results['lower_thresholds']
1028 |             lth_fig, = plt.plot(x,thdown,color=deep_saffron,lw=2,ls='dashed')
1029 |             fig.append(lth_fig)
1030 |         
1031 |         if with_alarm and ('alarms' in K):
1032 |             alarm = run_results['alarms']
1033 |             al_fig = plt.scatter(alarm,self.data[alarm],color='red')
1034 |             fig.append(al_fig)
1035 |             
1036 |         plt.xlim((0,self.data.size))
1037 | 
1038 |         
1039 |         return fig
1040 | 
1041 | 
1042 | 
1043 | 
1044 | 
1045 | 
1046 | 
1047 | 
1048 | """
1049 | ================================= WITH DRIFT ==================================
1050 | """
1051 | 
1052 | def backMean(X,d):
1053 |     M = []
1054 |     w = X[:d].sum()
1055 |     M.append(w/d)
1056 |     for i in range(d,len(X)):
1057 |         w = w - X[i-d] + X[i]
1058 |         M.append(w/d)
1059 |     return np.array(M)
1060 | 
1061 | 
1062 | 
1063 | class dSPOT:
1064 |     """
1065 |     This class allows to run DSPOT algorithm on univariate dataset (upper-bound)
1066 |     
1067 |     Attributes
1068 |     ----------
1069 |     proba : float
1070 |         Detection level (risk), chosen by the user
1071 |         
1072 |     depth : int
1073 |         Number of observations to compute the moving average
1074 |         
1075 |     extreme_quantile : float
1076 |         current threshold (bound between normal and abnormal events)
1077 |         
1078 |     data : numpy.array
1079 |         stream
1080 |     
1081 |     init_data : numpy.array
1082 |         initial batch of observations (for the calibration/initialization step)
1083 |     
1084 |     init_threshold : float
1085 |         initial threshold computed during the calibration step
1086 |     
1087 |     peaks : numpy.array
1088 |         array of peaks (excesses above the initial threshold)
1089 |     
1090 |     n : int
1091 |         number of observed values
1092 |     
1093 |     Nt : int
1094 |         number of observed peaks
1095 |     """
1096 |     def __init__(self, q, depth):
1097 |         self.proba = q
1098 |         self.extreme_quantile = None
1099 |         self.data = None
1100 |         self.init_data = None
1101 |         self.init_threshold = None
1102 |         self.peaks = None
1103 |         self.n = 0
1104 |         self.Nt = 0
1105 |         self.depth = depth
1106 |         
1107 |     def __str__(self):
1108 |         s = ''
1109 |         s += 'Streaming Peaks-Over-Threshold Object\n'
1110 |         s += 'Detection level q = %s\n' % self.proba
1111 |         if self.data is not None:
1112 |             s += 'Data imported : Yes\n'
1113 |             s += '\t initialization  : %s values\n' % self.init_data.size
1114 |             s += '\t stream : %s values\n' % self.data.size
1115 |         else:
1116 |             s += 'Data imported : No\n'
1117 |             return s
1118 |             
1119 |         if self.n == 0:
1120 |             s += 'Algorithm initialized : No\n'
1121 |         else:
1122 |             s += 'Algorithm initialized : Yes\n'
1123 |             s += '\t initial threshold : %s\n' % self.init_threshold
1124 |             
1125 |             r = self.n-self.init_data.size
1126 |             if r > 0:
1127 |                 s += 'Algorithm run : Yes\n'
1128 |                 s += '\t number of observations : %s (%.2f %%)\n' % (r,100*r/self.n)
1129 |                 s += '\t triggered alarms : %s (%.2f %%)\n' % (len(self.alarm),100*len(self.alarm)/self.n)
1130 |             else:
1131 |                 s += '\t number of peaks  : %s\n' % self.Nt
1132 |                 s += '\t extreme quantile : %s\n' % self.extreme_quantile
1133 |                 s += 'Algorithm run : No\n'
1134 |         return s
1135 |     
1136 |     
1137 |     def fit(self,init_data,data):
1138 |         """
1139 |         Import data to DSPOT object
1140 |         
1141 |         Parameters
1142 | 	    ----------
1143 | 	    init_data : list, numpy.array or pandas.Series
1144 | 		    initial batch to calibrate the algorithm
1145 |             
1146 |         data : numpy.array
1147 | 		    data for the run (list, np.array or pd.series)
1148 | 	
1149 |         """
1150 |         if isinstance(data,list):
1151 |             self.data = np.array(data)
1152 |         elif isinstance(data,np.ndarray):
1153 |             self.data = data
1154 |         elif isinstance(data,pd.Series):
1155 |             self.data = data.values
1156 |         else:
1157 |             print('This data format (%s) is not supported' % type(data))
1158 |             return
1159 |             
1160 |         if isinstance(init_data,list):
1161 |             self.init_data = np.array(init_data)
1162 |         elif isinstance(init_data,np.ndarray):
1163 |             self.init_data = init_data
1164 |         elif isinstance(init_data,pd.Series):
1165 |             self.init_data = init_data.values
1166 |         elif isinstance(init_data,int):
1167 |             self.init_data = self.data[:init_data]
1168 |             self.data = self.data[init_data:]
1169 |         elif isinstance(init_data,float) & (init_data<1) & (init_data>0):
1170 |             r = int(init_data*data.size)
1171 |             self.init_data = self.data[:r]
1172 |             self.data = self.data[r:]
1173 |         else:
1174 |             print('The initial data cannot be set')
1175 |             return
1176 |         
1177 |     def add(self,data):
1178 |         """
1179 |         This function allows to append data to the already fitted data
1180 |         
1181 |         Parameters
1182 | 	    ----------
1183 | 	    data : list, numpy.array, pandas.Series
1184 | 		    data to append
1185 |         """
1186 |         if isinstance(data,list):
1187 |             data = np.array(data)
1188 |         elif isinstance(data,np.ndarray):
1189 |             data = data
1190 |         elif isinstance(data,pd.Series):
1191 |             data = data.values
1192 |         else:
1193 |             print('This data format (%s) is not supported' % type(data))
1194 |             return
1195 |         
1196 |         self.data = np.append(self.data,data)
1197 |         return
1198 |     
1199 |     def initialize(self, verbose = True):
1200 |         """
1201 |         Run the calibration (initialization) step
1202 |         
1203 |         Parameters
1204 | 	    ----------
1205 | 	    verbose : bool
1206 | 		    (default = True) If True, gives details about the batch initialization
1207 |         """
1208 |         n_init = self.init_data.size - self.depth
1209 |         
1210 |         M = backMean(self.init_data,self.depth)
1211 |         T = self.init_data[self.depth:]-M[:-1] # new variable
1212 |         
1213 |         S = np.sort(T)     # we sort X to get the empirical quantile
1214 |         self.init_threshold = S[int(0.98*n_init)] # t is fixed for the whole algorithm
1215 | 
1216 |         # initial peaks
1217 |         self.peaks = T[T>self.init_threshold]-self.init_threshold 
1218 |         self.Nt = self.peaks.size
1219 |         self.n = n_init
1220 |         
1221 |         if verbose:
1222 |             print('Initial threshold : %s' % self.init_threshold)
1223 |             print('Number of peaks : %s' % self.Nt)
1224 |             print('Grimshaw maximum log-likelihood estimation ... ', end = '')
1225 |             
1226 |         g,s,l = self._grimshaw()
1227 |         self.extreme_quantile = self._quantile(g,s)
1228 |         
1229 |         if verbose:
1230 |             print('[done]')
1231 |             print('\t'+chr(0x03B3) + ' = ' + str(g))
1232 |             print('\t'+chr(0x03C3) + ' = ' + str(s))
1233 |             print('\tL = ' + str(l))
1234 |             print('Extreme quantile (probability = %s): %s' % (self.proba,self.extreme_quantile))
1235 |         
1236 |         return
1237 |     
1238 |     
1239 |     
1240 |     
1241 |     def _rootsFinder(fun,jac,bounds,npoints,method):
1242 |         """
1243 |         Find possible roots of a scalar function
1244 |         
1245 |         Parameters
1246 |         ----------
1247 |         fun : function
1248 | 		    scalar function 
1249 |         jac : function
1250 |             first order derivative of the function  
1251 |         bounds : tuple
1252 |             (min,max) interval for the roots search    
1253 |         npoints : int
1254 |             maximum number of roots to output      
1255 |         method : str
1256 |             'regular' : regular sample of the search interval, 'random' : uniform (distribution) sample of the search interval
1257 |         
1258 |         Returns
1259 |         ----------
1260 |         numpy.array
1261 |             possible roots of the function
1262 |         """
1263 |         if method == 'regular':
1264 |             step = (bounds[1]-bounds[0])/(npoints+1)
1265 |             X0 = np.arange(bounds[0]+step,bounds[1],step)
1266 |         elif method == 'random':
1267 |             X0 = np.random.uniform(bounds[0],bounds[1],npoints)
1268 |         
1269 |         def objFun(X,f,jac):
1270 |             g = 0
1271 |             j = np.zeros(X.shape)
1272 |             i = 0
1273 |             for x in X:
1274 |                 fx = f(x)
1275 |                 g = g+fx**2
1276 |                 j[i] = 2*fx*jac(x)
1277 |                 i = i+1
1278 |             return g,j
1279 |         
1280 |         opt = minimize(lambda X:objFun(X,fun,jac), X0, 
1281 |                        method='L-BFGS-B', 
1282 |                        jac=True, bounds=[bounds]*len(X0))
1283 |         
1284 |         X = opt.x
1285 |         np.round(X,decimals = 5)
1286 |         return np.unique(X)
1287 |     
1288 |     
1289 |     def _log_likelihood(Y,gamma,sigma):
1290 |         """
1291 |         Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)
1292 |         
1293 |         Parameters
1294 |         ----------
1295 |         Y : numpy.array
1296 | 		    observations
1297 |         gamma : float
1298 |             GPD index parameter
1299 |         sigma : float
1300 |             GPD scale parameter (>0)   
1301 | 
1302 |         Returns
1303 |         ----------
1304 |         float
1305 |             log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0)
1306 |         """
1307 |         n = Y.size
1308 |         if gamma != 0:
1309 |             tau = gamma/sigma
1310 |             L = -n * log(sigma) - ( 1 + (1/gamma) ) * ( np.log(1+tau*Y) ).sum()
1311 |         else:
1312 |             L = n * ( 1 + log(Y.mean()) )
1313 |         return L
1314 | 
1315 | 
1316 |     def _grimshaw(self,epsilon = 1e-8, n_points = 10):
1317 |         """
1318 |         Compute the GPD parameters estimation with the Grimshaw's trick
1319 |         
1320 |         Parameters
1321 |         ----------
1322 |         epsilon : float
1323 | 		    numerical parameter to perform (default : 1e-8)
1324 |         n_points : int
1325 |             maximum number of candidates for maximum likelihood (default : 10)
1326 | 
1327 |         Returns
1328 |         ----------
1329 |         gamma_best,sigma_best,ll_best
1330 |             gamma estimates, sigma estimates and corresponding log-likelihood
1331 |         """
1332 |         def u(s):
1333 |             return 1 + np.log(s).mean()
1334 |             
1335 |         def v(s):
1336 |             return np.mean(1/s)
1337 |         
1338 |         def w(Y,t):
1339 |             s = 1+t*Y
1340 |             us = u(s)
1341 |             vs = v(s)
1342 |             return us*vs-1
1343 |         
1344 |         def jac_w(Y,t):
1345 |             s = 1+t*Y
1346 |             us = u(s)
1347 |             vs = v(s)
1348 |             jac_us = (1/t)*(1-vs)
1349 |             jac_vs = (1/t)*(-vs+np.mean(1/s**2))
1350 |             return us*jac_vs+vs*jac_us
1351 |             
1352 |     
1353 |         Ym = self.peaks.min()
1354 |         YM = self.peaks.max()
1355 |         Ymean = self.peaks.mean()
1356 |         
1357 |         
1358 |         a = -1/YM
1359 |         if abs(a)<2*epsilon:
1360 |             epsilon = abs(a)/n_points
1361 |         
1362 |         a = a + epsilon
1363 |         b = 2*(Ymean-Ym)/(Ymean*Ym)
1364 |         c = 2*(Ymean-Ym)/(Ym**2)
1365 |     
1366 |         # We look for possible roots
1367 |         left_zeros = SPOT._rootsFinder(lambda t: w(self.peaks,t),
1368 |                                  lambda t: jac_w(self.peaks,t),
1369 |                                  (a+epsilon,-epsilon),
1370 |                                  n_points,'regular')
1371 |         
1372 |         right_zeros = SPOT._rootsFinder(lambda t: w(self.peaks,t),
1373 |                                   lambda t: jac_w(self.peaks,t),
1374 |                                   (b,c),
1375 |                                   n_points,'regular')
1376 |     
1377 |         # all the possible roots
1378 |         zeros = np.concatenate((left_zeros,right_zeros))
1379 |         
1380 |         # 0 is always a solution so we initialize with it
1381 |         gamma_best = 0
1382 |         sigma_best = Ymean
1383 |         ll_best = SPOT._log_likelihood(self.peaks,gamma_best,sigma_best)
1384 |         
1385 |         # we look for better candidates
1386 |         for z in zeros:
1387 |             gamma = u(1+z*self.peaks)-1
1388 |             sigma = gamma/z
1389 |             ll = dSPOT._log_likelihood(self.peaks,gamma,sigma)
1390 |             if ll>ll_best:
1391 |                 gamma_best = gamma
1392 |                 sigma_best = sigma
1393 |                 ll_best = ll
1394 |     
1395 |         return gamma_best,sigma_best,ll_best
1396 | 
1397 |     
1398 | 
1399 |     def _quantile(self,gamma,sigma):
1400 |         """
1401 |         Compute the quantile at level 1-q
1402 |         
1403 |         Parameters
1404 |         ----------
1405 |         gamma : float
1406 | 		    GPD parameter
1407 |         sigma : float
1408 |             GPD parameter
1409 | 
1410 |         Returns
1411 |         ----------
1412 |         float
1413 |             quantile at level 1-q for the GPD(γ,σ,μ=0)
1414 |         """
1415 |         r = self.n * self.proba / self.Nt
1416 |         if gamma != 0:
1417 |             return self.init_threshold + (sigma/gamma)*(pow(r,-gamma)-1)
1418 |         else:
1419 |             return self.init_threshold - sigma*log(r)
1420 | 
1421 |         
1422 |     def run(self, with_alarm = True):
1423 |         """
1424 |         Run biSPOT on the stream
1425 |         
1426 |         Parameters
1427 |         ----------
1428 |         with_alarm : bool
1429 | 		    (default = True) If False, SPOT will adapt the threshold assuming \
1430 |             there is no abnormal values
1431 | 
1432 | 
1433 |         Returns
1434 |         ----------
1435 |         dict
1436 |             keys : 'upper_thresholds', 'lower_thresholds' and 'alarms'
1437 |             
1438 |             '***-thresholds' contains the extreme quantiles and 'alarms' contains \
1439 |             the indexes of the values which have triggered alarms
1440 |             
1441 |         """
1442 |         if (self.n>self.init_data.size):
1443 |             print('Warning : the algorithm seems to have already been run, you \
1444 |             should initialize before running again')
1445 |             return {}
1446 |         
1447 |         # actual normal window
1448 |         W = self.init_data[-self.depth:]
1449 |         
1450 |         # list of the thresholds
1451 |         th = []
1452 |         alarm = []
1453 |         # Loop over the stream
1454 |         for i in tqdm.tqdm(range(self.data.size)):
1455 |             Mi = W.mean()
1456 |             # If the observed value exceeds the current threshold (alarm case)
1457 |             if (self.data[i]-Mi)>self.extreme_quantile:
1458 |                 # if we want to alarm, we put it in the alarm list
1459 |                 if with_alarm:
1460 |                     alarm.append(i)
1461 |                 # otherwise we add it in the peaks
1462 |                 else:
1463 |                     self.peaks = np.append(self.peaks,self.data[i]-Mi-self.init_threshold)
1464 |                     self.Nt += 1
1465 |                     self.n += 1
1466 |                     # and we update the thresholds
1467 | 
1468 |                     g,s,l = self._grimshaw()
1469 |                     self.extreme_quantile = self._quantile(g,s) #+ Mi
1470 |                     W = np.append(W[1:],self.data[i])
1471 | 
1472 |             # case where the value exceeds the initial threshold but not the alarm ones
1473 |             elif (self.data[i]-Mi)>self.init_threshold:
1474 |                     # we add it in the peaks
1475 |                     self.peaks = np.append(self.peaks,self.data[i]-Mi-self.init_threshold)
1476 |                     self.Nt += 1
1477 |                     self.n += 1
1478 |                     # and we update the thresholds
1479 | 
1480 |                     g,s,l = self._grimshaw()
1481 |                     self.extreme_quantile = self._quantile(g,s) #+ Mi
1482 |                     W = np.append(W[1:],self.data[i])
1483 |             else:
1484 |                 self.n += 1
1485 |                 W = np.append(W[1:],self.data[i])
1486 | 
1487 |                 
1488 |             th.append(self.extreme_quantile+Mi) # thresholds record
1489 |         
1490 |         return {'thresholds' : th, 'alarms': alarm}
1491 |     
1492 | 
1493 |     def plot(self,run_results, with_alarm = True):
1494 |         """
1495 |         Plot the results given by the run
1496 |         
1497 |         Parameters
1498 |         ----------
1499 |         run_results : dict
1500 |             results given by the 'run' method
1501 |         with_alarm : bool
1502 | 		    (default = True) If True, alarms are plotted.
1503 | 
1504 | 
1505 |         Returns
1506 |         ----------
1507 |         list
1508 |             list of the plots
1509 |             
1510 |         """
1511 |         x = range(self.data.size)
1512 |         K = run_results.keys()
1513 |         
1514 |         ts_fig, = plt.plot(x,self.data,color=air_force_blue)
1515 |         fig = [ts_fig]
1516 |         
1517 | #        if 'upper_thresholds' in K:
1518 | #            thup = run_results['upper_thresholds']
1519 | #            uth_fig, = plt.plot(x,thup,color=deep_saffron,lw=2,ls='dashed')
1520 | #            fig.append(uth_fig)
1521 | #            
1522 | #        if 'lower_thresholds' in K:
1523 | #            thdown = run_results['lower_thresholds']
1524 | #            lth_fig, = plt.plot(x,thdown,color=deep_saffron,lw=2,ls='dashed')
1525 | #            fig.append(lth_fig)
1526 |         
1527 |         if 'thresholds' in K:
1528 |             th = run_results['thresholds']
1529 |             th_fig, = plt.plot(x,th,color=deep_saffron,lw=2,ls='dashed')
1530 |             fig.append(th_fig)
1531 |         
1532 |         if with_alarm and ('alarms' in K):
1533 |             alarm = run_results['alarms']
1534 |             if len(alarm)>0:
1535 |                 plt.scatter(alarm,self.data[alarm],color='red')
1536 |             
1537 |         plt.xlim((0,self.data.size))
1538 | 
1539 |         
1540 |         return fig
1541 |             
1542 | 
1543 | 
1544 | 
1545 | 
1546 | 
1547 | 
1548 | """
1549 | =========================== DRIFT & DOUBLE BOUNDS =============================
1550 | """
1551 | 
1552 | 
1553 | 
1554 | class bidSPOT:
1555 |     """
1556 |     This class allows to run DSPOT algorithm on univariate dataset (upper and lower bounds)
1557 |     
1558 |     Attributes
1559 |     ----------
1560 |     proba : float
1561 |         Detection level (risk), chosen by the user
1562 |         
1563 |     depth : int
1564 |         Number of observations to compute the moving average
1565 |         
1566 |     extreme_quantile : float
1567 |         current threshold (bound between normal and abnormal events)
1568 |         
1569 |     data : numpy.array
1570 |         stream
1571 |     
1572 |     init_data : numpy.array
1573 |         initial batch of observations (for the calibration/initialization step)
1574 |     
1575 |     init_threshold : float
1576 |         initial threshold computed during the calibration step
1577 |     
1578 |     peaks : numpy.array
1579 |         array of peaks (excesses above the initial threshold)
1580 |     
1581 |     n : int
1582 |         number of observed values
1583 |     
1584 |     Nt : int
1585 |         number of observed peaks
1586 |     """
1587 |     def __init__(self, q = 1e-4, depth = 10):
1588 |         self.proba = q
1589 |         self.data = None
1590 |         self.init_data = None
1591 |         self.n = 0
1592 |         self.depth = depth
1593 |         
1594 |         nonedict =  {'up':None,'down':None}
1595 |         
1596 |         self.extreme_quantile = dict.copy(nonedict)
1597 |         self.init_threshold = dict.copy(nonedict)
1598 |         self.peaks = dict.copy(nonedict)
1599 |         self.gamma = dict.copy(nonedict)
1600 |         self.sigma = dict.copy(nonedict)
1601 |         self.Nt = {'up':0,'down':0}
1602 |         
1603 |         
1604 |     def __str__(self):
1605 |         s = ''
1606 |         s += 'Streaming Peaks-Over-Threshold Object\n'
1607 |         s += 'Detection level q = %s\n' % self.proba
1608 |         if self.data is not None:
1609 |             s += 'Data imported : Yes\n'
1610 |             s += '\t initialization  : %s values\n' % self.init_data.size
1611 |             s += '\t stream : %s values\n' % self.data.size
1612 |         else:
1613 |             s += 'Data imported : No\n'
1614 |             return s
1615 |             
1616 |         if self.n == 0:
1617 |             s += 'Algorithm initialized : No\n'
1618 |         else:
1619 |             s += 'Algorithm initialized : Yes\n'
1620 |             s += '\t initial threshold : %s\n' % self.init_threshold
1621 |             
1622 |             r = self.n-self.init_data.size
1623 |             if r > 0:
1624 |                 s += 'Algorithm run : Yes\n'
1625 |                 s += '\t number of observations : %s (%.2f %%)\n' % (r,100*r/self.n)
1626 |                 s += '\t triggered alarms : %s (%.2f %%)\n' % (len(self.alarm),100*len(self.alarm)/self.n)
1627 |             else:
1628 |                 s += '\t number of peaks  : %s\n' % self.Nt
1629 |                 s += '\t upper extreme quantile : %s\n' % self.extreme_quantile['up']
1630 |                 s += '\t lower extreme quantile : %s\n' % self.extreme_quantile['down']
1631 |                 s += 'Algorithm run : No\n'
1632 |         return s
1633 |     
1634 |     
1635 |     def fit(self,init_data,data):
1636 |         """
1637 |         Import data to biDSPOT object
1638 |         
1639 |         Parameters
1640 | 	    ----------
1641 | 	    init_data : list, numpy.array or pandas.Series
1642 | 		    initial batch to calibrate the algorithm
1643 |             
1644 |         data : numpy.array
1645 | 		    data for the run (list, np.array or pd.series)
1646 | 	
1647 |         """
1648 |         if isinstance(data,list):
1649 |             self.data = np.array(data)
1650 |         elif isinstance(data,np.ndarray):
1651 |             self.data = data
1652 |         elif isinstance(data,pd.Series):
1653 |             self.data = data.values
1654 |         else:
1655 |             print('This data format (%s) is not supported' % type(data))
1656 |             return
1657 |             
1658 |         if isinstance(init_data,list):
1659 |             self.init_data = np.array(init_data)
1660 |         elif isinstance(init_data,np.ndarray):
1661 |             self.init_data = init_data
1662 |         elif isinstance(init_data,pd.Series):
1663 |             self.init_data = init_data.values
1664 |         elif isinstance(init_data,int):
1665 |             self.init_data = self.data[:init_data]
1666 |             self.data = self.data[init_data:]
1667 |         elif isinstance(init_data,float) & (init_data<1) & (init_data>0):
1668 |             r = int(init_data*data.size)
1669 |             self.init_data = self.data[:r]
1670 |             self.data = self.data[r:]
1671 |         else:
1672 |             print('The initial data cannot be set')
1673 |             return
1674 |         
1675 |     def add(self,data):
1676 |         """
1677 |         This function allows to append data to the already fitted data
1678 |         
1679 |         Parameters
1680 | 	    ----------
1681 | 	    data : list, numpy.array, pandas.Series
1682 | 		    data to append
1683 |         """
1684 |         if isinstance(data,list):
1685 |             data = np.array(data)
1686 |         elif isinstance(data,np.ndarray):
1687 |             data = data
1688 |         elif isinstance(data,pd.Series):
1689 |             data = data.values
1690 |         else:
1691 |             print('This data format (%s) is not supported' % type(data))
1692 |             return
1693 |         
1694 |         self.data = np.append(self.data,data)
1695 |         return
1696 |     
1697 |     def initialize(self, verbose = True):
1698 |         """
1699 |         Run the calibration (initialization) step
1700 |         
1701 |         Parameters
1702 | 	    ----------
1703 | 	    verbose : bool
1704 | 		    (default = True) If True, gives details about the batch initialization
1705 |         """
1706 |         n_init = self.init_data.size - self.depth
1707 |         
1708 |         M = backMean(self.init_data,self.depth)
1709 |         T = self.init_data[self.depth:]-M[:-1] # new variable
1710 |         
1711 |         S = np.sort(T)     # we sort T to get the empirical quantile
1712 |         self.init_threshold['up'] = S[int(0.98*n_init)] # t is fixed for the whole algorithm
1713 |         self.init_threshold['down'] = S[int(0.02*n_init)] # t is fixed for the whole algorithm
1714 | 
1715 |         # initial peaks
1716 |         self.peaks['up'] = T[T>self.init_threshold['up']]-self.init_threshold['up']
1717 |         self.peaks['down'] = -( T[ T<self.init_threshold['down'] ] - self.init_threshold['down'] )
1718 |         self.Nt['up'] = self.peaks['up'].size
1719 |         self.Nt['down'] = self.peaks['down'].size
1720 |         self.n = n_init
1721 |         
1722 |         if verbose:
1723 |             print('Initial threshold : %s' % self.init_threshold)
1724 |             print('Number of peaks : %s' % self.Nt)
1725 |             print('Grimshaw maximum log-likelihood estimation ... ', end = '')
1726 |             
1727 |         l = {'up':None,'down':None}
1728 |         for side in ['up','down']:
1729 |             g,s,l[side] = self._grimshaw(side)
1730 |             self.extreme_quantile[side] = self._quantile(side,g,s)
1731 |             self.gamma[side] = g
1732 |             self.sigma[side] = s
1733 |         
1734 |         ltab = 20
1735 |         form = ('\t'+'%20s' + '%20.2f' + '%20.2f')
1736 |         if verbose:
1737 |             print('[done]')
1738 |             print('\t' + 'Parameters'.rjust(ltab) + 'Upper'.rjust(ltab) + 'Lower'.rjust(ltab))
1739 |             print('\t' + '-'*ltab*3)
1740 |             print(form % (chr(0x03B3),self.gamma['up'],self.gamma['down']))
1741 |             print(form % (chr(0x03C3),self.sigma['up'],self.sigma['down']))
1742 |             print(form % ('likelihood',l['up'],l['down']))
1743 |             print(form % ('Extreme quantile',self.extreme_quantile['up'],self.extreme_quantile['down']))
1744 |             print('\t' + '-'*ltab*3)
1745 |         return 
1746 |     
1747 |     
1748 |     
1749 |     
1750 |     def _rootsFinder(fun,jac,bounds,npoints,method):
1751 |         """
1752 |         Find possible roots of a scalar function
1753 |         
1754 |         Parameters
1755 |         ----------
1756 |         fun : function
1757 | 		    scalar function 
1758 |         jac : function
1759 |             first order derivative of the function  
1760 |         bounds : tuple
1761 |             (min,max) interval for the roots search    
1762 |         npoints : int
1763 |             maximum number of roots to output      
1764 |         method : str
1765 |             'regular' : regular sample of the search interval, 'random' : uniform (distribution) sample of the search interval
1766 |         
1767 |         Returns
1768 |         ----------
1769 |         numpy.array
1770 |             possible roots of the function
1771 |         """
1772 |         if method == 'regular':
1773 |             step = (bounds[1]-bounds[0])/(npoints+1)
1774 |             X0 = np.arange(bounds[0]+step,bounds[1],step)
1775 |         elif method == 'random':
1776 |             X0 = np.random.uniform(bounds[0],bounds[1],npoints)
1777 |         
1778 |         def objFun(X,f,jac):
1779 |             g = 0
1780 |             j = np.zeros(X.shape)
1781 |             i = 0
1782 |             for x in X:
1783 |                 fx = f(x)
1784 |                 g = g+fx**2
1785 |                 j[i] = 2*fx*jac(x)
1786 |                 i = i+1
1787 |             return g,j
1788 |         
1789 |         opt = minimize(lambda X:objFun(X,fun,jac), X0, 
1790 |                        method='L-BFGS-B', 
1791 |                        jac=True, bounds=[bounds]*len(X0))
1792 |         
1793 |         X = opt.x
1794 |         np.round(X,decimals = 5)
1795 |         return np.unique(X)
1796 |     
1797 |     
1798 |     def _log_likelihood(Y,gamma,sigma):
1799 |         """
1800 |         Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)
1801 |         
1802 |         Parameters
1803 |         ----------
1804 |         Y : numpy.array
1805 | 		    observations
1806 |         gamma : float
1807 |             GPD index parameter
1808 |         sigma : float
1809 |             GPD scale parameter (>0)   
1810 | 
1811 |         Returns
1812 |         ----------
1813 |         float
1814 |             log-likelihood of the sample Y to be drawn from a GPD(γ,σ,μ=0)
1815 |         """
1816 |         n = Y.size
1817 |         if gamma != 0:
1818 |             tau = gamma/sigma
1819 |             L = -n * log(sigma) - ( 1 + (1/gamma) ) * ( np.log(1+tau*Y) ).sum()
1820 |         else:
1821 |             L = n * ( 1 + log(Y.mean()) )
1822 |         return L
1823 | 
1824 | 
1825 |     def _grimshaw(self,side,epsilon = 1e-8, n_points = 8):
1826 |         """
1827 |         Compute the GPD parameters estimation with the Grimshaw's trick
1828 |         
1829 |         Parameters
1830 |         ----------
1831 |         epsilon : float
1832 | 		    numerical parameter to perform (default : 1e-8)
1833 |         n_points : int
1834 |             maximum number of candidates for maximum likelihood (default : 10)
1835 | 
1836 |         Returns
1837 |         ----------
1838 |         gamma_best,sigma_best,ll_best
1839 |             gamma estimates, sigma estimates and corresponding log-likelihood
1840 |         """
1841 |         def u(s):
1842 |             return 1 + np.log(s).mean()
1843 |             
1844 |         def v(s):
1845 |             return np.mean(1/s)
1846 |         
1847 |         def w(Y,t):
1848 |             s = 1+t*Y
1849 |             us = u(s)
1850 |             vs = v(s)
1851 |             return us*vs-1
1852 |         
1853 |         def jac_w(Y,t):
1854 |             s = 1+t*Y
1855 |             us = u(s)
1856 |             vs = v(s)
1857 |             jac_us = (1/t)*(1-vs)
1858 |             jac_vs = (1/t)*(-vs+np.mean(1/s**2))
1859 |             return us*jac_vs+vs*jac_us
1860 |             
1861 |     
1862 |         Ym = self.peaks[side].min()
1863 |         YM = self.peaks[side].max()
1864 |         Ymean = self.peaks[side].mean()
1865 |         
1866 |         
1867 |         a = -1/YM
1868 |         if abs(a)<2*epsilon:
1869 |             epsilon = abs(a)/n_points
1870 |         
1871 |         a = a + epsilon
1872 |         b = 2*(Ymean-Ym)/(Ymean*Ym)
1873 |         c = 2*(Ymean-Ym)/(Ym**2)
1874 |     
1875 |         # We look for possible roots
1876 |         left_zeros = bidSPOT._rootsFinder(lambda t: w(self.peaks[side],t),
1877 |                                  lambda t: jac_w(self.peaks[side],t),
1878 |                                  (a+epsilon,-epsilon),
1879 |                                  n_points,'regular')
1880 |         
1881 |         right_zeros = bidSPOT._rootsFinder(lambda t: w(self.peaks[side],t),
1882 |                                   lambda t: jac_w(self.peaks[side],t),
1883 |                                   (b,c),
1884 |                                   n_points,'regular')
1885 |     
1886 |         # all the possible roots
1887 |         zeros = np.concatenate((left_zeros,right_zeros))
1888 |         
1889 |         # 0 is always a solution so we initialize with it
1890 |         gamma_best = 0
1891 |         sigma_best = Ymean
1892 |         ll_best = bidSPOT._log_likelihood(self.peaks[side],gamma_best,sigma_best)
1893 |         
1894 |         # we look for better candidates
1895 |         for z in zeros:
1896 |             gamma = u(1+z*self.peaks[side])-1
1897 |             sigma = gamma/z
1898 |             ll = bidSPOT._log_likelihood(self.peaks[side],gamma,sigma)
1899 |             if ll>ll_best:
1900 |                 gamma_best = gamma
1901 |                 sigma_best = sigma
1902 |                 ll_best = ll
1903 |     
1904 |         return gamma_best,sigma_best,ll_best
1905 | 
1906 |     
1907 | 
1908 |     def _quantile(self,side,gamma,sigma):
1909 |         """
1910 |         Compute the quantile at level 1-q for a given side
1911 |         
1912 |         Parameters
1913 |         ----------
1914 |         side : str
1915 |             'up' or 'down'
1916 |         gamma : float
1917 | 		    GPD parameter
1918 |         sigma : float
1919 |             GPD parameter
1920 | 
1921 |         Returns
1922 |         ----------
1923 |         float
1924 |             quantile at level 1-q for the GPD(γ,σ,μ=0)
1925 |         """
1926 |         if side == 'up':
1927 |             r = self.n * self.proba / self.Nt[side]
1928 |             if gamma != 0:
1929 |                 return self.init_threshold['up'] + (sigma/gamma)*(pow(r,-gamma)-1)
1930 |             else:
1931 |                 return self.init_threshold['up'] - sigma*log(r)
1932 |         elif side == 'down':
1933 |             r = self.n * self.proba / self.Nt[side]
1934 |             if gamma != 0:
1935 |                 return self.init_threshold['down'] - (sigma/gamma)*(pow(r,-gamma)-1)
1936 |             else:
1937 |                 return self.init_threshold['down'] + sigma*log(r)
1938 |         else:
1939 |             print('error : the side is not right')
1940 | 
1941 |         
1942 |     def run(self, with_alarm = True, plot = True):
1943 |         """
1944 |         Run biDSPOT on the stream
1945 |         
1946 |         Parameters
1947 |         ----------
1948 |         with_alarm : bool
1949 | 		    (default = True) If False, SPOT will adapt the threshold assuming \
1950 |             there is no abnormal values
1951 | 
1952 | 
1953 |         Returns
1954 |         ----------
1955 |         dict
1956 |             keys : 'upper_thresholds', 'lower_thresholds' and 'alarms'
1957 |             
1958 |             '***-thresholds' contains the extreme quantiles and 'alarms' contains \
1959 |             the indexes of the values which have triggered alarms
1960 |             
1961 |         """
1962 |         if (self.n>self.init_data.size):
1963 |             print('Warning : the algorithm seems to have already been run, you \
1964 |             should initialize before running again')
1965 |             return {}
1966 |         
1967 |         # actual normal window
1968 |         W = self.init_data[-self.depth:]
1969 |         
1970 |         # list of the thresholds
1971 |         thup = []
1972 |         thdown = []
1973 |         alarm = []
1974 |         # Loop over the stream
1975 |         for i in tqdm.tqdm(range(self.data.size)):
1976 |             Mi = W.mean()
1977 |             Ni = self.data[i]-Mi
1978 |             # If the observed value exceeds the current threshold (alarm case)
1979 |             if Ni>self.extreme_quantile['up'] :
1980 |                 # if we want to alarm, we put it in the alarm list
1981 |                 if with_alarm:
1982 |                     alarm.append(i)
1983 |                 # otherwise we add it in the peaks
1984 |                 else:
1985 |                     self.peaks['up'] = np.append(self.peaks['up'],Ni-self.init_threshold['up'])
1986 |                     self.Nt['up'] += 1
1987 |                     self.n += 1
1988 |                     # and we update the thresholds
1989 | 
1990 |                     g,s,l = self._grimshaw('up')
1991 |                     self.extreme_quantile['up'] = self._quantile('up',g,s)
1992 |                     W = np.append(W[1:],self.data[i])
1993 |                     
1994 |             # case where the value exceeds the initial threshold but not the alarm ones
1995 |             elif Ni>self.init_threshold['up']:
1996 |                     # we add it in the peaks
1997 |                     self.peaks['up'] = np.append(self.peaks['up'],Ni-self.init_threshold['up'])
1998 |                     self.Nt['up'] += 1
1999 |                     self.n += 1
2000 |                     # and we update the thresholds
2001 |                     g,s,l = self._grimshaw('up')
2002 |                     self.extreme_quantile['up'] = self._quantile('up',g,s)
2003 |                     W = np.append(W[1:],self.data[i])
2004 |                     
2005 |             elif Ni<self.extreme_quantile['down'] :
2006 |                 # if we want to alarm, we put it in the alarm list
2007 |                 if with_alarm:
2008 |                     alarm.append(i)
2009 |                 # otherwise we add it in the peaks
2010 |                 else:
2011 |                     self.peaks['down'] = np.append(self.peaks['down'],-(Ni-self.init_threshold['down']))
2012 |                     self.Nt['down'] += 1
2013 |                     self.n += 1
2014 |                     # and we update the thresholds
2015 | 
2016 |                     g,s,l = self._grimshaw('down')
2017 |                     self.extreme_quantile['down'] = self._quantile('down',g,s)
2018 |                     W = np.append(W[1:],self.data[i])
2019 |                     
2020 |             # case where the value exceeds the initial threshold but not the alarm ones
2021 |             elif Ni<self.init_threshold['down']:
2022 |                     # we add it in the peaks
2023 |                     self.peaks['down'] = np.append(self.peaks['down'],-(Ni-self.init_threshold['down']))
2024 |                     self.Nt['down'] += 1
2025 |                     self.n += 1
2026 |                     # and we update the thresholds
2027 | 
2028 |                     g,s,l = self._grimshaw('down')
2029 |                     self.extreme_quantile['down'] = self._quantile('down',g,s)
2030 |                     W = np.append(W[1:],self.data[i])
2031 |             else:
2032 |                 self.n += 1
2033 |                 W = np.append(W[1:],self.data[i])
2034 | 
2035 |                 
2036 |             thup.append(self.extreme_quantile['up']+Mi) # upper thresholds record
2037 |             thdown.append(self.extreme_quantile['down']+Mi) # lower thresholds record
2038 |         
2039 |         return {'upper_thresholds' : thup,'lower_thresholds' : thdown, 'alarms': alarm}
2040 |     
2041 | 
2042 |     def plot(self,run_results, with_alarm = True):
2043 |         """
2044 |         Plot the results given by the run
2045 |         
2046 |         Parameters
2047 |         ----------
2048 |         run_results : dict
2049 |             results given by the 'run' method
2050 |         with_alarm : bool
2051 | 		    (default = True) If True, alarms are plotted.
2052 | 
2053 | 
2054 |         Returns
2055 |         ----------
2056 |         list
2057 |             list of the plots
2058 |             
2059 |         """
2060 |         x = range(self.data.size)
2061 |         K = run_results.keys()
2062 |         
2063 |         ts_fig, = plt.plot(x,self.data,color=air_force_blue)
2064 |         fig = [ts_fig]
2065 |         
2066 |         if 'upper_thresholds' in K:
2067 |             thup = run_results['upper_thresholds']
2068 |             uth_fig, = plt.plot(x,thup,color=deep_saffron,lw=2,ls='dashed')
2069 |             fig.append(uth_fig)
2070 |             
2071 |         if 'lower_thresholds' in K:
2072 |             thdown = run_results['lower_thresholds']
2073 |             lth_fig, = plt.plot(x,thdown,color=deep_saffron,lw=2,ls='dashed')
2074 |             fig.append(lth_fig)
2075 |         
2076 |         if with_alarm and ('alarms' in K):
2077 |             alarm = run_results['alarms']
2078 |             if len(alarm)>0:
2079 |                 al_fig = plt.scatter(alarm,self.data[alarm],color='red')
2080 |                 fig.append(al_fig)
2081 |             
2082 |         plt.xlim((0,self.data.size))
2083 | 
2084 |         return fig


--------------------------------------------------------------------------------