├── README.md
└── changepoint_detection.py


/README.md:
--------------------------------------------------------------------------------
1 | # change_point_detection
2 | Examples of online and offline changepoint detection using the ruptures and changefinder packages
3 | 


--------------------------------------------------------------------------------
/changepoint_detection.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import eia
 5 | import changefinder
 6 | import ruptures as rpt
 7 | 
 8 | def retrieve_time_series(api, series_ID):
 9 |     """
10 |     Return the time series dataframe, based on API and unique Series ID
11 |     api: API that we're connected to
12 |     series_ID: string. Name of the series that we want to pull from the EIA API
13 |     """
14 |     #Retrieve Data By Series ID 
15 |     series_search = api.data_by_series(series=series_ID)
16 |     ##Create a pandas dataframe from the retrieved time series
17 |     df = pd.DataFrame(series_search)
18 |     return df
19 | 
if __name__ == "__main__":
    # Connect to the EIA API (substitute your own API key)
    eia_client = eia.API('YOUR API KEY HERE')

    # Download the WTI oil price series
    wti = retrieve_time_series(eia_client, 'PET.RWTC.D')
    # Turn the index into an ordinary column so it can serve as the date
    wti = wti.reset_index(level=0)
    # Rename the columns for easier analysis
    wti = wti.rename(columns={'index': 'Date', wti.columns[1]: 'WTI_Price'})
    # Strip the last three characters of each date string, then parse the
    # remainder into datetime values
    date_text = wti['Date'].astype(str).str[:-3]
    wti['Date'] = pd.to_datetime(date_text, format='%Y %m%d')
    # Keep only observations dated 2014-01-01 or later
    wti = wti[wti['Date'] >= '2014-01-01']

    # Price values as a 1D numpy array, the input to every ruptures search
    signal = np.array(wti['WTI_Price'])
    fig_size = (10, 6)

    # RUPTURES PACKAGE
    # Pelt search: the number of breakpoints is governed by the penalty
    pelt_bkps = rpt.Pelt(model="rbf").fit(signal).predict(pen=10)
    rpt.display(signal, pelt_bkps, figsize=fig_size)
    plt.title('Change Point Detection: Pelt Search Method')
    plt.show()

    # The remaining searches each request exactly 10 breakpoints. Every
    # estimator is built inside the loop so that construction, fitting,
    # prediction and plotting happen one method at a time.
    fixed_count_searches = (
        (rpt.Binseg,
         {'model': "l2"},
         'Change Point Detection: Binary Segmentation Search Method'),
        (rpt.Window,
         {'width': 40, 'model': "l2"},
         'Change Point Detection: Window-Based Search Method'),
        (rpt.Dynp,
         {'model': "l1", 'min_size': 3, 'jump': 5},
         'Change Point Detection: Dynamic Programming Search Method'),
    )
    for search_cls, search_kwargs, plot_title in fixed_count_searches:
        breakpoints = search_cls(**search_kwargs).fit(signal).predict(n_bkps=10)
        rpt.show.display(signal, breakpoints, figsize=fig_size)
        plt.title(plot_title)
        plt.show()

    # Synthetic test signal: three 100-sample uniform-noise segments with
    # offsets 5, 10 and 5
    segments = [np.random.rand(100) + offset for offset in (5, 10, 5)]
    synthetic = np.concatenate(segments)

    # CHANGEFINDER PACKAGE
    fig, (data_ax, score_ax) = plt.subplots(2, 1)
    fig.subplots_adjust(hspace=0.4)
    data_ax.plot(synthetic)
    data_ax.set_title("data point")
    # Feed the samples to ChangeFinder one at a time, collecting each score
    detector = changefinder.ChangeFinder()
    anomaly_scores = list(map(detector.update, synthetic))
    score_ax.plot(anomaly_scores)
    score_ax.set_title("anomaly score")
    plt.show()
92 |     
93 |     
94 |     


--------------------------------------------------------------------------------