"""change_point_detection: changepoint_detection.py

Examples of online and offline changepoint detection using the ruptures and
changefinder packages.

The script pulls the daily WTI oil price series from the EIA API, runs four
offline search methods from ruptures (Pelt, binary segmentation, window-based
and dynamic programming) on it, and then runs the online changefinder
detector on a synthetic three-level signal. Every result is shown in a
matplotlib window.
"""
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import eia
import changefinder
import ruptures as rpt


def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID
    api: API that we're connected to (must provide data_by_series(series=...))
    series_ID: string. Name of the series that we want to pull from the EIA API
    """
    # Retrieve data by series ID and wrap it in a pandas dataframe
    series_search = api.data_by_series(series=series_ID)
    return pd.DataFrame(series_search)


def _load_wti_prices(api, series_ID='PET.RWTC.D', start_date='2014-01-01'):
    """
    Return a dataframe with 'Date' and 'WTI_Price' columns for the series
    api: API that we're connected to
    series_ID: string. Name of the series that we want to pull from the EIA API
    start_date: string. Rows dated before this day are dropped
    """
    # Move the date index into a regular column (it is named 'index')
    price_df = retrieve_time_series(api, series_ID).reset_index(level=0)
    # Rename the columns for easier analysis
    price_df = price_df.rename(columns={'index': 'Date',
                                        price_df.columns[1]: 'WTI_Price'})
    # Drop the last three characters of each date string so that what is
    # left matches the '%Y %m%d' layout, then convert to real datetimes
    date_text = price_df['Date'].astype(str).str[:-3]
    price_df['Date'] = pd.to_datetime(date_text, format='%Y %m%d')
    # Subset to only include data going back to start_date
    price_df = price_df.loc[price_df['Date'] >= start_date]
    # The detectors depend on the order of the samples, so make the series
    # chronological no matter what order the API delivered it in
    return price_df.sort_values('Date')


def _show_breakpoints(signal, breakpoints, title):
    """Display a signal with its detected breakpoints under the given title."""
    rpt.display(signal, breakpoints, figsize=(10, 6))
    plt.title(title)
    plt.show()


def _run_ruptures_examples(points):
    """
    Run each ruptures search method on the signal and plot its breakpoints
    points: numpy 1D array holding the time series values
    """
    # (plot title, configured search method, arguments for predict)
    detectors = (
        ('Change Point Detection: Pelt Search Method',
         rpt.Pelt(model="rbf"),
         {'pen': 10}),
        ('Change Point Detection: Binary Segmentation Search Method',
         rpt.Binseg(model="l2"),
         {'n_bkps': 10}),
        ('Change Point Detection: Window-Based Search Method',
         rpt.Window(width=40, model="l2"),
         {'n_bkps': 10}),
        ('Change Point Detection: Dynamic Programming Search Method',
         rpt.Dynp(model="l1", min_size=3, jump=5),
         {'n_bkps': 10}),
    )
    for title, detector, predict_kwargs in detectors:
        breakpoints = detector.fit(points).predict(**predict_kwargs)
        _show_breakpoints(points, breakpoints, title)


def _run_changefinder_example():
    """Score a synthetic three-level signal with changefinder and plot it."""
    # Create a synthetic data set to test against: the level steps from
    # about 5 up to about 10 and back down to about 5
    points = np.concatenate([np.random.rand(100) + 5,
                             np.random.rand(100) + 10,
                             np.random.rand(100) + 5])
    f, (ax1, ax2) = plt.subplots(2, 1)
    f.subplots_adjust(hspace=0.4)
    ax1.plot(points)
    ax1.set_title("data point")
    # Feed the points one at a time, as an online detector receives them
    cf = changefinder.ChangeFinder()
    scores = [cf.update(p) for p in points]
    ax2.plot(scores)
    ax2.set_title("anomaly score")
    plt.show()


def main():
    """Run the ruptures and changefinder examples end to end."""
    # Create EIA API using your specific API key. The key is read from the
    # EIA_API_KEY environment variable when set, so it does not have to be
    # written into this file; otherwise replace the placeholder below.
    api_key = os.environ.get('EIA_API_KEY', 'YOUR API KEY HERE')
    api = eia.API(api_key)

    # Pull the oil WTI price data
    price_df = _load_wti_prices(api, 'PET.RWTC.D')

    # Convert the time series values to a numpy 1D array
    points = np.array(price_df['WTI_Price'])

    # RUPTURES PACKAGE
    _run_ruptures_examples(points)

    # CHANGEFINDER PACKAGE
    _run_changefinder_example()


if __name__ == "__main__":
    main()