# ---- wrappers.py ----
from numpy import arange, array, ones
from numpy.linalg import lstsq


def leastsquareslinefit(sequence, seq_range):
    """Fit a least squares line to one segment of a sequence.

    Parameters
    ----------
    sequence : indexable sequence of numbers
    seq_range : (first, last) pair of indices; both ends are inclusive

    Returns
    -------
    (p, error) where ``p[0]`` is the slope, ``p[1]`` the intercept and
    ``error`` the sum of squared residuals as a Python float.
    """
    first, last = seq_range[0], seq_range[1]
    x = arange(first, last + 1)
    y = array(sequence[first:last + 1])

    # Design matrix for y = p[0] * x + p[1].
    design = ones((len(x), 2), float)
    design[:, 0] = x

    # rcond=None selects NumPy's current default cutoff explicitly and avoids
    # the FutureWarning that NumPy 1.14+ emits when rcond is omitted.
    p, residuals, _rank, _singular_values = lstsq(design, y, rcond=None)

    # lstsq returns an empty residual array when the fit is exact by
    # construction (two samples or fewer) or the system is rank deficient.
    error = float(residuals[0]) if residuals.size else 0.0
    return (p, error)


# ---- fit.py ----
# In the repository layout this module obtains the helper above with
# `from wrappers import leastsquareslinefit`.

# compute_error functions

def sumsquared_error(sequence, segment):
    """Return the sum of squared errors for a least squares line fit of one segment of a sequence.

    Only the x-extent of ``segment`` (x0, y0, x1, y1) is used: the error is
    that of the regression line over [x0, x1], whatever y0 and y1 are.
    """
    x0, _y0, x1, _y1 = segment
    _p, error = leastsquareslinefit(sequence, (x0, x1))
    return error


# create_segment functions

def regression(sequence, seq_range):
    """Return (x0,y0,x1,y1) of a line fit to a segment of a sequence using linear regression."""
    x0, x1 = seq_range[0], seq_range[1]
    p, _error = leastsquareslinefit(sequence, seq_range)
    y0 = p[0] * x0 + p[1]
    y1 = p[0] * x1 + p[1]
    return (x0, y0, x1, y1)


def interpolate(sequence, seq_range):
    """Return (x0,y0,x1,y1) of a line fit to a segment using a simple interpolation.

    The line simply joins the first and last samples of the range.
    """
    x0, x1 = seq_range[0], seq_range[1]
    return (x0, sequence[x0], x1, sequence[x1])
| 4 | This repository contains Python code I wrote for segmenting 1-D time series. In other words, 5 | it can be used for transforming a time series into a piecewise linear representation. 6 | The algorithms are Python implementations of the "classical" algorithms, as described in 7 | [An Online Algorithm for Segmenting Time Series][keogh], including: 8 | 9 | - the sliding window algorithm; 10 | - the top-down algorithm; and 11 | - the bottom-up algorithm. 12 | 13 | The code is *not* optimized for performance in any way, but I've found it useful for 14 | experimenting and data exploration. 15 | 16 | Requirements 17 | ------------ 18 | 19 | The segmenting algorithms use [NumPy's][numpy] least squares fitting routine, so naturally the code depends on [NumPy][numpy]. 20 | 21 | Example 22 | ------- 23 | 24 | You can run the code to see example output by running the example.py script. The script 25 | requires [matplotlib][mpl] to display the plots. 26 | 27 | The example uses ECG data I found on an [ECG data site][ecg]. 28 | 29 | 30 | [keogh]: http://www.cs.ucr.edu/~eamonn/icdm-01.pdf "Keogh et al. 
"""Plot the example ECG data with every segmenting algorithm / fit combination."""
from matplotlib.pylab import gca, figure, plot, subplot, title, xlabel, ylabel, xlim, show
from matplotlib.lines import Line2D
import segment
import fit


def draw_plot(data, plot_title):
    """Plot the raw samples in red and label the current figure."""
    plot(range(len(data)), data, alpha=0.8, color='red')
    title(plot_title)
    xlabel("Samples")
    ylabel("Signal")
    xlim((0, len(data) - 1))


def draw_segments(segments):
    """Overlay each (x0, y0, x1, y1) segment as a line on the current axes."""
    ax = gca()
    # The loop variable is deliberately not called `segment`, which would
    # shadow the imported `segment` module inside this function.
    for seg in segments:
        ax.add_line(Line2D((seg[0], seg[2]), (seg[1], seg[3])))


def main():
    """Load the example data and show one figure per algorithm / fit pair."""
    with open("example_data/16265-normalecg.txt") as f:
        file_lines = f.readlines()

    # Third tab-separated column of rows 100..319 of the data file.
    data = [float(x.split("\t")[2].strip()) for x in file_lines[100:320]]

    max_error = 0.005

    algorithms = (
        ("Sliding window", segment.slidingwindowsegment),
        ("Bottom-up", segment.bottomupsegment),
        ("Top-down", segment.topdownsegment),
    )
    fits = (
        ("regression", fit.regression),
        ("simple interpolation", fit.interpolate),
    )

    # Same figure order as the original script: all regression plots first,
    # then all interpolation plots.
    for fit_name, create_segment in fits:
        for algorithm_name, algorithm in algorithms:
            figure()
            segments = algorithm(data, create_segment, fit.sumsquared_error, max_error)
            draw_plot(data, algorithm_name + " with " + fit_name)
            draw_segments(segments)

    show()


if __name__ == "__main__":
    main()
def slidingwindowsegment(sequence, create_segment, compute_error, max_error, seq_range=None):
    """
    Return a list of line segments that approximate the sequence.

    The list is computed using the sliding window technique: starting at the
    left end, a window is grown one sample at a time until the fitting error
    exceeds max_error; the last acceptable window becomes a segment and the
    next window starts at that segment's final sample.

    Parameters
    ----------
    sequence : sequence to segment
    create_segment : a function of two arguments (sequence, sequence range) that returns a line segment that approximates the sequence data in the specified range
    compute_error: a function of two arguments (sequence, segment) that returns the error from fitting the specified line segment to the sequence data
    max_error: the maximum allowable line segment fitting error
    seq_range : optional (first, last) inclusive index pair; defaults to the whole sequence

    Returns
    -------
    A list of segments as returned by create_segment, in left-to-right order.
    An empty sequence (with no explicit seq_range) yields an empty list.
    """
    if not seq_range:
        if len(sequence) == 0:
            return []
        seq_range = (0, len(sequence) - 1)

    start, last = seq_range[0], seq_range[1]

    # A range without at least two samples cannot be grown; return the single
    # segment covering it.
    if start >= last:
        return [create_segment(sequence, (start, last))]

    segments = []
    # Iterating (instead of recursing once per segment) keeps long sequences
    # from hitting the interpreter's recursion limit.
    while start < last:
        # A two-sample window is the smallest possible segment, so it is
        # always accepted; this guarantees the loop makes progress even if
        # compute_error rejects it.
        end = start + 1
        accepted = create_segment(sequence, (start, end))

        while end < last:
            candidate = create_segment(sequence, (start, end + 1))
            if compute_error(sequence, candidate) > max_error:
                break
            accepted = candidate
            end += 1

        segments.append(accepted)
        # The next window starts on the last sample of the accepted segment,
        # so a rejection at the very last sample still produces a final
        # segment instead of dropping the tail.
        start = end

    return segments
def bottomupsegment(sequence, create_segment, compute_error, max_error):
    """
    Return a list of line segments that approximate the sequence.

    The list is computed using the bottom-up technique: start from the finest
    segmentation (one segment per pair of adjacent samples) and repeatedly
    merge the adjacent pair whose merged segment has the lowest error, until
    the cheapest remaining merge is no longer below max_error.

    Parameters
    ----------
    sequence : sequence to segment
    create_segment : a function of two arguments (sequence, sequence range) that returns a line segment that approximates the sequence data in the specified range
    compute_error: a function of two arguments (sequence, segment) that returns the error from fitting the specified line segment to the sequence data
    max_error: the maximum allowable line segment fitting error

    Returns
    -------
    A list of segments as returned by create_segment, in left-to-right order.
    Sequences with fewer than two samples yield an empty list.
    """
    segments = [create_segment(sequence, (i, i + 1)) for i in range(len(sequence) - 1)]

    # mergesegments[i] is the segment obtained by merging segments[i] and
    # segments[i + 1]; mergecosts[i] is its fitting error.
    mergesegments = [
        create_segment(sequence, (left[0], right[2]))
        for left, right in zip(segments[:-1], segments[1:])
    ]
    mergecosts = [compute_error(sequence, merged) for merged in mergesegments]

    # `mergecosts` becomes empty once everything has collapsed into a single
    # segment (or when the input is too short to merge at all); min() of an
    # empty list would raise ValueError.
    # NOTE: the comparison is strict (<), as in the original code.
    while mergecosts and min(mergecosts) < max_error:
        idx = mergecosts.index(min(mergecosts))
        segments[idx] = mergesegments[idx]
        del segments[idx + 1]

        # The merged segment has new neighbours on both sides, so the
        # candidate merges that involve it must be rebuilt and re-costed.
        if idx > 0:
            mergesegments[idx - 1] = create_segment(sequence, (segments[idx - 1][0], segments[idx][2]))
            mergecosts[idx - 1] = compute_error(sequence, mergesegments[idx - 1])

        if idx + 1 < len(mergecosts):
            mergesegments[idx + 1] = create_segment(sequence, (segments[idx][0], segments[idx + 1][2]))
            # Cost of the rebuilt right-hand candidate itself (index idx + 1),
            # not of the merge that was just applied (index idx).
            mergecosts[idx + 1] = compute_error(sequence, mergesegments[idx + 1])

        del mergesegments[idx]
        del mergecosts[idx]

    return segments
def topdownsegment(sequence, create_segment, compute_error, max_error, seq_range=None):
    """
    Return a list of line segments that approximate the sequence.

    The list is computed using the top-down technique: the range is split at
    the interior index that minimises the combined error of the left and
    right segments, and each half whose error still exceeds max_error is
    split again recursively.

    Parameters
    ----------
    sequence : sequence to segment
    create_segment : a function of two arguments (sequence, sequence range) that returns a line segment that approximates the sequence data in the specified range
    compute_error: a function of two arguments (sequence, segment) that returns the error from fitting the specified line segment to the sequence data
    max_error: the maximum allowable line segment fitting error
    seq_range : optional (first, last) inclusive index pair; defaults to the whole sequence

    Returns
    -------
    A list of segments as returned by create_segment, in left-to-right order.
    An empty sequence (with no explicit seq_range) yields an empty list.
    """
    if not seq_range:
        if len(sequence) == 0:
            return []
        seq_range = (0, len(sequence) - 1)

    first, last = seq_range[0], seq_range[1]

    # Base case: with fewer than three samples there is no interior index to
    # split at, so the range is returned as a single segment.
    if last - first < 2:
        return [create_segment(sequence, (first, last))]

    bestlefterror, bestleftsegment = float('inf'), None
    bestrighterror, bestrightsegment = float('inf'), None
    bestidx = None

    for idx in range(first + 1, last):
        segment_left = create_segment(sequence, (first, idx))
        error_left = compute_error(sequence, segment_left)
        segment_right = create_segment(sequence, (idx, last))
        error_right = compute_error(sequence, segment_right)
        if error_left + error_right < bestlefterror + bestrighterror:
            bestlefterror, bestrighterror = error_left, error_right
            bestleftsegment, bestrightsegment = segment_left, segment_right
            bestidx = idx

    # No split compared as an improvement (e.g. every error was NaN or inf):
    # fall back to one segment rather than recursing with an undefined index.
    if bestidx is None:
        return [create_segment(sequence, (first, last))]

    if bestlefterror <= max_error:
        leftsegs = [bestleftsegment]
    else:
        leftsegs = topdownsegment(sequence, create_segment, compute_error, max_error, (first, bestidx))

    if bestrighterror <= max_error:
        rightsegs = [bestrightsegment]
    else:
        rightsegs = topdownsegment(sequence, create_segment, compute_error, max_error, (bestidx, last))

    return leftsegs + rightsegs