├── MK_Motif_Discovery.py
└── Motif_Matching.py


/MK_Motif_Discovery.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2.7
 2 | import numpy as np
 3 | import random
 4 | 
 5 | class Motif_Discovery(object):
 6 | 	"""docstring for Motif_Discovery"""
 7 | 	def __init__(self):
 8 | 		super(Motif_Discovery, self).__init__()
 9 | 		# self.arg = arg
10 | 		
11 | 
12 | 	def distance(self, x, y):
13 | 		sum = 0
14 | 		i = 0
15 | 		while (i < len(x)):
16 | 			sum = sum + (x[i] - y[i])**2
17 | 			i = i + 1
18 | 		return (sum ** 0.5)
19 | 
20 | 	# MK Motif Discovery
21 | 	# @param R, int number of reference points
22 | 	# @param D, list, time series
23 | 	# @param tempLen, the length of the Template to Discover
24 | 	# @param r, the minimum dsitance between two motifs to be considered
25 | 	# returns the beginning index of the two most similar motifs
26 | 	def MK_Motif(self, D,R, tempLen = 30):
27 | 		R = min(R,1000)
28 | 		best_so_far = 999999999.0
29 | 		D = np.array(D)
30 | 		m = len(D)-tempLen # the number of time series to compare
31 | 		S = []
32 | 		Dist = []
33 | 		for i in range(R):
34 | 			r = random.randint(0,len(D)-tempLen)
35 | 			ref_i = D[r:r+tempLen] # a randomly chosen time series Dr from D
36 | 			Dist_i = []
37 | 			for j in range(m):
38 | 				Dj = D[j:j+tempLen]
39 | 				Dist_ij = self.distance(ref_i,Dj) # euclidian or DTW distance between two time series
40 | 				Dist_i.append(Dist_ij)
41 | 
42 | 				if Dist_ij < best_so_far and r!=j:
43 | 					best_so_far = Dist_ij
44 | 					# print "best_so_far: "+ `best_so_far`
45 | 					L1 = r
46 | 					L2 = j
47 | 			Si = np.std(Dist_i)
48 | 			S.append(Si)
49 | 			Dist.append(Dist_i)
50 | 		# find an ordering "Z" of the indicdes to the reference time series ref such that S_z(i) >= S_z(i+1)
51 | 		S_arr = np.array(S) # convert list to array
52 | 		Z = (-S_arr).argsort()
53 | 		# find an ordering "I" of the indices to the time series in D such that Dist_z(1),I(j) <= Dist_z(1),I(J+1)
54 | 		Dist = np.array(Dist)
55 | 
56 | 		I = (-Dist[Z[0]]).argsort()
57 | 		offset = 0
58 | 		abandon = False
59 | 
60 | 		while abandon == False:
61 | 			offset = offset + 1
62 | 			abandon = True
63 | 
64 | 			for j in range(R):
65 | 				reject = False
66 | 				for i in range(R):
67 | 					# print 
68 | 					# print "i " + `i`
69 | 					# print "j" + `j`
70 | 					# print "offset " + `offset`
71 | 					# print "len[I]: " + `len(I)`
72 | 					if j + offset < len(I):
73 | 						lower_bound = abs(Dist[Z[i],I[j]] - Dist[Z[i],I[j+offset]])
74 | 						if lower_bound > best_so_far:
75 | 							reject = True
76 | 							break
77 | 						elif i == 1:
78 | 							abandon = False
79 | 				if reject == False and j + offset < len(I):
80 | 					check_d = self.distance(D[I[j]:(I[j]+tempLen)],D[I[j+offset]:(I[j+offset]+tempLen)])
81 | 					# print "check_d: " + `check_d`
82 | 					if check_d < best_so_far and I[j]!=I[j+offset]:
83 | 						best_so_far = check_d
84 | 						L1 = I[j]
85 | 						L2 = I[j+offset]
86 | 		return [L1, L2]


--------------------------------------------------------------------------------
/Motif_Matching.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2.7
 2 | import numpy as np
 3 | import csv
 4 | import time
 5 | 
 6 | 
 7 | def zNormalize(my_time_series):
 8 |     data = np.array(my_time_series)
 9 |     y_normed = (data-np.mean(data))/np.std(data)
10 |     return y_normed
11 | 
12 | # adapted from Mueen and Keogh 2012
13 | # Q query: a list of numbers. The motif to be matched to.
 14 | # filePathT: The path pointing to a CSV file containing one number per row.
 15 | # sample_rate: an integer. If you wish to sample every nth value of the time series, set this to n. Note that the matching motif must also be collected at this same sampling rate.
 16 | # normalize: If true, compare distances between the z-normalized query and test data. If false, compare absolute distances.
17 | # returns the first index of the closest fit, nn
def find_matches(filePathT, Q, sample_rate=1, normalize=True):
    """Return the start index of the subsequence of a CSV series closest to Q.

    The series is streamed row by row through a circular buffer of len(Q)
    samples, keeping running sums so the mean and standard deviation of
    the current window are available without rescanning it.

    filePathT: path to a CSV file; the first column of each row is read as
        a float. Empty rows are skipped.
    Q: the query, a sequence of numbers.
    sample_rate: keep every sample_rate-th value of the series.
    normalize: if true, compare the z-normalized query against each
        z-normalized window using squared differences; otherwise compare
        raw values using absolute differences.

    Returns the 0-based row index (among non-empty rows) of the first
    sample of the best-matching window; ties go to the earliest window.
    Returns 0 when the file is empty or holds fewer than len(Q) samples.
    """
    if normalize:
        Q = zNormalize(Q)
    m = len(Q)
    window = [0.0] * m      # circular buffer holding the current window
    ex = 0.0                # running sum of the window, for the mean
    ex2 = 0.0               # running sum of squares, for the std deviation
    bestSoFar = float("inf")
    nn = 0
    count = 0               # 1-based position in the raw series
    samples = 0             # number of values kept after down-sampling

    with open(filePathT) as tsFile:
        for row in csv.reader(tsFile):
            if not row:
                continue
            # This implementation assumes the CSV stores one value per line.
            value = float(row[0])
            count = count + 1
            if count % sample_rate != 0:
                continue

            samples = samples + 1
            i = samples % m  # slot of the newest sample
            window[i] = value
            ex = ex + value
            ex2 = ex2 + value ** 2
            if samples < m:
                continue  # the window is not full yet

            u = ex / m
            # abs() guards against a tiny negative variance from rounding.
            sdev = abs(ex2 / m - u ** 2) ** 0.5

            # The oldest sample sits right after the newest one in the ring,
            # so the window in time order starts at slot i + 1.
            dist = 0
            j = 0
            while j < m and dist < bestSoFar:  # early abandon
                x = window[(i + 1 + j) % m]
                if normalize:
                    # A flat window has no spread; treat it as all zeros.
                    z = (x - u) / sdev if sdev else 0.0
                    dist = dist + (Q[j] - z) ** 2
                else:
                    dist = dist + abs(Q[j] - x)
                j = j + 1

            if dist < bestSoFar:
                bestSoFar = dist
                # count is the 1-based end of the window; step back over the
                # (m - 1) sampling intervals to reach its 0-based head.
                nn = count - (m - 1) * sample_rate - 1

            # Drop the oldest sample so the sums describe the next window.
            oldest = window[(i + 1) % m]
            ex = ex - oldest
            ex2 = ex2 - oldest ** 2

    if count == 0:
        print("empty list")
        return 0
    print("end of list")
    return nn  # closest match spot in time series T


--------------------------------------------------------------------------------