├── .gitignore ├── README.md ├── alternates ├── __init__.py ├── bayes.py ├── clustering.py ├── coreset.py └── em.py ├── evaluation ├── Evaluator.py ├── Metrics.py └── __init__.py ├── example-data.txt ├── generate ├── SequenceDataGenerator.py ├── TrajectoryDataGenerator.py └── __init__.py ├── main.py └── tsc ├── .DS_Store ├── __init__.py ├── examples ├── inconsistent.py ├── sineWave.py └── timeWarp.py └── tsc.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyc 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### TSC: Transition State Clustering 2 | Implements experiments to evaluate transition state clustering as described in: 3 | 4 | - Transition State Clustering: Unsupervised Surgical Trajectory Segmentation For Robot Learning. Sanjay Krishnan*, Animesh Garg*, Sachin Patil, Colin Lea, Greg Hager, Pieter Abbeel, Ken Goldberg (* denotes equal contribution). 5 | International Symposium on Robotics Research (ISRR), 2015. 6 | 7 | Please visit [http://berkeleyautomation.github.io/tsc](http://berkeleyautomation.github.io/tsc) for more info 8 | -------------------------------------------------------------------------------- /alternates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BerkeleyAutomation/tsc/6d3b1fb4a2ee3c33ae7ed695b6197cdfa99cfc4a/alternates/__init__.py -------------------------------------------------------------------------------- /alternates/bayes.py: -------------------------------------------------------------------------------- 1 | import pyhsmm 2 | from pyhsmm.util.text import progprint_xrange 3 | import numpy as np 4 | np.seterr(divide='ignore') # these warnings are usually harmless for this code 5 | from matplotlib import pyplot as plt 6 | 7 | from pyhsmm.basic.distributions import NegativeBinomialIntegerR2Duration 8 | import autoregressive.models as m 9 | import autoregressive.distributions as di 10 | 11 | """ 12 | Uses an off the shelf HiddenSemiMarkovModel 13 | """ 14 | class HiddenSemiMarkovModel: 15 | 16 | def __init__(self, verbose=True): 17 | self.verbose = verbose 18 | self.segmentation = [] 19 | self.model = [] 20 | 21 | #internal variables not for outside reference 22 | self._demonstrations = [] 23 | self._demonstration_sizes = [] 24 | 25 | def addDemonstration(self,demonstration): 26 | demonstration = np.squeeze(np.array(demonstration)) 27 | demo_size = np.shape(demonstration) 28 | 29 | if self.verbose: 30 | print "[Bayes] Adding a Demonstration of Size=", demo_size 31 | 32 | self._demonstration_sizes.append(demo_size) 33 | 34 | state_augmented = np.zeros((demo_size[0],2*demo_size[1])) 35 | state_augmented[:,0:demo_size[1]] = demonstration 36 | state_augmented[0:demo_size[0]-1,demo_size[1]:2*demo_size[1]] = demonstration[1:demo_size[0],:] 37 | state_augmented[demo_size[0]-1,demo_size[1]:2*demo_size[1]] = state_augmented[demo_size[0]-1,0:demo_size[1]] 38 | #state_augmented[:,2*demo_size[1]] = np.arange(0,demo_size[0],1) 39 | 40 | #state_augmented = preprocessing.normalize(state_augmented, axis=0) 41 | 42 | self._demonstrations.append(state_augmented) 43 | 44 | 45 | """ 46 | Essentially taken from Matt Johnson's demo 47 | """ 48 | def fit(self): 49 | data = np.squeeze(np.array(self._demonstrations[0])) #np.loadtxt(os.path.join(os.path.dirname(__file__),'example-data.txt'))[:T] 50 | Nmax = 25 
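        # Nmax truncates the weak-limit approximation of the HDP-HSMM:
        # the sampler instantiates at most Nmax hidden states and the
        # posterior decides how many are actually used.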
51 | 52 | # and some hyperparameters 53 | obs_dim = data.shape[1] 54 | print data.shape 55 | obs_hypparams = {'mu_0':np.zeros(obs_dim), 56 | 'sigma_0':np.eye(obs_dim), 57 | 'kappa_0':0.25, 58 | 'nu_0':obs_dim+2} 59 | dur_hypparams = {'alpha_0':2*30, 60 | 'beta_0':2} 61 | 62 | obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] 63 | dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)] 64 | 65 | posteriormodel = pyhsmm.models.WeakLimitHDPHSMM( 66 | alpha=6.,gamma=6., # these can matter; see concentration-resampling.py 67 | init_state_concentration=6., # pretty inconsequential 68 | obs_distns=obs_distns, 69 | dur_distns=dur_distns) 70 | 71 | for d in self._demonstrations: 72 | posteriormodel.add_data(np.squeeze(np.array(d)),trunc=60) # duration truncation speeds things up when it's possible 73 | 74 | for idx in progprint_xrange(50): 75 | posteriormodel.resample_model() 76 | 77 | new_segments = [] 78 | for i in range(0, len(self._demonstrations)): 79 | new_segments.append(self.findTransitions(posteriormodel.states_list[i].stateseq)) 80 | 81 | self.segmentation = new_segments 82 | self.model = posteriormodel 83 | 84 | #this finds the segment end points 85 | def findTransitions(self, predSeq): 86 | transitions = [] 87 | prev = -1 88 | for i,v in enumerate(predSeq): 89 | if prev != v: 90 | transitions.append(i) 91 | #print i 92 | prev = v 93 | 94 | transitions.append(i) 95 | return transitions 96 | 97 | 98 | """ 99 | Uses an off the shelf AutoregressiveMarkovModel 100 | """ 101 | class AutoregressiveMarkovModel: 102 | def __init__(self, lag=4, alpha=1.5, gamma=4, nu=2, init_state_concentration=10, verbose=True): 103 | self.verbose = verbose 104 | self.segmentation = [] 105 | self.model = [] 106 | self.lag = lag 107 | self.alpha = alpha 108 | self.nu = nu 109 | self.gamma = gamma 110 | #self.cap=cap 111 | self.init_state_concentration = init_state_concentration 112 | 113 | #internal variables not for outside reference 114 | self._demonstrations = [] 115 | self._demonstration_sizes = [] 116 | 117 | def addDemonstration(self,demonstration): 118 | demonstration = np.squeeze(np.array(demonstration)) 119 | demo_size = np.shape(demonstration) 120 | 121 | if self.verbose: 122 | print "[Bayes] Adding a Demonstration of Size=", demo_size 123 | 124 | self._demonstration_sizes.append(demo_size) 125 | self._demonstrations.append(demonstration) 126 | 127 | """ 128 | Essentially taken from Matt Johnson's demo 129 | """ 130 | def fit(self): 131 | p = self._demonstration_sizes[0][1] 132 | 133 | Nmax = self._demonstration_sizes[0][0] 134 | affine = True 135 | nlags = self.lag 136 | obs_distns=[di.AutoRegression( 137 | nu_0=self.nu, S_0=np.eye(p), M_0=np.zeros((p,2*p+affine)), 138 | K_0=np.eye(2*p+affine), affine=affine) for state in range(Nmax)] 139 | 140 | dur_distns=[NegativeBinomialIntegerR2Duration( 141 | r_discrete_distn=np.ones(10.),alpha_0=1.,beta_0=1.) 
for state in range(Nmax)] 142 | 143 | model = m.ARWeakLimitHDPHSMMIntNegBin( 144 | alpha=self.alpha,gamma=self.gamma,init_state_concentration=self.init_state_concentration, 145 | obs_distns=obs_distns, 146 | dur_distns=dur_distns, 147 | ) 148 | 149 | 150 | for d in self._demonstrations: 151 | model.add_data(d,trunc=60) 152 | 153 | #model.resample_model() 154 | 155 | for itr in progprint_xrange(20): 156 | model.resample_model() 157 | 158 | new_segments = [] 159 | for i in range(0, len(self._demonstrations)): 160 | #print model.states_list[i].stateseq 161 | new_segments.append(self.findTransitions(model.states_list[i].stateseq)) 162 | 163 | self.segmentation = new_segments 164 | self.model = model 165 | 166 | #this finds the segment end points 167 | def findTransitions(self, predSeq): 168 | transitions = [] 169 | prev = -1 170 | for i,v in enumerate(predSeq): 171 | if prev != v: 172 | transitions.append(i) 173 | #print i 174 | prev = v 175 | 176 | transitions.append(i) 177 | return transitions 178 | -------------------------------------------------------------------------------- /alternates/clustering.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import mixture, decomposition 3 | #from sklearn import hmm 4 | from hmmlearn.hmm import GaussianHMM 5 | from sklearn import preprocessing 6 | import coreset 7 | 8 | """ 9 | The approach from Calinon et al. 10 | using a GMM with time as a feature 11 | optimized using the bayesian information 12 | criterion. 13 | 14 | Same input and output behavior as TSC 15 | """ 16 | class TimeVaryingGaussianMixtureModel: 17 | 18 | def __init__(self, max_segments=20, hard_param=-1, verbose=True): 19 | self.verbose = verbose 20 | self.segmentation = [] 21 | self.model = [] 22 | self.max_segments = max_segments 23 | self.hard_param = hard_param 24 | 25 | #internal variables not for outside reference 26 | self._demonstrations = [] 27 | self._demonstration_sizes = [] 28 | 29 | def addDemonstration(self,demonstration): 30 | demo_size = np.shape(demonstration) 31 | 32 | if self.verbose: 33 | print "[Clustering] Adding a Demonstration of Size=", demo_size 34 | 35 | self._demonstration_sizes.append(demo_size) 36 | 37 | time_augmented = np.zeros((demo_size[0],2*demo_size[1]+1)) 38 | time_augmented[:,0:demo_size[1]] = demonstration 39 | time_augmented[0:demo_size[0]-1,demo_size[1]:2*demo_size[1]] = demonstration[1:demo_size[0],:] 40 | time_augmented[:,2*demo_size[1]] = np.arange(0,demo_size[0],1) 41 | 42 | #time_augmented = preprocessing.normalize(time_augmented, axis=0) 43 | 44 | self._demonstrations.append(time_augmented) 45 | 46 | #this fits using the BIC, unless hard param is specified 47 | def fit(self): 48 | 49 | if self.verbose: 50 | print "[Clustering] Clearing old model and segmentation" 51 | 52 | self.segmentation = [] 53 | self.model = [] 54 | 55 | 56 | new_segments = [] 57 | new_model = [] 58 | 59 | for d in self._demonstrations: 60 | gmm_list = [] 61 | 62 | if self.hard_param == -1: 63 | for k in range(1,self.max_segments): 64 | g = mixture.GMM(n_components=k) 65 | g.fit(d) 66 | gmm_list.append((g.bic(d),g)) #lower bic better 67 | else: 68 | g = mixture.GMM(n_components=self.hard_param) 69 | g.fit(d) 70 | gmm_list.append((g.bic(d),g)) 71 | 72 | 73 | gmm_list.sort() 74 | 75 | new_segments.append(self.findTransitions(gmm_list[0][1].predict(d))) 76 | new_model.append(gmm_list[0][1]) 77 | 78 | self.segmentation = new_segments 79 | self.model = new_model 80 | 81 | #this finds the segment 
end points 82 | def findTransitions(self, predSeq): 83 | transitions = [] 84 | prev = -1 85 | for i,v in enumerate(predSeq): 86 | if prev != v: 87 | transitions.append(i) 88 | #print i 89 | prev = v 90 | 91 | transitions.append(i) 92 | return transitions 93 | 94 | """ 95 | The approach uses a HMM with Gaussian Emissions 96 | """ 97 | class HMMGaussianMixtureModel: 98 | 99 | def __init__(self, n_components, verbose=True): 100 | self.verbose = verbose 101 | self.segmentation = [] 102 | self.model = [] 103 | self.n_components = n_components 104 | 105 | #internal variables not for outside reference 106 | self._demonstrations = [] 107 | self._demonstration_sizes = [] 108 | 109 | def addDemonstration(self,demonstration): 110 | demo_size = np.shape(demonstration) 111 | 112 | if self.verbose: 113 | print "[Clustering] Adding a Demonstration of Size=", demo_size 114 | 115 | self._demonstration_sizes.append(demo_size) 116 | demonstration = preprocessing.normalize(demonstration,axis=1) 117 | self._demonstrations.append(demonstration) 118 | 119 | #this fits using the BIC, unless hard param is specified 120 | def fit(self): 121 | 122 | if self.verbose: 123 | print "[Clustering] Clearing old model and segmentation" 124 | 125 | self.segmentation = [] 126 | self.model = [] 127 | 128 | 129 | new_segments = [] 130 | new_model = [] 131 | 132 | g = GaussianHMM(n_components=self.n_components) 133 | 134 | all_demos = self._demonstrations[0] 135 | lens = [np.shape(self._demonstrations[0])[0]] 136 | for i in range(1, len(self._demonstrations)): 137 | all_demos = np.concatenate([all_demos,self._demonstrations[i]]) 138 | lens.append(np.shape(self._demonstrations[i])[0]) 139 | 140 | g.fit(all_demos,lens) 141 | 142 | for d in self._demonstrations: 143 | new_segments.append(self.findTransitions(g.predict(d))) 144 | #print g.predict(d) 145 | new_model.append(g) 146 | 147 | self.segmentation = new_segments 148 | self.model = new_model 149 | 150 | #this finds the segment end points 151 | def findTransitions(self, predSeq): 152 | transitions = [] 153 | prev = -1 154 | for i,v in enumerate(predSeq): 155 | if prev != v: 156 | transitions.append(i) 157 | #print i 158 | prev = v 159 | 160 | transitions.append(i) 161 | return transitions 162 | 163 | """ 164 | Coresets 165 | """ 166 | class CoresetSegmentation: 167 | def __init__(self, n_components, verbose=True): 168 | self.verbose = verbose 169 | self.segmentation = [] 170 | self.model = [] 171 | self.n_components = n_components 172 | 173 | #internal variables not for outside reference 174 | self._demonstrations = [] 175 | self._demonstration_sizes = [] 176 | 177 | def addDemonstration(self,demonstration): 178 | demo_size = np.shape(demonstration) 179 | 180 | if self.verbose: 181 | print "[Clustering] Adding a Demonstration of Size=", demo_size 182 | 183 | self._demonstration_sizes.append(demo_size) 184 | 185 | time_augmented = np.zeros((demo_size[0],demo_size[1]+1)) 186 | time_augmented[:,0:demo_size[1]] = demonstration 187 | time_augmented[:,demo_size[1]] = np.arange(0,demo_size[0],1) 188 | 189 | self._demonstrations.append(time_augmented) 190 | 191 | #this fits using the BIC, unless hard param is specified 192 | def fit(self): 193 | 194 | if self.verbose: 195 | print "[Clustering] Clearing old model and segmentation" 196 | 197 | self.segmentation = [] 198 | self.model = [] 199 | 200 | 201 | new_segments = [] 202 | new_model = [] 203 | 204 | total_size = np.sum([ds[0] for ds in self._demonstration_sizes]) 205 | data_matrix = 
np.zeros((total_size,self._demonstration_sizes[0][1]+1))
206 |         i = 0
207 |         for d in self._demonstrations:
208 |             N = np.shape(d)
209 |             data_matrix[i:i+N[0],:] = d
210 |             i = i + N[0]
211 | 
212 |         new_model = coreset.get_coreset(data_matrix, self.n_components, self.n_components)[0]
213 | 
214 |         self.segmentation = self.taskToTrajectory(new_model)
215 |         self.model = new_model
216 | 
217 |     def taskToTrajectory(self, new_model):
218 |         result = []
219 |         for d in self._demonstrations:
220 |             Nm = np.shape(new_model)
221 |             s = []
222 |             for i in range(0,Nm[0]):
223 |                 l = []
224 |                 N = np.shape(d)
225 |                 for j in range(0,N[0]):
226 |                     l.append((np.linalg.norm(d[j,:]-new_model[i,:]),j))
227 |                 l.sort()
228 |                 s.append(l[0][1])
229 |             s.append(0)
230 |             s.append(N[0])
231 |             result.append(s)
232 |         return result
233 | 
234 | 
235 | 
236 | 
--------------------------------------------------------------------------------
/alternates/coreset.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import numpy as np
3 | import scipy.linalg
4 | import scipy.cluster
5 | import scipy.spatial
6 | import time
7 | 
8 | """
9 | This is contrib code from
10 | https://github.com/yoojioh/gamelanpy/tree/master/gamelanpy
11 | """
12 | 
13 | 
14 | def get_coreset(data, num_clusters, coreset_size, delta=0.1):
15 |     '''
16 |     Parameters
17 |     ----------
18 |     data: array-like, (num_frames, num_vars)
19 |     num_clusters: int, number of clusters
20 |     coreset_size: int, number of coreset samples
21 |     delta: float, default=0.1
22 | 
23 |     Return
24 |     ------
25 |     samples: coreset samples
26 |     weights: coreset weights
27 |     '''
28 | 
29 |     logger = logging.getLogger()
30 | 
31 |     if len(data.shape) == 1:
32 |         logger.debug('Input data is 1-D, converting it to 2-D')
33 |         data = data[:, np.newaxis]
34 | 
35 |     num_frames, num_vars = data.shape
36 | 
37 |     if coreset_size < num_clusters:
38 |         raise ValueError("coreset_size %d is less than num_clusters %d" % (coreset_size, num_clusters))
39 | 
40 |     if num_frames < coreset_size:
41 |         raise ValueError("num_frames %d is less than coreset_size %d" % (num_frames, coreset_size))
42 | 
43 |     data_remain = data.copy()
44 | 
45 |     samples = np.zeros((0, num_vars))
46 |     # first, do the subsampling: pick core samples, and remove the points
47 |     # closest to them
48 | 
49 |     logger.debug('Before Coreset random sampling')
50 | 
51 |     num_iters = 0
52 |     num_single_samples = int(1.0 * num_vars * num_clusters * np.log(1.0 / delta))
53 |     logger.debug('num_single_samples: %d', num_single_samples)
54 | 
55 |     while data_remain.shape[0] > num_single_samples:
56 |         cur_time = time.time()
57 |         logger.debug('Starting iteration %d', num_iters)
58 | 
59 |         num_frames_remain = data_remain.shape[0]
60 |         idx = np.random.permutation(num_frames_remain)[:num_single_samples]
61 |         single_samples = data_remain[idx, :]
62 | 
63 |         prev_time = cur_time
64 |         cur_time = time.time()
65 |         logger.debug('After random sampling (took %.3f sec)', cur_time - prev_time)
66 | 
67 |         # Here we define similarity matrix, based on some measure of
68 |         # similarity or kernel. Feel free to change
69 | 
70 |         dists = scipy.spatial.distance.cdist(data_remain, single_samples)
71 | 
72 |         prev_time = cur_time
73 |         cur_time = time.time()
74 |         logger.debug('After evaluating cdist (took %.3f sec)', cur_time - prev_time)
75 | 
76 |         # minimum distance from random samples
77 |         min_dists = np.min(dists, axis=1)
78 |         # median distance
79 |         v = np.median(min_dists)
80 | 
81 |         # remove rows with distance <= median distance
82 |         remove_idx = np.where(min_dists <= v)[0]
83 | 
84 |         # remove rows of remove_idx
85 |         data_remain = np.delete(data_remain, remove_idx, 0)
86 |         samples = np.vstack((samples, single_samples))
87 |         logger.debug('Shape of the coreset samples so far (%d, %d)', *samples.shape)
88 |         logger.debug('Shape of the remaining samples (%d, %d)', *data_remain.shape)
89 | 
90 |         prev_time = cur_time
91 |         cur_time = time.time()
92 |         logger.debug('End of iteration %d (took %.3f sec)', num_iters, cur_time - prev_time)
93 | 
94 |         num_iters += 1
95 |     # end of while loop
96 | 
97 |     logger.debug('Shape of the final remaining samples (%d, %d)', *data_remain.shape)
98 | 
99 |     samples = np.vstack((samples, data_remain))
100 | 
101 |     logger.debug('Shape of the final coreset samples (%d, %d)', *samples.shape)
102 | 
103 |     # now compute the weights of all the points, according to how close they
104 |     # are to the closest core-sample.
105 |     db_size = np.zeros(samples.shape[0])
106 |     min_dists = np.zeros(num_frames)
107 |     closest_sample_idx = np.zeros(num_frames, dtype=int)
108 |     for i in xrange(num_frames):
109 |         dists = scipy.spatial.distance.cdist(data[i:i+1, :], samples)
110 |         min_dist = np.min(dists)
111 |         min_idx = np.argmin(dists)
112 |         min_dists[i] = min_dist
113 |         closest_sample_idx[i] = min_idx
114 | 
115 |     for i in xrange(num_frames):
116 |         # for each datapoint, closest_sample_idx[i] is the index of the
117 |         # coreset point it is assigned to.
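        # db_size[c] will count how many data points are assigned to coreset
        # sample c; these counts feed the sampling weights m[i] computed below.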
118 |         db_size[closest_sample_idx[i]] += 1
119 | 
120 |     sq_sum_min_dists = (min_dists ** 2).sum()
121 |     m = np.zeros(num_frames)
122 |     for i in xrange(num_frames):
123 |         m[i] = np.ceil(5.0 / db_size[closest_sample_idx[i]] + (min_dists[i] ** 2) / sq_sum_min_dists)
124 | 
125 |     m_sum = m.sum()
126 |     cdf = (1.0 * m / m_sum).cumsum()
127 |     samples = np.zeros((coreset_size, num_vars))
128 |     weights = np.zeros(coreset_size)
129 | 
130 |     # Now, sample from the weighted points, to generate the final coreset
131 |     # and the corresponding weights
132 |     for i in xrange(coreset_size):
133 |         r = np.random.rand()
134 |         idx = (cdf <= r).sum()
135 |         samples[i, :] = data[idx, :]
136 |         weights[i] = m_sum / (coreset_size * m[idx])
137 | 
138 |     return samples, weights
139 | 
140 | 
141 | 
142 | 
--------------------------------------------------------------------------------
/alternates/em.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import mixture, decomposition
3 | from dpcluster import *
4 | import matplotlib.pyplot as plt
5 | import dtw
6 | import autoregressive.distributions as di
7 | 
8 | """
9 | Forward-backward algorithm
10 | """
11 | class EMForwardBackward:
12 | 
13 |     def __init__(self, n_components, n_iter = 20, pruning=0.8, verbose=True):
14 |         self.verbose = verbose
15 |         self.model = []
16 |         self.task_segmentation = []
17 |         self.segmentation = []
18 | 
19 |         #internal variables not for outside reference
20 |         self._demonstrations = []
21 |         self._demonstration_sizes = []
22 |         self._transitions = []
23 |         self._transition_states_scluster = []
24 | 
25 |         self.n_components = n_components
26 |         self.n_iter = n_iter
27 |         self.pruning = pruning
28 | 
29 |     def addDemonstration(self,demonstration):
30 |         demonstration = np.squeeze(np.array(demonstration))
31 |         demo_size = np.shape(demonstration)
32 | 
33 |         if self.verbose:
34 |             print "[EM] Adding a Demonstration of Size=", demo_size
35 | 
36 |         self._demonstration_sizes.append(demo_size)
37 | 
38 |         self._demonstrations.append(demonstration)
39 | 
40 | 
41 | 
42 |     def fit(self):
43 |         data = np.squeeze(np.array(self._demonstrations[0]))
44 |         obs_dim = data.shape[1]
45 | 
46 |         #initializing A matrices randomly
47 |         transition_matrices = []
48 |         for i in range(0, self.n_components):
49 |             transition_matrices.append(np.random.randn(obs_dim,obs_dim))
50 | 
51 | 
52 |         #initializing covariance matrices as identity
53 |         covariance_matrices = []
54 |         for i in range(0, self.n_components):
55 |             covariance_matrices.append(np.eye(obs_dim))
56 | 
57 |         #initializing likelihoods
58 |         lp = []
59 | 
60 |         #actual em iterations
61 |         for iteration in range(0, self.n_iter):
62 | 
63 |             #calculate likelihoods and re-normalize
64 | 
65 |             lpp = []
66 |             for i in range(0, len(self._demonstrations)):
67 | 
68 |                 demoassignment = []
69 |                 for j in range(1, len(self._demonstrations[i])):
70 | 
71 |                     newp = np.zeros((self.n_components,1))
72 | 
73 |                     for k in range(0, self.n_components):
74 |                         xt = np.matrix(self._demonstrations[i][j-1])
75 |                         xtt = np.matrix(self._demonstrations[i][j])
76 |                         res = xtt.T - transition_matrices[k]*xt.T
77 |                         ll = np.dot(np.dot(res.T, np.linalg.inv(covariance_matrices[k])), res)
78 |                         scaling = 1.0/np.sqrt(np.linalg.det(covariance_matrices[k])*(2*np.pi)**obs_dim)
79 | 
80 |                         newp[k] = scaling*np.exp(-0.5*ll) #Gaussian density needs the 1/2 in the exponent
81 | 
82 | 
83 |                     newp = newp / np.sum(newp)
84 | 
85 |                     #print newp
86 | 
87 | 
88 |                     demoassignment.append(newp)
89 | 
90 |                 lpp.append(demoassignment)
91 | 
92 |             #print lpp, len(lpp[0])
93 | 
94 |             #calculate transition and covariance
matrices 95 | new_transition_matrices = [] 96 | new_covariance_matrices = [] 97 | for k in range(0, self.n_components): 98 | 99 | outer_product = np.zeros((2,2)) 100 | cross_product = np.zeros((2,2)) 101 | 102 | #transitions 103 | for i in range(0, len(self._demonstrations)): 104 | for j in range(1, len(self._demonstrations[i])): 105 | xt = np.matrix(self._demonstrations[i][j-1]) 106 | xtt = np.matrix(self._demonstrations[i][j]) 107 | #print len(lpp), i, len(lpp[i]), j 108 | #print i,j, lpp[i][j-1][k] 109 | outer_product = outer_product + np.squeeze(lpp[i][j-1][k]) * (xt.T * xt) 110 | cross_product = cross_product + np.squeeze(lpp[i][j-1][k]) * (xt.T * xtt) 111 | 112 | #for numerical instability 113 | A = (np.linalg.inv(outer_product)*cross_product) 114 | 115 | new_transition_matrices.append(A) 116 | 117 | 118 | outer_product = np.zeros((2,2)) 119 | normalization = 0 120 | 121 | #covariances 122 | for i in range(0, len(self._demonstrations)): 123 | for j in range(1, len(self._demonstrations[i])): 124 | xt = np.matrix(self._demonstrations[i][j-1]) 125 | xtt = np.matrix(self._demonstrations[i][j]) 126 | res = xtt.T - np.dot(A, xt.T) 127 | outer_product = outer_product + np.squeeze(lpp[i][j-1][k])*(res * res.T) 128 | normalization = np.squeeze(lpp[i][j-1][k]) + normalization 129 | 130 | 131 | new_covariance_matrices.append(outer_product/normalization) 132 | 133 | print "[EM] Iteration", iteration 134 | transition_matrices = new_transition_matrices 135 | covariance_matrices = new_covariance_matrices 136 | lp = lpp 137 | 138 | 139 | #print len(lp), len(lp[0]) 140 | self.findTransitions(lp) 141 | self.clusterInState() 142 | self.pruneClusters() 143 | self.clusterInTime() 144 | self.taskToTrajectory() 145 | 146 | 147 | def findTransitions(self, lp): 148 | transitions = [] 149 | 150 | for i in range(0, len(self._demonstrations)): 151 | 152 | assignment_index = [] 153 | 154 | for j in range(1, len(self._demonstrations[i])): 155 | vp = np.argmax(lp[i][j-1]) 156 | assignment_index.append(vp) 157 | 158 | assignment_index = self.smoothing(assignment_index) 159 | 160 | for j in range(1, len(assignment_index)): 161 | if assignment_index[j] != assignment_index[j-1]: 162 | transitions.append((i,j)) 163 | 164 | self._transitions = transitions 165 | 166 | return self._transitions 167 | 168 | 169 | 170 | 171 | 172 | 173 | """ 174 | @@Taken from TSC code 175 | """ 176 | 177 | 178 | """ 179 | This applies smoothing to the indices to make sure 180 | rapid changes are discouraged 181 | """ 182 | def smoothing(self, indices): 183 | newIndices = indices 184 | for i in range(1,len(indices)-1): 185 | if indices[i] != indices[i-1] and indices[i] != indices[i+1] and indices[i+1] == indices[i-1]: 186 | newIndices[i] = indices[i+1] 187 | 188 | if self.verbose: 189 | print "[EM] Smoothed out index=",i 190 | 191 | return newIndices 192 | 193 | 194 | """ 195 | This prunes transitions to a specified threshold 196 | """ 197 | def pruneClusters(self): 198 | distinct_clusters = set([c[2] for c in self._transition_states_scluster]) 199 | N = len(self._demonstration_sizes) 200 | new_transitions = [] 201 | for c in distinct_clusters: 202 | tD = set([d[0] for d in self._transition_states_scluster if d[2] == c]) 203 | tS = [d for d in self._transition_states_scluster if d[2] == c] 204 | if (len(tD) +0.0)/N > self.pruning: 205 | new_transitions.extend(tS) 206 | 207 | if self.verbose: 208 | print "[TSC] Transitions Before Pruning=", self._transition_states_scluster, "After=",new_transitions 209 | 210 | self._transition_states_scluster 
= new_transitions 211 | 212 | 213 | 214 | """ 215 | Takes the task segmentation and returns a trajectory 216 | segmentation. For conditioning reasons this doesn't 217 | use DP-GMM but finds all clusters of size segmentl (automatically set) 218 | """ 219 | def taskToTrajectory(self): 220 | N = len(self._demonstration_sizes) 221 | for i in range(0,N): 222 | tSD = [(k[2],k[3],k[1]) for k in self.task_segmentation if k[0] == i] 223 | 224 | timeDict = {} 225 | for t in tSD: 226 | key = (t[0], t[1]) 227 | if key in timeDict: 228 | timeDict[key].append(t[2]) 229 | else: 230 | timeDict[key] = [t[2]] 231 | 232 | print timeDict 233 | 234 | tseg = [np.median(timeDict[k]) for k in timeDict] 235 | tseg.append(0) 236 | tseg.append(self._demonstration_sizes[i][0]) 237 | self.segmentation.append(tseg) 238 | 239 | """ 240 | Runs multiple runs of DPGMM takes the best clustering 241 | """ 242 | def DPGMM(self,data, dimensionality, p=0.9, k=1): 243 | runlist = [] 244 | for i in range(0,k): 245 | runlist.append(self.DPGMM_Helper(data,dimensionality,p)) 246 | runlist.sort() 247 | 248 | print runlist 249 | 250 | #return best 251 | return runlist[-1][1] 252 | 253 | """ 254 | Uses Teodor's code to do DP GMM clustering 255 | """ 256 | def DPGMM_Helper(self,data, dimensionality, p=0.9): 257 | vdp = VDP(GaussianNIW(dimensionality)) 258 | vdp.batch_learn(vdp.distr.sufficient_stats(data)) 259 | likelihoods = vdp.pseudo_resp(np.ascontiguousarray(data))[0] 260 | 261 | real_clusters = 1 262 | cluster_s = vdp.cluster_sizes() 263 | total = np.sum(cluster_s) 264 | running_total = cluster_s[0] 265 | for i in range(1,len(vdp.cluster_sizes())): 266 | running_total = running_total + cluster_s[i] 267 | real_clusters = i + 1 268 | if running_total/total > p: 269 | break 270 | 271 | return (-np.sum(vdp.al), [np.argmax(l[0:real_clusters]) for l in likelihoods]) 272 | 273 | """ 274 | This function applies the state clustering 275 | """ 276 | def clusterInState(self): 277 | tsN = len(self._transitions) 278 | p = self._demonstration_sizes[0][1] 279 | ts_data_array = np.zeros((tsN,p)) 280 | 281 | for i in range(0, tsN): 282 | ts = self._transitions[i] 283 | ts_data_array[i,:] = self._demonstrations[ts[0]][ts[1],:] 284 | 285 | 286 | #Apply the DP-GMM to find the state clusters 287 | indices = self.DPGMM(ts_data_array,p) 288 | indicesDict = list(set(indices)) 289 | 290 | self._transition_states_scluster = [] 291 | self._distinct_state_clusters = 0 292 | 293 | if self.verbose: 294 | print "[EM] Removing previously learned state clusters " 295 | 296 | #encode the first layer of clustering: 297 | for i in range(0,tsN): 298 | label = indicesDict.index(indices[i]) 299 | tstuple = (self._transitions[i][0], self._transitions[i][1], label) 300 | self._transition_states_scluster.append(tstuple) 301 | 302 | self._distinct_state_clusters = len(list(set(indices))) 303 | #print self._distinct_state_clusters 304 | 305 | if self.verbose: 306 | print "[EM] Discovered State Clusters (demoid, time, statecluster): ", self._transition_states_scluster 307 | 308 | """ 309 | This function applies the time sub-clustering 310 | """ 311 | def clusterInTime(self): 312 | p = self._demonstration_sizes[0][1] 313 | 314 | unorderedmodel = [] 315 | 316 | for i in range(0,self._distinct_state_clusters): 317 | tsI = [s for s in self._transition_states_scluster if s[2]==i] 318 | ts_data_array = np.zeros((len(tsI),p)) 319 | t_data_array = np.zeros((len(tsI),2)) 320 | 321 | for j in range(0, len(tsI)): 322 | ts = tsI[j] 323 | ts_data_array[j,:] = 
self._demonstrations[ts[0]][ts[1],:]
324 | 
325 |                 t_data_array[j,0] = ts[1] + np.random.randn(1,1) #jitter to avoid conditioning problems
326 |                 t_data_array[j,1] = ts[1] + np.random.randn(1,1) #jitter to avoid conditioning problems
327 | 
328 |             if len(tsI) == 0:
329 |                 continue
330 | 
331 |             #Since there is only one state-cluster use a GMM
332 |             mm = mixture.GMM(n_components=1)
333 |             mm.fit(ts_data_array)
334 | 
335 | 
336 |             #subcluster in time
337 |             indices = self.DPGMM(t_data_array,2,0.9)
338 |             #print t_data_array, indices
339 |             indicesDict = list(set(indices))
340 | 
341 |             #finish off by storing the task segmentation tuples (demo id, time, state cluster, time cluster)
342 |             for j in range(0, len(tsI)):
343 |                 dd = set([tsI[n][0] for (n, ind) in enumerate(indices) if ind == indices[j]])
344 | 
345 |                 #time pruning condition
346 |                 if (len(dd) + 0.0)/len(self._demonstration_sizes) < self.pruning:
347 |                     continue
348 | 
349 |                 self.task_segmentation.append((tsI[j][0],
350 |                                                tsI[j][1],
351 |                                                tsI[j][2],
352 |                                                indicesDict.index(indices[j])))
353 | 
354 |             #GMM model
355 |             unorderedmodel.append((np.median(t_data_array),mm))
356 | 
357 |         unorderedmodel.sort()
358 |         self.model = [u[1] for u in unorderedmodel]
359 | 
360 |         if self.verbose:
361 |             print "[TSC] Learned The Following Model: ", self.model
362 | 
363 | 
364 |     #does the compaction
365 |     def compaction(self,delta=-1):
366 |         for i in range(0, len(self._demonstrations)):
367 |             segs = self.segmentation[i]
368 |             segs.sort()
369 |             d = self._demonstrations[i]
370 | 
371 |             prev = None
372 |             removal_vals = []
373 | 
374 |             for j in range(0,len(segs)-1):
375 |                 cur = d[segs[j]:segs[j+1],:]
376 | 
377 |                 if prev is not None and len(cur) > 0 and len(prev) > 0:
378 |                     dist, cost, acc, path = dtw.dtw(cur, prev, dist=lambda x, y: np.linalg.norm(x - y, ord=2))
379 |                     cmetric = dist/len(path)
380 |                     if cmetric < delta:
381 |                         removal_vals.append(segs[j+1])
382 | 
383 |                         if self.verbose:
384 |                             print "[TSC] Compacting ", segs[j], segs[j+1]
385 | 
386 |                 prev = cur
387 | 
388 |             self.segmentation[i] = [s for s in self.segmentation[i] if s not in removal_vals]
389 | 
390 | 
--------------------------------------------------------------------------------
/evaluation/Evaluator.py:
--------------------------------------------------------------------------------
1 | """
2 | The evaluator module performs multiple runs of
3 | an algorithm on different instances of the
4 | same task.
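For each algorithm, it reports the mean and variance of a
user-supplied segmentation metric over k sampled demonstrations.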
5 | """ 6 | import copy 7 | import numpy as np 8 | from generate.TrajectoryDataGenerator import * 9 | import datetime 10 | 11 | #system is a demonstration system 12 | #algorithm is 13 | #metric a function seq x seq -> R 14 | #K is the number of demonstrations 15 | #also takes in an algorithm parameters 16 | def run_1_time(system, 17 | algorithm, 18 | initalcond, 19 | metric, 20 | k=20, 21 | lm=0, 22 | dp=0): 23 | 24 | timestart = datetime.datetime.now() 25 | 26 | a = copy.deepcopy(algorithm) 27 | 28 | gtlist = [] 29 | for j in range(0,k): 30 | t = sampleDemonstrationFromSystem(system,initalcond,lm=lm, dp=dp) 31 | a.addDemonstration(np.squeeze(t[0])) 32 | gtlist.append(t[1]) 33 | 34 | a.fit() 35 | result = [] 36 | for j in range(0,k): 37 | a.segmentation[j].sort() 38 | result.append(metric(a.segmentation[j], gtlist[j])) 39 | 40 | print "Run Time",k, algorithm.__class__.__name__, datetime.datetime.now() - timestart 41 | 42 | return (np.mean(result), np.var(result)) 43 | 44 | #on a list of systems it applies 45 | #a list of algorithms 46 | def run_k_system(system_list, 47 | algorithm_list, 48 | initalcond, 49 | metric, 50 | k=20, 51 | lm=0, 52 | dp=0): 53 | 54 | result = np.zeros((len(system_list),len(algorithm_list))) 55 | for i,sys in enumerate(system_list): 56 | for j,a in enumerate(algorithm_list): 57 | result[i,j] = run_1_time(sys,a,initalcond,metric,k,lm,dp)[0] 58 | 59 | return result 60 | 61 | #runs N trials of the same system specs 62 | def run_comparison_experiment(params, 63 | algorithm_list, 64 | initalcond, 65 | metric, 66 | N=5, 67 | k=20, 68 | lm=0, 69 | dp=0): 70 | system_list = [] 71 | for i in range(0, N): 72 | system_list.append(createNewDemonstrationSystem(k=params['k'], 73 | dims=params['dims'], 74 | observation=params['observation'], 75 | resonance=params['resonance'], 76 | drift=params['drift'])) 77 | 78 | return run_k_system(system_list, 79 | algorithm_list, 80 | initalcond, 81 | metric, 82 | k=k, 83 | lm=0, 84 | dp=0) 85 | 86 | #can only sweep noise right now 87 | def run_sweep_experiment(base_params, 88 | sweep_param, 89 | sweep_param_list, 90 | algorithm_list, 91 | initalcond, 92 | metric, 93 | N=5, 94 | k=20, 95 | lm=0, 96 | dp=0): 97 | X = [] 98 | Y = [] 99 | 100 | for s in sweep_param_list: 101 | X.append(s) 102 | params = copy.deepcopy(base_params) 103 | params[sweep_param] = [params[sweep_param][0], s] 104 | result = run_comparison_experiment(params, 105 | algorithm_list, 106 | initalcond, 107 | metric, 108 | N, k,lm, dp) 109 | print "Result vector", np.mean(result,axis=0) 110 | 111 | a = np.mean(result,axis=0) 112 | 113 | Y.append(a) 114 | 115 | return (X,Y) 116 | 117 | #plots the data structure that comes out of the parameter 118 | #sweep 119 | def plotY1Y2(points_tuple, 120 | title, 121 | xaxis, 122 | yaxis, 123 | legend=[], 124 | loc = 'upper right', 125 | filename="output.png", 126 | ylim=0, 127 | xlim=0): 128 | 129 | import matplotlib.pyplot as plt 130 | from matplotlib import font_manager, rcParams 131 | rcParams.update({'figure.autolayout': True}) 132 | rcParams.update({'font.size': 18}) 133 | fprop = font_manager.FontProperties(fname='/Library/Fonts/Microsoft/Gill Sans MT.ttf') 134 | 135 | plt.figure() 136 | colors = ['#00ff99','#0099ff','#ffcc00','#ff5050','#9900cc','#5050ff','#99cccc','#0de4f6'] 137 | shape = ['s-', 'o-', '^-', 'v-', 'x-', 'h-'] 138 | 139 | X = points_tuple[0] 140 | Y = points_tuple[1] 141 | num_algos = len(Y[0]) 142 | 143 | for i in range(0, num_algos): 144 | ya = [j[i] for j in Y] 145 | plt.plot(X, ya, shape[i], 
146 | 
147 |     plt.legend(legend,loc=loc)
148 |     plt.title(title)
149 |     plt.xlabel(xaxis,fontproperties=fprop)
150 |     plt.ylabel(yaxis,fontproperties=fprop)
151 |     plt.ylim(ymin=ylim)
152 |     plt.xlim(xmin=xlim, xmax=X[len(X)-1])
153 |     plt.grid(True)
154 |     plt.savefig(filename,bbox_inches='tight')
155 | 
--------------------------------------------------------------------------------
/evaluation/Metrics.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements metrics for
3 | evaluating the segmentation algorithms.
4 | """
5 | import numpy as np
6 | 
7 | def nearest_neighbor_match(seq1, seq2):
8 | 
9 |     intersection_list = []
10 | 
11 |     for s1 in seq1:
12 | 
13 |         mine = None
14 |         minv = None
15 | 
16 |         for s2 in seq2:
17 |             if minv is None or abs(s1-s2) < minv:
18 |                 minv = abs(s1-s2)
19 |                 mine = s2
20 | 
21 |         intersection_list.append((s1,mine))
22 | 
23 |     return intersection_list
24 | 
25 | def jaccard(seq1, seq2, tol=5):
26 |     #remove the edge effects (transitions within tol of the endpoints)
27 |     seq1 = [s for s in seq1 if abs(s) > tol and abs(s-np.max(seq1)) > tol]
28 |     #seq1[1:len(seq1)-1]
29 |     seq2 = [s for s in seq2 if abs(s) > tol and abs(s-np.max(seq2)) > tol]
30 | 
31 |     if len(seq1) == 0 or len(seq2) == 0:
32 |         return 0
33 | 
34 |     intersection_list = nearest_neighbor_match(seq1, seq2)
35 |     inter = len([t for t in intersection_list if abs(t[0]-t[1]) <= tol])
36 |     union = len(seq2) + len(seq1) - inter
37 |     return float(inter)/union
38 | 
39 | 
40 | def segment_precision_recall (seg1,seg2):
41 |     """
42 |     s1: prediction -- should be [start, end]
43 |     s2: ground truth subsegment -- [start, end]
44 |     """
45 |     if len(seg1)>2 or len(seg2)>2:
46 |         print "Incorrect input to segment_precision_recall"
47 |         return
48 | 
49 |     #print seg1, seg2
50 | 
51 |     len_ref = seg2[1] - seg2[0] #first point in the sequence not included
52 |     len_pred = seg1[1] - seg1[0]
53 |     int_start = max(seg1[0],seg2[0])
54 |     int_end = min(seg1[1],seg2[1])
55 | 
56 |     #True positives = length of intersection
57 |     TP = float(max( int_end - int_start , 0 ))
58 | 
59 |     # recall = true_positive / condition_positive
60 |     if len_ref > 0:
61 |         recall = TP/len_ref
62 |     else:
63 |         recall = 0
64 |     # precision = true_positive / test_outcome_positive
65 |     if len_pred > 0:
66 |         precision = TP/len_pred
67 |     else:
68 |         precision = 0
69 | 
70 |     return precision, recall
71 | 
72 | def inter_over_union (seg1,seg2):
73 |     """
74 |     s1: prediction -- should be [start, end]
75 |     s2: ground truth subsegment -- [start, end]
76 |     """
77 |     if len(seg1)>2 or len(seg2)>2:
78 |         print "Incorrect input to inter_over_union"
79 |         return
80 | 
81 |     inter_start = max(seg1[0],seg2[0])
82 |     inter_end = min(seg1[1],seg2[1])
83 |     inter_len = float(max( inter_end - inter_start , 0 ))
84 | 
85 |     # Union = setA + setB - (setA intersect setB)
86 |     union_len = (seg2[1] - seg2[0]) + (seg1[1] - seg1[0]) - inter_len
87 | 
88 |     return inter_len/union_len
89 | 
90 | def f1_score (seg1,seg2):
91 |     """
92 |     s1: prediction -- should be [start, end]
93 |     s2: ground truth subsegment -- [start, end]
94 |     """
95 |     if len(seg1)>2 or len(seg2)>2:
96 |         print "Incorrect input to f1_score"
97 |         return
98 | 
99 |     p, r = segment_precision_recall (seg1,seg2)
100 |     if p + r == 0: return 0.0 #avoid division by zero when there is no overlap
101 |     # f1-score = harmonic mean of precision and recall
102 |     return 2*p*r / (p+r)
103 | 
104 | 
105 | def segment_correspondence (seq1,seq2, similarity_measure = "recall"):
106 |     """
107 |     seq1: prediction
108 |     seq2: ground truth subsegment
109 |     """
110 | 
111 |     wt_mat = np.zeros((len(seq1)-1,len(seq2)-1))
112 | 
113 |     #populate
the pairwise weights 114 | for i in range(len(seq1)-1): 115 | s1 = [seq1[i], seq1[i+1] ] 116 | 117 | for j in range(len(seq2)-1): 118 | s2 = [seq2[j], seq2[j+1] ] 119 | 120 | if similarity_measure == "recall": 121 | p, score = segment_precision_recall (s1,s2) 122 | 123 | elif similarity_measure == "f1_score": 124 | score = f1_score (s1,s2) 125 | 126 | elif similarity_measure == "IOU": 127 | score = inter_over_union (s1,s2) 128 | 129 | else: 130 | print "Need a valid similarity_measure for segment_correspondence" 131 | break 132 | 133 | # print i,j, s1, s2 #debug 134 | wt_mat[i,j] = score 135 | 136 | 137 | # association of each predicted segment with max wt in the row 138 | max_ind = np.argmax(wt_mat, axis=1) 139 | 140 | return max_ind, wt_mat 141 | 142 | 143 | def frame_acc (seq1, seq2, similarity_measure='recall'): 144 | """ 145 | Frame wise accuracy 146 | seq1: predicted sequence -- algorithm output 147 | seq2: reference -- ground truth 148 | """ 149 | max_ind, wt_mat = segment_correspondence (seq1,seq2, similarity_measure) 150 | acc_score = 0.0 151 | len_pred = seq1[-1] - seq1[0] 152 | 153 | for k1 in range(len(seq1)-1): 154 | for i in range (int(round(seq1[k1])), int(round(seq1[k1+1]))): 155 | start = seq2[max_ind[k1]] 156 | end = seq2 [max_ind[k1] + 1] 157 | 158 | #print i, seq1[k1], max_ind[k1], start, end, acc_score 159 | if i >= start and i< end: 160 | #correctly matched 161 | acc_score = acc_score +1 162 | 163 | # return frame wise accuracy aggregated for the full sequence 164 | return acc_score/len_pred 165 | 166 | def seg_acc (seq1, seq2, thresh = 0.4, similarity_measure = "recall"): 167 | """ 168 | Segmentation Accuracy 169 | 170 | Defined as the ratio of the ground - truth segments that are correctly detected 171 | A GT segment is said to be detected if there exists a predicted segment with an overlap 172 | greater than thresh. Overlap is defined as Inter-over-union 173 | 174 | Reference: http://watchnpatch.cs.cornell.edu/paper/watchnpatch_cvpr15.pdf (sec#6.3) 175 | """ 176 | #get correspondence 177 | max_ind, wt_mat = segment_correspondence (seq1,seq2, similarity_measure) 178 | 179 | print max_ind 180 | num_gt_segments = len(seq2) - 1 181 | acc_score = np.zeros(num_gt_segments,) 182 | 183 | for k2 in range(num_gt_segments): 184 | # only the ones associated with this ground truth segment 185 | associated_predictions = [i for i, k in enumerate(max_ind) if k == k2] 186 | for k1 in associated_predictions: 187 | s1 = [seq1[k1], seq1[k1+1]] 188 | s2 = [seq2[k2], seq2[k2+1]] 189 | #s2 = [ seq2[max_ind[k1]], seq2[max_ind[k1]+1] ] 190 | score = inter_over_union (s1, s2) 191 | 192 | print s1, s2, score 193 | #condition if the GT segment k2 is covered by any of predicted segments 194 | if score >= max(thresh, acc_score[k2]) : 195 | acc_score[k2] = score 196 | 197 | #return ratio of GT segments covered 198 | #return sum(acc_score)/num_gt_segments 199 | return float(np.count_nonzero(acc_score))/num_gt_segments 200 | 201 | 202 | def evaluate(seq1, seq2, method='jaccard', **options): 203 | """ 204 | generic evaluation call 205 | seq1: predicted sequence -- algorithm output 206 | seq2: reference -- ground truth 207 | method: which method to use. 
defaults to Jaccard (intersection over union)
208 |     """
209 |     if method == 'jaccard':
210 |         if 'tol' in options.keys():
211 |             return jaccard(seq1, seq2, tol=options['tol'])
212 |         else:
213 |             return jaccard(seq1, seq2)
214 | 
215 |     if method == 'frame_acc':
216 |         if 'similarity_measure' in options.keys():
217 |             return frame_acc(seq1, seq2, similarity_measure = options['similarity_measure'])
218 |         else:
219 |             return frame_acc(seq1, seq2)
220 | 
221 |     if method == 'seg_acc':
222 | 
223 |         if 'thresh' in options.keys():
224 |             thresh = options['thresh']
225 |         else:
226 |             thresh = 0.4
227 | 
228 |         if 'similarity_measure' in options.keys():
229 |             return seg_acc(seq1, seq2, similarity_measure = options['similarity_measure'], thresh = thresh)
230 |         else:
231 |             return seg_acc(seq1, seq2, thresh = thresh)
232 | 
233 | # TODO: Implement this for later
234 | def DTW(c1,c2,dist=lambda x,y:abs(x-y)):
235 |     """
236 |     Segmentwise DTW similarity
237 |     """
238 |     pass
239 | 
240 | def edit_distance(c1,c2,match=lambda x,y:x==y):
241 |     """
242 |     TODO: implement edit distance based on segment level DTW
243 |     """
244 |     pass
--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BerkeleyAutomation/tsc/6d3b1fb4a2ee3c33ae7ed695b6197cdfa99cfc4a/evaluation/__init__.py
--------------------------------------------------------------------------------
/generate/SequenceDataGenerator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy
3 | from scipy import linalg
4 | 
5 | """
6 | This module provides some primitives for
7 | generating random sequences of data.
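A sequence is the list of regime indices a demonstration visits;
loops repeat a regime and drops skip one entirely.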
8 | """ 9 | 10 | #Loops are drawn from a poisson distribution 11 | #with specified mean (lm) elements are dropped 12 | #wp dp 13 | def generateRandomSequence(k=5, lm=0, dp=0): 14 | seq = [] 15 | for i in range(0,k): 16 | if np.random.rand(1,1) > dp: 17 | loops = np.random.poisson(lam=lm) + 1 18 | for j in range(0, loops): 19 | seq.append(i) 20 | return seq 21 | 22 | #Generate targets for the specified state-space 23 | def generateTargetStates(k=5, dims=1, bounds=[-10,10]): 24 | return np.random.rand(k,dims)*(bounds[1]-bounds[0])+bounds[0]*np.ones((k,dims)) 25 | 26 | #Generates a dynamical system with the specified noise properties 27 | #selects uniformly at random from the specs 28 | def generateSystemSpecs(k = 5, 29 | drift=[0,0], 30 | resonance=[0,0], 31 | observation=[0,0]): 32 | specs = [] 33 | for i in range(0,k): 34 | a = np.random.rand()*drift[1] + drift[0] 35 | b = np.random.rand()*resonance[1] + resonance[0] 36 | c = np.random.rand()*observation[1] + observation[0] 37 | specs.append((a,b,c)) 38 | return specs 39 | 40 | 41 | -------------------------------------------------------------------------------- /generate/TrajectoryDataGenerator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | from scipy import linalg 4 | from scipy.misc import comb 5 | from SequenceDataGenerator import * 6 | 7 | """ 8 | This module provides the functions 9 | to generate trajectory data of the 10 | specified dimensionality 11 | and characteristcs 12 | """ 13 | 14 | #generates a new system that we can sample demonstrations from 15 | def createNewDemonstrationSystem(k=5, 16 | dims=1, 17 | bounds=[-10,10], 18 | drift=[0,0], 19 | resonance=[0,0], 20 | observation=[0,0]): 21 | 22 | #np.random.seed(0) 23 | targets = generateTargetStates(k, dims, bounds) 24 | print targets 25 | system = generateSystemSpecs(k,drift,resonance,observation) 26 | 27 | return {'targets': targets, 28 | 'system': system, 29 | 'bounds': bounds, 30 | 'segments':k, 31 | 'dimensionality':dims} 32 | 33 | def bernstein_poly(i, n, t): 34 | """ 35 | The Bernstein polynomial of n, i as a function of t 36 | """ 37 | 38 | return comb(n, i) * ( t**(n-i) ) * (1 - t)**i 39 | 40 | 41 | def bezier_curve(data, nTimes=50): 42 | points = [(i[0,0],i[1,0]) for i in data] 43 | nPoints = len(points) 44 | xPoints = np.array([p[0] for p in points]) 45 | yPoints = np.array([p[1] for p in points]) 46 | 47 | t = np.linspace(0.0, 1.0, nTimes) 48 | 49 | polynomial_array = np.array([ bernstein_poly(i, nPoints-1, t) for i in range(0, nPoints) ]) 50 | 51 | xvals = np.dot(xPoints, polynomial_array) 52 | yvals = np.dot(yPoints, polynomial_array) 53 | 54 | result = [] 55 | for i in range(0,nTimes): 56 | pt = np.zeros((2,1)) 57 | pt[0] = xvals[i] 58 | pt[1] = yvals[i] 59 | result.append(pt) 60 | 61 | return result 62 | 63 | #from the start position runs the system til it reached end 64 | #fits a bezier curve with the control points to avoid discretization 65 | #effects 66 | def interpolateToTarget(start, 67 | end, 68 | specs, 69 | maxiter=100): 70 | cur = start 71 | traj = [] 72 | i = 0 73 | 74 | #run until less than tol or number of iterations exceeded 75 | traj.append(start) 76 | 77 | while i < maxiter: 78 | traj.append(cur + np.random.randn(2,1)*specs[1]) 79 | cur = cur + (np.transpose(end)-start)/maxiter 80 | i = i + 1 81 | 82 | traj.append(np.transpose(end)+ np.random.randn(2,1)*specs[0]) 83 | 84 | interpolated = reversed(bezier_curve(traj)) 85 | 86 | return [t + 
np.random.randn(2,1)*specs[2] for t in interpolated ] 87 | 88 | #sample from the system 89 | #lm is the amount of looping 90 | #dp is the amount of dropping 91 | def sampleDemonstrationFromSystem(sys,start,lm=0, dp=0): 92 | seq = generateRandomSequence(sys['segments'],lm,dp) 93 | traj = [start] 94 | prev = -1 95 | prevStart = start 96 | 97 | for j in seq: 98 | 99 | #code for looping, go back to the previous start 100 | if prev == j: 101 | s = traj[-1] 102 | newSeg = interpolateToTarget(s, 103 | prevStart, 104 | sys['system'][j]) 105 | traj.extend(newSeg) 106 | else: 107 | prevStart = np.transpose(traj[-1]) 108 | 109 | #normal execution 110 | s = traj[-1] 111 | newSeg = interpolateToTarget(s, 112 | np.matrix(sys['targets'][j,:]), 113 | sys['system'][j]) 114 | traj.extend(newSeg) 115 | 116 | prev = j 117 | 118 | return (traj, seqToGroundTruth(seq)) 119 | 120 | """ 121 | The ground truth time sequence 122 | """ 123 | def seqToGroundTruth(seq, nsteps=50): 124 | gt = [0] 125 | prev = -1 126 | for i in seq: 127 | if prev == i: 128 | gt.append(gt[-1]+2*nsteps) 129 | else: 130 | gt.append(gt[-1]+nsteps) 131 | 132 | prev = i 133 | 134 | return gt 135 | 136 | ###Plot sample 137 | def plotData(traj): 138 | import matplotlib.pyplot as plt 139 | X = [t[0] for t in traj] 140 | Y = [t[1] for t in traj] 141 | plt.plot(X, Y, 'ro-') 142 | plt.show() 143 | 144 | -------------------------------------------------------------------------------- /generate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BerkeleyAutomation/tsc/6d3b1fb4a2ee3c33ae7ed695b6197cdfa99cfc4a/generate/__init__.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.seterr(divide='ignore') # these warnings are usually harmless for this code 3 | from matplotlib import pyplot as plt 4 | import copy, os 5 | from generate.TrajectoryDataGenerator import * 6 | from tsc.tsc import TransitionStateClustering 7 | from alternates.clustering import TimeVaryingGaussianMixtureModel, HMMGaussianMixtureModel, CoresetSegmentation 8 | from alternates.bayes import HiddenSemiMarkovModel, AutoregressiveMarkovModel 9 | from alternates.em import EMForwardBackward 10 | from evaluation.Evaluator import * 11 | from evaluation.Metrics import * 12 | 13 | #creates a system whose regimes are uniformly sampled from the stochastic params 14 | 15 | """ 16 | sys = createNewDemonstrationSystem(k=3,dims=2, observation=[0.1,0.1], resonance=[0.0,0.0], drift=[0.0,0.0]) 17 | t = sampleDemonstrationFromSystem(sys,np.ones((2,1)),lm=0.0, dp=0.0) 18 | t2 = sampleDemonstrationFromSystem(sys,np.ones((2,1)),lm=0.0, dp=0.0) 19 | t3 = sampleDemonstrationFromSystem(sys,np.ones((2,1)),lm=0.0, dp=0.0) 20 | 21 | 22 | a.addDemonstration(t[0]) 23 | a.addDemonstration(t2[0]) 24 | a.addDemonstration(t3[0]) 25 | a.fit() 26 | print a.segmentation 27 | """ 28 | 29 | sys_params = {'k':3,'dims':2, 'observation':[0.0,0.1], 'resonance':[0.0,0.0], 'drift':[0,0.0]} 30 | 31 | a = TransitionStateClustering(window_size=3, normalize=False, pruning=0.3,delta=-1) 32 | #b = TimeVaryingGaussianMixtureModel(hard_param=3) 33 | #c = HMMGaussianMixtureModel(n_components=3) 34 | #d = CoresetSegmentation(n_components=4) 35 | b = EMForwardBackward(n_components=3) 36 | #f = AutoregressiveMarkovModel() 37 | 38 | 39 | """ 40 | plotY1Y2(run_sweep_experiment(sys_params, 'observation', [0.1, 0.25, 
0.5, 0.75, 1], [a, b], np.ones((2,1)), lambda x,y: evaluate(x,y,'jaccard', thresh=0.75), N=10, k=10), 41 | "(A) HF Observation Noise", 42 | "HF Noise", 43 | "Segment Accuracy", 44 | legend=["GMM-TSC", "EM-TSC"], 45 | loc = 'lower left', 46 | filename="output1.png", 47 | ylim=0.0, 48 | xlim=0.1) 49 | """ 50 | 51 | 52 | plotY1Y2(run_sweep_experiment(sys_params, 'resonance', [0.1, 0.5, 1.0, 1.5, 2], [a, b], np.ones((2,1)), lambda x,y: evaluate(x,y,'jaccard', thresh=0.75), N=10, k=10), 53 | "(B) LF Observation Noise", 54 | "LF Noise", 55 | "Segment Accuracy", 56 | legend=["GMM-TSC", "EM-TSC"], 57 | loc = 'lower left', 58 | filename="output2.png", 59 | ylim=0.0, 60 | xlim=0.1) 61 | 62 | """ 63 | plotY1Y2(run_sweep_experiment(sys_params, 'resonance', [0.1, 0.25, 0.5, 0.75, 1], [a,b,c,d,e,f], np.ones((2,1)), lambda x,y: evaluate(x,y,'seg_acc', thresh=0.75), N=2, k=10), 64 | "(B) LF Process Noise", 65 | "LF Noise", 66 | "Segment Accuracy", 67 | legend=["TSC", "GMM", "GMM+HMM", "Coreset", "HSMM", "ARHMM"], 68 | loc = 'lower left', 69 | filename="output2.png", 70 | ylim=0.0, 71 | xlim=0.1) 72 | """ 73 | 74 | 75 | """ 76 | plotY1Y2(run_sweep_experiment(sys_params, 'observation', [0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1], [a,b,c,d,e,f], np.ones((2,1)), lambda x,y: evaluate(x,y,'frame_acc'), N=2, k=10), 77 | "(B) HF Observation Noise", 78 | "HF Noise", 79 | "Frame Accuracy", 80 | legend=["TSC", "GMM", "GMM+HMM", "Coreset", "HSMM", "ARHMM"], 81 | loc = 'lower left', 82 | filename="output1b.png", 83 | ylim=0.0, 84 | xlim=0.1) 85 | """ 86 | 87 | """ 88 | plotY1Y2(run_sweep_experiment(sys_params, 'resonance', [0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1], [a,b,c,d,e,f], np.ones((2,1)), lambda x,y: evaluate(x,y,'frame_acc'), N=2, k=10), 89 | "(D) LF Process Noise", 90 | "LF Noise", 91 | "Frame Accuracy", 92 | legend=["TSC", "GMM", "GMM+HMM", "Coreset", "HSMM", "ARHMM"], 93 | loc = 'lower left', 94 | filename="output2b.png", 95 | ylim=0.0, 96 | xlim=0.1) 97 | """ 98 | 99 | #plotData(t[0]) 100 | 101 | # 102 | #lm is the mean number of loops, dp is the probability of "missing" 103 | #t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 104 | #u = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 105 | #v = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 106 | #w = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 107 | #x = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 108 | 109 | 110 | #0.3 o, 0.3, 3 r 111 | """ 112 | a = TransitionStateClustering(window_size=3, normalize=False, pruning=0.3,delta=-1) 113 | b = TimeVaryingGaussianMixtureModel(hard_param=3) 114 | c = HMMGaussianMixtureModel(n_components=3) 115 | d = CoresetSegmentation(n_components=4) 116 | e = HiddenSemiMarkovModel() 117 | f = AutoregressiveMarkovModel() 118 | 119 | plotY1Y2(run_sweep_experiment(sys_params, 'resonance', [0.01, 0.25, 0.5, 1, 2], [a,b,c,d,e,f], np.ones((2,1)), jaccard, N=5, k=5), 120 | "LF Noise vs. 
Jaccard", 121 | "LF Noise", 122 | "Jaccard", 123 | legend=["TSC", "GMM", "GMM+HMM", "Coreset", "HSMM", "ARHMM"], 124 | loc = 'title', 125 | filename="output2.png", 126 | ylim=0.0, 127 | xlim=0.1) 128 | """ 129 | 130 | """ 131 | a = TransitionStateClustering(window_size=2, normalize=False, pruning=0.2,delta=-1) 132 | b = TransitionStateClustering(window_size=2, normalize=False, pruning=0.4,delta=-1) 133 | c = TransitionStateClustering(window_size=2, normalize=False, pruning=0.6,delta=-1) 134 | d = TransitionStateClustering(window_size=2, normalize=False, pruning=0.8,delta=-1) 135 | e = TransitionStateClustering(window_size=2, normalize=False, pruning=1.0,delta=-1) 136 | 137 | plotY1Y2(run_sweep_experiment(sys_params, 'observation', [0.1, 0.2, 0.5, 0.75, 1, 1.5], [a,b,c,d,e], np.ones((2,1)), jaccard, N=5, k=20), 138 | "LF Noise vs. Jaccard", 139 | "LF Noise", 140 | "Jaccard", 141 | legend=["TSC (p=0.2)", "TSC (p=0.4)", "TSC (p=0.6)", "TSC (p=0.8)", "TSC (p=1.0)"], 142 | loc = 'title', 143 | filename="output5.png", 144 | ylim=0.0, 145 | xlim=0.1) 146 | """ 147 | 148 | 149 | 150 | 151 | """ 152 | a = TimeVaryingGaussianMixtureModel() 153 | #t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 154 | #plotData(t) 155 | for i in range(0,20): 156 | t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 157 | a.addDemonstration(np.squeeze(t)) 158 | 159 | a.fit(hard_param = 3) 160 | print a.segmentation 161 | """ 162 | 163 | 164 | """ 165 | a = HMMGaussianMixtureModel() 166 | #t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 167 | #plotData(t) 168 | for i in range(0,20): 169 | t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 170 | a.addDemonstration(np.squeeze(t)) 171 | 172 | a.fit(n_components = 4) 173 | print a.segmentation 174 | """ 175 | 176 | """ 177 | a = CoresetSegmentation() 178 | for i in range(0,20): 179 | t = sampleDemonstrationFromSystem(sys,np.ones((2,1)), lm=0, dp=0) 180 | a.addDemonstration(np.squeeze(t)) 181 | a.fit(n_components = 2) 182 | print a.segmentation 183 | """ 184 | 185 | #from alternates.coreset import * 186 | #print coreset.get_coreset(np.squeeze(t),3,3) 187 | 188 | 189 | -------------------------------------------------------------------------------- /tsc/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BerkeleyAutomation/tsc/6d3b1fb4a2ee3c33ae7ed695b6197cdfa99cfc4a/tsc/.DS_Store -------------------------------------------------------------------------------- /tsc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BerkeleyAutomation/tsc/6d3b1fb4a2ee3c33ae7ed695b6197cdfa99cfc4a/tsc/__init__.py -------------------------------------------------------------------------------- /tsc/examples/inconsistent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This implements TSC over two inconsistent demos 5 | """ 6 | import os,sys,inspect 7 | import numpy as np 8 | from sklearn import mixture 9 | from sklearn.externals.six.moves import xrange 10 | import matplotlib.pyplot as plt 11 | import matplotlib as mpl 12 | 13 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 14 | parentdir = os.path.dirname(currentdir) 15 | sys.path.insert(0,parentdir) 16 | from tsc import * 17 | 18 | # Number of samples per component 19 | n_samples = 50 20 | 21 | # Generate random 
sample following a sine curve 22 | np.random.seed(0) 23 | X = np.zeros((n_samples, 2)) 24 | step = 4 * np.pi / n_samples 25 | 26 | for i in xrange(X.shape[0]): 27 | x = i * step - 6 28 | X[i, 0] = x + np.random.normal(0, 0.15) 29 | X[i, 1] = 3 * (np.sinc(x) + np.random.normal(0, .15)) 30 | 31 | 32 | # Number of samples per component 33 | n_samples = 50 34 | 35 | # Generate random sample following a sine curve 36 | np.random.seed(0) 37 | Y = np.zeros((n_samples, 2)) 38 | step = 4 * np.pi / n_samples 39 | 40 | for i in xrange(Y.shape[0]): 41 | x = i * step - 6 42 | Y[i, 0] = x + np.random.normal(0, 0.15) 43 | Y[i, 1] = 2 * (np.tanh(x) + np.random.normal(0, .15)) 44 | 45 | 46 | a = TransitionStateClustering(window_size=2) 47 | a.addDemonstration(X) 48 | a.addDemonstration(Y) 49 | a.fit(normalize=True) 50 | 51 | plt.subplot(1,3,1) 52 | plt.scatter(X[:,0], X[:,1], color='b') 53 | plt.scatter(Y[:,0], Y[:,1], color='r') 54 | plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 55 | plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 56 | 57 | plt.xlim(-8, 4 * np.pi - 6+2) 58 | plt.ylim(-5, 5) 59 | plt.title("TSC With RBF Normalization") 60 | 61 | a = TransitionStateClustering(window_size=2) 62 | a.addDemonstration(X) 63 | a.addDemonstration(Y) 64 | a.fit(normalize=True, normalizeKern="poly") 65 | 66 | plt.subplot(1,3,2) 67 | plt.scatter(X[:,0], X[:,1], color='b') 68 | plt.scatter(Y[:,0], Y[:,1], color='r') 69 | plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 70 | plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 71 | 72 | plt.xlim(-8, 4 * np.pi - 6+2) 73 | plt.ylim(-5, 5) 74 | plt.title("TSC With Polynomial Normalization") 75 | 76 | a = TransitionStateClustering(window_size=2) 77 | a.addDemonstration(X) 78 | a.addDemonstration(Y) 79 | a.fit(normalize=False) 80 | 81 | plt.subplot(1,3,3) 82 | plt.scatter(X[:,0], X[:,1], color='b') 83 | plt.scatter(Y[:,0], Y[:,1], color='r') 84 | plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 85 | plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 86 | 87 | plt.xlim(-8, 4 * np.pi - 6+2) 88 | plt.ylim(-5, 5) 89 | plt.title("TSC Without RBF Normalization") 90 | plt.show() -------------------------------------------------------------------------------- /tsc/examples/sineWave.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This implements a sineWave example 5 | """ 6 | import os,sys,inspect 7 | import numpy as np 8 | from sklearn import mixture 9 | from sklearn.externals.six.moves import xrange 10 | import matplotlib.pyplot as plt 11 | import matplotlib as mpl 12 | 13 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 14 | parentdir = os.path.dirname(currentdir) 15 | sys.path.insert(0,parentdir) 16 | from tsc import * 17 | 18 | # Number of samples per component 19 | n_samples = 100 20 | 21 | # Generate random sample following a sine curve 22 | np.random.seed(0) 23 | X = np.zeros((n_samples, 2)) 24 | step = 4 * np.pi / n_samples 25 | 26 | for i in xrange(X.shape[0]): 27 | x = i * step - 6 28 | X[i, 0] = x + np.random.normal(0, 0.1) 29 | X[i, 1] = 3 * (np.sin(x) + np.random.normal(0, .1)) 30 | 31 | 32 | # Number of samples per component 33 | n_samples = 100 34 | 35 | # Generate random sample following a sine curve 36 | np.random.seed(0) 37 | Y = np.zeros((n_samples, 2)) 38 | step = 4 * np.pi / 
n_samples 39 | 40 | for i in xrange(Y.shape[0]): 41 | x = i * step - 6 42 | Y[i, 0] = x + np.random.normal(0, 0.1) 43 | Y[i, 1] = 1.0 * (np.sin(x) + np.random.normal(0, .1)) 44 | 45 | 46 | a = TransitionStateClustering(window_size=2) 47 | a.addDemonstration(X) 48 | a.addDemonstration(Y) 49 | a.fit(normalize=True) 50 | 51 | markers =['o','x','o','x','o','x','o'] 52 | 53 | plt.subplot(1,2,1) 54 | a.segmentation[0].sort() 55 | 56 | inc = 0 57 | previ = 0 58 | for i in a.segmentation[0]: 59 | plt.scatter(X[previ:i,0], X[previ:i,1], color='r', marker=markers[inc],s=50) 60 | previ = i 61 | print previ 62 | inc = inc + 1 63 | 64 | inc = 0 65 | previ = 0 66 | a.segmentation[1].sort() 67 | for i in a.segmentation[1]: 68 | plt.scatter(Y[previ:i,0], Y[previ:i,1], color='b', marker=markers[inc],s=50) 69 | previ = i 70 | inc = inc + 1 71 | 72 | 73 | 74 | #plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 75 | #plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 76 | 77 | plt.xlim(-8, 4 * np.pi - 6+2) 78 | plt.ylim(-5, 5) 79 | plt.title("TSC With RBF Normalization") 80 | 81 | a = TransitionStateClustering(window_size=2) 82 | a.addDemonstration(X) 83 | a.addDemonstration(Y) 84 | a.fit(normalize=False) 85 | 86 | plt.subplot(1,2,2) 87 | inc = 0 88 | previ = 0 89 | a.segmentation[0].sort() 90 | for i in a.segmentation[0]: 91 | plt.scatter(X[previ:i,0], X[previ:i,1], color='r', marker=markers[inc],s=50) 92 | previ = i 93 | print previ 94 | inc = inc + 1 95 | 96 | inc = 0 97 | previ = 0 98 | a.segmentation[1].sort() 99 | for i in a.segmentation[1]: 100 | plt.scatter(Y[previ:i,0], Y[previ:i,1], color='b', marker=markers[inc],s=50) 101 | previ = i 102 | inc = inc + 1 103 | 104 | plt.xlim(-8, 4 * np.pi - 6+2) 105 | plt.ylim(-5, 5) 106 | plt.title("TSC Without RBF Normalization") 107 | plt.show() -------------------------------------------------------------------------------- /tsc/examples/timeWarp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This implements a time Warping example 5 | """ 6 | import os,sys,inspect 7 | import numpy as np 8 | from sklearn import mixture 9 | from sklearn.externals.six.moves import xrange 10 | import matplotlib.pyplot as plt 11 | import matplotlib as mpl 12 | 13 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 14 | parentdir = os.path.dirname(currentdir) 15 | sys.path.insert(0,parentdir) 16 | from tsc import * 17 | 18 | # Number of samples per component 19 | n_samples = 100 20 | 21 | # Generate random sample following a sine curve 22 | np.random.seed(0) 23 | X = np.zeros((n_samples, 2)) 24 | step = 4 * np.pi / n_samples 25 | 26 | for i in xrange(X.shape[0]): 27 | x = i * step - 6 28 | X[i, 0] = x + np.random.normal(0, 0.15) 29 | X[i, 1] = 3 * (np.sinc(x) + np.random.normal(0, .15)) 30 | 31 | 32 | # Number of samples per component 33 | n_samples = 200 34 | 35 | # Generate random sample following a sine curve 36 | np.random.seed(0) 37 | Y = np.zeros((n_samples, 2)) 38 | step = 4 * np.pi / n_samples 39 | 40 | for i in xrange(Y.shape[0]): 41 | x = i * step - 6 42 | Y[i, 0] = x + np.random.normal(0, 0.15) 43 | Y[i, 1] = 2 * (np.sinc(x) + np.random.normal(0, .15)) 44 | 45 | 46 | a = TransitionStateClustering(window_size=2) 47 | a.addDemonstration(X) 48 | a.addDemonstration(Y) 49 | a.fit(normalize=True) 50 | 51 | plt.subplot(1,2,1) 52 | plt.scatter(X[:,0], X[:,1], color='b') 53 | plt.scatter(Y[:,0], Y[:,1], color='r') 
54 | plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 55 | plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 56 | 57 | plt.xlim(-8, 4 * np.pi - 6+2) 58 | plt.ylim(-5, 5) 59 | plt.title("TSC With RBF Normalization") 60 | 61 | a = TransitionStateClustering(window_size=2) 62 | a.addDemonstration(X) 63 | a.addDemonstration(Y) 64 | a.fit(normalize=False) 65 | 66 | plt.subplot(1,2,2) 67 | plt.scatter(X[:,0], X[:,1], color='b') 68 | plt.scatter(Y[:,0], Y[:,1], color='r') 69 | plt.scatter(X[a.segmentation[0],0], X[a.segmentation[0],1], s=100,color='k') 70 | plt.scatter(Y[a.segmentation[1],0], Y[a.segmentation[1],1], s=100,color='k') 71 | 72 | plt.xlim(-8, 4 * np.pi - 6+2) 73 | plt.ylim(-5, 5) 74 | plt.title("TSC Without RBF Normalization") 75 | plt.show() -------------------------------------------------------------------------------- /tsc/tsc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class implements the basic transition state clustering 3 | framework (TSC). 4 | 5 | 6 | Transition State Clustering: Unsupervised Surgical 7 | Trajectory Segmentation For Robot Learning. ISRR 2015. 8 | 9 | @inproceedings{krishnan2015tsc, 10 | title={Transition State Clustering: Unsupervised Surgical 11 | Trajectory Segmentation For Robot Learning}, 12 | author={Krishnan*, Sanjay and Garg*, Animesh and 13 | Patil, Sachin and Lea, Colin and Hager, Gregory and 14 | Abbeel, Pieter and Goldberg, Ken}, 15 | booktitle={International Symposium on Robotics Research}, 16 | year={2015}, 17 | organization={Springer STAR} 18 | } 19 | """ 20 | import numpy as np 21 | from sklearn import mixture, decomposition 22 | from dpcluster import * 23 | import matplotlib.pyplot as plt 24 | import dtw 25 | 26 | #debug 27 | # import IPython 28 | 29 | class TransitionStateClustering: 30 | 31 | """ 32 | A TransitionStateClustering model is constructed with 33 | a window size and initializes the internal state 34 | """ 35 | def __init__(self, 36 | window_size=2, 37 | pruning=0.9, 38 | normalize=False, 39 | normalizeKern="rbf", 40 | delta=-1, 41 | verbose=True): 42 | 43 | self.window_size = window_size 44 | self.verbose = verbose 45 | self.model = [] 46 | self.task_segmentation = [] 47 | self.segmentation = [] 48 | self.pruning = pruning 49 | self.delta = delta 50 | self.normalize = normalize 51 | self.normalizeKern = normalizeKern 52 | 53 | #internal variables not for external reference 54 | self._demonstrations = [] 55 | self._transitions = [] 56 | self._transition_states_scluster = [] 57 | self._demonstration_sizes = [] 58 | self._distinct_state_clusters = 0 59 | #self.clipt = clipt 60 | 61 | """ 62 | This function adds a demonstration to the model, 63 | an N x p array (N time steps, p dimensions) 64 | """ 65 | def addDemonstration(self,demonstration): 66 | demo_size = np.shape(demonstration) 67 | 68 | if self.verbose: 69 | print "[TSC] Adding a Demonstration of Size=", demo_size 70 | 71 | self._demonstration_sizes.append(demo_size) 72 | self._demonstrations.append(demonstration) 73 | 74 | """ 75 | This function checks the sizes of the added demonstrations to make sure 76 | the dimensionality is consistent, and it returns None if inconsistent 77 | or the total length of all demonstrations if consistent.
78 | """ 79 | def checkSizes(self): 80 | if len(self._demonstration_sizes) == 0: 81 | return None 82 | 83 | first_element_p = self._demonstration_sizes[0][1] 84 | total_size = self._demonstration_sizes[0][0] 85 | 86 | for i in range(1, len(self._demonstration_sizes)): 87 | if self._demonstration_sizes[i][1] != first_element_p: 88 | return None 89 | else: 90 | total_size = total_size + self._demonstration_sizes[i][0] 91 | 92 | return total_size 93 | 94 | """ 95 | This function fits the TSC Model to demonstration data 96 | it takes a pruning threshold and some DP-GMM hyperparameters 97 | which are set to reasonable defaults 98 | """ 99 | def fit(self): 100 | 101 | #first validate 102 | totalSize = self.checkSizes() 103 | 104 | if totalSize == None: 105 | raise ValueError("All of the demonstrations must have the same dimensionality") 106 | 107 | if self.verbose: 108 | print "[TSC] Clearing previously learned model" 109 | 110 | N = len(self._demonstration_sizes) 111 | self.model = [] 112 | self.task_segmentation = [] 113 | self.segmentation = [] 114 | 115 | #helper routines 116 | self.identifyTransitions(totalSize,self.normalize,self.normalizeKern) 117 | self.clusterInState() 118 | self.pruneClusters() 119 | self.clusterInTime() 120 | self.taskToTrajectory() 121 | 122 | self.compaction(self.delta) 123 | 124 | """ 125 | This prunes transitions to a specified threshold 126 | """ 127 | def pruneClusters(self): 128 | distinct_clusters = set([c[2] for c in self._transition_states_scluster]) 129 | N = len(self._demonstration_sizes) 130 | new_transitions = [] 131 | for c in distinct_clusters: 132 | tD = set([d[0] for d in self._transition_states_scluster if d[2] == c]) 133 | tS = [d for d in self._transition_states_scluster if d[2] == c] 134 | if (len(tD) +0.0)/N > self.pruning: 135 | new_transitions.extend(tS) 136 | 137 | if self.verbose: 138 | print "[TSC] Transitions Before Pruning=", self._transition_states_scluster, "After=",new_transitions 139 | 140 | self._transition_states_scluster = new_transitions 141 | 142 | """ 143 | Takes the task segmentation and returns a trajectory 144 | segmentation. 
For conditioning reasons this doesn't 145 | use DP-GMM but finds all clusters of size segmentl (automatically set) 146 | """ 147 | def taskToTrajectory(self): 148 | N = len(self._demonstration_sizes) 149 | for i in range(0,N): 150 | tSD = [(k[2],k[3],k[1]) for k in self.task_segmentation if k[0] == i] 151 | 152 | timeDict = {} 153 | for t in tSD: 154 | key = (t[0], t[1]) 155 | if key in timeDict: 156 | timeDict[key].append(t[2]) 157 | else: 158 | timeDict[key] = [t[2]] 159 | 160 | print timeDict 161 | 162 | tseg = [np.median(timeDict[k]) for k in timeDict] 163 | tseg.append(0) 164 | tseg.append(self._demonstration_sizes[i][0]-self.window_size) 165 | self.segmentation.append(tseg) 166 | 167 | 168 | """ 169 | This function identifies transition times in each demonstration 170 | """ 171 | def identifyTransitions(self, total_size, normalize, normalizeKern): 172 | p = self._demonstration_sizes[0][1] 173 | demo_data_array = np.zeros((total_size-self.window_size,p*self.window_size)) 174 | 175 | inc = 0 176 | for i in range(0,len(self._demonstrations)): 177 | n = self._demonstration_sizes[i][0] 178 | for j in range(self.window_size,n): 179 | window = self._demonstrations[i][j-self.window_size:j,:] 180 | demo_data_array[inc,:] = np.reshape(window,(1,p*self.window_size)) 181 | inc = inc + 1 182 | 183 | if self.verbose: 184 | print "[TSC] Created a window model with w=",self.window_size 185 | 186 | if normalize: 187 | kpca = decomposition.KernelPCA(p*self.window_size,kernel=normalizeKern) 188 | demo_data_array = kpca.fit_transform(demo_data_array) 189 | if self.verbose: 190 | print "[TSC] Normalizing With Kernel Transformation" 191 | 192 | """ 193 | Apply DP-GMM to find transitions 194 | """ 195 | indices = self.smoothing(self.DPGMM(demo_data_array, p*self.window_size)) 196 | 197 | #print indices 198 | 199 | if self.verbose: 200 | print "[TSC] Removing all previously learned transitions" 201 | 202 | inc = 0 203 | self._transitions = [] 204 | for i in range(0,len(self._demonstrations)): 205 | n = self._demonstration_sizes[i][0] 206 | for j in range(self.window_size,n): 207 | 208 | #by default the first/last state is a transition 209 | #otherwise it is the states where the indices are different 210 | if inc == 0 or j == self.window_size: 211 | pass#self._transitions.append((i,0)) 212 | elif j == (n-1): 213 | pass#self._transitions.append((i,n-1)) 214 | elif indices[inc-1] != indices[inc]: 215 | self._transitions.append((i,j-self.window_size)) 216 | 217 | inc = inc + 1 218 | 219 | if self.verbose: 220 | print "[TSC] Discovered Transitions (demoid, time): ", self._transitions 221 | 222 | """ 223 | This applies smoothing to the indices to make sure 224 | rapid changes are discouraged 225 | """ 226 | def smoothing(self, indices): 227 | newIndices = indices 228 | for i in range(1,len(indices)): 229 | if indices[i] != indices[i-1] and indices[i] != indices[i+1] and indices[i+1] == indices[i-1]: 230 | newIndices[i] = indices[i+1] 231 | 232 | if self.verbose: 233 | print "[TSC] Smoothed out index=",i 234 | 235 | return newIndices 236 | 237 | 238 | """ 239 | Runs multiple runs of DPGMM takes the best clustering 240 | """ 241 | def DPGMM(self,data, dimensionality, p=0.95, k=1): 242 | runlist = [] 243 | for i in range(0,k): 244 | runlist.append(self.DPGMM_Helper(data,dimensionality,p)) 245 | runlist.sort() 246 | 247 | print runlist 248 | 249 | #return best 250 | return runlist[-1][1] 251 | 252 | """ 253 | Uses Teodor's code to do DP GMM clustering 254 | """ 255 | def DPGMM_Helper(self,data, dimensionality, 
p=0.95): 256 | vdp = VDP(GaussianNIW(dimensionality)) 257 | vdp.batch_learn(vdp.distr.sufficient_stats(data)) 258 | likelihoods = vdp.pseudo_resp(np.ascontiguousarray(data))[0] 259 | 260 | real_clusters = 1 261 | cluster_s = vdp.cluster_sizes() 262 | total = np.sum(cluster_s) 263 | running_total = cluster_s[0] 264 | for i in range(1,len(vdp.cluster_sizes())): 265 | running_total = running_total + cluster_s[i] 266 | real_clusters = i + 1 267 | if running_total/total > p: 268 | break 269 | 270 | return (-np.sum(vdp.al), [np.argmax(l[0:real_clusters]) for l in likelihoods]) 271 | 272 | """ 273 | This function applies the state clustering 274 | """ 275 | def clusterInState(self): 276 | tsN = len(self._transitions) 277 | p = self._demonstration_sizes[0][1] 278 | ts_data_array = np.zeros((tsN,p)) 279 | 280 | for i in range(0, tsN): 281 | ts = self._transitions[i] 282 | ts_data_array[i,:] = self._demonstrations[ts[0]][ts[1],:] 283 | 284 | 285 | #Apply the DP-GMM to find the state clusters 286 | indices = self.DPGMM(ts_data_array,p) 287 | indicesDict = list(set(indices)) 288 | 289 | self._transition_states_scluster = [] 290 | self._distinct_state_clusters = 0 291 | 292 | if self.verbose: 293 | print "[TSC] Removing previously learned state clusters " 294 | 295 | #encode the first layer of clustering: 296 | for i in range(0,tsN): 297 | label = indicesDict.index(indices[i]) 298 | tstuple = (self._transitions[i][0], self._transitions[i][1], label) 299 | self._transition_states_scluster.append(tstuple) 300 | 301 | self._distinct_state_clusters = len(list(set(indices))) 302 | #print self._distinct_state_clusters 303 | 304 | if self.verbose: 305 | print "[TSC] Discovered State Clusters (demoid, time, statecluster): ", self._transition_states_scluster 306 | 307 | """ 308 | This function applies the time sub-clustering 309 | """ 310 | def clusterInTime(self): 311 | p = self._demonstration_sizes[0][1] 312 | 313 | unorderedmodel = [] 314 | 315 | for i in range(0,self._distinct_state_clusters): 316 | tsI = [s for s in self._transition_states_scluster if s[2]==i] 317 | ts_data_array = np.zeros((len(tsI),p)) 318 | t_data_array = np.zeros((len(tsI),2)) 319 | 320 | for j in range(0, len(tsI)): 321 | ts = tsI[j] 322 | ts_data_array[j,:] = self._demonstrations[ts[0]][ts[1],:] 323 | 324 | t_data_array[j,0] = ts[1] + np.random.randn(1,1) #do this to avoid conditioning problems 325 | t_data_array[j,1] = ts[1] + np.random.randn(1,1) #do this to avoid conditioning problems 326 | 327 | if len(tsI) == 0: 328 | continue 329 | 330 | #Since there is only one state-cluster use a GMM 331 | mm = mixture.GMM(n_components=1) 332 | mm.fit(ts_data_array) 333 | 334 | 335 | #subcluster in time 336 | indices = self.DPGMM(t_data_array,2,0.9) 337 | #print t_data_array, indices 338 | indicesDict = list(set(indices)) 339 | 340 | #finish off by storing two values the task segmentation 341 | for j in range(0, len(tsI)): 342 | dd = set([tsI[n][0] for (n, ind) in enumerate(indices) if ind == indices[j]]) 343 | 344 | #time pruning condition 345 | if (len(dd) + 0.0)/len(self._demonstration_sizes) < self.pruning: 346 | continue 347 | 348 | self.task_segmentation.append((tsI[j][0], 349 | tsI[j][1], 350 | tsI[j][2], 351 | indicesDict.index(indices[j]))) 352 | 353 | #GMM model 354 | unorderedmodel.append((np.median(t_data_array),mm)) 355 | 356 | unorderedmodel.sort() 357 | self.model = [u[1] for u in unorderedmodel] 358 | 359 | if self.verbose: 360 | print "[TSC] Learned The Following Model: ", self.model 361 | 362 | 363 | #does the compaction 364 
| def compaction(self,delta=-1): 365 | for i in range(0, len(self._demonstrations)): 366 | segs = self.segmentation[i] 367 | segs.sort() 368 | d = self._demonstrations[i] 369 | 370 | prev = None 371 | removal_vals = [] 372 | 373 | for j in range(0,len(segs)-1): 374 | cur = d[segs[j]:segs[j+1],:] 375 | 376 | if prev is not None and len(cur) > 0 and len(prev) > 0: 377 | dist, cost, acc, path = dtw.dtw(cur, prev, dist=lambda x, y: np.linalg.norm(x - y, ord=2)) 378 | cmetric = dist/len(path) 379 | if cmetric < delta: 380 | removal_vals.append(segs[j+1]) 381 | 382 | if self.verbose: 383 | print "[TSC] Compacting ", segs[j], segs[j+1] 384 | 385 | prev = cur 386 | 387 | self.segmentation[i] = [s for s in self.segmentation[i] if s not in removal_vals] 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | --------------------------------------------------------------------------------
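
Usage note: the following is a minimal end-to-end sketch of the TransitionStateClustering API defined in tsc/tsc.py, mirroring the bundled examples. The noisy_sine helper is a hypothetical stand-in for real demonstration data, and the sketch assumes tsc/tsc.py is importable (the examples adjust sys.path for this) along with its numpy, sklearn, dpcluster, and dtw dependencies.

    import numpy as np
    from tsc import TransitionStateClustering

    np.random.seed(0)

    def noisy_sine(n_samples, amplitude):
        # Hypothetical demo generator: an N x p trajectory (N time steps,
        # p = 2 dimensions) tracing a noisy sine, as addDemonstration expects.
        step = 4 * np.pi / n_samples
        D = np.zeros((n_samples, 2))
        for i in range(n_samples):
            x = i * step - 6
            D[i, 0] = x + np.random.normal(0, 0.1)
            D[i, 1] = amplitude * (np.sin(x) + np.random.normal(0, 0.1))
        return D

    model = TransitionStateClustering(window_size=2)
    model.addDemonstration(noisy_sine(100, 3.0))
    model.addDemonstration(noisy_sine(100, 1.0))
    model.fit(normalize=True)  # kernel-PCA normalization, as in the examples

    # One list of transition indices per demonstration, including the
    # endpoints appended by taskToTrajectory (the examples sort these).
    print(model.segmentation)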
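For readers skimming identifyTransitions: it linearizes every demonstration into overlapping windows of the last window_size states before clustering. A self-contained numpy sketch of just that construction (the name window_features is chosen here for illustration):

    import numpy as np

    def window_features(demos, window_size):
        # Stack each length-window_size window of every demo into one row,
        # mirroring the array built in identifyTransitions; each demo of
        # length n contributes n - window_size rows of p*window_size columns.
        p = demos[0].shape[1]
        rows = []
        for d in demos:
            for j in range(window_size, d.shape[0]):
                rows.append(d[j - window_size:j, :].reshape(p * window_size))
        return np.array(rows)

    demos = [np.random.randn(50, 2), np.random.randn(60, 2)]
    features = window_features(demos, window_size=2)
    print(features.shape)  # (50-2) + (60-2) = 106 rows, 4 columns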
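DPGMM_Helper truncates the DP mixture by accumulating component sizes until they cover more than a fraction p of the data. An illustrative stand-alone version of that counting rule (this sketch sorts sizes largest-first, whereas the helper above walks them in the order dpcluster returns them):

    import numpy as np

    def clusters_covering(p, sizes):
        # Smallest number of (largest-first) clusters whose total size
        # strictly exceeds a fraction p of the data, echoing the
        # running-total stopping rule in DPGMM_Helper.
        sizes = np.sort(np.asarray(sizes, dtype=float))[::-1]
        frac = np.cumsum(sizes) / np.sum(sizes)
        return int(np.searchsorted(frac, p, side='right') + 1)

    print(clusters_covering(0.95, [50, 30, 15, 4, 1]))  # -> 4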
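Finally, compaction drops a segment boundary when the two adjacent segments are nearly identical under dynamic time warping (tsc.py calls the external dtw package for this). The sketch below substitutes a small textbook DTW so the criterion is self-contained; the threshold and normalization here are illustrative, not the exact values used above:

    import numpy as np

    def dtw_distance(A, B):
        # Textbook O(len(A)*len(B)) dynamic time warping with Euclidean
        # point-to-point cost and steps (i-1,j), (i,j-1), (i-1,j-1).
        n, m = len(A), len(B)
        acc = np.full((n + 1, m + 1), np.inf)
        acc[0, 0] = 0.0
        for i in range(1, n + 1):
            for j in range(1, m + 1):
                cost = np.linalg.norm(A[i - 1] - B[j - 1])
                acc[i, j] = cost + min(acc[i - 1, j], acc[i, j - 1], acc[i - 1, j - 1])
        return acc[n, m]

    # A boundary between two adjacent segments is dropped when their
    # (normalized) DTW distance falls below delta, as in compaction().
    seg_a = np.cumsum(np.random.randn(20, 2), axis=0)
    seg_b = seg_a + np.random.normal(0, 0.01, seg_a.shape)
    print(dtw_distance(seg_a, seg_b) < 1.0)  # near-duplicates -> True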