├── README.md
├── cifar100_curriculum_alexnet.py
├── cifar100_curriculum_resnet.py
└── cifar100_kmeans.py

/README.md:
--------------------------------------------------------------------------------
# curriculum-learning-for-deep-learning

What is curriculum learning?
* [Curriculum Learning](https://ronan.collobert.com/pub/matos/2009_curriculum_icml.pdf)
* [Automated Curriculum Learning for Neural Networks](http://proceedings.mlr.press/v70/graves17a/graves17a.pdf)


cifar100_curriculum_alexnet.py: curriculum learning for AlexNet.

cifar100_curriculum_resnet.py: curriculum learning for ResNet.

cifar100_kmeans.py: clusters the training data with k-means and writes `cluster.p`.


Clustering the data hierarchically with k-means is quite simple. You can also use [deep learning](https://github.com/elieJalbout/Clustering-with-Deep-learning) for the clustering step.
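The intended workflow, roughly: run `cifar100_kmeans.py` once to write `cluster.p`, then start either training script. Below is a minimal sketch of how the training scripts consume `cluster.p` (the stage index is only illustrative, and the loop mirrors `cifar100_curriculum_alexnet.py`; the ResNet script indexes the list from the other end):

```python
import pickle

# cluster.p, written by cifar100_kmeans.py, is a list of
# (cluster_id, [training-set indices]) pairs, most compact cluster first.
cluster_density_sorted = pickle.load(open("cluster.p", "rb"))

# Curriculum stage i trains on the union of the first i + 1 clusters.
stage = 2  # illustrative stage index
ids = []
for j in range(stage + 1):
    ids += cluster_density_sorted[j][1]
print(len(ids))  # number of training images available at this stage
```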
--------------------------------------------------------------------------------
/cifar100_curriculum_alexnet.py:
--------------------------------------------------------------------------------
import scipy.io
import tensorflow as tf
import os
from pylab import *
import numpy as np
import pickle
from numpy import *

def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    if 'data' in dict:
        dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
    return dict

def load_data_one(f):
    batch = unpickle(f)
    data = batch['data']
    labels = batch['fine_labels']
    print "Loading %s: %d" % (f, len(data))
    return data, labels

def load_data(files, data_dir, label_count):
    data, labels = load_data_one(data_dir + '/' + files[0])
    for f in files[1:]:
        data_n, labels_n = load_data_one(data_dir + '/' + f)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    labels = np.array([ [ float(i == label) for i in xrange(label_count) ] for label in labels ])
    return data, labels

TRAINING_ITERATIONS = 200000
WEIGHT_DECAY = 0.0001
batch_size = 32
learning_rate = 0.01

data_dir = '/home/binhdt/cifar100'
image_size = 32
image_dim = image_size * image_size * 3
meta = unpickle(data_dir + '/meta')
label_names = meta['fine_label_names']
label_count = len(label_names)

train_data, train_labels = load_data(['train'], data_dir, label_count)
test_data, test_labels = load_data(['test'], data_dir, label_count)
print "Train:", np.shape(train_data), np.shape(train_labels)
print "Test:", np.shape(test_data), np.shape(test_labels)
data = {'train_data': train_data, 'train_labels': train_labels, 'test_data': test_data, 'test_labels': test_labels}
cluster_density_sorted = pickle.load(open("cluster.p", "rb"))
nb_cluster = len(cluster_density_sorted)
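
# cluster.p is produced by cifar100_kmeans.py. It is a list of
# (cluster_id, sample_indices) pairs over the training set, sorted by the
# mean distance of a cluster's points to its centroid (most compact first).
# The curriculum loop below trains stage i on the union of clusters 0..i,
# treating the most compact clusters as the "easy" examples.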

def print_activations(t):
    print(t.op.name, ' ', t.get_shape().as_list())

def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

def read_images_from_disk(input_queue):
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    example = tf.image.decode_jpeg(file_contents, channels=3)
    return example, label

def weight_variable(shape, name):
    initial = tf.truncated_normal(shape, stddev=0.01, name=name)
    return tf.Variable(initial)

def bias_variable(shape, name):
    initial = tf.constant(0.0, shape=shape, name=name)
    return tf.Variable(initial)

def conv(input, kernel, biases, k_h, k_w, c_o, s_h, s_w, padding="VALID", group=1):
    c_i = input.get_shape()[-1]
    assert c_i % group == 0
    assert c_o % group == 0
    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    if group == 1:
        conv = convolve(input, kernel)
    else:
        # TF 1.x argument order: tf.split(value, num_splits, axis) / tf.concat(values, axis)
        input_groups = tf.split(input, group, axis=3)
        kernel_groups = tf.split(kernel, group, axis=3)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
        conv = tf.concat(output_groups, axis=3)
    return tf.reshape(tf.nn.bias_add(conv, biases), [-1] + conv.get_shape().as_list()[1:])

def conv2d(x, W, stride_h, stride_w, padding='SAME'):
    return tf.nn.conv2d(x, W, strides=[1, stride_h, stride_w, 1], padding=padding)

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder('float', shape=[None, image_dim])
    y_ = tf.placeholder('float', shape=[None, label_count])
    lr = tf.placeholder("float", shape=[])

    conv1W = weight_variable([3, 3, 3, 64], 'conv1W')
    conv1b = bias_variable([64], 'conv1b')
    conv2W = weight_variable([5, 5, 64, 192], 'conv2W')
    conv2b = bias_variable([192], 'conv2b')
    conv3W = weight_variable([3, 3, 192, 256], 'conv3W')
    conv3b = bias_variable([256], 'conv3b')
    fc8W = weight_variable([1 * 1 * 256, label_count], 'fc8W')
    fc8b = bias_variable([label_count], 'fc8b')
    keep_prob = tf.placeholder('float')

    def model(x):
        k_h = 3; k_w = 3; c_o = 64; s_h = 4; s_w = 4; group = 1
        conv1_in = conv(x, conv1W, conv1b, k_h, k_w, c_o, s_h, s_w, padding="SAME", group=group)
        conv1 = tf.nn.relu(conv1_in)
        radius = 5; alpha = 0.0001; beta = 0.75; bias = 1.0
        lrn1 = tf.nn.local_response_normalization(conv1, depth_radius=radius, alpha=alpha, beta=beta, bias=bias)
        maxpool1 = tf.nn.max_pool(lrn1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        k_h = 5; k_w = 5; c_o = 192; s_h = 1; s_w = 1; group = 1
        conv2_in = conv(maxpool1, conv2W, conv2b, k_h, k_w, c_o, s_h, s_w, padding="SAME", group=group)
        conv2 = tf.nn.relu(conv2_in)
        maxpool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        k_h = 3; k_w = 3; c_o = 256; s_h = 1; s_w = 1; group = 1
        conv3_in = conv(maxpool2, conv3W, conv3b, k_h, k_w, c_o, s_h, s_w, padding="SAME", group=group)
        conv3 = tf.nn.relu(conv3_in)
        maxpool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        fc7_drop = tf.nn.dropout(maxpool3, keep_prob)
        print_activations(fc7_drop)
        fc8 = tf.nn.xw_plus_b(tf.reshape(fc7_drop, [-1, int(prod(fc7_drop.get_shape()[1:]))]), fc8W, fc8b)
        return fc8

    logits = model(tf.reshape(x, [-1, 32, 32, 3]))
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
    regularizers = tf.nn.l2_loss(conv1W) + tf.nn.l2_loss(conv1b) +\
                   tf.nn.l2_loss(conv2W) + tf.nn.l2_loss(conv2b) +\
                   tf.nn.l2_loss(conv3W) + tf.nn.l2_loss(conv3b) +\
                   tf.nn.l2_loss(fc8W) + tf.nn.l2_loss(fc8b)
    loss = tf.reduce_mean(cross_entropy + WEIGHT_DECAY * regularizers)

    train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(loss)
    correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(logits), 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    saver = tf.train.Saver()
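
# Curriculum training: stage i takes the i + 1 most compact clusters from
# cluster_density_sorted, shuffles that subset, and (for i > 0) warm-starts
# from the checkpoint saved at the end of stage i - 1. The learning rate is
# dropped to 0.001 and 0.0001 at 50% and 75% of TRAINING_ITERATIONS; note
# that learning_rate is a module-level variable, so it is not reset to 0.01
# between stages.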

with tf.Session(graph=graph) as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(nb_cluster):
        id = []
        for j in range(i + 1):
            id = id + cluster_density_sorted[j][1]
        xtrain = train_data[id]
        ytrain = train_labels[id]

        pi = np.random.permutation(len(xtrain))
        xtrain, ytrain = xtrain[pi], ytrain[pi]

        if i > 0:
            saver.restore(sess, './curriculum_alexnet_cluster' + str(i - 1) + '.ckpt')

        for it in range(TRAINING_ITERATIONS):
            if it == TRAINING_ITERATIONS * 50/100: learning_rate = 0.001
            if it == TRAINING_ITERATIONS * 75/100: learning_rate = 0.0001
            if it * batch_size % len(xtrain) + batch_size <= len(xtrain):
                start = it * batch_size % len(xtrain)
                end = start + batch_size
            else:
                start = it * batch_size % len(xtrain)
                end = len(xtrain)

            _, train_accuracy, cost = sess.run([train_step, accuracy, cross_entropy],
                feed_dict={x: xtrain[start:end], y_: ytrain[start:end], keep_prob: 0.5, lr: learning_rate})

            if it % 200 == 0:
                print i, it, train_accuracy, cost, accuracy.eval(feed_dict={x: test_data, y_: test_labels, keep_prob: 1.0})

        saver.save(sess, './curriculum_alexnet_cluster' + str(i) + '.ckpt')

    sess.close()
--------------------------------------------------------------------------------
/cifar100_curriculum_resnet.py:
--------------------------------------------------------------------------------
import scipy.io
import tensorflow as tf
import os
from pylab import *
import numpy as np
import pickle
from numpy import *
import math

def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    if 'data' in dict:
        dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
    return dict

def load_data_one(f):
    batch = unpickle(f)
    data = batch['data']
    labels = batch['fine_labels']
    print "Loading %s: %d" % (f, len(data))
    return data, labels

def load_data(files, data_dir, label_count):
    data, labels = load_data_one(data_dir + '/' + files[0])
    for f in files[1:]:
        data_n, labels_n = load_data_one(data_dir + '/' + f)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    labels = np.array([ [ float(i == label) for i in xrange(label_count) ] for label in labels ])
    return data, labels

TRAINING_ITERATIONS = 200000
WEIGHT_DECAY = 0.0001
batch_size = 64

data_dir = '/home/binhdt/cifar100'
image_size = 32
image_dim = image_size * image_size * 3
meta = unpickle(data_dir + '/meta')
label_names = meta['fine_label_names']
label_count = len(label_names)

train_data, train_labels = load_data(['train'], data_dir, label_count)
test_data, test_labels = load_data(['test'], data_dir, label_count)
print "Train:", np.shape(train_data), np.shape(train_labels)
print "Test:", np.shape(test_data), np.shape(test_labels)
data = {'train_data': train_data, 'train_labels': train_labels, 'test_data': test_data, 'test_labels': test_labels}
cluster_density_sorted = pickle.load(open("cluster.p", "rb"))
nb_cluster = len(cluster_density_sorted)

def print_activations(t):
    print(t.op.name, ' ', t.get_shape().as_list())

def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

def read_images_from_disk(input_queue):
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    example = tf.image.decode_jpeg(file_contents, channels=3)
    return example, label


def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):
    res = [ 0 ] * len(tensors)
    batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]
    total_size = len(batch_tensors[0][1])
    batch_count = (total_size + batch_size - 1) / batch_size
    for batch_idx in xrange(batch_count):
        current_batch_size = None
        for (placeholder, tensor) in batch_tensors:
            batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]
            current_batch_size = len(batch_tensor)
            feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]
        tmp = session.run(tensors, feed_dict=feed_dict)
        res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]
    return [ r / float(total_size) for r in res ]

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)

def conv2d(input, in_features, out_features, kernel_size, stride):
    W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
    return tf.nn.conv2d(input, W, [ 1, stride, stride, 1 ], padding='SAME')

def basic_block(input, in_features, out_features, stride, is_training, keep_prob):
    if stride == 1:
        shortcut = input
    else:
        shortcut = tf.nn.avg_pool(input, [ 1, stride, stride, 1 ], [ 1, stride, stride, 1 ], 'VALID')
        shortcut = tf.pad(shortcut, [[0, 0], [0, 0], [0, 0],
            [(out_features-in_features)//2, (out_features-in_features)//2]])
    current = conv2d(input, in_features, out_features, 3, stride)
    current = tf.nn.dropout(current, keep_prob)
    current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
    current = tf.nn.relu(current)
    current = conv2d(current, out_features, out_features, 3, 1)
    current = tf.nn.dropout(current, keep_prob)
    current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
    # No final relu as per http://torch.ch/blog/2016/02/04/resnets.html
    return current + shortcut

def block_stack(input, in_features, out_features, stride, depth, is_training, keep_prob):
    current = basic_block(input, in_features, out_features, stride, is_training, keep_prob)
    for _d in xrange(depth - 1):
        current = basic_block(current, out_features, out_features, 1, is_training, keep_prob)
    return current
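
# The three stacks built below follow the 6n+2 CIFAR ResNet layout with n = 18
# (two 3x3 convolutions per basic block, widths 16/32/64), i.e. roughly a
# 110-layer network. Strided blocks downsample the shortcut with average
# pooling and zero-pad the extra channels ("option A"), so the skip path adds
# no parameters.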

graph = tf.Graph()
with graph.as_default():
    xs = tf.placeholder("float", shape=[None, image_dim])
    ys = tf.placeholder("float", shape=[None, label_count])
    lr = tf.placeholder("float", shape=[])
    keep_prob = tf.placeholder(tf.float32)
    is_training = tf.placeholder("bool", shape=[])

    current = tf.reshape(xs, [ -1, 32, 32, 3 ])
    current = conv2d(current, 3, 16, 3, 1)
    current = tf.nn.relu(current)

    # dimension is 32x32x16
    current = block_stack(current, 16, 16, 1, 18, is_training, keep_prob)
    current = block_stack(current, 16, 32, 2, 18, is_training, keep_prob)
    # dimension is 16x16x32
    current = block_stack(current, 32, 64, 2, 18, is_training, keep_prob)
    # dimension is 8x8x64

    current = tf.reduce_mean(current, reduction_indices=[1, 2], name="avg_pool")
    final_dim = 64
    current = tf.reshape(current, [ -1, final_dim ])
    Wfc = weight_variable([ final_dim, label_count ])
    bfc = bias_variable([ label_count ])
    ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )

    cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
    train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
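
# Curriculum training for the ResNet: stage i is built from the tail of
# cluster_density_sorted (index nb_cluster - j - 1), i.e. the opposite end of
# the ordering used by the AlexNet script. Every stage except the last runs
# 9 epochs; the final stage, which covers the whole training set, runs 300
# epochs with the learning rate dropped at 1/3 and 2/3 of the schedule.
# Checkpoints reuse the 'curriculum_alexnet_cluster' prefix from the AlexNet
# script.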

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    for i in xrange(0, nb_cluster):
        id = []
        for j in range(i + 1):
            id = id + cluster_density_sorted[nb_cluster-j-1][1]
        xtrain = train_data[id]
        ytrain = train_labels[id]

        pi = np.random.permutation(len(xtrain))
        xtrain, ytrain = xtrain[pi], ytrain[pi]

        if i > 0:
            saver.restore(session, './curriculum_alexnet_cluster' + str(i - 1) + '.ckpt')

        batch_count = len(xtrain) / batch_size
        batches_data = np.split(xtrain[:batch_count*batch_size], batch_count)
        batches_labels = np.split(ytrain[:batch_count*batch_size], batch_count)
        learning_rate = 0.1

        if i < nb_cluster - 1:
            nb_epoch = 9
        else:
            nb_epoch = 300

        for epoch in xrange(1, 1+nb_epoch):
            if epoch == math.floor(nb_epoch/3): learning_rate = 0.01
            if epoch == math.floor(2*nb_epoch/3): learning_rate = 0.001
            for batch_idx in xrange(batch_count):
                batch_data = batches_data[batch_idx]
                batch_labels = batches_labels[batch_idx]

                batch_res = session.run([ train_step, cross_entropy, accuracy ],
                    feed_dict = { xs: batch_data, ys: batch_labels, lr: learning_rate, is_training: True, keep_prob: 0.8 })

            test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
                feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
            print epoch, batch_res[1:], test_results

        saver.save(session, './curriculum_alexnet_cluster' + str(i) + '.ckpt')
--------------------------------------------------------------------------------
/cifar100_kmeans.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.cluster import KMeans
import operator
from collections import defaultdict
import pickle
from sklearn.decomposition import PCA

def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    return dict

def load_data_one(f):
    batch = unpickle(f)
    print batch.keys()
    data = batch['data']
    labels = batch['fine_labels']
    print "Loading %s: %d" % (f, len(data))
    return data, labels

def load_data(files, data_dir, label_count):
    data, labels = load_data_one(data_dir + '/' + files[0])
    for f in files[1:]:
        data_n, labels_n = load_data_one(data_dir + '/' + f)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    labels = np.array([ [ float(i == label) for i in xrange(label_count) ] for label in labels ])
    return data, labels

def grayscale(a):
    return a.reshape(a.shape[0], 3, 32, 32).mean(1).reshape(a.shape[0], -1) / 256.
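
# run() below clusters the training images as follows: each image is reduced
# to a 1024-dimensional grayscale vector (mean over the three channels,
# scaled to [0, 1)), projected onto its first 2 principal components, and
# grouped into 100 k-means clusters. Each cluster is scored by the mean
# distance of its points to the cluster centre, and the
# (cluster_id, member_indices) pairs are written to cluster.p sorted by that
# score, most compact cluster first. Because only 2 PCA components are kept,
# cluster membership reflects only very coarse image statistics.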

def run():
    data_dir = '../../cifar100'
    image_size = 32
    image_dim = image_size * image_size * 3
    meta = unpickle(data_dir + '/meta')
    label_names = meta['fine_label_names']
    label_count = len(label_names)

    train_files = [ 'train' ]
    train_data, train_labels = load_data(train_files, data_dir, label_count)
    train_data = grayscale(train_data)
    test_data, test_labels = load_data([ 'test' ], data_dir, label_count)
    test_data = grayscale(test_data)
    print "Train:", np.shape(train_data), np.shape(train_labels)
    print "Test:", np.shape(test_data), np.shape(test_labels)
    data = { 'train_data': train_data,
             'train_labels': train_labels,
             'test_data': test_data,
             'test_labels': test_labels }

    reduced_data = PCA(n_components=2).fit_transform(train_data)
    kmeans = KMeans(n_clusters=100, random_state=0, precompute_distances=True, max_iter=1000, n_init=20).fit(reduced_data)

    cluster_density = dict()
    cluster = defaultdict(list)

    for i in range(len(kmeans.labels_)):
        cluster[kmeans.labels_[i]].append(i)
        if kmeans.labels_[i] in cluster_density:
            cluster_density[kmeans.labels_[i]] = cluster_density[kmeans.labels_[i]] + np.linalg.norm(reduced_data[i] - kmeans.cluster_centers_[kmeans.labels_[i]])
        else:
            cluster_density[kmeans.labels_[i]] = np.linalg.norm(reduced_data[i] - kmeans.cluster_centers_[kmeans.labels_[i]])

    for i in set(kmeans.labels_):
        cluster_density[i] = cluster_density[i]/len(cluster[i])

    curriculum_cluster = sorted(cluster_density.items(), key=operator.itemgetter(1))
    print curriculum_cluster
    cluster_density_sorted = list()
    for tup in curriculum_cluster:
        cluster_density_sorted.append((tup[0], cluster[tup[0]]))
    pickle.dump(cluster_density_sorted, open( "cluster.p", "wb" ))

run()
--------------------------------------------------------------------------------