├── .idea
│   ├── misc.xml
│   ├── modules.xml
│   ├── neural_optimizer_search.iml
│   ├── vcs.xml
│   └── workspace.xml
├── Controller.py
├── README.md
├── Reinforce project report.pdf
├── __pycache__
│   ├── Controller.cpython-35.pyc
│   ├── my_optimizer.cpython-35.pyc
│   └── train_target.cpython-35.pyc
├── lg3.txt
├── main.py
├── my_optimizer.py
└── train_target.py
/Controller.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import random
3 | import numpy as np
4 |
5 | log_dir = 'log'
6 | state_space = {'size': [16, 16, 11, 11, 5],
7 | 'space': [[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],
8 | [1,2,3,4,5,6,7,8,9,10,11], [1,2,3,4,5,6,7,8,9,10,11], [1,2,3,4,5] ]}
9 |
10 |
11 | class Policy_network():
12 | def __init__(self, sess, optimizer, global_step,
13 | reg_param=0.001,
14 | discount_factor=0.99,
15 | exploration=0.8,
16 | controller_cells=32
17 | ):
18 | self.sess = sess
19 | self.optimizer = optimizer
20 | self.reg_param = reg_param
21 | self.discount_factor = discount_factor
22 | self.controller_cells = controller_cells
23 | self.global_step = global_step
24 | self.exploration = exploration
25 | self.cell_outputs = []
26 | self.policy_classifiers = []
27 | self.policy_actions = []
28 | self.policy_labels = []
29 |
30 |
31 | self.reward_buffer = [] # store rewards
32 | self.state_buffer = [] # store last state
33 |
34 | self.build_policy_network()
35 | var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
36 | self.sess.run(tf.variables_initializer(var_lists))
37 | self.writer = tf.summary.FileWriter(log_dir, graph=tf.get_default_graph())
38 |
39 | def build_policy_network(self):
40 | with tf.name_scope("policy_network"):
41 |             # use an NASCell as the controller RNN; any other RNN cell (e.g. LSTMCell) would work as well
42 | nas_cell = tf.contrib.rnn.NASCell(self.controller_cells)
43 | cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32)
44 |
45 | #initially, cell input will be the state input
46 | with tf.name_scope('state_input'):
47 | state_input = tf.placeholder(dtype=tf.float32, shape=(1, None, 1), name='state_input')
48 | self.state_input = state_input
49 | cell_input = state_input
50 |
51 |
52 |
53 | for i in range(5):
54 | size = state_space['size'][i]
55 |
56 | with tf.name_scope('controller_output_%d' % i):
57 |
58 | outputs, final_state = tf.nn.dynamic_rnn(
59 | cell=nas_cell,
60 | inputs=cell_input,
61 | initial_state=cell_state,
62 | dtype=tf.float32
63 | )
64 |
65 |                     # add a separate classifier (one softmax head per state dimension) on each layer's output
66 | classifier = tf.layers.dense(inputs=outputs[:, -1, :], units=size, name='classifier_%d' % i, reuse=False)
67 | preds = tf.nn.softmax(classifier)
68 |
69 |                     # feed the next step with the current output and carry the RNN state forward
70 | cell_input = tf.expand_dims(classifier, -1, name='cell_output_%d' % i)
71 | #print('input_%d' % (i+1), cell_input)
72 | cell_state = final_state
73 |
74 | # store tensors for later loss computations
75 | self.cell_outputs.append(cell_input)
76 | self.policy_classifiers.append(classifier)
77 | self.policy_actions.append(preds)
78 |
79 |
80 |
81 |
82 | # collect all variables for regularization loss
83 | policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='policy_network')
84 |
85 | # compute loss and gradients
86 | with tf.name_scope("compute_gradients"):
87 | # gradients for selecting action from policy network
88 | self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")
89 | tf.summary.scalar('discounted_reward', tf.reduce_sum(self.discounted_rewards))
90 |
91 |                 # sum the cross-entropy losses of all the individual classifiers
92 |                 cross_entropy_loss = 0
93 |
94 | for i in range(5):
95 | classifier = self.policy_classifiers[i]
96 | size = state_space['size'][i]
97 |
98 | with tf.name_scope('state_%d' % (i+1)):
99 | labels = tf.placeholder(dtype=tf.float32, shape=(None, size), name='cell_label_%d' % i)
100 |                         self.policy_labels.append(labels)
101 | 
102 |                         one_cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=classifier, labels=labels)
103 |                         print(classifier, labels)
104 |                         cross_entropy_loss += one_cross_entropy_loss
105 |                 pg_loss = tf.reduce_mean(cross_entropy_loss)
106 | reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables]) # regularization
107 |
108 | self.total_loss = pg_loss + self.reg_param * reg_loss
109 | tf.summary.scalar('total_loss', self.total_loss)
110 |
111 | # compute gradients
112 | self.gradients = self.optimizer.compute_gradients(self.total_loss)
113 |
114 |
115 |             # REINFORCE: scale each gradient by the (discounted) reward
116 | for i, (grad, var) in enumerate(self.gradients):
117 | if grad is not None:
118 | self.gradients[i] = (grad * self.discounted_rewards, var)
119 |
120 | # training update
121 | with tf.name_scope("train_policy_network"):
122 | # apply gradients to update policy network
123 | self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)
124 |
125 |
126 |
127 |
128 |
129 |
130 | def get_action(self, state):
131 | if random.random() < self.exploration:
132 | return np.array([random.choice(range(1, 17)), random.choice(range(1, 17)), random.choice(range(1, 12)),
133 | random.choice(range(1, 12)), random.choice(range(1, 6))])
134 | else:
135 | preds = self.sess.run(self.policy_actions, {self.state_input: state})
136 | action = []
137 | for i, pred in enumerate(preds):
138 | print (pred)
139 | #sample action from predictions
140 | one_action = np.random.choice(
141 | state_space['space'][i],
142 | 1,
143 | p=pred[0]
144 | )
145 | action.append(one_action[0])
146 | return action
147 |
148 |
149 |
150 |
151 | def storeRollout(self, state, reward):
152 | self.reward_buffer.append(reward)
153 | self.state_buffer.append(state)
154 |
155 | def train_step(self, steps_count):
156 | states = np.array(self.state_buffer[-steps_count:])
157 | feed_dict = {}
158 | labels = []
159 | for i, state in enumerate(states[0]):
160 | one_hot = np.zeros(state_space['size'][i])
161 | one_hot[state-1] = 1.
162 | one_hot = np.expand_dims(one_hot, 0)
163 | labels.append(one_hot)
164 | feed_dict[self.policy_labels[i]] = one_hot
165 | print ('states:', states)
166 | states = np.expand_dims(states, -1)
167 |         rewards = self.reward_buffer[-steps_count:]
168 |         feed_dict[self.state_input] = states
169 |         feed_dict[self.discounted_rewards] = rewards
170 | 
171 | 
172 |         tf.summary.scalar('reward', rewards[0])
173 |         merged = tf.summary.merge_all()
174 |         _, ls, summary_str, global_step = self.sess.run([self.train_op, self.total_loss, merged, self.global_step],
175 |                                                          {self.state_input: states,
176 |                                                           self.discounted_rewards: rewards,
177 |                                                           self.policy_labels[0]: labels[0],
178 |                                                           self.policy_labels[1]: labels[1],
179 |                                                           self.policy_labels[2]: labels[2],
180 |                                                           self.policy_labels[3]: labels[3],
181 |                                                           self.policy_labels[4]: labels[4]})
182 | self.writer.add_summary(summary_str)
183 |
184 |
185 | # epsilon greedy with decay
186 | if global_step != 0 and global_step % 20 == 0 and self.exploration > 0.2:
187 | self.exploration *= 0.97
188 |
189 | return ls
--------------------------------------------------------------------------------
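
The gradient manipulation above (`self.gradients[i] = (grad * self.discounted_rewards, var)` in `Controller.py`) is the REINFORCE estimator: the gradient of the cross-entropy loss against the sampled action's one-hot label is the negative score function, so scaling it by the reward yields a policy-gradient estimate. A minimal NumPy sketch of that identity for one softmax decision; the logits are made up, and the reward 0.09 is the first-episode reward recorded in `lg3.txt`:

import numpy as np

def reinforce_gradient(logits, action, reward):
    # Softmax cross-entropy against the one-hot label of the sampled action has
    # gradient (probs - one_hot) w.r.t. the logits; multiplying by the reward
    # reproduces the reward-weighted gradient that Controller.train_step applies.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    one_hot = np.zeros_like(probs)
    one_hot[action] = 1.0
    return reward * (probs - one_hot)

print(reinforce_gradient(np.array([0.5, 1.0, -0.3]), action=1, reward=0.09))
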
/README.md:
--------------------------------------------------------------------------------
1 | # neural_optimizer_search
2 |
3 | This program is an implementation of the 2017 ICML paper [Neural Optimizer Search with Reinforcement Learning](https://arxiv.org/abs/1709.07417).
4 | 
5 | - Implements a controller RNN in TensorFlow, updated by `policy gradients`. The controller RNN is defined in `Controller.py`.
6 | - Implements a small ConvNet target network in Keras, defined in `train_target.py`.
7 | - `my_optimizer.py` converts the action string generated by the controller RNN into an optimizer that is compiled into the Keras model (a decoding sketch follows this file).
8 | - `lg3.txt` records the training results over episodes.
9 | - The `log` directory contains the computational graph and the scalar summaries written while training the controller RNN; inspect them with TensorBoard.
10 | - Distributed training is not supported.
11 |
12 | # Usage
13 |
14 | - Adjust the hyperparameters in `main.py`
15 | - Then simply execute `main.py`
16 |
17 | # Requirements
18 | - python >= 3.5
19 | - Keras >= 1.2.1
20 | - Tensorflow >= 1.4
21 |
22 | # More details
23 | - See the report `Reinforce project report.pdf` in the repository root
--------------------------------------------------------------------------------
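
To make the encoding used throughout the project concrete: an action is a 5-element vector [operand 1, operand 2, unary 1, unary 2, binary] of 1-based indices into the tables defined at the top of `my_optimizer.py`. A small decoding sketch; the tables are copied from `my_optimizer.py`, and the example action is made up for illustration:

# Search-space tables, copied from my_optimizer.py
operands = {1: 'g', 2: 'g2', 3: 'g3', 4: 'm', 5: 'v', 6: 'y', 7: 'sign(g)', 8: 'sign(m)',
            9: '1', 10: 'noise', 11: '10-4w', 12: '10-3w', 13: '10-2w', 14: '10-1w',
            15: 'ADAM', 16: 'RMSProp'}
unarys = {1: '1', 2: '-1', 3: 'exp', 4: 'log', 5: 'clip10-5', 6: 'clip10-4', 7: 'clip10-3',
          8: 'drop0.1', 9: 'drop0.3', 10: 'drop0.5', 11: 'sign'}
binarys = {1: 'add', 2: 'sub', 3: 'mul', 4: 'div', 5: 'keep_left'}

def describe(action):
    # Render an action vector as the update rule it encodes.
    op1, op2, u1, u2, b = action
    return '%s( %s(%s), %s(%s) )' % (binarys[b], unarys[u1], operands[op1],
                                     unarys[u2], operands[op2])

# e.g. the weight update  w <- w - lr * (g^2 * clip(m, 1e-3))
print(describe([2, 4, 1, 7, 3]))   # mul( 1(g2), clip10-3(m) )
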
/Reinforce project report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mogolola/neural_optimizer_search/c5ccd62173878029e0fb7ddceea7744374d34a77/Reinforce project report.pdf
--------------------------------------------------------------------------------
/__pycache__/Controller.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mogolola/neural_optimizer_search/c5ccd62173878029e0fb7ddceea7744374d34a77/__pycache__/Controller.cpython-35.pyc
--------------------------------------------------------------------------------
/__pycache__/my_optimizer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mogolola/neural_optimizer_search/c5ccd62173878029e0fb7ddceea7744374d34a77/__pycache__/my_optimizer.cpython-35.pyc
--------------------------------------------------------------------------------
/__pycache__/train_target.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mogolola/neural_optimizer_search/c5ccd62173878029e0fb7ddceea7744374d34a77/__pycache__/train_target.cpython-35.pyc
--------------------------------------------------------------------------------
/lg3.txt:
--------------------------------------------------------------------------------
1 | current time: 20:26:10.354183 episode: 0 loss: 11.962965 last_state: [ 2 16 4 1 2] last_reward: 0.09000000000000001 last_acc 0.1 moving_acc 0.009999999999999998learning_rate1e-05
2 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import datetime
4 | from train_target import Conv
5 | from Controller import Policy_network
6 |
7 | log_dir = 'log'
8 |
9 |
10 | MAX_EPISODES = 2500    # maximum number of episodes (trials)
11 | MAX_EPOCHS = 5         # maximum number of epochs to train the target network
12 | EXPLORATION = 0.8      # initial exploration rate for epsilon-greedy action selection
13 | REGULARIZATION = 1e-3  # regularization rate
14 | CONTROLLER_CELLS = 32  # number of cells in the RNN controller
15 | LEARNING_RATES = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]  # candidate learning rates for the target network
16 |
17 |
18 |
19 | def main():
20 |
21 | global args
22 | sess = tf.Session()
23 | global_step = tf.Variable(0, trainable=False)
24 | learning_rate = tf.train.exponential_decay(0.99, global_step,
25 | 500, 0.96, staircase=True)
26 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
27 |
28 | policy_network = Policy_network(sess, optimizer, global_step,
29 | exploration=EXPLORATION,
30 | reg_param=REGULARIZATION,
31 | controller_cells=CONTROLLER_CELLS)
32 |
33 | step = 0
34 | # define initial input state
35 | state = np.array([[1, 9, 1, 1, 3]], dtype=np.float32)
36 | state = np.expand_dims(state, -1)
37 |
38 | total_rewards = 0
39 |     moving_acc = 0.0  # exponential moving average of accuracy, used as the reward baseline
40 |     beta = 0.9        # decay parameter for updating the moving accuracy
41 |
42 | for i_episode in range(MAX_EPISODES):
43 | action = policy_network.get_action(state)
44 |         print("current action:", action)
45 |
46 | #train target network and get reward (accuracy on hold-out set)
47 | convsess = tf.Session()
48 | target = Conv(convsess)
49 | target_acc_rec = []
50 | for i, lr in enumerate(LEARNING_RATES):
51 |             print('train target network with learning rate %g ========>' % lr)
52 |             target.train_one_epoch(lr=lr, action=action)
53 | target_acc = target.test()
54 | target_acc_rec.append(target_acc)
55 | best_lr_index = int(np.argmax(target_acc_rec))
56 | best_lr = LEARNING_RATES[best_lr_index]
57 |
58 |         print('train target network with the best learning rate %g ===========>' % best_lr)
59 | target.train(lr=best_lr, action=action, epochs=MAX_EPOCHS)
60 | acc = target.test()
61 |         tf.summary.scalar('accuracy', acc)
62 | convsess.close()
63 | moving_acc = moving_acc * beta + acc * (1 - beta)
64 | reward = acc - moving_acc
65 |
66 |
67 |
68 | print("reward=====>", reward)
69 |
70 | total_rewards += reward
71 |
72 |         # the chosen action becomes the next state
73 | state = action
74 | policy_network.storeRollout(state, reward)
75 | state = np.expand_dims(state, 0)
76 | state = np.expand_dims(state, -1)
77 |
78 | step += 1
79 | ls = policy_network.train_step(1)
80 | log_str = "current time: " + str(datetime.datetime.now().time()) + " episode: " + \
81 | str(i_episode) + " loss: " + str(ls) + " last_state: " + str(np.squeeze(state)) + " last_reward: " + \
82 |                   str(reward) + " last_acc " + str(acc) + " moving_acc " + str(moving_acc) + " learning_rate " + str(best_lr) + "\n"
83 | log = open("lg3.txt", "a+")
84 | log.write(log_str)
85 | log.close()
86 | print(log_str)
87 |
88 |
89 |
90 | if __name__ == '__main__':
91 | main()
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
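
The reward in `main.py` is the hold-out accuracy minus an exponential moving average of past accuracies (`beta = 0.9`), so a candidate optimizer is rewarded for beating the recent trend rather than for absolute accuracy. A short sketch of that baseline; the first accuracy 0.10 reproduces the episode logged in `lg3.txt`, the later values are made up:

beta = 0.9
moving_acc = 0.0
for acc in [0.10, 0.35, 0.30]:  # hold-out accuracies over successive episodes
    moving_acc = moving_acc * beta + acc * (1 - beta)
    reward = acc - moving_acc
    print('acc=%.2f  moving_acc=%.3f  reward=%.3f' % (acc, moving_acc, reward))
# acc=0.10  moving_acc=0.010  reward=0.090
# acc=0.35  moving_acc=0.044  reward=0.306
# acc=0.30  moving_acc=0.070  reward=0.230
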
/my_optimizer.py:
--------------------------------------------------------------------------------
1 | from keras.optimizers import Optimizer
2 | from keras import backend as K
3 | from keras.legacy import interfaces
4 | import numpy as np
5 | import tensorflow as tf
6 |
7 | #Search space for action
8 | operands = {1: 'g', 2: 'g2', 3: 'g3', 4: 'm', 5: 'v', 6: 'y', 7: 'sign(g)', 8: 'sign(m)', 9: '1', 10: 'noise',
9 | 11: '10-4w', 12: '10-3w', 13: '10-2w', 14: '10-1w', 15: 'ADAM', 16: 'RMSProp'}
10 |
11 | unarys = {1: '1', 2: '-1', 3: 'exp', 4: 'log', 5: 'clip10-5', 6: 'clip10-4', 7: 'clip10-3', 8: 'drop0.1', 9:'drop0.3',
12 | 10: 'drop0.5', 11: 'sign'}
13 |
14 | binarys = {1: 'add', 2: 'sub', 3: 'mul', 4: 'div', 5: 'keep_left'}
15 |
16 | class my_optimizer(Optimizer):
17 | def __init__(self, lr=0.0001, beta_1=0.9, beta_2=0.999, beta_3=0.999,
18 | epsilon=None, decay=0., amsgrad=False, strings=None, **kwargs):
19 | super(my_optimizer, self).__init__(**kwargs)
20 | with K.name_scope(self.__class__.__name__):
21 | self.iterations = K.variable(0, dtype='int64', name='iterations')
22 | self.lr = K.variable(lr, name='lr')
23 |             self.beta_1 = K.variable(beta_1, name='beta_1')  # decay rate for the first moment m_t
24 |             self.beta_2 = K.variable(beta_2, name='beta_2')  # decay rate for the second moment v_t
25 |             self.beta_3 = K.variable(beta_3, name='beta_3')  # decay rate for the third moment y_t
26 | self.decay = K.variable(decay, name='decay')
27 | if epsilon is None:
28 | epsilon = K.epsilon()
29 | self.epsilon = epsilon
30 | self.initial_decay = decay
31 | self.amsgrad = amsgrad
32 |
33 | print (type(strings))
34 |
35 |
36 | self.op1, self.op2, self.unop1, self.unop2, self.biops = strings
37 |
38 |
39 | @interfaces.legacy_get_updates_support
40 | def get_updates(self, loss, params):
41 |
42 | grads = self.get_gradients(loss, params)
43 | accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
44 | self.updates = [K.update_add(self.iterations, 1)]
45 |
46 | lr = self.lr
47 | if self.initial_decay > 0:
48 | lr *= (1. / (1. + self.decay * K.cast(self.iterations,
49 | K.dtype(self.decay))))
50 |
51 | t = K.cast(self.iterations, K.floatx()) + 1
52 | lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
53 | (1. - K.pow(self.beta_1, t)))
54 |
55 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
56 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
57 | ys = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
58 |
59 |
60 |
61 | if self.amsgrad:
62 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
63 | else:
64 | vhats = [K.zeros(1) for _ in params]
65 | self.weights = [self.iterations] + ms + vs + vhats
66 |
67 | for p, g, m, v, y, vhat, a in zip(params, grads, ms, vs, ys, vhats, accumulators):
68 | m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
69 | v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
70 | y_t = (self.beta_3 * y) + (1. - self.beta_3) * K.pow(g, 3)
71 |
72 | new_a = self.beta_1 * a + (1. - self.beta_1) * K.square(g)
73 | self.updates.append(K.update(a, new_a))
74 |
75 | noise = tf.random_normal(
76 | K.int_shape(p),
77 | mean=0.0,
78 | stddev=0.1,
79 | dtype=K.dtype(p),
80 | )
81 |
82 | delta_adam = m_t / (K.sqrt(v_t) + self.epsilon)
83 |
84 | delta_rms = g / (K.sqrt(new_a) + self.epsilon)
85 | ############################################
86 | if self.op1 == 1:
87 | o1 = g
88 | elif self.op1 == 2:
89 | o1 = tf.square(g)
90 | elif self.op1 == 3:
91 | o1 = tf.pow(g, 3)
92 | elif self.op1 == 4:
93 | o1 = m_t
94 | elif self.op1 == 5:
95 | o1 = v_t
96 | elif self.op1 == 6:
97 | o1 = y_t
98 | elif self.op1 == 7:
99 | o1 = K.sign(g)
100 | elif self.op1 == 8:
101 | o1 = K.sign(m_t)
102 | elif self.op1 == 9:
103 | o1 = tf.constant(1, dtype=tf.float32)
104 | elif self.op1 == 10:
105 | o1 = noise
106 | elif self.op1 == 11:
107 | o1 = (10 ** (-4)) * p
108 | elif self.op1 == 12:
109 | o1 = (10 ** (-3)) * p
110 | elif self.op1 == 13:
111 | o1 = (10 ** (-2)) * p
112 |             elif self.op1 == 14:
113 | o1 = (10 ** (-1)) * p
114 | elif self.op1 == 15:
115 | o1 = delta_adam
116 | elif self.op1 == 16:
117 | o1 = delta_rms
118 | ####################################
119 | if self.op2 == 1:
120 | o2 = g
121 | elif self.op2 == 2:
122 | o2 = tf.square(g)
123 | elif self.op2 == 3:
124 | o2 = tf.pow(g, 3)
125 | elif self.op2 == 4:
126 | o2 = m_t
127 | elif self.op2 == 5:
128 | o2 = v_t
129 | elif self.op2 == 6:
130 | o2 = y_t
131 | elif self.op2 == 7:
132 | o2 = K.sign(g)
133 | elif self.op2 == 8:
134 | o2 = K.sign(m_t)
135 | elif self.op2 == 9:
136 | o2 = tf.constant(1, dtype=tf.float32)
137 | elif self.op2 == 10:
138 | o2 = noise
139 | elif self.op2 == 11:
140 | o2 = (10 ** (-4)) * p
141 | elif self.op2 == 12:
142 | o2 = (10 ** (-3)) * p
143 | elif self.op2 == 13:
144 | o2 = (10 ** (-2)) * p
145 |             elif self.op2 == 14:
146 | o2 = (10 ** (-1)) * p
147 | elif self.op2 == 15:
148 | o2 = delta_adam
149 | elif self.op2 == 16:
150 | o2 = delta_rms
151 | ##############################################
152 | if self.unop1 == 1:
153 | u1 = o1
154 | elif self.unop1 == 2:
155 | u1 = -o1
156 | elif self.unop1 == 3:
157 | u1 = K.exp(o1)
158 | elif self.unop1 == 4:
159 | u1 = K.log(K.abs(o1))
160 | elif self.unop1 == 5:
161 | u1 = K.clip(o1, -(10 ** (-5)), 10 ** (-5))
162 | elif self.unop1 == 6:
163 | u1 = K.clip(o1, -(10 ** (-4)), 10 ** (-4))
164 | elif self.unop1 == 7:
165 | u1 = K.clip(o1, -(10 ** (-3)), 10 ** (-3))
166 |             elif self.unop1 == 8:
167 | u1 = K.dropout(o1,0.9)
168 | elif self.unop1 == 9:
169 | u1 = K.dropout(o1, 0.7)
170 | elif self.unop1 == 10:
171 | u1 = K.dropout(o1, 0.5)
172 | elif self.unop1 == 11:
173 | u1 = K.sign(o1)
174 | ##############################################
175 |
176 | if self.unop2 == 1:
177 | u2 = o2
178 | elif self.unop2 == 2:
179 | u2 = -o2
180 | elif self.unop2 == 3:
181 | u2 = K.exp(o2)
182 | elif self.unop2 == 4:
183 | u2 = K.log(K.abs(o2))
184 | elif self.unop2 == 5:
185 | u2 = K.clip(o2, -(10 ** (-5)), 10 ** (-5))
186 | elif self.unop2 == 6:
187 | u2 = K.clip(o2, -(10 ** (-4)), 10 ** (-4))
188 | elif self.unop2 == 7:
189 | u2 = K.clip(o2, -(10 ** (-3)), 10 ** (-3))
190 | elif self.unop2 == 8:
191 | u2 = K.dropout(o2,0.9)
192 | elif self.unop2 == 9:
193 | u2 = K.dropout(o2, 0.7)
194 | elif self.unop2 == 10:
195 | u2 = K.dropout(o2, 0.5)
196 | elif self.unop2 == 11:
197 | u2 = K.sign(o2)
198 |
199 | #################################################
200 |
201 |
202 | if self.biops == 1:
203 | delta = u1 + u2
204 | elif self.biops == 2:
205 | delta = u1 - u2
206 | elif self.biops == 3:
207 | delta = u1 * u2
208 | elif self.biops == 4:
209 | delta = u1 / (u2 + self.epsilon)
210 | elif self.biops == 5:
211 | delta = u1
212 | ###################################################
213 |
214 |
215 | p_t = p - self.lr * delta
216 |
217 |
218 |
219 |
220 |
221 | self.updates.append(K.update(m, m_t))
222 | self.updates.append(K.update(v, v_t))
223 | self.updates.append(K.update(y, y_t))
224 | new_p = p_t
225 |
226 | # Apply constraints.
227 | if getattr(p, 'constraint', None) is not None:
228 | new_p = p.constraint(new_p)
229 |
230 | self.updates.append(K.update(p, new_p))
231 | return self.updates
232 |
233 |
234 |
235 |
236 | def get_config(self):
237 | config = {'lr': float(K.get_value(self.lr)),
238 | 'beta_1': float(K.get_value(self.beta_1)),
239 | 'beta_2': float(K.get_value(self.beta_2)),
240 | 'decay': float(K.get_value(self.decay)),
241 | 'epsilon': self.epsilon,
242 | 'amsgrad': self.amsgrad}
243 | base_config = super(my_optimizer, self).get_config()
244 | return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------
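
One property of `my_optimizer.get_updates` worth noting: several classic update rules are single points of the search space, so the controller can always fall back on known optimizers. A hedged sketch, assuming the repository root is on the Python path and a Keras/TensorFlow backend is available; the action vectors are read directly off the branches above:

from my_optimizer import my_optimizer

# Actions are [op1, op2, unary1, unary2, binary]; binary 5 ('keep_left') ignores
# the second branch, so the update reduces to unary1(op1):
#   [15, 1, 1, 1, 5] -> delta = m_t / (sqrt(v_t) + eps)   (an Adam-style step)
#   [16, 1, 1, 1, 5] -> delta = g / (sqrt(a) + eps)       (an RMSProp-style step)
#   [ 7, 1, 1, 1, 5] -> delta = sign(g)                   (sign-SGD)
adam_like = my_optimizer(lr=1e-3, strings=[15, 1, 1, 1, 5])
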
/train_target.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.datasets import cifar10
3 | from keras.utils import np_utils
4 | from keras.models import Sequential
5 | from keras.layers import Dense, Activation, Convolution2D, MaxPooling2D, Flatten
6 | from keras.layers.normalization import BatchNormalization
7 | from keras.optimizers import Adam, SGD
8 | from my_optimizer import my_optimizer
9 | from keras import backend as K
10 |
11 |
12 |
13 | class Conv():
14 | def __init__(self, convsess):
15 | (X_train, y_train), (X_test, y_test) = cifar10.load_data()
16 |
17 | self.X_train = X_train.astype('float32') / 255
18 |         self.X_test = X_test.astype('float32') / 255
19 | self.y_train = np_utils.to_categorical(y_train, num_classes=10)
20 | self.y_test = np_utils.to_categorical(y_test, num_classes=10)
21 | K.set_session(convsess)
22 |
23 | self.model = Sequential()
24 |
25 |         # Conv layer 1, output shape (32, 32, 32)
26 |         self.model.add(Convolution2D(
27 |             batch_input_shape=(None, 32, 32, 3),   # CIFAR-10 arrives channels_last: (32, 32, 3)
28 |             filters=32,
29 |             kernel_size=3,
30 |             strides=1,
31 |             padding='same',                        # Padding method
32 |             data_format='channels_last',           # match the channels_last input layout
33 |         ))
34 | self.model.add(Activation('relu'))
35 | self.model.add(BatchNormalization())
36 |
37 |         # Pooling layer 1 (max pooling), output shape (16, 16, 32)
38 |         self.model.add(MaxPooling2D(
39 |             pool_size=2,
40 |             strides=2,
41 |             padding='same',                        # Padding method
42 |             data_format='channels_last',
43 |         ))
44 |
45 |         # Conv layer 2, output shape (16, 16, 64)
46 |         self.model.add(Convolution2D(64, 3, strides=1, padding='same', data_format='channels_last'))
47 | self.model.add(Activation('relu'))
48 | self.model.add(BatchNormalization())
49 |
50 |         # Pooling layer 2 (max pooling), output shape (8, 8, 64)
51 |         self.model.add(MaxPooling2D(2, 2, 'same', data_format='channels_last'))
52 |
53 |         # Fully connected layer 1, input shape (8 * 8 * 64) = (4096), output shape (1024)
54 | self.model.add(Flatten())
55 | self.model.add(Dense(1024))
56 | self.model.add(Activation('relu'))
57 |
58 | # Fully connected layer 2 to shape (10) for 10 classes
59 | self.model.add(Dense(10))
60 | self.model.add(Activation('softmax'))
61 |
62 |     def train_one_epoch(self, lr, action):
63 | optimizer = my_optimizer(lr=lr, strings=action)
64 | #optimizer = SGD()
65 | self.model.compile(optimizer=optimizer,
66 | loss='categorical_crossentropy',
67 | metrics=['accuracy'])
68 | print('Training ------------')
69 | self.model.fit(self.X_train, self.y_train, epochs=1, batch_size=64, )
70 |
71 | def train(self, lr, action, epochs=5):
72 | optimizer = my_optimizer(lr=lr, strings=action)
73 | #optimizer = SGD()
74 | self.model.compile(optimizer=optimizer,
75 | loss='categorical_crossentropy',
76 | metrics=['accuracy'])
77 | print('Training ------------')
78 | self.model.fit(self.X_train, self.y_train, epochs=epochs, batch_size=64, )
79 |
80 | def test(self):
81 | print('\nTesting ------------')
82 | loss, accuracy = self.model.evaluate(self.X_test, self.y_test)
83 |
84 | print('\ntest loss: ', loss)
85 | print('\ntest accuracy: ', accuracy)
86 | return accuracy
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------