├── __init__.py
├── datasets
│   ├── babi.py
│   ├── __init__.py
│   ├── text8.py
│   ├── ptb.py
│   ├── utils.py
│   ├── repeat_copy.py
│   ├── priority_sort.py
│   ├── associative_recall.py
│   └── algorithm_learning.py
├── unit_test
│   ├── __init__.py
│   ├── image
│   │   ├── figure_4.png
│   │   └── figure_6.png
│   ├── unit_test.py
│   ├── head_test.py
│   ├── memory_unit_test.py
│   ├── visualization_unit_test.py
│   ├── test_datasets.py
│   ├── lstm2ntm_unit_test.py
│   └── ntm_unit_test.py
├── algorithm_learning
│   ├── __init__.py
│   ├── figure
│   │   ├── __init__.py
│   │   └── generate_figure.py
│   ├── previous_version
│   │   ├── __init__.py
│   │   ├── learning_associative_recall.py
│   │   ├── learning_copy.py
│   │   ├── learning_priority_sort.py
│   │   ├── learning_repeat_copy.py
│   │   ├── learning_associative_recall_review.py
│   │   └── learning_dynamic_n_grams.py
│   ├── learning_repeat_copy_ntm.py
│   ├── learning_priority_sort_ntm.py
│   ├── learning_associative_recall_ntm.py
│   ├── util.py
│   ├── learning_algorithm_lstm.py
│   ├── learning_associative_recall_lstm.py
│   ├── learning_priority_sort_lstm.py
│   └── learning_repeat_copy_lstm.py
├── .idea
│   ├── encodings.xml
│   ├── vcs.xml
│   ├── modules.xml
│   ├── misc.xml
│   └── NTM-Keras-New.iml
├── language_modeling
│   ├── cbt_ntm.py
│   ├── ptb_ntm.py
│   └── text8_ntm.py
├── question_answering
│   └── babi_ntm.py
├── controller.py
├── LICENSE
├── util.py
├── README.md
├── main.py
├── head.py
├── memory.py
├── lstm2ntm.py
└── dataset.py
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/babi.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/unit_test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/algorithm_learning/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/algorithm_learning/figure/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/algorithm_learning/previous_version/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/unit_test/image/figure_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SigmaQuan/NTM-Keras/HEAD/unit_test/image/figure_4.png
--------------------------------------------------------------------------------
/unit_test/image/figure_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SigmaQuan/NTM-Keras/HEAD/unit_test/image/figure_6.png
--------------------------------------------------------------------------------
/language_modeling/cbt_ntm.py:
--------------------------------------------------------------------------------
1 | """
2 | Learning word-level language modeling on CBT data set with NTM.
3 | """
4 |
5 | import os
6 |
7 |
8 | FOLDER = "experiment_results/cbt/"
9 | if not os.path.isdir(FOLDER):
10 | os.makedirs(FOLDER)
11 | print("create folder: %s" % FOLDER)
12 |
--------------------------------------------------------------------------------
/language_modeling/ptb_ntm.py:
--------------------------------------------------------------------------------
1 | """
2 | Learning word-level language modeling on PTB data set with NTM.
3 | """
4 |
5 | import os
6 |
7 |
8 | FOLDER = "experiment_results/ptb/"
9 | if not os.path.isdir(FOLDER):
10 | os.makedirs(FOLDER)
11 | print("create folder: %s" % FOLDER)
12 |
--------------------------------------------------------------------------------
/question_answering/babi_ntm.py:
--------------------------------------------------------------------------------
1 | """
2 | Learning question answering on bAbI data set with NTM.
3 | """
4 |
5 | import os
6 |
7 |
8 | FOLDER = "experiment_results/babi/"
9 | if not os.path.isdir(FOLDER):
10 | os.makedirs(FOLDER)
11 | print("create folder: %s" % FOLDER)
12 |
13 |
--------------------------------------------------------------------------------
/language_modeling/text8_ntm.py:
--------------------------------------------------------------------------------
1 | """
2 | Learning word-level language modeling on text8 data set with NTM.
3 | """
4 |
5 | import os
6 |
7 |
8 | FOLDER = "experiment_results/text8/"
9 | if not os.path.isdir(FOLDER):
10 | os.makedirs(FOLDER)
11 | print("create folder: %s" % FOLDER)
12 |
13 |
--------------------------------------------------------------------------------
/unit_test/unit_test.py:
--------------------------------------------------------------------------------
1 | import mahotas as mh
2 | from matplotlib import pyplot as plt
3 | import numpy as np
4 |
5 |
6 | iteration = 2
7 | image = mh.imread("../experiment/copy_data_predict_%3d.png"%iteration)
8 | plt.imshow(image)
9 | plt.show()
10 |
11 | image = mh.colors.rgb2gray(image, dtype=np.uint8)
12 | plt.imshow(image)
--------------------------------------------------------------------------------
/algorithm_learning/learning_repeat_copy_ntm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning the repeat copy algorithm with NTM.
3 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
4 | Input dimension: "8"
5 | Output sequence length: equal to input sequence length.
6 | Output dimension: equal to input dimension.
7 | """
8 |
--------------------------------------------------------------------------------
/algorithm_learning/learning_priority_sort_ntm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning the priority sort algorithm with NTM.
3 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
4 | Input dimension: "8"
5 | Output sequence length: equal to input sequence length.
6 | Output dimension: equal to input dimension.
7 | """
8 |
9 |
--------------------------------------------------------------------------------
/algorithm_learning/learning_associative_recall_ntm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning the associative recall algorithm with NTM.
3 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))."
4 | Input dimension: "6+2"; each item is 3*6 bits.
5 | Output sequence length: "3" (one item).
6 | Output dimension: equal to input dimension.
7 | """
8 |
9 |
--------------------------------------------------------------------------------
/datasets/text8.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import os
4 |
5 |
6 | def load_data(path='text8.zip'):
7 | """Loads the Text8 dataset.
8 |
9 | # Arguments
10 | path: path where to cache the dataset locally
11 | (relative to ~/.keras/datasets).
12 |
13 | # Returns
14 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
15 | """
--------------------------------------------------------------------------------
/datasets/ptb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import os
4 |
5 |
6 | def load_data(path='simple-examples.tgz'):
7 | """Loads the Penn Treebank (PTB) dataset.
8 |
9 | # Arguments
10 | path: path where to cache the dataset locally
11 | (relative to ~/.keras/datasets).
12 |
13 | # Returns
14 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
15 | """
--------------------------------------------------------------------------------
/unit_test/head_test.py:
--------------------------------------------------------------------------------
1 | # import keras.backend as K
2 | #
3 | # kvar = K.zeros((3, 54))
4 | # print('dimension: %d' % K.ndim(kvar))
5 | # print('data type: %s' % K.dtype(kvar))
6 | # print('total parameters: %d' % K.count_params(kvar))
7 | #
8 | # x = K.random_uniform_variable(shape=(2, 3), low=0, high=1) #
9 | # y = K.ones((4, 3, 5))
10 | # dot_x_y = K.dot(x, y)
11 | # print(K.int_shape(x))
12 | # print(K.int_shape(y))
13 | # print(K.int_shape(K.zeros_like(dot_x_y)))
14 | # # print('(2, 3) * (4, 3, 5) -> ', K.int_shape(dot_x_y))
15 |
16 | import babel
17 |
--------------------------------------------------------------------------------
/algorithm_learning/util.py:
--------------------------------------------------------------------------------
1 | """
2 | The LossHistory class records the loss and accuracy history during the
3 | training process.
4 | """
5 | # import matplotlib.pyplot as plt
6 | # import numpy as np
7 |
8 | from keras.callbacks import Callback # Add by Steven Robot
9 | # from keras import backend as K
10 |
11 |
12 | class LossHistory(Callback):
13 | def on_train_begin(self, logs={}):
14 | self.losses = []
15 | self.acces = []
16 |
17 | def on_batch_end(self, batch, logs={}):
18 | self.losses.append(logs.get('loss'))
19 | self.acces.append(logs.get('acc'))
20 |
--------------------------------------------------------------------------------
/controller.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from keras.engine.topology import Layer
3 | import numpy as np
4 |
5 |
6 | class Controller(Layer):
7 | def __init__(self, output_dim, **kwargs):
8 | self.output_dim = output_dim
9 | super(Controller, self).__init__(**kwargs)
10 |
11 | def build(self, input_shape, mem_shape, n_heads, hidden_dim):
12 | input_dim = input_shape[1]
13 | initial_weight_value = np.random.random((input_dim, self.output_dim))
14 | self.W = K.variable(initial_weight_value)
15 | self.trainable_weights = [self.W]
16 | self.mem_shape = mem_shape
17 | self.Memory = np.zeros((mem_shape[0], mem_shape[1]))
18 |
--------------------------------------------------------------------------------
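Note that Controller.build() above takes extra arguments (mem_shape, n_heads, hidden_dim) that Keras never passes, so the class reads as an early skeleton. For orientation only, a minimal custom layer under the Keras 1.x API listed in the README implements build(input_shape), call(x), and get_output_shape_for(input_shape). The sketch below is illustrative and not part of the repository; the class name LinearLayer is made up for the example.

```
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np


class LinearLayer(Layer):
    """Minimal custom layer following the Keras 1.x Layer contract."""

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(LinearLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Keras calls build() with the input shape only; create weights here.
        input_dim = input_shape[1]
        self.W = K.variable(np.random.random((input_dim, self.output_dim)))
        self.trainable_weights = [self.W]

    def call(self, x, mask=None):
        # Forward pass: a linear projection (no bias term).
        return K.dot(x, self.W)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.output_dim)
```
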
/datasets/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import random
4 | import time
5 |
6 |
7 | def initialize_random_seed():
8 |     np.random.seed(int(time.time()))  # np.random.seed() expects an integer seed
9 | random.seed(time.time())
10 |
11 |
12 | def generate_random_binomial_(row, col):
13 | return np.random.binomial(
14 | 1, 0.5, (row, col)).astype(np.uint8)
15 |
16 |
17 | def generate_weightings(row, col):
18 | write_weightings = np.zeros((row, col), dtype=np.float32)
19 | read_weightings = np.zeros((row, col), dtype=np.float32)
20 | r = (row * 3) / 4
21 | for i in np.arange(0, col/2):
22 | write_weightings[r][i] = 1
23 | read_weightings[r][i + col/2] = 1
24 | r -= 1
25 |
26 | return write_weightings, read_weightings
27 |
--------------------------------------------------------------------------------
/unit_test/memory_unit_test.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | import theano.tensor as T
3 | import theano
4 | import memory
5 | import head
6 | #
7 | # number_of_memory_locations = 6
8 | # memory_vector_size = 3
9 | #
10 | # memory_t = memory.initial(number_of_memory_locations, memory_vector_size)
11 | #
12 | # weight_t = K.random_binomial((number_of_memory_locations, 1), 0.2)
13 | #
14 | # read_vector = head.reading(memory_t, weight_t)
15 | #
16 | # print memory_t.shape
17 | # print weight_t.shape
18 | # print read_vector
19 | #
20 |
21 |
22 | import numpy as np
23 |
24 | u = np.random.random((3))
25 | V = np.random.random((3, 4))
26 | similarity = np.dot(u, V) / (sum(abs(u)) * np.sum(abs(V), axis=0))
27 | print("u")
28 | print(u)
29 | print("V")
30 | print(V)
31 | print("similarity")
32 | print(similarity)
33 |
--------------------------------------------------------------------------------
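The live code above normalises the dot product by L1 norms; content-based addressing in the NTM paper instead uses cosine similarity, K[u, v] = u·v / (‖u‖ ‖v‖). A small NumPy sketch of the cosine form, shown for comparison only and not part of the repository:

```
import numpy as np


def cosine_similarity(u, V, eps=1e-8):
    # Cosine similarity between a key vector u and each column of V,
    # as used for NTM content-based addressing.
    return np.dot(u, V) / (np.linalg.norm(u) * np.linalg.norm(V, axis=0) + eps)


u = np.random.random(3)
V = np.random.random((3, 4))
print(cosine_similarity(u, V))  # one similarity score per memory column
```
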
/algorithm_learning/learning_algorithm_lstm.py:
--------------------------------------------------------------------------------
1 | """
2 | Run three algorithm learning tasks: "repeat copy", "associative recall" and
3 | "priority sort" one by one.
4 | """
5 |
6 | print("Algorithm learning begins:")
7 | print("copy:")
8 | import learning_repeat_copy_lstm
9 |
10 | print("recall:")
11 | import learning_associative_recall_lstm
12 |
13 | print("sort:")
14 | import learning_priority_sort_lstm
15 | print("Algorithm learning ends.")
16 |
17 | #
18 | # def copy():
19 | # learning_repeat_copy_lstm
20 | #
21 | #
22 | # def recall():
23 | # learning_associative_recall_lstm
24 | #
25 | #
26 | # def sort():
27 | # learning_priority_sort_lstm
28 | #
29 | #
30 | # if __name__ == "__main__":
31 | # print("copy:")
32 | # copy()
33 | # print("recall:")
34 | # recall()
35 | # print("sort:")
36 | # sort()
37 |
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Zhibin Quan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | """
2 | Visualization helpers for weight matrices and a LossHistory callback that records loss and accuracy during training.
3 | """
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 |
7 | from keras.callbacks import Callback # Add by Steven Robot
8 | from keras import backend as K
9 |
10 |
11 | class LossHistory(Callback):
12 | def on_train_begin(self, logs={}):
13 | self.losses = []
14 | self.acces = []
15 |
16 | def on_batch_end(self, batch, logs={}):
17 | self.losses.append(logs.get('loss'))
18 | self.acces.append(logs.get('acc'))
19 |
20 | def show(w, w_title):
21 | """
22 | Show a weight matrix.
23 | :param w: the weight matrix.
24 | :param w_title: the title of the weight matrix
25 | :return: None.
26 | """
27 | # show w_z matrix of update gate.
28 | axes_w = plt.gca()
29 | plt.imshow(w)
30 | plt.colorbar()
31 | # plt.colorbar(orientation="horizontal")
32 | plt.xlabel("$w_{1}$")
33 | plt.ylabel("$w_{2}$")
34 | axes_w.set_xticks([])
35 | axes_w.set_yticks([])
36 | matrix_size = "$:\ %d \\times\ %d$" % (len(w[0]), len(w))
37 | w_title += matrix_size
38 | plt.title(w_title)
39 |
40 | # show the matrix.
41 | plt.show()
42 |
43 | if __name__ == "__main__":
44 | w = np.random.random((8, 10))
45 | title = " "
46 | show(w, title)
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neural Turing Machines
2 |
3 | An attempt at replicating ["Neural Turing Machines"](http://arxiv.org/abs/1410.5401) (by Alex Graves, Greg Wayne, and Ivo Danihelka) in Keras.
4 |
5 |
6 | ## Prerequisites
7 | - Python 2.7
8 | - [Numpy 1.12.0](https://pypi.python.org/pypi/numpy)
9 | - [Scipy 0.18.1](https://pypi.python.org/pypi/scipy/)
10 | - [Theano 0.8.2](http://deeplearning.net/software/theano/)
11 | - [Keras 1.2.0](https://github.com/fchollet/keras)
12 | - [CUDA 8.0](https://developer.nvidia.com/cuda-downloads)
13 | - [cuDNN 5.0](https://developer.nvidia.com/cudnn)
14 | - [Matplotlib 1.5.3](http://matplotlib.org/1.5.3/index.html)
15 |
16 |
17 |
18 | ## Results
19 | ### Algorithm Learning
20 | **Repeat Copy**
21 | 
22 | **NTM Memory Use During the Copy Task**
23 | 
24 |
25 | **Associative Recall**
26 | (in progress)
27 |
28 | **Priority Sort**
29 | (in progress)
30 |
31 |
32 | ## Usage
33 | To train a repeat copy task with LSTM:
34 | ```
35 | $ python learning_repeat_copy_lstm.py
36 | ```
37 |
38 | To train an associative recall task with LSTM:
39 | ```
40 | $ python learning_associative_recall_lstm.py
41 | ```
42 |
43 | To train a priority sort task with LSTM:
44 | ```
45 | $ python learning_priority_sort_lstm.py
46 | ```
47 |
48 | To train three different tasks one by one with LSTM:
49 | ```
50 | $ python learning_algorithm_lstm.py
51 | ```
52 |
53 | ## Other NTM Implementations
54 | - [carpedm20/NTM-tensorflow](https://github.com/carpedm20/NTM-tensorflow)
55 | - [shawntan/neural-turing-machines](https://github.com/shawntan/neural-turing-machines)
56 | - [snipsco/ntm-lasagne](https://github.com/snipsco/ntm-lasagne)
57 |
58 |
59 | ## Future work
60 | - Train the NTM to learn *repeat copy*.
61 | - Train the NTM to learn *associative recall*.
62 | - Train the NTM to learn *dynamic n-grams*.
63 | - Train the NTM to learn *priority sort*.
64 | - Apply the NTM to other natural language processing tasks, such as neural language modeling.
65 |
66 |
67 | ## Author
68 | Zhibin Quan / [@SigmaQuan](https://github.com/SigmaQuan)
--------------------------------------------------------------------------------
/datasets/repeat_copy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | from utils import initialize_random_seed
4 |
5 |
6 | # Initialize the random seed
7 | initialize_random_seed()
8 |
9 |
10 | def generate_one_sample(dimension, sequence_length, repeat_times):
11 | """Generate one sample of repeat copy algorithm.
12 |
13 | # Arguments
14 | dimension: the dimension of each input output tokens.
15 | sequence_length: the length of input sequence, i.e. the number of
16 | input tokens.
17 | repeat_times: repeat times of output.
18 |
19 | # Returns
20 | input_sequence: the input sequence of a sample.
21 | output_sequence: the output sequence of a sample.
22 | """
23 | # produce random sequence
24 | sequence = np.random.binomial(
25 | 1, 0.5, (sequence_length, dimension - 1)).astype(np.uint8)
26 |
27 | # allocate space for input sequence and output sequence
28 | input_sequence = np.zeros(
29 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
30 | dimension),
31 | dtype=np.bool)
32 | output_sequence = np.zeros(
33 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
34 | dimension),
35 | dtype=np.bool)
36 |
37 | # set value of input sequence
38 | input_sequence[:sequence_length, :-1] = sequence
39 | # input_sequence[sequence_length, -1] = repeat_times
40 | input_sequence[sequence_length, -1] = 1
41 |
42 | # set value of output sequence ## sequence_length + 1
43 | output_sequence[sequence_length+1:, :-1] = \
44 | np.tile(sequence, (repeat_times, 1))
45 | # "1": A special flag which indicate the begin of the output
46 | # output_sequence[sequence_length, -1] = 1
47 |
48 | # return the sample
49 | return input_sequence, output_sequence
50 |
51 |
52 | def generate_data_set(
53 | dimension,
54 | max_length_of_original_sequence,
55 | max_repeat_times,
56 | data_set_size):
57 | """Generate samples for learning repeat copy algorithm.
58 |
59 | # Arguments
60 | dimension: the dimension of each input output tokens.
61 | max_length_of_original_sequence: the max length of original sequence.
62 | max_repeat_times: the maximum repeat times.
63 | data_set_size: the size of total samples.
64 |
65 | # Returns
66 | input_sequences: the input sequences of total samples.
67 | output_sequences: the output sequences of total samples.
68 | repeat_times: the repeat times of each output sequence of total
69 | samples.
70 | """
71 | # produce random sequence lengths from uniform distribution
72 | # [1, max_length]
73 | sequence_lengths = np.random.randint(
74 | 1, max_length_of_original_sequence + 1, data_set_size)
75 |
76 | # produce random repeat times from uniform distribution
77 | # [1, max_repeat_times]
78 | repeat_times = np.random.randint(1, max_repeat_times + 1, data_set_size)
79 | input_sequences = np.zeros(
80 | (data_set_size,
81 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
82 | dimension),
83 | dtype=np.bool)
84 | output_sequences = np.zeros(
85 | (data_set_size,
86 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
87 | dimension),
88 | dtype=np.bool)
89 |
90 | # set the value for input sequences and output sequences
91 | for i in range(data_set_size):
92 | input_sequence, output_sequence = generate_one_sample(
93 | dimension, sequence_lengths[i], repeat_times[i])
94 | input_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
95 | input_sequence
96 | output_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
97 | output_sequence
98 |
99 | # return total samples
100 | return input_sequences, output_sequences, repeat_times
101 |
--------------------------------------------------------------------------------
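As a usage sketch (not part of the repository, and assuming the package root is on sys.path), one sample from generate_one_sample can be inspected to confirm the layout documented above: a delimiter flag in the last channel at step sequence_length, then a target equal to the input sequence tiled repeat_times times.

```
# Usage sketch: inspect one repeat-copy sample.
import numpy as np
from datasets.repeat_copy import generate_one_sample

dimension, sequence_length, repeat_times = 8, 3, 2
x, y = generate_one_sample(dimension, sequence_length, repeat_times)

# Both arrays span sequence_length + 1 + sequence_length * repeat_times steps.
print(x.shape, y.shape)        # (10, 8) (10, 8)
print(x[sequence_length, -1])  # delimiter flag is set at step sequence_length
print((y[sequence_length + 1:, :-1] ==
       np.tile(x[:sequence_length, :-1], (repeat_times, 1))).all())
```
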
/main.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import numpy as np
3 |
4 | import random
5 |
6 | #
7 | # id_i = 0
8 | # value_i = 0.0
9 | # id_i_1 = 0
10 | # value_i_1 = 1.0
11 | # number = 0
12 | # with open("sort.dat", "r") as f:
13 | # for line in f:
14 | # line = line.replace('(', '')
15 | # line = line.replace(')', '')
16 | # line = line.replace(' ', '')
17 | # line = line.replace('\n', '')
18 | # # print line
19 | # id_value = line.split(',')
20 | #
21 | # if len(id_value) < 2:
22 | # continue
23 | # # print id_value
24 | #
25 | # id = int(id_value[0])
26 | # id = id*128/1024
27 | # value = float(id_value[1])
28 | # # print(id)
29 | # # print(value)
30 | #
31 | # if (id_i == id):
32 | # value_i = value_i + value
33 | # number = number + 1
34 | # elif (id < 2000):
35 | #
36 | # if (value_i/number < (value_i_1 - 0.009) and id_i < 100) or ((id_i > (id_i_1 + 20)) and id_i > 100) :
37 | # print '({0}, {1})'.format(id_i_1+1, value_i_1)
38 | # id_i_1 = id_i
39 | # value_i_1 = value_i/number
40 | #
41 | # id_i = id
42 | # value_i = value
43 | # number = 1
44 | #
45 | #
46 | #
47 | # id_i = 0
48 | # value_i = 0.0
49 | # id_i_1 = 0
50 | # value_i_1 = 1.0
51 | # number = 0
52 | # with open("recall.dat", "r") as f:
53 | # for line in f:
54 | # line = line.replace('(', '')
55 | # line = line.replace(')', '')
56 | # line = line.replace(' ', '')
57 | # line = line.replace('\n', '')
58 | # # print line
59 | # id_value = line.split(',')
60 | #
61 | # if len(id_value) < 2:
62 | # continue
63 | # # print id_value
64 | #
65 | # id = int(id_value[0])
66 | # # id = id*128/1024
67 | # value = float(id_value[1])
68 | # # print(id)
69 | # # print(value)
70 | #
71 | # id_i = id
72 | # value_i = value
73 | #
74 | # if (value_i < (value_i_1 - 0.03) and id_i < 100) or ((id_i > (id_i_1 + 40)) and id_i > 100):
75 | # print '({0}, {1})'.format(id_i_1, value_i_1)
76 | # id_i_1 = id_i
77 | # value_i_1 = value_i
78 | #
79 | #
80 |
81 | #
82 | # id_i = 0
83 | # value_i = 0.0
84 | # id_i_1 = 0
85 | # value_i_1 = 1.0
86 | # number = 0
87 | # with open("copy.dat", "r") as f:
88 | # for line in f:
89 | # line = line.replace('(', '')
90 | # line = line.replace(')', '')
91 | # line = line.replace(' ', '')
92 | # line = line.replace('\n', '')
93 | # # print line
94 | # id_value = line.split(',')
95 | #
96 | # if len(id_value) < 2:
97 | # continue
98 | # # print id_value
99 | #
100 | # id = int(id_value[0])
101 | # id = id*128/1024
102 | # value = float(id_value[1])
103 | # # print(id)
104 | # # print(value)
105 | #
106 | # if (id_i == id):
107 | # value_i = value_i + value
108 | # number = number + 1
109 | # elif (id < 2000):
110 | #
111 | # if (value_i/number < (value_i_1 - 0.009) and id_i < 100) or ((id_i > (id_i_1 + 20)) and id_i > 100) :
112 | # print '({0}, {1})'.format(id_i_1+1, value_i_1)
113 | # id_i_1 = id_i
114 | # value_i_1 = value_i/number
115 | #
116 | # id_i = id
117 | # value_i = value
118 | # number = 1
119 |
120 |
121 | import numpy as np
122 | import matplotlib.pyplot as plt
123 | from mpl_toolkits.mplot3d import Axes3D
124 |
125 | fig = plt.figure()
126 | ax = Axes3D(fig)
127 | N = np.arange(100, 4000, 20)
128 | p = np.arange(0.01, 0.99, 0.02)
129 | N, p = np.meshgrid(N, p)
130 | # R = np.sqrt(N ** 2 + p ** 2)
131 | # D = np.sin(R)
132 | D = N/p
133 |
134 | ax.plot_surface(N, p, D, rstride=1, cstride=1, cmap=plt.cm.hot)
135 | ax.contourf(N, p, D, zdir='z', offset=-2, cmap=plt.cm.hot)
136 | # ax.set_zlim(-2, 2)
137 |
138 | # savefig('../figures/plot3d_ex.png',dpi=48)
139 | plt.show()
140 |
--------------------------------------------------------------------------------
/head.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | import theano.tensor as T
3 |
4 |
5 | def batch_reading(head_num, memory_size, memory_dim, memory_t, weight_t):
6 | """
7 | Reading memory.
8 | :param head_num:
9 | :param memory_size:
10 | :param memory_dim:
11 | :param memory_t: the $N \times M$ memory matrix at time $t$, where $N$
12 | is the number of memory locations, and $M$ is the vector size at each
13 | location.
14 | :param weight_t: $w_t$ is a vector of weightings over the $N$ locations
15 | emitted by a reading head at time $t$.
16 |
17 | Since all weightings are normalized, the $N$ elements $w_t(i)$ of
18 | $\textbf{w}_t$ obey the following constraints:
19 | $$\sum_{i=1}^{N} w_t(i) = 1, 0 \le w_t(i) \le 1,\forall i$$
20 |
21 | The length $M$ read vector $r_t$ returned by the head is defined as a
22 | convex combination of the row-vectors $M_t(i)$ in memory:
23 | $$\textbf{r}_t \leftarrow \sum_{i=1}^{N}w_t(i)\textbf{M}_t(i)$$
24 | :return: the content reading from memory.
25 | """
26 |     r_t_list = []
27 | 
28 |     for i in xrange(head_num):
29 |         begin = i * memory_size
30 |         end = begin + memory_size
31 |         r_t = reading(memory_t, weight_t[begin:end])
32 |         r_t_list.append(r_t)
33 | 
34 |     return K.concatenate(r_t_list)
35 |
36 |
37 | def reading(memory_t, weight_t):
38 | """
39 | Reading memory.
40 | :param memory_t: the $N \times M$ memory matrix at time $t$, where $N$
41 | is the number of memory locations, and $M$ is the vector size at each
42 | location.
43 | :param weight_t: $w_t$ is a vector of weightings over the $N$ locations
44 | emitted by a reading head at time $t$.
45 |
46 | Since all weightings are normalized, the $N$ elements $w_t(i)$ of
47 | $\textbf{w}_t$ obey the following constraints:
48 | $$\sum_{i=1}^{N} w_t(i) = 1, 0 \le w_t(i) \le 1,\forall i$$
49 |
50 | The length $M$ read vector $r_t$ returned by the head is defined as a
51 | convex combination of the row-vectors $M_t(i)$ in memory:
52 | $$\textbf{r}_t \leftarrow \sum_{i=1}^{N}w_t(i)\textbf{M}_t(i)$$
53 | :return: the content reading from memory.
54 | """
55 | r_t = K.dot(memory_t, weight_t)
56 | return r_t
57 |
58 |
59 | def batch_writing(
60 | head_num, memory_size, memory_dim, memory_t_1,
61 | weight_t, eraser_t, adder_t):
62 | memory_t = memory_t_1
63 |
64 | for i in xrange(head_num):
65 | # get the addressing for writing
66 | address_begin = i * memory_size
67 | address_end = address_begin + memory_size
68 | content_begin = i * memory_dim
69 | content_end = content_begin + memory_dim
70 | memory_t = writing(
71 | memory_t_1,
72 | weight_t[address_begin:address_end],
73 | eraser_t[content_begin:content_end],
74 | adder_t[content_begin:content_end])
75 | memory_t_1 = memory_t
76 |
77 | return memory_t
78 |
79 |
80 | def writing(memory_t_1, weight_t, eraser_t, adder_t):
81 | """
82 | Each writing process contain two parts: an erase followed by an add.
83 | :param memory_t_1: the $N \times M$ memory matrix at time $t-1$, where $N$
84 | is the number of memory locations, and $M$ is the vector size at each
85 | location.
86 | :param weight_t: $w_t$ is a vector of weightings over the $N$ locations
87 | emitted by a writing head at time $t$.
88 | :param eraser_t:
89 | :param adder_t:
90 | :return:
91 | """
92 | # erase
93 | _memory_t = erase(memory_t_1, weight_t, eraser_t)
94 |
95 | # add
96 | memory_t = add(_memory_t, weight_t, adder_t)
97 | return memory_t
98 |
99 |
100 | def erase(memory_t_1, weight_t, eraser_t):
101 | '''
102 |
103 | :param memory_t_1:
104 | :param weight_t:
105 | :param eraser_t:
106 | :return:
107 | '''
108 | memory = memory_t_1 - T.outer(eraser_t, weight_t)
109 | # memory = memory_t_1 * (1 - weight_t * eraser_t)
110 | return memory
111 |
112 |
113 | def add(_memory_t, weight_t, adder_t):
114 | '''
115 |
116 | :param _memory_t:
117 | :param weight_t:
118 | :param adder_t:
119 | :return:
120 | '''
121 | memory_t = _memory_t + T.outer(adder_t, weight_t)
122 | # memory_t = _memory_t + weight_t * adder_t
123 | return memory_t
124 |
--------------------------------------------------------------------------------
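The formulas quoted in the docstrings above are easy to sanity-check with plain NumPy. The sketch below is illustrative only (not repository code): it performs a read as a convex combination of memory rows and a write as an erase followed by an add, using the multiplicative erase from the NTM paper; note that the repository's erase() uses a subtractive variant.

```
import numpy as np

N, M = 6, 4                       # memory locations x vector size
memory = np.random.random((N, M))
w = np.random.random(N)
w /= w.sum()                      # normalised weighting over the N locations

# Reading: r_t = sum_i w_t(i) * M_t(i), a convex combination of the rows.
r = w.dot(memory)                 # shape (M,)

# Writing: erase, then add.
e = np.random.random(M)           # erase vector
a = np.random.random(M)           # add vector
memory_erased = memory * (1.0 - np.outer(w, e))
memory_new = memory_erased + np.outer(w, a)
print(r.shape, memory_new.shape)  # (4,) (6, 4)
```
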
/datasets/priority_sort.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | from utils import initialize_random_seed
4 |
5 |
6 | # Initialize the random seed
7 | initialize_random_seed()
8 |
9 |
10 | def generate_one_sample(
11 | dimension,
12 | input_sequence_length,
13 | output_sequence_length,
14 | priority_lower_bound,
15 | priority_upper_bound):
16 | """Generate one sample of priority sort algorithm.
17 |
18 | Arguments
19 | dimension: the dimension of input output sequences.
20 | input_sequence_length: the length of input sequence.
21 | output_sequence_length: the length of output sequence.
22 | priority_lower_bound: the lower bound of priority.
23 | priority_upper_bound: the upper bound of priority.
24 |
25 | Returns
26 | input_sequence: the input sequence of a sample.
27 | output_sequence: the output sequence of a sample.
28 | """
29 | sequence = input_sequence_length + output_sequence_length + 1
30 | input_sequence = np.random.binomial(
31 | 1, 0.5, (input_sequence_length, dimension + 1)).astype(np.uint8)
32 | output_sequence = np.zeros(
33 | (output_sequence_length, dimension + 1), dtype=np.uint8)
34 | input_priority = np.random.uniform(priority_lower_bound,
35 | priority_upper_bound,
36 | (input_sequence_length, 1))
37 | output_priority = sorted(
38 | input_priority, reverse=True)[:output_sequence_length]
39 | pair = [(input_sequence[i], input_priority[i])
40 | for i in range(input_sequence_length)]
41 | sorted_input_sequence = sorted(
42 | pair, key=lambda prior: prior[1], reverse=True)
43 | for i in range(output_sequence_length):
44 | output_sequence[i] = sorted_input_sequence[i][0]
45 |
46 | input_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
47 | input_priority_ = np.zeros((sequence, 1), dtype=np.float32)
48 | output_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
49 | output_priority_ = np.zeros((sequence, 1), dtype=np.float32)
50 |
51 | input_sequence_[:input_sequence_length, :-1] = input_sequence
52 | input_sequence_[input_sequence_length][-1] = 1
53 | input_priority_[:input_sequence_length] = input_priority
54 | output_sequence_[input_sequence_length+1:sequence, :-1] = output_sequence
55 | output_priority_[input_sequence_length+1:sequence] = output_priority
56 |
57 | # return input sequence, priority of each input, output sequence, priority
58 | # of each output
59 | return input_sequence_, input_priority_, output_sequence_, output_priority_
60 |
61 |
62 | def generate_data_set(
63 | dimension,
64 | input_sequence_length,
65 | output_sequence_length,
66 | priority_lower_bound,
67 | priority_upper_bound,
68 | data_set_size):
69 | """Generate samples for learning priority sort algorithm.
70 |
71 | Arguments
72 | dimension: the dimension of input output sequences.
73 | input_sequence_length: the length of input sequence.
74 | output_sequence_length: the length of output sequence.
75 | priority_lower_bound: the lower bound of priority.
76 | priority_upper_bound: the upper bound of priority.
77 | data_set_size: the size of one episode.
78 |
79 | Returns
80 | input_sequence: the input sequence of a sample.
81 | output_sequence: the output sequence of a sample.
82 | """
83 | sequence_length = input_sequence_length + output_sequence_length
84 | input_sequences = np.zeros(
85 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
86 | output_sequences = np.zeros(
87 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
88 | for i in range(data_set_size):
89 | input_sequence, input_priority, output_sequence, output_priority = \
90 | generate_one_sample(
91 | dimension,
92 | input_sequence_length,
93 | output_sequence_length,
94 | priority_lower_bound,
95 | priority_upper_bound)
96 | input_sequences[i] = input_sequence
97 | output_sequences[i] = output_sequence
98 | input_sequences[i][:, -2] = input_priority.transpose()
99 | output_sequences[i][:, -2] = output_priority.transpose()
100 |
101 | # return the total samples
102 | return input_sequences, output_sequences
103 |
--------------------------------------------------------------------------------
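A usage sketch (not part of the repository, assuming the package root is on sys.path) that draws one priority-sort sample and checks the properties described above: a delimiter step directly after the inputs, and target priorities sorted in descending order.

```
# Usage sketch: inspect one priority-sort sample.
import numpy as np
from datasets.priority_sort import generate_one_sample

x, x_priority, y, y_priority = generate_one_sample(
    dimension=8,
    input_sequence_length=20,
    output_sequence_length=16,
    priority_lower_bound=-1,
    priority_upper_bound=1)

# Total length = input length + output length + 1 delimiter step.
print(x.shape, y.shape)                          # (37, 10) (37, 10)
print(x[20, -1])                                 # delimiter flag after the inputs
print((np.diff(y_priority[21:, 0]) <= 0).all())  # target priorities are descending
```
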
/datasets/associative_recall.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | from utils import initialize_random_seed
4 |
5 |
6 | # Initialize the random seed
7 | initialize_random_seed()
8 |
9 |
10 | def generate_items(dimension, item_size, episode_size):
11 | """Generate items of associative recall algorithm.
12 |
13 | # Arguments
14 | dimension: the dimension of input output sequences.
15 | item_size: the size of items.
16 | episode_size: the size of one episode.
17 |
18 | # Returns
19 | items: the generated item.
20 | """
21 | inner_item = np.random.binomial(
22 | 1, 0.5, ((item_size + 1) * episode_size, dimension)
23 | ).astype(np.uint8)
24 | items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
25 | dtype=np.uint8)
26 | # items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
27 | # dtype=np.bool)
28 | items[:, :-2] = inner_item
29 |
30 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
31 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
32 | separator[0][-2] = 1
33 | items[:(item_size + 1) * episode_size:(item_size + 1)] = separator[0]
34 |
35 | # return one items for associative recall
36 | return items
37 |
38 |
39 | def generate_one_sample(
40 | dimension, item_size, episode_size, max_episode_size):
41 | """Generate one sample of associative recall algorithm.
42 |
43 | Arguments
44 | dimension: the dimension of input output sequences.
45 | item_size: the size of one item.
46 | episode_size: the size of one episode.
47 | max_episode_size: the maximum episode size.
48 |
49 | Returns
50 | input_sequence: the input sequence of a sample.
51 | output_sequence: the output sequence of a sample.
52 | """
53 | sequence_length = (item_size+1) * (max_episode_size+2)
54 | input_sequence = np.zeros(
55 | (sequence_length, dimension + 2), dtype=np.uint8)
56 | # input_sequence = np.zeros(
57 | # (sequence_length, dimension + 2), dtype=np.bool)
58 | input_sequence[:(item_size + 1) * episode_size] = \
59 | generate_items(
60 | dimension, item_size, episode_size)
61 |
62 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
63 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
64 | separator[0][-2] = 1
65 | query_index = np.random.randint(0, episode_size-1)
66 |
67 | input_sequence[(item_size+1)*episode_size:(item_size+1)*(episode_size+1)] = \
68 | input_sequence[(item_size+1)*query_index:(item_size+1)*(query_index+1)]
69 | input_sequence[(item_size+1)*episode_size][-2] = 0
70 | input_sequence[(item_size+1)*episode_size][-1] = 1
71 | input_sequence[(item_size+1)*(episode_size+1)][-1] = 1
72 |
73 | output_sequence = np.zeros(
74 | (sequence_length, dimension + 2), dtype=np.uint8)
75 | # output_sequence = np.zeros(
76 | # (sequence_length, dimension + 2), dtype=np.bool)
77 | output_sequence[(item_size+1)*(episode_size+1):(item_size+1)*(episode_size+2)] = \
78 | input_sequence[(item_size+1)*(query_index+1):(item_size+1)*(query_index+2)]
79 | output_sequence[(item_size+1)*(episode_size+1)][-2] = 0
80 |
81 | # return one sample for associative recall
82 | return input_sequence, output_sequence
83 |
84 |
85 | def generate_data_set(
86 | dimension, item_size, max_episode_size, data_set_size):
87 | """Generate samples for learning associative recall algorithm.
88 |
89 | Arguments
90 | dimension: the dimension of input output sequences.
91 | item_size: the size of one item.
92 | max_episode_size: the maximum episode size.
93 | data_set_size: the size of one episode.
94 |
95 | Returns
96 | input_sequences: the input sequences of total samples.
97 | output_sequences: the output sequences of total samples.
98 | """
99 | episode_size = np.random.randint(2, max_episode_size + 1, data_set_size)
100 | sequence_length = (item_size+1) * (max_episode_size+2)
101 | input_sequences = np.zeros(
102 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
103 | output_sequences = np.zeros(
104 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
105 | # input_sequences = np.zeros(
106 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
107 | # output_sequences = np.zeros(
108 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
109 | for i in range(data_set_size):
110 | input_sequence, output_sequence = generate_one_sample(
111 | dimension, item_size, episode_size[i], max_episode_size)
112 | input_sequences[i] = input_sequence
113 | output_sequences[i] = output_sequence
114 |
115 | # return the total samples
116 | return input_sequences, output_sequences
117 |
--------------------------------------------------------------------------------
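A usage sketch (not part of the repository, assuming the package root is on sys.path): each sample holds episode_size items of item_size binary rows separated by delimiter rows, then a query item marked in the last channel, and the target is the item that followed the queried item in the episode.

```
# Usage sketch: inspect one associative-recall sample.
from datasets.associative_recall import generate_one_sample

dimension, item_size, episode_size, max_episode_size = 6, 3, 4, 6
x, y = generate_one_sample(dimension, item_size, episode_size, max_episode_size)

# Total length = (item_size + 1) * (max_episode_size + 2) time steps.
print(x.shape, y.shape)                       # (32, 8) (32, 8)
print(x[(item_size + 1) * episode_size, -1])  # query marker in the last channel
# The recalled item occupies the block right after the query.
print(y[(item_size + 1) * (episode_size + 1):(item_size + 1) * (episode_size + 2)])
```
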
/unit_test/visualization_unit_test.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests for the visualization module.
3 | """
4 | import numpy as np
5 | import visualization
6 | import dataset
7 |
8 |
9 | def test_show_matrix():
10 | w = np.random.random((8, 10))
11 | title = " "
12 | visualization.show(w, title)
13 |
14 |
15 | def test_show_multi_matrix():
16 | w = np.random.random((200, 300))
17 | w_z = np.random.random((200, 300))
18 | u_z = np.random.random((200, 250))
19 | w_r = w
20 | u_r = w
21 | w_h = w
22 | u_h = w
23 | w_z_title = "$Update\ gate: $\n $z_{t} = \sigma(W^{(z)}x_{t}+U^{(z)}h_{t-1}+b^{(z)})$\n $W^{(z)}$"
24 | u_z_title = "$Update\ gate: $\n $z_{t} = \sigma(W^{(z)}x_{t}+U^{(z)}h_{t-1}+b^{(z)})$\n $U^{(z)}$"
25 | w_r_title = "$Reset\ gate: $\n $r_{t} = \sigma(W^{(r)}x_{t}+U^{(r)}h_{t-1}+b^{(r)})$\n $W^{(r)}$"
26 | u_r_title = "$Reset\ gate: $\n $r_{t} = \sigma(W^{(r)}x_{t}+U^{(r)}h_{t-1}+b^{(r)})$\n $U^{(r)}$"
27 | w_h_title = "$Hidden: $\n $\\tilde{h}_{t} = \\tanh(Wx_{t}+U(r_{t}\odot h_{t-1})+b^{(h)})$\n $W$"
28 | u_h_title = "$Hidden: $\n $\\tilde{h}_{t} = \\tanh(Wx_{t}+U(r_{t}\odot h_{t-1})+b^{(h)})$\n $U$"
29 | visualization.show_multi_matirix(w_z, w_z_title, u_z, u_z_title, w_r, w_r_title, u_r, u_r_title,
30 | w_h, w_h_title, u_h, u_h_title)
31 |
32 |
33 | def test_show_copy_data():
34 | input_sequence_10, output_sequence_10 = dataset.generate_copy_sample(8, 10)
35 | input_sequence_20, output_sequence_20 = dataset.generate_copy_sample(8, 20)
36 | input_sequence_30, output_sequence_30 = dataset.generate_copy_sample(8, 30)
37 | input_sequence_50, output_sequence_50 = dataset.generate_copy_sample(8, 50)
38 | input_sequence_120, output_sequence_120 = dataset.generate_copy_sample(8, 120)
39 |
40 | input_sequence_10 = input_sequence_10.transpose()
41 | output_sequence_10 = output_sequence_10.transpose()
42 |
43 | input_sequence_20 = input_sequence_20.transpose()[:, 0:input_sequence_20.size/2]
44 | output_sequence_20 = output_sequence_20.transpose()[:, 0:output_sequence_20.size/2]
45 |
46 | input_sequence_30 = input_sequence_30.transpose()[:, 0:input_sequence_30.size/2]
47 | output_sequence_30 = output_sequence_30.transpose()[:, 0:output_sequence_30.size/2]
48 |
49 | input_sequence_50 = input_sequence_50.transpose()[:, 0:input_sequence_50.size/2]
50 | output_sequence_50 = output_sequence_50.transpose()[:, 0:output_sequence_50.size/2]
51 |
52 | input_sequence_120 = input_sequence_120.transpose()[:, 0:input_sequence_120.size/2]
53 | output_sequence_120 = output_sequence_120.transpose()[:, 0:output_sequence_120.size/2]
54 |
55 | print "10"
56 | print (input_sequence_10.shape[1]-1)/2
57 | print input_sequence_10
58 | print "20"
59 | print (input_sequence_20.shape[1]-1)/2
60 | print input_sequence_20
61 | print "30"
62 | print (input_sequence_30.shape[1]-1)/2
63 | print input_sequence_30
64 | print "50"
65 | print (input_sequence_50.shape[1]-1)/2
66 | print input_sequence_50
67 | print "120"
68 | print (input_sequence_120.shape[1]-1)/2
69 | print input_sequence_120
70 |
71 | image_file = 'image/figure_4.png'
72 |
73 | visualization.show_multi_copy_data(
74 | input_sequence_10[:, 0:(input_sequence_10.shape[1]-1)/2],
75 | output_sequence_10[:, (output_sequence_10.shape[1]+1)/2:output_sequence_10.shape[1]],
76 | input_sequence_20[:, 0:(input_sequence_20.shape[1]-1)/2],
77 | output_sequence_20[:, (output_sequence_20.shape[1]+1)/2:output_sequence_20.shape[1]],
78 | input_sequence_30[:, 0:(input_sequence_30.shape[1]-1)/2],
79 | output_sequence_30[:, (output_sequence_30.shape[1]+1)/2:output_sequence_30.shape[1]],
80 | input_sequence_50[:, 0:(input_sequence_50.shape[1]-1)/2],
81 | output_sequence_50[:, (output_sequence_50.shape[1]+1)/2:output_sequence_50.shape[1]],
82 | input_sequence_120[:, 0:(input_sequence_120.shape[1]-1)/2],
83 | output_sequence_120[:, (output_sequence_120.shape[1]+1)/2:output_sequence_120.shape[1]],
84 | image_file
85 | )
86 |
87 |
88 | def test_show_memory_of_copy_task():
89 | input_sequence, output_sequence = dataset.generate_copy_sample(8, 20)
90 | input_sequence = input_sequence.transpose()
91 | output_sequence = output_sequence.transpose()
92 | adds = dataset.generate_random_binomial_(16, 40)
93 | reads = dataset.generate_random_binomial_(16, 40)
94 | write_weightings, read_weightings = dataset.generate_weightings(36, 40)
95 | image_file = "image/figure_6.png"
96 | visualization.show_memory_of_copy_task(input_sequence, output_sequence,
97 | adds, reads,
98 | write_weightings, read_weightings,
99 | image_file)
100 |
101 |
102 |
103 | if __name__ == "__main__":
104 | # test_show_matrix()
105 | # test_show_multi_matrix()
106 | # test_show_copy_data()
107 | test_show_memory_of_copy_task()
108 |
109 |
110 |
--------------------------------------------------------------------------------
/algorithm_learning/previous_version/learning_associative_recall.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''An implementation of learning the associative recall algorithm with RNN
3 | (basic RNN, LSTM, GRU).
4 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))."
5 | Input dimension: "6+2"; each item is 3*6 bits.
6 | Output sequence length: "3" (one item).
7 | Output dimension: equal to input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | import time # Add by Steven Robot
20 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
21 | from keras.callbacks import Callback # Add by Steven Robot
22 | from algorithm_learning.util import LossHistory # Add by Steven Robot
23 | import os # Add by Steven Robot
24 |
25 |
26 | # Parameters for the model to train copying algorithm_learning
27 | TRAINING_SIZE = 1024000
28 | # TRAINING_SIZE = 128000
29 | # TRAINING_SIZE = 1280
30 | INPUT_DIMENSION_SIZE = 6
31 | ITEM_SIZE = 3
32 | MAX_EPISODE_SIZE = 6
33 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
34 |
35 | # Try replacing SimpleRNN, GRU, or LSTM
36 | # RNN = recurrent.SimpleRNN
37 | # RNN = recurrent.GRU
38 | RNN = recurrent.LSTM
39 | HIDDEN_SIZE = 128*4
40 | LAYERS = 1
41 | # LAYERS = MAX_REPEAT_TIMES
42 | BATCH_SIZE = 1024
43 | FOLDER = "experiment_results/associative_recall/"
44 | if not os.path.isdir(FOLDER):
45 | os.makedirs(FOLDER)
46 | print("create folder: %s" % FOLDER)
47 |
48 | print()
49 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
50 | print('Generating data sets...')
51 | train_X, train_Y = dataset.generate_associative_recall_data_set(
52 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
53 | valid_X, valid_Y = dataset.generate_associative_recall_data_set(
54 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE/5)
55 |
56 | matrix_list = []
57 | matrix_list.append(train_X[0].transpose())
58 | matrix_list.append(train_Y[0].transpose())
59 | matrix_list.append(train_Y[0].transpose())
60 | name_list = []
61 | name_list.append("Input")
62 | name_list.append("Target")
63 | name_list.append("Predict")
64 | show_matrix = visualization.PlotDynamicalMatrix(matrix_list, name_list)
65 | random_index = np.random.randint(1, 128, 20)
66 | for i in range(20):
67 | matrix_list_update = []
68 | matrix_list_update.append(train_X[random_index[i]].transpose())
69 | matrix_list_update.append(train_Y[random_index[i]].transpose())
70 | matrix_list_update.append(train_Y[random_index[i]].transpose())
71 | show_matrix.update(matrix_list_update, name_list)
72 | show_matrix.save(FOLDER+"associative_recall_data_training_%2d.png" % i)
73 |
74 | print()
75 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
76 | print('Build model...')
77 | model = Sequential()
78 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
79 | # note: in a situation where your input sequences have a variable length,
80 | # use input_shape=(None, nb_feature).
81 | hidden_layer = RNN(
82 | HIDDEN_SIZE,
83 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2),
84 | init='glorot_uniform',
85 | inner_init='orthogonal',
86 | activation='tanh',
87 | # activation='hard_sigmoid',
88 | # activation='sigmoid',
89 | W_regularizer=None,
90 | U_regularizer=None,
91 | b_regularizer=None,
92 | dropout_W=0.0,
93 | dropout_U=0.0)
94 | model.add(hidden_layer)
95 |
96 | # For the decoder's input, we repeat the encoded input for each time step
97 | model.add(RepeatVector(MAX_INPUT_LENGTH))
98 | # The decoder RNN could be multiple layers stacked or a single layer
99 | for _ in range(LAYERS):
100 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
101 |
102 | # For each of step of the output sequence, decide which character should be chosen
103 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
104 | # model.add(Activation('softmax'))
105 | # model.add(Activation('hard_sigmoid'))
106 | model.add(Activation('sigmoid'))
107 |
108 | model.compile(loss='binary_crossentropy',
109 | # loss='mse',
110 | optimizer='adam',
111 | metrics=['accuracy'])
112 |
113 | print()
114 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
115 | print("Model architecture")
116 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_associative_recall.png")
117 | print("Model summary")
118 | print(model.summary())
119 | print("Model parameter count")
120 | print(model.count_params())
121 |
122 | print()
123 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
124 | print("Training...")
125 | # Train the model each generation and show predictions against the
126 | # validation dataset
127 | for iteration in range(1, 200):
128 | print()
129 | print('-' * 78)
130 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
131 | print('Iteration', iteration)
132 | history = LossHistory()
133 | check_pointer = ModelCheckpoint(
134 | filepath=FOLDER+"associative_recall_model_weights.hdf5",
135 | verbose=1, save_best_only=True)
136 | model.fit(train_X,
137 | train_Y,
138 | batch_size=BATCH_SIZE,
139 | nb_epoch=30,
140 | # nb_epoch=1,
141 | callbacks=[check_pointer, history],
142 | validation_data=(valid_X, valid_Y))
143 | print(len(history.losses))
144 | print(history.losses)
145 | print(len(history.acces))
146 | print(history.acces)
147 |
148 | ###
149 | # Select 20 samples from the validation set at random so we can
150 | # visualize errors
151 | for i in range(20):
152 | ind = np.random.randint(0, len(valid_X))
153 | inputs, outputs = valid_X[np.array([ind])], \
154 | valid_Y[np.array([ind])]
155 | predicts = model.predict(inputs, verbose=0)
156 | matrix_list_update = []
157 | matrix_list_update.append(inputs[0].transpose())
158 | matrix_list_update.append(outputs[0].transpose())
159 | matrix_list_update.append(predicts[0].transpose())
160 | show_matrix.update(matrix_list_update,
161 | name_list)
162 | show_matrix.save(FOLDER+"associative_data_predict_%3d.png"%iteration)
163 |
164 | show_matrix.close()
165 |
166 |
--------------------------------------------------------------------------------
/unit_test/test_datasets.py:
--------------------------------------------------------------------------------
1 | import visualization
2 | import numpy as np
3 | import datasets
4 |
5 |
6 | def test_copy_data_generation():
7 | input_sequence, output_sequence = datasets.generate_copy_sample(8, 10)
8 | print input_sequence
9 | print output_sequence
10 | input_sequence, output_sequence = datasets.generate_copy_sample(8, 20)
11 | print input_sequence
12 | print output_sequence
13 | input_sequence, output_sequence = datasets.generate_copy_sample(8, 30)
14 | print input_sequence
15 | print output_sequence
16 | input_sequence, output_sequence = datasets.generate_copy_sample(8, 50)
17 | print input_sequence
18 | print output_sequence
19 | input_sequence, output_sequence = datasets.generate_copy_sample(8, 120)
20 | print input_sequence
21 | print output_sequence
22 |
23 |
24 | def show_repeat_copy_data_generation():
25 | print('Generating data...')
26 | input_sequence, output_sequence, repeat_times = \
27 | datasets.generate_repeat_copy_data_set(7, 10, 100, 10)
28 | print(output_sequence[0].transpose().shape)
29 |
30 | for i in range(100):
31 | visualization.show_repeat_copy_data(
32 | output_sequence[i].transpose(),
33 | input_sequence[i].transpose(),
34 | "Output sequence $y^{(t)}$",
35 | "Input sequence $x^{(t)}$",
36 | "../experiment/repeat_copy_data_predict_%2d.pdf"%i,
37 | repeat_times[i]
38 | )
39 |
40 |
41 | def test_repeat_copy_data_generation():
42 | print('Generating data...')
43 | input_sequence, output_sequence, repeat_times = \
44 | datasets.generate_repeat_copy_data_set(4, 10, 20, 20)
45 |
46 | print(input_sequence.shape)
47 | matrix_list = []
48 | matrix_list.append(input_sequence[0].transpose())
49 | matrix_list.append(output_sequence[0].transpose())
50 | matrix_list.append(output_sequence[0].transpose())
51 | name_list = []
52 | name_list.append("Input")
53 | name_list.append("Target")
54 | name_list.append("Predict")
55 | show_matrix = visualization.PlotDynamicalMatrix4Repeat(matrix_list, name_list, repeat_times[0])
56 |
57 | for i in range(20):
58 | matrix_list_update = []
59 | matrix_list_update.append(input_sequence[i].transpose())
60 | matrix_list_update.append(output_sequence[i].transpose())
61 | matrix_list_update.append(output_sequence[i].transpose())
62 | show_matrix.update(matrix_list_update, name_list, repeat_times[i])
63 | show_matrix.save("../experiment/repeat_copy_data_predict_%2d.png"%i)
64 |
65 |
66 | def test_associative_recall_data():
67 | INPUT_DIMENSION_SIZE = 6
68 | # ITEM_SIZE = 4
69 | ITEM_SIZE = 3
70 | MAX_EPISODE_SIZE = 20
71 | TRAINING_SIZE = 100
72 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
73 | print(MAX_INPUT_LENGTH)
74 | # item = dataset.generate_associative_recall_items(input_size, item_size, episode_size)
75 | # print(item)
76 |
77 | print('Generating data sets...')
78 | input_sequence, output_sequence = datasets.generate_associative_recall_data_set(
79 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
80 | # print input_sequence
81 | # print output_sequence
82 |
83 | print(input_sequence.shape)
84 | print(output_sequence.shape)
85 |
86 | for i in range(TRAINING_SIZE):
87 | # print(output_sequence[i].transpose())
88 | # print(input_sequence[i].transpose())
89 | visualization.show_associative_recall_data(
90 | output_sequence[i].transpose(),
91 | input_sequence[i].transpose(),
92 | "Output sequence $y^{(t)}$",
93 | "Input sequence $x^{(t)}$",
94 | "../experiment/associative_recall_%2d.pdf"%i
95 | )
96 |
97 |
98 | def test_n_gram_data():
99 | a = 0.5
100 | b = 0.5
101 | n = 6
102 | look_up_table = datasets.generate_probability_of_n_gram_by_beta(a, b, n)
103 | sequence_length = 50
104 | example_size = 100
105 | # print(look_up_table)
106 | train_X, train_Y = datasets.generate_dynamical_n_gram_data_set(
107 | look_up_table, n, sequence_length, example_size)
108 | # print(train_X)
109 | show_matrix = visualization.PlotDynamicalMatrix4NGram(
110 | train_X[0].transpose(), train_Y[0].transpose(), train_Y[0].transpose())
111 |
112 | for i in range(example_size):
113 | show_matrix.update(train_X[i].transpose(), train_Y[i].transpose(), train_Y[i].transpose())
114 | show_matrix.save("../experiment/n_gram_data_training_%2d.png"%i)
115 |
116 | show_matrix.close()
117 |
118 |
119 | def test_priority_sort_data():
120 | input_size = 8
121 | input_sequence_length = 20
122 | output_sequence_length = 16
123 | priority_lower_bound = -1
124 | priority_upper_bound = 1
125 | example_size = 10
126 | input_matrix = np.zeros((input_sequence_length+1, input_size+2), dtype=np.float32)
127 | output_matrix = np.zeros((output_sequence_length+1, input_size+2), dtype=np.float32)
128 |
129 | train_x_seq, train_y_seq = \
130 | datasets.generate_priority_sort_data_set(
131 | input_size,
132 | input_sequence_length,
133 | output_sequence_length,
134 | priority_lower_bound,
135 | priority_upper_bound,
136 | example_size)
137 |
138 | print(train_x_seq[0].shape)
139 | print(input_matrix.shape)
140 | input_matrix = train_x_seq[0]
141 | output_matrix = train_y_seq[0]
142 | show_matrix = visualization.PlotDynamicalMatrix4PrioritySort(
143 | input_matrix.transpose(),
144 | output_matrix.transpose(),
145 | output_matrix.transpose())
146 | for i in range(example_size):
147 | input_matrix = train_x_seq[i]
148 | output_matrix = train_y_seq[i]
149 | # input_matrix[:, :-1] = train_x_seq[i]
150 | # input_matrix[:, -1] = train_x_priority[i].reshape(input_sequence_length)
151 | # output_matrix[:, :-1] = train_y_seq[i]
152 | # output_matrix[:, -1] = train_y_priority[i].reshape(output_sequence_length)
153 | show_matrix.update(input_matrix.transpose(),
154 | output_matrix.transpose(),
155 | output_matrix.transpose())
156 | show_matrix.save("../experiment/priority_data_training_%2d.png"%i)
157 |
158 | show_matrix.close()
159 |
160 |
161 | if __name__ == "__main__":
162 | # test_copy_data_generation()
163 | # show_repeat_copy_data_generation()
164 | # test_repeat_copy_data_generation()
165 | test_associative_recall_data()
166 | # test_n_gram_data()
167 | # test_priority_sort_data()
168 |
--------------------------------------------------------------------------------
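A note on the length bound used in test_associative_recall_data above: MAX_INPUT_LENGTH is derived purely from the constants in that test. A minimal check of the arithmetic (reading the "+1" as a per-item delimiter column and the "+2" as room for the query and answer is an assumption; only the formula itself appears in the code):

    ITEM_SIZE = 3          # vectors per item, as in the test above
    MAX_EPISODE_SIZE = 20  # maximum number of items per episode

    # Each item spans ITEM_SIZE columns plus one extra column, and two extra
    # item slots are reserved on top of the episode, giving:
    MAX_INPUT_LENGTH = (ITEM_SIZE + 1) * (MAX_EPISODE_SIZE + 2)
    print(MAX_INPUT_LENGTH)  # 4 * 22 = 88, the value the test prints
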
/algorithm_learning/previous_version/learning_copy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''An implementation of learning the copy algorithm with RNNs (basic RNN,
3 | LSTM, GRU).
4 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
5 | Input dimension: "8"
6 | Output sequence length: equal to input sequence length.
7 | Output dimension: equal to input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
20 | from keras.callbacks import Callback # Add by Steven Robot
21 | import time # Add by Steven Robot
22 | from algorithm_learning.util import LossHistory # Add by Steven Robot
23 | import os # Add by Steven Robot
24 |
25 |
26 | # Parameters for the model to train the copy algorithm
27 | TRAINING_SIZE = 1024000 # for 8-bits length
28 | # TRAINING_SIZE = 128000 # for 4-bits length
29 | # TRAINING_SIZE = 1280
30 | INPUT_DIMENSION_SIZE = 8 + 1 # for 8-bits length
31 | # INPUT_DIMENSION_SIZE = 4 + 1 # for 4-bits length
32 | MAX_COPY_LENGTH = 20 # for 8-bits length
33 | # MAX_COPY_LENGTH = 10 # for 4-bits length
34 | MAX_INPUT_LENGTH = MAX_COPY_LENGTH + 1 + MAX_COPY_LENGTH
35 |
36 | # Try replacing SimpleRNN, GRU, or LSTM
37 | # RNN = recurrent.SimpleRNN
38 | # RNN = recurrent.GRU
39 | RNN = recurrent.LSTM
40 | # HIDDEN_SIZE = 128
41 | HIDDEN_SIZE = 128*4
42 | LAYERS = 1
43 | # BATCH_SIZE = 2048
44 | BATCH_SIZE = 1024
45 | # BATCH_SIZE = 512
46 | # BATCH_SIZE = 256
47 | # BATCH_SIZE = 128
48 | FOLDER = "experiment_results/copy/"
49 | if not os.path.isdir(FOLDER):
50 | os.makedirs(FOLDER)
51 | print("create folder: %s" % FOLDER)
52 |
53 | print()
54 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
55 | print('Generating data sets...')
56 | train_X, train_Y = dataset.generate_copy_data_set(
57 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE)
58 | valid_X, valid_Y = dataset.generate_copy_data_set(
59 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE // 10)
60 |
61 | matrix_list = []
62 | matrix_list.append(train_X[0].transpose())
63 | matrix_list.append(train_Y[0].transpose())
64 | matrix_list.append(train_Y[0].transpose())
65 | name_list = []
66 | name_list.append("Input")
67 | name_list.append("Target")
68 | name_list.append("Predict")
69 | show_matrix = visualization.PlotDynamicalMatrix(
70 | matrix_list, name_list)
71 | random_index = np.random.randint(1, 128, 20)
72 | for i in range(20):
73 | matrix_list_update = []
74 | matrix_list_update.append(train_X[random_index[i]].transpose())
75 | matrix_list_update.append(train_Y[random_index[i]].transpose())
76 | matrix_list_update.append(train_Y[random_index[i]].transpose())
77 | show_matrix.update(matrix_list_update, name_list)
78 | show_matrix.save(FOLDER+"copy_data_training_%2d.png" % i)
79 |
80 | print()
81 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
82 | print('Build model...')
83 | model = Sequential()
84 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
85 | # note: in a situation where your input sequences have a variable length,
86 | # use input_shape=(None, nb_feature).
87 | hidden_layer = RNN(
88 | HIDDEN_SIZE,
89 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE),
90 | init='glorot_uniform',
91 | inner_init='orthogonal',
92 | activation='tanh',
93 | # activation='hard_sigmoid',
94 | # activation='sigmoid',
95 | W_regularizer=None,
96 | U_regularizer=None,
97 | b_regularizer=None,
98 | dropout_W=0.0,
99 | dropout_U=0.0)
100 | model.add(hidden_layer)
101 | # For the decoder's input, we repeat the encoded input for each time step
102 | model.add(RepeatVector(MAX_INPUT_LENGTH))
103 | # The decoder RNN could be multiple layers stacked or a single layer
104 | for _ in range(LAYERS):
105 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
106 |
107 | # For each step of the output sequence, decide which character should be chosen
108 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE)))
109 | # model.add(Activation('softmax'))
110 | # model.add(Activation('hard_sigmoid'))
111 | model.add(Activation('sigmoid'))
112 |
113 | model.compile(loss='binary_crossentropy',
114 | # loss='mse',
115 | optimizer='adam',
116 | metrics=['accuracy'])
117 |
118 | print()
119 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
120 | print("Model architecture")
121 | plot(model, show_shapes=True, to_file=FOLDER+"simple_rnn_for_copying.png")
122 | print("Model summary")
123 | print(model.summary())
124 | print("Model parameter count")
125 | print(model.count_params())
126 |
127 |
128 | print()
129 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
130 | print("Training...")
131 | # Train the model each generation and show predictions against the
132 | # validation dataset
133 | for iteration in range(1, 200):
134 | print()
135 | print('-' * 78)
136 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
137 | print('Iteration', iteration)
138 | history = LossHistory()
139 | check_pointer = ModelCheckpoint(
140 | filepath=FOLDER+"copying_model_weights.hdf5",
141 | verbose=1, save_best_only=True)
142 | model.fit(train_X,
143 | train_Y,
144 | batch_size=BATCH_SIZE,
145 | # nb_epoch=10,
146 | nb_epoch=1,
147 | callbacks=[check_pointer, history],
148 | validation_data=(valid_X, valid_Y))
149 | ###
150 | # Select 10 samples from the validation set at random so we can
151 | # visualize errors
152 | for i in range(10):
153 | ind = np.random.randint(0, len(valid_X))
154 | # inputs = valid_X[ind]
155 | # outputs = valid_Y[ind]
156 | inputs, outputs = valid_X[np.array([ind])], valid_Y[np.array([ind])]
157 | predicts = model.predict(inputs, verbose=0)
158 | # print(inputs)
159 | # print(outputs)
160 | # print(predicts)
161 | matrix_list_update = []
162 | matrix_list_update.append(inputs[0].transpose())
163 | matrix_list_update.append(outputs[0].transpose())
164 | matrix_list_update.append(predicts[0].transpose())
165 | show_matrix.update(matrix_list_update, name_list)
166 | show_matrix.save(FOLDER+"copy_data_predict_%3d.png" % iteration)
167 |
168 | show_matrix.close()
169 |
--------------------------------------------------------------------------------
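learning_copy.py (and the training scripts below) log per-batch metrics through the LossHistory callback imported from algorithm_learning.util; that module is not reproduced in this listing. A minimal sketch of what such a Keras 1.x callback typically looks like, so the later history.losses / history.acces references read clearly (the attribute names come from the scripts, the body is an assumption):

    from keras.callbacks import Callback

    class LossHistory(Callback):
        """Collect the loss and accuracy reported after every training batch."""
        def on_train_begin(self, logs={}):
            self.losses = []
            self.acces = []

        def on_batch_end(self, batch, logs={}):
            # Keras 1.x passes the metrics of the finished batch in `logs`.
            self.losses.append(logs.get('loss'))
            self.acces.append(logs.get('acc'))
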
/unit_test/lstm2ntm_unit_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''A unit test of the NTM layer defined in lstm2ntm.py, trained on the
3 | associative recall task.
4 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))".
5 | Input dimension: "6+2", each item is 3*6 bits.
6 | Output sequence length: "3" (one item).
7 | Output dimension: equal to the input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | import time # Add by Steven Robot
20 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
21 | from keras.callbacks import Callback # Add by Steven Robot
22 | from util import LossHistory # Add by Steven Robot
23 | import os # Add by Steven Robot
24 | # import ntm # Add by Steven Robot
25 | import lstm2ntm # Add by Steven Robot
26 |
27 |
28 | # Parameters for the model to train the associative recall algorithm
29 | # TRAINING_SIZE = 1024000
30 | TRAINING_SIZE = 128000
31 | # TRAINING_SIZE = 1280
32 | INPUT_DIMENSION_SIZE = 6
33 | ITEM_SIZE = 3
34 | MAX_EPISODE_SIZE = 6
35 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
36 |
37 | # Try replacing SimpleRNN, GRU, or LSTM
38 | # RNN = recurrent.SimpleRNN
39 | # RNN = recurrent.GRU
40 | # RNN = recurrent.LSTM
41 | RNN = lstm2ntm.NTM
42 | HIDDEN_SIZE = 256
43 | LAYERS = 2
44 | # LAYERS = MAX_REPEAT_TIMES
45 | # BATCH_SIZE = 1024
46 | BATCH_SIZE = 128
47 | MEMORY_DIM = 128
48 | MEMORY_SIZE = 20
49 | CONTROLLER_OUTPUT_DIM = 100
50 | LOCATION_SHIFT_RANGE = 1
51 | NUM_READ_HEAD = 1
52 | NUM_WRITE_HEAD = 1
53 |
54 | FOLDER = "experiment_results/associative_recall_ntm/"
55 | if not os.path.isdir(FOLDER):
56 | os.makedirs(FOLDER)
57 | print("create folder: %s" % FOLDER)
58 |
59 | print()
60 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
61 | print('Generating data sets...')
62 | train_X, train_Y = dataset.generate_associative_recall_data_set(
63 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
64 | valid_X, valid_Y = dataset.generate_associative_recall_data_set(
65 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE // 5)
66 |
67 | matrix_list = []
68 | matrix_list.append(train_X[0].transpose())
69 | matrix_list.append(train_Y[0].transpose())
70 | matrix_list.append(train_Y[0].transpose())
71 | name_list = []
72 | name_list.append("Input")
73 | name_list.append("Target")
74 | name_list.append("Predict")
75 | show_matrix = visualization.PlotDynamicalMatrix(matrix_list, name_list)
76 | random_index = np.random.randint(1, 128, 20)
77 | for i in range(3):
78 | matrix_list_update = []
79 | matrix_list_update.append(train_X[random_index[i]].transpose())
80 | matrix_list_update.append(train_Y[random_index[i]].transpose())
81 | matrix_list_update.append(train_Y[random_index[i]].transpose())
82 | show_matrix.update(matrix_list_update, name_list)
83 | show_matrix.save(FOLDER+"associative_recall_data_training_%2d.png" % i)
84 |
85 | print()
86 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
87 | print('Build model...')
88 | model = Sequential()
89 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
90 | # note: in a situation where your input sequences have a variable length,
91 | # use input_shape=(None, nb_feature).
92 | model.add(RNN(
93 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2),
94 | # output_dim=INPUT_DIMENSION_SIZE+2,
95 | output_dim=MEMORY_DIM,
96 | memory_dim=MEMORY_DIM,
97 | memory_size=MEMORY_SIZE,
98 | controller_output_dim=CONTROLLER_OUTPUT_DIM,
99 | location_shift_range=LOCATION_SHIFT_RANGE,
100 | num_read_head=NUM_READ_HEAD,
101 | num_write_head=NUM_WRITE_HEAD,
102 | init='glorot_uniform',
103 | inner_init='orthogonal',
104 | return_sequences=True,
105 | # activation='hard_sigmoid',
106 | activation='tanh',
107 | # activation='sigmoid',
108 | W_regularizer=None,
109 | U_regularizer=None,
110 | R_regularizer=None,
111 | b_regularizer=None,
112 | dropout_W=0.0,
113 | dropout_U=0.0))
114 |
115 |
116 | # # For the decoder's input, we repeat the encoded input for each time step
117 | # # model.add(RepeatVector(MAX_INPUT_LENGTH))
118 | # # The decoder RNN could be multiple layers stacked or a single layer
119 | # for _ in range(LAYERS):
120 | # model.add(RNN(HIDDEN_SIZE, return_sequences=True))
121 |
122 | # For each step of the output sequence, decide which character should be chosen
123 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
124 | # model.add(Activation('softmax'))
125 | # model.add(Activation('hard_sigmoid'))
126 | model.add(Activation('sigmoid'))
127 |
128 | model.compile(loss='binary_crossentropy',
129 | # loss='mse',
130 | optimizer='adam',
131 | metrics=['accuracy'])
132 |
133 | print()
134 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
135 | print("Model architecture")
136 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_associative_recall.png")
137 | print("Model summary")
138 | print(model.summary())
139 | print("Model parameter count")
140 | print(model.count_params())
141 |
142 | print()
143 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
144 | print("Training...")
145 | # Train the model each generation and show predictions against the
146 | # validation dataset
147 | for iteration in range(1, 200):
148 | print()
149 | print('-' * 78)
150 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
151 | print('Iteration', iteration)
152 | history = LossHistory()
153 | check_pointer = ModelCheckpoint(
154 | filepath=FOLDER+"associative_recall_model_weights.hdf5",
155 | verbose=1, save_best_only=True)
156 | model.fit(train_X,
157 | train_Y,
158 | batch_size=BATCH_SIZE,
159 | nb_epoch=30,
160 | # nb_epoch=1,
161 | callbacks=[check_pointer, history],
162 | validation_data=(valid_X, valid_Y))
163 | print(len(history.losses))
164 | print(history.losses)
165 | print(len(history.acces))
166 | print(history.acces)
167 |
168 | ###
169 | # Select 20 samples from the validation set at random so we can
170 | # visualize errors
171 | for i in range(20):
172 | ind = np.random.randint(0, len(valid_X))
173 | inputs, outputs = valid_X[np.array([ind])], \
174 | valid_Y[np.array([ind])]
175 | predicts = model.predict(inputs, verbose=0)
176 | matrix_list_update = []
177 | matrix_list_update.append(inputs[0].transpose())
178 | matrix_list_update.append(outputs[0].transpose())
179 | matrix_list_update.append(predicts[0].transpose())
180 | show_matrix.update(matrix_list_update,
181 | name_list)
182 | show_matrix.save(FOLDER+"associative_data_predict_%3d.png"%iteration)
183 |
184 | show_matrix.close()
185 |
--------------------------------------------------------------------------------
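The NTM layer configured above takes the usual Neural Turing Machine addressing parameters (memory_size, memory_dim, location_shift_range, and the read/write head counts). For background on what those parameters control, here is a small NumPy sketch of content-based addressing, i.e. cosine similarity between a key and each memory row, sharpened by a key strength and normalized with a softmax. It is generic NTM background, not a reproduction of this repository's memory.py:

    import numpy as np

    def content_addressing(memory, key, beta):
        """memory: (memory_size, memory_dim); key: (memory_dim,); beta: scalar > 0."""
        # Cosine similarity between the key and every memory row.
        similarity = memory.dot(key) / (
            np.linalg.norm(memory, axis=1) * np.linalg.norm(key) + 1e-8)
        # Sharpen with the key strength and normalize to an attention weighting.
        weights = np.exp(beta * similarity)
        return weights / weights.sum()

    memory = np.random.rand(20, 128)   # MEMORY_SIZE x MEMORY_DIM, as configured above
    key = np.random.rand(128)
    w_c = content_addressing(memory, key, beta=5.0)
    print(w_c.shape, w_c.sum())        # (20,) 1.0
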
/algorithm_learning/figure/generate_figure.py:
--------------------------------------------------------------------------------
1 | import dataset
2 | import visualization
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from matplotlib.backends.backend_pdf import PdfPages
6 |
7 |
8 | def show_repeat_copy_data(
9 | output_sequence_copy,
10 | input_sequence_copy,
11 | repeat_times_copy,
12 | output_sequence_recall,
13 | input_sequence_recall,
14 | output_sequence_sort,
15 | input_sequence_sort,
16 | input_name,
17 | output_name,
18 | image_file):
19 | # set figure size
20 | fig = plt.figure(figsize=(12, 5))
21 | #
22 | # # draw first line
23 | # axes_input_10 = plt.subplot2grid((9, 1), (1, 0), colspan=1)
24 | # axes_input_10.set_aspect('equal')
25 | # plt.imshow(output_sequence_copy, interpolation='none')
26 | # axes_input_10.set_xticks([])
27 | # axes_input_10.set_yticks([])
28 | # # draw second line
29 | # axes_output_10 = plt.subplot2grid((9, 1), (2, 0), colspan=1)
30 | # plt.imshow(input_sequence_copy, interpolation='none')
31 | # axes_output_10.set_xticks([])
32 | # axes_output_10.set_yticks([])
33 | # # draw third line
34 | # # plt.text(80, 12, "(a) repeat copy", ha='center')
35 | # # title = "Repeat times = %d" % repeat_times_copy
36 | # # plt.text(80, -12, title, ha='center')
37 | # # plt.text(-2, 5, output_name, ha='right')
38 | # # plt.text(-2, -5, input_name, ha='right')
39 | # # # plt.text(18, 12, 'Time $t$ $\longrightarrow$', ha='right')
40 | # # plt.text(9, 12, '$t$ $\longrightarrow$', ha='right')
41 | #
42 | # # draw first line
43 | # axes_input_10 = plt.subplot2grid((9, 1), (4, 0), colspan=1)
44 | # axes_input_10.set_aspect('equal')
45 | # plt.imshow(output_sequence_recall, interpolation='none')
46 | # axes_input_10.set_xticks([])
47 | # axes_input_10.set_yticks([])
48 | # # draw second line
49 | # axes_output_10 = plt.subplot2grid((9, 1), (5, 0), colspan=1)
50 | # plt.imshow(input_sequence_recall, interpolation='none')
51 | # axes_output_10.set_xticks([])
52 | # axes_output_10.set_yticks([])
53 | # # draw third line
54 | # # plt.text(80, 12, "(b) associative recall", ha='center')
55 | # # plt.text(-2, 5, output_name, ha='right')
56 | # # plt.text(-2, -5, input_name, ha='right')
57 | # # plt.text(9, 12, '$t$ $\longrightarrow$', ha='right')
58 |
59 | # draw first line
60 | axes_input_10 = plt.subplot2grid((9, 1), (7, 0), colspan=1)
61 | axes_input_10.set_aspect('equal')
62 | plt.imshow(output_sequence_sort, interpolation='none')
63 | axes_input_10.set_xticks([])
64 | axes_input_10.set_yticks([])
65 | # draw second line
66 | axes_output_10 = plt.subplot2grid((9, 1), (8, 0), colspan=1)
67 | plt.imshow(input_sequence_sort, interpolation='none')
68 | axes_output_10.set_xticks([])
69 | axes_output_10.set_yticks([])
70 | # draw third line
71 | # plt.text(80, 12, "(c) priority sort", ha='center')
72 | # plt.text(-2, 5, output_name, ha='right')
73 | # plt.text(-2, -5, input_name, ha='right')
74 | # plt.text(9, 12, '$t$ $\longrightarrow$', ha='right')
75 |
76 | # add color bars
77 | # # *rect* = [left, bottom, width, height]
78 | # cax = plt.axes([0.84, 0.1, 0.005, 0.71])
79 | cax = plt.axes([0.84, 0.1, 0.005, 0.165])
80 | cbar = plt.colorbar(cax=cax)
81 | # show colorbar
82 | # cbar = plt.colorbar(gci)
83 | # cbar.set_label('$T_B(K)$', fontdict=font)
84 | cbar.set_ticks(np.linspace(0, 1, 3))
85 | cbar.set_ticklabels(('0', '0.5', '1'))
86 |
87 | # show figure
88 | plt.show()
89 |
90 | # save image
91 | fig.savefig(image_file, dpi=75, format='pdf')
92 |
93 | # close plot GUI
94 | plt.close()
95 |
96 |
97 | def show_algorithm_learning_example():
98 | input_size_copy = 8
99 | sequence_length_copy = 10
100 | repeat_times = 15
101 | input_sequence_copy, output_sequence_copy = \
102 | dataset.generate_repeat_copy_sample(
103 | input_size_copy, sequence_length_copy, repeat_times)
104 | print(input_sequence_copy.shape)
105 | print(output_sequence_copy.shape)
106 |
107 | input_size_recall = 6
108 | # item_size = 4
109 | item_size = 3
110 | episode_size = 38
111 | max_episode_size = 38
112 | input_sequence_recall = np.zeros(input_sequence_copy.shape)
113 | output_sequence_recall = np.zeros(output_sequence_copy.shape)
114 | input_sequence_recall_, output_sequence_recall_ = \
115 | dataset.generate_associative_recall_sample(
116 | input_size_recall, item_size, episode_size, max_episode_size)
117 | input_sequence_recall[:-1] = input_sequence_recall_
118 | output_sequence_recall[:-1] = output_sequence_recall_
119 | print(input_sequence_recall.shape)
120 | print(output_sequence_recall.shape)
121 |
122 | input_size_sort = 6
123 | input_sequence_length = 80
124 | output_sequence_length = 80
125 | priority_lower_bound = 0
126 | priority_upper_bound = 1
127 | # input_sequence_sort = np.zeros(input_sequence_copy.shape)
128 | # output_sequence_sort = np.zeros(output_sequence_copy.shape)
129 | input_sequence_sort_, input_priority_, output_sequence_sort_, output_priority_ = \
130 | dataset.generate_priority_sort_sample(
131 | input_size_sort,
132 | input_sequence_length,
133 | output_sequence_length,
134 | priority_lower_bound,
135 | priority_upper_bound)
136 |
137 | sequence_length = input_sequence_length + output_sequence_length
138 | input_sequence_sort = np.zeros((sequence_length+1, input_size_sort+2), dtype=np.float32)
139 | output_sequence_sort = np.zeros((sequence_length+1, input_size_sort+2), dtype=np.float32)
140 | input_sequence_sort = input_sequence_sort_
141 | output_sequence_sort = output_sequence_sort_
142 | input_sequence_sort[:, -2] = input_priority_.transpose()[0]
143 | output_sequence_sort[:, -2] = output_priority_.transpose()[0]
144 | print(input_sequence_sort.shape)
145 | print(output_sequence_sort.shape)
146 |
147 | # print(input_sequence_sort[1:50, :])
148 | print(input_sequence_sort[:, -2])
149 | print(input_priority_.transpose()[0])
150 | show_repeat_copy_data(
151 | output_sequence_copy.transpose(),
152 | input_sequence_copy.transpose(),
153 | repeat_times,
154 | output_sequence_recall.transpose(),
155 | input_sequence_recall.transpose(),
156 | output_sequence_sort.transpose(),
157 | input_sequence_sort.transpose(),
158 | "$y^{(t)}$",
159 | "$x^{(t)}$",
160 | "../experiment/algorithm_learning_data.pdf")
161 | print("end..")
162 |
163 | # file_priority_input_sequence = "../experiment/file_priority_input_sequence.txt"
164 | # file_priority_output_sequence = "../experiment/file_priority_output_sequence.txt"
165 | #
166 | # priority_input_sequence = open(file_priority_input_sequence, 'w')
167 | # (row, column) = input_sequence_sort.shape
168 | # for i in range(row):
169 | # for j in range(column):
170 | # one_point = "%d %d %f\n"%(i, j, input_sequence_sort[i][j])
171 | # priority_input_sequence.write(one_point)
172 | # priority_input_sequence.close()
173 |
174 |
175 | if __name__ == "__main__":
176 | show_algorithm_learning_example()
177 |
--------------------------------------------------------------------------------
/algorithm_learning/previous_version/learning_priority_sort.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''An implementation of learning the priority sort algorithm with RNN (basic
3 | RNN, LSTM, GRU).
4 | Input sequence length: "20" binary vectors, each with a scalar priority in [0, 1].
5 | Input dimension: "4+2".
6 | Output sequence length: "16" (the highest-priority input vectors, in priority order).
7 | Output dimension: equal to the input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven
18 | from keras.utils.visualize_util import plot # Add by Steven
19 | import time # Add by Steven Robot
20 | from keras.layers import Merge # Add by Steven Robot
21 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
22 | from keras.callbacks import Callback # Add by Steven Robot
23 | from algorithm_learning.util import LossHistory # Add by Steven Robot
24 | import os
25 |
26 |
27 | # Parameters for the model to train the priority sort algorithm
28 | # EXAMPLE_SIZE = 2560000
29 | # EXAMPLE_SIZE = 1024000
30 | EXAMPLE_SIZE = 128000
31 | # EXAMPLE_SIZE = 1280
32 | # INPUT_DIMENSION_SIZE = 8
33 | INPUT_DIMENSION_SIZE = 4
34 | INPUT_SEQUENCE_LENGTH = 20
35 | PRIORITY_OUTPUT_SEQUENCE_LENGTH = 16
36 | SEQUENCE_LENGTH = INPUT_SEQUENCE_LENGTH + PRIORITY_OUTPUT_SEQUENCE_LENGTH + 1
37 | PRIORITY_LOWER_BOUND = 0
38 | PRIORITY_UPPER_BOUND = 1
39 |
40 | # Try replacing SimpleRNN, GRU, or LSTM
41 | # RNN = recurrent.SimpleRNN
42 | # RNN = recurrent.GRU
43 | RNN = recurrent.LSTM
44 | # HIDDEN_SIZE = 128 # acc. 99.9%
45 | # HIDDEN_SIZE = 128*30 # 191919370 parameters
46 | # HIDDEN_SIZE = 128*16 # 54646794 parameters
47 | # HIDDEN_SIZE = 128*8 # 13691914 parameters
48 | HIDDEN_SIZE = 128*2 # 3438090 parameters
49 | # HIDDEN_SIZE = 128*1 # 220554 parameters
50 | # HIDDEN_SIZE = 64 # 57034 parameters
51 | LAYERS = 1
52 | # LAYERS = MAX_REPEAT_TIMES
53 | BATCH_SIZE = 1024
54 | # BATCH_SIZE = 16
55 | FOLDER = "experiment_results/priority_sort/"
56 | if not os.path.isdir(FOLDER):
57 | os.makedirs(FOLDER)
58 | print("create folder: %s" % FOLDER)
59 |
60 | print()
61 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
62 | print('Generating data sets...')
63 | train_x_seq, train_y_seq = \
64 | dataset.generate_priority_sort_data_set(
65 | INPUT_DIMENSION_SIZE,
66 | INPUT_SEQUENCE_LENGTH,
67 | PRIORITY_OUTPUT_SEQUENCE_LENGTH,
68 | PRIORITY_LOWER_BOUND,
69 | PRIORITY_UPPER_BOUND,
70 | EXAMPLE_SIZE)
71 | print(train_x_seq.shape)
72 | print(train_y_seq.shape)
73 | validation_x_seq, validation_y_seq = \
74 | dataset.generate_priority_sort_data_set(
75 | INPUT_DIMENSION_SIZE,
76 | INPUT_SEQUENCE_LENGTH,
77 | PRIORITY_OUTPUT_SEQUENCE_LENGTH,
78 | PRIORITY_LOWER_BOUND,
79 | PRIORITY_UPPER_BOUND,
80 | EXAMPLE_SIZE // 10)
81 | print(validation_x_seq.shape)
82 | print(validation_y_seq.shape)
83 |
84 | input_matrix = np.zeros(
85 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
86 | dtype=np.float32)
87 | output_matrix = np.zeros(
88 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
89 | dtype=np.float32)
90 | predict_matrix = np.zeros(
91 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
92 | dtype=np.float32)
93 | input_matrix = train_x_seq[0]
94 | output_matrix = train_y_seq[0]
95 | predict_matrix = output_matrix
96 | show_matrix = visualization.PlotDynamicalMatrix4PrioritySort(
97 | input_matrix.transpose(),
98 | output_matrix.transpose(),
99 | predict_matrix.transpose())
100 | random_index = np.random.randint(1, 128, 20)
101 | for i in range(20):
102 | input_matrix = train_x_seq[random_index[i]]
103 | output_matrix = train_y_seq[random_index[i]]
104 | predict_matrix = output_matrix
105 | show_matrix.update(input_matrix.transpose(),
106 | output_matrix.transpose(),
107 | predict_matrix.transpose())
108 | show_matrix.save(FOLDER+"priority_data_training_%2d.png"%i)
109 | # show_matrix.close()
110 |
111 | print()
112 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
113 | print('Build model...')
114 | model = Sequential()
115 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
116 | # note: in a situation where your input sequences have a variable length,
117 | # use input_shape=(None, nb_feature).
118 | hidden_layer = RNN(
119 | HIDDEN_SIZE,
120 | input_shape=(SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+2),
121 | init='glorot_uniform',
122 | inner_init='orthogonal',
123 | activation='tanh',
124 | # activation='hard_sigmoid',
125 | # activation='sigmoid',
126 | W_regularizer=None,
127 | U_regularizer=None,
128 | b_regularizer=None,
129 | dropout_W=0.0,
130 | dropout_U=0.0)
131 | model.add(hidden_layer)
132 | model.add(
133 | Dense(HIDDEN_SIZE, input_shape=(SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+2)))
134 |
135 | # For the decoder's input, we repeat the encoded input for each time step
136 | model.add(RepeatVector(SEQUENCE_LENGTH))
137 | # The decoder RNN could be multiple layers stacked or a single layer
138 | for _ in range(LAYERS):
139 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
140 |
141 | # For each step of the output sequence, decide which character should be chosen
142 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
143 | # model.add(Activation('softmax'))
144 | # model.add(Activation('hard_sigmoid'))
145 | model.add(Activation('sigmoid'))
146 |
147 | model.compile(loss='binary_crossentropy',
148 | #loss='mse',
149 | #loss='categorical_crossentropy',
150 | optimizer='adam',
151 | metrics=['accuracy'])
152 |
153 | print()
154 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
155 | print("Model architecture")
156 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_priority_sort.png")
157 | print("Model summary")
158 | print(model.summary())
159 | print("Model parameter count")
160 | print(model.count_params())
161 |
162 | print()
163 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
164 | print("Training...")
165 | # Train the model each generation and show predictions against the
166 | # validation dataset
167 | for iteration in range(1, 200):
168 | print()
169 | print('-' * 78)
170 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
171 | print('Iteration', iteration)
172 | history = LossHistory()
173 | check_pointer = ModelCheckpoint(
174 | filepath=FOLDER+"priority_sort_model_weights.hdf5",
175 | verbose=1, save_best_only=True)
176 | model.fit([train_x_seq],
177 | train_y_seq,
178 | batch_size=BATCH_SIZE,
179 | nb_epoch=10,
180 | # callbacks=[check_pointer, history],
181 | validation_data=([validation_x_seq], validation_y_seq))
182 | # print(len(history.losses))
183 | # print(history.losses)
184 | # print(len(history.acces))
185 | # print(history.acces)
186 |
187 | ###
188 | # Select 20 samples from the validation set at random so we can
189 | # visualize errors
190 | for i in range(20):
191 | ind = np.random.randint(0, len(validation_x_seq))
192 | inputs, outputs = validation_x_seq[np.array([ind])],\
193 | validation_y_seq[np.array([ind])]
194 | predicts = model.predict([inputs], verbose=0)
195 |
196 | input_matrix = validation_x_seq[np.array([ind])]
197 | output_matrix = validation_y_seq[np.array([ind])]
198 | predict_matrix = predicts
199 |
200 | show_matrix.update(input_matrix[0].transpose(),
201 | output_matrix[0].transpose(),
202 | predict_matrix[0].transpose())
203 | show_matrix.save(FOLDER+"priority_data_training_%2d_%2d.png" % (iteration, i))
204 |
205 | show_matrix.close()
206 |
207 |
--------------------------------------------------------------------------------
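The priority sort task above asks the model to reproduce the 16 highest-priority of the 20 input vectors, ordered by priority. The exact array layout produced by dataset.generate_priority_sort_data_set is not shown in this file; the sketch below only illustrates how such a target can be derived from a batch of vectors and priorities (keeping the priorities in a separate array is a simplifying assumption):

    import numpy as np

    def priority_sort_target(vectors, priorities, output_length):
        """Return the `output_length` highest-priority vectors, in priority order."""
        order = np.argsort(priorities)[::-1]      # indices sorted by descending priority
        return vectors[order[:output_length]]

    vectors = np.random.randint(0, 2, size=(20, 4)).astype(np.float32)
    priorities = np.random.uniform(0.0, 1.0, size=20)
    target = priority_sort_target(vectors, priorities, output_length=16)
    print(target.shape)  # (16, 4)
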
/algorithm_learning/previous_version/learning_repeat_copy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''An implementation of learning the repeat copy algorithm with RNN (basic RNN,
3 | LSTM, GRU).
4 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
5 | Input dimension: "4"
6 | Repeat times: "5"
7 | Output sequence length: equal to input sequence length * repeat times.
8 | Output dimension: equal to input dimension.
9 | '''
10 |
11 | from __future__ import print_function
12 | from keras.models import Sequential
13 | # from keras.engine.training import slice_X
14 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
15 | import numpy as np
16 | # from six.moves import range
17 | import dataset # Add by Steven Robot
18 | import visualization # Add by Steven Robot
19 | from keras.utils.visualize_util import plot # Add by Steven Robot
20 | import time # Add by Steven Robot
21 | from keras.layers import Merge # Add by Steven Robot
22 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
23 | from keras.callbacks import Callback # Add by Steven Robot
24 | from algorithm_learning.util import LossHistory # Add by Steven Robot
25 | import os # Add by Steven Robot
26 |
27 |
28 | # Parameters for the model to train the repeat copy algorithm
29 | TRAINING_SIZE = 1024000
30 | # TRAINING_SIZE = 128000
31 | # TRAINING_SIZE = 1280
32 | INPUT_DIMENSION_SIZE = 4 + 1
33 | MAX_COPY_LENGTH = 10
34 | # REPEAT_TIMES = 2
35 | # MAX_INPUT_LENGTH = MAX_COPY_LENGTH + 1 + REPEAT_TIMES * MAX_COPY_LENGTH + 1
36 | MAX_REPEAT_TIMES = 5
37 | MAX_INPUT_LENGTH = MAX_COPY_LENGTH + 1 + MAX_REPEAT_TIMES * MAX_COPY_LENGTH # + 1
38 |
39 | # Try replacing SimpleRNN, GRU, or LSTM
40 | # RNN = recurrent.SimpleRNN
41 | # RNN = recurrent.GRU
42 | RNN = recurrent.LSTM
43 | # HIDDEN_SIZE = 128 # acc. 99.9%
44 | HIDDEN_SIZE = 128*2
45 | LAYERS = 1
46 | # LAYERS = MAX_REPEAT_TIMES
47 | BATCH_SIZE = 1024
48 | # BATCH_SIZE = 128
49 | FOLDER = "experiment_results/repeat_copy/"
50 | if not os.path.isdir(FOLDER):
51 | os.makedirs(FOLDER)
52 | print("create folder: %s" % FOLDER)
53 |
54 | print()
55 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
56 | print('Generating data sets...')
57 | # Fix 2 times copying
58 | # train_X, train_Y = dataset.generate_repeat_copy_data_set(
59 | # INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE, REPEAT_TIMES)
60 | # valid_X, valid_Y = dataset.generate_repeat_copy_data_set(
61 | # INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE/10, REPEAT_TIMES)
62 | train_X, train_Y, train_repeats_times = dataset.generate_repeat_copy_data_set(
63 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE, MAX_REPEAT_TIMES)
64 | valid_X, valid_Y, valid_repeats_times = dataset.generate_repeat_copy_data_set(
65 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE // 10, MAX_REPEAT_TIMES)
66 | print(train_repeats_times)
67 | print(valid_repeats_times)
68 | train_repeats_times = (train_repeats_times - 1.0) / (MAX_REPEAT_TIMES - 1.0)
69 | valid_repeats_times = (valid_repeats_times - 1.0) / (MAX_REPEAT_TIMES - 1.0)
70 | print(train_repeats_times)
71 | print(valid_repeats_times)
72 |
73 | matrix_list = []
74 | matrix_list.append(train_X[0].transpose())
75 | matrix_list.append(train_Y[0].transpose())
76 | matrix_list.append(train_Y[0].transpose())
77 | name_list = []
78 | name_list.append("Input")
79 | name_list.append("Target")
80 | name_list.append("Predict")
81 | show_matrix = visualization.PlotDynamicalMatrix4Repeat(
82 | matrix_list, name_list, train_repeats_times[0])
83 | random_index = np.random.randint(1, 128, 20)
84 | for i in range(20):
85 | matrix_list_update = []
86 | matrix_list_update.append(train_X[random_index[i]].transpose())
87 | matrix_list_update.append(train_Y[random_index[i]].transpose())
88 | matrix_list_update.append(train_Y[random_index[i]].transpose())
89 | show_matrix.update(matrix_list_update, name_list, train_repeats_times[random_index[i]])
90 | show_matrix.save(FOLDER+"repeat_copy_data_training_%2d.png"%i)
91 |
92 | print()
93 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
94 | print('Build model...')
95 | input_sequence = Sequential()
96 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
97 | # note: in a situation where your input sequences have a variable length,
98 | # use input_shape=(None, nb_feature).
99 | hidden_layer = RNN(
100 | HIDDEN_SIZE,
101 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE),
102 | init='glorot_uniform',
103 | inner_init='orthogonal',
104 | activation='tanh',
105 | # activation='hard_sigmoid',
106 | # activation='sigmoid',
107 | W_regularizer=None,
108 | U_regularizer=None,
109 | b_regularizer=None,
110 | dropout_W=0.0,
111 | dropout_U=0.0)
112 | input_sequence.add(hidden_layer)
113 |
114 | repeat_times = Sequential()
115 | repeat_times.add(Dense(16, input_dim=1))
116 | repeat_times.add(Activation('sigmoid'))
117 |
118 | merged = Merge([input_sequence, repeat_times], mode='concat')
119 |
120 | model = Sequential()
121 | model.add(merged)
122 |
123 | # For the decoder's input, we repeat the encoded input for each time step
124 | model.add(RepeatVector(MAX_INPUT_LENGTH))
125 | # The decoder RNN could be multiple layers stacked or a single layer
126 | for _ in range(LAYERS):
127 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
128 |
129 | # For each step of the output sequence, decide which character should be chosen
130 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE)))
131 | # model.add(Activation('softmax'))
132 | # model.add(Activation('hard_sigmoid'))
133 | model.add(Activation('sigmoid'))
134 |
135 | model.compile(loss='binary_crossentropy',
136 | # loss='mse',
137 | optimizer='adam',
138 | metrics=['accuracy'])
139 |
140 | print()
141 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
142 | print("Model architecture")
143 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_repeat_copying.png")
144 | print("Model summary")
145 | print(model.summary())
146 | print("Model parameter count")
147 | print(model.count_params())
148 |
149 | print()
150 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
151 | print("Training...")
152 | # Train the model each generation and show predictions against the
153 | # validation dataset
154 | for iteration in range(1, 200):
155 | print()
156 | print('-' * 78)
157 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
158 | print('Iteration', iteration)
159 | history = LossHistory()
160 | check_pointer = ModelCheckpoint(
161 | filepath=FOLDER+"repeat_copying_model_weights.hdf5",
162 | verbose=1, save_best_only=True)
163 | model.fit([train_X, train_repeats_times],
164 | train_Y,
165 | batch_size=BATCH_SIZE,
166 | nb_epoch=30,
167 | # nb_epoch=1,
168 | callbacks=[check_pointer, history],
169 | validation_data=([valid_X, valid_repeats_times], valid_Y))
170 | # print(len(history.losses))
171 | # print(history.losses)
172 | # print(len(history.acces))
173 | # print(history.acces)
174 |
175 | ###
176 | # Select 20 samples from the validation set at random so we can
177 | # visualize errors
178 | for i in range(20):
179 | ind = np.random.randint(0, len(valid_X))
180 | inputs, repeats, outputs = valid_X[np.array([ind])], \
181 | valid_repeats_times[np.array([ind])], \
182 | valid_Y[np.array([ind])]
183 | predicts = model.predict([inputs, repeats], verbose=0)
184 | matrix_list_update = []
185 | matrix_list_update.append(inputs[0].transpose())
186 | matrix_list_update.append(outputs[0].transpose())
187 | matrix_list_update.append(predicts[0].transpose())
188 | show_matrix.update(matrix_list_update,
189 | name_list,
190 | valid_repeats_times[ind] * (MAX_REPEAT_TIMES - 1.0) + 1)
191 | show_matrix.save(FOLDER+"repeat_copy_data_predict_%3d.png" % iteration)
192 |
193 | show_matrix.close()
194 |
195 |
--------------------------------------------------------------------------------
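The repeat-copy script feeds the repeat count to a second input branch after rescaling it to [0, 1] with (r - 1) / (MAX_REPEAT_TIMES - 1), and reverses the rescaling when labelling the prediction plots. The round trip in isolation:

    MAX_REPEAT_TIMES = 5

    def normalize_repeats(r):
        # Map repeat counts 1..MAX_REPEAT_TIMES onto [0, 1], as done before training.
        return (r - 1.0) / (MAX_REPEAT_TIMES - 1.0)

    def denormalize_repeats(r_norm):
        # Inverse mapping, used when annotating the prediction figures.
        return r_norm * (MAX_REPEAT_TIMES - 1.0) + 1.0

    for r in range(1, MAX_REPEAT_TIMES + 1):
        assert denormalize_repeats(normalize_repeats(r)) == r
    print([normalize_repeats(r) for r in range(1, 6)])  # [0.0, 0.25, 0.5, 0.75, 1.0]
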
/algorithm_learning/previous_version/learning_associative_recall_review.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''An implementation of learning the associative recall algorithm with an
3 | LSTM (review version).
4 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))".
5 | Input dimension: "6+2", each item is 3*6 bits.
6 | Output sequence length: "3" (one item).
7 | Output dimension: equal to the input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | import time # Add by Steven Robot
20 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
21 | from keras.callbacks import Callback # Add by Steven Robot
22 | from algorithm_learning.util import LossHistory # Add by Steven Robot
23 | import os # Add by Steven Robot
24 | from keras.optimizers import Adam # Add by Steven Robot
25 | import sys # Add by Steven Robot
26 |
27 |
28 | # Parameters for the model to train the associative recall algorithm
29 | TRAINING_SIZE = 3*1024000
30 | # TRAINING_SIZE = 10240
31 | # TRAINING_SIZE = 128000
32 | # TRAINING_SIZE = 1280
33 | INPUT_DIMENSION_SIZE = 6
34 | ITEM_SIZE = 3
35 | MAX_EPISODE_SIZE = 6
36 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
37 |
38 |
39 | # Try replacing SimpleRNN, GRU, or LSTM
40 | # RNN = recurrent.SimpleRNN
41 | # RNN = recurrent.GRU
42 | RNN = recurrent.LSTM
43 | HIDDEN_SIZE = 256
44 | LAYERS = 2
45 | # LAYERS = MAX_REPEAT_TIMES
46 | BATCH_SIZE = 1024
47 | # BATCH_SIZE = 128
48 |
49 | folder_name = time.strftime('experiment_results/recall_lstm/%Y-%m-%d-%H-%M-%S/')
50 | # os.makedirs(folder_name)
51 | FOLDER = folder_name
52 | if not os.path.isdir(FOLDER):
53 | os.makedirs(FOLDER)
54 | print("create folder: %s" % FOLDER)
55 |
56 | start_time = time.time()
57 | sys_stdout = sys.stdout
58 | log_file = '%s/recall.log' % (folder_name)
59 | sys.stdout = open(log_file, 'a')
60 |
61 | print()
62 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
63 | print('Generating data sets...')
64 | train_X, train_Y = dataset.generate_associative_recall_data_set(
65 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
66 | valid_X, valid_Y = dataset.generate_associative_recall_data_set(
67 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE // 5)
68 |
69 | matrix_list = []
70 | matrix_list.append(train_X[0].transpose())
71 | matrix_list.append(train_Y[0].transpose())
72 | matrix_list.append(train_Y[0].transpose())
73 | name_list = []
74 | name_list.append("Input")
75 | name_list.append("Target")
76 | name_list.append("Predict")
77 | show_matrix = visualization.PlotDynamicalMatrix(matrix_list, name_list)
78 | random_index = np.random.randint(1, 128, 20)
79 | for i in range(20):
80 | matrix_list_update = []
81 | matrix_list_update.append(train_X[random_index[i]].transpose())
82 | matrix_list_update.append(train_Y[random_index[i]].transpose())
83 | matrix_list_update.append(train_Y[random_index[i]].transpose())
84 | show_matrix.update(matrix_list_update, name_list)
85 | show_matrix.save(FOLDER+"associative_recall_data_training_%2d.png" % i)
86 |
87 | print()
88 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
89 | print('Build model...')
90 | model = Sequential()
91 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
92 | # note: in a situation where your input sequences have a variable length,
93 | # use input_shape=(None, nb_feature).
94 | model.add(RNN(
95 | HIDDEN_SIZE,
96 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2),
97 | init='glorot_uniform',
98 | inner_init='orthogonal',
99 | activation='tanh',
100 | return_sequences=True,
101 | # activation='hard_sigmoid',
102 | # activation='sigmoid',
103 | W_regularizer=None,
104 | U_regularizer=None,
105 | b_regularizer=None,
106 | dropout_W=0.0,
107 | dropout_U=0.0))
108 |
109 |
110 | # For the decoder's input, we repeat the encoded input for each time step
111 | # model.add(RepeatVector(MAX_INPUT_LENGTH))
112 | # The decoder RNN could be multiple layers stacked or a single layer
113 | for _ in range(LAYERS):
114 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
115 |
116 | # For each step of the output sequence, decide which character should be chosen
117 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
118 | # model.add(Activation('softmax'))
119 | # model.add(Activation('hard_sigmoid'))
120 | model.add(Activation('sigmoid'))
121 |
122 | lr = 0.0001
123 | beta_1 = 0.9
124 | beta_2 = 0.999
125 | epsilon = 1e-8
126 | ADAM_ = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
127 |
128 | model.compile(loss='binary_crossentropy',
129 | # loss='mse',
130 | # optimizer='adam',
131 | optimizer=ADAM_,
132 | metrics=['accuracy'])
133 |
134 | print()
135 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
136 | print("Model architecture")
137 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_associative_recall.png")
138 | print("Model summary")
139 | print(model.summary())
140 | print("Model parameter count")
141 | print(model.count_params())
142 |
143 | print()
144 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
145 | print("Training...")
146 | # Train the model each generation and show predictions against the
147 | # validation dataset
148 | losses = []
149 | acces = []
150 | for iteration in range(1, 2):
151 | print()
152 | print('-' * 78)
153 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
154 | print('Iteration', iteration)
155 | history = LossHistory()
156 | check_pointer = ModelCheckpoint(
157 | filepath=FOLDER+"associative_recall_model_weights.hdf5",
158 | verbose=1, save_best_only=True)
159 | model.fit(train_X,
160 | train_Y,
161 | batch_size=BATCH_SIZE,
162 | # nb_epoch=30,
163 | nb_epoch=1,
164 | callbacks=[check_pointer, history],
165 | validation_data=(valid_X, valid_Y))
166 | # print(len(history.losses))
167 | # print(history.losses)
168 | # print(len(history.acces))
169 | # print(history.acces)
170 | losses.append(history.losses)
171 | acces.append(history.acces)
172 |
173 | ###
174 | # Select 20 samples from the validation set at random so we can
175 | # visualize errors
176 | for i in range(20):
177 | ind = np.random.randint(0, len(valid_X))
178 | inputs, outputs = valid_X[np.array([ind])], \
179 | valid_Y[np.array([ind])]
180 | predicts = model.predict(inputs, verbose=0)
181 | matrix_list_update = []
182 | matrix_list_update.append(inputs[0].transpose())
183 | matrix_list_update.append(outputs[0].transpose())
184 | matrix_list_update.append(predicts[0].transpose())
185 | show_matrix.update(matrix_list_update,
186 | name_list)
187 | show_matrix.save(FOLDER+"associative_data_predict_%3d.png"%iteration)
188 |
189 | show_matrix.close()
190 | print("\nlosses")
191 | print(len(losses))
192 | print(len(losses[0]))
193 | # print(losses.shape)
194 | sample_num = 1
195 | for los in losses:
196 | for lo in los:
197 | if sample_num % 100 == 1:
198 | print("(%d, %f)" % (sample_num, lo))
199 | sample_num = sample_num + 1
200 | # print(losses)
201 |
202 | print("\naccuracies")
203 | print(len(acces))
204 | print(len(acces[0]))
205 | # print(acces.shape)
206 | sample_num = 1
207 | for acc in acces:
208 | for ac in acc:
209 | if sample_num % 100 == 1:
210 | print("(%d, %f)" % (sample_num, ac))
211 | sample_num = sample_num + 1
212 | # print(acces)
213 |
214 | print ("task took %.3fs" % (float(time.time()) - start_time))
215 | sys.stdout.close()
216 | sys.stdout = sys_stdout
217 |
--------------------------------------------------------------------------------
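learning_associative_recall_review.py redirects sys.stdout into a log file for the whole run and restores it at the end. A slightly safer variant, sketched here as an alternative rather than what the script does, wraps the redirection in a context manager so the original stream is restored even if training raises:

    import sys
    from contextlib import contextmanager

    @contextmanager
    def stdout_to_file(path):
        """Temporarily send print output to `path`, restoring sys.stdout afterwards."""
        original = sys.stdout
        with open(path, 'a') as log:
            sys.stdout = log
            try:
                yield
            finally:
                sys.stdout = original

    # Hypothetical usage with the same log location as the script above:
    # with stdout_to_file(folder_name + 'recall.log'):
    #     model.fit(...)
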
/unit_test/ntm_unit_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''A unit test of the NTM layer defined in ntm.py, trained on the associative
3 | recall task using the Keras functional API.
4 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))".
5 | Input dimension: "6+2", each item is 3*6 bits.
6 | Output sequence length: "3" (one item).
7 | Output dimension: equal to the input dimension.
8 | '''
9 |
10 | from __future__ import print_function
11 | from keras.models import Sequential
12 | # from keras.engine.training import slice_X
13 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | import time # Add by Steven Robot
20 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
21 | from keras.callbacks import Callback # Add by Steven Robot
22 | from util import LossHistory # Add by Steven Robot
23 | import os # Add by Steven Robot
24 | import ntm # Add by Steven Robot
25 | # import lstm2ntm # Add by Steven Robot
26 | from keras.layers import Input, Dense
27 | from keras.models import Model
28 |
29 |
30 | # Parameters for the model to train the associative recall algorithm
31 | # TRAINING_SIZE = 1024000
32 | TRAINING_SIZE = 128000
33 | # TRAINING_SIZE = 1280
34 | INPUT_DIMENSION_SIZE = 6
35 | ITEM_SIZE = 3
36 | MAX_EPISODE_SIZE = 6
37 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
38 |
39 | # Try replacing SimpleRNN, GRU, or LSTM
40 | # RNN = recurrent.SimpleRNN
41 | # RNN = recurrent.GRU
42 | # RNN = recurrent.LSTM
43 | # RNN = lstm2ntm.NTM
44 | RNN = ntm.NTM
45 | HIDDEN_SIZE = 256
46 | LAYERS = 2
47 | # LAYERS = MAX_REPEAT_TIMES
48 | # BATCH_SIZE = 1024
49 | BATCH_SIZE = 128
50 | MEMORY_DIM = 128
51 | MEMORY_SIZE = 20
52 | CONTROLLER_OUTPUT_DIM = 100
53 | LOCATION_SHIFT_RANGE = 1
54 | NUM_READ_HEAD = 1
55 | NUM_WRITE_HEAD = 1
56 |
57 | FOLDER = "experiment_results/associative_recall_ntm/"
58 | if not os.path.isdir(FOLDER):
59 | os.makedirs(FOLDER)
60 | print("create folder: %s" % FOLDER)
61 |
62 | print()
63 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
64 | print('Generating data sets...')
65 | train_X, train_Y = dataset.generate_associative_recall_data_set(
66 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
67 | valid_X, valid_Y = dataset.generate_associative_recall_data_set(
68 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE // 5)
69 |
70 | matrix_list = []
71 | matrix_list.append(train_X[0].transpose())
72 | matrix_list.append(train_Y[0].transpose())
73 | matrix_list.append(train_Y[0].transpose())
74 | name_list = []
75 | name_list.append("Input")
76 | name_list.append("Target")
77 | name_list.append("Predict")
78 | show_matrix = visualization.PlotDynamicalMatrix(matrix_list, name_list)
79 | random_index = np.random.randint(1, 128, 20)
80 | for i in range(3):
81 | matrix_list_update = []
82 | matrix_list_update.append(train_X[random_index[i]].transpose())
83 | matrix_list_update.append(train_Y[random_index[i]].transpose())
84 | matrix_list_update.append(train_Y[random_index[i]].transpose())
85 | show_matrix.update(matrix_list_update, name_list)
86 | show_matrix.save(FOLDER+"associative_recall_data_training_%2d.png" % i)
87 |
88 | print()
89 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
90 | print('Build model...')
91 |
92 | print('Input sequence...')
93 | input_sequence = Input(shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2), name="input_sequence")  # +2 channels to match the generated data
94 | print('NTM...')
95 | ntm_out = RNN(
96 | output_dim=MEMORY_DIM,
97 | memory_dim=MEMORY_DIM,
98 | memory_size=MEMORY_SIZE,
99 | controller_output_dim=CONTROLLER_OUTPUT_DIM,
100 | location_shift_range=LOCATION_SHIFT_RANGE,
101 | num_read_head=NUM_READ_HEAD,
102 | num_write_head=NUM_WRITE_HEAD,
103 | init='glorot_uniform',
104 | inner_init='orthogonal',
105 | return_sequences=True,
106 | # activation='hard_sigmoid',
107 | activation='tanh',
108 | # activation='sigmoid',
109 | W_regularizer=None,
110 | U_regularizer=None,
111 | R_regularizer=None,
112 | b_regularizer=None,
113 | dropout_W=0.0,
114 | dropout_U=0.0)(input_sequence)
115 | print('Output sequence...')
116 | output_sequence = Dense(
117 | output_dim=INPUT_DIMENSION_SIZE+2,
118 | activation='sigmoid',
119 | name='output_sequence')(ntm_out)
120 | print('Model...')
121 | model = Model(input=input_sequence, output=output_sequence)
122 |
123 | #
124 | # model = Sequential()
125 | # # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
126 | # # note: in a situation where your input sequences have a variable length,
127 | # # use input_shape=(None, nb_feature).
128 | # model.add(RNN(
129 | # input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2),
130 | # # output_dim=INPUT_DIMENSION_SIZE+2,
131 | # output_dim=MEMORY_DIM,
132 | # memory_dim=MEMORY_DIM,
133 | # memory_size=MEMORY_SIZE,
134 | # controller_output_dim=CONTROLLER_OUTPUT_DIM,
135 | # location_shift_range=LOCATION_SHIFT_RANGE,
136 | # num_read_head=NUM_READ_HEAD,
137 | # num_write_head=NUM_WRITE_HEAD,
138 | # init='glorot_uniform',
139 | # inner_init='orthogonal',
140 | # return_sequences=True,
141 | # # activation='hard_sigmoid',
142 | # activation='tanh',
143 | # # activation='sigmoid',
144 | # W_regularizer=None,
145 | # U_regularizer=None,
146 | # R_regularizer=None,
147 | # b_regularizer=None,
148 | # dropout_W=0.0,
149 | # dropout_U=0.0))
150 | #
151 | #
152 | # # # For the decoder's input, we repeat the encoded input for each time step
153 | # # # model.add(RepeatVector(MAX_INPUT_LENGTH))
154 | # # # The decoder RNN could be multiple layers stacked or a single layer
155 | # # for _ in range(LAYERS):
156 | # # model.add(RNN(HIDDEN_SIZE, return_sequences=True))
157 | #
158 | # # For each step of the output sequence, decide which character should be chosen
159 | # model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
160 | # # model.add(Activation('softmax'))
161 | # # model.add(Activation('hard_sigmoid'))
162 | # model.add(Activation('sigmoid'))
163 |
164 | print('Compile...')
165 | model.compile(loss='binary_crossentropy',
166 | # loss='mse',
167 | optimizer='adam',
168 | metrics=['accuracy'])
169 |
170 | print()
171 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
172 | print("Model architecture")
173 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_associative_recall.png")
174 | print("Model summary")
175 | print(model.summary())
176 | print("Model parameter count")
177 | print(model.count_params())
178 |
179 | print()
180 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
181 | print("Training...")
182 | # Train the model each generation and show predictions against the
183 | # validation dataset
184 | for iteration in range(1, 200):
185 | print()
186 | print('-' * 78)
187 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
188 | print('Iteration', iteration)
189 | history = LossHistory()
190 | check_pointer = ModelCheckpoint(
191 | filepath=FOLDER+"associative_recall_model_weights.hdf5",
192 | verbose=1, save_best_only=True)
193 | model.fit(train_X,
194 | train_Y,
195 | batch_size=BATCH_SIZE,
196 | nb_epoch=30,
197 | # nb_epoch=1,
198 | callbacks=[check_pointer, history],
199 | validation_data=(valid_X, valid_Y))
200 | print(len(history.losses))
201 | print(history.losses)
202 | print(len(history.acces))
203 | print(history.acces)
204 |
205 | ###
206 | # Select 20 samples from the validation set at random so we can
207 | # visualize errors
208 | for i in range(20):
209 | ind = np.random.randint(0, len(valid_X))
210 | inputs, outputs = valid_X[np.array([ind])], \
211 | valid_Y[np.array([ind])]
212 | predicts = model.predict(inputs, verbose=0)
213 | matrix_list_update = []
214 | matrix_list_update.append(inputs[0].transpose())
215 | matrix_list_update.append(outputs[0].transpose())
216 | matrix_list_update.append(predicts[0].transpose())
217 | show_matrix.update(matrix_list_update,
218 | name_list)
219 | show_matrix.save(FOLDER+"associative_data_predict_%3d.png"%iteration)
220 |
221 | show_matrix.close()
222 |
--------------------------------------------------------------------------------
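ntm_unit_test.py passes location_shift_range=1 to the NTM layer. In the Neural Turing Machine addressing scheme that parameter bounds how far the location-based shift can rotate the previous head weighting via a circular convolution. A small NumPy illustration of that step (generic NTM background, not this repository's head.py):

    import numpy as np

    def shift_weighting(weights, shift_dist):
        """Rotate an attention weighting by a circular convolution.

        weights: (memory_size,) attention over memory rows.
        shift_dist: distribution over offsets -S..S, e.g. length 3 when S = 1.
        """
        span = len(shift_dist) // 2
        shifted = np.zeros_like(weights)
        for offset, p in zip(range(-span, span + 1), shift_dist):
            shifted += p * np.roll(weights, offset)
        return shifted

    w = np.zeros(20)
    w[4] = 1.0  # sharply focused on memory row 4
    print(shift_weighting(w, [0.1, 0.8, 0.1]))  # mass stays mostly at row 4, leaks to rows 3 and 5
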
/algorithm_learning/learning_associative_recall_lstm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning associative recall algorithm_learning with LSTM.
3 | Input sequence length: "2 ~ 6 items: (2*(3+1) ~ 6*(3+1))."
4 | Input dimension: "6+2", Item 3*6 bits
5 | Output sequence length: "3" one item .
6 | Output dimension: equal to input dimension.
7 | """
8 |
9 | from __future__ import print_function
10 | from keras.models import Sequential
11 | from keras.layers import Activation, TimeDistributed, Dense, recurrent
12 | import numpy as np
13 | # from six.moves import range
14 | # from keras.layers import RepeatVector
15 | # from keras.engine.training import slice_X
16 | # from keras.callbacks import Callback # Add by Steven Robot
17 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | from keras.optimizers import Adam # Add by Steven Robot
20 | from util import LossHistory # Add by Steven Robot
21 | from keras.callbacks import LambdaCallback # Add by Steven Robot
22 | import dataset # Add by Steven Robot
23 | import time # Add by Steven Robot
24 | import os # Add by Steven Robot
25 | import sys # Add by Steven Robot
26 | import matplotlib.pyplot as plt
27 | import visualization
28 |
29 |
30 | # Parameters for the model to train the associative recall algorithm
31 | # TRAINING_SIZE = 1024000
32 | TRAINING_SIZE = 10240
33 | # TRAINING_SIZE = 128000
34 | # TRAINING_SIZE = 1280
35 | INPUT_DIMENSION_SIZE = 6
36 | ITEM_SIZE = 3
37 | MAX_EPISODE_SIZE = 6
38 | MAX_INPUT_LENGTH = (ITEM_SIZE+1) * (MAX_EPISODE_SIZE+2)
39 |
40 |
41 | # Try replacing SimpleRNN, GRU, or LSTM
42 | # RNN = recurrent.SimpleRNN
43 | # RNN = recurrent.GRU
44 | RNN = recurrent.LSTM
45 | HIDDEN_SIZE = 256
46 | LAYERS = 2
47 | # LAYERS = MAX_REPEAT_TIMES
48 | BATCH_SIZE = 1024
49 | # BATCH_SIZE = 128
50 |
51 | folder_name = time.strftime('experiment_results/recall_lstm/%Y-%m-%d-%H-%M-%S/')
52 | # os.makedirs(folder_name)
53 | FOLDER = folder_name
54 | if not os.path.isdir(FOLDER):
55 | os.makedirs(FOLDER)
56 | print("create folder: %s" % FOLDER)
57 |
58 | start_time = time.time()
59 | sys_stdout = sys.stdout
60 | log_file = '%s/recall.log' % (folder_name)
61 | sys.stdout = open(log_file, 'a')
62 |
63 | print()
64 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
65 | print('Generating data sets...')
66 | train_X, train_Y = dataset.generate_associative_recall_data_set(
67 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE)
68 | valid_X, valid_Y = dataset.generate_associative_recall_data_set(
69 | INPUT_DIMENSION_SIZE, ITEM_SIZE, MAX_EPISODE_SIZE, TRAINING_SIZE // 5)
70 |
71 | matrix_list = []
72 | matrix_list.append(train_X[0].transpose())
73 | matrix_list.append(train_Y[0].transpose())
74 | matrix_list.append(train_Y[0].transpose())
75 | name_list = []
76 | name_list.append("Input")
77 | name_list.append("Target")
78 | name_list.append("Predict")
79 | show_matrix = visualization.PlotDynamicalMatrix(matrix_list, name_list)
80 | random_index = np.random.randint(1, 128, 20)
81 | for i in range(20):
82 | matrix_list_update = []
83 | matrix_list_update.append(train_X[random_index[i]].transpose())
84 | matrix_list_update.append(train_Y[random_index[i]].transpose())
85 | matrix_list_update.append(train_Y[random_index[i]].transpose())
86 | show_matrix.update(matrix_list_update, name_list)
87 | show_matrix.save(FOLDER+"associative_recall_data_training_%2d.png" % i)
88 |
89 | print()
90 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
91 | print('Build model...')
92 | model = Sequential()
93 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
94 | # note: in a situation where your input sequences have a variable length,
95 | # use input_shape=(None, nb_feature).
96 | model.add(RNN(
97 | HIDDEN_SIZE,
98 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE+2),
99 | init='glorot_uniform',
100 | inner_init='orthogonal',
101 | activation='tanh',
102 | return_sequences=True,
103 | # activation='hard_sigmoid',
104 | # activation='sigmoid',
105 | W_regularizer=None,
106 | U_regularizer=None,
107 | b_regularizer=None,
108 | dropout_W=0.0,
109 | dropout_U=0.0))
110 |
111 | # For the decoder's input, we repeat the encoded input for each time step
112 | # model.add(RepeatVector(MAX_INPUT_LENGTH))
113 | # The decoder RNN could be multiple layers stacked or a single layer
114 | for _ in range(LAYERS):
115 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
116 |
117 | # For each step of the output sequence, decide which character should be chosen
118 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
119 | # model.add(Activation('softmax'))
120 | # model.add(Activation('hard_sigmoid'))
121 | model.add(Activation('sigmoid'))
122 |
123 | # initialize the optimizer
124 | lr = 0.0001
125 | beta_1 = 0.9
126 | beta_2 = 0.999
127 | epsilon = 1e-8
128 | ADAM_ = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)
129 |
130 | # compile the model
131 | model.compile(loss='binary_crossentropy',
132 | # loss='mse',
133 | # optimizer='adam',
134 | optimizer=ADAM_,
135 | metrics=['accuracy'])
136 |
137 | # show the information of the model
138 | print()
139 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
140 | print("Model architecture")
141 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_associative_recall.png")
142 | print("Model summary")
143 | print(model.summary())
144 | print("Model parameter count")
145 | print(model.count_params())
146 |
147 | # begin training
148 | print()
149 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
150 | print("Training...")
151 | # Train the model each generation and show predictions against the
152 | # validation dataset
153 | losses = []
154 | acces = []
155 | for iteration in range(1, 3):
156 | print()
157 | print('-' * 78)
158 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
159 | print('Iteration', iteration)
160 | history = LossHistory()
161 | plot_loss_callback = LambdaCallback(
162 | on_epoch_end=lambda epoch, logs:
163 |             plt.plot(epoch, logs['loss'], 'b.'))  # one loss point per epoch
164 | check_pointer = ModelCheckpoint(
165 | filepath=FOLDER+"associative_recall_model_weights.hdf5",
166 | verbose=1, save_best_only=True)
167 | model.fit(train_X,
168 | train_Y,
169 | batch_size=BATCH_SIZE,
170 | # nb_epoch=30,
171 | nb_epoch=10,
172 | callbacks=[check_pointer, history, plot_loss_callback], #, plot_loss_callback
173 | validation_data=(valid_X, valid_Y))
174 | # print(len(history.losses))
175 | # print(history.losses)
176 | # print(len(history.acces))
177 | # print(history.acces)
178 | losses.append(history.losses)
179 | acces.append(history.acces)
180 |
181 | ###
182 | # Select 20 samples from the validation set at random so we can
183 | # visualize errors
184 | for i in range(20):
185 | ind = np.random.randint(0, len(valid_X))
186 | inputs, outputs = valid_X[np.array([ind])], \
187 | valid_Y[np.array([ind])]
188 | predicts = model.predict(inputs, verbose=0)
189 | matrix_list_update = []
190 | matrix_list_update.append(inputs[0].transpose())
191 | matrix_list_update.append(outputs[0].transpose())
192 | matrix_list_update.append(predicts[0].transpose())
193 | show_matrix.update(matrix_list_update,
194 | name_list)
195 | show_matrix.save(FOLDER+"associative_data_predict_%2d_%2d.png" % (iteration, i))
196 |
197 | show_matrix.close()
198 | # end of training
199 |
200 | # print loss and accuracy
201 | print("\nlosses")
202 | print(len(losses))
203 | print(len(losses[0]))
204 | # print(losses.shape)
205 | sample_num = 1
206 | for los in losses:
207 | for lo in los:
208 | if sample_num % 100 == 1:
209 | print("(%d, %f)" % (sample_num, lo))
210 | sample_num = sample_num + 1
211 | # print(losses)
212 |
213 | print("********************************************")
214 | print("\naccuracies")
215 | print(len(acces))
216 | print(len(acces[0]))
217 | # print(acces.shape)
218 | sample_num = 1
219 | for acc in acces:
220 | for ac in acc:
221 | if sample_num % 100 == 1:
222 | print("(%d, %f)" % (sample_num, ac))
223 | sample_num = sample_num + 1
224 | # print(acces)
225 |
226 | # print loss and accuracy
227 | print("\nlosses")
228 | print(len(losses))
229 | print(len(losses[0]))
230 | # print(losses.shape)
231 | sample_num = 1
232 | for los in losses:
233 | for lo in los:
234 | print("(%d, %f)" % (sample_num, lo))
235 | sample_num = sample_num + 1
236 | # print(losses)
237 |
238 | print("********************************************")
239 | print("\naccuracies")
240 | print(len(acces))
241 | print(len(acces[0]))
242 | # print(acces.shape)
243 | sample_num = 1
244 | for acc in acces:
245 | for ac in acc:
246 | print("(%d, %f)" % (sample_num, ac))
247 | sample_num = sample_num + 1
248 | # print(acces)
249 |
250 | print("task took %.3fs" % (time.time() - start_time))
251 | sys.stdout.close()
252 | sys.stdout = sys_stdout
253 |
--------------------------------------------------------------------------------
/algorithm_learning/learning_priority_sort_lstm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning priority sort algorithm_learning with LSTM.
3 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
4 | Input dimension: "8"
5 | Output sequence length: equal to input sequence length.
6 | Output dimension: equal to input dimension.
7 | """
8 |
9 | from __future__ import print_function
10 | from keras.models import Sequential
11 | # from keras.engine.training import slice_X
12 | from keras.layers import Activation, TimeDistributed, Dense, recurrent
13 | # from keras.layers import RepeatVector
14 | import numpy as np
15 | # from six.moves import range
16 | import dataset # Add by Steven Robot
17 | import visualization # Add by Steven
18 | from keras.utils.visualize_util import plot # Add by Steven Robot
19 | import time # Add by Steven Robot
20 | from keras.layers import Merge # Add by Steven Robot
21 | from keras.callbacks import Callback # Add by Steven Robot
22 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
23 | from util import LossHistory # Add by Steven Robot
24 | import os # Add by Steven Robot
25 | import sys # Add by Steven Robot
26 |
27 |
28 | # Parameters for the model to train the priority sort algorithm
29 | # EXAMPLE_SIZE = 2560000
30 | EXAMPLE_SIZE = 1024000
31 | # EXAMPLE_SIZE = 128000
32 | # EXAMPLE_SIZE = 1280
33 | INPUT_DIMENSION_SIZE = 8
34 | # INPUT_DIMENSION_SIZE = 4
35 | INPUT_SEQUENCE_LENGTH = 20
36 | PRIORITY_OUTPUT_SEQUENCE_LENGTH = 16
37 | SEQUENCE_LENGTH = INPUT_SEQUENCE_LENGTH + PRIORITY_OUTPUT_SEQUENCE_LENGTH + 1
38 | PRIORITY_LOWER_BOUND = 0
39 | PRIORITY_UPPER_BOUND = 1
40 |
41 | # Try replacing SimpleRNN, GRU, or LSTM
42 | # RNN = recurrent.SimpleRNN
43 | # RNN = recurrent.GRU
44 | RNN = recurrent.LSTM
45 | # HIDDEN_SIZE = 128 # acc. 99.9%
46 | # HIDDEN_SIZE = 128*30 # 191919370 parameters
47 | # HIDDEN_SIZE = 128*16 # 54646794 parameters
48 | # HIDDEN_SIZE = 128*8 # 13691914 parameters
49 | # HIDDEN_SIZE = 128*2 # 3438090 parameters
50 | HIDDEN_SIZE = 128*1 # 220554 parameters
51 | # HIDDEN_SIZE = 64 # 57034 parameters
52 | LAYERS = 2
53 | # LAYERS = MAX_REPEAT_TIMES
54 | BATCH_SIZE = 1024
55 | # BATCH_SIZE = 16
56 |
57 |
58 | folder_name = time.strftime('experiment_results/sort_lstm/%Y-%m-%d-%H-%M-%S/')
59 | # os.makedirs(folder_name)
60 | FOLDER = folder_name
61 | if not os.path.isdir(FOLDER):
62 | os.makedirs(FOLDER)
63 | print("create folder: %s" % FOLDER)
64 |
65 | start_time = time.time()
66 | sys_stdout = sys.stdout
67 | log_file = '%s/sort.log' % (folder_name)
68 | sys.stdout = open(log_file, 'a')
69 |
70 |
71 | print()
72 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
73 | print('Generating data sets...')
74 | train_x_seq, train_y_seq = \
75 | dataset.generate_priority_sort_data_set(
76 | INPUT_DIMENSION_SIZE,
77 | INPUT_SEQUENCE_LENGTH,
78 | PRIORITY_OUTPUT_SEQUENCE_LENGTH,
79 | PRIORITY_LOWER_BOUND,
80 | PRIORITY_UPPER_BOUND,
81 | EXAMPLE_SIZE)
82 | print(train_x_seq.shape)
83 | print(train_y_seq.shape)
84 | validation_x_seq, validation_y_seq = \
85 | dataset.generate_priority_sort_data_set(
86 | INPUT_DIMENSION_SIZE,
87 | INPUT_SEQUENCE_LENGTH,
88 | PRIORITY_OUTPUT_SEQUENCE_LENGTH,
89 | PRIORITY_LOWER_BOUND,
90 | PRIORITY_UPPER_BOUND,
91 | EXAMPLE_SIZE/10)
92 | print(validation_x_seq.shape)
93 | print(validation_y_seq.shape)
94 |
95 | input_matrix = np.zeros(
96 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
97 | dtype=np.float32)
98 | output_matrix = np.zeros(
99 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
100 | dtype=np.float32)
101 | predict_matrix = np.zeros(
102 | (SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+1),
103 | dtype=np.float32)
104 | input_matrix = train_x_seq[0]
105 | output_matrix = train_y_seq[0]
106 | predict_matrix = output_matrix
107 | show_matrix = visualization.PlotDynamicalMatrix4PrioritySort(
108 | input_matrix.transpose(),
109 | output_matrix.transpose(),
110 | predict_matrix.transpose())
111 | random_index = np.random.randint(1, 128, 20)
112 | for i in range(20):
113 | input_matrix = train_x_seq[random_index[i]]
114 | output_matrix = train_y_seq[random_index[i]]
115 | predict_matrix = output_matrix
116 | show_matrix.update(input_matrix.transpose(),
117 | output_matrix.transpose(),
118 | predict_matrix.transpose())
119 | show_matrix.save(FOLDER+"priority_data_training_%2d.png"%i)
120 | # show_matrix.close()
121 |
122 | print()
123 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
124 | print('Build model...')
125 | model = Sequential()
126 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
127 | # note: in a situation where your input sequences have a variable length,
128 | # use input_shape=(None, nb_feature).
129 | hidden_layer = RNN(
130 | HIDDEN_SIZE,
131 | input_shape=(SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+2),
132 | init='glorot_uniform',
133 | inner_init='orthogonal',
134 | activation='tanh',
135 | # activation='hard_sigmoid',
136 | # activation='sigmoid',
137 | return_sequences=True,
138 | W_regularizer=None,
139 | U_regularizer=None,
140 | b_regularizer=None,
141 | dropout_W=0.0,
142 | dropout_U=0.0)
143 | model.add(hidden_layer)
144 |
145 | # model.add(
146 | # Dense(HIDDEN_SIZE, input_shape=(SEQUENCE_LENGTH, INPUT_DIMENSION_SIZE+2)))
147 |
148 | # For the decoder's input, we repeat the encoded input for each time step
149 | # model.add(RepeatVector(SEQUENCE_LENGTH))
150 | # The decoder RNN could be multiple layers stacked or a single layer
151 | for _ in range(LAYERS):
152 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
153 |
154 | # For each step of the output sequence, decide which character should be chosen
155 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+2)))
156 | # model.add(Activation('softmax'))
157 | # model.add(Activation('hard_sigmoid'))
158 | model.add(Activation('sigmoid'))
159 |
160 | model.compile(loss='binary_crossentropy',
161 | #loss='mse',
162 | #loss='categorical_crossentropy',
163 | optimizer='adam',
164 | metrics=['accuracy'])
165 |
166 | print()
167 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
168 | print("Model architecture")
169 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_priority_sort.png")
170 | print("Model summary")
171 | print(model.summary())
172 | print("Model parameter count")
173 | print(model.count_params())
174 |
175 | print()
176 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
177 | print("Training...")
178 | # Train the model each generation and show predictions against the
179 | # validation dataset
180 | losses = []
181 | acces = []
182 | for iteration in range(1, 3):
183 | print()
184 | print('-' * 78)
185 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
186 | print('Iteration', iteration)
187 | history = LossHistory()
188 | check_pointer = ModelCheckpoint(
189 | filepath=FOLDER+"priority_sort_model_weights.hdf5",
190 | verbose=1, save_best_only=True)
191 | model.fit([train_x_seq],
192 | train_y_seq,
193 | batch_size=BATCH_SIZE,
194 | nb_epoch=1,
195 | callbacks=[check_pointer, history],
196 | validation_data=([validation_x_seq], validation_y_seq))
197 | # print(len(history.losses))
198 | # print(history.losses)
199 | # print(len(history.acces))
200 | # print(history.acces)
201 | losses.append(history.losses)
202 | acces.append(history.acces)
203 |
204 | ###
205 | # Select 20 samples from the validation set at random so we can
206 | # visualize errors
207 | for i in range(20):
208 | ind = np.random.randint(0, len(validation_x_seq))
209 | inputs, outputs = validation_x_seq[np.array([ind])],\
210 | validation_y_seq[np.array([ind])]
211 | predicts = model.predict([inputs], verbose=0)
212 |
213 | input_matrix = validation_x_seq[np.array([ind])]
214 | output_matrix = validation_y_seq[np.array([ind])]
215 | predict_matrix = predicts
216 |
217 | show_matrix.update(input_matrix[0].transpose(),
218 | output_matrix[0].transpose(),
219 | predict_matrix[0].transpose())
220 | show_matrix.save(FOLDER+"priority_data_predict_%2d_%2d.png" % (iteration, i))
221 |
222 | show_matrix.close()
223 |
224 | # end of training
225 |
226 | # print loss and accuracy
227 | print("\nlosses")
228 | print(len(losses))
229 | print(len(losses[0]))
230 | # print(losses.shape)
231 | sample_num = 1
232 | for los in losses:
233 | for lo in los:
234 | if sample_num % 100 == 1:
235 | print("(%d, %f)" % (sample_num, lo))
236 | sample_num = sample_num + 1
237 | # print(losses)
238 |
239 | print("********************************************")
240 | print("\naccuracies")
241 | print(len(acces))
242 | print(len(acces[0]))
243 | # print(acces.shape)
244 | sample_num = 1
245 | for acc in acces:
246 | for ac in acc:
247 | if sample_num % 100 == 1:
248 | print("(%d, %f)" % (sample_num, ac))
249 | sample_num = sample_num + 1
250 | # print(acces)
251 |
252 | # print loss and accuracy
253 | print("\nlosses")
254 | print(len(losses))
255 | print(len(losses[0]))
256 | # print(losses.shape)
257 | sample_num = 1
258 | for los in losses:
259 | for lo in los:
260 | print("(%d, %f)" % (sample_num, lo))
261 | sample_num = sample_num + 1
262 | # print(losses)
263 |
264 | print("********************************************")
265 | print("\naccuracies")
266 | print(len(acces))
267 | print(len(acces[0]))
268 | # print(acces.shape)
269 | sample_num = 1
270 | for acc in acces:
271 | for ac in acc:
272 | print("(%d, %f)" % (sample_num, ac))
273 | sample_num = sample_num + 1
274 | # print(acces)
275 |
276 | print("task took %.3fs" % (time.time() - start_time))
277 | sys.stdout.close()
278 | sys.stdout = sys_stdout
279 |
280 |
--------------------------------------------------------------------------------
/algorithm_learning/learning_repeat_copy_lstm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """An implementation of learning copying algorithm_learning with RNN (basic RNN, LSTM,
3 | GRU).
4 | Input sequence length: "1 ~ 20: (1*2+1)=3 ~ (20*2+1)=41"
5 | Input dimension: "4"
6 | Repeat times: "5"
7 | Output sequence length: equal to input sequence length * repeat times.
8 | Output dimension: equal to input dimension.
9 | """
10 |
11 | from __future__ import print_function
12 | from keras.models import Sequential
13 | # from keras.engine.training import slice_X
14 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
15 | import numpy as np
16 | # from six.moves import range
17 | import dataset # Add by Steven Robot
18 | import visualization # Add by Steven Robot
19 | from keras.utils.visualize_util import plot # Add by Steven Robot
20 | import time # Add by Steven Robot
21 | from keras.layers import Merge # Add by Steven Robot
22 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
23 | from keras.callbacks import Callback # Add by Steven Robot
24 | from util import LossHistory # Add by Steven Robot
25 | import time # Add by Steven Robot
26 | import os # Add by Steven Robot
27 | import sys # Add by Steven Robot
28 | from keras_tqdm import TQDMNotebookCallback # Add by Steven Robot
29 |
30 |
31 | # Parameters for the model to train the repeat copy algorithm
32 | # TRAINING_SIZE = 4*1024000
33 | # TRAINING_SIZE = 1024000
34 | # TRAINING_SIZE = 10240
35 | # TRAINING_SIZE = 128000
36 | TRAINING_SIZE = 1280
37 | # INPUT_DIMENSION_SIZE = 4 + 1
38 | # INPUT_DIMENSION_SIZE = 7 + 1
39 | INPUT_DIMENSION_SIZE = 8 + 1
40 | MAX_COPY_LENGTH = 10
41 | # REPEAT_TIMES = 2
42 | # MAX_INPUT_LENGTH = MAX_COPY_LENGTH + 1 + REPEAT_TIMES * MAX_COPY_LENGTH + 1
43 | # MAX_REPEAT_TIMES = 5
44 | MAX_REPEAT_TIMES = 10
45 | MAX_INPUT_LENGTH = MAX_COPY_LENGTH + 1 + MAX_REPEAT_TIMES * MAX_COPY_LENGTH # + 1
46 |
47 | # Try replacing SimpleRNN, GRU, or LSTM
48 | # RNN = recurrent.SimpleRNN
49 | # RNN = recurrent.GRU
50 | RNN = recurrent.LSTM
51 | # HIDDEN_SIZE = 128 # acc. 99.9%
52 | HIDDEN_SIZE = 128*4
53 | LAYERS = 2
54 | # LAYERS = MAX_REPEAT_TIMES
55 | # BATCH_SIZE = 1024
56 | BATCH_SIZE = 128
57 | # BATCH_SIZE = 128
58 |
59 |
60 | folder_name = time.strftime('experiment_results/re_copy_lstm/%Y-%m-%d-%H-%M-%S/')
61 | # os.makedirs(folder_name)
62 | FOLDER = folder_name
63 | if not os.path.isdir(FOLDER):
64 | os.makedirs(FOLDER)
65 | print("create folder: %s" % FOLDER)
66 |
67 | start_time = time.time()
68 | # sys_stdout = sys.stdout
69 | # log_file = '%s/recall.log' % (folder_name)
70 | # sys.stdout = open(log_file, 'a')
71 |
72 |
73 | print()
74 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
75 | print('Generating data sets...')
76 | # Fix 2 times copying
77 | # train_X, train_Y = dataset.generate_repeat_copy_data_set(
78 | # INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE, REPEAT_TIMES)
79 | # valid_X, valid_Y = dataset.generate_repeat_copy_data_set(
80 | # INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE/10, REPEAT_TIMES)
81 | train_X, train_Y, train_repeats_times = dataset.generate_repeat_copy_data_set(
82 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE, MAX_REPEAT_TIMES)
83 | valid_X, valid_Y, valid_repeats_times = dataset.generate_repeat_copy_data_set(
84 | INPUT_DIMENSION_SIZE, MAX_COPY_LENGTH, TRAINING_SIZE/10, MAX_REPEAT_TIMES)
85 | print(train_repeats_times)
86 | print(valid_repeats_times)
87 | train_repeats_times = (train_repeats_times - 1.0) / (MAX_REPEAT_TIMES - 1.0)
88 | valid_repeats_times = (valid_repeats_times - 1.0) / (MAX_REPEAT_TIMES - 1.0)
89 | print(train_repeats_times)
90 | print(valid_repeats_times)
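# The repeat counts are rescaled from {1, ..., MAX_REPEAT_TIMES} to [0, 1]
# before being fed into the Dense(16) "repeat_times" branch below; with
# MAX_REPEAT_TIMES = 10 this maps 1 -> 0.0, 4 -> (4-1)/9 = 0.333..., 10 -> 1.0.
# The prediction loop later inverts the scaling with r * (MAX_REPEAT_TIMES - 1.0) + 1.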
91 |
92 | matrix_list = []
93 | matrix_list.append(train_X[0].transpose())
94 | matrix_list.append(train_Y[0].transpose())
95 | matrix_list.append(train_Y[0].transpose())
96 | name_list = []
97 | name_list.append("Input")
98 | name_list.append("Target")
99 | name_list.append("Predict")
100 | show_matrix = visualization.PlotDynamicalMatrix4Repeat(
101 | matrix_list, name_list, train_repeats_times[0])
102 | random_index = np.random.randint(1, 128, 20)
103 | for i in range(20):
104 | matrix_list_update = []
105 | matrix_list_update.append(train_X[random_index[i]].transpose())
106 | matrix_list_update.append(train_Y[random_index[i]].transpose())
107 | matrix_list_update.append(train_Y[random_index[i]].transpose())
108 | show_matrix.update(matrix_list_update, name_list, train_repeats_times[random_index[i]])
109 | show_matrix.save(FOLDER+"repeat_copy_data_training_%2d.png"%i)
110 |
111 | print()
112 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
113 | print('Build model...')
114 | input_sequence = Sequential()
115 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
116 | # note: in a situation where your input sequences have a variable length,
117 | # use input_shape=(None, nb_feature).
118 | hidden_layer = RNN(
119 | HIDDEN_SIZE,
120 | input_shape=(MAX_INPUT_LENGTH, INPUT_DIMENSION_SIZE),
121 | init='glorot_uniform',
122 | inner_init='orthogonal',
123 | activation='tanh',
124 | return_sequences=True,
125 | # activation='hard_sigmoid',
126 | # activation='sigmoid',
127 | W_regularizer=None,
128 | U_regularizer=None,
129 | b_regularizer=None,
130 | dropout_W=0.0,
131 | dropout_U=0.0)
132 | input_sequence.add(hidden_layer)
133 |
134 | repeat_times = Sequential()
135 | repeat_times.add(Dense(16, input_dim=1))
136 | repeat_times.add(Activation('sigmoid'))
137 | repeat_times.add(RepeatVector(MAX_INPUT_LENGTH)) # add
138 |
139 | merged = Merge([input_sequence, repeat_times], mode='concat')
140 |
141 | model = Sequential()
142 | model.add(merged)
143 |
144 | # For the decoder's input, we repeat the encoded input for each time step
145 | # model.add(RepeatVector(MAX_INPUT_LENGTH))
146 | # The decoder RNN could be multiple layers stacked or a single layer
147 | for _ in range(LAYERS):
148 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
149 |
150 | # For each step of the output sequence, decide which character should be chosen
151 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE)))
152 | # model.add(Activation('softmax'))
153 | # model.add(Activation('hard_sigmoid'))
154 | model.add(Activation('sigmoid'))
155 |
156 | model.compile(loss='binary_crossentropy',
157 | # loss='mse',
158 | optimizer='adam',
159 | metrics=['accuracy'])
160 |
161 | print()
162 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
163 | print("Model architecture")
164 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_repeat_copying.png")
165 | print("Model summary")
166 | print(model.summary())
167 | print("Model config")
168 | print(model.get_config())
169 | print("Model parameter count")
170 | print(model.count_params())
171 |
172 | print()
173 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
174 | print("Training...")
175 | # Train the model each generation and show predictions against the
176 | # validation dataset
177 | losses = []
178 | acces = []
179 | for iteration in range(1, 3):
180 | print()
181 | print('-' * 78)
182 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
183 | print('Iteration', iteration)
184 | history = LossHistory()
185 | check_pointer = ModelCheckpoint(
186 | filepath=FOLDER+"repeat_copying_model_weights.hdf5",
187 | verbose=2, save_best_only=True)
188 | model.fit([train_X, train_repeats_times],
189 | train_Y,
190 | batch_size=BATCH_SIZE,
191 | verbose=2,
192 | nb_epoch=10,
193 | # nb_epoch=30,
194 | # nb_epoch=1,
195 | # callbacks=[TQDMNotebookCallback()],
196 | callbacks=[check_pointer, history],
197 | validation_data=([valid_X, valid_repeats_times], valid_Y))
198 | # print(len(history.losses))
199 | # print(history.losses)
200 | # print(len(history.acces))
201 | # print(history.acces)
202 | losses.append(history.losses)
203 | acces.append(history.acces)
204 |
205 | ###
206 | # Select 20 samples from the validation set at random so we can
207 | # visualize errors
208 | for i in range(20):
209 | ind = np.random.randint(0, len(valid_X))
210 | inputs, repeats, outputs = valid_X[np.array([ind])], \
211 | valid_repeats_times[np.array([ind])], \
212 | valid_Y[np.array([ind])]
213 | predicts = model.predict([inputs, repeats], verbose=0)
214 | matrix_list_update = []
215 | matrix_list_update.append(inputs[0].transpose())
216 | matrix_list_update.append(outputs[0].transpose())
217 | matrix_list_update.append(predicts[0].transpose())
218 | show_matrix.update(matrix_list_update,
219 | name_list,
220 | valid_repeats_times[ind] * (MAX_REPEAT_TIMES - 1.0) + 1)
221 | show_matrix.save(FOLDER+"repeat_copy_data_predict_%2d_%2d.png" % (iteration, i))
222 |
223 | show_matrix.close()
224 |
225 | # end of training
226 |
227 | # print loss and accuracy
228 | print("\nlosses")
229 | print(len(losses))
230 | print(len(losses[0]))
231 | # print(losses.shape)
232 | sample_num = 1
233 | for los in losses:
234 | for lo in los:
235 | if sample_num % 100 == 1:
236 | print("(%d, %f)" % (sample_num, lo))
237 | sample_num = sample_num + 1
238 | # print(losses)
239 |
240 | print("********************************************")
241 | print("\naccuracies")
242 | print(len(acces))
243 | print(len(acces[0]))
244 | # print(acces.shape)
245 | sample_num = 1
246 | for acc in acces:
247 | for ac in acc:
248 | if sample_num % 100 == 1:
249 | print("(%d, %f)" % (sample_num, ac))
250 | sample_num = sample_num + 1
251 | # print(acces)
252 |
253 | # print loss and accuracy
254 | print("\nlosses")
255 | print(len(losses))
256 | print(len(losses[0]))
257 | # print(losses.shape)
258 | sample_num = 1
259 | for los in losses:
260 | for lo in los:
261 | print("(%d, %f)" % (sample_num, lo))
262 | sample_num = sample_num + 1
263 | # print(losses)
264 |
265 | print("********************************************")
266 | print("\naccuracies")
267 | print(len(acces))
268 | print(len(acces[0]))
269 | # print(acces.shape)
270 | sample_num = 1
271 | for acc in acces:
272 | for ac in acc:
273 | print("(%d, %f)" % (sample_num, ac))
274 | sample_num = sample_num + 1
275 | # print(acces)
276 |
277 | print("task took %.3fs" % (time.time() - start_time))
278 | # sys.stdout.close()  # stdout was never redirected above, so do not close it
279 | # sys.stdout = sys_stdout
280 |
281 |
--------------------------------------------------------------------------------
/algorithm_learning/previous_version/learning_dynamic_n_grams.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """The goal of dynamic N-Grams task was to test whether NTM could rapidly
3 | adapt to new predictive distributions. In particular we were interested to
4 | see if it were able to use its memory as a re-writable that it could use to
5 | keep count of transition statistics, thereby emulating a conventional
6 | N-Gram model.
7 |
8 | We considered the set of all possible 6-Gram distributions over binary
9 | sequences. Each 6-Gram distribution can be expressed as a table of
10 | $2^{5}=32$ numbers, specifying the probability that the next bit will be
11 | one, given all possible length five binary histories.
12 |
13 | For each training example, we first generated random 6-Gram probabilities by
14 | independently drawing all 32 probabilities from the $Beta(0.5, 0.5)$
15 | distribution. We then generated a particular training sequence by drawing
16 | 200 successive bits using the current lookup table. The network observes the
17 | sequence one bit at a time and is then asked to predict the next bit. The
18 | optimal estimator for the problem can be determined by Bayesian analysis
19 | $$P(B=1|N_{1}, N_{0}, c) = \frac{N_{1} + 0.5}{N_{1} + N_{0} + 1.0}$$
20 | where c is the five bit previous context, B is the value of the next bit and
21 | $N_{0}$ and $N_{1}$ are respectively the number of zeros and ones observed
22 | after c so far in the sequence.
23 |
24 | To assess performance we used a validation set of 1000 length 200 sequences
25 | sampled from the same distribution as the training data.
26 |
27 | Input sequence length: "200"
28 | Input dimension: "1"
29 | Output sequence length: equal to input sequence length.
30 | Output dimension: equal to input dimension.
31 | """
32 |
33 | from __future__ import print_function
34 | from keras.models import Sequential
35 | # from keras.engine.training import slice_X
36 | from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
37 | import numpy as np
38 | # from six.moves import range
39 | import dataset # Add by Steven Robot
40 | import visualization # Add by Steven
41 | from keras.utils.visualize_util import plot # Add by Steven
42 | import time # Add by Steven Robot
43 | from keras.layers import Merge # Add by Steven Robot
44 | from keras.callbacks import ModelCheckpoint # Add by Steven Robot
45 | from keras.callbacks import Callback # Add by Steven Robot
46 | from algorithm_learning.util import LossHistory # Add by Steven Robot
47 | import cPickle as pickle
48 | import random
49 | import os
50 |
51 |
52 | # Parameters for the model to train dynamic N Gram
53 | # EXAMPLE_SIZE = 1024000 # need 70000 seconds to generate these sequences
54 | EXAMPLE_SIZE = 128000 # need 700 seconds to generate these sequences
55 | # EXAMPLE_SIZE = 12800 # need 70 seconds to generate these sequences
56 | # EXAMPLE_SIZE = 1280 # need 7 seconds to generate these sequences
57 | A = 0.5
58 | B = 0.5
59 | N_GRAM_SIZE = 6
60 | INPUT_LENGTH = 40
61 | # INPUT_LENGTH = 100
62 | INPUT_DIMENSION_SIZE = 2
63 |
64 |
65 | # Try replacing SimpleRNN, GRU, or LSTM
66 | # RNN = recurrent.SimpleRNN
67 | # RNN = recurrent.GRU
68 | RNN = recurrent.LSTM
69 | HIDDEN_SIZE = 128*1
70 | # HIDDEN_SIZE = 128*4 # 3152385*4B/1024/1024=12MB
71 | # HIDDEN_SIZE = 128*8 # 12596225*4B/1024/1024=48MB
72 | # HIDDEN_SIZE = 128*16 # 50358273*4B/1024/1024=192MB
73 | # HIDDEN_SIZE = 128*32 # 201,379,841*4B/1024/1024=768MB
74 | # HIDDEN_SIZE = 128*40 # 314,639,361*4B/1024/1024=1200MB
75 | # HIDDEN_SIZE = 128*128 # MemoryError: ('Error allocating 1073741824 bytes
76 | # of device memory (CNMEM_STATUS_OUT_OF_MEMORY).',
77 | # "you might consider using 'theano.shared(..., borrow=True)'")
78 | LAYERS = 1
79 | # LAYERS = MAX_REPEAT_TIMES
80 | BATCH_SIZE = 1024
81 | # BATCH_SIZE = 512
82 | # BATCH_SIZE = 360
83 | # BATCH_SIZE = 256
84 | # BATCH_SIZE = 128 # if the batch size is larger than example size the
85 | # CUDA will report error
86 | # BATCH_SIZE = 64
87 | # BATCH_SIZE = 8 # if the batch size is large and the hidden size is 128*16 the
88 | # CUDA will report error
89 |
90 | FOLDER = "experiment_results/dynamic_n_grams/"
91 | if not os.path.isdir(FOLDER):
92 | os.makedirs(FOLDER)
93 | print("create folder: %s" % FOLDER)
94 |
95 | print()
96 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
97 | print('Generating data sets...')
98 | print(' generating look up table...')
99 | look_up_table = dataset.generate_probability_of_n_gram_by_beta(
100 | A, B, N_GRAM_SIZE)
101 | print(look_up_table)
102 | print(" dumping look up table...")
103 | pickle.dump(look_up_table,
104 | # open(FOLDER+"n_gram_look_up_table.txt", "w"),
105 | open(FOLDER+"n_gram_look_up_table.txt", "wb"),
106 | True)
107 | print(" loading look up table...")
108 | look_up_table = pickle.load(
109 | open(FOLDER+"n_gram_look_up_table.txt", "rb")) # "rb"
110 | print(" Look_up_table = ")
111 | print(look_up_table)
112 |
113 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
114 | print(' generating training x, y...')
115 | train_X, train_Y = dataset.generate_dynamical_n_gram_data_set(
116 | look_up_table, N_GRAM_SIZE, INPUT_LENGTH, EXAMPLE_SIZE)
117 | print(" dumping training x, y...")
118 | pickle.dump(train_X,
119 | open(FOLDER+"n_gram_train_X.txt", "wb"),
120 | True)
121 | pickle.dump(train_Y,
122 | open(FOLDER+"n_gram_train_Y.txt", "wb"),
123 | True)
124 | print(" loading training x, y...")
125 | train_X = pickle.load(
126 | open(FOLDER+"n_gram_train_X.txt", "rb"))
127 | train_Y = pickle.load(
128 | open(FOLDER+"n_gram_train_Y.txt", "rb"))
129 |
130 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
131 | print(" train_X.shape = ")
132 | print(train_X.shape)
133 | print(" train_Y.shape = ")
134 | print(train_Y.shape)
135 |
136 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
137 | print(' generating validation x, y...')
138 | valid_X, valid_Y = dataset.generate_dynamical_n_gram_data_set(
139 | look_up_table, N_GRAM_SIZE, INPUT_LENGTH, EXAMPLE_SIZE/10)
140 | print(" dumping validation x, y...")
141 | pickle.dump(valid_X,
142 | open(FOLDER+"n_gram_valid_X.txt", "wb"),
143 | True)
144 | pickle.dump(valid_Y,
145 | open(FOLDER+"n_gram_valid_Y.txt", "wb"),
146 | True)
147 | print(" validation training x, y...")
148 | valid_X = pickle.load(
149 | open(FOLDER+"n_gram_valid_X.txt", "rb"))
150 | valid_Y = pickle.load(
151 | open(FOLDER+"n_gram_valid_Y.txt", "rb"))
152 |
153 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
154 | print(" valid_X.shape = ")
155 | print(valid_X.shape)
156 | print(" valid_Y.shape = ")
157 | print(valid_Y.shape)
158 |
159 | print()
160 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
161 | print('Showing data sets...')
162 | # show training sample
163 | show_matrix = visualization.PlotDynamicalMatrix4NGram(
164 | train_X[0].transpose(), train_Y[0].transpose(), train_Y[0].transpose())
165 | # show_size = EXAMPLE_SIZE/10
166 | show_size = 20
167 | random_index = np.random.randint(1, EXAMPLE_SIZE, show_size)
168 | for i in range(show_size):
169 | show_matrix.update(
170 | train_X[random_index[i]].transpose(),
171 | train_Y[random_index[i]].transpose(),
172 | train_Y[random_index[i]].transpose())
173 | show_matrix.save(FOLDER+"n_gram_data_training_%2d.png"%i)
174 |
175 | print()
176 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
177 | print('Build model...')
178 | model = Sequential()
179 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
180 | # note: in a situation where your input sequences have a variable length,
181 | # use input_shape=(None, nb_feature).
182 | hidden_layer = RNN(
183 | HIDDEN_SIZE,
184 | input_shape=(INPUT_LENGTH*2-N_GRAM_SIZE+2, INPUT_DIMENSION_SIZE+1),
185 | init='glorot_uniform',
186 | inner_init='orthogonal',
187 | activation='tanh',
188 | # activation='hard_sigmoid',
189 | # activation='sigmoid',
190 | W_regularizer=None,
191 | U_regularizer=None,
192 | b_regularizer=None,
193 | dropout_W=0.0,
194 | dropout_U=0.0)
195 | model.add(hidden_layer)
196 |
197 | # For the decoder's input, we repeat the encoded input for each time step
198 | model.add(RepeatVector(INPUT_LENGTH*2-N_GRAM_SIZE+2))
199 | # The decoder RNN could be multiple layers stacked or a single layer
200 | for _ in range(LAYERS):
201 | model.add(RNN(HIDDEN_SIZE, return_sequences=True))
202 |
203 | # For each step of the output sequence, decide which character should be chosen
204 | model.add(TimeDistributed(Dense(INPUT_DIMENSION_SIZE+1)))
205 | model.add(Activation('softmax'))
206 | # model.add(Activation('hard_sigmoid'))
207 | # model.add(Activation('sigmoid'))
208 |
209 | model.compile(#loss='binary_crossentropy',
210 | # loss='mse',
211 | loss='categorical_crossentropy',
212 | optimizer='adam',
213 | metrics=['accuracy'])
214 |
215 | print()
216 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
217 | print("Model architecture")
218 | plot(model, show_shapes=True, to_file=FOLDER+"lstm_n_gram.png")
219 | print("Model summary")
220 | print(model.summary())
221 | print("Model parameter count")
222 | print(model.count_params())
223 |
224 | print()
225 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
226 | print("Training...")
227 | # Train the model each generation and show predictions against the
228 | # validation dataset
229 | for iteration in range(1, 200):
230 | print()
231 | print('-' * 78)
232 | print(time.strftime('%Y-%m-%d %H:%M:%S'))
233 | print('Iteration', iteration)
234 | history = LossHistory()
235 | check_pointer = ModelCheckpoint(
236 | filepath=FOLDER+"n_gram_model_weights.hdf5",
237 | verbose=1, save_best_only=True)
238 | model.fit(train_X,
239 | train_Y,
240 | batch_size=BATCH_SIZE,
241 | nb_epoch=1,
242 | # nb_epoch=1,
243 | # callbacks=[check_pointer, history],
244 | validation_data=(valid_X, valid_Y))
245 | # print(len(history.losses))
246 | # print(history.losses)
247 | # print(len(history.acces))
248 | # print(history.acces)
249 |
250 | ###
251 | # Select 20 samples from the validation set at random so we can
252 | # visualize errors
253 | for i in range(20):
254 | ind = np.random.randint(0, len(valid_X))
255 | inputs, outputs = valid_X[np.array([ind])], \
256 | valid_Y[np.array([ind])]
257 | predicts = model.predict(inputs, verbose=0)
258 |
259 | show_matrix.update(
260 | inputs[0].transpose(),
261 | outputs[0].transpose(),
262 | predicts[0].transpose())
263 | show_matrix.save(FOLDER+"n_gram_data_predict_%2d.png" % i)
264 |
265 | show_matrix.close()
266 |
267 |
--------------------------------------------------------------------------------
/memory.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from theano import tensor as T
3 | import numpy as np
4 | import math
5 | import theano
6 |
7 |
8 | def initial(number_of_memory_locations, memory_vector_size):
9 | return K.zeros((number_of_memory_locations, memory_vector_size))
10 |
11 |
12 | def batch_addressing(
13 | head_num,
14 | memory_size,
15 | memory_t,
16 | weight_t_1,
17 | key_vector_t,
18 | key_strength_t,
19 | interpolation_gate_t,
20 | shift_weight_t,
21 | scalar_t):
22 | """
23 | Addressing mechanisms.
24 | :param head_num: the number of heads.
25 | :param memory_size:
26 | :param memory_t: memory matrix at time t.
27 | :param weight_t_1: memory weight at time t-1.
28 | :param key_vector_t: key vector at time t.
29 | :param key_strength_t: strength of key vector at time t.
30 | :param interpolation_gate_t: interpolation gate at time t.
31 | :param shift_weight_t: shift weight at time t.
32 | :param scalar_t: scalar at time t.
33 | :return: a weight vector at time t.
34 | """
35 |     # symbolic tensors do not support slice assignment, so collect the
36 |     # per-head weights in a list and concatenate them at the end
37 |     w_w_t_list = []
38 |     for i in xrange(head_num):
39 |         begin = i * memory_size
40 |         end = begin + memory_size
41 |         w_w_t_i = addressing(
42 |             memory_t,
43 |             weight_t_1[begin:end],
44 |             key_vector_t[begin:end],
45 |             key_strength_t[begin:end],
46 |             interpolation_gate_t[begin:end],
47 |             shift_weight_t[begin:end],
48 |             scalar_t[begin:end])
49 |         w_w_t_list.append(w_w_t_i)
50 |     return K.concatenate(w_w_t_list)
51 |
52 | #
53 | # def addressing(
54 | # memory_t,
55 | # weight_t_1,
56 | # key_vector_t, key_strength_t,
57 | # interpolation_gate_t,
58 | # shift_weight_t,
59 | # scalar_t):
60 | # """
61 | # Addressing mechanisms.
62 | # :param memory_t: memory matrix at time t.
63 | # :param weight_t_1: memory weight at time t-1.
64 | # :param key_vector_t: key vector at time t.
65 | # :param key_strength_t: strength of key vector at time t.
66 | # :param interpolation_gate_t: interpolation gate at time t.
67 | # :param shift_weight_t: shift weight at time t.
68 | # :param scalar_t: scalar at time t.
69 | # :return: a weight vector at time t.
70 | # """
71 | # # Content addressing
72 | # weight_content_t = content_addressing(
73 | # memory_t, key_vector_t, key_strength_t)
74 | # print("weight_content_t")
75 | # print(weight_content_t)
76 | #
77 | # # Interpolation
78 | # weight_gated_t = interpolation(
79 | # weight_t_1, weight_content_t, interpolation_gate_t)
80 | # print("weight_content_t")
81 | # print(weight_gated_t)
82 | #
83 | #
84 | # # Convolutional Shift
85 | # _weight_t = circular_convolutional_shift(weight_gated_t, shift_weight_t)
86 | #
87 | # # Sharpening
88 | # weight_t = sharpen(_weight_t, scalar_t)
89 | #
90 | # return weight_t
91 |
92 |
93 | def addressing(
94 | memory_t,
95 | memory_dim,
96 | memory_size,
97 | weight_t_1,
98 | key_vector_t, key_strength_t,
99 | interpolation_gate_t,
100 | shift_weight_t,
101 | shift_range,
102 | scalar_t):
103 | """
104 | Addressing mechanisms.
105 | :param memory_t: memory matrix at time t.
106 | :param weight_t_1: memory weight at time t-1.
107 | :param key_vector_t: key vector at time t.
108 | :param key_strength_t: strength of key vector at time t.
109 | :param interpolation_gate_t: interpolation gate at time t.
110 | :param shift_weight_t: shift weight at time t.
111 | :param scalar_t: scalar at time t.
112 | :return: a weight vector at time t.
113 | """
114 | print("\tbegin addressing()")
115 | # Content addressing
116 | weight_content_t = content_addressing(
117 | memory_t, key_vector_t, key_strength_t)
118 | # print("weight_content_t")
119 | # print(weight_content_t)
120 |
121 | # Interpolation
122 | weight_gated_t = interpolation(
123 | weight_t_1, weight_content_t, interpolation_gate_t)
124 | # print("weight_gated_t")
125 | # print(weight_gated_t)
126 |
127 | # Convolutional Shift
128 | _weight_t = circular_convolutional_shift(
129 | weight_gated_t, shift_weight_t, memory_size, shift_range)
130 | # print("_weight_t")
131 | # print(_weight_t)
132 |
133 | # Sharpening
134 | weight_t = sharpen(_weight_t, scalar_t)
135 | # print("weight_t")
136 | # print(weight_t)
137 |
138 | print("\tend addressing()")
139 | return weight_t
140 |
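# A toy numpy walk-through of the same pipeline (illustrative only, with
# made-up sizes; independent of the Keras/Theano tensors used above):
#
#   M = np.random.random((4, 3))                 # 4 memory locations of width 3
#   key, beta = np.random.random(3), 5.0         # key vector and key strength
#   sim = M.dot(key) / (np.linalg.norm(M, axis=1) * np.linalg.norm(key))
#   w_c = np.exp(beta * sim) / np.exp(beta * sim).sum()       # content addressing
#   g, w_prev = 0.9, np.ones(4) / 4.0
#   w_g = g * w_c + (1.0 - g) * w_prev                        # interpolation
#   s = np.array([0.0, 1.0, 0.0])                             # shift kernel over offsets -1, 0, +1
#   w_s = np.array([sum(w_g[(i + j - 1) % 4] * s[j] for j in range(3)) for i in range(4)])
#   gamma = 2.0
#   w = w_s ** gamma / (w_s ** gamma).sum()                   # sharpening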
141 |
142 | def cosine_similarity_group(u, V):
143 |     # cosine similarity between a vector u and each column of the matrix V
144 |     # (dot products divided by the product of the Euclidean norms)
145 |     similarity = K.dot(u, V) / (
146 |         K.sqrt(K.sum(K.square(u))) * K.sqrt(K.sum(K.square(V), axis=0)))
147 |     # numpy sketch of the same computation:
148 |     #   u = np.random.random((3,))
149 |     #   V = np.random.random((3, 4))
150 |     #   sim = np.dot(u, V) / (np.linalg.norm(u) * np.linalg.norm(V, axis=0))
151 |     #   print("u"); print(u)
152 |     #   print("V"); print(V)
153 |     #   print("similarity"); print(sim)
154 |     return similarity
155 |
156 |
157 | def cosine_similarity(u, v):
158 |     # cosine similarity between two vectors
159 |     # (dot product divided by the product of the Euclidean norms)
160 |     similarity = K.dot(u, v) / (
161 |         K.sqrt(K.sum(K.square(u))) * K.sqrt(K.sum(K.square(v))))
162 |     # print("u")
163 |     # print(u)
164 |     # print("v")
165 |     # print(v)
166 |     return similarity
167 |
168 |
169 | def softmax(x):
170 | # print("x")
171 | # print(x)
172 | _softmax = K.softmax(x)
173 | # print("softmax(x)")
174 | # print(_softmax)
175 | return _softmax
176 |
177 |
178 | def content_addressing(memory_t, key_vector_t, key_strength_t):
179 | '''
180 | Focusing by content.
181 | :param memory_t: external memory.
182 | :param key_vector_t: key vector.
183 | :param key_strength_t: the strength of key.
184 | :return:
185 | '''
186 | # print("content addressing:")
187 | # print(">>memory_t")
188 | # print(key_vector_t)
189 | # print(">>key_vector_t")
190 | # print(key_vector_t)
191 | # print(">>key_strength_t")
192 | # print(key_strength_t)
193 | _weight_content_t = \
194 | key_strength_t * cosine_similarity_group(key_vector_t, memory_t)
195 | weight_content_t = softmax(_weight_content_t)
196 | # print("_weight_content_t")
197 | # print(_weight_content_t)
198 | return weight_content_t
199 |
200 |
201 | def interpolation(weight_t_1, weight_content_t, interpolation_gate_t):
202 | '''
203 | Focusing by location.
204 | :param weight_t_1: the weight value at time-step t-1
205 | :param weight_content_t: the weight get by content-based addressing.
206 | :param interpolation_gate_t: the interpolation gate.
207 | :return:
208 | '''
209 | weight_gated_t = interpolation_gate_t * weight_content_t + \
210 | (1.0 - interpolation_gate_t) * weight_t_1
211 | return weight_gated_t
212 |
213 |
214 | def circular_convolutional_shift(v, k, n, m):
215 |     """Computes circular convolution.
216 |     Args:
217 |         v: a 1-D `Tensor` (weight vector of length n)
218 |         k: a 1-D `Tensor` (shift kernel of length m)
219 |     """
220 |     # size = int(v.get_shape()[0])
221 |     # kernel_size = int(k.get_shape()[0])
222 |     size = n
223 |     kernel_size = m
224 |     kernel_shift = int(math.floor(kernel_size / 2.0))
225 | 
226 |     def loop(idx):
227 |         # wrap the index around both ends of the weight vector
228 |         return T.mod(idx, size)
229 | 
230 |     kernels = []
231 |     for i in xrange(size):
232 |         # \tilde{w}(i) = sum_j v(i + j - kernel_shift) * k(j), taken circularly
233 |         total = 0
234 |         for j in xrange(kernel_size):
235 |             index = T.cast(loop(i + j - kernel_shift), 'int64')
236 |             total = total + v[index] * k[j]
237 |         kernels.append(total)
238 | 
239 |     return T.stack(kernels)
244 |
245 | # def circular_convolutional_shift(v, k, n, m):
246 | # """Computes circular convolution.
247 | # Args:
248 | # v: a 1-D `Tensor` (vector)
249 | # k: a 1-D `Tensor` (kernel)
250 | # """
251 | # # size = int(v.get_shape()[0])
252 | # # kernel_size = int(k.get_shape()[0])
253 | # # kernel_shift = int(math.floor(kernel_size/2.0))
254 | # size = n
255 | # kernel_size = m
256 | # kernel_shift = (kernel_size + 1)/2.0
257 | #
258 | # # def loop(idx):
259 | # # if idx < 0:
260 | # # return size + idx
261 | # # if idx >= size:
262 | # # return idx - size
263 | # # else:
264 | # # return idx
265 | #
266 | # def loop(idx):
267 | # if idx < 0:
268 | # return size + idx
269 | # if T.ge(idx, size):
270 | # return idx - size
271 | # else:
272 | # return idx
273 | #
274 | # kernels = []
275 | # # range_list = T.xrange(kernel_shift, -kernel_shift-1, -1)
276 | # # range_list = theano.tensor.arange(kernel_shift, -kernel_shift-1, -1)
277 | # #
278 | # # range_list_, updates_ = theano.scan(lambda i, d: T.sub(m, i), sequences=k)
279 | # # range_list = theano.function(inputs=[m, k], outputs=range_list_)
280 | # #
281 | #
282 | # my_range_max = T.iscalar('my_range_max')
283 | # my_range = T.arange(my_range_max)
284 | # get_range_list = theano.function(inputs=[my_range_max], outputs=my_range)
285 | # range_list = get_range_list(kernel_size)
286 | #
287 | # # range_list = T.arange(m)
288 | #
289 | # for i in T.xrange(size):
290 | # results, updates = theano.scan(lambda r: loop(T.add(r, i)), sequences=range_list)
291 | # indices = theano.function(inputs=[i, range_list], outputs=results)
292 | #
293 | # v_ = T.gather(v, indices)
294 | # kernels.append(T.reduce_sum(v_ * k, 0))
295 | #
296 | # return T.dynamic_stitch([i for i in T.xrange(size)], kernels)
297 |
298 |
299 | def sharpen(_weight_t, scalar_gama_t):
300 | '''
301 |     The convolution operation in convolutional shift can cause leakage or
302 |     dispersion of weights over time if the shift weighting is not sharp.
303 |     For example, if shifts of -1, 0 and 1 are given weights of 0.1, 0.8,
304 |     and 0.1, the rotation will transform a weighting focused at a single
305 |     point into one slightly blurred over three points. To combat this,
306 |     each head emits one further scalar \gamma >= 1 whose effect is to
307 |     sharpen the final weighting as follows:
308 |     $$w_{i}^{(t)} = \frac{(\hat{w}_{i}^{(t)})^{\gamma}}
309 |     {\sum_{j}(\hat{w}_{j}^{(t)})^{\gamma}}$$
310 |     :param _weight_t: the weight vector which denotes a memory address.
311 |     :param scalar_gama_t: the scalar used for sharpening.
312 | :return: the sharpened weight.
313 | '''
314 | weight_t = K.pow(_weight_t, scalar_gama_t)
315 | return weight_t / K.sum(weight_t)
316 |
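# Worked example of sharpen(): for _weight_t = [0.1, 0.8, 0.1] and gamma = 2
# the powers are [0.01, 0.64, 0.01], which normalise to roughly
# [0.015, 0.970, 0.015] -- the weighting is pulled back towards a single point.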
317 |
--------------------------------------------------------------------------------
/lstm2ntm.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 |
4 | from keras import backend as K
5 | from keras import activations, initializations, regularizers
6 | from keras.engine import Layer, InputSpec
7 |
8 | from keras.layers import Recurrent
9 | from keras.layers import time_distributed_dense
10 | import memory as EM
11 | import head
12 |
13 |
14 | class NTM(Recurrent):
15 |     '''Neural Turing Machine (NTM) layer built on a Long Short-Term Memory controller - Hochreiter 1997.
16 |
17 |     For a step-by-step description of the algorithm, see
18 | [this tutorial](http://deeplearning.net/tutorial/lstm.html).
19 |
20 | # Arguments
21 | output_dim: dimension of the internal projections and the final output.
22 | init: weight initialization function.
23 | Can be the name of an existing function (str),
24 | or a Theano function (see: [initializations](../initializations.md)).
25 | inner_init: initialization function of the inner cells.
26 | forget_bias_init: initialization function for the bias of the forget gate.
27 | [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
28 | recommend initializing with ones.
29 | activation: activation function.
30 | Can be the name of an existing function (str),
31 | or a Theano function (see: [activations](../activations.md)).
32 | inner_activation: activation function for the inner cells.
33 | W_regularizer: instance of [WeightRegularizer](../regularizers.md)
34 | (eg. L1 or L2 regularization), applied to the input weights matrices.
35 | U_regularizer: instance of [WeightRegularizer](../regularizers.md)
36 | (eg. L1 or L2 regularization), applied to the recurrent weights matrices.
37 | b_regularizer: instance of [WeightRegularizer](../regularizers.md),
38 | applied to the bias.
39 | dropout_W: float between 0 and 1. Fraction of the input units to drop for input gates.
40 | dropout_U: float between 0 and 1. Fraction of the input units to drop for recurrent connections.
41 |
42 | # References
43 | - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper)
44 | - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
45 | - [Supervised sequence labelling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
46 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
47 | '''
48 | def __init__(self, output_dim, memory_dim=128, memory_size=20,
49 | controller_output_dim=100, location_shift_range=1,
50 | num_read_head=1, num_write_head=1,
51 | init='glorot_uniform', inner_init='orthogonal',
52 | forget_bias_init='one', activation='tanh',
53 | inner_activation='hard_sigmoid',
54 | W_regularizer=None, U_regularizer=None, R_regularizer=None,
55 | b_regularizer=None, W_y_regularizer=None,
56 | W_xi_regularizer=None, W_r_regularizer=None,
57 | dropout_W=0., dropout_U=0., **kwargs):
58 | self.output_dim = output_dim
59 | self.init = initializations.get(init)
60 | self.inner_init = initializations.get(inner_init)
61 | self.forget_bias_init = initializations.get(forget_bias_init)
62 | self.activation = activations.get(activation)
63 | self.inner_activation = activations.get(inner_activation)
64 | self.W_regularizer = regularizers.get(W_regularizer)
65 | self.U_regularizer = regularizers.get(U_regularizer)
66 | self.b_regularizer = regularizers.get(b_regularizer)
67 |         self.dropout_W, self.dropout_U = dropout_W, dropout_U
68 |         self.initial_weights = None  # checked later in build()
69 | if self.dropout_W or self.dropout_U:
70 | self.uses_learning_phase = True
71 | super(NTM, self).__init__(**kwargs)
72 |
73 | def build(self, input_shape):
74 | self.input_spec = [InputSpec(shape=input_shape)]
75 | self.input_dim = input_shape[2]
76 |
77 | if self.stateful:
78 | self.reset_states()
79 | else:
80 | # initial states: 2 all-zero tensors of shape (output_dim)
81 | self.states = [None, None]
82 |
83 | if self.consume_less == 'gpu':
84 | self.W = self.init((self.input_dim, 4 * self.output_dim),
85 | name='{}_W'.format(self.name))
86 | self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
87 | name='{}_U'.format(self.name))
88 |
89 | self.b = K.variable(np.hstack((np.zeros(self.output_dim),
90 | K.get_value(self.forget_bias_init((self.output_dim,))),
91 | np.zeros(self.output_dim),
92 | np.zeros(self.output_dim))),
93 | name='{}_b'.format(self.name))
94 | self.trainable_weights = [self.W, self.U, self.b]
95 | else:
96 | self.W_i = self.init((self.input_dim, self.output_dim),
97 | name='{}_W_i'.format(self.name))
98 | self.U_i = self.inner_init((self.output_dim, self.output_dim),
99 | name='{}_U_i'.format(self.name))
100 | self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
101 |
102 | self.W_f = self.init((self.input_dim, self.output_dim),
103 | name='{}_W_f'.format(self.name))
104 | self.U_f = self.inner_init((self.output_dim, self.output_dim),
105 | name='{}_U_f'.format(self.name))
106 | self.b_f = self.forget_bias_init((self.output_dim,),
107 | name='{}_b_f'.format(self.name))
108 |
109 | self.W_c = self.init((self.input_dim, self.output_dim),
110 | name='{}_W_c'.format(self.name))
111 | self.U_c = self.inner_init((self.output_dim, self.output_dim),
112 | name='{}_U_c'.format(self.name))
113 | self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
114 |
115 | self.W_o = self.init((self.input_dim, self.output_dim),
116 | name='{}_W_o'.format(self.name))
117 | self.U_o = self.inner_init((self.output_dim, self.output_dim),
118 | name='{}_U_o'.format(self.name))
119 | self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
120 |
121 | self.trainable_weights = [self.W_i, self.U_i, self.b_i,
122 | self.W_c, self.U_c, self.b_c,
123 | self.W_f, self.U_f, self.b_f,
124 | self.W_o, self.U_o, self.b_o]
125 |
126 | self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
127 | self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
128 | self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
129 |
130 | self.regularizers = []
131 | if self.W_regularizer:
132 | self.W_regularizer.set_param(self.W)
133 | self.regularizers.append(self.W_regularizer)
134 | if self.U_regularizer:
135 | self.U_regularizer.set_param(self.U)
136 | self.regularizers.append(self.U_regularizer)
137 | if self.b_regularizer:
138 | self.b_regularizer.set_param(self.b)
139 | self.regularizers.append(self.b_regularizer)
140 |
141 | if self.initial_weights is not None:
142 | self.set_weights(self.initial_weights)
143 | del self.initial_weights
144 | self.built = True
145 |
146 | def reset_states(self):
147 | assert self.stateful, 'Layer must be stateful.'
148 | input_shape = self.input_spec[0].shape
149 | if not input_shape[0]:
150 | raise Exception('If a RNN is stateful, a complete ' +
151 | 'input_shape must be provided (including batch size).')
152 | if hasattr(self, 'states'):
153 | K.set_value(self.states[0],
154 | np.zeros((input_shape[0], self.output_dim)))
155 | K.set_value(self.states[1],
156 | np.zeros((input_shape[0], self.output_dim)))
157 | else:
158 | self.states = [K.zeros((input_shape[0], self.output_dim)),
159 | K.zeros((input_shape[0], self.output_dim))]
160 |
161 | def preprocess_input(self, x):
162 | if self.consume_less == 'cpu':
163 | if 0 < self.dropout_W < 1:
164 | dropout = self.dropout_W
165 | else:
166 | dropout = 0
167 | input_shape = self.input_spec[0].shape
168 | input_dim = input_shape[2]
169 | timesteps = input_shape[1]
170 |
171 | x_i = time_distributed_dense(x, self.W_i, self.b_i, dropout,
172 | input_dim, self.output_dim, timesteps)
173 | x_f = time_distributed_dense(x, self.W_f, self.b_f, dropout,
174 | input_dim, self.output_dim, timesteps)
175 | x_c = time_distributed_dense(x, self.W_c, self.b_c, dropout,
176 | input_dim, self.output_dim, timesteps)
177 | x_o = time_distributed_dense(x, self.W_o, self.b_o, dropout,
178 | input_dim, self.output_dim, timesteps)
179 | return K.concatenate([x_i, x_f, x_c, x_o], axis=2)
180 | else:
181 | return x
182 |
183 | def step(self, x, states):
184 | h_tm1 = states[0]
185 | c_tm1 = states[1]
186 | B_U = states[2]
187 | B_W = states[3]
188 |
189 | if self.consume_less == 'gpu':
190 | z = K.dot(x * B_W[0], self.W) + K.dot(h_tm1 * B_U[0], self.U) + self.b
191 |
192 | z0 = z[:, :self.output_dim]
193 | z1 = z[:, self.output_dim: 2 * self.output_dim]
194 | z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
195 | z3 = z[:, 3 * self.output_dim:]
196 |
197 | i = self.inner_activation(z0)
198 | f = self.inner_activation(z1)
199 | c = f * c_tm1 + i * self.activation(z2)
200 | o = self.inner_activation(z3)
201 | else:
202 | if self.consume_less == 'cpu':
203 | x_i = x[:, :self.output_dim]
204 | x_f = x[:, self.output_dim: 2 * self.output_dim]
205 | x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
206 | x_o = x[:, 3 * self.output_dim:]
207 | elif self.consume_less == 'mem':
208 | x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
209 | x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
210 | x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
211 | x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
212 | else:
213 | raise Exception('Unknown `consume_less` mode.')
214 |
215 | i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
216 | f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
217 | c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
218 | o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))
219 |
220 | h = o * self.activation(c)
221 | return h, [h, c]
222 |
223 | def get_constants(self, x):
224 | constants = []
225 | if 0 < self.dropout_U < 1:
226 | ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
227 | ones = K.tile(ones, (1, self.output_dim))
228 | B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(4)]
229 | constants.append(B_U)
230 | else:
231 | constants.append([K.cast_to_floatx(1.) for _ in range(4)])
232 |
233 | if 0 < self.dropout_W < 1:
234 | input_shape = self.input_spec[0].shape
235 | input_dim = input_shape[-1]
236 | ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
237 | ones = K.tile(ones, (1, int(input_dim)))
238 | B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(4)]
239 | constants.append(B_W)
240 | else:
241 | constants.append([K.cast_to_floatx(1.) for _ in range(4)])
242 | return constants
243 |
244 | def get_config(self):
245 | config = {'output_dim': self.output_dim,
246 | 'init': self.init.__name__,
247 | 'inner_init': self.inner_init.__name__,
248 | 'forget_bias_init': self.forget_bias_init.__name__,
249 | 'activation': self.activation.__name__,
250 | 'inner_activation': self.inner_activation.__name__,
251 | 'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
252 | 'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
253 | 'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
254 | 'dropout_W': self.dropout_W,
255 | 'dropout_U': self.dropout_U}
256 | base_config = super(NTM, self).get_config()
257 | return dict(list(base_config.items()) + list(config.items()))
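# A hypothetical usage sketch (the layer above is still incomplete, so this is
# only meant to illustrate the intended constructor arguments, not a tested
# configuration):
#
#   from keras.models import Sequential
#   from lstm2ntm import NTM
#
#   model = Sequential()
#   model.add(NTM(output_dim=8,
#                 memory_dim=128, memory_size=20,
#                 controller_output_dim=100,
#                 num_read_head=1, num_write_head=1,
#                 input_shape=(41, 8),
#                 return_sequences=True))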
--------------------------------------------------------------------------------
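The step() method in the controller code above is the standard Keras-1 LSTM cell update: one fused projection z = K.dot(x, W) + K.dot(h_tm1, U) + b is split into input, forget, candidate and output slices, and the hidden state is the gated tanh of the new cell state. Below is a minimal NumPy sketch of that arithmetic (the consume_less='gpu' branch, ignoring dropout), with sigmoid and tanh standing in for the default inner_activation and activation; the parameter shapes are hypothetical stand-ins for the learned weights.

import numpy as np

def lstm_step_sketch(x, h_tm1, c_tm1, W, U, b, output_dim):
    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))
    # fused projection of the input and previous hidden state,
    # shape (batch, 4 * output_dim)
    z = x.dot(W) + h_tm1.dot(U) + b
    z0 = z[:, :output_dim]                     # input-gate slice
    z1 = z[:, output_dim: 2 * output_dim]      # forget-gate slice
    z2 = z[:, 2 * output_dim: 3 * output_dim]  # candidate slice
    z3 = z[:, 3 * output_dim:]                 # output-gate slice
    i = sigmoid(z0)
    f = sigmoid(z1)
    c = f * c_tm1 + i * np.tanh(z2)            # new cell state
    o = sigmoid(z3)
    h = o * np.tanh(c)                         # new hidden state
    return h, [h, c]

# e.g. with batch=2, input_dim=3, output_dim=4 (all hypothetical):
#   x = np.random.randn(2, 3); h = c = np.zeros((2, 4))
#   W = np.random.randn(3, 16); U = np.random.randn(4, 16); b = np.zeros(16)
#   h, states = lstm_step_sketch(x, h, c, W, U, b, 4)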
/datasets/algorithm_learning.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | from utils import initialize_random_seed
4 |
5 |
6 | # Initialize the random seed
7 | initialize_random_seed()
8 |
9 |
10 | def generate_copy_sample(dimension, sequence_length):
11 | """Generate one sample of copy algorithm.
12 |
13 | # Arguments
14 |         dimension: the dimension of each input/output token.
15 | sequence_length: the length of input sequence, i.e. the number of
16 | input tokens.
17 |
18 | # Returns
19 | input_sequence: the input sequence of a sample.
20 | output_sequence: the output sequence of a sample.
21 | """
22 | # produce random sequence
23 | sequence = np.random.binomial(
24 | 1, 0.5, (sequence_length, dimension - 1)).astype(np.uint8)
25 |
26 | # allocate space for input sequence and output sequence
27 | input_sequence = np.zeros(
28 | (sequence_length * 2 + 1, dimension), dtype=np.bool)
29 | output_sequence = np.zeros(
30 | (sequence_length * 2 + 1, dimension), dtype=np.bool)
31 |
32 | # set value of input sequence
33 | input_sequence[:sequence_length, :-1] = sequence
34 |     # "1": A special flag which indicates the end of the input
35 | input_sequence[sequence_length, -1] = 1
36 |
37 | # set value of output sequence
38 | output_sequence[sequence_length + 1:, :-1] = sequence
39 |     # "1": A special flag which indicates the beginning of the output
40 | output_sequence[sequence_length, -1] = 1
41 |
42 | # return the sample
43 | return input_sequence, output_sequence
44 |
45 |
46 | def generate_copy_data_set(
47 | dimension,
48 | max_length_of_original_sequence,
49 | data_set_size):
50 | """Generate samples for learning copy algorithm.
51 |
52 | # Arguments
53 |         dimension: the dimension of each input/output token.
54 | max_length_of_original_sequence: the max length of original sequence.
55 | data_set_size: the size of total samples.
56 |
57 | # Returns
58 | input_sequences: the input sequences of total samples.
59 | output_sequences: the output sequences of total samples.
60 | """
61 | # get random sequence lengths from uniform distribution e.g. [1, 20]
62 | sequence_lengths = np.random.randint(
63 | 1, max_length_of_original_sequence + 1, data_set_size)
64 |
65 | # allocate space for input sequences and output sequences, where the
66 |     # "1" is a special flag which indicates the end of the input or output
67 | input_sequences = np.zeros(
68 | (data_set_size, max_length_of_original_sequence * 2 + 1, dimension),
69 | dtype=np.bool)
70 | output_sequences = np.zeros(
71 | (data_set_size, max_length_of_original_sequence * 2 + 1, dimension),
72 | dtype=np.bool)
73 |
74 | # set the value for input sequences and output sequences
75 | for i in range(data_set_size):
76 | input_sequence, output_sequence = \
77 | generate_copy_sample(dimension, sequence_lengths[i])
78 | input_sequences[i, :sequence_lengths[i]*2+1] = input_sequence
79 | output_sequences[i, :sequence_lengths[i]*2+1] = output_sequence
80 |
81 | # return the total samples
82 | return input_sequences, output_sequences
83 |
84 |
85 | def generate_repeat_copy_sample(dimension, sequence_length, repeat_times):
86 | """Generate one sample of repeat copy algorithm.
87 |
88 | # Arguments
89 |         dimension: the dimension of each input/output token.
90 | sequence_length: the length of input sequence, i.e. the number of
91 | input tokens.
92 |         repeat_times: the number of times the output repeats the input.
93 |
94 | # Returns
95 | input_sequence: the input sequence of a sample.
96 | output_sequence: the output sequence of a sample.
97 | """
98 | # produce random sequence
99 | sequence = np.random.binomial(
100 | 1, 0.5, (sequence_length, dimension - 1)).astype(np.uint8)
101 |
102 | # allocate space for input sequence and output sequence
103 | input_sequence = np.zeros(
104 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
105 | dimension),
106 | dtype=np.bool)
107 | output_sequence = np.zeros(
108 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
109 | dimension),
110 | dtype=np.bool)
111 |
112 | # set value of input sequence
113 | input_sequence[:sequence_length, :-1] = sequence
114 | # input_sequence[sequence_length, -1] = repeat_times
115 | input_sequence[sequence_length, -1] = 1
116 |
117 |     # set value of output sequence (the output starts at position sequence_length + 1)
118 | output_sequence[sequence_length+1:, :-1] = \
119 | np.tile(sequence, (repeat_times, 1))
120 |     # "1": A special flag which indicates the beginning of the output
121 | # output_sequence[sequence_length, -1] = 1
122 |
123 | # return the sample
124 | return input_sequence, output_sequence
125 |
126 |
127 | def generate_repeat_copy_data_set(
128 | dimension,
129 | max_length_of_original_sequence,
130 | max_repeat_times,
131 | data_set_size):
132 | """Generate samples for learning repeat copy algorithm.
133 |
134 | # Arguments
135 |         dimension: the dimension of each input/output token.
136 | max_length_of_original_sequence: the max length of original sequence.
137 | max_repeat_times: the maximum repeat times.
138 | data_set_size: the size of total samples.
139 |
140 | # Returns
141 | input_sequences: the input sequences of total samples.
142 | output_sequences: the output sequences of total samples.
143 |         repeat_times: the repeat count used for each sample's output
144 |             sequence.
145 | """
146 | # produce random sequence lengths from uniform distribution
147 | # [1, max_length]
148 | sequence_lengths = np.random.randint(
149 | 1, max_length_of_original_sequence + 1, data_set_size)
150 |
151 | # produce random repeat times from uniform distribution
152 | # [1, max_repeat_times]
153 | repeat_times = np.random.randint(1, max_repeat_times + 1, data_set_size)
154 | input_sequences = np.zeros(
155 | (data_set_size,
156 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
157 | dimension),
158 | dtype=np.bool)
159 | output_sequences = np.zeros(
160 | (data_set_size,
161 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
162 | dimension),
163 | dtype=np.bool)
164 |
165 | # set the value for input sequences and output sequences
166 | for i in range(data_set_size):
167 | input_sequence, output_sequence = generate_repeat_copy_sample(
168 | dimension, sequence_lengths[i], repeat_times[i])
169 | input_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
170 | input_sequence
171 | output_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
172 | output_sequence
173 |
174 | # return total samples
175 | return input_sequences, output_sequences, repeat_times
176 |
177 |
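# A hedged usage sketch (illustration only, not part of the original module):
# the arrays are padded to the worst case of
# max_length * (max_repeat_times + 1) + 1 time steps, and the per-sample repeat
# counts are returned alongside the sequences; all values below are hypothetical.
def _repeat_copy_data_set_shape_demo():
    x, y, times = generate_repeat_copy_data_set(8, 10, 3, 4)
    assert x.shape == (4, 10 * (3 + 1) + 1, 8) and y.shape == x.shape
    assert times.shape == (4,) and 1 <= times.min() and times.max() <= 3
    return x, y, times
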
178 | def generate_associative_recall_items(dimension, item_size, episode_size):
179 | """Generate items of associative recall algorithm.
180 |
181 | # Arguments
182 | dimension: the dimension of input output sequences.
183 |         item_size: the number of tokens in each item.
184 | episode_size: the size of one episode.
185 |
186 | # Returns
187 | items: the generated item.
188 | """
189 | inner_item = np.random.binomial(
190 | 1, 0.5, ((item_size + 1) * episode_size, dimension)
191 | ).astype(np.uint8)
192 | items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
193 | dtype=np.uint8)
194 | # items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
195 | # dtype=np.bool)
196 | items[:, :-2] = inner_item
197 |
198 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
199 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
200 | separator[0][-2] = 1
201 | items[:(item_size + 1) * episode_size:(item_size + 1)] = separator[0]
202 |
203 |     # return the items of one associative recall episode
204 | return items
205 |
206 |
207 | def generate_associative_recall_sample(
208 | dimension, item_size, episode_size, max_episode_size):
209 | """Generate one sample of associative recall algorithm.
210 |
211 | Arguments
212 | dimension: the dimension of input output sequences.
213 | item_size: the size of one item.
214 | episode_size: the size of one episode.
215 | max_episode_size: the maximum episode size.
216 |
217 | Returns
218 | input_sequence: the input sequence of a sample.
219 | output_sequence: the output sequence of a sample.
220 | """
221 | sequence_length = (item_size+1) * (max_episode_size+2)
222 | input_sequence = np.zeros(
223 | (sequence_length, dimension + 2), dtype=np.uint8)
224 | # input_sequence = np.zeros(
225 | # (sequence_length, dimension + 2), dtype=np.bool)
226 | input_sequence[:(item_size + 1) * episode_size] = \
227 | generate_associative_recall_items(
228 | dimension, item_size, episode_size)
229 |
230 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
231 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
232 | separator[0][-2] = 1
233 | query_index = np.random.randint(0, episode_size-1)
234 |
235 | input_sequence[(item_size+1)*episode_size:(item_size+1)*(episode_size+1)] = \
236 | input_sequence[(item_size+1)*query_index:(item_size+1)*(query_index+1)]
237 | input_sequence[(item_size+1)*episode_size][-2] = 0
238 | input_sequence[(item_size+1)*episode_size][-1] = 1
239 | input_sequence[(item_size+1)*(episode_size+1)][-1] = 1
240 |
241 | output_sequence = np.zeros(
242 | (sequence_length, dimension + 2), dtype=np.uint8)
243 | # output_sequence = np.zeros(
244 | # (sequence_length, dimension + 2), dtype=np.bool)
245 | output_sequence[(item_size+1)*(episode_size+1):(item_size+1)*(episode_size+2)] = \
246 | input_sequence[(item_size+1)*(query_index+1):(item_size+1)*(query_index+2)]
247 | output_sequence[(item_size+1)*(episode_size+1)][-2] = 0
248 |
249 | # return one sample for associative recall
250 | return input_sequence, output_sequence
251 |
252 |
253 | def generate_associative_recall_data_set(
254 | dimension, item_size, max_episode_size, data_set_size):
255 | """Generate samples for learning associative recall algorithm.
256 |
257 | Arguments
258 | dimension: the dimension of input output sequences.
259 | item_size: the size of one item.
260 | max_episode_size: the maximum episode size.
261 |         data_set_size: the total number of samples.
262 |
263 | Returns
264 | input_sequences: the input sequences of total samples.
265 | output_sequences: the output sequences of total samples.
266 | """
267 | episode_size = np.random.randint(2, max_episode_size + 1, data_set_size)
268 | sequence_length = (item_size+1) * (max_episode_size+2)
269 | input_sequences = np.zeros(
270 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
271 | output_sequences = np.zeros(
272 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
273 | # input_sequences = np.zeros(
274 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
275 | # output_sequences = np.zeros(
276 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
277 | for i in range(data_set_size):
278 | input_sequence, output_sequence = generate_associative_recall_sample(
279 | dimension, item_size, episode_size[i], max_episode_size)
280 | input_sequences[i] = input_sequence
281 | output_sequences[i] = output_sequence
282 |
283 | # return the total samples
284 | return input_sequences, output_sequences
285 |
286 |
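# A hedged usage sketch (illustration only, not part of the original module):
# each sequence reserves room for max_episode_size items plus the query and the
# answer, so the arrays are shaped
# (data_set_size, (item_size + 1) * (max_episode_size + 2), dimension + 2),
# with the two extra channels holding the item separator and the query delimiter.
def _associative_recall_data_set_shape_demo():
    x, y = generate_associative_recall_data_set(6, 3, 6, 4)
    assert x.shape == (4, (3 + 1) * (6 + 2), 6 + 2)
    assert y.shape == x.shape
    return x, y
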
287 | def generate_priority_sort_sample(
288 | dimension,
289 | input_sequence_length,
290 | output_sequence_length,
291 | priority_lower_bound,
292 | priority_upper_bound):
293 | """Generate one sample of priority sort algorithm.
294 |
295 | Arguments
296 | dimension: the dimension of input output sequences.
297 | input_sequence_length: the length of input sequence.
298 | output_sequence_length: the length of output sequence.
299 | priority_lower_bound: the lower bound of priority.
300 | priority_upper_bound: the upper bound of priority.
301 |
302 | Returns
303 | input_sequence: the input sequence of a sample.
304 | output_sequence: the output sequence of a sample.
305 | """
306 | sequence = input_sequence_length + output_sequence_length + 1
307 | input_sequence = np.random.binomial(
308 | 1, 0.5, (input_sequence_length, dimension + 1)).astype(np.uint8)
309 | output_sequence = np.zeros(
310 | (output_sequence_length, dimension + 1), dtype=np.uint8)
311 | input_priority = np.random.uniform(priority_lower_bound,
312 | priority_upper_bound,
313 | (input_sequence_length, 1))
314 | output_priority = sorted(
315 | input_priority, reverse=True)[:output_sequence_length]
316 | pair = [(input_sequence[i], input_priority[i])
317 | for i in range(input_sequence_length)]
318 | sorted_input_sequence = sorted(
319 | pair, key=lambda prior: prior[1], reverse=True)
320 | for i in range(output_sequence_length):
321 | output_sequence[i] = sorted_input_sequence[i][0]
322 |
323 | input_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
324 | input_priority_ = np.zeros((sequence, 1), dtype=np.float32)
325 | output_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
326 | output_priority_ = np.zeros((sequence, 1), dtype=np.float32)
327 |
328 | input_sequence_[:input_sequence_length, :-1] = input_sequence
329 | input_sequence_[input_sequence_length][-1] = 1
330 | input_priority_[:input_sequence_length] = input_priority
331 | output_sequence_[input_sequence_length+1:sequence, :-1] = output_sequence
332 | output_priority_[input_sequence_length+1:sequence] = output_priority
333 |
334 | # return input sequence, priority of each input, output sequence, priority
335 | # of each output
336 | return input_sequence_, input_priority_, output_sequence_, output_priority_
337 |
338 |
339 | def generate_priority_sort_data_set(
340 | dimension,
341 | input_sequence_length,
342 | output_sequence_length,
343 | priority_lower_bound,
344 | priority_upper_bound,
345 | data_set_size):
346 | """Generate samples for learning priority sort algorithm.
347 |
348 | Arguments
349 | dimension: the dimension of input output sequences.
350 | input_sequence_length: the length of input sequence.
351 | output_sequence_length: the length of output sequence.
352 | priority_lower_bound: the lower bound of priority.
353 | priority_upper_bound: the upper bound of priority.
354 |         data_set_size: the total number of samples.
355 |
356 | Returns
357 |         input_sequences: the input sequences of total samples.
358 |         output_sequences: the output sequences of total samples.
359 | """
360 | sequence_length = input_sequence_length + output_sequence_length
361 | input_sequences = np.zeros(
362 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
363 | output_sequences = np.zeros(
364 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
365 | for i in range(data_set_size):
366 | input_sequence, input_priority, output_sequence, output_priority = \
367 | generate_priority_sort_sample(
368 | dimension,
369 | input_sequence_length,
370 | output_sequence_length,
371 | priority_lower_bound,
372 | priority_upper_bound)
373 | input_sequences[i] = input_sequence
374 | output_sequences[i] = output_sequence
375 | input_sequences[i][:, -2] = input_priority.transpose()
376 | output_sequences[i][:, -2] = output_priority.transpose()
377 |
378 | # return the total samples
379 | return input_sequences, output_sequences
380 |
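# A hedged usage sketch (illustration only, not part of the original module):
# the 20-in / 16-out split below follows the priority sort task of the NTM paper,
# but any lengths and priority bounds work. Priorities are written into channel
# -2 and the delimiter into channel -1, so the arrays carry dimension + 2 channels.
def _priority_sort_data_set_shape_demo():
    x, y = generate_priority_sort_data_set(8, 20, 16, 0.0, 1.0, 4)
    assert x.shape == (4, 20 + 16 + 1, 8 + 2)
    assert y.shape == x.shape
    return x, y
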
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 |
5 | np.random.seed(7883)
6 | random.seed(7883)
7 |
8 |
9 | def generate_random_binomial_(row, col):
10 | return np.random.binomial(
11 | 1, 0.5, (row, col)).astype(np.uint8)
12 |
13 |
14 | def generate_weightings(row, col):
15 | write_weightings = np.zeros((row, col), dtype=np.float32)
16 | read_weightings = np.zeros((row, col), dtype=np.float32)
17 |     r = (row * 3) // 4
18 |     for i in np.arange(0, col // 2):
19 |         write_weightings[r][i] = 1
20 |         read_weightings[r][i + col // 2] = 1
21 | r -= 1
22 |
23 | return write_weightings, read_weightings
24 |
25 |
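# A hedged illustration (not part of the original module): generate_weightings()
# marks one memory location per time step, stepping through memory one row at a
# time over the first col/2 steps for the write head and revisiting the same
# locations over the last col/2 steps for the read head, similar to the
# weighting plots drawn for the copy task.
def _weightings_demo():
    w, r = generate_weightings(8, 8)
    assert w[:, :4].sum() == 4   # one write location active per early time step
    assert r[:, 4:].sum() == 4   # the read head revisits them in the later steps
    return w, r
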
26 | def generate_copy_sample(dimension, sequence_length):
27 | """Generate one sample of copy algorithm.
28 |
29 | # Arguments
30 |         dimension: the dimension of each input/output token.
31 | sequence_length: the length of input sequence, i.e. the number of
32 | input tokens.
33 |
34 | # Returns
35 | input_sequence: the input sequence of a sample.
36 | output_sequence: the output sequence of a sample.
37 | """
38 | # produce random sequence
39 | sequence = np.random.binomial(
40 | 1, 0.5, (sequence_length, dimension - 1)).astype(np.uint8)
41 |
42 | # allocate space for input sequence and output sequence
43 | input_sequence = np.zeros(
44 | (sequence_length * 2 + 1, dimension), dtype=np.bool)
45 | output_sequence = np.zeros(
46 | (sequence_length * 2 + 1, dimension), dtype=np.bool)
47 |
48 | # set value of input sequence
49 | input_sequence[:sequence_length, :-1] = sequence
50 |     # "1": A special flag which indicates the end of the input
51 | input_sequence[sequence_length, -1] = 1
52 |
53 | # set value of output sequence
54 | output_sequence[sequence_length + 1:, :-1] = sequence
55 |     # "1": A special flag which indicates the beginning of the output
56 | output_sequence[sequence_length, -1] = 1
57 |
58 | # return the sample
59 | return input_sequence, output_sequence
60 |
61 |
62 | def generate_copy_data_set(
63 | dimension,
64 | max_length_of_original_sequence,
65 | data_set_size):
66 | """Generate samples for learning copy algorithm.
67 |
68 | # Arguments
69 |         dimension: the dimension of each input/output token.
70 | max_length_of_original_sequence: the max length of original sequence.
71 | data_set_size: the size of total samples.
72 |
73 | # Returns
74 | input_sequences: the input sequences of total samples.
75 | output_sequences: the output sequences of total samples.
76 | """
77 | # get random sequence lengths from uniform distribution e.g. [1, 20]
78 | sequence_lengths = np.random.randint(
79 | 1, max_length_of_original_sequence + 1, data_set_size)
80 |
81 | # allocate space for input sequences and output sequences, where the
82 |     # "1" is a special flag which indicates the end of the input or output
83 | input_sequences = np.zeros(
84 | (data_set_size, max_length_of_original_sequence * 2 + 1, dimension),
85 | dtype=np.bool)
86 | output_sequences = np.zeros(
87 | (data_set_size, max_length_of_original_sequence * 2 + 1, dimension),
88 | dtype=np.bool)
89 |
90 | # set the value for input sequences and output sequences
91 | for i in range(data_set_size):
92 | input_sequence, output_sequence = \
93 | generate_copy_sample(dimension, sequence_lengths[i])
94 | input_sequences[i, :sequence_lengths[i]*2+1] = input_sequence
95 | output_sequences[i, :sequence_lengths[i]*2+1] = output_sequence
96 |
97 | # return the total samples
98 | return input_sequences, output_sequences
99 |
100 |
101 | def generate_repeat_copy_sample(dimension, sequence_length, repeat_times):
102 | """Generate one sample of repeat copy algorithm.
103 |
104 | # Arguments
105 |         dimension: the dimension of each input/output token.
106 | sequence_length: the length of input sequence, i.e. the number of
107 | input tokens.
108 |         repeat_times: the number of times the output repeats the input.
109 |
110 | # Returns
111 | input_sequence: the input sequence of a sample.
112 | output_sequence: the output sequence of a sample.
113 | """
114 | # produce random sequence
115 | sequence = np.random.binomial(
116 | 1, 0.5, (sequence_length, dimension - 1)).astype(np.uint8)
117 |
118 | # allocate space for input sequence and output sequence
119 | input_sequence = np.zeros(
120 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
121 | dimension),
122 | dtype=np.bool)
123 | output_sequence = np.zeros(
124 | (sequence_length + 1 + sequence_length * repeat_times, # + 1
125 | dimension),
126 | dtype=np.bool)
127 |
128 | # set value of input sequence
129 | input_sequence[:sequence_length, :-1] = sequence
130 | # input_sequence[sequence_length, -1] = repeat_times
131 | input_sequence[sequence_length, -1] = 1
132 |
133 |     # set value of output sequence (the output starts at position sequence_length + 1)
134 | output_sequence[sequence_length+1:, :-1] = \
135 | np.tile(sequence, (repeat_times, 1))
136 |     # "1": A special flag which indicates the beginning of the output
137 | # output_sequence[sequence_length, -1] = 1
138 |
139 | # return the sample
140 | return input_sequence, output_sequence
141 |
142 |
143 | def generate_repeat_copy_data_set(
144 | dimension,
145 | max_length_of_original_sequence,
146 | max_repeat_times,
147 | data_set_size):
148 | """Generate samples for learning repeat copy algorithm.
149 |
150 | # Arguments
151 |         dimension: the dimension of each input/output token.
152 | max_length_of_original_sequence: the max length of original sequence.
153 | max_repeat_times: the maximum repeat times.
154 | data_set_size: the size of total samples.
155 |
156 | # Returns
157 | input_sequences: the input sequences of total samples.
158 | output_sequences: the output sequences of total samples.
159 |         repeat_times: the repeat count used for each sample's output
160 |             sequence.
161 | """
162 | # produce random sequence lengths from uniform distribution
163 | # [1, max_length]
164 | sequence_lengths = np.random.randint(
165 | 1, max_length_of_original_sequence + 1, data_set_size)
166 |
167 | # produce random repeat times from uniform distribution
168 | # [1, max_repeat_times]
169 | repeat_times = np.random.randint(1, max_repeat_times + 1, data_set_size)
170 | input_sequences = np.zeros(
171 | (data_set_size,
172 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
173 | dimension),
174 | dtype=np.bool)
175 | output_sequences = np.zeros(
176 | (data_set_size,
177 | max_length_of_original_sequence * (max_repeat_times + 1) + 1, # + 1
178 | dimension),
179 | dtype=np.bool)
180 |
181 | # set the value for input sequences and output sequences
182 | for i in range(data_set_size):
183 | input_sequence, output_sequence = generate_repeat_copy_sample(
184 | dimension, sequence_lengths[i], repeat_times[i])
185 | input_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
186 | input_sequence
187 | output_sequences[i, :sequence_lengths[i]*(repeat_times[i]+1)+1] = \
188 | output_sequence
189 |
190 | # return total samples
191 | return input_sequences, output_sequences, repeat_times
192 |
193 |
194 | def _generate_associative_recall_items(dimension, item_size, episode_size):
195 | """Generate items of associative recall algorithm.
196 |
197 | # Arguments
198 | dimension: the dimension of input output sequences.
199 |         item_size: the number of tokens in each item.
200 | episode_size: the size of one episode.
201 |
202 | # Returns
203 | items: the generated item.
204 | """
205 | inner_item = np.random.binomial(
206 | 1, 0.5, ((item_size + 1) * episode_size, dimension)
207 | ).astype(np.uint8)
208 | items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
209 | dtype=np.uint8)
210 | # items = np.zeros(((item_size + 1) * episode_size, dimension + 2),
211 | # dtype=np.bool)
212 | items[:, :-2] = inner_item
213 |
214 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
215 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
216 | separator[0][-2] = 1
217 | items[:(item_size + 1) * episode_size:(item_size + 1)] = separator[0]
218 |
219 |     # return the items of one associative recall episode
220 | return items
221 |
222 |
223 | def generate_associative_recall_sample(
224 | dimension, item_size, episode_size, max_episode_size):
225 | """Generate one sample of associative recall algorithm.
226 |
227 | Arguments
228 | dimension: the dimension of input output sequences.
229 | item_size: the size of one item.
230 | episode_size: the size of one episode.
231 | max_episode_size: the maximum episode size.
232 |
233 | Returns
234 | input_sequence: the input sequence of a sample.
235 | output_sequence: the output sequence of a sample.
236 | """
237 | sequence_length = (item_size+1) * (max_episode_size+2)
238 | input_sequence = np.zeros(
239 | (sequence_length, dimension + 2), dtype=np.uint8)
240 | # input_sequence = np.zeros(
241 | # (sequence_length, dimension + 2), dtype=np.bool)
242 | input_sequence[:(item_size + 1) * episode_size] = \
243 | _generate_associative_recall_items(
244 | dimension, item_size, episode_size)
245 |
246 | separator = np.zeros((1, dimension + 2), dtype=np.uint8)
247 | # separator = np.zeros((1, dimension + 2), dtype=np.bool)
248 | separator[0][-2] = 1
249 | query_index = np.random.randint(0, episode_size-1)
250 |
251 | input_sequence[(item_size+1)*episode_size:(item_size+1)*(episode_size+1)] = \
252 | input_sequence[(item_size+1)*query_index:(item_size+1)*(query_index+1)]
253 | input_sequence[(item_size+1)*episode_size][-2] = 0
254 | input_sequence[(item_size+1)*episode_size][-1] = 1
255 | input_sequence[(item_size+1)*(episode_size+1)][-1] = 1
256 |
257 | output_sequence = np.zeros(
258 | (sequence_length, dimension + 2), dtype=np.uint8)
259 | # output_sequence = np.zeros(
260 | # (sequence_length, dimension + 2), dtype=np.bool)
261 | output_sequence[(item_size+1)*(episode_size+1):(item_size+1)*(episode_size+2)] = \
262 | input_sequence[(item_size+1)*(query_index+1):(item_size+1)*(query_index+2)]
263 | output_sequence[(item_size+1)*(episode_size+1)][-2] = 0
264 |
265 | # return one sample for associative recall
266 | return input_sequence, output_sequence
267 |
268 |
269 | def generate_associative_recall_data_set(
270 | dimension, item_size, max_episode_size, data_set_size):
271 | """Generate samples for learning associative recall algorithm.
272 |
273 | Arguments
274 | dimension: the dimension of input output sequences.
275 | item_size: the size of one item.
276 | max_episode_size: the maximum episode size.
277 |         data_set_size: the total number of samples.
278 |
279 | Returns
280 | input_sequences: the input sequences of total samples.
281 | output_sequences: the output sequences of total samples.
282 | """
283 | episode_size = np.random.randint(2, max_episode_size + 1, data_set_size)
284 | sequence_length = (item_size+1) * (max_episode_size+2)
285 | input_sequences = np.zeros(
286 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
287 | output_sequences = np.zeros(
288 | (data_set_size, sequence_length, dimension + 2), dtype=np.uint8)
289 | # input_sequences = np.zeros(
290 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
291 | # output_sequences = np.zeros(
292 | # (training_size, sequence_length, dimension + 2), dtype=np.bool)
293 | for i in range(data_set_size):
294 | input_sequence, output_sequence = generate_associative_recall_sample(
295 | dimension, item_size, episode_size[i], max_episode_size)
296 | input_sequences[i] = input_sequence
297 | output_sequences[i] = output_sequence
298 |
299 | # return the total samples
300 | return input_sequences, output_sequences
301 |
302 |
303 | # def generate_probability_of_n_gram_by_beta(a, b, n):
304 | # return np.random.beta(a, b, np.power(2, n-1))
305 |
306 |
307 | def get_index(n_1_bits, n):
308 | index = n_1_bits[0]
309 | for i in range(1, n-1):
310 | index = index + np.power(2, i) * n_1_bits[i]
311 |
312 | return index
313 |
314 |
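# A hedged illustration (not part of the original module): get_index() reads the
# previous n-1 bits as a little-endian binary number, selecting one of the
# 2 ** (n - 1) rows of the n-gram look-up table.
def _get_index_demo():
    assert get_index([1, 0, 1], 4) == 1 + 0 * 2 + 1 * 4
    assert get_index([0, 0, 0], 4) == 0
    assert get_index([1, 1, 1], 4) == 7
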
315 | def generate_dynamical_n_gram_sample(look_up_table, n, sequence_length):
316 | example_number = 100
317 | input_size = 1
318 | input_sequence = np.zeros((example_number, sequence_length*2-n+2, input_size+2), dtype=np.uint8)
319 | output_sequence = np.zeros((example_number, sequence_length*2-n+2, input_size+2), dtype=np.uint8)
320 |
321 | input_sequence_ = np.zeros((sequence_length*2-n+2, input_size+2), dtype=np.uint8)
322 | output_sequence_ = np.zeros((sequence_length*2-n+2, input_size+2), dtype=np.uint8)
323 | input_sequence_[0:n-1, 0] = np.random.binomial(1, 0.5, (1, n-1)).astype(np.uint8)
324 | # for i in range(n-1, sequence_length):
325 | # n_1_bits = input_sequence[i-n+1: i]
326 | # index = get_index(n_1_bits, n)
327 | # input_sequence[i] = np.random.binomial(1, look_up_table[index], 1)
328 | # output_sequence[n-1: -1] = input_sequence[n-1: -1]
329 |
330 | for i in range(n-1, sequence_length):
331 | n_1_bits = input_sequence_[i-n+1: i, 0]
332 | index = get_index(n_1_bits, n)
333 | # input_sequence_[i][0] = np.random.binomial(1, look_up_table[index], 1)
334 | # output_sequence_[sequence_length+i-n+2][0] = np.random.binomial(1, look_up_table[index], 1)
335 | input_sequence[:, i, 0] = np.random.binomial(1, look_up_table[index], 1)
336 | # output_sequence_[sequence_length+i-n+2][0] = np.random.binomial(1, look_up_table[index], 1)
337 | output_sequence[:, sequence_length+i-n+2, 0] = np.random.binomial(
338 | 1, look_up_table[index], example_number)
339 | input_sequence[:, sequence_length, -1] = 1
340 | input_ones = np.ones((example_number, sequence_length))
341 | input_sequence[:, 0:sequence_length, 1] = \
342 | input_ones - input_sequence[:, 0:sequence_length, 0]
343 | output_ones = np.ones((example_number, sequence_length-n+1))
344 | output_sequence[:, sequence_length+1:sequence_length*2-n+2, 1] = \
345 | output_ones - output_sequence[:, sequence_length+1:sequence_length*2-n+2, 0]
346 |
347 | # print(input_sequence_.shape)
348 | # input_sequence_[0:sequence_length, 0] = input_sequence
349 | # input_sequence_[sequence_length, -1] = 1
350 | # output_sequence_[1, sequence_length+1:sequence_length*2-n+2] = input_sequence
351 |
352 | # print(input_sequence)
353 | # print(output_sequence)
354 |
355 | return input_sequence, output_sequence
356 |
357 |
358 | def generate_dynamical_n_gram_data_set(
359 | look_up_table, n, sequence_length, example_size):
360 | input_size = 1
361 | input_sequences = np.zeros((example_size, sequence_length*2-n+2, input_size+2), dtype=np.uint8)
362 | output_sequences = np.zeros((example_size, sequence_length*2-n+2, input_size+2), dtype=np.uint8)
363 | # input_sequences = np.zeros((example_size, sequence_length, input_size), dtype=np.uint8)
364 | # output_sequences = np.zeros((example_size, sequence_length, input_size), dtype=np.uint8)
365 | # input_sequences = np.zeros((example_size, sequence_length, 1), dtype=np.bool)
366 | # output_sequences = np.zeros((example_size, sequence_length, 1), dtype=np.bool)
367 |     for i in range(example_size // 100):
368 | input_sequence, output_sequence = generate_dynamical_n_gram_sample(
369 | look_up_table, n, sequence_length)
370 | input_sequences[i*100:(i+1)*100] = input_sequence
371 | output_sequences[i*100:(i+1)*100] = output_sequence
372 | # print(i)
373 | # print(input_sequence)
374 | # print(output_sequence)
375 |
376 | return input_sequences, output_sequences
377 |
378 |
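# A hedged usage sketch (illustration only, not part of the original module): the
# commented-out helper above suggests the look-up table holds one Bernoulli
# probability per (n-1)-bit history, e.g. drawn from a Beta(1/2, 1/2) prior as in
# the dynamic n-grams task of the NTM paper. Samples are produced in batches of
# 100 (example_number in generate_dynamical_n_gram_sample), so example_size
# should be a multiple of 100. All concrete values below are hypothetical.
def _n_gram_data_set_demo(n=6, sequence_length=50):
    look_up_table = np.random.beta(0.5, 0.5, np.power(2, n - 1))
    x, y = generate_dynamical_n_gram_data_set(look_up_table, n, sequence_length, 200)
    assert x.shape == (200, sequence_length * 2 - n + 2, 3)
    return x, y
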
379 | def generate_priority_sort_sample(
380 | dimension,
381 | input_sequence_length,
382 | output_sequence_length,
383 | priority_lower_bound,
384 | priority_upper_bound):
385 | """Generate one sample of priority sort algorithm.
386 |
387 | Arguments
388 | dimension: the dimension of input output sequences.
389 | input_sequence_length: the length of input sequence.
390 | output_sequence_length: the length of output sequence.
391 | priority_lower_bound: the lower bound of priority.
392 | priority_upper_bound: the upper bound of priority.
393 |
394 | Returns
395 | input_sequence: the input sequence of a sample.
396 | output_sequence: the output sequence of a sample.
397 | """
398 | sequence = input_sequence_length + output_sequence_length + 1
399 | input_sequence = np.random.binomial(
400 | 1, 0.5, (input_sequence_length, dimension + 1)).astype(np.uint8)
401 | output_sequence = np.zeros(
402 | (output_sequence_length, dimension + 1), dtype=np.uint8)
403 | input_priority = np.random.uniform(priority_lower_bound,
404 | priority_upper_bound,
405 | (input_sequence_length, 1))
406 | output_priority = sorted(
407 | input_priority, reverse=True)[:output_sequence_length]
408 | pair = [(input_sequence[i], input_priority[i])
409 | for i in range(input_sequence_length)]
410 | sorted_input_sequence = sorted(
411 | pair, key=lambda prior: prior[1], reverse=True)
412 | for i in range(output_sequence_length):
413 | output_sequence[i] = sorted_input_sequence[i][0]
414 |
415 | input_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
416 | input_priority_ = np.zeros((sequence, 1), dtype=np.float32)
417 | output_sequence_ = np.zeros((sequence, dimension + 2), dtype=np.float32)
418 | output_priority_ = np.zeros((sequence, 1), dtype=np.float32)
419 |
420 | input_sequence_[:input_sequence_length, :-1] = input_sequence
421 | input_sequence_[input_sequence_length][-1] = 1
422 | input_priority_[:input_sequence_length] = input_priority
423 | output_sequence_[input_sequence_length+1:sequence, :-1] = output_sequence
424 | output_priority_[input_sequence_length+1:sequence] = output_priority
425 |
426 | # return input sequence, priority of each input, output sequence, priority
427 | # of each output
428 | return input_sequence_, input_priority_, output_sequence_, output_priority_
429 |
430 |
431 | def generate_priority_sort_data_set(
432 | dimension,
433 | input_sequence_length,
434 | output_sequence_length,
435 | priority_lower_bound,
436 | priority_upper_bound,
437 | data_set_size):
438 | """Generate samples for learning priority sort algorithm.
439 |
440 | Arguments
441 | dimension: the dimension of input output sequences.
442 | input_sequence_length: the length of input sequence.
443 | output_sequence_length: the length of output sequence.
444 | priority_lower_bound: the lower bound of priority.
445 | priority_upper_bound: the upper bound of priority.
446 |         data_set_size: the total number of samples.
447 |
448 | Returns
449 |         input_sequences: the input sequences of total samples.
450 |         output_sequences: the output sequences of total samples.
451 | """
452 | sequence_length = input_sequence_length + output_sequence_length
453 | input_sequences = np.zeros(
454 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
455 | output_sequences = np.zeros(
456 | (data_set_size, sequence_length + 1, dimension + 2), dtype=np.float32)
457 | for i in range(data_set_size):
458 | input_sequence, input_priority, output_sequence, output_priority = \
459 | generate_priority_sort_sample(
460 | dimension,
461 | input_sequence_length,
462 | output_sequence_length,
463 | priority_lower_bound,
464 | priority_upper_bound)
465 | input_sequences[i] = input_sequence
466 | output_sequences[i] = output_sequence
467 | input_sequences[i][:, -2] = input_priority.transpose()
468 | output_sequences[i][:, -2] = output_priority.transpose()
469 |
470 | # return the total samples
471 | return input_sequences, output_sequences
472 |
--------------------------------------------------------------------------------