├── .gitignore
├── Data
│   └── .gitignore
├── LICENSE
├── Literature
│   ├── base_paper.pdf
│   ├── copy_network.pdf
│   ├── decoder_part.odt
│   ├── doubts
│   └── graves2016.pdf
├── README.md
├── TensorFlow_implementation
│   ├── .gitignore
│   ├── Data_Preprocessor.ipynb
│   ├── Summary_Generator
│   │   ├── Model.py
│   │   ├── Tensorflow_Graph
│   │   │   ├── __init__.py
│   │   │   ├── order_planner_with_copynet.py
│   │   │   ├── order_planner_without_copynet.py
│   │   │   └── utils.py
│   │   ├── Text_Preprocessing_Helpers
│   │   │   ├── __init__.py
│   │   │   ├── pickling_tools.py
│   │   │   └── utils.py
│   │   └── __init__.py
│   ├── fast_data_preprocessor_part1.py
│   ├── fast_data_preprocessor_part2.py
│   ├── inferencer.py
│   ├── pre_processing_op.ipynb
│   ├── seq2seq
│   │   ├── __init__.py
│   │   ├── configurable.py
│   │   ├── contrib
│   │   │   ├── __init__.py
│   │   │   ├── experiment.py
│   │   │   ├── rnn_cell.py
│   │   │   └── seq2seq
│   │   │       ├── __init__.py
│   │   │       ├── decoder.py
│   │   │       └── helper.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── input_pipeline.py
│   │   │   ├── parallel_data_provider.py
│   │   │   ├── postproc.py
│   │   │   ├── sequence_example_decoder.py
│   │   │   ├── split_tokens_decoder.py
│   │   │   └── vocab.py
│   │   ├── decoders
│   │   │   ├── __init__.py
│   │   │   ├── attention.py
│   │   │   ├── attention_decoder.py
│   │   │   ├── basic_decoder.py
│   │   │   ├── beam_search_decoder.py
│   │   │   └── rnn_decoder.py
│   │   ├── encoders
│   │   │   ├── __init__.py
│   │   │   ├── conv_encoder.py
│   │   │   ├── encoder.py
│   │   │   ├── image_encoder.py
│   │   │   ├── pooling_encoder.py
│   │   │   └── rnn_encoder.py
│   │   ├── global_vars.py
│   │   ├── graph_module.py
│   │   ├── graph_utils.py
│   │   ├── inference
│   │   │   ├── __init__.py
│   │   │   ├── beam_search.py
│   │   │   └── inference.py
│   │   ├── losses.py
│   │   ├── metrics
│   │   │   ├── __init__.py
│   │   │   ├── bleu.py
│   │   │   ├── metric_specs.py
│   │   │   └── rouge.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── attention_seq2seq.py
│   │   │   ├── basic_seq2seq.py
│   │   │   ├── bridges.py
│   │   │   ├── image2seq.py
│   │   │   ├── model_base.py
│   │   │   └── seq2seq_model.py
│   │   ├── tasks
│   │   │   ├── __init__.py
│   │   │   ├── decode_text.py
│   │   │   ├── dump_attention.py
│   │   │   ├── dump_beams.py
│   │   │   └── inference_task.py
│   │   ├── test
│   │   │   ├── __init__.py
│   │   │   ├── attention_test.py
│   │   │   ├── beam_search_test.py
│   │   │   ├── bridges_test.py
│   │   │   ├── conv_encoder_test.py
│   │   │   ├── data_test.py
│   │   │   ├── decoder_test.py
│   │   │   ├── example_config_test.py
│   │   │   ├── hooks_test.py
│   │   │   ├── input_pipeline_test.py
│   │   │   ├── losses_test.py
│   │   │   ├── metrics_test.py
│   │   │   ├── models_test.py
│   │   │   ├── pipeline_test.py
│   │   │   ├── pooling_encoder_test.py
│   │   │   ├── rnn_cell_test.py
│   │   │   ├── rnn_encoder_test.py
│   │   │   ├── train_utils_test.py
│   │   │   ├── utils.py
│   │   │   └── vocab_test.py
│   │   └── training
│   │       ├── __init__.py
│   │       ├── hooks.py
│   │       └── utils.py
│   ├── trainer_with_copy_net.py
│   └── trainer_without_copy_net.py
├── Visualizations
│   ├── first_run_of_both.png
│   └── projector_pic.png
└── architecture_diagram.jpeg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # ignore pycharm setup
104 | .idea/
105 |
--------------------------------------------------------------------------------
/Data/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore the full version of the wikipedia-biography-dataset
2 | wikipedia-biography-dataset/
3 |
4 | # ignore link as well
5 | wikipedia-biography-dataset
6 |
7 | # ignore the three full dataset files
8 | *.nb
9 | *.sent
10 | *.box
11 |
12 | # ignore the pickle files
13 | *.pickle
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Animesh Karnewar
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Literature/base_paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Literature/base_paper.pdf
--------------------------------------------------------------------------------
/Literature/copy_network.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Literature/copy_network.pdf
--------------------------------------------------------------------------------
/Literature/decoder_part.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Literature/decoder_part.odt
--------------------------------------------------------------------------------
/Literature/doubts:
--------------------------------------------------------------------------------
1 | 1.) In link based attention, how is the link matrix implemented?
2 |
3 | 2.) Equation 8, what is the first alpha(t - 1)
4 |
5 | 3.) Equation 8, how is the product of Link matrix with the alpha(t - 1) dimensionally correct?
6 |
7 | 4.) alpha_t_link is used for computing zt and also for computing alpha_hybrid
8 |
9 | 5.) how is the vocabulary calculated programmatically?
10 |
11 |
12 |
--------------------------------------------------------------------------------
/Literature/graves2016.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Literature/graves2016.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Natural-Language-Summary-Generation-From-Structured-Data
2 | Implementation (Personal) of the paper titled
3 | `"Order-Planning Neural Text Generation From Structured Data"`. The dataset
4 | for this project can be found at ->
5 | [WikiBio](https://github.com/DavidGrangier/wikipedia-biography-dataset)
6 |
7 | Requirements for training:
8 | * `python 3+`
9 | * `tensorflow-gpu` (preferable; CPU will take forever)
10 | * `Host Memory 12GB+` (this will be addressed soon)
11 |
12 | ## Architecture
13 |
14 | (architecture diagram: see `architecture_diagram.jpeg` in the repository root)
15 |
16 |
17 | ## Running the Code
18 | The process of using this code is currently a little involved.
19 | This will be addressed in further development (perhaps with collaboration).
20 |
21 | #### 1. Preprocessing:
22 | Please refer to `/TensorFlow_implementation/Data_Preprocessor.ipynb`
23 | for an explanation of the steps performed while preprocessing the data.
24 | Running the notebook on the full dataset is very slow,
25 | so please use the following procedure instead.
26 |
27 | Step 1:
28 | (your_venv)$ python fast_data_preprocessor_part1.py
29 |
30 | Note that all the tweakable parameters are declared at the
31 | beginning of the script (change them as per your requirements).
32 | This will generate a `temp.pickle` file in the same directory. Do not delete
33 | it even after the full preprocessing is done; it acts as a checkpoint of the
34 | preprocessing pipeline, i.e. if you decide to change something later,
35 | you wouldn't have to run the entire preprocessing again.
36 |
37 | Step 2:
38 | (your_venv)$ python fast_data_preprocessor_part2.py
39 |
40 | This will create the following file: `/Data/plug_and_play.pickle`. Again,
41 | the tweakable parameters are at the beginning of the script.
42 | **Please note that this process requires 12GB+ of RAM.
43 | If you have less than 12GB of host memory, please use a subset of
44 | the dataset instead of the entire dataset
45 | (change `limit` in the script).**
46 |
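Before training, you can sanity-check the generated pickle with a minimal sketch like the
following (it uses the `unPickleIt` helper from
`Summary_Generator/Text_Preprocessing_Helpers/pickling_tools.py`; the keys match the ones
written by `fast_data_preprocessor_part2.py`):

    from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import unPickleIt

    data = unPickleIt("../Data/plug_and_play.pickle")
    print(data['field_vocab_size'], data['content_label_vocab_size'])
    print(data['field_encodings'][0], data['label_encodings'][0])
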
47 | #### 2. Training:
48 |
49 | Once preprocessing is done, simply run one of the two training scripts.
50 |
51 | (your_venv)$ python trainer_with_copy_net.py
52 | OR
53 | (your_venv)$ python trainer_without_copy_net.py
54 |
55 | Again, all the hyperparameters are declared at the beginning of the script.
56 | For example, in `trainer_without_copy_net.py`:
57 |
58 | ''' Name of the model: '''
59 | # This can be changed to create new models in the directory
60 | model_name = "Model_1(without_copy_net)"
61 |
62 | '''
63 | ========================================================
64 | || All Tweakable hyper-parameters
65 | ========================================================
66 | '''
67 | # constants for this script
68 | no_of_epochs = 500
69 | train_percentage = 100
70 | batch_size = 8
71 | checkpoint_factor = 100
72 | learning_rate = 3e-4 # for learning rate
73 | # but I have noticed that this learning rate works quite well.
74 | momentum = 0.9
75 |
76 | # Memory usage fraction:
77 | gpu_memory_usage_fraction = 1
78 |
79 | # Embeddings size:
80 | field_embedding_size = 100
81 | content_label_embedding_size = 400 # This is a much bigger
82 | # vocabulary compared to the field_name's vocabulary
83 |
84 | # LSTM hidden state sizes
85 | lstm_cell_state_size = hidden_state_size = 500 # they are
86 | # same (for now)
87 | '''
88 | ========================================================
89 | '''
90 |
91 | ## Test Runs:
92 | Once training is started, log directories are created for TensorBoard.
93 | Start your `tensorboard` server pointing to the log directory.
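For example (assuming the log directories are created under the `Models/` directory used by
the scripts):

    (your_venv)$ tensorboard --logdir=Models/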
94 |
95 | #### Loss monitor:
96 |
97 |
98 |
99 | (loss curves screenshot: `Visualizations/first_run_of_both.png`)
100 |
101 |
102 | #### Embedding projector:
103 |
104 |
105 |
106 | (embedding projector screenshot: `Visualizations/projector_pic.png`)
107 |
108 |
109 | * **Trained models coming soon ...**
110 |
111 | ## Thanks
112 | Contributions and feedback are welcome: please feel free to open PRs, issues, or comments here.
113 |
114 |
115 | Best regards,
116 | @akanimax :)
--------------------------------------------------------------------------------
/TensorFlow_implementation/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore the pickle files
2 | *.pickle
3 |
4 | # ignore the Models directory
5 | Models/
6 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/Summary_Generator/Tensorflow_Graph/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/TensorFlow_implementation/Summary_Generator/Tensorflow_Graph/__init__.py
--------------------------------------------------------------------------------
/TensorFlow_implementation/Summary_Generator/Tensorflow_Graph/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Library of helper tools for training and creating the Tensorflow graph of the
3 | system
4 | '''
5 |
6 | import numpy as np
7 |
8 | # Obtain the sequence lengths for the given input field_encodings / content_encodings (To feed to the RNN encoder)
9 | def get_lengths(sequences):
10 | '''
11 | Function to obtain the lengths of the given encodings. This allows for variable length sequences in the
12 | RNN encoder.
13 | @param
14 | sequences = [2d] list of integer encoded sequences, padded to the max_length of the batch
15 |
16 | @return
17 | lengths = [1d] list containing the lengths of the sequences
18 | '''
19 | return list(map(lambda x: len(x), sequences))
20 |
21 |
22 | def pad_sequences(seqs, pad_value = 0):
23 | '''
24 | function for padding the list of sequences; returns a tensor in which every sequence is padded
25 | with trailing pad values up to the batch's max length (for the bucketing phase)
26 | @param
27 | seqs => the list of integer sequences
28 | pad_value => the integer used as the padding value (defaults to zero)
29 | @return => padded tensor for this batch
30 | '''
31 |
32 | # find the maximum length among the given sequences
33 | max_length = max(map(lambda x: len(x), seqs))
34 |
35 | # create a list denoting the values with which the sequences need to be padded:
36 | padded_seqs = [] # initialize to empty list
37 | for seq in seqs:
38 | seq_len = len(seq) # obtain the length of current sequences
39 | diff = max_length - seq_len # calculate the padding amount for this seq
40 | padded_seqs.append(seq + [pad_value for _ in range(diff)])
41 |
42 |
43 | # return the padded seqs tensor
44 | return np.array(padded_seqs)
45 |
46 |
47 |
48 | # function to perform synchronous random shuffling of the training data
49 | def synch_random_shuffle_non_np(X, Y):
50 | '''
51 | ** This function takes parameters of non-numpy dtypes such as list, tuple, etc.
52 | Although it works on numpy arrays as well, it is not as performant in that case
53 | @param
54 | X, Y => The data to be shuffled
55 | @return => The shuffled data
56 | '''
57 | combined = list(zip(X, Y))
58 |
59 | # shuffle the combined list in place
60 | np.random.shuffle(combined)
61 |
62 | # extract the data back from the combined list
63 | X, Y = list(zip(*combined))
64 |
65 | # return the shuffled data:
66 | return X, Y
67 |
68 |
69 |
70 | # function to split the data into train - dev sets:
71 | def split_train_dev(X, Y, train_percentage):
72 | '''
73 | function to split the given data into two small datasets (train - dev)
74 | @param
75 | X, Y => the data to be split
76 | (** Make sure the train dimension is the first one)
77 | train_percentage => the percentage which should be in the training set.
78 | (**this should be in 100% not decimal)
79 | @return => train_X, train_Y, test_X, test_Y
80 | '''
81 | m_examples = len(X)
82 | assert train_percentage <= 100, "Train percentage cannot be greater than 100! NOOB!"
83 | partition_point = int((m_examples * (float(train_percentage) / 100)) + 0.5) # 0.5 is added for rounding
84 |
85 | # construct the train_X, train_Y, test_X, test_Y sets:
86 | train_X = X[: partition_point]; train_Y = Y[: partition_point]
87 | test_X = X[partition_point: ]; test_Y = Y[partition_point: ]
88 |
89 | assert len(train_X) + len(test_X) == m_examples, "Something wrong in X splitting"
90 | assert len(train_Y) + len(test_Y) == m_examples, "Something wrong in Y splitting"
91 |
92 | # return the constructed sets
93 | return train_X, train_Y, test_X, test_Y
94 |
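# Illustrative usage of the helpers above (a sketch; not part of the original module):
#
#   seqs = [[4, 2, 7], [1, 3], [9]]
#   get_lengths(seqs)    # -> [3, 2, 1]
#   pad_sequences(seqs)  # -> array([[4, 2, 7],
#                        #           [1, 3, 0],
#                        #           [9, 0, 0]])
#
#   # 10 examples split 90 / 10 into train and dev sets:
#   train_X, train_Y, dev_X, dev_Y = split_train_dev(list(range(10)), list(range(10)), 90)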
--------------------------------------------------------------------------------
/TensorFlow_implementation/Summary_Generator/Text_Preprocessing_Helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/TensorFlow_implementation/Summary_Generator/Text_Preprocessing_Helpers/__init__.py
--------------------------------------------------------------------------------
/TensorFlow_implementation/Summary_Generator/Text_Preprocessing_Helpers/pickling_tools.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import _pickle as pickle # pickle module in python
4 | import os # for path related operations
5 |
6 | '''
7 | Simple function to perform pickling of the given object. This function may fail if the size of the object exceeds
8 | the max size of the pickling protocol used. Although this is highly rare, one might then have to resort to some other
9 | strategy to pickle the data.
10 | The second function available is to unpickle a file located at the specified path
11 | '''
12 |
13 | # coded by botman
14 |
15 | # function to pickle an object
16 | def pickleIt(obj, save_path):
17 | '''
18 | function to pickle the given object.
19 | @param
20 | obj => the python object to be pickled
21 | save_path => the path where the pickled file is to be saved
22 | @return => nothing (the pickle file gets saved at the given location)
23 | '''
24 | if(not os.path.isfile(save_path)):
25 | with open(save_path, 'wb') as dumping:
26 | pickle.dump(obj, dumping)
27 |
28 | print("The file has been pickled at:", save_path)
29 |
30 | else:
31 | print("The pickle file already exists: ", save_path)
32 |
33 |
34 | # function to unpickle the given file and load the obj back into the python environment
35 | def unPickleIt(pickle_path): # might throw the file not found exception
36 | '''
37 | function to unpickle the object from the given path
38 | @param
39 | pickle_path => the path where the pickle file is located
40 | @return => the object extracted from the saved path
41 | '''
42 |
43 | with open(pickle_path, 'rb') as dumped_pickle:
44 | obj = pickle.load(dumped_pickle)
45 |
46 | return obj # return the unpickled object
47 |
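# Illustrative usage (a sketch; not part of the original module):
#
#   pickleIt({"a": 1}, "example.pickle")   # writes example.pickle (skipped if the file already exists)
#   obj = unPickleIt("example.pickle")     # -> {"a": 1}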
--------------------------------------------------------------------------------
/TensorFlow_implementation/Summary_Generator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/TensorFlow_implementation/Summary_Generator/__init__.py
--------------------------------------------------------------------------------
/TensorFlow_implementation/fast_data_preprocessor_part1.py:
--------------------------------------------------------------------------------
1 | '''
2 | script for preprocessing the data from the files
3 | This script is optimized for producing the processed data faster
4 | '''
5 | from __future__ import print_function
6 | import numpy as np
7 | import os
8 | from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *
9 |
10 | # set the data_path
11 | data_path = "../Data"
12 |
13 | data_files_paths = {
14 | "table_content": os.path.join(data_path, "train.box"),
15 | "nb_sentences" : os.path.join(data_path, "train.nb"),
16 | "train_sentences": os.path.join(data_path, "train.sent")
17 | }
18 |
19 | # generate the lists for all the samples in the dataset by reading the file once
20 |
21 |
22 | #=======================================================================================================================
23 | # Read the file for field_names and content_names
24 | #=======================================================================================================================
25 |
26 |
27 | print("Reading from the train.box file ...")
28 | with open(data_files_paths["table_content"]) as t_file:
29 | # read all the lines from the file:
30 | table_contents = t_file.readlines()
31 |
32 | # split all the lines at tab to generate the list of field_value pairs
33 | table_contents = map(lambda x: x.strip().split('\t'), table_contents)
34 |
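# Illustrative (assumed) shape of one element of table_contents after the tab split,
# e.g. ['name_1:walter', 'name_2:extra', 'birth_date_1:1954', ...]
# i.e. tab-separated "field_label:content_word" pairs; the next step splits each pair
# on ':' into a (field_label, content_word) tuple.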
35 |
36 | print("splitting the samples into field_names and content_words ...")
37 | # convert this list of string pairs into list of lists of tuples
38 | table_contents = map(lambda y: map(lambda x: tuple(x.split(":")), y), table_contents)
39 |
40 | # write a loop to separate out the field_names and the content_words
41 | count = 0; field_names = []; content_words = [] # initialize these to empty lists
42 | for sample in table_contents:
43 | # unzip the list:
44 | fields, contents = zip(*sample)
45 |
46 | # modify the fields to discard the _1, _2 labels
47 | fields = map(lambda x: x.split("_")[0], fields)
48 |
49 | # append the lists to appropriate lists
50 | field_names.append(list(fields)); content_words.append(list(contents))
51 |
52 | # increment the counter
53 | count += 1
54 |
55 | # give feedback every 100,000 samples:
56 | if(count % 100000 == 0):
57 | print("seperated", count, "samples")
58 |
59 | print("\nfield_names:\n", field_names[: 3], "\n\ncontent_words:\n", content_words[: 3])
60 |
61 |
62 |
63 | #==================================================================================================================
64 | # Read the file for the labels now
65 | #==================================================================================================================
66 | print("\n\nReading from the train.nb and the train.sent files ...")
67 | (labels, label_lengths) = (open(data_files_paths["train_sentences"]), open(data_files_paths["nb_sentences"]))
68 | label_words = labels.readlines(); lab_lengths = label_lengths.readlines()
69 | # close the files:
70 | labels.close(); label_lengths.close()
71 |
72 | print(label_words[: 3])
73 |
74 | # now perform the map-reduce operation to obtain a data structure similar to the field_names and content_words
75 | print("grouping lines in train.sent according to the train.nb ... ")
76 | count = 0; label_sentences = [] # initialize to empty list
77 |
78 | for length in lab_lengths:
79 | temp = []; cnt = 0;
80 | while(cnt < int(length)):
81 | sent = label_words.pop(0)
82 | # print("sent", sent)
83 | temp += sent.strip().split(' ')
84 | cnt += 1
85 | # print("temp ", temp)
86 |
87 | # append the temp to the label_sentences
88 | label_sentences.append(temp)
89 |
90 | # increment the counter
91 | count += 1
92 |
93 | # print a feedback for 1000 samples:
94 | if(count % 1000 == 0):
95 | print("grouped", count, "label_sentences")
96 |
97 |
98 | print(label_sentences[-3:])
99 |
100 |
101 | print("pickling the stuff generated till now ... ")
102 | # finally pickle the objects into a temporary pickle file:
103 | # temp_pickle object definition:
104 | temp_pickle = {
105 | "fields": field_names,
106 | "content": content_words,
107 | "label": label_sentences
108 | }
109 |
110 | pickleIt(temp_pickle, "temp.pickle")
111 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/fast_data_preprocessor_part2.py:
--------------------------------------------------------------------------------
1 | '''
2 | This script picks up from where we left in the first part.
3 | '''
4 |
5 | from __future__ import print_function
6 | from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *
7 | from Summary_Generator.Text_Preprocessing_Helpers.utils import *
8 |
9 |
10 | # obtain the data from the pickle file generated by part 1 of the preprocessing.
11 | temp_pickle_file_path = "temp.pickle"
12 |
13 | # set the limit on the samples to be trained on:
14 | limit = 600000 # no limit for now
15 |
16 | # unpickle the object from this file
17 | print("unpickling the data ...")
18 | temp_obj = unPickleIt(temp_pickle_file_path)
19 |
20 | # extract the three lists from this temp_obj
21 | field_names = temp_obj['fields'][:limit]
22 | content_words = temp_obj['content'][:limit]
23 | label_words = temp_obj['label'][:limit]
24 |
25 | # print first three elements from this list to verify the sanity:
26 | print("\nField_names:", field_names[: 3]); print("\nContent_words:", content_words[: 3]), print("\nLabel_words:", label_words[: 3])
27 |
28 | # tokenize the field_names:
29 | print("\n\nTokenizing the field_names ...")
30 | field_sequences, field_dict, rev_field_dict, field_vocab_size = prepare_tokenizer(field_names)
31 |
32 | print("Encoded field_sequences:", field_sequences[: 3])
33 |
34 |
35 | #Last part is to tokenize the content and the label sequences together:
36 | # note the length of the content_words:
37 | content_split_point = len(content_words)
38 |
39 | # attach them together
40 | # transform the label_words to add the start and end tokens to all the sentences
41 | for i in range(len(label_words)):
42 | label_words[i] = [''] + label_words[i] + ['']
43 |
44 | unified_content_label_list = content_words + label_words
45 |
46 | # tokenize the unified_content_and_label_words:
47 | print("\n\nTokenizing the content and the label names ...")
48 | unified_sequences, content_label_dict, rev_content_label_dict, content_label_vocab_size = prepare_tokenizer(unified_content_label_list, max_word_length = 20000)
49 |
50 | print("Encoded content_label_sequences:", unified_sequences[: 3])
51 |
52 | # obtain the content and label sequences by separating it from the unified_sequences
53 | content_sequences = unified_sequences[: content_split_point]; label_sequences = unified_sequences[content_split_point: ]
54 |
55 | # Finally, pickle all of it together:
56 | pickle_obj = {
57 | # ''' Input structured data: '''
58 |
59 | # field_encodings and related data:
60 | 'field_encodings': field_sequences,
61 | 'field_dict': field_dict,
62 | 'field_rev_dict': rev_field_dict,
63 | 'field_vocab_size': field_vocab_size,
64 |
65 | # content encodings and related data:
66 | 'content_encodings': content_sequences,
67 |
68 | # ''' Label summary sentences: '''
69 |
70 | # label encodings and related data:
71 | 'label_encodings': label_sequences,
72 |
73 | # V union C related data:
74 | 'content_union_label_dict': content_label_dict,
75 | 'rev_content_union_label_dict': rev_content_label_dict,
76 | 'content_label_vocab_size': content_label_vocab_size
77 | }
78 |
79 | # call the pickling function to perform the pickling:
80 | print("\nPickling the processed data ...")
81 | pickleIt(pickle_obj, "../Data/plug_and_play.pickle")
82 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/inferencer.py:
--------------------------------------------------------------------------------
1 | '''
2 | Script for checking if the Inference computations run properly for the trained graph.
3 | '''
4 |
5 | from Summary_Generator.Tensorflow_Graph import order_planner_without_copynet
6 | from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *
7 | from Summary_Generator.Tensorflow_Graph.utils import *
8 | from Summary_Generator.Model import *
9 | import numpy as np
10 | import tensorflow as tf
11 |
12 |
13 | # random_seed value for consistent debuggable behaviour
14 | seed_value = 3
15 |
16 | np.random.seed(seed_value) # set this seed for device-independent, consistent behaviour
17 |
18 | ''' Set the constants for the script '''
19 | # various paths of the files
20 | data_path = "../Data" # the data path
21 |
22 | data_files_paths = {
23 | "table_content": os.path.join(data_path, "train.box"),
24 | "nb_sentences" : os.path.join(data_path, "train.nb"),
25 | "train_sentences": os.path.join(data_path, "train.sent")
26 | }
27 |
28 | base_model_path = "Models"
29 | plug_and_play_data_file = os.path.join(data_path, "plug_and_play.pickle")
30 |
31 |
32 | # Set the train_percentage mark here.
33 | train_percentage = 90
34 |
35 |
36 |
37 | ''' Extract and setup the data '''
38 | # Obtain the data:
39 | data = unPickleIt(plug_and_play_data_file)
40 |
41 | field_encodings = data['field_encodings']
42 | field_dict = data['field_dict']
43 |
44 | content_encodings = data['content_encodings']
45 |
46 | label_encodings = data['label_encodings']
47 | content_label_dict = data['content_union_label_dict']
48 | rev_content_label_dict = data['rev_content_union_label_dict']
49 |
50 | # vocabulary sizes
51 | field_vocab_size = data['field_vocab_size']
52 | content_label_vocab_size = data['content_label_vocab_size']
53 |
54 |
55 | X, Y = synch_random_shuffle_non_np(zip(field_encodings, content_encodings), label_encodings)
56 |
57 | train_X, train_Y, dev_X, dev_Y = split_train_dev(X, Y, train_percentage)
58 | train_X_field, train_X_content = zip(*train_X)
59 | train_X_field = list(train_X_field); train_X_content = list(train_X_content)
60 |
61 | # Free up the resources by deleting non required stuff
62 | del X, Y, field_encodings, content_encodings, train_X
63 |
64 | # print train_X_field, train_X_content, train_Y, dev_X, dev_Y
65 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | seq2seq library base module
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | from seq2seq.graph_module import GraphModule
23 |
24 | from seq2seq import contrib
25 | from seq2seq import data
26 | from seq2seq import decoders
27 | from seq2seq import encoders
28 | from seq2seq import global_vars
29 | from seq2seq import graph_utils
30 | from seq2seq import inference
31 | from seq2seq import losses
32 | from seq2seq import metrics
33 | from seq2seq import models
34 | from seq2seq import test
35 | from seq2seq import training
36 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/configurable.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Abstract base class for objects that are configurable using
16 | a parameters dictionary.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import abc
24 | import copy
25 | from pydoc import locate
26 |
27 | import six
28 | import yaml
29 |
30 | import tensorflow as tf
31 |
32 |
33 | class abstractstaticmethod(staticmethod): #pylint: disable=C0111,C0103
34 | """Decorates a method as abstract and static"""
35 | __slots__ = ()
36 |
37 | def __init__(self, function):
38 | super(abstractstaticmethod, self).__init__(function)
39 | function.__isabstractmethod__ = True
40 |
41 | __isabstractmethod__ = True
42 |
43 |
44 | def _create_from_dict(dict_, default_module, *args, **kwargs):
45 | """Creates a configurable class from a dictionary. The dictionary must have
46 | "class" and "params" properties. The class can be either fully qualified, or
47 | it is looked up in the modules passed via `default_module`.
48 | """
49 | class_ = locate(dict_["class"]) or getattr(default_module, dict_["class"])
50 | params = {}
51 | if "params" in dict_:
52 | params = dict_["params"]
53 | instance = class_(params, *args, **kwargs)
54 | return instance
55 |
56 |
57 | def _maybe_load_yaml(item):
58 | """Parses `item` only if it is a string. If `item` is a dictionary
59 | it is returned as-is.
60 | """
61 | if isinstance(item, six.string_types):
62 | return yaml.load(item)
63 | elif isinstance(item, dict):
64 | return item
65 | else:
66 | raise ValueError("Got {}, expected YAML string or dict", type(item))
67 |
68 |
69 | def _deep_merge_dict(dict_x, dict_y, path=None):
70 | """Recursively merges dict_y into dict_x.
71 | """
72 | if path is None: path = []
73 | for key in dict_y:
74 | if key in dict_x:
75 | if isinstance(dict_x[key], dict) and isinstance(dict_y[key], dict):
76 | _deep_merge_dict(dict_x[key], dict_y[key], path + [str(key)])
77 | elif dict_x[key] == dict_y[key]:
78 | pass # same leaf value
79 | else:
80 | dict_x[key] = dict_y[key]
81 | else:
82 | dict_x[key] = dict_y[key]
83 | return dict_x
84 |
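# Illustrative example (not part of the original file):
#   _deep_merge_dict({"a": 1, "b": {"c": 2}}, {"b": {"d": 3}})
#   # -> {"a": 1, "b": {"c": 2, "d": 3}}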
85 |
86 | def _parse_params(params, default_params):
87 | """Parses parameter values to the types defined by the default parameters.
88 | Default parameters are used for missing values.
89 | """
90 | # Cast parameters to correct types
91 | if params is None:
92 | params = {}
93 | result = copy.deepcopy(default_params)
94 | for key, value in params.items():
95 | # If param is unknown, drop it to stay compatible with past versions
96 | if key not in default_params:
97 | raise ValueError("%s is not a valid model parameter" % key)
98 | # Param is a dictionary
99 | if isinstance(value, dict):
100 | default_dict = default_params[key]
101 | if not isinstance(default_dict, dict):
102 | raise ValueError("%s should not be a dictionary", key)
103 | if default_dict:
104 | value = _parse_params(value, default_dict)
105 | else:
106 | # If the default is an empty dict we do not typecheck it
107 | # and assume it's done downstream
108 | pass
109 | if value is None:
110 | continue
111 | if default_params[key] is None:
112 | result[key] = value
113 | else:
114 | result[key] = type(default_params[key])(value)
115 | return result
116 |
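# Illustrative example (not part of the original file):
#   _parse_params({"lr": "0.5"}, {"lr": 0.1, "dropout": 0.8})
#   # -> {"lr": 0.5, "dropout": 0.8}  (values are cast to the type of the corresponding default)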
117 |
118 | @six.add_metaclass(abc.ABCMeta)
119 | class Configurable(object):
120 | """Interface for all classes that are configurable
121 | via a parameters dictionary.
122 |
123 | Args:
124 | params: A dictionary of parameters.
125 | mode: A value in tf.contrib.learn.ModeKeys
126 | """
127 |
128 | def __init__(self, params, mode):
129 | self._params = _parse_params(params, self.default_params())
130 | self._mode = mode
131 | self._print_params()
132 |
133 | def _print_params(self):
134 | """Logs parameter values"""
135 | classname = self.__class__.__name__
136 | tf.logging.info("Creating %s in mode=%s", classname, self._mode)
137 | tf.logging.info("\n%s", yaml.dump({classname: self._params}))
138 |
139 | @property
140 | def mode(self):
141 | """Returns a value in tf.contrib.learn.ModeKeys.
142 | """
143 | return self._mode
144 |
145 | @property
146 | def params(self):
147 | """Returns a dictionary of parsed parameters.
148 | """
149 | return self._params
150 |
151 | @abstractstaticmethod
152 | def default_params():
153 | """Returns a dictionary of default parameters. The default parameters
154 | are used to define the expected type of passed parameters. Missing
155 | parameter values are replaced with the defaults returned by this method.
156 | """
157 | raise NotImplementedError
158 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/contrib/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/contrib/experiment.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """A patched tf.learn Experiment class to handle GPU memory
16 | sharing issues.
17 | """
18 |
19 | import tensorflow as tf
20 |
21 | class Experiment(tf.contrib.learn.Experiment):
22 | """A patched tf.learn Experiment class to handle GPU memory
23 | sharing issues."""
24 |
25 | def __init__(self, train_steps_per_iteration=None, *args, **kwargs):
26 | super(Experiment, self).__init__(*args, **kwargs)
27 | self._train_steps_per_iteration = train_steps_per_iteration
28 |
29 | def _has_training_stopped(self, eval_result):
30 | """Determines whether the training has stopped."""
31 | if not eval_result:
32 | return False
33 |
34 | global_step = eval_result.get(tf.GraphKeys.GLOBAL_STEP)
35 | return global_step and self._train_steps and (
36 | global_step >= self._train_steps)
37 |
38 | def continuous_train_and_eval(self,
39 | continuous_eval_predicate_fn=None):
40 | """Interleaves training and evaluation.
41 |
42 | The frequency of evaluation is controlled by the `train_steps_per_iteration`
43 | (via constructor). The model will be first trained for
44 | `train_steps_per_iteration`, and then be evaluated in turns.
45 |
46 | This differs from `train_and_evaluate` as follows:
47 | 1. The procedure will have train and evaluation in turns. The model
48 | will be trained for a number of steps (usually smaller than `train_steps`
49 | if provided) and then be evaluated. `train_and_evaluate` will train the
50 | model for `train_steps` (no small training iterations).
51 |
52 | 2. Due to the different approach this schedule takes, it leads to two
53 | differences in resource control. First, the resources (e.g., memory) used
54 | by training will be released before evaluation (`train_and_evaluate` takes
55 | double resources). Second, more checkpoints will be saved as a checkpoint
56 | is generated at the end of each small training iteration.
57 |
58 | Args:
59 | continuous_eval_predicate_fn: A predicate function determining whether to
60 | continue after each iteration. `predicate_fn` takes the evaluation
61 | results as its arguments. At the beginning of evaluation, the passed
62 | eval results will be None so it's expected that the predicate function
63 | handles that gracefully. When `predicate_fn` is not specified, this will
64 | run in an infinite loop or exit when global_step reaches `train_steps`.
65 |
66 | Returns:
67 | A tuple of the result of the `evaluate` call to the `Estimator` and the
68 | export results using the specified `ExportStrategy`.
69 |
70 | Raises:
71 | ValueError: if `continuous_eval_predicate_fn` is neither None nor
72 | callable.
73 | """
74 |
75 | if (continuous_eval_predicate_fn is not None and
76 | not callable(continuous_eval_predicate_fn)):
77 | raise ValueError(
78 | "`continuous_eval_predicate_fn` must be a callable, or None.")
79 |
80 | eval_result = None
81 |
82 | # Set the default value for train_steps_per_iteration, which will be
83 | # overridden by other settings.
84 | train_steps_per_iteration = 1000
85 | if self._train_steps_per_iteration is not None:
86 | train_steps_per_iteration = self._train_steps_per_iteration
87 | elif self._train_steps is not None:
88 | # train_steps_per_iteration = int(self._train_steps / 10)
89 | train_steps_per_iteration = min(
90 | self._min_eval_frequency, self._train_steps)
91 |
92 | while (not continuous_eval_predicate_fn or
93 | continuous_eval_predicate_fn(eval_result)):
94 |
95 | if self._has_training_stopped(eval_result):
96 | # Exits once max steps of training is satisfied.
97 | tf.logging.info("Stop training model as max steps reached")
98 | break
99 |
100 | tf.logging.info("Training model for %s steps", train_steps_per_iteration)
101 | self._estimator.fit(
102 | input_fn=self._train_input_fn,
103 | steps=train_steps_per_iteration,
104 | monitors=self._train_monitors)
105 |
106 | tf.logging.info("Evaluating model now.")
107 | eval_result = self._estimator.evaluate(
108 | input_fn=self._eval_input_fn,
109 | steps=self._eval_steps,
110 | metrics=self._eval_metrics,
111 | name="one_pass",
112 | hooks=self._eval_hooks)
113 |
114 | return eval_result, self._maybe_export(eval_result)
115 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/contrib/rnn_cell.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Collection of RNN Cells
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | from __future__ import unicode_literals
21 |
22 | import sys
23 | import inspect
24 |
25 | import tensorflow as tf
26 | from tensorflow.python.ops import array_ops # pylint: disable=E0611
27 | from tensorflow.python.util import nest # pylint: disable=E0611
28 | from tensorflow.contrib.rnn import MultiRNNCell # pylint: disable=E0611
29 |
30 | # Import all cell classes from Tensorflow
31 | TF_CELL_CLASSES = [
32 | x for x in tf.contrib.rnn.__dict__.values()
33 | if inspect.isclass(x) and issubclass(x, tf.contrib.rnn.RNNCell)
34 | ]
35 | for cell_class in TF_CELL_CLASSES:
36 | setattr(sys.modules[__name__], cell_class.__name__, cell_class)
37 |
38 |
39 | class ExtendedMultiRNNCell(MultiRNNCell):
40 | """Extends the Tensorflow MultiRNNCell with residual connections"""
41 |
42 | def __init__(self,
43 | cells,
44 | residual_connections=False,
45 | residual_combiner="add",
46 | residual_dense=False):
47 | """Create a RNN cell composed sequentially of a number of RNNCells.
48 |
49 | Args:
50 | cells: list of RNNCells that will be composed in this order.
51 | state_is_tuple: If True, accepted and returned states are n-tuples, where
52 | `n = len(cells)`. If False, the states are all
53 | concatenated along the column axis. This latter behavior will soon be
54 | deprecated.
55 | residual_connections: If true, add residual connections between all cells.
56 | This requires all cells to have the same output_size. Also, iff the
57 | input size is not equal to the cell output size, a linear transform
58 | is added before the first layer.
59 | residual_combiner: One of "add" or "concat". To create inputs for layer
60 | t+1 either "add" the inputs from the prev layer or concat them.
61 | residual_dense: Densely connect each layer to all other layers
62 |
63 | Raises:
64 | ValueError: if cells is empty (not allowed), or at least one of the cells
65 | returns a state tuple but the flag `state_is_tuple` is `False`.
66 | """
67 | super(ExtendedMultiRNNCell, self).__init__(cells, state_is_tuple=True)
68 | assert residual_combiner in ["add", "concat", "mean"]
69 |
70 | self._residual_connections = residual_connections
71 | self._residual_combiner = residual_combiner
72 | self._residual_dense = residual_dense
73 |
74 | def __call__(self, inputs, state, scope=None):
75 | """Run this multi-layer cell on inputs, starting from state."""
76 | if not self._residual_connections:
77 | return super(ExtendedMultiRNNCell, self).__call__(
78 | inputs, state, (scope or "extended_multi_rnn_cell"))
79 |
80 | with tf.variable_scope(scope or "extended_multi_rnn_cell"):
81 | # Adding Residual connections are only possible when input and output
82 | # sizes are equal. Optionally transform the initial inputs to
83 | # `cell[0].output_size`
84 | if self._cells[0].output_size != inputs.get_shape().as_list()[1] and \
85 | (self._residual_combiner in ["add", "mean"]):
86 | inputs = tf.contrib.layers.fully_connected(
87 | inputs=inputs,
88 | num_outputs=self._cells[0].output_size,
89 | activation_fn=None,
90 | scope="input_transform")
91 |
92 | # Iterate through all layers (code from MultiRNNCell)
93 | cur_inp = inputs
94 | prev_inputs = [cur_inp]
95 | new_states = []
96 | for i, cell in enumerate(self._cells):
97 | with tf.variable_scope("cell_%d" % i):
98 | if not nest.is_sequence(state):
99 | raise ValueError(
100 | "Expected state to be a tuple of length %d, but received: %s" %
101 | (len(self.state_size), state))
102 | cur_state = state[i]
103 | next_input, new_state = cell(cur_inp, cur_state)
104 |
105 | # Either combine all previous inputs or only the current input
106 | input_to_combine = prev_inputs[-1:]
107 | if self._residual_dense:
108 | input_to_combine = prev_inputs
109 |
110 | # Add Residual connection
111 | if self._residual_combiner == "add":
112 | next_input = next_input + sum(input_to_combine)
113 | if self._residual_combiner == "mean":
114 | combined_mean = tf.reduce_mean(tf.stack(input_to_combine), 0)
115 | next_input = next_input + combined_mean
116 | elif self._residual_combiner == "concat":
117 | next_input = tf.concat([next_input] + input_to_combine, 1)
118 | cur_inp = next_input
119 | prev_inputs.append(cur_inp)
120 |
121 | new_states.append(new_state)
122 | new_states = (tuple(new_states)
123 | if self._state_is_tuple else array_ops.concat(new_states, 1))
124 | return cur_inp, new_states
125 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/contrib/seq2seq/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Collection of input-related utlities.
15 | """
16 |
17 | from seq2seq.data import input_pipeline
18 | from seq2seq.data import parallel_data_provider
19 | from seq2seq.data import postproc
20 | from seq2seq.data import split_tokens_decoder
21 | from seq2seq.data import vocab
22 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/data/parallel_data_provider.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A Data Provder that reads parallel (aligned) data.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | from __future__ import unicode_literals
21 |
22 | import numpy as np
23 |
24 | import tensorflow as tf
25 | from tensorflow.contrib.slim.python.slim.data import data_provider
26 | from tensorflow.contrib.slim.python.slim.data import parallel_reader
27 |
28 | from seq2seq.data import split_tokens_decoder
29 |
30 |
31 | def make_parallel_data_provider(data_sources_source,
32 | data_sources_target,
33 | reader=tf.TextLineReader,
34 | num_samples=None,
35 | source_delimiter=" ",
36 | target_delimiter=" ",
37 | **kwargs):
38 | """Creates a DataProvider that reads parallel text data.
39 |
40 | Args:
41 | data_sources_source: A list of data sources for the source text files.
42 | data_sources_target: A list of data sources for the target text files.
43 | Can be None for inference mode.
44 | num_samples: Optional, number of records in the dataset
45 | delimiter: Split tokens in the data on this delimiter. Defaults to space.
46 | kwargs: Additional arguments (shuffle, num_epochs, etc) that are passed
47 | to the data provider
48 |
49 | Returns:
50 | A DataProvider instance
51 | """
52 |
53 | decoder_source = split_tokens_decoder.SplitTokensDecoder(
54 | tokens_feature_name="source_tokens",
55 | length_feature_name="source_len",
56 | append_token="SEQUENCE_END",
57 | delimiter=source_delimiter)
58 |
59 | dataset_source = tf.contrib.slim.dataset.Dataset(
60 | data_sources=data_sources_source,
61 | reader=reader,
62 | decoder=decoder_source,
63 | num_samples=num_samples,
64 | items_to_descriptions={})
65 |
66 | dataset_target = None
67 | if data_sources_target is not None:
68 | decoder_target = split_tokens_decoder.SplitTokensDecoder(
69 | tokens_feature_name="target_tokens",
70 | length_feature_name="target_len",
71 | prepend_token="SEQUENCE_START",
72 | append_token="SEQUENCE_END",
73 | delimiter=target_delimiter)
74 |
75 | dataset_target = tf.contrib.slim.dataset.Dataset(
76 | data_sources=data_sources_target,
77 | reader=reader,
78 | decoder=decoder_target,
79 | num_samples=num_samples,
80 | items_to_descriptions={})
81 |
82 | return ParallelDataProvider(
83 | dataset1=dataset_source, dataset2=dataset_target, **kwargs)
84 |
85 |
86 | class ParallelDataProvider(data_provider.DataProvider):
87 | """Creates a ParallelDataProvider. This data provider reads two datasets
88 | in parallel, keeping them aligned.
89 |
90 | Args:
91 | dataset1: The first dataset. An instance of the Dataset class.
92 | dataset2: The second dataset. An instance of the Dataset class.
93 | Can be None. If None, only `dataset1` is read.
94 | num_readers: The number of parallel readers to use.
95 | shuffle: Whether to shuffle the data sources and common queue when
96 | reading.
97 | num_epochs: The number of times each data source is read. If left as None,
98 | the data will be cycled through indefinitely.
99 | common_queue_capacity: The capacity of the common queue.
100 | common_queue_min: The minimum number of elements in the common queue after
101 | a dequeue.
102 | seed: The seed to use if shuffling.
103 | """
104 |
105 | def __init__(self,
106 | dataset1,
107 | dataset2,
108 | shuffle=True,
109 | num_epochs=None,
110 | common_queue_capacity=4096,
111 | common_queue_min=1024,
112 | seed=None):
113 |
114 | if seed is None:
115 | seed = np.random.randint(10e8)
116 |
117 | _, data_source = parallel_reader.parallel_read(
118 | dataset1.data_sources,
119 | reader_class=dataset1.reader,
120 | num_epochs=num_epochs,
121 | num_readers=1,
122 | shuffle=False,
123 | capacity=common_queue_capacity,
124 | min_after_dequeue=common_queue_min,
125 | seed=seed)
126 |
127 | data_target = ""
128 | if dataset2 is not None:
129 | _, data_target = parallel_reader.parallel_read(
130 | dataset2.data_sources,
131 | reader_class=dataset2.reader,
132 | num_epochs=num_epochs,
133 | num_readers=1,
134 | shuffle=False,
135 | capacity=common_queue_capacity,
136 | min_after_dequeue=common_queue_min,
137 | seed=seed)
138 |
139 | # Optionally shuffle the data
140 | if shuffle:
141 | shuffle_queue = tf.RandomShuffleQueue(
142 | capacity=common_queue_capacity,
143 | min_after_dequeue=common_queue_min,
144 | dtypes=[tf.string, tf.string],
145 | seed=seed)
146 | enqueue_ops = []
147 | enqueue_ops.append(shuffle_queue.enqueue([data_source, data_target]))
148 | tf.train.add_queue_runner(
149 | tf.train.QueueRunner(shuffle_queue, enqueue_ops))
150 | data_source, data_target = shuffle_queue.dequeue()
151 |
152 | # Decode source items
153 | items = dataset1.decoder.list_items()
154 | tensors = dataset1.decoder.decode(data_source, items)
155 |
156 | if dataset2 is not None:
157 | # Decode target items
158 | items2 = dataset2.decoder.list_items()
159 | tensors2 = dataset2.decoder.decode(data_target, items2)
160 |
161 | # Merge items and results
162 | items = items + items2
163 | tensors = tensors + tensors2
164 |
165 | super(ParallelDataProvider, self).__init__(
166 | items_to_tensors=dict(zip(items, tensors)),
167 | num_samples=dataset1.num_samples)
168 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/data/postproc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """
17 | A collection of commonly used post-processing functions.
18 | """
19 |
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 | from __future__ import unicode_literals
24 |
25 | def strip_bpe(text):
26 |   """Decodes text that was processed using BPE from
27 | https://github.com/rsennrich/subword-nmt"""
28 | return text.replace("@@ ", "").strip()
29 |
30 | def decode_sentencepiece(text):
31 | """Decodes text that uses https://github.com/google/sentencepiece encoding.
32 | Assumes that pieces are separated by a space"""
33 | return "".join(text.split(" ")).replace("▁", " ").strip()
34 |
35 | def slice_text(text,
36 | eos_token="SEQUENCE_END",
37 | sos_token="SEQUENCE_START"):
38 | """Slices text from SEQUENCE_START to SEQUENCE_END, not including
39 | these special tokens.
40 | """
41 | eos_index = text.find(eos_token)
42 | text = text[:eos_index] if eos_index > -1 else text
43 | sos_index = text.find(sos_token)
44 | text = text[sos_index+len(sos_token):] if sos_index > -1 else text
45 | return text.strip()
46 |
--------------------------------------------------------------------------------
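The three helpers above are plain string functions, so they are easy to check in isolation. An illustrative example with made-up strings (assuming the package is on PYTHONPATH):

from seq2seq.data.postproc import strip_bpe, decode_sentencepiece, slice_text

print(strip_bpe("new@@ castle is a city"))
# -> "newcastle is a city"

print(decode_sentencepiece("▁the ▁quick ▁brown ▁fox"))
# -> "the quick brown fox"

print(slice_text("SEQUENCE_START the summary text SEQUENCE_END padding"))
# -> "the summary text"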
/TensorFlow_implementation/seq2seq/data/sequence_example_decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A decoder for tf.SequenceExample"""
15 |
16 | import tensorflow as tf
17 | from tensorflow.contrib.slim.python.slim.data import data_decoder
18 |
19 |
20 | class TFSEquenceExampleDecoder(data_decoder.DataDecoder):
21 |   """A decoder for TensorFlow SequenceExamples.
22 |   Decoding SequenceExample proto buffers is comprised of two stages:
23 |   (1) example parsing and (2) tensor manipulation.
24 |   In the first stage, the tf.parse_single_sequence_example function is called
25 |   with dicts of FixedLenFeatures (context) and FixedLenSequenceFeatures
26 |   (sequence). These instances tell TF how to parse the example into tensors.
27 | In the second stage, the resulting tensors are manipulated to provide the
28 | requested 'item' tensors.
29 | To perform this decoding operation, an ExampleDecoder is given a list of
30 | ItemHandlers. Each ItemHandler indicates the set of features for stage 1 and
31 | contains the instructions for post_processing its tensors for stage 2.
32 | """
33 |
34 | def __init__(self, context_keys_to_features, sequence_keys_to_features,
35 | items_to_handlers):
36 | """Constructs the decoder.
37 | Args:
38 |       context_keys_to_features: a dictionary from context keys to
39 |         tf.FixedLenFeature or tf.VarLenFeature instances (see parsing_ops.py).
40 |       sequence_keys_to_features: the same mapping for sequence feature keys.
41 | items_to_handlers: a dictionary from items (strings) to ItemHandler
42 | instances. Note that the ItemHandler's are provided the keys that they
43 | use to return the final item Tensors.
44 | """
45 | self._context_keys_to_features = context_keys_to_features
46 | self._sequence_keys_to_features = sequence_keys_to_features
47 | self._items_to_handlers = items_to_handlers
48 |
49 | def list_items(self):
50 | """See base class."""
51 | return list(self._items_to_handlers.keys())
52 |
53 | def decode(self, serialized_example, items=None):
54 | """Decodes the given serialized TF-example.
55 | Args:
56 | serialized_example: a serialized TF-example tensor.
57 | items: the list of items to decode. These must be a subset of the item
58 | keys in self._items_to_handlers. If `items` is left as None, then all
59 | of the items in self._items_to_handlers are decoded.
60 | Returns:
61 |       the decoded items, a list of tensors.
62 | """
63 | context, sequence = tf.parse_single_sequence_example(
64 | serialized_example, self._context_keys_to_features,
65 | self._sequence_keys_to_features)
66 |
67 | # Merge context and sequence features
68 | example = {}
69 | example.update(context)
70 | example.update(sequence)
71 |
72 | all_features = {}
73 | all_features.update(self._context_keys_to_features)
74 | all_features.update(self._sequence_keys_to_features)
75 |
76 | # Reshape non-sparse elements just once:
77 | for k, value in all_features.items():
78 | if isinstance(value, tf.FixedLenFeature):
79 | example[k] = tf.reshape(example[k], value.shape)
80 |
81 | if not items:
82 | items = self._items_to_handlers.keys()
83 |
84 | outputs = []
85 | for item in items:
86 | handler = self._items_to_handlers[item]
87 | keys_to_tensors = {key: example[key] for key in handler.keys}
88 | outputs.append(handler.tensors_to_item(keys_to_tensors))
89 | return outputs
90 |
--------------------------------------------------------------------------------
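The decoder above performs its first stage with `tf.parse_single_sequence_example`. A hedged, self-contained sketch of that parsing step only, building a toy `tf.train.SequenceExample` by hand (TF 1.x graph mode; the feature names are made up):

import tensorflow as tf

# Build a toy SequenceExample with one context feature and one sequence feature.
example = tf.train.SequenceExample()
example.context.feature["length"].int64_list.value.append(3)
token_list = example.feature_lists.feature_list["tokens"]
for tok in [b"the", b"quick", b"fox"]:
    token_list.feature.add().bytes_list.value.append(tok)

# Stage 1: parse the serialized proto back into tensors.
context, sequence = tf.parse_single_sequence_example(
    example.SerializeToString(),
    context_features={"length": tf.FixedLenFeature([], tf.int64)},
    sequence_features={"tokens": tf.FixedLenSequenceFeature([], tf.string)})

with tf.Session() as sess:
    print(sess.run({"length": context["length"], "tokens": sequence["tokens"]}))
    # e.g. {'length': 3, 'tokens': array([b'the', b'quick', b'fox'], dtype=object)}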
/TensorFlow_implementation/seq2seq/data/split_tokens_decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A decoder that splits a string into tokens and returns the
15 | individual tokens and the length.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 | from tensorflow.contrib.slim.python.slim.data import data_decoder
25 |
26 |
27 | class SplitTokensDecoder(data_decoder.DataDecoder):
28 |   """A DataDecoder that splits a string tensor into individual tokens and
29 | returns the tokens and the length.
30 | Optionally prepends or appends special tokens.
31 |
32 | Args:
33 | delimiter: Delimiter to split on. Must be a single character.
34 | tokens_feature_name: A descriptive feature name for the token values
35 | length_feature_name: A descriptive feature name for the length value
36 | """
37 |
38 | def __init__(self,
39 | delimiter=" ",
40 | tokens_feature_name="tokens",
41 | length_feature_name="length",
42 | prepend_token=None,
43 | append_token=None):
44 | self.delimiter = delimiter
45 | self.tokens_feature_name = tokens_feature_name
46 | self.length_feature_name = length_feature_name
47 | self.prepend_token = prepend_token
48 | self.append_token = append_token
49 |
50 | def decode(self, data, items):
51 | decoded_items = {}
52 |
53 | # Split tokens
54 | tokens = tf.string_split([data], delimiter=self.delimiter).values
55 |
56 | # Optionally prepend a special token
57 | if self.prepend_token is not None:
58 | tokens = tf.concat([[self.prepend_token], tokens], 0)
59 |
60 | # Optionally append a special token
61 | if self.append_token is not None:
62 | tokens = tf.concat([tokens, [self.append_token]], 0)
63 |
64 | decoded_items[self.length_feature_name] = tf.size(tokens)
65 | decoded_items[self.tokens_feature_name] = tokens
66 | return [decoded_items[_] for _ in items]
67 |
68 | def list_items(self):
69 | return [self.tokens_feature_name, self.length_feature_name]
70 |
--------------------------------------------------------------------------------
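A short usage sketch of the decoder above (TF 1.x graph mode): split a single space-delimited string and fetch the two items it exposes. The input string and the special tokens are just examples.

import tensorflow as tf
from seq2seq.data.split_tokens_decoder import SplitTokensDecoder

decoder = SplitTokensDecoder(
    prepend_token="SEQUENCE_START", append_token="SEQUENCE_END")

data = tf.constant("the quick brown fox")
tokens, length = decoder.decode(data, decoder.list_items())

with tf.Session() as sess:
    print(sess.run([tokens, length]))
    # e.g. [array([b'SEQUENCE_START', b'the', b'quick', b'brown', b'fox',
    #              b'SEQUENCE_END'], dtype=object), 6]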
/TensorFlow_implementation/seq2seq/data/vocab.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Vocabulary related functions.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import collections
22 | import tensorflow as tf
23 | from tensorflow import gfile
24 |
25 | SpecialVocab = collections.namedtuple("SpecialVocab",
26 | ["UNK", "SEQUENCE_START", "SEQUENCE_END"])
27 |
28 |
29 | class VocabInfo(
30 |     collections.namedtuple("VocabInfo",
31 | ["path", "vocab_size", "special_vocab"])):
32 | """Convenience structure for vocabulary information.
33 | """
34 |
35 | @property
36 | def total_size(self):
37 |     """Returns the size of the base vocabulary plus the special vocabulary."""
38 | return self.vocab_size + len(self.special_vocab)
39 |
40 |
41 | def get_vocab_info(vocab_path):
42 | """Creates a `VocabInfo` instance that contains the vocabulary size and
43 | the special vocabulary for the given file.
44 |
45 | Args:
46 | vocab_path: Path to a vocabulary file with one word per line.
47 |
48 | Returns:
49 | A VocabInfo tuple.
50 | """
51 | with gfile.GFile(vocab_path) as file:
52 | vocab_size = sum(1 for _ in file)
53 | special_vocab = get_special_vocab(vocab_size)
54 | return VocabInfo(vocab_path, vocab_size, special_vocab)
55 |
56 |
57 | def get_special_vocab(vocabulary_size):
58 | """Returns the `SpecialVocab` instance for a given vocabulary size.
59 | """
60 | return SpecialVocab(*range(vocabulary_size, vocabulary_size + 3))
61 |
62 |
63 | def create_vocabulary_lookup_table(filename, default_value=None):
64 | """Creates a lookup table for a vocabulary file.
65 |
66 | Args:
67 |     filename: Path to a vocabulary file containing one word per line.
68 | Each word is mapped to its line number.
69 | default_value: UNK tokens will be mapped to this id.
70 | If None, UNK tokens will be mapped to [vocab_size]
71 |
72 | Returns:
73 | A tuple (vocab_to_id_table, id_to_vocab_table,
74 | word_to_count_table, vocab_size). The vocab size does not include
75 | the UNK token.
76 | """
77 | if not gfile.Exists(filename):
78 | raise ValueError("File does not exist: {}".format(filename))
79 |
80 | # Load vocabulary into memory
81 | with gfile.GFile(filename) as file:
82 | vocab = list(line.strip("\n") for line in file)
83 | vocab_size = len(vocab)
84 |
85 | has_counts = len(vocab[0].split("\t")) == 2
86 | if has_counts:
87 | vocab, counts = zip(*[_.split("\t") for _ in vocab])
88 | counts = [float(_) for _ in counts]
89 | vocab = list(vocab)
90 | else:
91 | counts = [-1. for _ in vocab]
92 |
93 | # Add special vocabulary items
94 | special_vocab = get_special_vocab(vocab_size)
95 | vocab += list(special_vocab._fields)
96 | vocab_size += len(special_vocab)
97 | counts += [-1. for _ in list(special_vocab._fields)]
98 |
99 | if default_value is None:
100 | default_value = special_vocab.UNK
101 |
102 | tf.logging.info("Creating vocabulary lookup table of size %d", vocab_size)
103 |
104 | vocab_tensor = tf.constant(vocab)
105 | count_tensor = tf.constant(counts, dtype=tf.float32)
106 | vocab_idx_tensor = tf.range(vocab_size, dtype=tf.int64)
107 |
108 | # Create ID -> word mapping
109 | id_to_vocab_init = tf.contrib.lookup.KeyValueTensorInitializer(
110 | vocab_idx_tensor, vocab_tensor, tf.int64, tf.string)
111 | id_to_vocab_table = tf.contrib.lookup.HashTable(id_to_vocab_init, "UNK")
112 |
113 | # Create word -> id mapping
114 | vocab_to_id_init = tf.contrib.lookup.KeyValueTensorInitializer(
115 | vocab_tensor, vocab_idx_tensor, tf.string, tf.int64)
116 | vocab_to_id_table = tf.contrib.lookup.HashTable(vocab_to_id_init,
117 | default_value)
118 |
119 | # Create word -> count mapping
120 | word_to_count_init = tf.contrib.lookup.KeyValueTensorInitializer(
121 | vocab_tensor, count_tensor, tf.string, tf.float32)
122 | word_to_count_table = tf.contrib.lookup.HashTable(word_to_count_init, -1)
123 |
124 | return vocab_to_id_table, id_to_vocab_table, word_to_count_table, vocab_size
125 |
--------------------------------------------------------------------------------
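A hedged end-to-end sketch of the vocabulary helpers above: write a tiny vocabulary file, build the lookup tables, and map a few words to ids (TF 1.x; the path and the words are made up). Out-of-vocabulary words fall back to the UNK id, which sits just past the base vocabulary.

import tensorflow as tf
from seq2seq.data import vocab

# Hypothetical three-word vocabulary, one word per line.
with open("/tmp/toy_vocab.txt", "w") as f:
    f.write("the\ncat\nsat\n")

vocab_info = vocab.get_vocab_info("/tmp/toy_vocab.txt")
print(vocab_info.vocab_size, vocab_info.total_size)   # 3 and 6 (3 + UNK/START/END)

vocab_to_id, id_to_vocab, word_to_count, vocab_size = \
    vocab.create_vocabulary_lookup_table("/tmp/toy_vocab.txt")

ids = vocab_to_id.lookup(tf.constant(["cat", "sat", "dog"]))
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(ids))   # e.g. [1 2 3] -- "dog" maps to the UNK id (3)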
/TensorFlow_implementation/seq2seq/decoders/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Collection of decoders and decoder-related functions.
15 | """
16 |
17 | from seq2seq.decoders.rnn_decoder import *
18 | from seq2seq.decoders.attention import *
19 | from seq2seq.decoders.basic_decoder import *
20 | from seq2seq.decoders.attention_decoder import *
21 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/decoders/attention.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Implementations of attention layers.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | from __future__ import unicode_literals
21 |
22 | import abc
23 | import six
24 |
25 | import tensorflow as tf
26 | from tensorflow.python.framework import function # pylint: disable=E0611
27 |
28 | from seq2seq.graph_module import GraphModule
29 | from seq2seq.configurable import Configurable
30 |
31 |
32 | @function.Defun(
33 | tf.float32,
34 | tf.float32,
35 | tf.float32,
36 | func_name="att_sum_bahdanau",
37 | noinline=True)
38 | def att_sum_bahdanau(v_att, keys, query):
39 |   """Calculates a batch- and timewise dot product with a variable"""
40 | return tf.reduce_sum(v_att * tf.tanh(keys + tf.expand_dims(query, 1)), [2])
41 |
42 |
43 | @function.Defun(tf.float32, tf.float32, func_name="att_sum_dot", noinline=True)
44 | def att_sum_dot(keys, query):
45 |   """Calculates a batch- and timewise dot product"""
46 | return tf.reduce_sum(keys * tf.expand_dims(query, 1), [2])
47 |
48 |
49 | @six.add_metaclass(abc.ABCMeta)
50 | class AttentionLayer(GraphModule, Configurable):
51 | """
52 | Attention layer according to https://arxiv.org/abs/1409.0473.
53 |
54 | Params:
55 | num_units: Number of units used in the attention layer
56 | """
57 |
58 | def __init__(self, params, mode, name="attention"):
59 | GraphModule.__init__(self, name)
60 | Configurable.__init__(self, params, mode)
61 |
62 | @staticmethod
63 | def default_params():
64 | return {"num_units": 128}
65 |
66 | @abc.abstractmethod
67 | def score_fn(self, keys, query):
68 | """Computes the attention score"""
69 | raise NotImplementedError
70 |
71 | def _build(self, query, keys, values, values_length):
72 | """Computes attention scores and outputs.
73 |
74 | Args:
75 | query: The query used to calculate attention scores.
76 | In seq2seq this is typically the current state of the decoder.
77 | A tensor of shape `[B, ...]`
78 | keys: The keys used to calculate attention scores. In seq2seq, these
79 | are typically the outputs of the encoder and equivalent to `values`.
80 | A tensor of shape `[B, T, ...]` where each element in the `T`
81 | dimension corresponds to the key for that value.
82 | values: The elements to compute attention over. In seq2seq, this is
83 | typically the sequence of encoder outputs.
84 | A tensor of shape `[B, T, input_dim]`.
85 | values_length: An int32 tensor of shape `[B]` defining the sequence
86 | length of the attention values.
87 |
88 | Returns:
89 | A tuple `(scores, context)`.
90 |       `scores` is a vector of length `T` where each element is the
91 | normalized "score" of the corresponding `inputs` element.
92 | `context` is the final attention layer output corresponding to
93 | the weighted inputs.
94 |         A tensor of shape `[B, input_dim]`.
95 | """
96 | values_depth = values.get_shape().as_list()[-1]
97 |
98 | # Fully connected layers to transform both keys and query
99 | # into a tensor with `num_units` units
100 | att_keys = tf.contrib.layers.fully_connected(
101 | inputs=keys,
102 | num_outputs=self.params["num_units"],
103 | activation_fn=None,
104 | scope="att_keys")
105 | att_query = tf.contrib.layers.fully_connected(
106 | inputs=query,
107 | num_outputs=self.params["num_units"],
108 | activation_fn=None,
109 | scope="att_query")
110 |
111 | scores = self.score_fn(att_keys, att_query)
112 |
113 | # Replace all scores for padded inputs with tf.float32.min
114 | num_scores = tf.shape(scores)[1]
115 | scores_mask = tf.sequence_mask(
116 | lengths=tf.to_int32(values_length),
117 | maxlen=tf.to_int32(num_scores),
118 | dtype=tf.float32)
119 | scores = scores * scores_mask + ((1.0 - scores_mask) * tf.float32.min)
120 |
121 | # Normalize the scores
122 | scores_normalized = tf.nn.softmax(scores, name="scores_normalized")
123 |
124 | # Calculate the weighted average of the attention inputs
125 | # according to the scores
126 | context = tf.expand_dims(scores_normalized, 2) * values
127 | context = tf.reduce_sum(context, 1, name="context")
128 | context.set_shape([None, values_depth])
129 |
130 |
131 | return (scores_normalized, context)
132 |
133 |
134 | class AttentionLayerDot(AttentionLayer):
135 | """An attention layer that calculates attention scores using
136 | a dot product.
137 | """
138 |
139 | def score_fn(self, keys, query):
140 | return att_sum_dot(keys, query)
141 |
142 |
143 | class AttentionLayerBahdanau(AttentionLayer):
144 | """An attention layer that calculates attention scores using
145 | a parameterized multiplication."""
146 |
147 | def score_fn(self, keys, query):
148 | v_att = tf.get_variable(
149 | "v_att", shape=[self.params["num_units"]], dtype=tf.float32)
150 | return att_sum_bahdanau(v_att, keys, query)
151 |
--------------------------------------------------------------------------------
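To make the two score functions above concrete, here is a small NumPy sketch of the same math on toy shapes: projected keys of shape [B, T, num_units], a projected query of shape [B, num_units], and scores of shape [B, T]. The real layer additionally masks padded positions before the softmax; the toy values below are arbitrary.

import numpy as np

B, T, units = 2, 4, 8
keys = np.random.randn(B, T, units).astype(np.float32)   # projected encoder outputs
query = np.random.randn(B, units).astype(np.float32)     # projected decoder state
v_att = np.random.randn(units).astype(np.float32)

# att_sum_dot: batch- and timewise dot product.
scores_dot = np.sum(keys * query[:, None, :], axis=2)                        # [B, T]

# att_sum_bahdanau: tanh(keys + query) weighted by the learned vector v_att.
scores_bahdanau = np.sum(v_att * np.tanh(keys + query[:, None, :]), axis=2)  # [B, T]

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

print(softmax(scores_dot).shape, softmax(scores_bahdanau).shape)  # (2, 4) (2, 4)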
/TensorFlow_implementation/seq2seq/decoders/basic_decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | A basic sequence decoder that performs a softmax based on the RNN state.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 | from seq2seq.decoders.rnn_decoder import RNNDecoder, DecoderOutput
25 |
26 |
27 | class BasicDecoder(RNNDecoder):
28 |   """Simple RNN decoder that performs a softmax operation on the cell output.
29 | """
30 |
31 | def __init__(self, params, mode, vocab_size, name="basic_decoder"):
32 | super(BasicDecoder, self).__init__(params, mode, name)
33 | self.vocab_size = vocab_size
34 |
35 | def compute_output(self, cell_output):
36 | """Computes the decoder outputs."""
37 | return tf.contrib.layers.fully_connected(
38 | inputs=cell_output, num_outputs=self.vocab_size, activation_fn=None)
39 |
40 | @property
41 | def output_size(self):
42 | return DecoderOutput(
43 | logits=self.vocab_size,
44 | predicted_ids=tf.TensorShape([]),
45 | cell_output=self.cell.output_size)
46 |
47 | @property
48 | def output_dtype(self):
49 | return DecoderOutput(
50 | logits=tf.float32, predicted_ids=tf.int32, cell_output=tf.float32)
51 |
52 | def initialize(self, name=None):
53 | finished, first_inputs = self.helper.initialize()
54 | return finished, first_inputs, self.initial_state
55 |
56 | def step(self, time_, inputs, state, name=None):
57 | cell_output, cell_state = self.cell(inputs, state)
58 | logits = self.compute_output(cell_output)
59 | sample_ids = self.helper.sample(
60 | time=time_, outputs=logits, state=cell_state)
61 | outputs = DecoderOutput(
62 | logits=logits, predicted_ids=sample_ids, cell_output=cell_output)
63 | finished, next_inputs, next_state = self.helper.next_inputs(
64 | time=time_, outputs=outputs, state=cell_state, sample_ids=sample_ids)
65 | return (outputs, next_state, next_inputs, finished)
66 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/decoders/rnn_decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for sequence decoders.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import abc
24 | from collections import namedtuple
25 |
26 | import six
27 | import tensorflow as tf
28 | from tensorflow.python.util import nest # pylint: disable=E0611
29 |
30 | from seq2seq.graph_module import GraphModule
31 | from seq2seq.configurable import Configurable
32 | from seq2seq.contrib.seq2seq.decoder import Decoder, dynamic_decode
33 | from seq2seq.encoders.rnn_encoder import _default_rnn_cell_params
34 | from seq2seq.encoders.rnn_encoder import _toggle_dropout
35 | from seq2seq.training import utils as training_utils
36 |
37 |
38 | class DecoderOutput(
39 | namedtuple("DecoderOutput", ["logits", "predicted_ids", "cell_output"])):
40 | """Output of an RNN decoder.
41 |
42 | Note that we output both the logits and predictions because during
43 | dynamic decoding the predictions may not correspond to max(logits).
44 | For example, we may be sampling from the logits instead.
45 | """
46 | pass
47 |
48 |
49 | @six.add_metaclass(abc.ABCMeta)
50 | class RNNDecoder(Decoder, GraphModule, Configurable):
51 | """Base class for RNN decoders.
52 |
53 | Args:
54 |     params: A dictionary of hyperparameters. The `rnn_cell` entry configures
55 |       the underlying `tf.contrib.rnn.RNNCell`.
56 |     mode: A `tf.contrib.learn.ModeKeys` value; dropout is disabled outside
57 |       of training.
58 |     name: A name for this module. The helper and initial state are set via `_setup`.
59 | """
60 |
61 | def __init__(self, params, mode, name):
62 | GraphModule.__init__(self, name)
63 | Configurable.__init__(self, params, mode)
64 | self.params["rnn_cell"] = _toggle_dropout(self.params["rnn_cell"], mode)
65 | self.cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
66 | # Not initialized yet
67 | self.initial_state = None
68 | self.helper = None
69 |
70 | @abc.abstractmethod
71 | def initialize(self, name=None):
72 | raise NotImplementedError
73 |
74 | @abc.abstractmethod
75 | def step(self, name=None):
76 | raise NotImplementedError
77 |
78 | @property
79 | def batch_size(self):
80 | return tf.shape(nest.flatten([self.initial_state])[0])[0]
81 |
82 | def _setup(self, initial_state, helper):
83 | """Sets the initial state and helper for the decoder.
84 | """
85 | self.initial_state = initial_state
86 | self.helper = helper
87 |
88 | def finalize(self, outputs, final_state):
89 | """Applies final transformation to the decoder output once decoding is
90 | finished.
91 | """
92 | #pylint: disable=R0201
93 | return (outputs, final_state)
94 |
95 | @staticmethod
96 | def default_params():
97 | return {
98 | "max_decode_length": 100,
99 | "rnn_cell": _default_rnn_cell_params(),
100 | "init_scale": 0.04,
101 | }
102 |
103 | def _build(self, initial_state, helper):
104 | if not self.initial_state:
105 | self._setup(initial_state, helper)
106 |
107 | scope = tf.get_variable_scope()
108 | scope.set_initializer(tf.random_uniform_initializer(
109 | -self.params["init_scale"],
110 | self.params["init_scale"]))
111 |
112 | maximum_iterations = None
113 | if self.mode == tf.contrib.learn.ModeKeys.INFER:
114 | maximum_iterations = self.params["max_decode_length"]
115 |
116 | outputs, final_state = dynamic_decode(
117 | decoder=self,
118 | output_time_major=True,
119 | impute_finished=False,
120 | maximum_iterations=maximum_iterations)
121 | return self.finalize(outputs, final_state)
122 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/encoders/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Collection of encoders"""
15 |
16 | import seq2seq.encoders.encoder
17 | import seq2seq.encoders.rnn_encoder
18 |
19 | from seq2seq.encoders.rnn_encoder import *
20 | from seq2seq.encoders.image_encoder import *
21 | from seq2seq.encoders.pooling_encoder import PoolingEncoder
22 | from seq2seq.encoders.conv_encoder import ConvEncoder
23 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/encoders/conv_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | A deep convolutional encoder, as described in
16 | https://arxiv.org/abs/1611.02344.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | from pydoc import locate
24 |
25 | import tensorflow as tf
26 |
27 | from seq2seq.encoders.encoder import Encoder, EncoderOutput
28 | from seq2seq.encoders.pooling_encoder import _create_position_embedding
29 |
30 |
31 | class ConvEncoder(Encoder):
32 | """A deep convolutional encoder, as described in
33 |   https://arxiv.org/abs/1611.02344. The encoder supports optional position
34 | embeddings.
35 |
36 | Params:
37 | attention_cnn.units: Number of units in `cnn_a`. Same in each layer.
38 | attention_cnn.kernel_size: Kernel size for `cnn_a`.
39 | attention_cnn.layers: Number of layers in `cnn_a`.
40 | embedding_dropout_keep_prob: Dropout keep probability
41 | applied to the embeddings.
42 | output_cnn.units: Number of units in `cnn_c`. Same in each layer.
43 | output_cnn.kernel_size: Kernel size for `cnn_c`.
44 | output_cnn.layers: Number of layers in `cnn_c`.
45 | position_embeddings.enable: If true, add position embeddings to the
46 | inputs before pooling.
47 | position_embeddings.combiner_fn: Function used to combine the
48 | position embeddings with the inputs. For example, `tensorflow.add`.
49 | position_embeddings.num_positions: Size of the position embedding matrix.
50 | This should be set to the maximum sequence length of the inputs.
51 | """
52 |
53 | def __init__(self, params, mode, name="conv_encoder"):
54 | super(ConvEncoder, self).__init__(params, mode, name)
55 | self._combiner_fn = locate(self.params["position_embeddings.combiner_fn"])
56 |
57 | @staticmethod
58 | def default_params():
59 | return {
60 | "attention_cnn.units": 512,
61 | "attention_cnn.kernel_size": 3,
62 | "attention_cnn.layers": 15,
63 | "embedding_dropout_keep_prob": 0.8,
64 | "output_cnn.units": 256,
65 | "output_cnn.kernel_size": 3,
66 | "output_cnn.layers": 5,
67 | "position_embeddings.enable": True,
68 | "position_embeddings.combiner_fn": "tensorflow.multiply",
69 | "position_embeddings.num_positions": 100,
70 | }
71 |
72 | def encode(self, inputs, sequence_length):
73 | if self.params["position_embeddings.enable"]:
74 | positions_embed = _create_position_embedding(
75 | embedding_dim=inputs.get_shape().as_list()[-1],
76 | num_positions=self.params["position_embeddings.num_positions"],
77 | lengths=sequence_length,
78 | maxlen=tf.shape(inputs)[1])
79 | inputs = self._combiner_fn(inputs, positions_embed)
80 |
81 | # Apply dropout to embeddings
82 | inputs = tf.contrib.layers.dropout(
83 | inputs=inputs,
84 | keep_prob=self.params["embedding_dropout_keep_prob"],
85 | is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)
86 |
87 | with tf.variable_scope("cnn_a"):
88 | cnn_a_output = inputs
89 | for layer_idx in range(self.params["attention_cnn.layers"]):
90 | next_layer = tf.contrib.layers.conv2d(
91 | inputs=cnn_a_output,
92 | num_outputs=self.params["attention_cnn.units"],
93 | kernel_size=self.params["attention_cnn.kernel_size"],
94 | padding="SAME",
95 | activation_fn=None)
96 | # Add a residual connection, except for the first layer
97 | if layer_idx > 0:
98 | next_layer += cnn_a_output
99 | cnn_a_output = tf.tanh(next_layer)
100 |
101 | with tf.variable_scope("cnn_c"):
102 | cnn_c_output = inputs
103 | for layer_idx in range(self.params["output_cnn.layers"]):
104 | next_layer = tf.contrib.layers.conv2d(
105 | inputs=cnn_c_output,
106 | num_outputs=self.params["output_cnn.units"],
107 | kernel_size=self.params["output_cnn.kernel_size"],
108 | padding="SAME",
109 | activation_fn=None)
110 | # Add a residual connection, except for the first layer
111 | if layer_idx > 0:
112 | next_layer += cnn_c_output
113 | cnn_c_output = tf.tanh(next_layer)
114 |
115 | final_state = tf.reduce_mean(cnn_c_output, 1)
116 |
117 | return EncoderOutput(
118 | outputs=cnn_a_output,
119 | final_state=final_state,
120 | attention_values=cnn_c_output,
121 | attention_values_length=sequence_length)
122 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/encoders/encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Abstract base class for encoders.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import abc
23 | from collections import namedtuple
24 |
25 | import six
26 |
27 | from seq2seq.configurable import Configurable
28 | from seq2seq.graph_module import GraphModule
29 |
30 | EncoderOutput = namedtuple(
31 | "EncoderOutput",
32 | "outputs final_state attention_values attention_values_length")
33 |
34 |
35 | @six.add_metaclass(abc.ABCMeta)
36 | class Encoder(GraphModule, Configurable):
37 | """Abstract encoder class. All encoders should inherit from this.
38 |
39 | Args:
40 | params: A dictionary of hyperparameters for the encoder.
41 | name: A variable scope for the encoder graph.
42 | """
43 |
44 | def __init__(self, params, mode, name):
45 | GraphModule.__init__(self, name)
46 | Configurable.__init__(self, params, mode)
47 |
48 | def _build(self, inputs, *args, **kwargs):
49 | return self.encode(inputs, *args, **kwargs)
50 |
51 | @abc.abstractmethod
52 | def encode(self, *args, **kwargs):
53 | """
54 | Encodes an input sequence.
55 |
56 | Args:
57 | inputs: The inputs to encode. A float32 tensor of shape [B, T, ...].
58 |       sequence_length: The length of each input. An int32 tensor of shape [B].
59 |
60 | Returns:
61 | An `EncoderOutput` tuple containing the outputs and final state.
62 | """
63 | raise NotImplementedError
64 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/encoders/image_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Image encoder classes
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import tensorflow as tf
23 | from tensorflow.contrib.slim.python.slim.nets.inception_v3 \
24 | import inception_v3_base
25 |
26 | from seq2seq.encoders.encoder import Encoder, EncoderOutput
27 |
28 |
29 | class InceptionV3Encoder(Encoder):
30 | """
31 |   An image encoder based on the Inception V3 convolutional network.
32 |   The input images are resized before being fed to the network.
33 |
34 | Params:
35 | resize_height: Resize the image to this height before feeding it
36 | into the convolutional network.
37 | resize_width: Resize the image to this width before feeding it
38 | into the convolutional network.
39 | """
40 |
41 | def __init__(self, params, mode, name="image_encoder"):
42 | super(InceptionV3Encoder, self).__init__(params, mode, name)
43 |
44 | @staticmethod
45 | def default_params():
46 | return {
47 | "resize_height": 299,
48 | "resize_width": 299,
49 | }
50 |
51 | def encode(self, inputs):
52 | inputs = tf.image.resize_images(
53 | images=inputs,
54 | size=[self.params["resize_height"], self.params["resize_width"]],
55 | method=tf.image.ResizeMethod.BILINEAR)
56 |
57 | outputs, _ = inception_v3_base(tf.to_float(inputs))
58 | output_shape = outputs.get_shape() #pylint: disable=E1101
59 | shape_list = output_shape.as_list()
60 |
61 |     # Take attention over output elements in the width and height dimensions:
62 | # Shape: [B, W*H, ...]
63 | outputs_flat = tf.reshape(outputs, [shape_list[0], -1, shape_list[-1]])
64 |
65 | # Final state is the pooled output
66 | # Shape: [B, W*H*...]
67 | final_state = tf.contrib.slim.avg_pool2d(
68 | outputs, output_shape[1:3], padding="VALID", scope="pool")
69 |     final_state = tf.contrib.slim.flatten(final_state, scope="flatten")
70 |
71 | return EncoderOutput(
72 | outputs=outputs_flat,
73 | final_state=final_state,
74 | attention_values=outputs_flat,
75 | attention_values_length=tf.shape(outputs_flat)[1])
76 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/encoders/pooling_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | An encoder that pools over embeddings, as described in
16 | https://arxiv.org/abs/1611.02344.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | from pydoc import locate
24 |
25 | import numpy as np
26 | import tensorflow as tf
27 |
28 | from seq2seq.encoders.encoder import Encoder, EncoderOutput
29 |
30 |
31 | def position_encoding(sentence_size, embedding_size):
32 | """
33 | Position Encoding described in section 4.1 of
34 | End-To-End Memory Networks (https://arxiv.org/abs/1503.08895).
35 |
36 | Args:
37 | sentence_size: length of the sentence
38 | embedding_size: dimensionality of the embeddings
39 |
40 | Returns:
41 | A numpy array of shape [sentence_size, embedding_size] containing
42 | the fixed position encodings for each sentence position.
43 | """
44 | encoding = np.ones((sentence_size, embedding_size), dtype=np.float32)
45 | ls = sentence_size + 1
46 | le = embedding_size + 1
47 | for k in range(1, le):
48 | for j in range(1, ls):
49 | encoding[j-1, k-1] = (1.0 - j/float(ls)) - (
50 | k / float(le)) * (1. - 2. * j/float(ls))
51 | return encoding
52 |
53 |
54 | def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
55 | """Creates position embeddings.
56 |
57 | Args:
58 | embedding_dim: Dimensionality of the embeddings. An integer.
59 | num_positions: The number of positions to be embedded. For example,
60 | if you have inputs of length up to 100, this should be 100. An integer.
61 | lengths: The lengths of the inputs to create position embeddings for.
62 | An int32 tensor of shape `[batch_size]`.
63 | maxlen: The maximum length of the input sequence to create position
64 | embeddings for. An int32 tensor.
65 |
66 | Returns:
67 | A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
68 | embeddings for each position. All elements past `lengths` are zero.
69 | """
70 | # Create constant position encodings
71 | position_encodings = tf.constant(
72 | position_encoding(num_positions, embedding_dim),
73 | name="position_encoding")
74 |
75 | # Slice to size of current sequence
76 | pe_slice = position_encodings[:maxlen, :]
77 | # Replicate encodings for each element in the batch
78 | batch_size = tf.shape(lengths)[0]
79 | pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])
80 |
81 | # Mask out positions that are padded
82 | positions_mask = tf.sequence_mask(
83 | lengths=lengths, maxlen=maxlen, dtype=tf.float32)
84 | positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
85 |
86 | return positions_embed
87 |
88 | class PoolingEncoder(Encoder):
89 | """An encoder that pools over embeddings, as described in
90 |   https://arxiv.org/abs/1611.02344. The encoder supports optional position
91 | embeddings and a configurable pooling window.
92 |
93 | Params:
94 | dropout_keep_prob: Dropout keep probability applied to the embeddings.
95 | pooling_fn: The 1-d pooling function to use, e.g.
96 | `tensorflow.layers.average_pooling1d`.
97 | pool_size: The pooling window, passed as `pool_size` to
98 | the pooling function.
99 | strides: The stride during pooling, passed as `strides`
100 |       to the pooling function.
101 | position_embeddings.enable: If true, add position embeddings to the
102 | inputs before pooling.
103 | position_embeddings.combiner_fn: Function used to combine the
104 | position embeddings with the inputs. For example, `tensorflow.add`.
105 | position_embeddings.num_positions: Size of the position embedding matrix.
106 | This should be set to the maximum sequence length of the inputs.
107 | """
108 |
109 | def __init__(self, params, mode, name="pooling_encoder"):
110 | super(PoolingEncoder, self).__init__(params, mode, name)
111 | self._pooling_fn = locate(self.params["pooling_fn"])
112 | self._combiner_fn = locate(self.params["position_embeddings.combiner_fn"])
113 |
114 | @staticmethod
115 | def default_params():
116 | return {
117 | "dropout_keep_prob": 0.8,
118 | "pooling_fn": "tensorflow.layers.average_pooling1d",
119 | "pool_size": 5,
120 | "strides": 1,
121 | "position_embeddings.enable": True,
122 | "position_embeddings.combiner_fn": "tensorflow.multiply",
123 | "position_embeddings.num_positions": 100,
124 | }
125 |
126 | def encode(self, inputs, sequence_length):
127 | if self.params["position_embeddings.enable"]:
128 | positions_embed = _create_position_embedding(
129 | embedding_dim=inputs.get_shape().as_list()[-1],
130 | num_positions=self.params["position_embeddings.num_positions"],
131 | lengths=sequence_length,
132 | maxlen=tf.shape(inputs)[1])
133 | inputs = self._combiner_fn(inputs, positions_embed)
134 |
135 | # Apply dropout
136 | inputs = tf.contrib.layers.dropout(
137 | inputs=inputs,
138 | keep_prob=self.params["dropout_keep_prob"],
139 | is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)
140 |
141 | outputs = self._pooling_fn(
142 | inputs=inputs,
143 | pool_size=self.params["pool_size"],
144 | strides=self.params["strides"],
145 | padding="SAME")
146 |
147 | # Final state is the average representation of the pooled embeddings
148 | final_state = tf.reduce_mean(outputs, 1)
149 |
150 | return EncoderOutput(
151 | outputs=outputs,
152 | final_state=final_state,
153 | attention_values=inputs,
154 | attention_values_length=sequence_length)
155 |
--------------------------------------------------------------------------------
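For reference, the fixed encoding computed by `position_encoding` above (section 4.1 of the End-To-End Memory Networks paper) can be written, with L = sentence_size, d = embedding_size, position j in [1, L] and dimension k in [1, d], as:

$$\text{encoding}[j-1,\,k-1] \;=\; \Bigl(1 - \frac{j}{L+1}\Bigr) \;-\; \frac{k}{d+1}\Bigl(1 - \frac{2j}{L+1}\Bigr)$$

`_create_position_embedding` then slices this constant matrix to the current maximum length, tiles it across the batch, and zeroes out positions beyond each sequence's length with `tf.sequence_mask`.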
/TensorFlow_implementation/seq2seq/encoders/rnn_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Collection of RNN encoders.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import copy
23 | import tensorflow as tf
24 | from tensorflow.contrib.rnn.python.ops import rnn
25 |
26 | from seq2seq.encoders.encoder import Encoder, EncoderOutput
27 | from seq2seq.training import utils as training_utils
28 |
29 |
30 | def _unpack_cell(cell):
31 | """Unpack the cells because the stack_bidirectional_dynamic_rnn
32 | expects a list of cells, one per layer."""
33 | if isinstance(cell, tf.contrib.rnn.MultiRNNCell):
34 | return cell._cells #pylint: disable=W0212
35 | else:
36 | return [cell]
37 |
38 |
39 | def _default_rnn_cell_params():
40 | """Creates default parameters used by multiple RNN encoders.
41 | """
42 | return {
43 | "cell_class": "BasicLSTMCell",
44 | "cell_params": {
45 | "num_units": 128
46 | },
47 | "dropout_input_keep_prob": 1.0,
48 | "dropout_output_keep_prob": 1.0,
49 | "num_layers": 1,
50 | "residual_connections": False,
51 | "residual_combiner": "add",
52 | "residual_dense": False
53 | }
54 |
55 |
56 | def _toggle_dropout(cell_params, mode):
57 | """Disables dropout during eval/inference mode
58 | """
59 | cell_params = copy.deepcopy(cell_params)
60 | if mode != tf.contrib.learn.ModeKeys.TRAIN:
61 | cell_params["dropout_input_keep_prob"] = 1.0
62 | cell_params["dropout_output_keep_prob"] = 1.0
63 | return cell_params
64 |
65 |
66 | class UnidirectionalRNNEncoder(Encoder):
67 | """
68 | A unidirectional RNN encoder. Stacking should be performed as
69 | part of the cell.
70 |
71 | Args:
72 |     params: A dictionary of hyperparameters; `rnn_cell` configures the cell.
73 | name: A name for the encoder
74 | """
75 |
76 | def __init__(self, params, mode, name="forward_rnn_encoder"):
77 | super(UnidirectionalRNNEncoder, self).__init__(params, mode, name)
78 | self.params["rnn_cell"] = _toggle_dropout(self.params["rnn_cell"], mode)
79 |
80 | @staticmethod
81 | def default_params():
82 | return {
83 | "rnn_cell": _default_rnn_cell_params(),
84 | "init_scale": 0.04,
85 | }
86 |
87 | def encode(self, inputs, sequence_length, **kwargs):
88 | scope = tf.get_variable_scope()
89 | scope.set_initializer(tf.random_uniform_initializer(
90 | -self.params["init_scale"],
91 | self.params["init_scale"]))
92 |
93 | cell = training_utils.get_rnn_cell(**self.params["rnn_cell"])
94 | outputs, state = tf.nn.dynamic_rnn(
95 | cell=cell,
96 | inputs=inputs,
97 | sequence_length=sequence_length,
98 | dtype=tf.float32,
99 | **kwargs)
100 | return EncoderOutput(
101 | outputs=outputs,
102 | final_state=state,
103 | attention_values=outputs,
104 | attention_values_length=sequence_length)
105 |
106 |
107 | class BidirectionalRNNEncoder(Encoder):
108 | """
109 | A bidirectional RNN encoder. Uses the same cell for both the
110 | forward and backward RNN. Stacking should be performed as part of
111 | the cell.
112 |
113 | Args:
114 |     params: A dictionary of hyperparameters; `rnn_cell` configures the cell.
115 | name: A name for the encoder
116 | """
117 |
118 | def __init__(self, params, mode, name="bidi_rnn_encoder"):
119 | super(BidirectionalRNNEncoder, self).__init__(params, mode, name)
120 | self.params["rnn_cell"] = _toggle_dropout(self.params["rnn_cell"], mode)
121 |
122 | @staticmethod
123 | def default_params():
124 | return {
125 | "rnn_cell": _default_rnn_cell_params(),
126 | "init_scale": 0.04,
127 | }
128 |
129 | def encode(self, inputs, sequence_length, **kwargs):
130 | scope = tf.get_variable_scope()
131 | scope.set_initializer(tf.random_uniform_initializer(
132 | -self.params["init_scale"],
133 | self.params["init_scale"]))
134 |
135 | cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
136 | cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
137 | outputs, states = tf.nn.bidirectional_dynamic_rnn(
138 | cell_fw=cell_fw,
139 | cell_bw=cell_bw,
140 | inputs=inputs,
141 | sequence_length=sequence_length,
142 | dtype=tf.float32,
143 | **kwargs)
144 |
145 | # Concatenate outputs and states of the forward and backward RNNs
146 | outputs_concat = tf.concat(outputs, 2)
147 |
148 | return EncoderOutput(
149 | outputs=outputs_concat,
150 | final_state=states,
151 | attention_values=outputs_concat,
152 | attention_values_length=sequence_length)
153 |
154 |
155 | class StackBidirectionalRNNEncoder(Encoder):
156 | """
157 | A stacked bidirectional RNN encoder. Uses the same cell for both the
158 | forward and backward RNN. Stacking should be performed as part of
159 | the cell.
160 |
161 | Args:
162 |     params: A dictionary of hyperparameters; `rnn_cell` configures the cell.
163 | name: A name for the encoder
164 | """
165 |
166 | def __init__(self, params, mode, name="stacked_bidi_rnn_encoder"):
167 | super(StackBidirectionalRNNEncoder, self).__init__(params, mode, name)
168 | self.params["rnn_cell"] = _toggle_dropout(self.params["rnn_cell"], mode)
169 |
170 | @staticmethod
171 | def default_params():
172 | return {
173 | "rnn_cell": _default_rnn_cell_params(),
174 | "init_scale": 0.04,
175 | }
176 |
177 | def encode(self, inputs, sequence_length, **kwargs):
178 | scope = tf.get_variable_scope()
179 | scope.set_initializer(tf.random_uniform_initializer(
180 | -self.params["init_scale"],
181 | self.params["init_scale"]))
182 |
183 | cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
184 | cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
185 |
186 | cells_fw = _unpack_cell(cell_fw)
187 | cells_bw = _unpack_cell(cell_bw)
188 |
189 | result = rnn.stack_bidirectional_dynamic_rnn(
190 | cells_fw=cells_fw,
191 | cells_bw=cells_bw,
192 | inputs=inputs,
193 | dtype=tf.float32,
194 | sequence_length=sequence_length,
195 | **kwargs)
196 | outputs_concat, _output_state_fw, _output_state_bw = result
197 | final_state = (_output_state_fw, _output_state_bw)
198 | return EncoderOutput(
199 | outputs=outputs_concat,
200 | final_state=final_state,
201 | attention_values=outputs_concat,
202 | attention_values_length=sequence_length)
203 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/global_vars.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Collection of global variables.
16 | """
17 |
18 | SYNC_REPLICAS_OPTIMIZER = None
19 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/graph_module.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | All graph components that create Variables should inherit from the
16 | base class defined in this file.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import tensorflow as tf
24 |
25 |
26 | class GraphModule(object):
27 | """
28 | Convenience class that makes it easy to share variables.
29 |   Each instance of this class creates its own set of variables, but
30 | each subsequent execution of an instance will re-use its variables.
31 |
32 | Graph components that define variables should inherit from this class
33 | and implement their logic in the `_build` method.
34 | """
35 |
36 | def __init__(self, name):
37 | """
38 | Initialize the module. Each subclass must call this constructor with a name.
39 |
40 | Args:
41 | name: Name of this module. Used for `tf.make_template`.
42 | """
43 | self.name = name
44 | self._template = tf.make_template(name, self._build, create_scope_now_=True)
45 | # Docstrings for the class should be the docstring for the _build method
46 | self.__doc__ = self._build.__doc__
47 | # pylint: disable=E1101
48 | self.__call__.__func__.__doc__ = self._build.__doc__
49 |
50 | def _build(self, *args, **kwargs):
51 | """Subclasses should implement their logic here.
52 | """
53 | raise NotImplementedError
54 |
55 | def __call__(self, *args, **kwargs):
56 | # pylint: disable=missing-docstring
57 | return self._template(*args, **kwargs)
58 |
59 | def variable_scope(self):
60 | """Returns the proper variable scope for this module.
61 | """
62 | return tf.variable_scope(self._template.variable_scope)
63 |
--------------------------------------------------------------------------------
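A hedged sketch of the variable-sharing behaviour described above: a toy `GraphModule` subclass creates one variable inside `_build`, and calling the module twice re-uses that variable because both calls go through the same `tf.make_template` (TF 1.x graph mode; the class and variable names are made up).

import tensorflow as tf
from seq2seq.graph_module import GraphModule

class ScaleModule(GraphModule):
    """Multiplies its input by a single learned scalar."""

    def _build(self, inputs):
        scale = tf.get_variable("scale", shape=[], initializer=tf.ones_initializer())
        return inputs * scale

module = ScaleModule("scale")
a = module(tf.constant(2.0))   # first call creates the "scale" variable
b = module(tf.constant(3.0))   # second call re-uses the same variable

print([v.name for v in tf.trainable_variables()])   # ['scale/scale:0'] -- one variable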
/TensorFlow_implementation/seq2seq/graph_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Miscellaneous utility functions.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 |
24 | def templatemethod(name_):
25 | """This decorator wraps a method with `tf.make_template`. For example,
26 |
27 |   @templatemethod("my_method")
28 | def my_method():
29 | # Create variables
30 | """
31 |
32 | def template_decorator(func):
33 | """Inner decorator function"""
34 |
35 | def func_wrapper(*args, **kwargs):
36 | """Inner wrapper function"""
37 | templated_func = tf.make_template(name_, func)
38 | return templated_func(*args, **kwargs)
39 |
40 | return func_wrapper
41 |
42 | return template_decorator
43 |
44 |
45 | def add_dict_to_collection(dict_, collection_name):
46 | """Adds a dictionary to a graph collection.
47 |
48 | Args:
49 | dict_: A dictionary of string keys to tensor values
50 | collection_name: The name of the collection to add the dictionary to
51 | """
52 | key_collection = collection_name + "_keys"
53 | value_collection = collection_name + "_values"
54 | for key, value in dict_.items():
55 | tf.add_to_collection(key_collection, key)
56 | tf.add_to_collection(value_collection, value)
57 |
58 |
59 | def get_dict_from_collection(collection_name):
60 | """Gets a dictionary from a graph collection.
61 |
62 | Args:
63 | collection_name: A collection name to read a dictionary from
64 |
65 | Returns:
66 | A dictionary with string keys and tensor values
67 | """
68 | key_collection = collection_name + "_keys"
69 | value_collection = collection_name + "_values"
70 | keys = tf.get_collection(key_collection)
71 | values = tf.get_collection(value_collection)
72 | return dict(zip(keys, values))
73 |
--------------------------------------------------------------------------------
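A short usage sketch (hypothetical values, TensorFlow 1.x) for the helpers above: `add_dict_to_collection` and `get_dict_from_collection` round-trip a dictionary of tensors through the default graph, which is how prediction tensors are later recovered inside inference hooks without threading them through function arguments:

    import tensorflow as tf

    from seq2seq.graph_utils import add_dict_to_collection, get_dict_from_collection

    predictions = {
        "ids": tf.constant([[1, 2, 3]]),
        "scores": tf.constant([[0.1, 0.7, 0.2]]),
    }
    add_dict_to_collection(predictions, "predictions")

    # Elsewhere (e.g. inside a SessionRunHook) the same tensors can be looked
    # up again from the graph collection.
    restored = get_dict_from_collection("predictions")
    assert set(restored.keys()) == {"ids", "scores"}
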
/TensorFlow_implementation/seq2seq/inference/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Modules related to running model inference.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | from seq2seq.inference.inference import *
22 | import seq2seq.inference.beam_search
23 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/inference/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Generates model predictions.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | from seq2seq.training import utils as training_utils
24 |
25 |
26 | def create_inference_graph(model, input_pipeline, batch_size=32):
27 | """Creates a graph to perform inference.
28 |
29 | Args:
30 |     model: A callable model instance used to build the prediction graph.
31 | input_pipeline: An instance of `InputPipeline` that defines
32 | how to read and parse data.
33 | batch_size: The batch size used for inference
34 |
35 | Returns:
36 | The return value of the model function, typically a tuple of
37 | (predictions, loss, train_op).
38 | """
39 |
40 | # TODO: This doesn't really belong here.
41 | # How to get rid of this?
42 | if hasattr(model, "use_beam_search"):
43 | if model.use_beam_search:
44 | tf.logging.info("Setting batch size to 1 for beam search.")
45 | batch_size = 1
46 |
47 | input_fn = training_utils.create_input_fn(
48 | pipeline=input_pipeline,
49 | batch_size=batch_size,
50 | allow_smaller_final_batch=True)
51 |
52 | # Build the graph
53 | features, labels = input_fn()
54 | return model(features=features, labels=labels, params=None)
55 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/losses.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Operations related to calculating sequence losses.
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 |
24 | def cross_entropy_sequence_loss(logits, targets, sequence_length):
25 | """Calculates the per-example cross-entropy loss for a sequence of logits and
26 |   masks out all losses past the sequence length.
27 |
28 | Args:
29 | logits: Logits of shape `[T, B, vocab_size]`
30 | targets: Target classes of shape `[T, B]`
31 | sequence_length: An int32 tensor of shape `[B]` corresponding
32 | to the length of each input
33 |
34 | Returns:
35 | A tensor of shape [T, B] that contains the loss per example, per time step.
36 | """
37 | with tf.name_scope("cross_entropy_sequence_loss"):
38 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
39 | logits=logits, labels=targets)
40 |
41 | # Mask out the losses we don't care about
42 | loss_mask = tf.sequence_mask(
43 | tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
44 | losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])
45 |
46 | return losses
47 |
--------------------------------------------------------------------------------
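A minimal sketch of the expected time-major shapes for `cross_entropy_sequence_loss` (illustrative numbers, assuming TensorFlow 1.x):

    import numpy as np
    import tensorflow as tf

    from seq2seq.losses import cross_entropy_sequence_loss

    T, B, V = 5, 2, 10  # time steps, batch size, vocabulary size
    logits = tf.constant(np.random.randn(T, B, V), dtype=tf.float32)
    targets = tf.constant(np.random.randint(0, V, size=(T, B)), dtype=tf.int32)
    lengths = tf.constant([5, 3], dtype=tf.int32)  # second example is padded

    losses = cross_entropy_sequence_loss(logits, targets, lengths)  # [T, B]
    with tf.Session() as sess:
      per_step = sess.run(losses)
      # Time steps past each example's length are masked to zero.
      assert np.all(per_step[3:, 1] == 0.0)
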
/TensorFlow_implementation/seq2seq/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Collection of metric-related functions
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/metrics/bleu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """BLEU metric implementation.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import os
24 | import re
25 | import subprocess
26 | import tempfile
27 | import numpy as np
28 |
29 | from six.moves import urllib
30 | import tensorflow as tf
31 |
32 |
33 | def moses_multi_bleu(hypotheses, references, lowercase=False):
34 | """Calculate the bleu score for hypotheses and references
35 |   using the MOSES multi-bleu.perl script.
36 |
37 | Args:
38 | hypotheses: A numpy array of strings where each string is a single example.
39 | references: A numpy array of strings where each string is a single example.
40 | lowercase: If true, pass the "-lc" flag to the multi-bleu script
41 |
42 | Returns:
43 | The BLEU score as a float32 value.
44 | """
45 |
46 | if np.size(hypotheses) == 0:
47 | return np.float32(0.0)
48 |
49 | # Get MOSES multi-bleu script
50 | try:
51 | multi_bleu_path, _ = urllib.request.urlretrieve(
52 | "https://raw.githubusercontent.com/moses-smt/mosesdecoder/"
53 | "master/scripts/generic/multi-bleu.perl")
54 | os.chmod(multi_bleu_path, 0o755)
55 | except: #pylint: disable=W0702
56 | tf.logging.info("Unable to fetch multi-bleu.perl script, using local.")
57 | metrics_dir = os.path.dirname(os.path.realpath(__file__))
58 | bin_dir = os.path.abspath(os.path.join(metrics_dir, "..", "..", "bin"))
59 | multi_bleu_path = os.path.join(bin_dir, "tools/multi-bleu.perl")
60 |
61 | # Dump hypotheses and references to tempfiles
62 | hypothesis_file = tempfile.NamedTemporaryFile()
63 | hypothesis_file.write("\n".join(hypotheses).encode("utf-8"))
64 | hypothesis_file.write(b"\n")
65 | hypothesis_file.flush()
66 | reference_file = tempfile.NamedTemporaryFile()
67 | reference_file.write("\n".join(references).encode("utf-8"))
68 | reference_file.write(b"\n")
69 | reference_file.flush()
70 |
71 | # Calculate BLEU using multi-bleu script
72 | with open(hypothesis_file.name, "r") as read_pred:
73 | bleu_cmd = [multi_bleu_path]
74 | if lowercase:
75 | bleu_cmd += ["-lc"]
76 | bleu_cmd += [reference_file.name]
77 | try:
78 | bleu_out = subprocess.check_output(
79 | bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT)
80 | bleu_out = bleu_out.decode("utf-8")
81 | bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1)
82 | bleu_score = float(bleu_score)
83 | except subprocess.CalledProcessError as error:
84 | if error.output is not None:
85 | tf.logging.warning("multi-bleu.perl script returned non-zero exit code")
86 | tf.logging.warning(error.output)
87 | bleu_score = np.float32(0.0)
88 |
89 | # Close temp files
90 | hypothesis_file.close()
91 | reference_file.close()
92 |
93 | return np.float32(bleu_score)
94 |
--------------------------------------------------------------------------------
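A quick usage sketch for `moses_multi_bleu` (hypothetical strings; the call needs either network access to fetch `multi-bleu.perl` or a local copy under the repository's `bin/tools/` fallback path):

    import numpy as np

    from seq2seq.metrics.bleu import moses_multi_bleu

    hypotheses = np.array(["the cat sat on the mat", "a quick brown fox"])
    references = np.array(["the cat sat on the mat", "the quick brown fox"])

    score = moses_multi_bleu(hypotheses, references, lowercase=True)
    print("BLEU = %.2f" % score)  # 100.0 would mean a perfect n-gram match
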
/TensorFlow_implementation/seq2seq/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """This module contains various Encoder-Decoder models
15 | """
16 |
17 | from seq2seq.models.basic_seq2seq import BasicSeq2Seq
18 | from seq2seq.models.attention_seq2seq import AttentionSeq2Seq
19 | from seq2seq.models.image2seq import Image2Seq
20 |
21 | import seq2seq.models.bridges
22 | import seq2seq.models.model_base
23 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/models/attention_seq2seq.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sequence to Sequence model with attention
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | from pydoc import locate
24 |
25 | import tensorflow as tf
26 |
27 | from seq2seq import decoders
28 | from seq2seq.models.basic_seq2seq import BasicSeq2Seq
29 |
30 |
31 | class AttentionSeq2Seq(BasicSeq2Seq):
32 | """Sequence2Sequence model with attention mechanism.
33 |
34 | Args:
35 | source_vocab_info: An instance of `VocabInfo`
36 | for the source vocabulary
37 | target_vocab_info: An instance of `VocabInfo`
38 | for the target vocabulary
39 | params: A dictionary of hyperparameters
40 | """
41 |
42 | def __init__(self, params, mode, name="att_seq2seq"):
43 | super(AttentionSeq2Seq, self).__init__(params, mode, name)
44 |
45 | @staticmethod
46 | def default_params():
47 | params = BasicSeq2Seq.default_params().copy()
48 | params.update({
49 | "attention.class": "AttentionLayerBahdanau",
50 | "attention.params": {}, # Arbitrary attention layer parameters
51 | "bridge.class": "seq2seq.models.bridges.ZeroBridge",
52 | "encoder.class": "seq2seq.encoders.BidirectionalRNNEncoder",
53 | "encoder.params": {}, # Arbitrary parameters for the encoder
54 | "decoder.class": "seq2seq.decoders.AttentionDecoder",
55 | "decoder.params": {} # Arbitrary parameters for the decoder
56 | })
57 | return params
58 |
59 | def _create_decoder(self, encoder_output, features, _labels):
60 | attention_class = locate(self.params["attention.class"]) or \
61 | getattr(decoders.attention, self.params["attention.class"])
62 | attention_layer = attention_class(
63 | params=self.params["attention.params"], mode=self.mode)
64 |
65 | # If the input sequence is reversed we also need to reverse
66 | # the attention scores.
67 | reverse_scores_lengths = None
68 | if self.params["source.reverse"]:
69 | reverse_scores_lengths = features["source_len"]
70 | if self.use_beam_search:
71 | reverse_scores_lengths = tf.tile(
72 | input=reverse_scores_lengths,
73 | multiples=[self.params["inference.beam_search.beam_width"]])
74 |
75 | return self.decoder_class(
76 | params=self.params["decoder.params"],
77 | mode=self.mode,
78 | vocab_size=self.target_vocab_info.total_size,
79 | attention_values=encoder_output.attention_values,
80 | attention_values_length=encoder_output.attention_values_length,
81 | attention_keys=encoder_output.outputs,
82 | attention_fn=attention_layer,
83 | reverse_scores_lengths=reverse_scores_lengths)
84 |
--------------------------------------------------------------------------------
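The `default_params` above are merged with user-supplied overrides by the `Configurable` base class. A small, illustrative sketch of inspecting the defaults and overriding a single key (the override dict here is hypothetical):

    from seq2seq.models import AttentionSeq2Seq

    params = AttentionSeq2Seq.default_params()
    print(params["attention.class"])  # AttentionLayerBahdanau
    print(params["encoder.class"])    # seq2seq.encoders.BidirectionalRNNEncoder

    # A user typically passes only the keys to change, e.g. switching to the
    # dot-product attention layer; unspecified keys keep their defaults.
    overrides = {"attention.class": "AttentionLayerDot"}
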
/TensorFlow_implementation/seq2seq/models/basic_seq2seq.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Definition of a basic seq2seq model
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | from pydoc import locate
24 | import tensorflow as tf
25 | from seq2seq.contrib.seq2seq import helper as tf_decode_helper
26 |
27 | from seq2seq.models.seq2seq_model import Seq2SeqModel
28 | from seq2seq.graph_utils import templatemethod
29 | from seq2seq.models import bridges
30 |
31 |
32 | class BasicSeq2Seq(Seq2SeqModel):
33 | """Basic Sequence2Sequence model with a unidirectional encoder and decoder.
34 | The last encoder state is used to initialize the decoder and thus both
35 | must share the same type of RNN cell.
36 |
37 | Args:
38 | source_vocab_info: An instance of `VocabInfo`
39 | for the source vocabulary
40 | target_vocab_info: An instance of `VocabInfo`
41 | for the target vocabulary
42 | params: A dictionary of hyperparameters
43 | """
44 |
45 | def __init__(self, params, mode, name="basic_seq2seq"):
46 | super(BasicSeq2Seq, self).__init__(params, mode, name)
47 | self.encoder_class = locate(self.params["encoder.class"])
48 | self.decoder_class = locate(self.params["decoder.class"])
49 |
50 | @staticmethod
51 | def default_params():
52 | params = Seq2SeqModel.default_params().copy()
53 | params.update({
54 | "bridge.class": "seq2seq.models.bridges.InitialStateBridge",
55 | "bridge.params": {},
56 | "encoder.class": "seq2seq.encoders.UnidirectionalRNNEncoder",
57 | "encoder.params": {}, # Arbitrary parameters for the encoder
58 | "decoder.class": "seq2seq.decoders.BasicDecoder",
59 | "decoder.params": {} # Arbitrary parameters for the decoder
60 | })
61 | return params
62 |
63 | def _create_bridge(self, encoder_outputs, decoder_state_size):
64 | """Creates the bridge to be used between encoder and decoder"""
65 | bridge_class = locate(self.params["bridge.class"]) or \
66 | getattr(bridges, self.params["bridge.class"])
67 | return bridge_class(
68 | encoder_outputs=encoder_outputs,
69 | decoder_state_size=decoder_state_size,
70 | params=self.params["bridge.params"],
71 | mode=self.mode)
72 |
73 | def _create_decoder(self, _encoder_output, _features, _labels):
74 | """Creates a decoder instance based on the passed parameters."""
75 | return self.decoder_class(
76 | params=self.params["decoder.params"],
77 | mode=self.mode,
78 | vocab_size=self.target_vocab_info.total_size)
79 |
80 | def _decode_train(self, decoder, bridge, _encoder_output, _features, labels):
81 | """Runs decoding in training mode"""
82 | target_embedded = tf.nn.embedding_lookup(self.target_embedding,
83 | labels["target_ids"])
84 | helper_train = tf_decode_helper.TrainingHelper(
85 | inputs=target_embedded[:, :-1],
86 | sequence_length=labels["target_len"] - 1)
87 | decoder_initial_state = bridge()
88 | return decoder(decoder_initial_state, helper_train)
89 |
90 | def _decode_infer(self, decoder, bridge, _encoder_output, features, labels):
91 | """Runs decoding in inference mode"""
92 | batch_size = self.batch_size(features, labels)
93 | if self.use_beam_search:
94 | batch_size = self.params["inference.beam_search.beam_width"]
95 |
96 | target_start_id = self.target_vocab_info.special_vocab.SEQUENCE_START
97 | helper_infer = tf_decode_helper.GreedyEmbeddingHelper(
98 | embedding=self.target_embedding,
99 | start_tokens=tf.fill([batch_size], target_start_id),
100 | end_token=self.target_vocab_info.special_vocab.SEQUENCE_END)
101 | decoder_initial_state = bridge()
102 | return decoder(decoder_initial_state, helper_infer)
103 |
104 | @templatemethod("encode")
105 | def encode(self, features, labels):
106 | source_embedded = tf.nn.embedding_lookup(self.source_embedding,
107 | features["source_ids"])
108 | encoder_fn = self.encoder_class(self.params["encoder.params"], self.mode)
109 | return encoder_fn(source_embedded, features["source_len"])
110 |
111 | @templatemethod("decode")
112 | def decode(self, encoder_output, features, labels):
113 | decoder = self._create_decoder(encoder_output, features, labels)
114 | if self.use_beam_search:
115 | decoder = self._get_beam_search_decoder(decoder)
116 |
117 | bridge = self._create_bridge(
118 | encoder_outputs=encoder_output,
119 | decoder_state_size=decoder.cell.state_size)
120 | if self.mode == tf.contrib.learn.ModeKeys.INFER:
121 | return self._decode_infer(decoder, bridge, encoder_output, features,
122 | labels)
123 | else:
124 | return self._decode_train(decoder, bridge, encoder_output, features,
125 | labels)
126 |
--------------------------------------------------------------------------------
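How `_decode_train` and `_decode_infer` above differ, shown as a small illustrative sketch of the token shift used for teacher forcing (the matching shift on the loss side happens elsewhere in the model code):

    # Teacher forcing in _decode_train: the decoder is fed the gold tokens,
    # shifted so that the input at step t is the target token at step t.
    #
    #   target_ids   :  SEQUENCE_START  w1  w2  w3  SEQUENCE_END
    #   decoder input:  SEQUENCE_START  w1  w2  w3               # target_embedded[:, :-1]
    #   expected out :                  w1  w2  w3  SEQUENCE_END
    #
    # At inference time (_decode_infer), GreedyEmbeddingHelper instead feeds
    # back the decoder's own argmax prediction from the previous step,
    # starting from SEQUENCE_START and stopping at SEQUENCE_END.
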
/TensorFlow_implementation/seq2seq/models/bridges.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A collection of bridges between encoder and decoder. A bridge defines
15 | how encoder information is passed to the decoder.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import abc
24 | from pydoc import locate
25 |
26 | import six
27 | import numpy as np
28 |
29 | import tensorflow as tf
30 | from tensorflow.python.util import nest # pylint: disable=E0611
31 |
32 | from seq2seq.configurable import Configurable
33 |
34 |
35 | def _total_tensor_depth(tensor):
36 | """Returns the size of a tensor without the first (batch) dimension"""
37 | return np.prod(tensor.get_shape().as_list()[1:])
38 |
39 |
40 | @six.add_metaclass(abc.ABCMeta)
41 | class Bridge(Configurable):
42 | """An abstract bridge class. A bridge defines how state is passed
43 | between encoder and decoder.
44 |
45 | All logic is contained in the `_create` method, which returns an
46 | initial state for the decoder.
47 |
48 | Args:
49 |     encoder_outputs: A namedtuple that corresponds to the encoder outputs.
50 | decoder_state_size: An integer or tuple of integers defining the
51 | state size of the decoder.
52 | """
53 |
54 | def __init__(self, encoder_outputs, decoder_state_size, params, mode):
55 | Configurable.__init__(self, params, mode)
56 | self.encoder_outputs = encoder_outputs
57 | self.decoder_state_size = decoder_state_size
58 | self.batch_size = tf.shape(
59 | nest.flatten(self.encoder_outputs.final_state)[0])[0]
60 |
61 | def __call__(self):
62 | """Runs the bridge function.
63 |
64 | Returns:
65 | An initial decoder_state tensor or tuple of tensors.
66 | """
67 | return self._create()
68 |
69 | @abc.abstractmethod
70 | def _create(self):
71 | """ Implements the logic for this bridge.
72 | This function should be implemented by child classes.
73 |
74 | Returns:
75 |       An initial_decoder_state tensor or tuple of tensors.
76 | """
77 | raise NotImplementedError("Must be implemented by child class")
78 |
79 |
80 | class ZeroBridge(Bridge):
81 | """A bridge that does not pass any information between encoder and decoder
82 | and sets the initial decoder state to 0. The input function is not modified.
83 | """
84 |
85 | @staticmethod
86 | def default_params():
87 | return {}
88 |
89 | def _create(self):
90 | zero_state = nest.map_structure(
91 | lambda x: tf.zeros([self.batch_size, x], dtype=tf.float32),
92 | self.decoder_state_size)
93 | return zero_state
94 |
95 |
96 | class PassThroughBridge(Bridge):
97 | """Passes the encoder state through to the decoder as-is. This bridge
98 | can only be used if encoder and decoder have the exact same state size, i.e.
99 | use the same RNN cell.
100 | """
101 |
102 | @staticmethod
103 | def default_params():
104 | return {}
105 |
106 | def _create(self):
107 | nest.assert_same_structure(self.encoder_outputs.final_state,
108 | self.decoder_state_size)
109 | return self.encoder_outputs.final_state
110 |
111 |
112 | class InitialStateBridge(Bridge):
113 | """A bridge that creates an initial decoder state based on the output
114 | of the encoder. This state is created by passing the encoder outputs
115 | through an additional layer to match them to the decoder state size.
116 | The input function remains unmodified.
117 |
118 | Args:
119 |     encoder_outputs: A namedtuple that corresponds to the encoder outputs.
120 | decoder_state_size: An integer or tuple of integers defining the
121 | state size of the decoder.
122 | bridge_input: Which attribute of the `encoder_outputs` to use for the
123 | initial state calculation. For example, "final_state" means that
124 | `encoder_outputs.final_state` will be used.
125 | activation_fn: An optional activation function for the extra
126 | layer inserted between encoder and decoder. A string for a function
127 | name contained in `tf.nn`, e.g. "tanh".
128 | """
129 |
130 | def __init__(self, encoder_outputs, decoder_state_size, params, mode):
131 | super(InitialStateBridge, self).__init__(encoder_outputs,
132 | decoder_state_size, params, mode)
133 |
134 | if not hasattr(encoder_outputs, self.params["bridge_input"]):
135 | raise ValueError("Invalid bridge_input not in encoder outputs.")
136 |
137 | self._bridge_input = getattr(encoder_outputs, self.params["bridge_input"])
138 | self._activation_fn = locate(self.params["activation_fn"])
139 |
140 | @staticmethod
141 | def default_params():
142 | return {
143 | "bridge_input": "final_state",
144 | "activation_fn": "tensorflow.identity",
145 | }
146 |
147 | def _create(self):
148 | # Concat bridge inputs on the depth dimensions
149 | bridge_input = nest.map_structure(
150 | lambda x: tf.reshape(x, [self.batch_size, _total_tensor_depth(x)]),
151 | self._bridge_input)
152 | bridge_input_flat = nest.flatten([bridge_input])
153 | bridge_input_concat = tf.concat(bridge_input_flat, 1)
154 |
155 | state_size_splits = nest.flatten(self.decoder_state_size)
156 | total_decoder_state_size = sum(state_size_splits)
157 |
158 |     # Pass bridge inputs through a fully connected layer
159 | initial_state_flat = tf.contrib.layers.fully_connected(
160 | inputs=bridge_input_concat,
161 | num_outputs=total_decoder_state_size,
162 | activation_fn=self._activation_fn)
163 |
164 | # Shape back into required state size
165 | initial_state = tf.split(initial_state_flat, state_size_splits, axis=1)
166 | return nest.pack_sequence_as(self.decoder_state_size, initial_state)
167 |
--------------------------------------------------------------------------------
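A compact sketch of the data flow inside `InitialStateBridge._create` above, written out with plain TensorFlow 1.x ops and illustrative shapes (not repository code):

    import tensorflow as tf

    batch_size = 4
    # e.g. the final state of a 2-layer GRU encoder with 8 units per layer
    final_state = (tf.zeros([batch_size, 8]), tf.zeros([batch_size, 8]))
    decoder_state_size = (16, 16)

    # 1. Flatten every state tensor to [batch, depth] and concatenate on depth.
    flat = tf.concat(
        [tf.reshape(s, [batch_size, -1]) for s in final_state], axis=1)
    # 2. Project to the total decoder state size with one fully connected layer.
    projected = tf.contrib.layers.fully_connected(
        flat, sum(decoder_state_size), activation_fn=None)
    # 3. Split and repack to match the decoder's state structure.
    initial_state = tuple(tf.split(projected, list(decoder_state_size), axis=1))
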
/TensorFlow_implementation/seq2seq/models/image2seq.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Definition of an image-to-sequence model
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 |
25 | from seq2seq import graph_utils
26 | from seq2seq.data import vocab
27 | from seq2seq.graph_utils import templatemethod
28 | from seq2seq.models.model_base import ModelBase
29 | from seq2seq.models.attention_seq2seq import AttentionSeq2Seq
30 |
31 |
32 | class Image2Seq(AttentionSeq2Seq):
33 | """A model that encodes an image and produces a sequence
34 | of tokens.
35 | """
36 |
37 | def __init__(self, params, mode, name="image_seq2seq"):
38 | super(Image2Seq, self).__init__(params, mode, name)
39 | self.params["source.reverse"] = False
40 | self.params["embedding.share"] = False
41 |
42 | @staticmethod
43 | def default_params():
44 | params = ModelBase.default_params()
45 | params.update({
46 | "attention.class": "AttentionLayerBahdanau",
47 | "attention.params": {
48 | "num_units": 128
49 | },
50 | "bridge.class": "seq2seq.models.bridges.ZeroBridge",
51 | "bridge.params": {},
52 | "encoder.class": "seq2seq.encoders.InceptionV3Encoder",
53 | "encoder.params": {}, # Arbitrary parameters for the encoder
54 | "decoder.class": "seq2seq.decoders.AttentionDecoder",
55 | "decoder.params": {}, # Arbitrary parameters for the decoder
56 | "target.max_seq_len": 50,
57 | "embedding.dim": 100,
58 | "inference.beam_search.beam_width": 0,
59 | "inference.beam_search.length_penalty_weight": 0.0,
60 | "inference.beam_search.choose_successors_fn": "choose_top_k",
61 | "vocab_target": "",
62 | })
63 | return params
64 |
65 | @templatemethod("encode")
66 | def encode(self, features, _labels):
67 | encoder_fn = self.encoder_class(self.params["encoder.params"], self.mode)
68 | return encoder_fn(features["image"])
69 |
70 | def batch_size(self, features, _labels):
71 | return tf.shape(features["image"])[0]
72 |
73 | def _preprocess(self, features, labels):
74 | """Model-specific preprocessing for features and labels:
75 |
76 | - Creates vocabulary lookup tables for target vocab
77 | - Converts tokens into vocabulary ids
78 |     - Prepends a special "SEQUENCE_START" token to the target
79 |     - Appends a special "SEQUENCE_END" token to the target
80 | """
81 |
82 |     # Create vocabulary lookup table for target
83 | target_vocab_to_id, target_id_to_vocab, target_word_to_count, _ = \
84 | vocab.create_vocabulary_lookup_table(self.target_vocab_info.path)
85 |
86 |     # Add vocab tables to graph collection so that we can access them in
87 | # other places.
88 | graph_utils.add_dict_to_collection({
89 | "target_vocab_to_id": target_vocab_to_id,
90 | "target_id_to_vocab": target_id_to_vocab,
91 | "target_word_to_count": target_word_to_count
92 | }, "vocab_tables")
93 |
94 | if labels is None:
95 | return features, None
96 |
97 | labels = labels.copy()
98 |
99 | # Slices targets to max length
100 | if self.params["target.max_seq_len"] is not None:
101 | labels["target_tokens"] = labels["target_tokens"][:, :self.params[
102 | "target.max_seq_len"]]
103 | labels["target_len"] = tf.minimum(labels["target_len"],
104 | self.params["target.max_seq_len"])
105 |
106 | # Look up the target ids in the vocabulary
107 | labels["target_ids"] = target_vocab_to_id.lookup(labels["target_tokens"])
108 |
109 | labels["target_len"] = tf.to_int32(labels["target_len"])
110 | tf.summary.histogram("target_len", tf.to_float(labels["target_len"]))
111 |
112 | # Add to graph collection for later use
113 | graph_utils.add_dict_to_collection(features, "features")
114 | if labels:
115 | graph_utils.add_dict_to_collection(labels, "labels")
116 |
117 | return features, labels
118 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/models/model_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Base class for models"""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | from __future__ import unicode_literals
20 |
21 | import collections
22 | import tensorflow as tf
23 |
24 | from seq2seq.configurable import Configurable
25 | from seq2seq.training import utils as training_utils
26 | from seq2seq import global_vars
27 |
28 |
29 | def _flatten_dict(dict_, parent_key="", sep="."):
30 | """Flattens a nested dictionary. Namedtuples within
31 | the dictionary are converted to dicts.
32 |
33 | Args:
34 | dict_: The dictionary to flatten.
35 | parent_key: A prefix to prepend to each key.
36 | sep: Separator between parent and child keys, a string. For example
37 | { "a": { "b": 3 } } will become { "a.b": 3 } if the separator is ".".
38 |
39 | Returns:
40 | A new flattened dictionary.
41 | """
42 | items = []
43 | for key, value in dict_.items():
44 | new_key = parent_key + sep + key if parent_key else key
45 | if isinstance(value, collections.MutableMapping):
46 | items.extend(_flatten_dict(value, new_key, sep=sep).items())
47 | elif isinstance(value, tuple) and hasattr(value, "_asdict"):
48 | dict_items = collections.OrderedDict(zip(value._fields, value))
49 | items.extend(_flatten_dict(dict_items, new_key, sep=sep).items())
50 | else:
51 | items.append((new_key, value))
52 | return dict(items)
53 |
54 |
55 | class ModelBase(Configurable):
56 | """Abstract base class for models.
57 |
58 | Args:
59 | params: A dictionary of hyperparameter values
60 | name: A name for this model to be used as a variable scope
61 | """
62 |
63 | def __init__(self, params, mode, name):
64 | self.name = name
65 | Configurable.__init__(self, params, mode)
66 |
67 | def _clip_gradients(self, grads_and_vars):
68 | """Clips gradients by global norm."""
69 | gradients, variables = zip(*grads_and_vars)
70 | clipped_gradients, _ = tf.clip_by_global_norm(
71 | gradients, self.params["optimizer.clip_gradients"])
72 | return list(zip(clipped_gradients, variables))
73 |
74 | def _create_optimizer(self):
75 | """Creates the optimizer"""
76 | name = self.params["optimizer.name"]
77 | optimizer = tf.contrib.layers.OPTIMIZER_CLS_NAMES[name](
78 | learning_rate=self.params["optimizer.learning_rate"],
79 | **self.params["optimizer.params"])
80 |
81 | # Optionally wrap with SyncReplicasOptimizer
82 | if self.params["optimizer.sync_replicas"] > 0:
83 | optimizer = tf.train.SyncReplicasOptimizer(
84 | opt=optimizer,
85 | replicas_to_aggregate=self.params[
86 | "optimizer.sync_replicas_to_aggregate"],
87 | total_num_replicas=self.params["optimizer.sync_replicas"])
88 | # This is really ugly, but we need to do this to make the optimizer
89 | # accessible outside of the model.
90 | global_vars.SYNC_REPLICAS_OPTIMIZER = optimizer
91 |
92 | return optimizer
93 |
94 | def _build_train_op(self, loss):
95 | """Creates the training operation"""
96 | learning_rate_decay_fn = training_utils.create_learning_rate_decay_fn(
97 | decay_type=self.params["optimizer.lr_decay_type"] or None,
98 | decay_steps=self.params["optimizer.lr_decay_steps"],
99 | decay_rate=self.params["optimizer.lr_decay_rate"],
100 | start_decay_at=self.params["optimizer.lr_start_decay_at"],
101 | stop_decay_at=self.params["optimizer.lr_stop_decay_at"],
102 | min_learning_rate=self.params["optimizer.lr_min_learning_rate"],
103 | staircase=self.params["optimizer.lr_staircase"])
104 |
105 | optimizer = self._create_optimizer()
106 | train_op = tf.contrib.layers.optimize_loss(
107 | loss=loss,
108 | global_step=tf.contrib.framework.get_global_step(),
109 | learning_rate=self.params["optimizer.learning_rate"],
110 | learning_rate_decay_fn=learning_rate_decay_fn,
111 | clip_gradients=self._clip_gradients,
112 | optimizer=optimizer,
113 | summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
114 |
115 | return train_op
116 |
117 | @staticmethod
118 | def default_params():
119 | """Returns a dictionary of default parameters for this model."""
120 | return {
121 | "optimizer.name": "Adam",
122 | "optimizer.learning_rate": 1e-4,
123 | "optimizer.params": {}, # Arbitrary parameters for the optimizer
124 | "optimizer.lr_decay_type": "",
125 | "optimizer.lr_decay_steps": 100,
126 | "optimizer.lr_decay_rate": 0.99,
127 | "optimizer.lr_start_decay_at": 0,
128 | "optimizer.lr_stop_decay_at": tf.int32.max,
129 | "optimizer.lr_min_learning_rate": 1e-12,
130 | "optimizer.lr_staircase": False,
131 | "optimizer.clip_gradients": 5.0,
132 | "optimizer.sync_replicas": 0,
133 | "optimizer.sync_replicas_to_aggregate": 0,
134 | }
135 |
136 | def batch_size(self, features, labels):
137 | """Returns the batch size for a batch of examples"""
138 | raise NotImplementedError()
139 |
140 | def __call__(self, features, labels, params):
141 | """Creates the model graph. See the model_fn documentation in
142 | tf.contrib.learn.Estimator class for a more detailed explanation.
143 | """
144 | with tf.variable_scope("model"):
145 | with tf.variable_scope(self.name):
146 | return self._build(features, labels, params)
147 |
148 | def _build(self, features, labels, params):
149 | """Subclasses should implement this method. See the `model_fn` documentation
150 | in tf.contrib.learn.Estimator class for a more detailed explanation.
151 | """
152 | raise NotImplementedError
153 |
--------------------------------------------------------------------------------
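A small illustration of `_flatten_dict` above, including the namedtuple handling (hypothetical values; the flattened result is shown as a comment):

    import collections

    Metrics = collections.namedtuple("Metrics", ["loss", "bleu"])

    nested = {"train": Metrics(loss=0.5, bleu=27.3), "step": 100}
    # _flatten_dict(nested) returns:
    #   {"train.loss": 0.5, "train.bleu": 27.3, "step": 100}
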
/TensorFlow_implementation/seq2seq/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Collection of task types.
16 | """
17 |
18 | from seq2seq.tasks.inference_task import InferenceTask
19 | from seq2seq.tasks.decode_text import DecodeText
20 | from seq2seq.tasks.dump_attention import DumpAttention
21 | from seq2seq.tasks.dump_beams import DumpBeams
22 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/tasks/dump_attention.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Task that dumps attention scores and plots during inference.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import os
24 |
25 | import numpy as np
26 | from matplotlib import pyplot as plt
27 |
28 | import tensorflow as tf
29 | from tensorflow import gfile
30 |
31 | from seq2seq.tasks.decode_text import _get_prediction_length
32 | from seq2seq.tasks.inference_task import InferenceTask, unbatch_dict
33 |
34 |
35 | def _get_scores(predictions_dict):
36 | """Returns the attention scores, sliced by source and target length.
37 | """
38 | prediction_len = _get_prediction_length(predictions_dict)
39 | source_len = predictions_dict["features.source_len"]
40 | return predictions_dict["attention_scores"][:prediction_len, :source_len]
41 |
42 |
43 | def _create_figure(predictions_dict):
44 | """Creates and returns a new figure that visualizes
45 |   attention scores for a single model prediction.
46 | """
47 |
48 | # Find out how long the predicted sequence is
49 | target_words = list(predictions_dict["predicted_tokens"])
50 |
51 | prediction_len = _get_prediction_length(predictions_dict)
52 |
53 | # Get source words
54 | source_len = predictions_dict["features.source_len"]
55 | source_words = predictions_dict["features.source_tokens"][:source_len]
56 |
57 | # Plot
58 | fig = plt.figure(figsize=(8, 8))
59 | plt.imshow(
60 | X=predictions_dict["attention_scores"][:prediction_len, :source_len],
61 | interpolation="nearest",
62 | cmap=plt.cm.Blues)
63 | plt.xticks(np.arange(source_len), source_words, rotation=45)
64 | plt.yticks(np.arange(prediction_len), target_words, rotation=-45)
65 | fig.tight_layout()
66 |
67 | return fig
68 |
69 |
70 | class DumpAttention(InferenceTask):
71 | """Defines inference for tasks where both the input and output sequences
72 | are plain text.
73 |
74 | Params:
75 | delimiter: Character by which tokens are delimited. Defaults to space.
76 | unk_replace: If true, enable unknown token replacement based on attention
77 | scores.
78 | unk_mapping: If `unk_replace` is true, this can be the path to a file
79 | defining a dictionary to improve UNK token replacement. Refer to the
80 | documentation for more details.
81 | dump_attention_dir: Save attention scores and plots to this directory.
82 | dump_attention_no_plot: If true, only save attention scores, not
83 | attention plots.
84 | dump_beams: Write beam search debugging information to this file.
85 | """
86 |
87 | def __init__(self, params):
88 | super(DumpAttention, self).__init__(params)
89 | self._attention_scores_accum = []
90 | self._idx = 0
91 |
92 | if not self.params["output_dir"]:
93 | raise ValueError("Must specify output_dir for DumpAttention")
94 |
95 | @staticmethod
96 | def default_params():
97 | params = {}
98 | params.update({"output_dir": "", "dump_plots": True})
99 | return params
100 |
101 | def begin(self):
102 | super(DumpAttention, self).begin()
103 | gfile.MakeDirs(self.params["output_dir"])
104 |
105 | def before_run(self, _run_context):
106 | fetches = {}
107 | fetches["predicted_tokens"] = self._predictions["predicted_tokens"]
108 | fetches["features.source_len"] = self._predictions["features.source_len"]
109 | fetches["features.source_tokens"] = self._predictions[
110 | "features.source_tokens"]
111 | fetches["attention_scores"] = self._predictions["attention_scores"]
112 | return tf.train.SessionRunArgs(fetches)
113 |
114 | def after_run(self, _run_context, run_values):
115 | fetches_batch = run_values.results
116 | for fetches in unbatch_dict(fetches_batch):
117 | # Convert to unicode
118 | fetches["predicted_tokens"] = np.char.decode(
119 | fetches["predicted_tokens"].astype("S"), "utf-8")
120 | fetches["features.source_tokens"] = np.char.decode(
121 | fetches["features.source_tokens"].astype("S"), "utf-8")
122 |
123 | if self.params["dump_plots"]:
124 | output_path = os.path.join(self.params["output_dir"],
125 | "{:05d}.png".format(self._idx))
126 | _create_figure(fetches)
127 | plt.savefig(output_path)
128 | plt.close()
129 | tf.logging.info("Wrote %s", output_path)
130 | self._idx += 1
131 | self._attention_scores_accum.append(_get_scores(fetches))
132 |
133 | def end(self, _session):
134 | scores_path = os.path.join(self.params["output_dir"],
135 | "attention_scores.npz")
136 | np.savez(scores_path, *self._attention_scores_accum)
137 | tf.logging.info("Wrote %s", scores_path)
138 |
--------------------------------------------------------------------------------
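The archive written in `end()` above can be inspected offline. Since the scores are saved with positional `np.savez` arguments, they come back under the default keys `arr_0`, `arr_1`, ... (a usage sketch):

    import numpy as np

    data = np.load("attention_scores.npz")
    first = data["arr_0"]  # shape [prediction_len, source_len] for the first example
    print(first.shape, first.sum(axis=1))  # rows roughly sum to 1 (softmax attention)
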
/TensorFlow_implementation/seq2seq/tasks/dump_beams.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Task that dumps beam search debugging information during inference.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import numpy as np
24 |
25 | import tensorflow as tf
26 |
27 | from seq2seq.tasks.inference_task import InferenceTask, unbatch_dict
28 |
29 |
30 | class DumpBeams(InferenceTask):
31 | """Defines inference for tasks where both the input and output sequences
32 | are plain text.
33 |
34 | Params:
35 | file: File to write beam search information to.
36 | """
37 |
38 | def __init__(self, params):
39 | super(DumpBeams, self).__init__(params)
40 | self._beam_accum = {
41 | "predicted_ids": [],
42 | "beam_parent_ids": [],
43 | "scores": [],
44 | "log_probs": []
45 | }
46 |
47 | if not self.params["file"]:
48 | raise ValueError("Must specify file for DumpBeams")
49 |
50 | @staticmethod
51 | def default_params():
52 | params = {}
53 | params.update({"file": "",})
54 | return params
55 |
56 | def before_run(self, _run_context):
57 | fetches = {}
58 | fetches["beam_search_output.predicted_ids"] = self._predictions[
59 | "beam_search_output.predicted_ids"]
60 | fetches["beam_search_output.beam_parent_ids"] = self._predictions[
61 | "beam_search_output.beam_parent_ids"]
62 | fetches["beam_search_output.scores"] = self._predictions[
63 | "beam_search_output.scores"]
64 | fetches["beam_search_output.log_probs"] = self._predictions[
65 | "beam_search_output.log_probs"]
66 | return tf.train.SessionRunArgs(fetches)
67 |
68 | def after_run(self, _run_context, run_values):
69 | fetches_batch = run_values.results
70 | for fetches in unbatch_dict(fetches_batch):
71 | self._beam_accum["predicted_ids"].append(fetches[
72 | "beam_search_output.predicted_ids"])
73 | self._beam_accum["beam_parent_ids"].append(fetches[
74 | "beam_search_output.beam_parent_ids"])
75 | self._beam_accum["scores"].append(fetches["beam_search_output.scores"])
76 | self._beam_accum["log_probs"].append(fetches[
77 | "beam_search_output.log_probs"])
78 |
79 | def end(self, _session):
80 | np.savez(self.params["file"], **self._beam_accum)
81 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/tasks/inference_task.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Abstract base class for inference tasks.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import abc
24 |
25 | import six
26 | import tensorflow as tf
27 |
28 | from seq2seq import graph_utils
29 | from seq2seq.configurable import Configurable, abstractstaticmethod
30 |
31 |
32 | def unbatch_dict(dict_):
33 | """Converts a dictionary of batch items to a batch/list of
34 | dictionary items.
35 | """
36 | batch_size = list(dict_.values())[0].shape[0]
37 | for i in range(batch_size):
38 | yield {key: value[i] for key, value in dict_.items()}
39 |
40 |
41 | @six.add_metaclass(abc.ABCMeta)
42 | class InferenceTask(tf.train.SessionRunHook, Configurable):
43 | """
44 | Abstract base class for inference tasks. Defines the logic used to make
45 | predictions for a specific type of task.
46 |
47 | Params:
48 | model_class: The model class to instantiate. If undefined,
49 | re-uses the class used during training.
50 | model_params: Model hyperparameters. Specified hyperparameters will
51 | overwrite those used during training.
52 |
53 | Args:
54 | params: See Params above.
55 | """
56 |
57 | def __init__(self, params):
58 | Configurable.__init__(self, params, tf.contrib.learn.ModeKeys.INFER)
59 | self._predictions = None
60 |
61 | def begin(self):
62 | self._predictions = graph_utils.get_dict_from_collection("predictions")
63 |
64 | @abstractstaticmethod
65 | def default_params():
66 | raise NotImplementedError()
67 |
--------------------------------------------------------------------------------
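A quick sketch of `unbatch_dict` above with illustrative numpy values:

    import numpy as np

    from seq2seq.tasks.inference_task import unbatch_dict

    batch = {"ids": np.array([[1, 2], [3, 4]]), "len": np.array([2, 1])}
    examples = list(unbatch_dict(batch))
    # examples[0] == {"ids": array([1, 2]), "len": 2}
    # examples[1] == {"ids": array([3, 4]), "len": 1}
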
/TensorFlow_implementation/seq2seq/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests and testing utilities
15 | """
16 |
17 | from seq2seq.test import utils
18 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/attention_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Unit tests for attention functions.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 | import numpy as np
25 |
26 | from seq2seq.decoders.attention import AttentionLayerDot
27 | from seq2seq.decoders.attention import AttentionLayerBahdanau
28 |
29 |
30 | class AttentionLayerTest(tf.test.TestCase):
31 | """
32 | Tests the AttentionLayer module.
33 | """
34 |
35 | def setUp(self):
36 | super(AttentionLayerTest, self).setUp()
37 | tf.logging.set_verbosity(tf.logging.INFO)
38 | self.batch_size = 8
39 | self.attention_dim = 128
40 | self.input_dim = 16
41 | self.seq_len = 10
42 | self.state_dim = 32
43 |
44 | def _create_layer(self):
45 | """Creates the attention layer. Should be implemented by child classes"""
46 | raise NotImplementedError
47 |
48 | def _test_layer(self):
49 | """Tests Attention layer with a given score type"""
50 | inputs_pl = tf.placeholder(tf.float32, (None, None, self.input_dim))
51 | inputs_length_pl = tf.placeholder(tf.int32, [None])
52 | state_pl = tf.placeholder(tf.float32, (None, self.state_dim))
53 | attention_fn = self._create_layer()
54 | scores, context = attention_fn(
55 | query=state_pl,
56 | keys=inputs_pl,
57 | values=inputs_pl,
58 | values_length=inputs_length_pl)
59 |
60 | with self.test_session() as sess:
61 | sess.run(tf.global_variables_initializer())
62 | feed_dict = {}
63 | feed_dict[inputs_pl] = np.random.randn(self.batch_size, self.seq_len,
64 | self.input_dim)
65 | feed_dict[state_pl] = np.random.randn(self.batch_size, self.state_dim)
66 | feed_dict[inputs_length_pl] = np.arange(self.batch_size) + 1
67 | scores_, context_ = sess.run([scores, context], feed_dict)
68 |
69 | np.testing.assert_array_equal(scores_.shape,
70 | [self.batch_size, self.seq_len])
71 | np.testing.assert_array_equal(context_.shape,
72 | [self.batch_size, self.input_dim])
73 |
74 | for idx, batch in enumerate(scores_, 1):
75 | # All scores that are padded should be zero
76 | np.testing.assert_array_equal(batch[idx:], np.zeros_like(batch[idx:]))
77 |
78 | # Scores should sum to 1
79 | scores_sum = np.sum(scores_, axis=1)
80 | np.testing.assert_array_almost_equal(scores_sum, np.ones([self.batch_size]))
81 |
82 |
83 | class AttentionLayerDotTest(AttentionLayerTest):
84 | """Tests the AttentionLayerDot class"""
85 |
86 | def _create_layer(self):
87 | return AttentionLayerDot(
88 | params={"num_units": self.attention_dim},
89 | mode=tf.contrib.learn.ModeKeys.TRAIN)
90 |
91 | def test_layer(self):
92 | self._test_layer()
93 |
94 |
95 | class AttentionLayerBahdanauTest(AttentionLayerTest):
96 | """Tests the AttentionLayerBahdanau class"""
97 |
98 | def _create_layer(self):
99 | return AttentionLayerBahdanau(
100 | params={"num_units": self.attention_dim},
101 | mode=tf.contrib.learn.ModeKeys.TRAIN)
102 |
103 | def test_layer(self):
104 | self._test_layer()
105 |
106 |
107 | if __name__ == "__main__":
108 | tf.test.main()
109 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/bridges_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Tests for Encoder-Decoder bridges.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | from collections import namedtuple
24 | import numpy as np
25 |
26 | import tensorflow as tf
27 | from tensorflow.python.util import nest # pylint: disable=E0611
28 |
29 | from seq2seq.encoders.encoder import EncoderOutput
30 | from seq2seq.models.bridges import ZeroBridge, InitialStateBridge
31 | from seq2seq.models.bridges import PassThroughBridge
32 |
33 | DecoderOutput = namedtuple("DecoderOutput", ["predicted_ids"])
34 |
35 |
36 | class BridgeTest(tf.test.TestCase):
37 | """Abstract class for bridge tests"""
38 |
39 | def setUp(self):
40 | super(BridgeTest, self).setUp()
41 | self.batch_size = 4
42 | self.encoder_cell = tf.contrib.rnn.MultiRNNCell(
43 | [tf.contrib.rnn.GRUCell(4), tf.contrib.rnn.GRUCell(8)])
44 | self.decoder_cell = tf.contrib.rnn.MultiRNNCell(
45 | [tf.contrib.rnn.LSTMCell(16), tf.contrib.rnn.GRUCell(8)])
46 | final_encoder_state = nest.map_structure(
47 | lambda x: tf.convert_to_tensor(
48 | value=np.random.randn(self.batch_size, x),
49 | dtype=tf.float32),
50 | self.encoder_cell.state_size)
51 | self.encoder_outputs = EncoderOutput(
52 | outputs=tf.convert_to_tensor(
53 | value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
54 | attention_values=tf.convert_to_tensor(
55 | value=np.random.randn(self.batch_size, 10, 16), dtype=tf.float32),
56 | attention_values_length=np.full([self.batch_size], 10),
57 | final_state=final_encoder_state)
58 |
59 | def _create_bridge(self):
60 | """Creates the bridge class to be tests. Must be implemented by
61 | child classes"""
62 | raise NotImplementedError()
63 |
64 | def _assert_correct_outputs(self):
65 | """Asserts bridge outputs are correct. Must be implemented by
66 | child classes"""
67 | raise NotImplementedError()
68 |
69 | def _run(self, scope=None, **kwargs):
70 | """Runs the bridge with the given arguments
71 | """
72 |
73 | with tf.variable_scope(scope or "bridge"):
74 | bridge = self._create_bridge(**kwargs)
75 | initial_state = bridge()
76 |
77 | with self.test_session() as sess:
78 | sess.run(tf.global_variables_initializer())
79 | initial_state_ = sess.run(initial_state)
80 |
81 | return initial_state_
82 |
83 |
84 | class TestZeroBridge(BridgeTest):
85 | """Tests for the ZeroBridge class"""
86 |
87 | def _create_bridge(self, **kwargs):
88 | return ZeroBridge(
89 | encoder_outputs=self.encoder_outputs,
90 | decoder_state_size=self.decoder_cell.state_size,
91 | params=kwargs,
92 | mode=tf.contrib.learn.ModeKeys.TRAIN)
93 |
94 | def _assert_correct_outputs(self, initial_state_):
95 | initial_state_flat_ = nest.flatten(initial_state_)
96 | for element in initial_state_flat_:
97 | np.testing.assert_array_equal(element, np.zeros_like(element))
98 |
99 | def test_zero_bridge(self):
100 | self._assert_correct_outputs(self._run())
101 |
102 |
103 | class TestPassThroughBridge(BridgeTest):
104 | """Tests for the ZeroBridge class"""
105 |
106 | def _create_bridge(self, **kwargs):
107 | return PassThroughBridge(
108 | encoder_outputs=self.encoder_outputs,
109 | decoder_state_size=self.decoder_cell.state_size,
110 | params=kwargs,
111 | mode=tf.contrib.learn.ModeKeys.TRAIN)
112 |
113 | def _assert_correct_outputs(self, initial_state_):
114 | nest.assert_same_structure(initial_state_, self.decoder_cell.state_size)
115 | nest.assert_same_structure(initial_state_, self.encoder_outputs.final_state)
116 |
117 | encoder_state_flat = nest.flatten(self.encoder_outputs.final_state)
118 | with self.test_session() as sess:
119 | encoder_state_flat_ = sess.run(encoder_state_flat)
120 |
121 | initial_state_flat_ = nest.flatten(initial_state_)
122 | for e_dec, e_enc in zip(initial_state_flat_, encoder_state_flat_):
123 | np.testing.assert_array_equal(e_dec, e_enc)
124 |
125 | def test_passthrough_bridge(self):
126 | self.decoder_cell = self.encoder_cell
127 | self._assert_correct_outputs(self._run())
128 |
129 |
130 | class TestInitialStateBridge(BridgeTest):
131 | """Tests for the InitialStateBridge class"""
132 |
133 | def _create_bridge(self, **kwargs):
134 | return InitialStateBridge(
135 | encoder_outputs=self.encoder_outputs,
136 | decoder_state_size=self.decoder_cell.state_size,
137 | params=kwargs,
138 | mode=tf.contrib.learn.ModeKeys.TRAIN)
139 |
140 | def _assert_correct_outputs(self, initial_state_):
141 | nest.assert_same_structure(initial_state_, self.decoder_cell.state_size)
142 |
143 | def test_with_final_state(self):
144 | self._assert_correct_outputs(self._run(bridge_input="final_state"))
145 |
146 | def test_with_outputs(self):
147 | self._assert_correct_outputs(self._run(bridge_input="outputs"))
148 |
149 | def test_with_activation_fn(self):
150 | self._assert_correct_outputs(
151 | self._run(
152 | bridge_input="final_state", activation_fn="tanh"))
153 |
154 |
155 | if __name__ == "__main__":
156 | tf.test.main()
157 |
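158 | # Summary of the behaviour exercised above (derived from the assertions, not
159 | # from the bridge implementations themselves): ZeroBridge yields an all-zero
160 | # decoder initial state, PassThroughBridge copies the encoder final state
161 | # verbatim (and therefore needs matching encoder/decoder state sizes), and
162 | # InitialStateBridge maps either the encoder final state or its outputs into
163 | # the decoder state structure, optionally through an activation such as tanh.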
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/conv_encoder_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Test Cases for ConvEncoder.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 | import numpy as np
25 |
26 | from seq2seq.encoders import ConvEncoder
27 |
28 |
29 | class ConvEncoderTest(tf.test.TestCase):
30 | """
31 | Tests the ConvEncoder class.
32 | """
33 |
34 | def setUp(self):
35 | super(ConvEncoderTest, self).setUp()
36 | self.batch_size = 4
37 | self.sequence_length = 16
38 | self.input_depth = 10
39 | self.mode = tf.contrib.learn.ModeKeys.TRAIN
40 |
41 | def _test_with_params(self, params):
42 | """Tests the encoder with a given parameter configuration"""
43 | inputs = tf.random_normal(
44 | [self.batch_size, self.sequence_length, self.input_depth])
45 | example_length = tf.ones(
46 | self.batch_size, dtype=tf.int32) * self.sequence_length
47 |
48 | encode_fn = ConvEncoder(params, self.mode)
49 | encoder_output = encode_fn(inputs, example_length)
50 |
51 | with self.test_session() as sess:
52 | sess.run(tf.global_variables_initializer())
53 | encoder_output_ = sess.run(encoder_output)
54 |
55 | att_value_units = encode_fn.params["attention_cnn.units"]
56 | output_units = encode_fn.params["output_cnn.units"]
57 |
58 | np.testing.assert_array_equal(
59 | encoder_output_.outputs.shape,
60 | [self.batch_size, self.sequence_length, att_value_units])
61 | np.testing.assert_array_equal(
62 | encoder_output_.attention_values.shape,
63 | [self.batch_size, self.sequence_length, output_units])
64 | np.testing.assert_array_equal(
65 | encoder_output_.final_state.shape,
66 | [self.batch_size, output_units])
67 |
68 | def test_encode_with_pos(self):
69 | self._test_with_params({
70 | "position_embeddings.enable": True,
71 | "position_embeddings.num_positions": self.sequence_length,
72 | "attention_cnn.units": 5,
73 | "output_cnn.units": 6
74 | })
75 |
76 | if __name__ == "__main__":
77 | tf.test.main()
78 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/data_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Unit tests for input-related operations.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | from __future__ import unicode_literals
23 |
24 | import tempfile
25 | import tensorflow as tf
26 | import numpy as np
27 |
28 | from seq2seq.data import split_tokens_decoder
29 | from seq2seq.data.parallel_data_provider import make_parallel_data_provider
30 |
31 |
32 | class SplitTokensDecoderTest(tf.test.TestCase):
33 | """Tests the SplitTokensDecoder class
34 | """
35 |
36 | def test_decode(self):
37 | decoder = split_tokens_decoder.SplitTokensDecoder(
38 | delimiter=" ",
39 | tokens_feature_name="source_tokens",
40 | length_feature_name="source_len")
41 |
42 | self.assertEqual(decoder.list_items(), ["source_tokens", "source_len"])
43 |
44 | data = tf.constant("Hello world ! 笑w")
45 |
46 | decoded_tokens = decoder.decode(data, ["source_tokens"])
47 | decoded_length = decoder.decode(data, ["source_len"])
48 | decoded_both = decoder.decode(data, decoder.list_items())
49 |
50 | with self.test_session() as sess:
51 | decoded_tokens_ = sess.run(decoded_tokens)[0]
52 | decoded_length_ = sess.run(decoded_length)[0]
53 | decoded_both_ = sess.run(decoded_both)
54 |
55 | self.assertEqual(decoded_length_, 4)
56 | np.testing.assert_array_equal(
57 | np.char.decode(decoded_tokens_.astype("S"), "utf-8"),
58 | ["Hello", "world", "!", "笑w"])
59 |
60 | self.assertEqual(decoded_both_[1], 4)
61 | np.testing.assert_array_equal(
62 | np.char.decode(decoded_both_[0].astype("S"), "utf-8"),
63 | ["Hello", "world", "!", "笑w"])
64 |
65 |
66 | class ParallelDataProviderTest(tf.test.TestCase):
67 | """Tests the ParallelDataProvider class
68 | """
69 |
70 | def setUp(self):
71 | super(ParallelDataProviderTest, self).setUp()
72 | # Our data
73 | self.source_lines = ["Hello", "World", "!", "笑"]
74 | self.target_lines = ["1", "2", "3", "笑"]
75 | self.source_to_target = dict(zip(self.source_lines, self.target_lines))
76 |
77 | # Create two parallel text files
78 | self.source_file = tempfile.NamedTemporaryFile()
79 | self.target_file = tempfile.NamedTemporaryFile()
80 | self.source_file.write("\n".join(self.source_lines).encode("utf-8"))
81 | self.source_file.flush()
82 | self.target_file.write("\n".join(self.target_lines).encode("utf-8"))
83 | self.target_file.flush()
84 |
85 | def tearDown(self):
86 | super(ParallelDataProviderTest, self).tearDown()
87 | self.source_file.close()
88 | self.target_file.close()
89 |
90 | def test_reading(self):
91 | num_epochs = 50
92 | data_provider = make_parallel_data_provider(
93 | data_sources_source=[self.source_file.name],
94 | data_sources_target=[self.target_file.name],
95 | num_epochs=num_epochs,
96 | shuffle=True)
97 |
98 | item_keys = list(data_provider.list_items())
99 | item_values = data_provider.get(item_keys)
100 | items_dict = dict(zip(item_keys, item_values))
101 |
102 | self.assertEqual(
103 | set(item_keys),
104 | set(["source_tokens", "source_len", "target_tokens", "target_len"]))
105 |
106 | with self.test_session() as sess:
107 | sess.run(tf.global_variables_initializer())
108 | sess.run(tf.local_variables_initializer())
109 | with tf.contrib.slim.queues.QueueRunners(sess):
110 | item_dicts_ = [sess.run(items_dict) for _ in range(num_epochs * 3)]
111 |
112 | for item_dict in item_dicts_:
113 | item_dict["target_tokens"] = np.char.decode(
114 | item_dict["target_tokens"].astype("S"), "utf-8")
115 | item_dict["source_tokens"] = np.char.decode(
116 | item_dict["source_tokens"].astype("S"), "utf-8")
117 |
118 | # Source is Data + SEQUENCE_END
119 | self.assertEqual(item_dict["source_len"], 2)
120 | self.assertEqual(item_dict["source_tokens"][-1], "SEQUENCE_END")
121 | # Target is SEQUENCE_START + Data + SEQUENCE_END
122 | self.assertEqual(item_dict["target_len"], 3)
123 | self.assertEqual(item_dict["target_tokens"][0], "SEQUENCE_START")
124 | self.assertEqual(item_dict["target_tokens"][-1], "SEQUENCE_END")
125 |
126 | # Make sure data is aligned
127 | source_joined = " ".join(item_dict["source_tokens"][:-1])
128 | expected_target = self.source_to_target[source_joined]
129 | np.testing.assert_array_equal(
130 | item_dict["target_tokens"],
131 | ["SEQUENCE_START"] + expected_target.split(" ") + ["SEQUENCE_END"])
132 |
133 | def test_reading_without_targets(self):
134 | num_epochs = 50
135 | data_provider = make_parallel_data_provider(
136 | data_sources_source=[self.source_file.name],
137 | data_sources_target=None,
138 | num_epochs=num_epochs,
139 | shuffle=True)
140 |
141 | item_keys = list(data_provider.list_items())
142 | item_values = data_provider.get(item_keys)
143 | items_dict = dict(zip(item_keys, item_values))
144 |
145 | self.assertEqual(set(item_keys), set(["source_tokens", "source_len"]))
146 |
147 | with self.test_session() as sess:
148 | sess.run(tf.global_variables_initializer())
149 | sess.run(tf.local_variables_initializer())
150 | with tf.contrib.slim.queues.QueueRunners(sess):
151 | item_dicts_ = [sess.run(items_dict) for _ in range(num_epochs * 3)]
152 |
153 | for item_dict in item_dicts_:
154 | self.assertEqual(item_dict["source_len"], 2)
155 | item_dict["source_tokens"] = np.char.decode(
156 | item_dict["source_tokens"].astype("S"), "utf-8")
157 | self.assertEqual(item_dict["source_tokens"][-1], "SEQUENCE_END")
158 |
159 |
160 | if __name__ == "__main__":
161 | tf.test.main()
162 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/example_config_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Test Cases for example configuration files.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | from __future__ import unicode_literals
23 |
24 | import os
25 | from pydoc import locate
26 |
27 | import yaml
28 |
29 | import tensorflow as tf
30 | from tensorflow import gfile
31 |
32 | from seq2seq.test.models_test import EncoderDecoderTests
33 | from seq2seq import models
34 |
35 | EXAMPLE_CONFIG_DIR = os.path.abspath(
36 | os.path.join(os.path.dirname(__file__), "../../example_configs"))
37 |
38 |
39 | def _load_model_from_config(config_path, hparam_overrides, vocab_file, mode):
40 | """Loads model from a configuration file"""
41 | with gfile.GFile(config_path) as config_file:
42 | config = yaml.load(config_file)
43 | model_cls = locate(config["model"]) or getattr(models, config["model"])
44 | model_params = config["model_params"]
45 | if hparam_overrides:
46 | model_params.update(hparam_overrides)
47 | # Change the max decode length to make the test run faster
48 | model_params["decoder.params"]["max_decode_length"] = 5
49 | model_params["vocab_source"] = vocab_file
50 | model_params["vocab_target"] = vocab_file
51 | return model_cls(params=model_params, mode=mode)
52 |
53 |
54 | class ExampleConfigTest(object):
55 | """Interface for configuration-based tests"""
56 |
57 | def __init__(self, *args, **kwargs):
58 | super(ExampleConfigTest, self).__init__(*args, **kwargs)
59 | self.vocab_file = None
60 |
61 | def _config_path(self):
62 | """Returns the path to the configuration to be tested"""
63 | raise NotImplementedError()
64 |
65 | def create_model(self, mode, params=None):
66 | """Creates the model"""
67 | return _load_model_from_config(
68 | config_path=self._config_path(),
69 | hparam_overrides=params,
70 | vocab_file=self.vocab_file.name,
71 | mode=mode)
72 |
73 |
74 | class TestNMTLarge(ExampleConfigTest, EncoderDecoderTests):
75 | """Tests nmt_large.yml"""
76 |
77 | def _config_path(self):
78 | return os.path.join(EXAMPLE_CONFIG_DIR, "nmt_large.yml")
79 |
80 |
81 | class TestNMTMedium(ExampleConfigTest, EncoderDecoderTests):
82 | """Tests nmt_medium.yml"""
83 |
84 | def _config_path(self):
85 | return os.path.join(EXAMPLE_CONFIG_DIR, "nmt_medium.yml")
86 |
87 |
88 | class TestNMTSmall(ExampleConfigTest, EncoderDecoderTests):
89 | """Tests nmt_small.yml"""
90 |
91 | def _config_path(self):
92 | return os.path.join(EXAMPLE_CONFIG_DIR, "nmt_small.yml")
93 |
94 | class TestNMTConv(ExampleConfigTest, EncoderDecoderTests):
95 | """Tests nmt_small.yml"""
96 |
97 | def _config_path(self):
98 | return os.path.join(EXAMPLE_CONFIG_DIR, "nmt_conv.yml")
99 |
100 |
101 | if __name__ == "__main__":
102 | tf.test.main()
103 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/hooks_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Tests for SessionRunHooks.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import os
24 | import tempfile
25 | import shutil
26 | import time
27 |
28 | import tensorflow as tf
29 | from tensorflow.python.training import monitored_session # pylint: disable=E0611
30 | from tensorflow import gfile
31 |
32 | from seq2seq import graph_utils
33 | from seq2seq.training import hooks
34 |
35 |
36 | class TestPrintModelAnalysisHook(tf.test.TestCase):
37 | """Tests the `PrintModelAnalysisHook` hook"""
38 |
39 | def test_begin(self):
40 | model_dir = tempfile.mkdtemp()
41 | outfile = tempfile.NamedTemporaryFile()
42 | tf.get_variable("weigths", [128, 128])
43 | hook = hooks.PrintModelAnalysisHook(
44 | params={}, model_dir=model_dir, run_config=tf.contrib.learn.RunConfig())
45 | hook.begin()
46 |
47 | with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
48 | file_contents = file.read().strip()
49 |
50 | self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
51 |                      " weights (128x128, 16.38k/16.38k params)")
52 | outfile.close()
53 |
54 |
55 | class TestTrainSampleHook(tf.test.TestCase):
56 | """Tests `TrainSampleHook` class.
57 | """
58 |
59 | def setUp(self):
60 | super(TestTrainSampleHook, self).setUp()
61 | self.model_dir = tempfile.mkdtemp()
62 | self.sample_dir = os.path.join(self.model_dir, "samples")
63 |
64 | # The hook expects these collections to be in the graph
65 | pred_dict = {}
66 | pred_dict["predicted_tokens"] = tf.constant([["Hello", "World", "笑w"]])
67 | pred_dict["labels.target_tokens"] = tf.constant([["Hello", "World", "笑w"]])
68 | pred_dict["labels.target_len"] = tf.constant(2),
69 | graph_utils.add_dict_to_collection(pred_dict, "predictions")
70 |
71 | def tearDown(self):
72 | super(TestTrainSampleHook, self).tearDown()
73 | shutil.rmtree(self.model_dir)
74 |
75 | def test_sampling(self):
76 | hook = hooks.TrainSampleHook(
77 | params={"every_n_steps": 10}, model_dir=self.model_dir,
78 | run_config=tf.contrib.learn.RunConfig())
79 |
80 | global_step = tf.contrib.framework.get_or_create_global_step()
81 | no_op = tf.no_op()
82 | hook.begin()
83 | with self.test_session() as sess:
84 | sess.run(tf.global_variables_initializer())
85 | sess.run(tf.local_variables_initializer())
86 | sess.run(tf.tables_initializer())
87 |
88 | #pylint: disable=W0212
89 | mon_sess = monitored_session._HookedSession(sess, [hook])
90 | # Should trigger for step 0
91 | sess.run(tf.assign(global_step, 0))
92 | mon_sess.run(no_op)
93 |
94 | outfile = os.path.join(self.sample_dir, "samples_000000.txt")
95 | with open(outfile, "rb") as readfile:
96 | self.assertIn("Prediction followed by Target @ Step 0",
97 | readfile.read().decode("utf-8"))
98 |
99 | # Should not trigger for step 9
100 | sess.run(tf.assign(global_step, 9))
101 | mon_sess.run(no_op)
102 | outfile = os.path.join(self.sample_dir, "samples_000009.txt")
103 | self.assertFalse(os.path.exists(outfile))
104 |
105 | # Should trigger for step 10
106 | sess.run(tf.assign(global_step, 10))
107 | mon_sess.run(no_op)
108 | outfile = os.path.join(self.sample_dir, "samples_000010.txt")
109 | with open(outfile, "rb") as readfile:
110 | self.assertIn("Prediction followed by Target @ Step 10",
111 | readfile.read().decode("utf-8"))
112 |
113 |
114 | class TestMetadataCaptureHook(tf.test.TestCase):
115 | """Test for the MetadataCaptureHook"""
116 |
117 | def setUp(self):
118 | super(TestMetadataCaptureHook, self).setUp()
119 | self.model_dir = tempfile.mkdtemp()
120 |
121 | def tearDown(self):
122 | super(TestMetadataCaptureHook, self).tearDown()
123 | shutil.rmtree(self.model_dir)
124 |
125 | def test_capture(self):
126 | global_step = tf.contrib.framework.get_or_create_global_step()
127 | # Some test computation
128 | some_weights = tf.get_variable("weigths", [2, 128])
129 | computation = tf.nn.softmax(some_weights)
130 |
131 | hook = hooks.MetadataCaptureHook(
132 | params={"step": 5}, model_dir=self.model_dir,
133 | run_config=tf.contrib.learn.RunConfig())
134 | hook.begin()
135 |
136 | with self.test_session() as sess:
137 | sess.run(tf.global_variables_initializer())
138 | #pylint: disable=W0212
139 | mon_sess = monitored_session._HookedSession(sess, [hook])
140 | # Should not trigger for step 0
141 | sess.run(tf.assign(global_step, 0))
142 | mon_sess.run(computation)
143 | self.assertEqual(gfile.ListDirectory(self.model_dir), [])
144 | # Should trigger *after* step 5
145 | sess.run(tf.assign(global_step, 5))
146 | mon_sess.run(computation)
147 | self.assertEqual(gfile.ListDirectory(self.model_dir), [])
148 | mon_sess.run(computation)
149 | self.assertEqual(
150 | set(gfile.ListDirectory(self.model_dir)),
151 | set(["run_meta", "tfprof_log", "timeline.json"]))
152 |
153 | if __name__ == "__main__":
154 | tf.test.main()
155 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/input_pipeline_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Unit tests for input-related operations.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | from __future__ import unicode_literals
23 |
24 | import tensorflow as tf
25 | import numpy as np
26 | import yaml
27 |
28 | from seq2seq.data import input_pipeline
29 | from seq2seq.test import utils as test_utils
30 |
31 |
32 | class TestInputPipelineDef(tf.test.TestCase):
33 | """Tests InputPipeline string definitions"""
34 |
35 | def test_without_extra_args(self):
36 | pipeline_def = yaml.load("""
37 | class: ParallelTextInputPipeline
38 | params:
39 | source_files: ["file1"]
40 | target_files: ["file2"]
41 | num_epochs: 1
42 | shuffle: True
43 | """)
44 | pipeline = input_pipeline.make_input_pipeline_from_def(
45 | pipeline_def, tf.contrib.learn.ModeKeys.TRAIN)
46 | self.assertIsInstance(pipeline, input_pipeline.ParallelTextInputPipeline)
47 | #pylint: disable=W0212
48 | self.assertEqual(pipeline.params["source_files"], ["file1"])
49 | self.assertEqual(pipeline.params["target_files"], ["file2"])
50 | self.assertEqual(pipeline.params["num_epochs"], 1)
51 | self.assertEqual(pipeline.params["shuffle"], True)
52 |
53 | def test_with_extra_args(self):
54 | pipeline_def = yaml.load("""
55 | class: ParallelTextInputPipeline
56 | params:
57 | source_files: ["file1"]
58 | target_files: ["file2"]
59 | num_epochs: 1
60 | shuffle: True
61 | """)
62 | pipeline = input_pipeline.make_input_pipeline_from_def(
63 | def_dict=pipeline_def,
64 | mode=tf.contrib.learn.ModeKeys.TRAIN,
65 | num_epochs=5,
66 | shuffle=False)
67 | self.assertIsInstance(pipeline, input_pipeline.ParallelTextInputPipeline)
68 | #pylint: disable=W0212
69 | self.assertEqual(pipeline.params["source_files"], ["file1"])
70 | self.assertEqual(pipeline.params["target_files"], ["file2"])
71 | self.assertEqual(pipeline.params["num_epochs"], 5)
72 | self.assertEqual(pipeline.params["shuffle"], False)
73 |
74 |
75 | class TFRecordsInputPipelineTest(tf.test.TestCase):
76 | """
77 | Tests Data Provider operations.
78 | """
79 |
80 | def setUp(self):
81 | super(TFRecordsInputPipelineTest, self).setUp()
82 | tf.logging.set_verbosity(tf.logging.INFO)
83 |
84 | def test_pipeline(self):
85 | tfrecords_file = test_utils.create_temp_tfrecords(
86 | sources=["Hello World . 笑"], targets=["Bye 泣"])
87 |
88 | pipeline = input_pipeline.TFRecordInputPipeline(
89 | params={
90 | "files": [tfrecords_file.name],
91 | "source_field": "source",
92 | "target_field": "target",
93 | "num_epochs": 5,
94 | "shuffle": False
95 | },
96 | mode=tf.contrib.learn.ModeKeys.TRAIN)
97 |
98 | data_provider = pipeline.make_data_provider()
99 |
100 | features = pipeline.read_from_data_provider(data_provider)
101 |
102 | with self.test_session() as sess:
103 | sess.run(tf.global_variables_initializer())
104 | sess.run(tf.local_variables_initializer())
105 | with tf.contrib.slim.queues.QueueRunners(sess):
106 | res = sess.run(features)
107 |
108 | self.assertEqual(res["source_len"], 5)
109 | self.assertEqual(res["target_len"], 4)
110 | np.testing.assert_array_equal(
111 | np.char.decode(res["source_tokens"].astype("S"), "utf-8"),
112 | ["Hello", "World", ".", "笑", "SEQUENCE_END"])
113 | np.testing.assert_array_equal(
114 | np.char.decode(res["target_tokens"].astype("S"), "utf-8"),
115 | ["SEQUENCE_START", "Bye", "泣", "SEQUENCE_END"])
116 |
117 |
118 | class ParallelTextInputPipelineTest(tf.test.TestCase):
119 | """
120 | Tests Data Provider operations.
121 | """
122 |
123 | def setUp(self):
124 | super(ParallelTextInputPipelineTest, self).setUp()
125 | tf.logging.set_verbosity(tf.logging.INFO)
126 |
127 | def test_pipeline(self):
128 | file_source, file_target = test_utils.create_temp_parallel_data(
129 | sources=["Hello World . 笑"], targets=["Bye 泣"])
130 |
131 | pipeline = input_pipeline.ParallelTextInputPipeline(
132 | params={
133 | "source_files": [file_source.name],
134 | "target_files": [file_target.name],
135 | "num_epochs": 5,
136 | "shuffle": False
137 | },
138 | mode=tf.contrib.learn.ModeKeys.TRAIN)
139 |
140 | data_provider = pipeline.make_data_provider()
141 |
142 | features = pipeline.read_from_data_provider(data_provider)
143 |
144 | with self.test_session() as sess:
145 | sess.run(tf.global_variables_initializer())
146 | sess.run(tf.local_variables_initializer())
147 | with tf.contrib.slim.queues.QueueRunners(sess):
148 | res = sess.run(features)
149 |
150 | self.assertEqual(res["source_len"], 5)
151 | self.assertEqual(res["target_len"], 4)
152 | np.testing.assert_array_equal(
153 | np.char.decode(res["source_tokens"].astype("S"), "utf-8"),
154 | ["Hello", "World", ".", "笑", "SEQUENCE_END"])
155 | np.testing.assert_array_equal(
156 | np.char.decode(res["target_tokens"].astype("S"), "utf-8"),
157 | ["SEQUENCE_START", "Bye", "泣", "SEQUENCE_END"])
158 |
159 |
160 | if __name__ == "__main__":
161 | tf.test.main()
162 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/losses_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Unit tests for loss-related operations.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | from seq2seq import losses as seq2seq_losses
24 | import tensorflow as tf
25 | import numpy as np
26 |
27 |
28 | class CrossEntropySequenceLossTest(tf.test.TestCase):
29 | """
30 | Test for `seq2seq.losses.cross_entropy_sequence_loss`.
31 | """
32 |
33 | def setUp(self):
34 | super(CrossEntropySequenceLossTest, self).setUp()
35 | tf.logging.set_verbosity(tf.logging.INFO)
36 | self.batch_size = 4
37 | self.sequence_length = 10
38 | self.vocab_size = 50
39 |
40 | def test_op(self):
41 | logits = np.random.randn(self.sequence_length, self.batch_size,
42 | self.vocab_size)
43 | logits = logits.astype(np.float32)
44 | sequence_length = np.array([1, 2, 3, 4])
45 | targets = np.random.randint(0, self.vocab_size,
46 | [self.sequence_length, self.batch_size])
47 | losses = seq2seq_losses.cross_entropy_sequence_loss(logits, targets,
48 | sequence_length)
49 |
50 | with self.test_session() as sess:
51 | losses_ = sess.run(losses)
52 |
53 | # Make sure all losses not past the sequence length are > 0
54 | np.testing.assert_array_less(np.zeros_like(losses_[:1, 0]), losses_[:1, 0])
55 | np.testing.assert_array_less(np.zeros_like(losses_[:2, 1]), losses_[:2, 1])
56 | np.testing.assert_array_less(np.zeros_like(losses_[:3, 2]), losses_[:3, 2])
57 |
58 | # Make sure all losses past the sequence length are 0
59 | np.testing.assert_array_equal(losses_[1:, 0], np.zeros_like(losses_[1:, 0]))
60 | np.testing.assert_array_equal(losses_[2:, 1], np.zeros_like(losses_[2:, 1]))
61 | np.testing.assert_array_equal(losses_[3:, 2], np.zeros_like(losses_[3:, 2]))
62 |
63 |
64 | if __name__ == "__main__":
65 | tf.test.main()
66 |
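67 | # Note on the shapes exercised above: `logits` is
68 | # [sequence_length, batch_size, vocab_size], `targets` is
69 | # [sequence_length, batch_size], and the returned losses share the
70 | # [sequence_length, batch_size] layout, with every position beyond an
71 | # example's `sequence_length` masked to zero.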
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/pooling_encoder_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Test Cases for PoolingEncoder.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import tensorflow as tf
24 | import numpy as np
25 |
26 | from seq2seq.encoders import PoolingEncoder
27 |
28 |
29 | class PoolingEncoderTest(tf.test.TestCase):
30 | """
31 | Tests the PoolingEncoder class.
32 | """
33 |
34 | def setUp(self):
35 | super(PoolingEncoderTest, self).setUp()
36 | self.batch_size = 4
37 | self.sequence_length = 16
38 | self.input_depth = 10
39 | self.mode = tf.contrib.learn.ModeKeys.TRAIN
40 |
41 | def _test_with_params(self, params):
42 | """Tests the encoder with a given parameter configuration"""
43 | inputs = tf.random_normal(
44 | [self.batch_size, self.sequence_length, self.input_depth])
45 | example_length = tf.ones(
46 | self.batch_size, dtype=tf.int32) * self.sequence_length
47 |
48 | encode_fn = PoolingEncoder(params, self.mode)
49 | encoder_output = encode_fn(inputs, example_length)
50 |
51 | with self.test_session() as sess:
52 | sess.run(tf.global_variables_initializer())
53 | encoder_output_ = sess.run(encoder_output)
54 |
55 | np.testing.assert_array_equal(
56 | encoder_output_.outputs.shape,
57 | [self.batch_size, self.sequence_length, self.input_depth])
58 | np.testing.assert_array_equal(
59 | encoder_output_.attention_values.shape,
60 | [self.batch_size, self.sequence_length, self.input_depth])
61 | np.testing.assert_array_equal(encoder_output_.final_state.shape,
62 | [self.batch_size, self.input_depth])
63 |
64 | def test_encode_with_pos(self):
65 | self._test_with_params({
66 | "position_embeddings.enable": True,
67 | "position_embeddings.num_positions": self.sequence_length
68 | })
69 |
70 | def test_encode_without_pos(self):
71 | self._test_with_params({
72 | "position_embeddings.enable": False,
73 | "position_embeddings.num_positions": 0
74 | })
75 |
76 | if __name__ == "__main__":
77 | tf.test.main()
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/rnn_cell_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Unit tests for custom RNN cells.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | from __future__ import unicode_literals
23 |
24 | import tensorflow as tf
25 | from seq2seq.contrib import rnn_cell
26 |
27 | import numpy as np
28 |
29 |
30 | class ExtendedMultiRNNCellTest(tf.test.TestCase):
31 | """Tests the ExtendedMultiRNNCell"""
32 |
33 | def test_without_residuals(self):
34 | inputs = tf.constant(np.random.randn(1, 2))
35 | state = (tf.constant(np.random.randn(1, 2)),
36 | tf.constant(np.random.randn(1, 2)))
37 |
38 | with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
39 | standard_cell = tf.contrib.rnn.MultiRNNCell(
40 | [tf.contrib.rnn.GRUCell(2) for _ in range(2)], state_is_tuple=True)
41 | res_standard = standard_cell(inputs, state, scope="standard")
42 |
43 | test_cell = rnn_cell.ExtendedMultiRNNCell(
44 | [tf.contrib.rnn.GRUCell(2) for _ in range(2)])
45 | res_test = test_cell(inputs, state, scope="test")
46 |
47 | with self.test_session() as sess:
48 | sess.run([tf.global_variables_initializer()])
49 | res_standard_, res_test_, = sess.run([res_standard, res_test])
50 |
51 | # Make sure it produces the same results as the standard cell
52 | self.assertAllClose(res_standard_[0], res_test_[0])
53 | self.assertAllClose(res_standard_[1][0], res_test_[1][0])
54 | self.assertAllClose(res_standard_[1][1], res_test_[1][1])
55 |
56 | def _test_with_residuals(self, inputs, **kwargs):
57 | """Runs the cell in a session"""
58 | inputs = tf.convert_to_tensor(inputs)
59 | state = (tf.constant(np.random.randn(1, 2)),
60 | tf.constant(np.random.randn(1, 2)))
61 |
62 | with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
63 | test_cell = rnn_cell.ExtendedMultiRNNCell(
64 | [tf.contrib.rnn.GRUCell(2) for _ in range(2)],
65 | residual_connections=True,
66 | **kwargs)
67 | res_test = test_cell(inputs, state, scope="test")
68 |
69 | with self.test_session() as sess:
70 | sess.run([tf.global_variables_initializer()])
71 | return sess.run(res_test)
72 |
73 | def _test_constant_shape(self, combiner):
74 | """Tests a residual combiner whose shape doesn't change
75 | with depth"""
76 | inputs = np.random.randn(1, 2)
77 | with tf.variable_scope("same_input_size"):
78 | res_ = self._test_with_residuals(inputs, residual_combiner=combiner)
79 | self.assertEqual(res_[0].shape, (1, 2))
80 | self.assertEqual(res_[1][0].shape, (1, 2))
81 | self.assertEqual(res_[1][1].shape, (1, 2))
82 |
83 | inputs = np.random.randn(1, 5)
84 | with tf.variable_scope("diff_input_size"):
85 | res_ = self._test_with_residuals(inputs, residual_combiner=combiner)
86 | self.assertEqual(res_[0].shape, (1, 2))
87 | self.assertEqual(res_[1][0].shape, (1, 2))
88 | self.assertEqual(res_[1][1].shape, (1, 2))
89 |
90 | with tf.variable_scope("same_input_size_dense"):
91 | res_ = self._test_with_residuals(
92 | inputs, residual_combiner=combiner, residual_dense=True)
93 | self.assertEqual(res_[0].shape, (1, 2))
94 | self.assertEqual(res_[1][0].shape, (1, 2))
95 | self.assertEqual(res_[1][1].shape, (1, 2))
96 |
97 | inputs = np.random.randn(1, 5)
98 | with tf.variable_scope("diff_input_size_dense"):
99 | res_ = self._test_with_residuals(
100 | inputs, residual_combiner=combiner, residual_dense=True)
101 | self.assertEqual(res_[0].shape, (1, 2))
102 | self.assertEqual(res_[1][0].shape, (1, 2))
103 | self.assertEqual(res_[1][1].shape, (1, 2))
104 |
105 | def test_residuals_mean(self):
106 | self._test_constant_shape(combiner="mean")
107 |
108 | def test_residuals_add(self):
109 | self._test_constant_shape(combiner="add")
110 |
111 | def test_residuals_concat(self):
112 | inputs = np.random.randn(1, 2)
113 | with tf.variable_scope("same_input_size"):
114 | res_ = self._test_with_residuals(inputs, residual_combiner="concat")
115 | self.assertEqual(res_[0].shape, (1, 6))
116 | self.assertEqual(res_[1][0].shape, (1, 2))
117 | self.assertEqual(res_[1][1].shape, (1, 2))
118 |
119 | inputs = np.random.randn(1, 5)
120 | with tf.variable_scope("diff_input_size"):
121 | res_ = self._test_with_residuals(inputs, residual_combiner="concat")
122 | self.assertEqual(res_[0].shape, (1, 5 + 2 + 2))
123 | self.assertEqual(res_[1][0].shape, (1, 2))
124 | self.assertEqual(res_[1][1].shape, (1, 2))
125 |
126 | inputs = np.random.randn(1, 2)
127 | with tf.variable_scope("same_input_size_dense"):
128 | res_ = self._test_with_residuals(
129 | inputs, residual_combiner="concat", residual_dense=True)
130 | self.assertEqual(res_[0].shape, (1, 2 + 4 + 2))
131 | self.assertEqual(res_[1][0].shape, (1, 2))
132 | self.assertEqual(res_[1][1].shape, (1, 2))
133 |
134 | inputs = np.random.randn(1, 5)
135 | with tf.variable_scope("diff_input_size_dense"):
136 | res_ = self._test_with_residuals(
137 | inputs, residual_combiner="concat", residual_dense=True)
138 | self.assertEqual(res_[0].shape, (1, 2 + (5 + 2) + 5))
139 | self.assertEqual(res_[1][0].shape, (1, 2))
140 | self.assertEqual(res_[1][1].shape, (1, 2))
141 |
142 |
143 | if __name__ == "__main__":
144 | tf.test.main()
145 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Various testing utilities
15 | """
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | from __future__ import unicode_literals
21 |
22 | import tempfile
23 | import tensorflow as tf
24 |
25 |
26 | def create_temp_parallel_data(sources, targets):
27 | """
28 |   Creates a pair of temporary parallel text files.
29 |
30 | Args:
31 |     sources: List of source sentences
32 |     targets: List of target sentences
33 |
34 | Returns:
35 | A tuple (sources_file, targets_file).
36 | """
37 | file_source = tempfile.NamedTemporaryFile()
38 | file_target = tempfile.NamedTemporaryFile()
39 | file_source.write("\n".join(sources).encode("utf-8"))
40 | file_source.flush()
41 | file_target.write("\n".join(targets).encode("utf-8"))
42 | file_target.flush()
43 | return file_source, file_target
44 |
45 |
46 | def create_temp_tfrecords(sources, targets):
47 | """
48 | Creates a temporary TFRecords file.
49 |
50 | Args:
51 |     sources: List of source sentences
52 |     targets: List of target sentences
53 |
54 |   Returns:
55 |     A temporary file object containing the serialized examples.
56 | """
57 |
58 | output_file = tempfile.NamedTemporaryFile()
59 | writer = tf.python_io.TFRecordWriter(output_file.name)
60 | for source, target in zip(sources, targets):
61 | ex = tf.train.Example()
62 | #pylint: disable=E1101
63 | ex.features.feature["source"].bytes_list.value.extend(
64 | [source.encode("utf-8")])
65 | ex.features.feature["target"].bytes_list.value.extend(
66 | [target.encode("utf-8")])
67 | writer.write(ex.SerializeToString())
68 | writer.close()
69 |
70 | return output_file
71 |
72 |
73 | def create_temporary_vocab_file(words, counts=None):
74 | """
75 | Creates a temporary vocabulary file.
76 |
77 | Args:
78 | words: List of words in the vocabulary
79 |
80 | Returns:
81 | A temporary file object with one word per line
82 | """
83 | vocab_file = tempfile.NamedTemporaryFile()
84 | if counts is None:
85 | for token in words:
86 | vocab_file.write((token + "\n").encode("utf-8"))
87 | else:
88 | for token, count in zip(words, counts):
89 | vocab_file.write("{}\t{}\n".format(token, count).encode("utf-8"))
90 | vocab_file.flush()
91 | return vocab_file
92 |
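93 | # Example usage (mirrors how vocab_test.py consumes this helper):
94 | #
95 | #   vocab_file = create_temporary_vocab_file(["Hello", ".", "Bye"])
96 | #   tables = vocab.create_vocabulary_lookup_table(vocab_file.name)
97 | #   vocab_file.close()  # the NamedTemporaryFile is removed on close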
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/test/vocab_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2017 Google Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Unit tests for vocabulary-related operations.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | from __future__ import unicode_literals
23 |
24 | import tensorflow as tf
25 | import numpy as np
26 |
27 | from seq2seq.data import vocab
28 | from seq2seq.test import utils as test_utils
29 |
30 |
31 | class VocabInfoTest(tf.test.TestCase):
32 | """Tests VocabInfo class"""
33 |
34 | def setUp(self):
35 | super(VocabInfoTest, self).setUp()
36 | tf.logging.set_verbosity(tf.logging.INFO)
37 | self.vocab_list = ["Hello", ".", "Bye"]
38 | self.vocab_file = test_utils.create_temporary_vocab_file(self.vocab_list)
39 |
40 | def tearDown(self):
41 | super(VocabInfoTest, self).tearDown()
42 | self.vocab_file.close()
43 |
44 | def test_vocab_info(self):
45 | vocab_info = vocab.get_vocab_info(self.vocab_file.name)
46 | self.assertEqual(vocab_info.vocab_size, 3)
47 | self.assertEqual(vocab_info.path, self.vocab_file.name)
48 | self.assertEqual(vocab_info.special_vocab.UNK, 3)
49 | self.assertEqual(vocab_info.special_vocab.SEQUENCE_START, 4)
50 | self.assertEqual(vocab_info.special_vocab.SEQUENCE_END, 5)
51 | self.assertEqual(vocab_info.total_size, 6)
52 |
53 |
54 | class CreateVocabularyLookupTableTest(tf.test.TestCase):
55 | """
56 | Tests Vocabulary lookup table operations.
57 | """
58 |
59 | def test_without_counts(self):
60 | vocab_list = ["Hello", ".", "笑"]
61 | vocab_file = test_utils.create_temporary_vocab_file(vocab_list)
62 |
63 | vocab_to_id_table, id_to_vocab_table, _, vocab_size = \
64 | vocab.create_vocabulary_lookup_table(vocab_file.name)
65 |
66 | self.assertEqual(vocab_size, 6)
67 |
68 | with self.test_session() as sess:
69 | sess.run(tf.global_variables_initializer())
70 | sess.run(tf.local_variables_initializer())
71 | sess.run(tf.tables_initializer())
72 |
73 | ids = vocab_to_id_table.lookup(
74 | tf.convert_to_tensor(["Hello", ".", "笑", "??", "xxx"]))
75 | ids = sess.run(ids)
76 | np.testing.assert_array_equal(ids, [0, 1, 2, 3, 3])
77 |
78 | words = id_to_vocab_table.lookup(
79 | tf.convert_to_tensor(
80 | [0, 1, 2, 3], dtype=tf.int64))
81 | words = sess.run(words)
82 | np.testing.assert_array_equal(
83 | np.char.decode(words.astype("S"), "utf-8"),
84 | ["Hello", ".", "笑", "UNK"])
85 |
86 | def test_with_counts(self):
87 | vocab_list = ["Hello", ".", "笑"]
88 | vocab_counts = [100, 200, 300]
89 | vocab_file = test_utils.create_temporary_vocab_file(vocab_list,
90 | vocab_counts)
91 |
92 | vocab_to_id_table, id_to_vocab_table, word_to_count_table, vocab_size = \
93 | vocab.create_vocabulary_lookup_table(vocab_file.name)
94 |
95 | self.assertEqual(vocab_size, 6)
96 |
97 | with self.test_session() as sess:
98 | sess.run(tf.global_variables_initializer())
99 | sess.run(tf.local_variables_initializer())
100 | sess.run(tf.tables_initializer())
101 |
102 | ids = vocab_to_id_table.lookup(
103 | tf.convert_to_tensor(["Hello", ".", "笑", "??", "xxx"]))
104 | ids = sess.run(ids)
105 | np.testing.assert_array_equal(ids, [0, 1, 2, 3, 3])
106 |
107 | words = id_to_vocab_table.lookup(
108 | tf.convert_to_tensor(
109 | [0, 1, 2, 3], dtype=tf.int64))
110 | words = sess.run(words)
111 | np.testing.assert_array_equal(
112 | np.char.decode(words.astype("S"), "utf-8"),
113 | ["Hello", ".", "笑", "UNK"])
114 |
115 | counts = word_to_count_table.lookup(
116 | tf.convert_to_tensor(["Hello", ".", "笑", "??", "xxx"]))
117 | counts = sess.run(counts)
118 | np.testing.assert_array_equal(counts, [100, 200, 300, -1, -1])
119 |
120 |
121 | if __name__ == "__main__":
122 | tf.test.main()
123 |
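124 | # As asserted above, the lookup tables place the special tokens after the base
125 | # vocabulary: for a 3-word vocab file the word ids are 0..2, UNK=3,
126 | # SEQUENCE_START=4 and SEQUENCE_END=5 (total_size == 6), and out-of-vocabulary
127 | # lookups resolve to the UNK id.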
--------------------------------------------------------------------------------
/TensorFlow_implementation/seq2seq/training/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Operatations and wrappers to help with model training.
15 | """
16 |
17 | from seq2seq.training import hooks
18 | from seq2seq.training import utils
19 |
--------------------------------------------------------------------------------
/TensorFlow_implementation/trainer_with_copy_net.py:
--------------------------------------------------------------------------------
1 | from Summary_Generator.Tensorflow_Graph import order_planner_with_copynet
2 | from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *
3 | from Summary_Generator.Tensorflow_Graph.utils import *
4 | from Summary_Generator.Model import *
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | # random_seed value for consistent debuggable behaviour
9 | seed_value = 3
10 |
11 | np.random.seed(seed_value) # set this seed for device-independent, consistent behaviour
12 |
13 | ''' Set the constants for the script '''
14 | # various paths of the files
15 | data_path = "../Data" # the data path
16 |
17 | data_files_paths = {
18 | "table_content": os.path.join(data_path, "train.box"),
19 | "nb_sentences" : os.path.join(data_path, "train.nb"),
20 | "train_sentences": os.path.join(data_path, "train.sent")
21 | }
22 |
23 | base_model_path = "Models"
24 | plug_and_play_data_file = os.path.join(data_path, "plug_and_play.pickle")
25 |
26 |
27 |
28 |
29 |
30 | ''' Name of the model: '''
31 | # This can be changed to create new models in the directory
32 | model_name = "Model_2(with_copy_net)"
33 |
34 | '''
35 | =========================================================================================================
36 | || All Tweakable hyper-parameters
37 | =========================================================================================================
38 | '''
39 | # constants for this script
40 | train_percentage = 99
41 | batch_size = 2
42 | checkpoint_factor = 100
43 | learning_rate = 3e-4 # for learning rate -> https://twitter.com/karpathy/status/801621764144971776?lang=en
44 | # I know the tweet was a joke, but I have noticed that this learning rate works quite well.
45 |
46 | # Memory usage fraction:
47 | gpu_memory_usage_fraction = 0.2
48 |
49 | no_of_epochs = 500
50 |
51 | # Embeddings size:
52 | field_embedding_size = 200
53 | content_label_embedding_size = 400 # This is a much bigger vocabulary compared to the field_name's vocabulary
54 |
55 | # LSTM hidden state sizes
56 | lstm_cell_state_size = hidden_state_size = 500 # they are the same (for now)
57 | '''
58 | =========================================================================================================
59 | '''
60 |
61 |
62 |
63 |
64 | ''' Extract and setup the data '''
65 | # Obtain the data:
66 | print("Unpickling the data from the disc ...")
67 | data = unPickleIt(plug_and_play_data_file)
68 |
69 | field_encodings = data['field_encodings']
70 | field_dict = data['field_dict']
71 |
72 | content_encodings = data['content_encodings']
73 |
74 | label_encodings = data['label_encodings']
75 | content_label_dict = data['content_union_label_dict']
76 | rev_content_label_dict = data['rev_content_union_label_dict']
77 |
78 | # vocabulary sizes
79 | field_vocab_size = data['field_vocab_size']
80 | content_label_vocab_size = data['content_label_vocab_size']
81 |
82 |
83 | X, Y = synch_random_shuffle_non_np(list(zip(field_encodings, content_encodings)), label_encodings)
84 |
85 | train_X, train_Y, dev_X, dev_Y = split_train_dev(X, Y, train_percentage)
86 | train_X_field, train_X_content = zip(*train_X)
87 | train_X_field = list(train_X_field); train_X_content = list(train_X_content)
88 |
89 | # Free up the resources by deleting non required stuff
90 | del X, Y, field_encodings, content_encodings, train_X
91 | print("\nTotal_training_examples:", len(train_X_field))
92 |
93 |
94 |
95 |
96 | ''' Obtain the TensorFlow graph of the order_planner_with_copynet Network'''
97 | # just execute the get_computation_graph function here:
98 | graph, interface_dict = order_planner_with_copynet.get_computation_graph (
99 | seed_value = seed_value,
100 |
101 | # vocabulary sizes
102 | field_vocab_size = field_vocab_size,
103 | content_label_vocab_size = content_label_vocab_size,
104 |
105 | # Embeddings size:
106 | field_embedding_size = field_embedding_size,
107 | content_label_embedding_size = content_label_embedding_size,
108 |
109 | # LSTM hidden state sizes
110 | lstm_cell_state_size = lstm_cell_state_size,
111 |                 hidden_state_size = hidden_state_size, # they are the same (for now)
112 | rev_content_label_dict = rev_content_label_dict
113 | )
114 |
115 | ''' Start the Training of the data '''
116 | # Create the model and start the training on it
117 | model_path = os.path.join(base_model_path, model_name)
118 | model = Model(graph, interface_dict, tf.train.AdamOptimizer(learning_rate), field_dict, content_label_dict)
119 | model.train((train_X_field, train_X_content), train_Y, batch_size, no_of_epochs, checkpoint_factor, model_path, model_name, mem_fraction=gpu_memory_usage_fraction)
120 |
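121 | # Illustrative sketch only (the real helpers are imported above, not defined here):
122 | # `synch_random_shuffle_non_np` is assumed to shuffle the two equal-length lists with
123 | # one shared permutation and `split_train_dev` to cut them at `train_percentage`,
124 | # roughly equivalent to:
125 | #
126 | #   perm = np.random.permutation(len(X))
127 | #   X, Y = [X[i] for i in perm], [Y[i] for i in perm]
128 | #   cut = (len(X) * train_percentage) // 100
129 | #   train_X, train_Y, dev_X, dev_Y = X[:cut], Y[:cut], X[cut:], Y[cut:]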
--------------------------------------------------------------------------------
/TensorFlow_implementation/trainer_without_copy_net.py:
--------------------------------------------------------------------------------
1 | from Summary_Generator.Tensorflow_Graph import order_planner_without_copynet
2 | from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *
3 | from Summary_Generator.Tensorflow_Graph.utils import *
4 | from Summary_Generator.Model import *
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | # random_seed value for consistent debuggable behaviour
9 | seed_value = 3
10 |
11 | np.random.seed(seed_value) # set this seed for device-independent, consistent behaviour
12 |
13 | ''' Set the constants for the script '''
14 | # various paths of the files
15 | data_path = "../Data" # the data path
16 |
17 | data_files_paths = {
18 | "table_content": os.path.join(data_path, "train.box"),
19 | "nb_sentences" : os.path.join(data_path, "train.nb"),
20 | "train_sentences": os.path.join(data_path, "train.sent")
21 | }
22 |
23 | base_model_path = "Models"
24 | plug_and_play_data_file = os.path.join(data_path, "plug_and_play.pickle")
25 |
26 |
27 |
28 |
29 |
30 | ''' Name of the model: '''
31 | # This can be changed to create new models in the directory
32 | model_name = "Model_1(without_copy_net)"
33 |
34 | '''
35 | =========================================================================================================
36 | || All Tweakable hyper-parameters
37 | =========================================================================================================
38 | '''
39 | # constants for this script
40 | no_of_epochs = 500
41 | train_percentage = 100
42 | batch_size = 2
43 | checkpoint_factor = 100
44 | learning_rate = 3e-4 # for learning rate -> https://twitter.com/karpathy/status/801621764144971776?lang=en
45 | # I know the tweet was a joke, but I have noticed that this learning rate works quite well.
46 | momentum = 0.9
47 |
48 | # Memory usage fraction:
49 | gpu_memory_usage_fraction = 1
50 |
51 | # Embeddings size:
52 | field_embedding_size = 100
53 | content_label_embedding_size = 400 # This is a much bigger vocabulary compared to the field_name's vocabulary
54 |
55 | # LSTM hidden state sizes
56 | lstm_cell_state_size = hidden_state_size = 500 # they are the same (for now)
57 | '''
58 | =========================================================================================================
59 | '''
60 |
61 |
62 |
63 |
64 |
65 | ''' Extract and setup the data '''
66 | # Obtain the data:
67 | print("unpickling the data from the disc ...")
68 | data = unPickleIt(plug_and_play_data_file)
69 |
70 | field_encodings = data['field_encodings']
71 | field_dict = data['field_dict']
72 |
73 | content_encodings = data['content_encodings']
74 |
75 | label_encodings = data['label_encodings']
76 | content_label_dict = data['content_union_label_dict']
77 | rev_content_label_dict = data['rev_content_union_label_dict']
78 |
79 | # vocabulary sizes
80 | field_vocab_size = data['field_vocab_size']
81 | content_label_vocab_size = data['content_label_vocab_size']
82 |
83 |
84 | X, Y = synch_random_shuffle_non_np(list(zip(field_encodings, content_encodings)), label_encodings)
85 |
86 | train_X, train_Y, dev_X, dev_Y = split_train_dev(X, Y, train_percentage)
87 | train_X_field, train_X_content = zip(*train_X)
88 | train_X_field = list(train_X_field); train_X_content = list(train_X_content)
89 |
90 | # Free up the resources by deleting non required stuff
91 | del X, Y, field_encodings, content_encodings, train_X
92 |
93 |
94 |
95 |
96 |
97 |
98 | ''' Obtain the TensorFlow graph of the order_planner_without_copynet Network'''
99 | # just execute the get_computation_graph function here:
100 | graph, interface_dict = order_planner_without_copynet.get_computation_graph (
101 | seed_value = seed_value,
102 |
103 | # vocabulary sizes
104 | field_vocab_size = field_vocab_size,
105 | content_label_vocab_size = content_label_vocab_size,
106 |
107 | # Embeddings size:
108 | field_embedding_size = field_embedding_size,
109 | content_label_embedding_size = content_label_embedding_size,
110 |
111 | # LSTM hidden state sizes
112 | lstm_cell_state_size = lstm_cell_state_size,
113 |                 hidden_state_size = hidden_state_size, # they are the same (for now)
114 | rev_content_label_dict = rev_content_label_dict
115 | )
116 |
117 | ''' Start the Training of the data '''
118 | # Create the model and start the training on it
119 | model_path = os.path.join(base_model_path, model_name)
120 | model = Model(graph, interface_dict, tf.train.MomentumOptimizer(learning_rate, momentum), field_dict, content_label_dict)
121 | #model = Model(graph, interface_dict, tf.train.AdamOptimizer(learning_rate, momentum), field_dict, content_label_dict)
122 | #model.train((train_X_field, train_X_content), train_Y, batch_size, no_of_epochs, checkpoint_factor, model_path, model_name, mem_fraction=gpu_memory_usage_fraction)
123 | model.train((train_X_field, train_X_content), train_Y, batch_size, no_of_epochs, checkpoint_factor, model_path, model_name, mem_fraction=gpu_memory_usage_fraction)
124 |
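125 | # Illustrative sketch only: `mem_fraction` is assumed to cap per-process GPU memory
126 | # inside Model.train via a session config along the lines of:
127 | #
128 | #   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_usage_fraction)
129 | #   sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))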
--------------------------------------------------------------------------------
/Visualizations/first_run_of_both.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Visualizations/first_run_of_both.png
--------------------------------------------------------------------------------
/Visualizations/projector_pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/Visualizations/projector_pic.png
--------------------------------------------------------------------------------
/architecture_diagram.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akanimax/natural-language-summary-generation-from-structured-data/b8cc906286f97e8523acc8306945c34a4f8ef17c/architecture_diagram.jpeg
--------------------------------------------------------------------------------