├── .gitignore ├── LICENSE ├── README.md ├── notes └── UsefulLinks.md ├── requirements.txt ├── setup.cfg ├── setup.py ├── translator_train ├── data │ ├── cmn.txt │ └── fra.txt ├── eng_to_cmn_char_translator_train.py ├── eng_to_cmn_word_translator_train.py ├── eng_to_cnm_glove_translator_train.py ├── eng_to_fra_char_translator_train.py ├── eng_to_fra_glove_translator_train.py ├── eng_to_fra_word_translator_train.py ├── models │ ├── eng-to-cmn │ │ ├── eng-to-cmn-char-architecture.json │ │ ├── eng-to-cmn-char-context.npy │ │ ├── eng-to-cmn-char-input-char2idx.npy │ │ ├── eng-to-cmn-char-input-idx2char.npy │ │ ├── eng-to-cmn-char-target-char2idx.npy │ │ ├── eng-to-cmn-char-target-idx2char.npy │ │ ├── eng-to-cmn-char-weights.h5 │ │ ├── eng-to-cmn-glove-architecture.json │ │ ├── eng-to-cmn-glove-context.npy │ │ ├── eng-to-cmn-glove-target-idx2word.npy │ │ ├── eng-to-cmn-glove-target-word2idx.npy │ │ ├── eng-to-cmn-glove-unknown-emb.npy │ │ ├── eng-to-cmn-glove-weights.h5 │ │ ├── eng-to-cmn-word-architecture.json │ │ ├── eng-to-cmn-word-context.npy │ │ ├── eng-to-cmn-word-input-idx2word.npy │ │ ├── eng-to-cmn-word-input-word2idx.npy │ │ ├── eng-to-cmn-word-target-idx2word.npy │ │ ├── eng-to-cmn-word-target-word2idx.npy │ │ └── eng-to-cmn-word-weights.h5 │ └── eng-to-fra │ │ ├── eng-to-fra-char-architecture.json │ │ ├── eng-to-fra-char-context.npy │ │ ├── eng-to-fra-char-input-char2idx.npy │ │ ├── eng-to-fra-char-input-idx2char.npy │ │ ├── eng-to-fra-char-target-char2idx.npy │ │ ├── eng-to-fra-char-target-idx2char.npy │ │ ├── eng-to-fra-char-weights.h5 │ │ ├── eng-to-fra-glove-architecture.json │ │ ├── eng-to-fra-glove-context.npy │ │ ├── eng-to-fra-glove-target-idx2word.npy │ │ ├── eng-to-fra-glove-target-word2idx.npy │ │ ├── eng-to-fra-glove-unknown-emb.npy │ │ ├── eng-to-fra-glove-weights.h5 │ │ ├── eng-to-fra-word-architecture.json │ │ ├── eng-to-fra-word-context.npy │ │ ├── eng-to-fra-word-input-idx2word.npy │ │ ├── eng-to-fra-word-input-word2idx.npy │ │ ├── eng-to-fra-word-target-idx2word.npy │ │ ├── eng-to-fra-word-target-word2idx.npy │ │ └── eng-to-fra-word-weights.h5 └── worksheet.py └── translator_web ├── __init__.py ├── eng_to_cmn_char_translator_predict.py ├── eng_to_cmn_glove_translator_predict.py ├── eng_to_cmn_word_translator_predict.py ├── eng_to_fra_char_translator_predict.py ├── eng_to_fra_glove_translator_predict.py ├── eng_to_fra_word_translator_predict.py ├── flaskr.py ├── static └── style.css └── templates ├── eng_to_cmn_char_translator.html ├── eng_to_cmn_char_translator_result.html ├── eng_to_cmn_word_glove_translator.html ├── eng_to_cmn_word_glove_translator_result.html ├── eng_to_cmn_word_translator.html ├── eng_to_cmn_word_translator_result.html ├── eng_to_fra_char_translator.html ├── eng_to_fra_char_translator_result.html ├── eng_to_fra_word_glove_translator.html ├── eng_to_fra_word_glove_translator_result.html ├── eng_to_fra_word_translator.html ├── eng_to_fra_word_translator_result.html ├── home.html └── layout.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | .idea/ 10 | *.iml 11 | 12 | Thumbs.db 13 | 14 | translator_web/uploads 15 | translator_train/very_large_data 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 
| .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xianshun Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # keras-language-translator-web-api 2 | 3 | A simple language translator implemented in Keras, served over the web with Flask 4 | 5 | The translator is built on seq2seq models and can run inference at either the character level or the word level. 6 | 7 | The seq2seq model is implemented as an LSTM encoder-decoder in Keras. 8 |
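Every training script in translator_train wires up essentially the same encoder-decoder graph. Below is a minimal sketch of that wiring (shown with one-hot character-level inputs; the vocabulary sizes are illustrative, since the real scripts compute them from the data). The actual scripts add data preparation, checkpointing, and model persistence around this core:

```python
from keras.models import Model
from keras.layers import Input, LSTM, Dense

HIDDEN_UNITS = 256
num_encoder_tokens = 71  # illustrative; computed from the dataset in the real scripts
num_decoder_tokens = 94  # illustrative

# Encoder: read the source sequence and keep only the final LSTM states.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
_, state_h, state_c = LSTM(HIDDEN_UNITS, return_state=True)(encoder_inputs)

# Decoder: generate the target sequence, seeded with the encoder states.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_outputs, _, _ = LSTM(HIDDEN_UNITS, return_sequences=True, return_state=True)(
    decoder_inputs, initial_state=[state_h, state_c])
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
```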
9 | # Usage 10 | 11 | Run the following command to install Keras, Flask, and the other dependencies: 12 | 13 | ```bash 14 | sudo pip install -r requirements.txt 15 | ``` 16 | 17 | The translator models are trained on the eng-to-french and eng-to-chinese datasets and are available in the 18 | translator_train/models directory. At runtime, the flask app loads these trained models to perform the 19 | translation. 20 | 21 | Currently, only the eng-to-chinese and eng-to-french translation models are provided as examples. You can 22 | go to [http://www.manythings.org/anki/](http://www.manythings.org/anki/) to download more datasets for translator 23 | training and use the scripts in translator_train to train new seq2seq models for other language pairs. 24 | 25 | ## Training (Optional) 26 | 27 | As the trained models are already included in the "translator_train/models" folder of the project, training is 28 | not required. However, if you would like to tune the parameters of the seq2seq models and retrain them, you can use the 29 | following command to run the training: 30 | 31 | ```bash 32 | cd translator_train 33 | python eng_to_cmn_char_translator_train.py 34 | ``` 35 | 36 | The above command will train a seq2seq model on the eng-to-chinese dataset at the character level and store the trained model 37 | in "translator_train/models/eng-to-cmn/eng-to-cmn-char-**" 38 | 39 | If you would like to train other models, you can run the same command with the other training scripts: 40 | 41 | * eng_to_cmn_word_translator_train.py: trains on eng-to-chinese at the word level (one-hot encoding) 42 | * eng_to_cmn_glove_translator_train.py: trains on eng-to-chinese at the word level (GloVe encoding) 43 | * eng_to_fra_char_translator_train.py: trains on eng-to-french at the character level 44 | * eng_to_fra_word_translator_train.py: trains on eng-to-french at the word level (one-hot encoding) 45 | * eng_to_fra_glove_translator_train.py: trains on eng-to-french at the word level (GloVe encoding) 46 | 47 | ## Running the Web API Server 48 | 49 | Go to the translator_web directory and run the following command: 50 | 51 | ```bash 52 | python flaskr.py 53 | ``` 54 | 55 | Now navigate your browser to http://localhost:5000, where you can try out various predictors built with the following 56 | trained seq2seq models: 57 | 58 | * Character-level seq2seq models 59 | * Word-level seq2seq models (one-hot encoding) 60 | * Word-level seq2seq models (GloVe encoding) 61 | 62 | ## Invoke the Web API 63 | 64 | To translate an English sentence into another language via the web API, start the flask server and then run the following curl POST query 65 | in your terminal: 66 | 67 | ```bash 68 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"level_type", "sentence":"your_sentence_here", "target_lang":"target_language"}' http://localhost:5000/translate_eng 69 | ``` 70 | 71 | The level_type can be "char", "word", or "word-glove"; the target_lang can be "chinese" or "french". 72 | 73 | (Note that the same results can be obtained by running a curl GET query to http://localhost:5000/translate_eng?sentence=your_sentence_here&level=level_type&target_lang=target_language) 74 |
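If you prefer calling the API from Python, here is a minimal sketch using the `requests` library (an assumption on my part: `requests` is not listed in requirements.txt, so install it separately with `pip install requests`):

```python
import requests

# Assumes the flask server (translator_web/flaskr.py) is running locally on port 5000.
response = requests.post(
    'http://localhost:5000/translate_eng',
    json={'level': 'word', 'sentence': 'Be nice.', 'target_lang': 'chinese'},
)
print(response.json()['translated'])
```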
75 | For example, you can translate the sentence "Be nice." by running the following command: 76 | 77 | ```bash 78 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"word", "sentence":"Be nice.", "target_lang":"chinese"}' http://localhost:5000/translate_eng 79 | ``` 80 | 81 | And the following will be the JSON response: 82 | 83 | ```json 84 | { 85 | "level": "word", 86 | "sentence": "Be nice.", 87 | "target_lang": "chinese", 88 | "translated": "和气点。" 89 | } 90 | ``` 91 | 92 | Here are some more examples of English translation using other configuration options: 93 | 94 | ```bash 95 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"char", "sentence":"Be nice.", "target_lang":"chinese"}' http://localhost:5000/translate_eng 96 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"word-glove", "sentence":"Be nice.", "target_lang":"chinese"}' http://localhost:5000/translate_eng 97 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"word", "sentence":"Be nice.", "target_lang":"french"}' http://localhost:5000/translate_eng 98 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"word-glove", "sentence":"Be nice.", "target_lang":"french"}' http://localhost:5000/translate_eng 99 | curl -H 'Content-Type: application/json' -X POST -d '{"level":"char", "sentence":"Be nice.", "target_lang":"french"}' http://localhost:5000/translate_eng 100 | ``` 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /notes/UsefulLinks.md: -------------------------------------------------------------------------------- 1 | # Tutorial on character-based seq2seq Keras models for language translation 2 | 3 | * [https://blog.keras.io/category/tutorials.html](https://blog.keras.io/category/tutorials.html) 4 | 5 | # Language Translation Datasets 6 | 7 | * [http://www.manythings.org/anki/](http://www.manythings.org/anki/) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask == 0.12.2 2 | gevent 3 | keras 4 | numpy 5 | nltk 6 | h5py 7 | pillow 8 | https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='translator_web', 5 | packages=['translator_web'], 6 | include_package_data=True, 7 | install_requires=[ 8 | 'flask', 9 | 'keras', 10 | 'sklearn' 11 | ], 12 | setup_requires=[ 13 | 'pytest-runner', 14 | ], 15 | tests_require=[ 16 | 'pytest', 17 | ], 18 | ) -------------------------------------------------------------------------------- /translator_train/eng_to_cmn_char_translator_train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from keras.models import Model 3 | from keras.layers import Input, LSTM, Dense 4 | import numpy as np 5 | from keras.callbacks import ModelCheckpoint 6 | 7 | BATCH_SIZE = 64 8 | NUM_EPOCHS = 100 9 | HIDDEN_UNITS = 256 10 | NUM_SAMPLES = 10000 11 | DATA_PATH = 'data/cmn.txt' 12 | WEIGHT_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-char-weights.h5' 13
| ARCHITECTURE_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-char-architecture.json' 14 | 15 | input_texts = [] 16 | target_texts = [] 17 | input_characters = set() 18 | target_characters = set() 19 | 20 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 21 | 22 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 23 | input_text, target_text = line.split('\t') 24 | target_text = '\t' + target_text + '\n' 25 | input_texts.append(input_text) 26 | target_texts.append(target_text) 27 | for char in input_text: 28 | if char not in input_characters: 29 | input_characters.add(char) 30 | for char in target_text: 31 | if char not in target_characters: 32 | target_characters.add(char) 33 | 34 | input_characters = sorted(list(input_characters)) 35 | target_characters = sorted(list(target_characters)) 36 | num_encoder_tokens = len(input_characters) 37 | num_decoder_tokens = len(target_characters) 38 | max_encoder_seq_length = max([len(txt) for txt in input_texts]) 39 | max_decoder_seq_length = max([len(txt) for txt in target_texts]) 40 | 41 | input_char2idx = dict([(char, i) for i, char in enumerate(input_characters)]) 42 | input_idx2char = dict([(i, char) for i, char in enumerate(input_characters)]) 43 | target_char2idx = dict([(char, i) for i, char in enumerate(target_characters)]) 44 | target_idx2char = dict([(i, char) for i, char in enumerate(target_characters)]) 45 | 46 | np.save('models/eng-to-cmn/eng-to-cmn-char-input-char2idx.npy', input_char2idx) 47 | np.save('models/eng-to-cmn/eng-to-cmn-char-target-char2idx.npy', target_char2idx) 48 | np.save('models/eng-to-cmn/eng-to-cmn-char-input-idx2char.npy', input_idx2char) 49 | np.save('models/eng-to-cmn/eng-to-cmn-char-target-idx2char.npy', target_idx2char) 50 | 51 | context = dict() 52 | context['max_encoder_seq_length'] = max_encoder_seq_length 53 | context['max_decoder_seq_length'] = max_decoder_seq_length 54 | context['num_encoder_tokens'] = num_encoder_tokens 55 | context['num_decoder_tokens'] = num_decoder_tokens 56 | 57 | np.save('models/eng-to-cmn/eng-to-cmn-char-context.npy', context) 58 | 59 | encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') 60 | decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') 61 | decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') 62 | 63 | for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): 64 | for t, char in enumerate(input_text): 65 | encoder_input_data[i, t, input_char2idx[char]] = 1 66 | for t, char in enumerate(target_text): 67 | decoder_input_data[i, t, target_char2idx[char]] = 1 68 | if t > 0: 69 | decoder_target_data[i, t-1, target_char2idx[char]] = 1 70 | 71 | 72 | encoder_inputs = Input(shape=(None, num_encoder_tokens), name='encoder_inputs') 73 | encoder = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 74 | encoder_outputs, state_h, state_c = encoder(encoder_inputs) 75 | encoder_states = [state_h, state_c] 76 | 77 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 78 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 79 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 80 | decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='decoder_dense') 81 | decoder_outputs = decoder_dense(decoder_outputs) 82 | 83 | model = 
Model([encoder_inputs, decoder_inputs], decoder_outputs) 84 | json = model.to_json() 85 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 86 | 87 | model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 88 | checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 89 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 90 | validation_split=0.2, callbacks=[checkpoint]) 91 | 92 | 93 | model.save_weights(WEIGHT_FILE_PATH) 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /translator_train/eng_to_cmn_word_translator_train.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.callbacks import ModelCheckpoint 3 | from keras.layers.recurrent import LSTM 4 | from keras.layers import Dense, Input, Embedding 5 | from keras.preprocessing.sequence import pad_sequences 6 | from collections import Counter 7 | import nltk 8 | import numpy as np 9 | 10 | BATCH_SIZE = 64 11 | NUM_EPOCHS = 100 12 | HIDDEN_UNITS = 256 13 | NUM_SAMPLES = 10000 14 | MAX_VOCAB_SIZE = 10000 15 | DATA_PATH = 'data/cmn.txt' 16 | WEIGHT_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-word-weights.h5' 17 | ARCHITECTURE_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-word-architecture.json' 18 | 19 | input_counter = Counter() 20 | target_counter = Counter() 21 | 22 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 23 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 24 | input_text, target_text = line.split('\t') 25 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 26 | target_text = '\t' + target_text + '\n' 27 | for w in input_words: 28 | input_counter[w] += 1 29 | for char in target_text: 30 | target_counter[char] += 1 31 | 32 | input_word2idx = dict() 33 | target_word2idx = dict() 34 | for idx, word in enumerate(input_counter.most_common(MAX_VOCAB_SIZE)): 35 | input_word2idx[word[0]] = idx + 2 36 | for idx, word in enumerate(target_counter.most_common(MAX_VOCAB_SIZE)): 37 | target_word2idx[word[0]] = idx 38 | 39 | input_word2idx['PAD'] = 0 40 | input_word2idx['UNK'] = 1 41 | 42 | input_idx2word = dict([(idx, word) for word, idx in input_word2idx.items()]) 43 | target_idx2word = dict([(idx, word) for word, idx in target_word2idx.items()]) 44 | 45 | num_encoder_tokens = len(input_idx2word) 46 | num_decoder_tokens = len(target_idx2word) 47 | 48 | np.save('models/eng-to-cmn/eng-to-cmn-word-input-word2idx.npy', input_word2idx) 49 | np.save('models/eng-to-cmn/eng-to-cmn-word-input-idx2word.npy', input_idx2word) 50 | np.save('models/eng-to-cmn/eng-to-cmn-word-target-word2idx.npy', target_word2idx) 51 | np.save('models/eng-to-cmn/eng-to-cmn-word-target-idx2word.npy', target_idx2word) 52 | 53 | encoder_input_data = [] 54 | 55 | encoder_max_seq_length = 0 56 | decoder_max_seq_length = 0 57 | 58 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 59 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 60 | input_text, target_text = line.split('\t') 61 | target_text = '\t' + target_text + '\n' 62 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 63 | encoder_input_wids = [] 64 | for w in input_words: 65 | w2idx = 1 # default [UNK] 66 | if w in input_word2idx: 67 | w2idx = input_word2idx[w] 68 | encoder_input_wids.append(w2idx) 69 | 70 | encoder_input_data.append(encoder_input_wids) 71 | encoder_max_seq_length = 
max(len(encoder_input_wids), encoder_max_seq_length) 72 | decoder_max_seq_length = max(len(target_text), decoder_max_seq_length) 73 | 74 | encoder_input_data = pad_sequences(encoder_input_data, encoder_max_seq_length) 75 | 76 | decoder_target_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 77 | decoder_input_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 78 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 79 | for lineIdx, line in enumerate(lines[: min(NUM_SAMPLES, len(lines)-1)]): 80 | _, target_text = line.split('\t') 81 | target_text = '\t' + target_text + '\n' 82 | for idx, char in enumerate(target_text): 83 | if char in target_word2idx: 84 | w2idx = target_word2idx[char] 85 | decoder_input_data[lineIdx, idx, w2idx] = 1 86 | if idx > 0: 87 | decoder_target_data[lineIdx, idx-1, w2idx] = 1 88 | 89 | context = dict() 90 | context['num_encoder_tokens'] = num_encoder_tokens 91 | context['num_decoder_tokens'] = num_decoder_tokens 92 | context['encoder_max_seq_length'] = encoder_max_seq_length 93 | context['decoder_max_seq_length'] = decoder_max_seq_length 94 | 95 | np.save('models/eng-to-cmn/eng-to-cmn-word-context.npy', context) 96 | 97 | encoder_inputs = Input(shape=(None, ), name='encoder_inputs') 98 | encoder_embedding = Embedding(input_dim=num_encoder_tokens, output_dim=HIDDEN_UNITS, 99 | input_length=encoder_max_seq_length, name='encoder_embedding') 100 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm') 101 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_embedding(encoder_inputs)) 102 | encoder_states = [encoder_state_h, encoder_state_c] 103 | 104 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 105 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, return_sequences=True, name='decoder_lstm') 106 | decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs, 107 | initial_state=encoder_states) 108 | decoder_dense = Dense(units=num_decoder_tokens, activation='softmax', name='decoder_dense') 109 | decoder_outputs = decoder_dense(decoder_outputs) 110 | 111 | model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 112 | 113 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 114 | 115 | 116 | json = model.to_json() 117 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 118 | 119 | checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 120 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 121 | verbose=1, validation_split=0.2, callbacks=[checkpoint]) 122 | 123 | model.save_weights(WEIGHT_FILE_PATH) 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /translator_train/eng_to_cnm_glove_translator_train.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers.recurrent import LSTM 3 | from keras.layers import Dense, Input, Embedding 4 | from keras.preprocessing.sequence import pad_sequences 5 | from collections import Counter 6 | from keras.callbacks import ModelCheckpoint 7 | import nltk 8 | import numpy as np 9 | import os 10 | import zipfile 11 | import sys 12 | import urllib.request 13 | 14 | BATCH_SIZE = 64 15 | NUM_EPOCHS = 100 16 | HIDDEN_UNITS = 256 17 | NUM_SAMPLES = 10000 18 | MAX_VOCAB_SIZE = 10000 19 | 
GLOVE_EMBEDDING_SIZE = 100 20 | DATA_PATH = 'data/cmn.txt' 21 | 22 | target_counter = Counter() 23 | 24 | GLOVE_MODEL = "very_large_data/glove.6B." + str(GLOVE_EMBEDDING_SIZE) + "d.txt" 25 | WHITELIST = 'abcdefghijklmnopqrstuvwxyz1234567890?.,' 26 | WEIGHT_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-glove-weights.h5' 27 | ARCHITECTURE_FILE_PATH = 'models/eng-to-cmn/eng-to-cmn-glove-architecture.json' 28 | 29 | def in_white_list(_word): 30 | for char in _word: 31 | if char in WHITELIST: 32 | return True 33 | 34 | return False 35 | 36 | 37 | def reporthook(block_num, block_size, total_size): 38 | read_so_far = block_num * block_size 39 | if total_size > 0: 40 | percent = read_so_far * 1e2 / total_size 41 | s = "\r%5.1f%% %*d / %d" % ( 42 | percent, len(str(total_size)), read_so_far, total_size) 43 | sys.stderr.write(s) 44 | if read_so_far >= total_size: # near the end 45 | sys.stderr.write("\n") 46 | else: # total size is unknown 47 | sys.stderr.write("read %d\n" % (read_so_far,)) 48 | 49 | 50 | def download_glove(): 51 | if not os.path.exists(GLOVE_MODEL): 52 | 53 | glove_zip = 'very_large_data/glove.6B.zip' 54 | 55 | if not os.path.exists('very_large_data'): 56 | os.makedirs('very_large_data') 57 | 58 | if not os.path.exists(glove_zip): 59 | print('glove file does not exist, downloading from internet') 60 | urllib.request.urlretrieve(url='http://nlp.stanford.edu/data/glove.6B.zip', filename=glove_zip, 61 | reporthook=reporthook) 62 | 63 | print('unzipping glove file') 64 | zip_ref = zipfile.ZipFile(glove_zip, 'r') 65 | zip_ref.extractall('very_large_data') 66 | zip_ref.close() 67 | 68 | 69 | def load_glove(): 70 | download_glove() 71 | _word2em = {} 72 | file = open(GLOVE_MODEL, mode='rt', encoding='utf8') 73 | for line in file: 74 | words = line.strip().split() 75 | word = words[0] 76 | embeds = np.array(words[1:], dtype=np.float32) 77 | _word2em[word] = embeds 78 | file.close() 79 | return _word2em 80 | 81 | word2em = load_glove() 82 | 83 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 84 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 85 | input_text, target_text = line.split('\t') 86 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 87 | target_text = '\t' + target_text + '\n' 88 | for char in target_text: 89 | target_counter[char] += 1 90 | 91 | target_word2idx = dict() 92 | for idx, word in enumerate(target_counter.most_common(MAX_VOCAB_SIZE)): 93 | target_word2idx[word[0]] = idx 94 | 95 | target_idx2word = dict([(idx, word) for word, idx in target_word2idx.items()]) 96 | 97 | num_decoder_tokens = len(target_idx2word) 98 | 99 | np.save('models/eng-to-cmn/eng-to-cmn-glove-target-word2idx.npy', target_word2idx) 100 | np.save('models/eng-to-cmn/eng-to-cmn-glove-target-idx2word.npy', target_idx2word) 101 | 102 | unknown_emb = np.random.randn(GLOVE_EMBEDDING_SIZE) 103 | 104 | np.save('models/eng-to-cmn/eng-to-cmn-glove-unknown-emb.npy', unknown_emb) 105 | 106 | encoder_max_seq_length = 0 107 | decoder_max_seq_length = 0 108 | 109 | input_texts_word2em = [] 110 | 111 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 112 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 113 | input_text, target_text = line.split('\t') 114 | target_text = '\t' + target_text + '\n' 115 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 116 | encoder_input_wids = [] 117 | for w in input_words: 118 | em = unknown_emb 119 | if w in word2em: 120 | em = word2em[w] 121 | encoder_input_wids.append(em) 122 | 123 | 
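# Note: despite its name, encoder_input_wids holds GloVe embedding vectors here
# (one 100-d vector per input word), not word indices; unknown words fall back
# to the random unknown_emb vector saved above.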
input_texts_word2em.append(encoder_input_wids) 124 | encoder_max_seq_length = max(len(encoder_input_wids), encoder_max_seq_length) 125 | decoder_max_seq_length = max(len(target_text), decoder_max_seq_length) 126 | 127 | encoder_input_data = pad_sequences(input_texts_word2em, encoder_max_seq_length) 128 | 129 | decoder_target_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 130 | decoder_input_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 131 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 132 | for lineIdx, line in enumerate(lines[: min(NUM_SAMPLES, len(lines)-1)]): 133 | _, target_text = line.split('\t') 134 | target_text = '\t' + target_text + '\n' 135 | for idx, char in enumerate(target_text): 136 | if char in target_word2idx: 137 | w2idx = target_word2idx[char] 138 | decoder_input_data[lineIdx, idx, w2idx] = 1 139 | if idx > 0: 140 | decoder_target_data[lineIdx, idx-1, w2idx] = 1 141 | 142 | context = dict() 143 | context['num_decoder_tokens'] = num_decoder_tokens 144 | context['encoder_max_seq_length'] = encoder_max_seq_length 145 | context['decoder_max_seq_length'] = decoder_max_seq_length 146 | 147 | np.save('models/eng-to-cmn/eng-to-cmn-glove-context.npy', context) 148 | 149 | encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs') 150 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm') 151 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs) 152 | encoder_states = [encoder_state_h, encoder_state_c] 153 | 154 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 155 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, return_sequences=True, name='decoder_lstm') 156 | decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs, 157 | initial_state=encoder_states) 158 | decoder_dense = Dense(units=num_decoder_tokens, activation='softmax', name='decoder_dense') 159 | decoder_outputs = decoder_dense(decoder_outputs) 160 | 161 | model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 162 | 163 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 164 | 165 | json = model.to_json() 166 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 167 | 168 | checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 169 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 170 | verbose=1, validation_split=0.2, callbacks=[checkpoint]) 171 | 172 | model.save_weights(WEIGHT_FILE_PATH) 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /translator_train/eng_to_fra_char_translator_train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from keras.models import Model 3 | from keras.layers import Input, LSTM, Dense 4 | import numpy as np 5 | from keras.callbacks import ModelCheckpoint 6 | 7 | BATCH_SIZE = 64 8 | NUM_EPOCHS = 100 9 | HIDDEN_UNITS = 256 10 | NUM_SAMPLES = 10000 11 | DATA_PATH = 'data/fra.txt' 12 | WEIGHT_FILE_PATH = 'models/eng-to-fra/eng-to-fra-char-weights.h5' 13 | ARCHITECTURE_FILE_PATH = 'models/eng-to-fra/eng-to-fra-char-architecture.json' 14 | 15 | input_texts = [] 16 | target_texts = [] 17 | input_characters = set() 18 | target_characters = set() 19 | 20 | lines = open(DATA_PATH, 'rt', 
encoding='utf8').read().split('\n') 21 | 22 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 23 | input_text, target_text = line.split('\t') 24 | target_text = '\t' + target_text + '\n' 25 | input_texts.append(input_text) 26 | target_texts.append(target_text) 27 | for char in input_text: 28 | if char not in input_characters: 29 | input_characters.add(char) 30 | for char in target_text: 31 | if char not in target_characters: 32 | target_characters.add(char) 33 | 34 | input_characters = sorted(list(input_characters)) 35 | target_characters = sorted(list(target_characters)) 36 | num_encoder_tokens = len(input_characters) 37 | num_decoder_tokens = len(target_characters) 38 | max_encoder_seq_length = max([len(txt) for txt in input_texts]) 39 | max_decoder_seq_length = max([len(txt) for txt in target_texts]) 40 | 41 | input_char2idx = dict([(char, i) for i, char in enumerate(input_characters)]) 42 | input_idx2char = dict([(i, char) for i, char in enumerate(input_characters)]) 43 | target_char2idx = dict([(char, i) for i, char in enumerate(target_characters)]) 44 | target_idx2char = dict([(i, char) for i, char in enumerate(target_characters)]) 45 | 46 | np.save('models/eng-to-fra/eng-to-fra-char-input-char2idx.npy', input_char2idx) 47 | np.save('models/eng-to-fra/eng-to-fra-char-target-char2idx.npy', target_char2idx) 48 | np.save('models/eng-to-fra/eng-to-fra-char-input-idx2char.npy', input_idx2char) 49 | np.save('models/eng-to-fra/eng-to-fra-char-target-idx2char.npy', target_idx2char) 50 | 51 | context = dict() 52 | context['max_encoder_seq_length'] = max_encoder_seq_length 53 | context['max_decoder_seq_length'] = max_decoder_seq_length 54 | context['num_encoder_tokens'] = num_encoder_tokens 55 | context['num_decoder_tokens'] = num_decoder_tokens 56 | 57 | np.save('models/eng-to-fra/eng-to-fra-char-context.npy', context) 58 | 59 | encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') 60 | decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') 61 | decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') 62 | 63 | for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): 64 | for t, char in enumerate(input_text): 65 | encoder_input_data[i, t, input_char2idx[char]] = 1 66 | for t, char in enumerate(target_text): 67 | decoder_input_data[i, t, target_char2idx[char]] = 1 68 | if t > 0: 69 | decoder_target_data[i, t-1, target_char2idx[char]] = 1 70 | 71 | 72 | encoder_inputs = Input(shape=(None, num_encoder_tokens), name='encoder_inputs') 73 | encoder = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 74 | encoder_outputs, state_h, state_c = encoder(encoder_inputs) 75 | encoder_states = [state_h, state_c] 76 | 77 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 78 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 79 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 80 | decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='decoder_dense') 81 | decoder_outputs = decoder_dense(decoder_outputs) 82 | 83 | model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 84 | 85 | model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 86 | 87 | json = model.to_json() 88 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 89 | 90 | checkpoint = 
ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 91 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 92 | validation_split=0.2, callbacks=[checkpoint]) 93 | 94 | model.save_weights(WEIGHT_FILE_PATH) 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /translator_train/eng_to_fra_glove_translator_train.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.callbacks import ModelCheckpoint 3 | from keras.layers.recurrent import LSTM 4 | from keras.layers import Dense, Input, Embedding 5 | from keras.preprocessing.sequence import pad_sequences 6 | from collections import Counter 7 | import nltk 8 | import numpy as np 9 | import os 10 | import sys 11 | import zipfile 12 | import urllib.request 13 | 14 | BATCH_SIZE = 64 15 | NUM_EPOCHS = 100 16 | HIDDEN_UNITS = 256 17 | NUM_SAMPLES = 10000 18 | MAX_VOCAB_SIZE = 10000 19 | GLOVE_EMBEDDING_SIZE = 100 20 | DATA_PATH = 'data/fra.txt' 21 | WEIGHT_FILE_PATH = 'models/eng-to-fra/eng-to-fra-glove-weights.h5' 22 | ARCHITECTURE_FILE_PATH = 'models/eng-to-fra/eng-to-fra-glove-architecture.json' 23 | 24 | target_counter = Counter() 25 | 26 | GLOVE_MODEL = "very_large_data/glove.6B." + str(GLOVE_EMBEDDING_SIZE) + "d.txt" 27 | WHITELIST = 'abcdefghijklmnopqrstuvwxyz1234567890?.,' 28 | 29 | 30 | def in_white_list(_word): 31 | for char in _word: 32 | if char in WHITELIST: 33 | return True 34 | 35 | return False 36 | 37 | 38 | def reporthook(block_num, block_size, total_size): 39 | read_so_far = block_num * block_size 40 | if total_size > 0: 41 | percent = read_so_far * 1e2 / total_size 42 | s = "\r%5.1f%% %*d / %d" % ( 43 | percent, len(str(total_size)), read_so_far, total_size) 44 | sys.stderr.write(s) 45 | if read_so_far >= total_size: # near the end 46 | sys.stderr.write("\n") 47 | else: # total size is unknown 48 | sys.stderr.write("read %d\n" % (read_so_far,)) 49 | 50 | 51 | def download_glove(): 52 | if not os.path.exists(GLOVE_MODEL): 53 | 54 | glove_zip = 'very_large_data/glove.6B.zip' 55 | 56 | if not os.path.exists('very_large_data'): 57 | os.makedirs('very_large_data') 58 | 59 | if not os.path.exists(glove_zip): 60 | print('glove file does not exist, downloading from internet') 61 | urllib.request.urlretrieve(url='http://nlp.stanford.edu/data/glove.6B.zip', filename=glove_zip, 62 | reporthook=reporthook) 63 | 64 | print('unzipping glove file') 65 | zip_ref = zipfile.ZipFile(glove_zip, 'r') 66 | zip_ref.extractall('very_large_data') 67 | zip_ref.close() 68 | 69 | 70 | def load_glove(): 71 | download_glove() 72 | _word2em = {} 73 | file = open(GLOVE_MODEL, mode='rt', encoding='utf8') 74 | for line in file: 75 | words = line.strip().split() 76 | word = words[0] 77 | embeds = np.array(words[1:], dtype=np.float32) 78 | _word2em[word] = embeds 79 | file.close() 80 | return _word2em 81 | 82 | word2em = load_glove() 83 | 84 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 85 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 86 | input_text, target_text = line.split('\t') 87 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 88 | target_text = 'START ' + target_text.lower() + ' END' 89 | target_words = [w for w in nltk.word_tokenize(target_text)] 90 | for w in target_words: 91 | target_counter[w] += 1 92 | 93 | target_word2idx = dict() 94 | for idx, word in 
enumerate(target_counter.most_common(MAX_VOCAB_SIZE)): 95 | target_word2idx[word[0]] = idx + 1 96 | 97 | target_word2idx['UNK'] = 0 98 | 99 | target_idx2word = dict([(idx, word) for word, idx in target_word2idx.items()]) 100 | 101 | num_decoder_tokens = len(target_idx2word) 102 | 103 | np.save('models/eng-to-fra/eng-to-fra-glove-target-word2idx.npy', target_word2idx) 104 | np.save('models/eng-to-fra/eng-to-fra-glove-target-idx2word.npy', target_idx2word) 105 | 106 | unknown_emb = np.random.randn(GLOVE_EMBEDDING_SIZE) 107 | 108 | np.save('models/eng-to-fra/eng-to-fra-glove-unknown-emb', unknown_emb) 109 | 110 | encoder_input_data = [] 111 | 112 | encoder_max_seq_length = 0 113 | decoder_max_seq_length = 0 114 | 115 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 116 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 117 | input_text, target_text = line.split('\t') 118 | target_text = 'START ' + target_text.lower() + ' END' 119 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 120 | target_words = [w for w in nltk.word_tokenize(target_text)] 121 | encoder_input_emb = [] 122 | for w in input_words: 123 | emb = unknown_emb 124 | if w in word2em: 125 | emb = word2em[w] 126 | encoder_input_emb.append(emb) 127 | 128 | encoder_input_data.append(encoder_input_emb) 129 | encoder_max_seq_length = max(len(encoder_input_emb), encoder_max_seq_length) 130 | decoder_max_seq_length = max(len(target_words), decoder_max_seq_length) 131 | 132 | encoder_input_data = pad_sequences(encoder_input_data, encoder_max_seq_length) 133 | 134 | decoder_target_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 135 | decoder_input_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 136 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 137 | for lineIdx, line in enumerate(lines[: min(NUM_SAMPLES, len(lines)-1)]): 138 | _, target_text = line.split('\t') 139 | target_text = 'START ' + target_text.lower() + ' END' 140 | target_words = [w for w in nltk.word_tokenize(target_text)] 141 | for idx, w in enumerate(target_words): 142 | w2idx = 0 # default [UNK] 143 | if w in target_word2idx: 144 | w2idx = target_word2idx[w] 145 | decoder_input_data[lineIdx, idx, w2idx] = 1 146 | if idx > 0: 147 | decoder_target_data[lineIdx, idx-1, w2idx] = 1 148 | 149 | context = dict() 150 | context['num_decoder_tokens'] = num_decoder_tokens 151 | context['encoder_max_seq_length'] = encoder_max_seq_length 152 | context['decoder_max_seq_length'] = decoder_max_seq_length 153 | 154 | np.save('models/eng-to-fra/eng-to-fra-glove-context.npy', context) 155 | 156 | encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs') 157 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm') 158 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs) 159 | encoder_states = [encoder_state_h, encoder_state_c] 160 | 161 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 162 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, return_sequences=True, name='decoder_lstm') 163 | decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs, 164 | initial_state=encoder_states) 165 | decoder_dense = Dense(units=num_decoder_tokens, activation='softmax', name='decoder_dense') 166 | decoder_outputs = decoder_dense(decoder_outputs) 167 | 168 | model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 169 | 170 | 
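# Training uses teacher forcing: decoder_input_data feeds the target sequence
# into the decoder, while decoder_target_data is the same sequence shifted one
# timestep earlier (built with the idx > 0 offset above), so the decoder
# learns to predict the next word at every step.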
model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 171 | 172 | json = model.to_json() 173 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 174 | 175 | checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 176 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 177 | verbose=1, validation_split=0.2, callbacks=[checkpoint]) 178 | 179 | model.save_weights(WEIGHT_FILE_PATH) 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /translator_train/eng_to_fra_word_translator_train.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.callbacks import ModelCheckpoint 3 | from keras.layers.recurrent import LSTM 4 | from keras.layers import Dense, Input, Embedding 5 | from keras.preprocessing.sequence import pad_sequences 6 | from collections import Counter 7 | import nltk 8 | import numpy as np 9 | 10 | BATCH_SIZE = 64 11 | NUM_EPOCHS = 100 12 | HIDDEN_UNITS = 256 13 | NUM_SAMPLES = 10000 14 | MAX_VOCAB_SIZE = 10000 15 | DATA_PATH = 'data/fra.txt' 16 | WEIGHT_FILE_PATH = 'models/eng-to-fra/eng-to-fra-word-weights.h5' 17 | ARCHITECTURE_FILE_PATH = 'models/eng-to-fra/eng-to-fra-word-architecture.json' 18 | 19 | input_counter = Counter() 20 | target_counter = Counter() 21 | 22 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 23 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 24 | input_text, target_text = line.split('\t') 25 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 26 | target_text = 'START ' + target_text.lower() + ' END' 27 | target_words = [w for w in nltk.word_tokenize(target_text)] 28 | for w in input_words: 29 | input_counter[w] += 1 30 | for w in target_words: 31 | target_counter[w] += 1 32 | 33 | input_word2idx = dict() 34 | target_word2idx = dict() 35 | for idx, word in enumerate(input_counter.most_common(MAX_VOCAB_SIZE)): 36 | input_word2idx[word[0]] = idx + 2 37 | for idx, word in enumerate(target_counter.most_common(MAX_VOCAB_SIZE)): 38 | target_word2idx[word[0]] = idx + 1 39 | 40 | input_word2idx['PAD'] = 0 41 | input_word2idx['UNK'] = 1 42 | target_word2idx['UNK'] = 0 43 | 44 | input_idx2word = dict([(idx, word) for word, idx in input_word2idx.items()]) 45 | target_idx2word = dict([(idx, word) for word, idx in target_word2idx.items()]) 46 | 47 | num_encoder_tokens = len(input_idx2word) 48 | num_decoder_tokens = len(target_idx2word) 49 | 50 | np.save('models/eng-to-fra/eng-to-fra-word-input-word2idx.npy', input_word2idx) 51 | np.save('models/eng-to-fra/eng-to-fra-word-input-idx2word.npy', input_idx2word) 52 | np.save('models/eng-to-fra/eng-to-fra-word-target-word2idx.npy', target_word2idx) 53 | np.save('models/eng-to-fra/eng-to-fra-word-target-idx2word.npy', target_idx2word) 54 | 55 | encoder_input_data = [] 56 | 57 | encoder_max_seq_length = 0 58 | decoder_max_seq_length = 0 59 | 60 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 61 | for line in lines[: min(NUM_SAMPLES, len(lines)-1)]: 62 | input_text, target_text = line.split('\t') 63 | target_text = 'START ' + target_text.lower() + ' END' 64 | input_words = [w for w in nltk.word_tokenize(input_text.lower())] 65 | target_words = [w for w in nltk.word_tokenize(target_text)] 66 | encoder_input_wids = [] 67 | for w in input_words: 68 | w2idx = 1 # default [UNK] 69 | if w in input_word2idx: 70 | w2idx = 
input_word2idx[w] 71 | encoder_input_wids.append(w2idx) 72 | 73 | encoder_input_data.append(encoder_input_wids) 74 | encoder_max_seq_length = max(len(encoder_input_wids), encoder_max_seq_length) 75 | decoder_max_seq_length = max(len(target_words), decoder_max_seq_length) 76 | 77 | encoder_input_data = pad_sequences(encoder_input_data, encoder_max_seq_length) 78 | 79 | decoder_target_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 80 | decoder_input_data = np.zeros(shape=(NUM_SAMPLES, decoder_max_seq_length, num_decoder_tokens)) 81 | lines = open(DATA_PATH, 'rt', encoding='utf8').read().split('\n') 82 | for lineIdx, line in enumerate(lines[: min(NUM_SAMPLES, len(lines)-1)]): 83 | _, target_text = line.split('\t') 84 | target_text = 'START ' + target_text.lower() + ' END' 85 | target_words = [w for w in nltk.word_tokenize(target_text)] 86 | for idx, w in enumerate(target_words): 87 | w2idx = 0 # default [UNK] 88 | if w in target_word2idx: 89 | w2idx = target_word2idx[w] 90 | decoder_input_data[lineIdx, idx, w2idx] = 1 91 | if idx > 0: 92 | decoder_target_data[lineIdx, idx-1, w2idx] = 1 93 | 94 | context = dict() 95 | context['num_encoder_tokens'] = num_encoder_tokens 96 | context['num_decoder_tokens'] = num_decoder_tokens 97 | context['encoder_max_seq_length'] = encoder_max_seq_length 98 | context['decoder_max_seq_length'] = decoder_max_seq_length 99 | 100 | np.save('models/eng-to-fra/eng-to-fra-word-context.npy', context) 101 | 102 | encoder_inputs = Input(shape=(None, ), name='encoder_inputs') 103 | encoder_embedding = Embedding(input_dim=num_encoder_tokens, output_dim=HIDDEN_UNITS, 104 | input_length=encoder_max_seq_length, name='encoder_embedding') 105 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm') 106 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_embedding(encoder_inputs)) 107 | encoder_states = [encoder_state_h, encoder_state_c] 108 | 109 | decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') 110 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, return_sequences=True, name='decoder_lstm') 111 | decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs, 112 | initial_state=encoder_states) 113 | decoder_dense = Dense(units=num_decoder_tokens, activation='softmax', name='decoder_dense') 114 | decoder_outputs = decoder_dense(decoder_outputs) 115 | 116 | model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 117 | 118 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 119 | 120 | json = model.to_json() 121 | open(ARCHITECTURE_FILE_PATH, 'w').write(json) 122 | 123 | checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) 124 | model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 125 | verbose=1, validation_split=0.2, callbacks=[checkpoint]) 126 | 127 | model.save_weights(WEIGHT_FILE_PATH) 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 73], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, {"name": 
"decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 2640], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": {"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 2640, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-input-char2idx.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-input-char2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-input-idx2char.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-input-idx2char.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-target-char2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-target-char2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-target-idx2char.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-target-idx2char.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-char-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-char-weights.h5 -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 100], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, {"name": "decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 2640], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": 
{"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 2640, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-glove-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-target-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-glove-target-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-target-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-glove-target-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-unknown-emb.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-glove-unknown-emb.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-glove-weights.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-glove-weights.h5 -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_embedding", "class_name": "Embedding", "config": {"name": "encoder_embedding", "trainable": true, "batch_input_shape": [null, 12], "dtype": "int32", "input_dim": 3557, "output_dim": 256, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 12}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 2640], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_embedding", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": {"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 2640, "activation": "softmax", "use_bias": true, 
"kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-input-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-input-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-input-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-input-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-target-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-target-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-target-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-target-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-cmn/eng-to-cmn-word-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-cmn/eng-to-cmn-word-weights.h5 -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 71], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, 
{"name": "decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 93], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": {"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 93, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-input-char2idx.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-input-char2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-input-idx2char.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-input-idx2char.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-target-char2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-target-char2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-target-idx2char.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-target-idx2char.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-char-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-char-weights.h5 -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 100], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, {"name": "decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 4655], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": 
{"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 4655, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-glove-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-target-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-glove-target-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-target-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-glove-target-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-unknown-emb.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-glove-unknown-emb.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-glove-weights.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-glove-weights.h5 -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "encoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null], "dtype": "float32", "sparse": false, "name": "encoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_embedding", "class_name": "Embedding", "config": {"name": "encoder_embedding", "trainable": true, "batch_input_shape": [null, 6], "dtype": "int32", "input_dim": 2183, "output_dim": 256, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 6}, "inbound_nodes": [[["encoder_inputs", 0, 0, {}]]]}, {"name": "decoder_inputs", "class_name": "InputLayer", "config": {"batch_input_shape": [null, null, 4655], "dtype": "float32", "sparse": false, "name": "decoder_inputs"}, "inbound_nodes": []}, {"name": "encoder_lstm", "class_name": "LSTM", "config": {"name": "encoder_lstm", "trainable": true, "return_sequences": false, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["encoder_embedding", 0, 0, {}]]]}, {"name": "decoder_lstm", "class_name": "LSTM", "config": {"name": "decoder_lstm", "trainable": true, "return_sequences": true, "return_state": true, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}, "inbound_nodes": [[["decoder_inputs", 0, 0, {}], ["encoder_lstm", 0, 1, {}], ["encoder_lstm", 0, 2, {}]]]}, {"name": "decoder_dense", "class_name": "Dense", "config": {"name": "decoder_dense", "trainable": true, "units": 4655, "activation": "softmax", "use_bias": true, 
"kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["decoder_lstm", 0, 0, {}]]]}], "input_layers": [["encoder_inputs", 0, 0], ["decoder_inputs", 0, 0]], "output_layers": [["decoder_dense", 0, 0]]}, "keras_version": "2.0.5", "backend": "tensorflow"} -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-context.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-context.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-input-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-input-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-input-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-input-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-target-idx2word.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-target-idx2word.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-target-word2idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-target-word2idx.npy -------------------------------------------------------------------------------- /translator_train/models/eng-to-fra/eng-to-fra-word-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-language-translator-web-api/06dc1d106e2293abaadd506992988a4a66b5eb78/translator_train/models/eng-to-fra/eng-to-fra-word-weights.h5 -------------------------------------------------------------------------------- /translator_train/worksheet.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | print(keras.__version__) -------------------------------------------------------------------------------- /translator_web/__init__.py: -------------------------------------------------------------------------------- 1 | from .flaskr import app 
-------------------------------------------------------------------------------- /translator_web/eng_to_cmn_char_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense 3 | import numpy as np 4 | 5 | HIDDEN_UNITS = 256 6 | 7 | 8 | class EngToCmnCharTranslator(object): 9 | model = None 10 | encoder_model = None 11 | decoder_model = None 12 | input_char2idx = None 13 | input_idx2char = None 14 | target_char2idx = None 15 | target_idx2char = None 16 | max_encoder_seq_length = None 17 | max_decoder_seq_length = None 18 | num_encoder_tokens = None 19 | num_decoder_tokens = None 20 | 21 | def __init__(self): 22 | self.input_char2idx = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-char-input-char2idx.npy').item() 23 | self.input_idx2char = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-char-input-idx2char.npy').item() 24 | self.target_char2idx = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-char-target-char2idx.npy').item() 25 | self.target_idx2char = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-char-target-idx2char.npy').item() 26 | context = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-char-context.npy').item() 27 | self.max_encoder_seq_length = context['max_encoder_seq_length'] 28 | self.max_decoder_seq_length = context['max_decoder_seq_length'] 29 | self.num_encoder_tokens = context['num_encoder_tokens'] 30 | self.num_decoder_tokens = context['num_decoder_tokens'] 31 | 32 | encoder_inputs = Input(shape=(None, self.num_encoder_tokens), name='encoder_inputs') 33 | encoder = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 34 | encoder_outputs, state_h, state_c = encoder(encoder_inputs) 35 | encoder_states = [state_h, state_c] 36 | 37 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 38 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 39 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 40 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 41 | decoder_outputs = decoder_dense(decoder_outputs) 42 | 43 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 44 | 45 | # model_json = open('../translator_train/models/eng-to-cmn/eng-to-cmn-char-architecture.json', 'r').read() 46 | # self.model = model_from_json(model_json) 47 | self.model.load_weights('../translator_train/models/eng-to-cmn/eng-to-cmn-char-weights.h5') 48 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 49 | 50 | self.encoder_model = Model(encoder_inputs, encoder_states) 51 | 52 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 53 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs) 54 | decoder_states = [state_h, state_c] 55 | decoder_outputs = decoder_dense(decoder_outputs) 56 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 57 | 58 | def translate_lang(self, input_text): 59 | input_seq = np.zeros((1, self.max_encoder_seq_length, self.num_encoder_tokens)) 60 | for idx, char in enumerate(input_text): 61 | if char in self.input_char2idx: 62 | idx2 = self.input_char2idx[char] 63 | input_seq[0, idx, idx2] = 1 64 | states_value = self.encoder_model.predict(input_seq) 65 
| target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 66 | target_seq[0, 0, self.target_char2idx['\t']] = 1 67 | target_text = '' 68 | terminated = False 69 | while not terminated: 70 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 71 | 72 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 73 | sample_character = self.target_idx2char[sample_token_idx] 74 | target_text += sample_character 75 | 76 | if sample_character == '\n' or len(target_text) >= self.max_decoder_seq_length: 77 | terminated = True 78 | 79 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 80 | target_seq[0, 0, sample_token_idx] = 1 81 | states_value = [h, c] 82 | return target_text.strip() 83 | 84 | def test_run(self): 85 | print(self.translate_lang('Be nice.')) 86 | print(self.translate_lang('Drop it!')) 87 | print(self.translate_lang('No way!')) 88 | 89 | 90 | def main(): 91 | model = EngToCmnCharTranslator() 92 | model.test_run() 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /translator_web/eng_to_cmn_glove_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense, Embedding 3 | from keras.preprocessing.sequence import pad_sequences 4 | import nltk 5 | import numpy as np 6 | import os 7 | import sys 8 | import urllib.request 9 | import zipfile 10 | 11 | HIDDEN_UNITS = 256 12 | GLOVE_EMBEDDING_SIZE = 100 13 | 14 | VERY_LARGE_DATA_DIR_PATH = '../translator_train/very_large_data' 15 | MODEL_DIR_PATH = '../translator_train/models/eng-to-cmn' 16 | GLOVE_MODEL = VERY_LARGE_DATA_DIR_PATH + "/glove.6B." + str(GLOVE_EMBEDDING_SIZE) + "d.txt" 17 | WHITELIST = 'abcdefghijklmnopqrstuvwxyz1234567890?.,' 18 | 19 | 20 | def in_white_list(_word): 21 | for char in _word: 22 | if char in WHITELIST: 23 | return True 24 | 25 | return False 26 | 27 | 28 | def reporthook(block_num, block_size, total_size): 29 | read_so_far = block_num * block_size 30 | if total_size > 0: 31 | percent = read_so_far * 1e2 / total_size 32 | s = "\r%5.1f%% %*d / %d" % ( 33 | percent, len(str(total_size)), read_so_far, total_size) 34 | sys.stderr.write(s) 35 | if read_so_far >= total_size: # near the end 36 | sys.stderr.write("\n") 37 | else: # total size is unknown 38 | sys.stderr.write("read %d\n" % (read_so_far,)) 39 | 40 | 41 | def download_glove(): 42 | if not os.path.exists(GLOVE_MODEL): 43 | 44 | glove_zip = VERY_LARGE_DATA_DIR_PATH + '/glove.6B.zip' 45 | 46 | if not os.path.exists(VERY_LARGE_DATA_DIR_PATH): 47 | os.makedirs(VERY_LARGE_DATA_DIR_PATH) 48 | 49 | if not os.path.exists(glove_zip): 50 | print('glove file does not exist, downloading from internet') 51 | urllib.request.urlretrieve(url='http://nlp.stanford.edu/data/glove.6B.zip', filename=glove_zip, 52 | reporthook=reporthook) 53 | 54 | print('unzipping glove file') 55 | zip_ref = zipfile.ZipFile(glove_zip, 'r') 56 | zip_ref.extractall(VERY_LARGE_DATA_DIR_PATH) 57 | zip_ref.close() 58 | 59 | 60 | def load_glove(): 61 | download_glove() 62 | _word2em = {} 63 | file = open(GLOVE_MODEL, mode='rt', encoding='utf8') 64 | for line in file: 65 | words = line.strip().split() 66 | word = words[0] 67 | embeds = np.array(words[1:], dtype=np.float32) 68 | _word2em[word] = embeds 69 | file.close() 70 | return _word2em 71 | 72 | 73 | class EngToCmnGloveTranslator(object): 74 | model = None 75 | encoder_model = None 76 | decoder_model 
= None 77 | target_word2idx = None 78 | target_idx2word = None 79 | max_encoder_seq_length = None 80 | max_decoder_seq_length = None 81 | num_decoder_tokens = None 82 | word2em = None 83 | unknown_emb = None 84 | 85 | def __init__(self): 86 | self.word2em = load_glove() 87 | self.unknown_emb = np.load(MODEL_DIR_PATH + '/eng-to-cmn-glove-unknown-emb.npy') 88 | self.target_word2idx = np.load( 89 | MODEL_DIR_PATH + '/eng-to-cmn-glove-target-word2idx.npy').item() 90 | self.target_idx2word = np.load( 91 | MODEL_DIR_PATH + '/eng-to-cmn-glove-target-idx2word.npy').item() 92 | context = np.load(MODEL_DIR_PATH + '/eng-to-cmn-glove-context.npy').item() 93 | self.max_decoder_seq_length = context['decoder_max_seq_length'] 94 | self.max_encoder_seq_length = context['encoder_max_seq_length'] 95 | self.num_decoder_tokens = context['num_decoder_tokens'] 96 | 97 | encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs') 98 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 99 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs) 100 | encoder_states = [encoder_state_h, encoder_state_c] 101 | 102 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 103 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 104 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 105 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 106 | decoder_outputs = decoder_dense(decoder_outputs) 107 | 108 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 109 | 110 | self.model.load_weights(MODEL_DIR_PATH + '/eng-to-cmn-glove-weights.h5') 111 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 112 | 113 | self.encoder_model = Model(encoder_inputs, encoder_states) 114 | 115 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 116 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs) 117 | decoder_states = [state_h, state_c] 118 | decoder_outputs = decoder_dense(decoder_outputs) 119 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 120 | 121 | def translate_lang(self, input_text): 122 | input_seq = [] 123 | input_wids = [] 124 | for word in nltk.word_tokenize(input_text.lower()): 125 | emb = self.unknown_emb 126 | if word in self.word2em: 127 | emb = self.word2em[word] 128 | input_wids.append(emb) 129 | input_seq.append(input_wids) 130 | input_seq = pad_sequences(input_seq, self.max_encoder_seq_length) 131 | states_value = self.encoder_model.predict(input_seq) 132 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 133 | target_seq[0, 0, self.target_word2idx['\t']] = 1 134 | target_text = '' 135 | terminated = False 136 | while not terminated: 137 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 138 | 139 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 140 | sample_word = self.target_idx2word[sample_token_idx] 141 | target_text += sample_word 142 | 143 | if sample_word == '\n' or len(target_text) >= self.max_decoder_seq_length: 144 | terminated = True 145 | 146 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 147 | target_seq[0, 0, sample_token_idx] = 1 148 | 149 | states_value = [h, c] 150 | return target_text.strip() 151 | 152 | def test_run(self): 153 | 
print(self.translate_lang('Be nice.')) 154 | print(self.translate_lang('Drop it!')) 155 | print(self.translate_lang('Get out!')) 156 | 157 | 158 | def main(): 159 | model = EngToCmnGloveTranslator() 160 | model.test_run() 161 | 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /translator_web/eng_to_cmn_word_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense, Embedding 3 | from keras.preprocessing.sequence import pad_sequences 4 | import nltk 5 | import numpy as np 6 | 7 | HIDDEN_UNITS = 256 8 | 9 | 10 | class EngToCmnWordTranslator(object): 11 | model = None 12 | encoder_model = None 13 | decoder_model = None 14 | input_word2idx = None 15 | input_idx2word = None 16 | target_word2idx = None 17 | target_idx2word = None 18 | max_encoder_seq_length = None 19 | max_decoder_seq_length = None 20 | num_encoder_tokens = None 21 | num_decoder_tokens = None 22 | 23 | def __init__(self): 24 | self.input_word2idx = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-word-input-word2idx.npy').item() 25 | self.input_idx2word = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-word-input-idx2word.npy').item() 26 | self.target_word2idx = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-word-target-word2idx.npy').item() 27 | self.target_idx2word = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-word-target-idx2word.npy').item() 28 | context = np.load('../translator_train/models/eng-to-cmn/eng-to-cmn-word-context.npy').item() 29 | self.max_encoder_seq_length = context['encoder_max_seq_length'] 30 | self.max_decoder_seq_length = context['decoder_max_seq_length'] 31 | self.num_encoder_tokens = context['num_encoder_tokens'] 32 | self.num_decoder_tokens = context['num_decoder_tokens'] 33 | 34 | encoder_inputs = Input(shape=(None, ), name='encoder_inputs') 35 | encoder_embedding = Embedding(input_dim=self.num_encoder_tokens, output_dim=HIDDEN_UNITS, 36 | input_length=self.max_encoder_seq_length, name='encoder_embedding') 37 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 38 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_embedding(encoder_inputs)) 39 | encoder_states = [encoder_state_h, encoder_state_c] 40 | 41 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 42 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 43 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 44 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 45 | decoder_outputs = decoder_dense(decoder_outputs) 46 | 47 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 48 | 49 | # model_json = open('../translator_train/models/eng-to-cmn/eng-to-cmn-word-architecture.json', 'r').read() 50 | # self.model = model_from_json(model_json) 51 | self.model.load_weights('../translator_train/models/eng-to-cmn/eng-to-cmn-word-weights.h5') 52 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 53 | 54 | self.encoder_model = Model(encoder_inputs, encoder_states) 55 | 56 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 57 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, 
initial_state=decoder_state_inputs) 58 | decoder_states = [state_h, state_c] 59 | decoder_outputs = decoder_dense(decoder_outputs) 60 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 61 | 62 | def translate_lang(self, input_text): 63 | input_seq = [] 64 | input_wids = [] 65 | for word in nltk.word_tokenize(input_text.lower()): 66 | idx = 1 # default [UNK] 67 | if word in self.input_word2idx: 68 | idx = self.input_word2idx[word] 69 | input_wids.append(idx) 70 | input_seq.append(input_wids) 71 | input_seq = pad_sequences(input_seq, self.max_encoder_seq_length) 72 | states_value = self.encoder_model.predict(input_seq) 73 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 74 | target_seq[0, 0, self.target_word2idx['\t']] = 1 75 | target_text = '' 76 | terminated = False 77 | while not terminated: 78 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 79 | 80 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 81 | sample_word = self.target_idx2word[sample_token_idx] 82 | target_text += sample_word 83 | 84 | if sample_word == '\n' or len(target_text) >= self.max_decoder_seq_length: 85 | terminated = True 86 | 87 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 88 | target_seq[0, 0, sample_token_idx] = 1 89 | 90 | states_value = [h, c] 91 | return target_text.strip() 92 | 93 | def test_run(self): 94 | print(self.translate_lang('Be nice.')) 95 | print(self.translate_lang('Drop it!')) 96 | print(self.translate_lang('Get out!')) 97 | 98 | 99 | def main(): 100 | model = EngToCmnWordTranslator() 101 | model.test_run() 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /translator_web/eng_to_fra_char_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense 3 | import numpy as np 4 | 5 | HIDDEN_UNITS = 256 6 | 7 | 8 | class EngToFraCharTranslator(object): 9 | model = None 10 | encoder_model = None 11 | decoder_model = None 12 | input_char2idx = None 13 | input_idx2char = None 14 | target_char2idx = None 15 | target_idx2char = None 16 | max_encoder_seq_length = None 17 | max_decoder_seq_length = None 18 | num_encoder_tokens = None 19 | num_decoder_tokens = None 20 | 21 | def __init__(self): 22 | self.input_char2idx = np.load('../translator_train/models/eng-to-fra/eng-to-fra-char-input-char2idx.npy').item() 23 | self.input_idx2char = np.load('../translator_train/models/eng-to-fra/eng-to-fra-char-input-idx2char.npy').item() 24 | self.target_char2idx = np.load('../translator_train/models/eng-to-fra/eng-to-fra-char-target-char2idx.npy').item() 25 | self.target_idx2char = np.load('../translator_train/models/eng-to-fra/eng-to-fra-char-target-idx2char.npy').item() 26 | context = np.load('../translator_train/models/eng-to-fra/eng-to-fra-char-context.npy').item() 27 | self.max_encoder_seq_length = context['max_encoder_seq_length'] 28 | self.max_decoder_seq_length = context['max_decoder_seq_length'] 29 | self.num_encoder_tokens = context['num_encoder_tokens'] 30 | self.num_decoder_tokens = context['num_decoder_tokens'] 31 | 32 | encoder_inputs = Input(shape=(None, self.num_encoder_tokens), name='encoder_inputs') 33 | encoder = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 34 | encoder_outputs, state_h, state_c = encoder(encoder_inputs) 35 | encoder_states = 
[state_h, state_c] 36 | 37 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 38 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 39 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 40 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 41 | decoder_outputs = decoder_dense(decoder_outputs) 42 | 43 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 44 | 45 | # model_json = open('../translator_train/models/eng-to-fra/eng-to-fra-char-architecture.json', 'r').read() 46 | # self.model = model_from_json(model_json) 47 | self.model.load_weights('../translator_train/models/eng-to-fra/eng-to-fra-char-weights.h5') 48 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 49 | 50 | self.encoder_model = Model(encoder_inputs, encoder_states) 51 | 52 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 53 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs) 54 | decoder_states = [state_h, state_c] 55 | decoder_outputs = decoder_dense(decoder_outputs) 56 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 57 | 58 | def translate_lang(self, input_text): 59 | input_seq = np.zeros((1, self.max_encoder_seq_length, self.num_encoder_tokens)) 60 | for idx, char in enumerate(input_text): 61 | if char in self.input_char2idx: 62 | idx2 = self.input_char2idx[char] 63 | input_seq[0, idx, idx2] = 1 64 | states_value = self.encoder_model.predict(input_seq) 65 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 66 | target_seq[0, 0, self.target_char2idx['\t']] = 1 67 | target_text = '' 68 | terminated = False 69 | while not terminated: 70 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 71 | 72 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 73 | sample_character = self.target_idx2char[sample_token_idx] 74 | target_text += sample_character 75 | 76 | if sample_character == '\n' or len(target_text) >= self.max_decoder_seq_length: 77 | terminated = True 78 | 79 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 80 | target_seq[0, 0, sample_token_idx] = 1 81 | states_value = [h, c] 82 | return target_text.strip() 83 | 84 | def test_run(self): 85 | print(self.translate_lang('Be nice.')) 86 | print(self.translate_lang('Drop it!')) 87 | print(self.translate_lang('Get out!')) 88 | 89 | 90 | def main(): 91 | model = EngToFraCharTranslator() 92 | model.test_run() 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /translator_web/eng_to_fra_glove_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense, Embedding 3 | from keras.preprocessing.sequence import pad_sequences 4 | import numpy as np 5 | import nltk 6 | import os 7 | import sys 8 | import urllib.request 9 | import zipfile 10 | 11 | HIDDEN_UNITS = 256 12 | GLOVE_EMBEDDING_SIZE = 100 13 | 14 | MODEL_DIR_PATH = '../translator_train/models/eng-to-fra' 15 | VERY_LARGE_DATA_DIR_PATH = '../translator_train/very_large_data' 16 | GLOVE_MODEL = VERY_LARGE_DATA_DIR_PATH + "/glove.6B." 
+ str(GLOVE_EMBEDDING_SIZE) + "d.txt" 17 | WHITELIST = 'abcdefghijklmnopqrstuvwxyz1234567890?.,' 18 | 19 | 20 | def in_white_list(_word): 21 | for char in _word: 22 | if char in WHITELIST: 23 | return True 24 | 25 | return False 26 | 27 | 28 | def reporthook(block_num, block_size, total_size): 29 | read_so_far = block_num * block_size 30 | if total_size > 0: 31 | percent = read_so_far * 1e2 / total_size 32 | s = "\r%5.1f%% %*d / %d" % ( 33 | percent, len(str(total_size)), read_so_far, total_size) 34 | sys.stderr.write(s) 35 | if read_so_far >= total_size: # near the end 36 | sys.stderr.write("\n") 37 | else: # total size is unknown 38 | sys.stderr.write("read %d\n" % (read_so_far,)) 39 | 40 | 41 | def download_glove(): 42 | if not os.path.exists(GLOVE_MODEL): 43 | 44 | glove_zip = VERY_LARGE_DATA_DIR_PATH + '//glove.6B.zip' 45 | 46 | if not os.path.exists(VERY_LARGE_DATA_DIR_PATH): 47 | os.makedirs(VERY_LARGE_DATA_DIR_PATH) 48 | 49 | if not os.path.exists(glove_zip): 50 | print('glove file does not exist, downloading from internet') 51 | urllib.request.urlretrieve(url='http://nlp.stanford.edu/data/glove.6B.zip', filename=glove_zip, 52 | reporthook=reporthook) 53 | 54 | print('unzipping glove file') 55 | zip_ref = zipfile.ZipFile(glove_zip, 'r') 56 | zip_ref.extractall(VERY_LARGE_DATA_DIR_PATH) 57 | zip_ref.close() 58 | 59 | 60 | def load_glove(): 61 | download_glove() 62 | _word2em = {} 63 | file = open(GLOVE_MODEL, mode='rt', encoding='utf8') 64 | for line in file: 65 | words = line.strip().split() 66 | word = words[0] 67 | embeds = np.array(words[1:], dtype=np.float32) 68 | _word2em[word] = embeds 69 | file.close() 70 | return _word2em 71 | 72 | 73 | class EngToFraGloveTranslator(object): 74 | model = None 75 | encoder_model = None 76 | decoder_model = None 77 | target_word2idx = None 78 | target_idx2word = None 79 | max_encoder_seq_length = None 80 | max_decoder_seq_length = None 81 | num_decoder_tokens = None 82 | word2em = None 83 | unknown_emb = None 84 | 85 | def __init__(self): 86 | self.word2em = load_glove() 87 | self.unknown_emb = np.load(MODEL_DIR_PATH + '/eng-to-fra-glove-unknown-emb.npy') 88 | self.target_word2idx = np.load( 89 | MODEL_DIR_PATH + '/eng-to-fra-glove-target-word2idx.npy').item() 90 | self.target_idx2word = np.load( 91 | MODEL_DIR_PATH + '/eng-to-fra-glove-target-idx2word.npy').item() 92 | context = np.load(MODEL_DIR_PATH + '/eng-to-fra-glove-context.npy').item() 93 | print(context) 94 | self.max_encoder_seq_length = context['encoder_max_seq_length'] 95 | self.max_decoder_seq_length = context['decoder_max_seq_length'] 96 | self.num_decoder_tokens = context['num_decoder_tokens'] 97 | 98 | encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs') 99 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 100 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs) 101 | encoder_states = [encoder_state_h, encoder_state_c] 102 | 103 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 104 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 105 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 106 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 107 | decoder_outputs = decoder_dense(decoder_outputs) 108 | 109 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 110 | 111 | self.model.load_weights(MODEL_DIR_PATH 
+ '/eng-to-fra-glove-weights.h5') 112 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 113 | 114 | self.encoder_model = Model(encoder_inputs, encoder_states) 115 | 116 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 117 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs) 118 | decoder_states = [state_h, state_c] 119 | decoder_outputs = decoder_dense(decoder_outputs) 120 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 121 | 122 | def translate_lang(self, input_text): 123 | input_seq = [] 124 | input_wids = [] 125 | for word in nltk.word_tokenize(input_text.lower()): 126 | emb = self.unknown_emb 127 | if word in self.word2em: 128 | emb = self.word2em[word] 129 | input_wids.append(emb) 130 | input_seq.append(input_wids) 131 | input_seq = pad_sequences(input_seq, self.max_encoder_seq_length) 132 | states_value = self.encoder_model.predict(input_seq) 133 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 134 | target_seq[0, 0, self.target_word2idx['START']] = 1 135 | target_text = '' 136 | terminated = False 137 | target_text_len = 0 138 | while not terminated: 139 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 140 | 141 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 142 | sample_word = self.target_idx2word[sample_token_idx] 143 | target_text_len += 1 144 | 145 | if sample_word != 'START' and sample_word != 'END': 146 | target_text += ' ' + sample_word 147 | 148 | if sample_word == 'END' or target_text_len >= self.max_decoder_seq_length: 149 | terminated = True 150 | 151 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 152 | target_seq[0, 0, sample_token_idx] = 1 153 | states_value = [h, c] 154 | 155 | return target_text.strip() 156 | 157 | def test_run(self): 158 | print(self.translate_lang('Be nice.')) 159 | print(self.translate_lang('Drop it!')) 160 | print(self.translate_lang('Get out!')) 161 | 162 | 163 | def main(): 164 | model = EngToFraGloveTranslator() 165 | model.test_run() 166 | 167 | 168 | if __name__ == '__main__': 169 | main() 170 | -------------------------------------------------------------------------------- /translator_web/eng_to_fra_word_translator_predict.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model, model_from_json 2 | from keras.layers import Input, LSTM, Dense, Embedding 3 | from keras.preprocessing.sequence import pad_sequences 4 | import numpy as np 5 | import nltk 6 | 7 | HIDDEN_UNITS = 256 8 | 9 | 10 | class EngToFraWordTranslator(object): 11 | model = None 12 | encoder_model = None 13 | decoder_model = None 14 | input_word2idx = None 15 | input_idx2word = None 16 | target_word2idx = None 17 | target_idx2word = None 18 | max_encoder_seq_length = None 19 | max_decoder_seq_length = None 20 | num_encoder_tokens = None 21 | num_decoder_tokens = None 22 | 23 | def __init__(self): 24 | self.input_word2idx = np.load('../translator_train/models/eng-to-fra/eng-to-fra-word-input-word2idx.npy').item() 25 | self.input_idx2word = np.load('../translator_train/models/eng-to-fra/eng-to-fra-word-input-idx2word.npy').item() 26 | self.target_word2idx = np.load('../translator_train/models/eng-to-fra/eng-to-fra-word-target-word2idx.npy').item() 27 | self.target_idx2word = np.load('../translator_train/models/eng-to-fra/eng-to-fra-word-target-idx2word.npy').item() 28 | context =
np.load('../translator_train/models/eng-to-fra/eng-to-fra-word-context.npy').item() 29 | print(context) 30 | self.max_encoder_seq_length = context['encoder_max_seq_length'] 31 | self.max_decoder_seq_length = context['decoder_max_seq_length'] 32 | self.num_encoder_tokens = context['num_encoder_tokens'] 33 | self.num_decoder_tokens = context['num_decoder_tokens'] 34 | 35 | encoder_inputs = Input(shape=(None, ), name='encoder_inputs') 36 | encoder_embedding = Embedding(input_dim=self.num_encoder_tokens, output_dim=HIDDEN_UNITS, 37 | input_length=self.max_encoder_seq_length, name='encoder_embedding') 38 | encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") 39 | encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_embedding(encoder_inputs)) 40 | encoder_states = [encoder_state_h, encoder_state_c] 41 | 42 | decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs') 43 | decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') 44 | decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) 45 | decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') 46 | decoder_outputs = decoder_dense(decoder_outputs) 47 | 48 | self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 49 | 50 | # model_json = open('../translator_train/models/eng-to-fra/eng-to-fra-word-architecture.json', 'r').read() 51 | # self.model = model_from_json(model_json) 52 | self.model.load_weights('../translator_train/models/eng-to-fra/eng-to-fra-word-weights.h5') 53 | self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') 54 | 55 | self.encoder_model = Model(encoder_inputs, encoder_states) 56 | 57 | decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))] 58 | decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs) 59 | decoder_states = [state_h, state_c] 60 | decoder_outputs = decoder_dense(decoder_outputs) 61 | self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) 62 | 63 | def translate_lang(self, input_text): 64 | input_seq = [] 65 | input_wids = [] 66 | for word in nltk.word_tokenize(input_text.lower()): 67 | idx = 1 68 | if word in self.input_word2idx: 69 | idx = self.input_word2idx[word] 70 | input_wids.append(idx) 71 | input_seq.append(input_wids) 72 | input_seq = pad_sequences(input_seq, self.max_encoder_seq_length) 73 | states_value = self.encoder_model.predict(input_seq) 74 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 75 | target_seq[0, 0, self.target_word2idx['START']] = 1 76 | target_text = '' 77 | terminated = False 78 | target_text_len = 0 79 | while not terminated: 80 | output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value) 81 | 82 | sample_token_idx = np.argmax(output_tokens[0, -1, :]) 83 | sample_word = self.target_idx2word[sample_token_idx] 84 | target_text_len += 1 85 | 86 | if sample_word != 'START' and sample_word != 'END': 87 | target_text += ' ' + sample_word 88 | 89 | if sample_word == 'END' or target_text_len >= self.max_decoder_seq_length: 90 | terminated = True 91 | 92 | target_seq = np.zeros((1, 1, self.num_decoder_tokens)) 93 | target_seq[0, 0, sample_token_idx] = 1 94 | states_value = [h, c] 95 | 96 | return target_text.strip() 97 | 98 | def test_run(self): 99 | print(self.translate_lang('Be nice.')) 100 | print(self.translate_lang('Drop it!')) 
101 | print(self.translate_lang('Get out!')) 102 | 103 | 104 | def main(): 105 | model = EngToFraWordTranslator() 106 | model.test_run() 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /translator_web/flaskr.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, send_from_directory, redirect, render_template, flash, url_for, jsonify, \ 2 | make_response, abort 3 | from translator_web.eng_to_fra_char_translator_predict import EngToFraCharTranslator 4 | from translator_web.eng_to_cmn_char_translator_predict import EngToCmnCharTranslator 5 | from translator_web.eng_to_fra_word_translator_predict import EngToFraWordTranslator 6 | from translator_web.eng_to_cmn_word_translator_predict import EngToCmnWordTranslator 7 | from translator_web.eng_to_fra_glove_translator_predict import EngToFraGloveTranslator 8 | from translator_web.eng_to_cmn_glove_translator_predict import EngToCmnGloveTranslator 9 | 10 | app = Flask(__name__) 11 | app.config.from_object(__name__)  # load config from this file, flaskr.py 12 | app.config['SECRET_KEY'] = 'development key'  # flash() needs a session secret; FLASKR_SETTINGS below may override it 13 | # Load default config and override config from an environment variable 14 | app.config.from_envvar('FLASKR_SETTINGS', silent=True) 15 | app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 16 | 17 | eng_to_fra_translator_c = EngToFraCharTranslator() 18 | eng_to_cmn_translator_c = EngToCmnCharTranslator() 19 | eng_to_fra_translator_w = EngToFraWordTranslator() 20 | eng_to_cmn_translator_w = EngToCmnWordTranslator() 21 | eng_to_fra_translator_g = EngToFraGloveTranslator() 22 | eng_to_cmn_translator_g = EngToCmnGloveTranslator() 23 | 24 | 25 | @app.route('/') 26 | def home(): 27 | return render_template('home.html') 28 | 29 | 30 | @app.route('/about') 31 | def about(): 32 | return 'About Us' 33 | 34 | 35 | @app.route('/eng_to_fra_char_translator', methods=['POST', 'GET']) 36 | def eng_to_fra_char_translator(): 37 | if request.method == 'POST': 38 | if 'sentence' not in request.form: 39 | flash('No sentence posted') 40 | return redirect(request.url) 41 | elif request.form['sentence'] == '': 42 | flash('No sentence') 43 | return redirect(request.url) 44 | else: 45 | sent = request.form['sentence'] 46 | translated = eng_to_fra_translator_c.translate_lang(sent) 47 | return render_template('eng_to_fra_char_translator_result.html', sentence=sent, translated=translated) 48 | return render_template('eng_to_fra_char_translator.html') 49 | 50 | 51 | @app.route('/eng_to_cmn_char_translator', methods=['POST', 'GET']) 52 | def eng_to_cmn_char_translator(): 53 | if request.method == 'POST': 54 | if 'sentence' not in request.form: 55 | flash('No sentence posted') 56 | return redirect(request.url) 57 | elif request.form['sentence'] == '': 58 | flash('No sentence') 59 | return redirect(request.url) 60 | else: 61 | sent = request.form['sentence'] 62 | translated = eng_to_cmn_translator_c.translate_lang(sent) 63 | return render_template('eng_to_cmn_char_translator_result.html', sentence=sent, 64 | translated=translated) 65 | return render_template('eng_to_cmn_char_translator.html') 66 | 67 | 68 | @app.route('/eng_to_fra_word_translator', methods=['POST', 'GET']) 69 | def eng_to_fra_word_translator(): 70 | if request.method == 'POST': 71 | if 'sentence' not in request.form: 72 | flash('No sentence posted') 73 | return redirect(request.url) 74 | elif request.form['sentence'] == '': 75 | flash('No sentence') 76 | return redirect(request.url) 77 | else: 78 | sent = request.form['sentence'] 79 | translated
= eng_to_fra_translator_w.translate_lang(sent) 80 | return render_template('eng_to_fra_word_translator_result.html', sentence=sent, 81 | translated=translated) 82 | return render_template('eng_to_fra_word_translator.html') 83 | 84 | 85 | @app.route('/eng_to_cmn_word_translator', methods=['POST', 'GET']) 86 | def eng_to_cmn_word_translator(): 87 | if request.method == 'POST': 88 | if 'sentence' not in request.form: 89 | flash('No sentence posted') 90 | return redirect(request.url) 91 | elif request.form['sentence'] == '': 92 | flash('No sentence') 93 | return redirect(request.url) 94 | else: 95 | sent = request.form['sentence'] 96 | translated = eng_to_cmn_translator_w.translate_lang(sent) 97 | return render_template('eng_to_cmn_word_translator_result.html', sentence=sent, 98 | sentiments=translated)  # the result is a translation; the template still reads the 'sentiments' key 99 | return render_template('eng_to_cmn_word_translator.html') 100 | 101 | 102 | @app.route('/eng_to_fra_word_glove_translator', methods=['POST', 'GET']) 103 | def eng_to_fra_word_glove_translator(): 104 | if request.method == 'POST': 105 | if 'sentence' not in request.form: 106 | flash('No sentence posted') 107 | return redirect(request.url) 108 | elif request.form['sentence'] == '': 109 | flash('No sentence') 110 | return redirect(request.url) 111 | else: 112 | sent = request.form['sentence'] 113 | translated = eng_to_fra_translator_g.translate_lang(sent) 114 | return render_template('eng_to_fra_word_glove_translator_result.html', sentence=sent, 115 | translated=translated) 116 | return render_template('eng_to_fra_word_glove_translator.html') 117 | 118 | 119 | @app.route('/eng_to_cmn_word_glove_translator', methods=['POST', 'GET']) 120 | def eng_to_cmn_word_glove_translator(): 121 | if request.method == 'POST': 122 | if 'sentence' not in request.form: 123 | flash('No sentence posted') 124 | return redirect(request.url) 125 | elif request.form['sentence'] == '': 126 | flash('No sentence') 127 | return redirect(request.url) 128 | else: 129 | sent = request.form['sentence'] 130 | translated = eng_to_cmn_translator_g.translate_lang(sent) 131 | return render_template('eng_to_cmn_word_glove_translator_result.html', sentence=sent, 132 | sentiments=translated)  # the result is a translation; the template still reads the 'sentiments' key 133 | return render_template('eng_to_cmn_word_glove_translator.html') 134 | 135 | 136 | @app.route('/translate_eng', methods=['POST', 'GET']) 137 | def translate_eng(): 138 | if request.method == 'POST': 139 | if not request.json or 'sentence' not in request.json or 'level' not in request.json or 'target_lang' not in request.json: 140 | abort(400) 141 | sentence = request.json['sentence'] 142 | level = request.json['level'] 143 | target_lang = request.json['target_lang'] 144 | else: 145 | sentence = request.args.get('sentence') 146 | level = request.args.get('level') 147 | target_lang = request.args.get('target_lang') 148 | 149 | target_text = sentence 150 | if level == 'char' and target_lang == 'french': 151 | target_text = eng_to_fra_translator_c.translate_lang(sentence) 152 | elif level == 'char' and target_lang == 'chinese': 153 | target_text = eng_to_cmn_translator_c.translate_lang(sentence) 154 | elif level == 'word' and target_lang == 'french': 155 | target_text = eng_to_fra_translator_w.translate_lang(sentence) 156 | elif level == 'word' and target_lang == 'chinese': 157 | target_text = eng_to_cmn_translator_w.translate_lang(sentence) 158 | elif level == 'word-glove' and target_lang == 'french': 159 | target_text = eng_to_fra_translator_g.translate_lang(sentence) 160 | elif level == 'word-glove' and target_lang == 'chinese': 161 | target_text =
eng_to_cmn_translator_g.translate_lang(sentence) 162 | return jsonify({ 163 | 'sentence': sentence, 164 | 'translated': target_text, 165 | 'target_lang': target_lang, 166 | 'level': level 167 | }) 168 | 169 | 170 | @app.errorhandler(404) 171 | def not_found(error): 172 | return make_response(jsonify({'error': 'Not found'}), 404) 173 | 174 | 175 | def main(): 176 | eng_to_fra_translator_c.test_run() 177 | eng_to_cmn_translator_c.test_run() 178 | eng_to_fra_translator_w.test_run() 179 | eng_to_cmn_translator_w.test_run() 180 | eng_to_fra_translator_g.test_run() 181 | eng_to_cmn_translator_g.test_run() 182 | app.run(debug=True) 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /translator_web/static/style.css: -------------------------------------------------------------------------------- 1 | body { font-family: sans-serif; background: #eee; } 2 | a, h1, h2 { color: #377ba8; } 3 | h1, h2 { font-family: 'Georgia', serif; margin: 0; } 4 | h1 { border-bottom: 2px solid #eee; } 5 | h2 { font-size: 1.2em; } 6 | 7 | .page { margin: 2em auto; width: 35em; border: 5px solid #ccc; 8 | padding: 0.8em; background: white; } 9 | .entries { list-style: none; margin: 0; padding: 0; } 10 | .entries li { margin: 0.8em 1.2em; } 11 | .entries li h2 { margin-left: -1em; } 12 | .add-entry { font-size: 0.9em; border-bottom: 1px solid #ccc; } 13 | .add-entry dl { font-weight: bold; } 14 | .metanav { text-align: right; font-size: 0.8em; padding: 0.3em; 15 | margin-bottom: 1em; background: #fafafa; } 16 | .flash { background: #cee5F5; padding: 0.5em; 17 | border: 1px solid #aacbe2; } 18 | .error { background: #f0d6d6; padding: 0.5em; } -------------------------------------------------------------------------------- /translator_web/templates/eng_to_cmn_char_translator.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 |
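The /translate_eng route above doubles as a JSON API: it accepts either a JSON POST body or GET query parameters carrying 'sentence', 'level' ('char', 'word', or 'word-glove'), and 'target_lang' ('french' or 'chinese'). Below is a minimal client sketch, assuming the dev server is running at Flask's default http://127.0.0.1:5000 and that the requests package is installed; the file name translate_client_example.py is hypothetical, not part of the repo.

# translate_client_example.py -- hypothetical client sketch for the
# /translate_eng JSON API; assumes the Flask dev server is running locally.
import requests

BASE_URL = 'http://127.0.0.1:5000'  # assumed local dev server address

# POST with a JSON body; the keys match what translate_eng() checks for.
resp = requests.post(BASE_URL + '/translate_eng', json={
    'sentence': 'Get out!',
    'level': 'char',
    'target_lang': 'french',
})
print(resp.json())  # {'sentence': ..., 'translated': ..., 'target_lang': ..., 'level': ...}

# The same route also answers GET requests with query parameters.
resp = requests.get(BASE_URL + '/translate_eng', params={
    'sentence': 'Get out!',
    'level': 'word-glove',
    'target_lang': 'chinese',
})
print(resp.json())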
--------------------------------------------------------------------------------
/translator_web/static/style.css:
--------------------------------------------------------------------------------
body { font-family: sans-serif; background: #eee; }
a, h1, h2 { color: #377ba8; }
h1, h2 { font-family: 'Georgia', serif; margin: 0; }
h1 { border-bottom: 2px solid #eee; }
h2 { font-size: 1.2em; }

.page { margin: 2em auto; width: 35em; border: 5px solid #ccc;
        padding: 0.8em; background: white; }
.entries { list-style: none; margin: 0; padding: 0; }
.entries li { margin: 0.8em 1.2em; }
.entries li h2 { margin-left: -1em; }
.add-entry { font-size: 0.9em; border-bottom: 1px solid #ccc; }
.add-entry dl { font-weight: bold; }
.metanav { text-align: right; font-size: 0.8em; padding: 0.3em;
           margin-bottom: 1em; background: #fafafa; }
.flash { background: #cee5F5; padding: 0.5em;
         border: 1px solid #aacbe2; }
.error { background: #f0d6d6; padding: 0.5em; }
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_char_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to Chinese (Char-Level)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_char_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_cmn_char_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_word_glove_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to Chinese (Word-Level GloVe Encoding)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_word_glove_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_cmn_word_glove_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_word_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to Chinese (Word-Level)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_cmn_word_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_cmn_word_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_char_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to French (Char-Level)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_char_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_fra_char_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_word_glove_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to French (Word-Level GloVe Encoding)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_word_glove_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_fra_word_glove_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_word_translator.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>Write something here in English to translate to French (Word-Level)</h3>
<form method="POST">
    Write something here: <input type="text" name="sentence"/>
    <input type="submit" value="Translate"/>
</form>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/eng_to_fra_word_translator_result.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<h3>You said: {{ sentence }}</h3>
<h3>Translated: {{ translated }}</h3>
<br/>
<a href="{{ url_for('eng_to_fra_word_translator') }}">Try another one</a>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/home.html:
--------------------------------------------------------------------------------
{% extends "layout.html" %}
{% block body %}
<ul class="entries">
    <li><a href="{{ url_for('eng_to_fra_char_translator') }}">English-to-French Translator (Char-Level)</a></li>
    <li><a href="{{ url_for('eng_to_cmn_char_translator') }}">English-to-Chinese Translator (Char-Level)</a></li>
    <li><a href="{{ url_for('eng_to_fra_word_translator') }}">English-to-French Translator (Word-Level)</a></li>
    <li><a href="{{ url_for('eng_to_cmn_word_translator') }}">English-to-Chinese Translator (Word-Level)</a></li>
    <li><a href="{{ url_for('eng_to_fra_word_glove_translator') }}">English-to-French Translator (Word-Level GloVe Encoding)</a></li>
    <li><a href="{{ url_for('eng_to_cmn_word_glove_translator') }}">English-to-Chinese Translator (Word-Level GloVe Encoding)</a></li>
</ul>
{% endblock %}
--------------------------------------------------------------------------------
/translator_web/templates/layout.html:
--------------------------------------------------------------------------------
<!doctype html>
<title>Flask Slingshot</title>
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
<div class="page">
    <h1>Language Translation</h1>
    <div class="metanav">
        <a href="/about">About</a>
    </div>
    {% for message in get_flashed_messages() %}
    <div class="flash">{{ message }}</div>
    {% endfor %}
    {% block body %}{% endblock %}
</div>
--------------------------------------------------------------------------------
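As a quick end-to-end check that the form routes, templates, and JSON API are wired together, the app can be exercised with Flask's built-in test client. Below is a sketch under stated assumptions: the file name smoke_test_example.py is hypothetical, and importing translator_web.flaskr loads the trained Keras models, so the .h5/.npy artifacts under translator_web/models must be present.

# smoke_test_example.py -- hypothetical smoke test using Flask's test client.
# Importing translator_web.flaskr instantiates the six translator objects,
# which load the trained model files from translator_web/models.
from translator_web.flaskr import app


def smoke_test():
    client = app.test_client()

    # GET renders the empty form page.
    assert client.get('/eng_to_fra_char_translator').status_code == 200

    # POST with the 'sentence' form field renders the result page.
    result = client.post('/eng_to_fra_char_translator', data={'sentence': 'Get out!'})
    assert result.status_code == 200

    # The JSON API also answers GET requests with query parameters.
    resp = client.get('/translate_eng', query_string={
        'sentence': 'Get out!', 'level': 'char', 'target_lang': 'french'})
    assert resp.status_code == 200


if __name__ == '__main__':
    smoke_test()
    print('All routes responded with 200.')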