├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── Makefile
├── data
│   └── words.txt
├── demos
│   ├── __init__.py
│   ├── consts.py
│   ├── sequence
│   │   ├── __init__.py
│   │   ├── adder.py
│   │   └── pig_latin.py
│   ├── understand.py
│   └── utils.py
├── main.py
├── models
│   ├── adder.model
│   ├── pig_latin.model
│   └── xor.model
├── readme.org
├── requirements.in
├── requirements.txt
├── setup.cfg
└── tests
    ├── ensure_flake8.sh
    └── ensure_pytest.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
venv/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/.gitmodules
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python

python:
  - "3.6"

before_install:
  - sudo apt-get update && sudo apt-get -y --no-install-recommends install libhdf5-serial-dev
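# Note: libhdf5-serial-dev is presumably installed for h5py, which compiles
# against the HDF5 headers; this project itself pickles model configs and
# weights rather than using Keras's HDF5 format (see demos/utils.py).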

install: "make venv && make deps"

script:
  - "make lint"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Linusp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
setup: venv deps

lint: venv clean
	- bash tests/ensure_flake8.sh
	- venv/bin/flake8 demos/ --format=pylint

deps: venv
	- venv/bin/pip install -r requirements.txt

venv:
	- virtualenv --python=$(shell which python3.6) --prompt '' venv
	- venv/bin/pip install setuptools pip -U

clean:
	- find . -iname "*__pycache__" | xargs rm -rf
	- find . -iname "*.pyc" | xargs rm -rf
--------------------------------------------------------------------------------
/demos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/demos/__init__.py
--------------------------------------------------------------------------------
/demos/consts.py:
--------------------------------------------------------------------------------
import os


PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODEL_PATH = os.path.join(PROJECT_ROOT, 'models')
DATA_PATH = os.path.join(PROJECT_ROOT, 'data')
--------------------------------------------------------------------------------
/demos/sequence/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/demos/sequence/__init__.py
--------------------------------------------------------------------------------
/demos/sequence/adder.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import os
import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.layers.core import Dense, RepeatVector
from keras.models import Sequential

from ..utils import build_model_from_file, save_model_to_file

BEGIN_SYMBOL = '^'
END_SYMBOL = '$'
CHARSET = set('0123456789+ ' + BEGIN_SYMBOL + END_SYMBOL)
CHAR_NUM = len(CHARSET)
MAX_LEN = 12

CHAR_TO_INDICES = {c: i for i, c in enumerate(CHARSET)}
INDICES_TO_CHAR = {i: c for c, i in CHAR_TO_INDICES.items()}


def vectorize(seq, seq_len, vec_size):
    vec = np.zeros((seq_len, vec_size), dtype=int)
    for i, ch in enumerate(seq):
        vec[i, CHAR_TO_INDICES[ch]] = 1

    for i in range(len(seq), seq_len):
        vec[i, CHAR_TO_INDICES[END_SYMBOL]] = 1

    return vec


def build_data():
    """Generate additions of all pairs of numbers below 100."""
    plain_x = []
    plain_y = []
    for i in range(0, 100):
        for j in range(0, 100):
            x = BEGIN_SYMBOL + '{}+{}'.format(i, j) + END_SYMBOL
            y = BEGIN_SYMBOL + '{}'.format(i + j) + END_SYMBOL

            plain_x.append(x)
            plain_y.append(y)

    data_size = len(plain_x)

    # convert to one-hot
    X = np.zeros((data_size, MAX_LEN, CHAR_NUM), dtype=int)
    Y = np.zeros((data_size, MAX_LEN, CHAR_NUM), dtype=int)
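
    # Each sample becomes a (MAX_LEN, CHAR_NUM) one-hot matrix; vectorize()
    # fills every position past the end of the string with the END_SYMBOL row,
    # so all samples share the fixed length MAX_LEN.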
    for i, seq in enumerate(plain_x):
        X[i] = vectorize(seq, MAX_LEN, CHAR_NUM)

    for i, seq in enumerate(plain_y):
        Y[i] = vectorize(seq, MAX_LEN, CHAR_NUM)

    return X, Y


def build_model(input_size, seq_len, hidden_size):
    """Build a seq2seq model."""
    model = Sequential()
    model.add(GRU(hidden_size, input_shape=(None, input_size), return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(input_size, activation="softmax")))
    model.compile(loss="categorical_crossentropy", optimizer='adam')

    return model


def train(epoch, model_path):
    train_x, train_y = build_data()

    model = build_model(CHAR_NUM, MAX_LEN, 128)
    model.fit(train_x, train_y, epochs=epoch)

    model_file = os.path.join(model_path, "adder.model")
    save_model_to_file(model, model_file)


def test(model_path, expression):
    model_file = os.path.join(model_path, 'adder.model')
    model = build_model_from_file(model_file)

    x = np.zeros((1, MAX_LEN, CHAR_NUM), dtype=int)
    expression = BEGIN_SYMBOL + expression.lower().strip() + END_SYMBOL
    x[0] = vectorize(expression, MAX_LEN, CHAR_NUM)

    pred = model.predict(x)[0]
    print(''.join([
        INDICES_TO_CHAR[i] for i in pred.argmax(axis=1)
        if INDICES_TO_CHAR[i] not in (BEGIN_SYMBOL, END_SYMBOL)
    ]))
--------------------------------------------------------------------------------
/demos/sequence/pig_latin.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import os
import re
import string
from itertools import dropwhile

import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.models import Sequential
from keras.layers.core import Dense, RepeatVector

from ..consts import DATA_PATH
from ..utils import build_model_from_file, save_model_to_file

WORDS_FILE = 'words.txt'
BEGIN_SYMBOL = '^'
END_SYMBOL = '$'
CHAR_SET = set(string.ascii_lowercase + BEGIN_SYMBOL + END_SYMBOL)
CHAR_NUM = len(CHAR_SET)
CHAR_TO_INDICES = {c: i for i, c in enumerate(CHAR_SET)}
INDICES_TO_CHAR = {i: c for c, i in CHAR_TO_INDICES.items()}
MAX_INPUT_LEN = 18
MAX_OUTPUT_LEN = 20

NON_ALPHA_PAT = re.compile('[^a-z]')


def is_vowel(char):
    return char in ('a', 'e', 'i', 'o', 'u')


def is_consonant(char):
    return not is_vowel(char)


def pig_latin(word):
    if is_vowel(word[0]):
        return word + 'yay'
    else:
        remain = ''.join(dropwhile(is_consonant, word))
        removed = word[:len(word) - len(remain)]
        return remain + removed + 'ay'


def vectorize(word, seq_len, vec_size):
    vec = np.zeros((seq_len, vec_size), dtype=int)
    for i, ch in enumerate(word):
        vec[i, CHAR_TO_INDICES[ch]] = 1

    for i in range(len(word), seq_len):
        vec[i, CHAR_TO_INDICES[END_SYMBOL]] = 1

    return vec


def build_data():
    words_file = os.path.join(DATA_PATH, WORDS_FILE)
    with open(words_file, 'r') as f:
        words = [
            w.lower().strip() for w in f.readlines()
            if w.strip() != '' and not NON_ALPHA_PAT.findall(w.lower().strip())
        ]
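
    # For illustration: pig_latin() appends 'yay' to vowel-initial words and
    # rotates the leading consonant cluster otherwise, e.g.
    # pig_latin('apple') == 'appleyay' and pig_latin('string') == 'ingstray'.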
    plain_x = []
    plain_y = []
    for w in words:
        plain_x.append(BEGIN_SYMBOL + w)
        plain_y.append(BEGIN_SYMBOL + pig_latin(w))

    # train_x and train_y must be 3-D arrays
    train_x = np.zeros((len(words), MAX_INPUT_LEN, CHAR_NUM), dtype=int)
    train_y = np.zeros((len(words), MAX_OUTPUT_LEN, CHAR_NUM), dtype=int)
    for i in range(len(words)):
        train_x[i] = vectorize(plain_x[i], MAX_INPUT_LEN, CHAR_NUM)
        train_y[i] = vectorize(plain_y[i], MAX_OUTPUT_LEN, CHAR_NUM)

    return train_x, train_y


def build_model(input_size, seq_len, hidden_size):
    """Build a sequence-to-sequence model."""
    model = Sequential()
    model.add(GRU(hidden_size, input_shape=(None, input_size), return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model


def train(epoch, model_path):
    x, y = build_data()
    split_idx = len(x) // 10
    test_x = x[:split_idx]
    test_y = y[:split_idx]
    train_x = x[split_idx:]
    train_y = y[split_idx:]

    model = build_model(CHAR_NUM, MAX_OUTPUT_LEN, 128)

    model.fit(train_x, train_y, validation_data=(test_x, test_y), batch_size=128, epochs=epoch)

    model_file = os.path.join(model_path, 'pig_latin.model')
    save_model_to_file(model, model_file)


def test(model_path, word):
    model_file = os.path.join(model_path, 'pig_latin.model')
    model = build_model_from_file(model_file)

    x = np.zeros((1, MAX_INPUT_LEN, CHAR_NUM), dtype=int)
    word = BEGIN_SYMBOL + word.lower().strip() + END_SYMBOL
    x[0] = vectorize(word, MAX_INPUT_LEN, CHAR_NUM)

    pred = model.predict(x)[0]
    print(''.join([
        INDICES_TO_CHAR[i] for i in pred.argmax(axis=1)
        if INDICES_TO_CHAR[i] not in (BEGIN_SYMBOL, END_SYMBOL)
    ]))
--------------------------------------------------------------------------------
/demos/understand.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import numpy as np
from keras.models import Sequential
from keras.layers.recurrent import GRU


def understand_return_sequence():
    """Helps understand the `return_sequences` argument of recurrent layers."""
    model_1 = Sequential()
    model_1.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(256, input_shape=(None, 256), return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict(inz, verbose=0)
    rez_2 = model_2.predict(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================')
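

# With the shapes used above, the run prints (100, 78, 256) for
# `return_sequences=True` (one output vector per timestep) and (100, 256)
# for `return_sequences=False` (only the final timestep's output).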


def understand_variable_length_handle():
    """Helps understand how to handle variable-length sequences with recurrent layers."""
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequence=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================')


def try_variable_length_train():
    """Variable-length sequence training experiment.

    The experiment fails: train_x and train_y built this way have dtype
    `object`, and their shape is just (100,), which makes training fail.
    """
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])

        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(train_x, train_y)


def try_variable_length_train_in_batch():
    """Variable-length sequence training experiment (2)."""
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    # split into two batches; the sequence length differs between batches
    seq_lens = [78, 87]
    for i in range(2):
        train_x = np.random.randn(20, seq_lens[i], 256)
        train_y = np.random.randn(20, seq_lens[i], 256)
        model.train_on_batch(train_x, train_y)


if __name__ == '__main__':
    understand_return_sequence()
    understand_variable_length_handle()
--------------------------------------------------------------------------------
/demos/utils.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import unicode_literals

import re
import pickle
import logging
from functools import partial, reduce

import jieba
from keras.models import Sequential

jieba.setLogLevel(logging.INFO)

PUNCTS_PATTERN = re.compile(r"[.,;:!?'\"~\[\]\(\)\{\}_—。….,;、:!?‘’“”〕《》【】〖〗()「」~]")
SPACES_PATTERN = re.compile(r"[\r\n\t\u00a0 ]")
SENT_SEP = u'。,!?~;:.,!?:;'


def to_halfwidth(text):
    """Convert fullwidth characters in the text to their halfwidth forms."""
    res = ''
    for char in text:
        inside_code = ord(char)
        if inside_code == 0x3000:
            inside_code = 0x0020
        else:
            inside_code -= 0xfee0

        if inside_code < 0x0020 or inside_code > 0x7e:
            res += char
        else:
            res += chr(inside_code)

    return res


def remove_punctuations(text):
    """Remove punctuation marks from the text."""
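    # Both ASCII and CJK punctuation marks become spaces, e.g.
    # remove_punctuations(u'你好,世界!') returns u'你好 世界 '.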
    return PUNCTS_PATTERN.sub(' ', text)


def unify_whitespace(text):
    """Normalize all whitespace characters in the text to plain spaces."""
    return SPACES_PATTERN.sub(' ', text)


def remove_redundant(text, chars):
    """Collapse consecutive runs of any of the given characters into one."""
    if chars == '' or text == '':
        return text

    char_set = set(chars)
    prev = ''
    result = ''
    for ch in text:
        if ch != prev or ch not in char_set:
            result += ch

        prev = ch

    return result


def clean(text):
    funcs = [
        to_halfwidth,
        remove_punctuations,
        unify_whitespace,
        partial(remove_redundant, chars=u' ')
    ]
    cleaned_text = reduce(lambda x, fn: fn(x), funcs, text)
    return cleaned_text


def words_tokenize(text):
    """Tokenize the text into words."""
    return [word.strip() for word in jieba.cut(text) if len(word.strip()) > 0]


def sents_tokenize(text, puncts=SENT_SEP):
    """Split the text into sentences."""
    tokens = words_tokenize(text)
    sents = []

    prev = u' '
    cur_sent = []
    for tk in tokens:
        if tk not in puncts and prev in puncts:
            sents.append(cur_sent)
            cur_sent = []

        cur_sent.append(tk)
        prev = tk

    if cur_sent:
        sents.append(cur_sent)

    return sents


def shingle(sequence, length):
    if len(sequence) < length:
        return []
    else:
        return [sequence[i:i + length] for i in range(len(sequence) - length + 1)]


def build_model_from_file(model_file):
    with open(model_file, 'rb') as f:
        structure, weights = pickle.load(f)

    model = Sequential.from_config(structure)
    model.set_weights(weights)

    return model


def save_model_to_file(model, model_file):
    # save model structure and weights together
    structure = model.get_config()
    weights = model.get_weights()
    with open(model_file, 'wb') as f:
        pickle.dump((structure, weights), f)
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import click

from demos.consts import MODEL_PATH
from demos.sequence.pig_latin import (
    train as train_piglatin_model,
    test as test_piglatin_model,
)
from demos.sequence.adder import (
    train as train_adder_model,
    test as test_adder_model,
)


@click.group()
def main():
    pass


@main.command()
@click.option('--epoch', default=50, help='number of epochs to train the model')
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to save model files in')
def train_piglatin(epoch, model_path):
    train_piglatin_model(epoch, model_path)


@main.command()
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to read model files from')
@click.argument('word')
def test_piglatin(model_path, word):
    test_piglatin_model(model_path, word)
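

# Hypothetical usage sketch (assumes models are trained before testing; with
# the pinned click 6.7, command names keep their underscores):
#
#   python main.py train_piglatin --epoch 50
#   python main.py test_piglatin hello
#   python main.py train_adder --epoch 50
#   python main.py test_adder '12+34'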


@main.command()
@click.option('--epoch', default=50, help='number of epochs to train the model')
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to save model files in')
def train_adder(epoch, model_path):
    train_adder_model(epoch, model_path)


@main.command()
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to read model files from')
@click.argument('expression')
def test_adder(model_path, expression):
    test_adder_model(model_path, expression)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/models/adder.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/adder.model
--------------------------------------------------------------------------------
/models/pig_latin.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/pig_latin.model
--------------------------------------------------------------------------------
/models/xor.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/xor.model
--------------------------------------------------------------------------------
/readme.org:
--------------------------------------------------------------------------------
* Soph

[[https://secure.travis-ci.org/Linusp/soph.png?branch=master]]

An AI practice project. It aims to show how AI techniques can be applied in
real-world scenarios, and how to make sensible use of some of the excellent
tools available today.

** Environment setup

The project is written mainly in Python; initialize the environment with:
#+BEGIN_SRC sh
make venv && make deps && source venv/bin/activate
#+END_SRC

** Usage

#+BEGIN_SRC sh
python main.py
#+END_SRC
--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
keras==2.1.2
click==6.7
jieba==0.39
scikit-learn==0.19.1
tensorflow==1.4.0
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
#
# This file is autogenerated by pip-compile
# To update, run:
#
#    pip-compile --output-file requirements.txt requirements.in
#
bleach==1.5.0             # via tensorflow-tensorboard
click==6.7
enum34==1.1.6             # via tensorflow
html5lib==0.9999999       # via bleach, tensorflow-tensorboard
jieba==0.39
keras==2.1.2
markdown==2.6.9           # via tensorflow-tensorboard
numpy==1.13.3             # via keras, tensorflow, tensorflow-tensorboard
protobuf==3.5.0.post1     # via tensorflow, tensorflow-tensorboard
pyyaml==3.11              # via keras
scikit-learn==0.19.1
scipy==0.17.0             # via keras
six==1.10.0               # via bleach, html5lib, keras, protobuf, tensorflow, tensorflow-tensorboard
tensorflow-tensorboard==0.4.0rc3  # via tensorflow
tensorflow==1.4.0
werkzeug==0.12.2          # via tensorflow-tensorboard
wheel==0.30.0             # via tensorflow, tensorflow-tensorboard
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[flake8]
max-line-length = 100
ignore = E201,E202

[pep8]
max-line-length = 100
ignore = E201,E202
--------------------------------------------------------------------------------
/tests/ensure_flake8.sh:
--------------------------------------------------------------------------------
#!/bin/bash

PIP=""
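# Fall back through progressively more generic pip executables, so the script
# works both inside the project venv and on bare CI images.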
8 | elif [ -e "$(which pip3.5)" ];then 9 | PIP=pip3.5 10 | elif [ -e "$(which pip3)" ];then 11 | PIP=pip3 12 | else 13 | PIP=pip 14 | fi 15 | 16 | ${PIP} install flake8 --quiet 17 | -------------------------------------------------------------------------------- /tests/ensure_pytest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PIP="" 4 | if [ -e "venv" ];then 5 | PIP=venv/bin/pip 6 | elif [ -e "$(which pip3.6)" ];then 7 | PIP=pip3.6 8 | elif [ -e "$(which pip3.5)" ];then 9 | PIP=pip3.5 10 | elif [ -e "$(which pip3)" ];then 11 | PIP=pip3 12 | else 13 | PIP=pip 14 | fi 15 | 16 | ${PIP} install pytest pytest-cov --quiet 17 | --------------------------------------------------------------------------------