├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── Makefile
├── data
│   └── words.txt
├── demos
│   ├── __init__.py
│   ├── consts.py
│   ├── sequence
│   │   ├── __init__.py
│   │   ├── adder.py
│   │   └── pig_latin.py
│   ├── understand.py
│   └── utils.py
├── main.py
├── models
│   ├── adder.model
│   ├── pig_latin.model
│   └── xor.model
├── readme.org
├── requirements.in
├── requirements.txt
├── setup.cfg
└── tests
    ├── ensure_flake8.sh
    └── ensure_pytest.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
venv/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/.gitmodules
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python

python:
  - "3.6"

before_install:
  - sudo apt-get update && sudo apt-get -y --no-install-recommends install libhdf5-serial-dev
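# Note: libhdf5-serial-dev is presumably installed for h5py, which compiles
# against the HDF5 headers; this project itself pickles model configs and
# weights rather than using Keras's HDF5 format (see demos/utils.py).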

install: "make venv && make deps"

script:
  - "make lint"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Linusp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
setup: venv deps

lint: venv clean
	- bash tests/ensure_flake8.sh
	- venv/bin/flake8 demos/ --format=pylint

deps: venv
	- venv/bin/pip install -r requirements.txt

venv:
	- virtualenv --python=$(shell which python3.6) --prompt '' venv
	- venv/bin/pip install setuptools pip -U

clean:
	- find . -iname "*__pycache__" | xargs rm -rf
	- find . -iname "*.pyc" | xargs rm -rf
--------------------------------------------------------------------------------
/demos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/demos/__init__.py
--------------------------------------------------------------------------------
/demos/consts.py:
--------------------------------------------------------------------------------
import os


PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODEL_PATH = os.path.join(PROJECT_ROOT, 'models')
DATA_PATH = os.path.join(PROJECT_ROOT, 'data')
--------------------------------------------------------------------------------
/demos/sequence/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/demos/sequence/__init__.py
--------------------------------------------------------------------------------
/demos/sequence/adder.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import os
import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.layers.core import Dense, RepeatVector
from keras.models import Sequential

from ..utils import build_model_from_file, save_model_to_file

BEGIN_SYMBOL = '^'
END_SYMBOL = '$'
CHARSET = set('0123456789+ ' + BEGIN_SYMBOL + END_SYMBOL)
CHAR_NUM = len(CHARSET)
MAX_LEN = 12

CHAR_TO_INDICES = {c: i for i, c in enumerate(CHARSET)}
INDICES_TO_CHAR = {i: c for c, i in CHAR_TO_INDICES.items()}


def vectorize(seq, seq_len, vec_size):
    vec = np.zeros((seq_len, vec_size), dtype=int)
    for i, ch in enumerate(seq):
        vec[i, CHAR_TO_INDICES[ch]] = 1

    for i in range(len(seq), seq_len):
        vec[i, CHAR_TO_INDICES[END_SYMBOL]] = 1

    return vec


def build_data():
    """Generate additions of all pairs of numbers below 100."""
    plain_x = []
    plain_y = []
    for i in range(0, 100):
        for j in range(0, 100):
            x = BEGIN_SYMBOL + '{}+{}'.format(i, j) + END_SYMBOL
            y = BEGIN_SYMBOL + '{}'.format(i + j) + END_SYMBOL

            plain_x.append(x)
            plain_y.append(y)

    data_size = len(plain_x)

    # convert to one-hot
    X = np.zeros((data_size, MAX_LEN, CHAR_NUM), dtype=int)
    Y = np.zeros((data_size, MAX_LEN, CHAR_NUM), dtype=int)
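
    # Each sample becomes a (MAX_LEN, CHAR_NUM) one-hot matrix; vectorize()
    # fills every position past the end of the string with the END_SYMBOL row,
    # so all samples share the fixed length MAX_LEN.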
    for i, seq in enumerate(plain_x):
        X[i] = vectorize(seq, MAX_LEN, CHAR_NUM)

    for i, seq in enumerate(plain_y):
        Y[i] = vectorize(seq, MAX_LEN, CHAR_NUM)

    return X, Y


def build_model(input_size, seq_len, hidden_size):
    """Build a seq2seq model."""
    model = Sequential()
    model.add(GRU(hidden_size, input_shape=(None, input_size), return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(input_size, activation="softmax")))
    model.compile(loss="categorical_crossentropy", optimizer='adam')

    return model


def train(epoch, model_path):
    train_x, train_y = build_data()

    model = build_model(CHAR_NUM, MAX_LEN, 128)
    model.fit(train_x, train_y, epochs=epoch)

    model_file = os.path.join(model_path, "adder.model")
    save_model_to_file(model, model_file)


def test(model_path, expression):
    model_file = os.path.join(model_path, 'adder.model')
    model = build_model_from_file(model_file)

    x = np.zeros((1, MAX_LEN, CHAR_NUM), dtype=int)
    expression = BEGIN_SYMBOL + expression.lower().strip() + END_SYMBOL
    x[0] = vectorize(expression, MAX_LEN, CHAR_NUM)

    pred = model.predict(x)[0]
    print(''.join([
        INDICES_TO_CHAR[i] for i in pred.argmax(axis=1)
        if INDICES_TO_CHAR[i] not in (BEGIN_SYMBOL, END_SYMBOL)
    ]))
--------------------------------------------------------------------------------
/demos/sequence/pig_latin.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import os
import re
import string
from itertools import dropwhile

import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.models import Sequential
from keras.layers.core import Dense, RepeatVector

from ..consts import DATA_PATH
from ..utils import build_model_from_file, save_model_to_file

WORDS_FILE = 'words.txt'
BEGIN_SYMBOL = '^'
END_SYMBOL = '$'
CHAR_SET = set(string.ascii_lowercase + BEGIN_SYMBOL + END_SYMBOL)
CHAR_NUM = len(CHAR_SET)
CHAR_TO_INDICES = {c: i for i, c in enumerate(CHAR_SET)}
INDICES_TO_CHAR = {i: c for c, i in CHAR_TO_INDICES.items()}
MAX_INPUT_LEN = 18
MAX_OUTPUT_LEN = 20

NON_ALPHA_PAT = re.compile('[^a-z]')


def is_vowel(char):
    return char in ('a', 'e', 'i', 'o', 'u')


def is_consonant(char):
    return not is_vowel(char)


def pig_latin(word):
    if is_vowel(word[0]):
        return word + 'yay'
    else:
        remain = ''.join(dropwhile(is_consonant, word))
        removed = word[:len(word) - len(remain)]
        return remain + removed + 'ay'


def vectorize(word, seq_len, vec_size):
    vec = np.zeros((seq_len, vec_size), dtype=int)
    for i, ch in enumerate(word):
        vec[i, CHAR_TO_INDICES[ch]] = 1

    for i in range(len(word), seq_len):
        vec[i, CHAR_TO_INDICES[END_SYMBOL]] = 1

    return vec


def build_data():
    words_file = os.path.join(DATA_PATH, WORDS_FILE)
    with open(words_file, 'r') as f:
        words = [
            w.lower().strip() for w in f.readlines()
            if w.strip() != '' and not NON_ALPHA_PAT.findall(w.lower().strip())
        ]
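
    # For illustration: pig_latin() appends 'yay' to vowel-initial words and
    # rotates the leading consonant cluster otherwise, e.g.
    # pig_latin('apple') == 'appleyay' and pig_latin('string') == 'ingstray'.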
    plain_x = []
    plain_y = []
    for w in words:
        plain_x.append(BEGIN_SYMBOL + w)
        plain_y.append(BEGIN_SYMBOL + pig_latin(w))

    # train_x and train_y must be 3-D arrays
    train_x = np.zeros((len(words), MAX_INPUT_LEN, CHAR_NUM), dtype=int)
    train_y = np.zeros((len(words), MAX_OUTPUT_LEN, CHAR_NUM), dtype=int)
    for i in range(len(words)):
        train_x[i] = vectorize(plain_x[i], MAX_INPUT_LEN, CHAR_NUM)
        train_y[i] = vectorize(plain_y[i], MAX_OUTPUT_LEN, CHAR_NUM)

    return train_x, train_y


def build_model(input_size, seq_len, hidden_size):
    """Build a sequence-to-sequence model."""
    model = Sequential()
    model.add(GRU(hidden_size, input_shape=(None, input_size), return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model


def train(epoch, model_path):
    x, y = build_data()
    split_idx = len(x) // 10
    test_x = x[:split_idx]
    test_y = y[:split_idx]
    train_x = x[split_idx:]
    train_y = y[split_idx:]

    model = build_model(CHAR_NUM, MAX_OUTPUT_LEN, 128)

    model.fit(train_x, train_y, validation_data=(test_x, test_y), batch_size=128, epochs=epoch)

    model_file = os.path.join(model_path, 'pig_latin.model')
    save_model_to_file(model, model_file)


def test(model_path, word):
    model_file = os.path.join(model_path, 'pig_latin.model')
    model = build_model_from_file(model_file)

    x = np.zeros((1, MAX_INPUT_LEN, CHAR_NUM), dtype=int)
    word = BEGIN_SYMBOL + word.lower().strip() + END_SYMBOL
    x[0] = vectorize(word, MAX_INPUT_LEN, CHAR_NUM)

    pred = model.predict(x)[0]
    print(''.join([
        INDICES_TO_CHAR[i] for i in pred.argmax(axis=1)
        if INDICES_TO_CHAR[i] not in (BEGIN_SYMBOL, END_SYMBOL)
    ]))
--------------------------------------------------------------------------------
/demos/understand.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import print_function

import numpy as np
from keras.models import Sequential
from keras.layers.recurrent import GRU


def understand_return_sequence():
    """Helps understand the `return_sequences` argument of recurrent layers."""
    model_1 = Sequential()
    model_1.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(256, input_shape=(None, 256), return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict(inz, verbose=0)
    rez_2 = model_2.predict(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================')
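

# With the shapes used above, the run prints (100, 78, 256) for
# `return_sequences=True` (one output vector per timestep) and (100, 256)
# for `return_sequences=False` (only the final timestep's output).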


def understand_variable_length_handle():
    """Helps understand how to handle variable-length sequences with recurrent layers."""
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequence=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================')


def try_variable_length_train():
    """Variable-length sequence training experiment.

    The experiment fails: train_x and train_y built this way have dtype
    `object`, and their shape is just (100,), which makes training fail.
    """
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])

        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(train_x, train_y)


def try_variable_length_train_in_batch():
    """Variable-length sequence training experiment (2)."""
    model = Sequential()
    model.add(GRU(256, input_shape=(None, 256), return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    # split into two batches; the sequence length differs between batches
    seq_lens = [78, 87]
    for i in range(2):
        train_x = np.random.randn(20, seq_lens[i], 256)
        train_y = np.random.randn(20, seq_lens[i], 256)
        model.train_on_batch(train_x, train_y)


if __name__ == '__main__':
    understand_return_sequence()
    understand_variable_length_handle()
--------------------------------------------------------------------------------
/demos/utils.py:
--------------------------------------------------------------------------------
# coding: utf-8
from __future__ import unicode_literals

import re
import pickle
import logging
from functools import partial, reduce

import jieba
from keras.models import Sequential

jieba.setLogLevel(logging.INFO)

PUNCTS_PATTERN = re.compile(r"[.,;:!?'\"~\[\]\(\)\{\}_—。….,;、:!?‘’“”〕《》【】〖〗()「」~]")
SPACES_PATTERN = re.compile(r"[\r\n\t\u00a0 ]")
SENT_SEP = u'。,!?~;:.,!?:;'


def to_halfwidth(text):
    """Convert fullwidth characters in the text to their halfwidth forms."""
    res = ''
    for char in text:
        inside_code = ord(char)
        if inside_code == 0x3000:
            inside_code = 0x0020
        else:
            inside_code -= 0xfee0

        if inside_code < 0x0020 or inside_code > 0x7e:
            res += char
        else:
            res += chr(inside_code)

    return res


def remove_punctuations(text):
    """Remove punctuation marks from the text."""
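    # Both ASCII and CJK punctuation marks become spaces, e.g.
    # remove_punctuations(u'你好,世界!') returns u'你好 世界 '.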
    return PUNCTS_PATTERN.sub(' ', text)


def unify_whitespace(text):
    """Normalize all whitespace characters in the text to plain spaces."""
    return SPACES_PATTERN.sub(' ', text)


def remove_redundant(text, chars):
    """Collapse consecutive runs of any of the given characters into one."""
    if chars == '' or text == '':
        return text

    char_set = set(chars)
    prev = ''
    result = ''
    for ch in text:
        if ch != prev or ch not in char_set:
            result += ch

        prev = ch

    return result


def clean(text):
    funcs = [
        to_halfwidth,
        remove_punctuations,
        unify_whitespace,
        partial(remove_redundant, chars=u' ')
    ]
    cleaned_text = reduce(lambda x, fn: fn(x), funcs, text)
    return cleaned_text


def words_tokenize(text):
    """Tokenize the text into words."""
    return [word.strip() for word in jieba.cut(text) if len(word.strip()) > 0]


def sents_tokenize(text, puncts=SENT_SEP):
    """Split the text into sentences."""
    tokens = words_tokenize(text)
    sents = []

    prev = u' '
    cur_sent = []
    for tk in tokens:
        if tk not in puncts and prev in puncts:
            sents.append(cur_sent)
            cur_sent = []

        cur_sent.append(tk)
        prev = tk

    if cur_sent:
        sents.append(cur_sent)

    return sents


def shingle(sequence, length):
    if len(sequence) < length:
        return []
    else:
        return [sequence[i:i + length] for i in range(len(sequence) - length + 1)]


def build_model_from_file(model_file):
    with open(model_file, 'rb') as f:
        structure, weights = pickle.load(f)

    model = Sequential.from_config(structure)
    model.set_weights(weights)

    return model


def save_model_to_file(model, model_file):
    # save model structure and weights together
    structure = model.get_config()
    weights = model.get_weights()
    with open(model_file, 'wb') as f:
        pickle.dump((structure, weights), f)
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import click

from demos.consts import MODEL_PATH
from demos.sequence.pig_latin import (
    train as train_piglatin_model,
    test as test_piglatin_model,
)
from demos.sequence.adder import (
    train as train_adder_model,
    test as test_adder_model,
)


@click.group()
def main():
    pass


@main.command()
@click.option('--epoch', default=50, help='number of epochs to train the model')
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to save model files in')
def train_piglatin(epoch, model_path):
    train_piglatin_model(epoch, model_path)


@main.command()
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to read model files from')
@click.argument('word')
def test_piglatin(model_path, word):
    test_piglatin_model(model_path, word)
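

# Hypothetical usage sketch (assumes models are trained before testing; with
# the pinned click 6.7, command names keep their underscores):
#
#   python main.py train_piglatin --epoch 50
#   python main.py test_piglatin hello
#   python main.py train_adder --epoch 50
#   python main.py test_adder '12+34'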


@main.command()
@click.option('--epoch', default=50, help='number of epochs to train the model')
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to save model files in')
def train_adder(epoch, model_path):
    train_adder_model(epoch, model_path)


@main.command()
@click.option('-m', '--model-path', default=MODEL_PATH,
              help='directory to read model files from')
@click.argument('expression')
def test_adder(model_path, expression):
    test_adder_model(model_path, expression)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/models/adder.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/adder.model
--------------------------------------------------------------------------------
/models/pig_latin.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/pig_latin.model
--------------------------------------------------------------------------------
/models/xor.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Linusp/soph/46822f347795f51c3872a164fbbe416fa0e91807/models/xor.model
--------------------------------------------------------------------------------
/readme.org:
--------------------------------------------------------------------------------
* Soph

[[https://secure.travis-ci.org/Linusp/soph.png?branch=master]]

An AI practice project. It aims to show how AI techniques can be applied in
real-world scenarios, and how to make sensible use of some of the excellent
tools available today.

** Environment setup

The project is written mainly in Python; initialize the environment with:
#+BEGIN_SRC sh
make venv && make deps && source venv/bin/activate
#+END_SRC

** Usage

#+BEGIN_SRC sh
python main.py
#+END_SRC
--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
keras==2.1.2
click==6.7
jieba==0.39
scikit-learn==0.19.1
tensorflow==1.4.0
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
#
# This file is autogenerated by pip-compile
# To update, run:
#
#    pip-compile --output-file requirements.txt requirements.in
#
bleach==1.5.0             # via tensorflow-tensorboard
click==6.7
enum34==1.1.6             # via tensorflow
html5lib==0.9999999       # via bleach, tensorflow-tensorboard
jieba==0.39
keras==2.1.2
markdown==2.6.9           # via tensorflow-tensorboard
numpy==1.13.3             # via keras, tensorflow, tensorflow-tensorboard
protobuf==3.5.0.post1     # via tensorflow, tensorflow-tensorboard
pyyaml==3.11              # via keras
scikit-learn==0.19.1
scipy==0.17.0             # via keras
six==1.10.0               # via bleach, html5lib, keras, protobuf, tensorflow, tensorflow-tensorboard
tensorflow-tensorboard==0.4.0rc3  # via tensorflow
tensorflow==1.4.0
werkzeug==0.12.2          # via tensorflow-tensorboard
wheel==0.30.0             # via tensorflow, tensorflow-tensorboard
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[flake8]
max-line-length = 100
ignore = E201,E202

[pep8]
max-line-length = 100
ignore = E201,E202
--------------------------------------------------------------------------------
/tests/ensure_flake8.sh:
--------------------------------------------------------------------------------
#!/bin/bash

PIP=""
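# Fall back through progressively more generic pip executables, so the script
# works both inside the project venv and on bare CI images.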
8 | elif [ -e "$(which pip3.5)" ];then 9 | PIP=pip3.5 10 | elif [ -e "$(which pip3)" ];then 11 | PIP=pip3 12 | else 13 | PIP=pip 14 | fi 15 | 16 | ${PIP} install flake8 --quiet 17 | -------------------------------------------------------------------------------- /tests/ensure_pytest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PIP="" 4 | if [ -e "venv" ];then 5 | PIP=venv/bin/pip 6 | elif [ -e "$(which pip3.6)" ];then 7 | PIP=pip3.6 8 | elif [ -e "$(which pip3.5)" ];then 9 | PIP=pip3.5 10 | elif [ -e "$(which pip3)" ];then 11 | PIP=pip3 12 | else 13 | PIP=pip 14 | fi 15 | 16 | ${PIP} install pytest pytest-cov --quiet 17 | --------------------------------------------------------------------------------