├── desktop.ini ├── README.md ├── .gitignore ├── 自动文章学习生成v1.py └── 自动文章学习生成v2.py /desktop.ini: -------------------------------------------------------------------------------- 1 | [ViewState] 2 | Mode= 3 | Vid= 4 | FolderType=Generic 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoLearnWritePassage 2 | 自动学习并写作文章的算法。 3 | 4 | v1 版根据单个词之间的关联生成文章。 5 | v2 计划优化算法,升级为两个词关联,应该能让文章更流畅。 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | zbook.txt 2 | zknow.txt 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /自动文章学习生成v1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 写文件 3 | import json 4 | import random 5 | import jieba 6 | 7 | ''' 8 | txt 9 | i:am_0.65|have_0.2|will_0.25/you:are_0.85|like_0.15 10 | 11 | zishi 12 | [0]i:am_0.65|have_0.2|will_0.25 13 | [1]you:are_0.85|like_0.15 14 | 15 | zishi 2 16 | [0][0]i 17 | [1]' 18 | 19 | json zishi 20 | { 21 | "i" : {"$" : 20, "am" : 0.65, "have" : 0.2, "will" : 0.15}, 22 | "you" : {"$" : 20, "are" : 0.85, "like" : 0.15} 23 | 24 | } 25 | ''' 26 | 27 | 28 | def learned(): 29 | try: 30 | with open("z知识库.txt", "rt") as zishi_file: 31 | zishi = eval(zishi_file.read()) 32 | return zishi 33 | except IOError: 34 | zishi = {} 35 | print('11223333333') 36 | # 
open("知识库.txt", "w") 37 | return zishi 38 | 39 | 40 | def write(): 41 | with open("z知识库.txt", "wt") as out_file: 42 | out_file.write(str(zishi)) 43 | 44 | 45 | def learn(): 46 | i = -1 47 | while i < len(textarray) - 2: 48 | i = i + 1 49 | zikey = zishi.keys() 50 | if textarray[i] in zikey: 51 | zishikey = zishi[textarray[i]].keys() 52 | if textarray[i + 1] in zishikey: 53 | for j in zishikey: 54 | if j != '$': 55 | if j != textarray[i + 1]: 56 | zishi[textarray[i]][j] = zishi[textarray[i]][j] / (1 + 1 / zishi[textarray[i]]['$']) 57 | zishi[textarray[i]][textarray[i + 1]] = (zishi[textarray[i]][textarray[i + 1]] + 1 / 58 | zishi[textarray[i]]['$']) / (1 + 1 / zishi[textarray[i]]['$']) 59 | zishi[textarray[i]]['$'] = zishi[textarray[i]]['$'] + 1 60 | else: 61 | for j in zishikey: 62 | if j != '$': 63 | zishi[textarray[i]][j] = zishi[textarray[i]][j] / (1 + 1 / zishi[textarray[i]]['$']) 64 | zishi[textarray[i]][textarray[i + 1]] = (1 / zishi[textarray[i]]['$']) / ( 65 | 1 + 1 / zishi[textarray[i]]['$']) 66 | zishi[textarray[i]]['$'] = zishi[textarray[i]]['$'] + 1 67 | else: 68 | zishi[textarray[i]] = {'$': 1, textarray[i + 1]: 1} 69 | 70 | 71 | def scrlearn(word, nword): 72 | zikey = zishi.keys() 73 | if word in zikey: 74 | zishikey = zishi[word].keys() 75 | if nword in zishikey: 76 | for j in zishikey: 77 | if j != '$': 78 | if j != nword: 79 | zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$']) 80 | zishi[word][nword] = (zishi[word][nword] + 1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$']) 81 | zishi[word]['$'] = zishi[word]['$'] + 1 82 | else: 83 | for j in zishikey: 84 | if j != '$': 85 | zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$']) 86 | zishi[word][nword] = (1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$']) 87 | zishi[word]['$'] = zishi[word]['$'] + 1 88 | else: 89 | zishi[word] = {'$': 1, nword: 1} 90 | 91 | 92 | def screen(text): 93 | textarray = jieba.cut(text) 94 | textarray = '[~]'.join(textarray).split('[~]') 95 | ii = -1 96 | 
while ii < len(textarray) - 1: 97 | ii = ii + 1 98 | textarray[ii].lower() 99 | textarray[ii] = textarray[ii].strip() 100 | if textarray[ii] == '\n': 101 | if textarray[ii + 1] == '\n': 102 | ii = ii + 2 103 | else: 104 | scrlearn(textarray[ii - 1] + ' ', textarray[ii + 1]) 105 | ii = ii + 1 106 | if textarray[ii] == ' ': 107 | scrlearn(textarray[ii - 1] + ' ', textarray[ii + 1]) 108 | ii = ii + 1 109 | return textarray 110 | 111 | 112 | def ran(w, r): 113 | zikey = zishi.keys() 114 | if w in zikey: 115 | zishikey = zishi[w].keys() 116 | for i in zishikey: 117 | if i != '$': 118 | if r < float(zishi[w][i]): 119 | print(i) 120 | if i == '\n': 121 | return '' 122 | else: 123 | return i 124 | else: 125 | r = r - float(zishi[w][i]) 126 | print(r) 127 | else: 128 | return ' ' 129 | 130 | 131 | def sen(a): 132 | sent = [] 133 | sent.append(a) 134 | while len(sent) < 1000: 135 | b = ran(sent[len(sent) - 1], random.random()) 136 | if b == ',' or b == '。' or b == '?' or b == '!': 137 | if ran(sent[len(sent) - 1] + b, random.random()) != ' ': 138 | sent.append(b) 139 | else: 140 | if ran(b, random.random()) != ' ': 141 | sent.append(b) 142 | else: 143 | break 144 | elif b != ' ': 145 | sent.append(b) 146 | else: 147 | break 148 | return ''.join(sent) 149 | 150 | 151 | while 1: 152 | print('中文版本哦') 153 | print('学习/写作/关闭') 154 | choose = input('先') 155 | text = 0 156 | if choose == '学习': 157 | try: 158 | # zishi = 159 | input('在同目录下创建“zbook.txt”') 160 | with open("zbook.txt", "rt", encoding="utf-8") as in_file: 161 | text = in_file.read() 162 | except IOError: 163 | print('没有找到书本') 164 | input('创建完成按回车继续') 165 | finally: 166 | if text == 0: 167 | input('未发现') 168 | exit(); 169 | # print(text) 170 | zishi = learned() 171 | textarray = screen(text) 172 | print(textarray) 173 | print('一共有' + str(len(textarray)) + '个词。。学习中。。。。') 174 | learn() 175 | input('学习完成') 176 | write() 177 | elif choose == '写作': 178 | zishi = learned() 179 | while 1: 180 | print('钦定第一个词') 181 | 
print('输入列表查看所有词') 182 | word = input() 183 | if word == '列表': 184 | for i in zishi.keys(): print(i) 185 | else: 186 | if word in zishi.keys(): 187 | print(' ') 188 | print(sen(word)) 189 | print(' ') 190 | break 191 | elif choose == '关闭': 192 | exit(); 193 | elif choose == '调试': 194 | print('→_→') 195 | # print(text) 196 | # with open("test.txt", "wt") as out_file: 197 | # out_file.write("该文本会写入到文件中\n看到我了吧!") 198 | 199 | # Read a file 200 | -------------------------------------------------------------------------------- /自动文章学习生成v2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 写文件 3 | import random 4 | import jieba 5 | 6 | # 连续储存两个词语,理论上生成文章更流畅 7 | ''' 8 | txt 9 | i:am_0.65|have_0.2|will_0.25/you:are_0.85|like_0.15 10 | 11 | zishi 12 | [0]i:am_0.65|have_0.2|will_0.25 13 | [1]you:are_0.85|like_0.15 14 | 15 | zishi 2 16 | [0][0]i 17 | [1]' 18 | 19 | json zishi 20 | { 21 | "i am" : {"$" : 20, "a" : 0.65, "working" : 0.2, "an" : 0.15}, 22 | "you are" : {"$" : 20, "a" : 0.85, "an" : 0.15} 23 | 24 | } 25 | ''' 26 | 27 | PUNCTUATION = [',', '。', '?', '!', ':', ';'] 28 | 29 | 30 | # 拿到之前学习过的数据 31 | def learned(): 32 | try: 33 | with open("zknow.txt", "rt", encoding='utf-8') as zishi_file: 34 | zishi = eval(zishi_file.read()) 35 | return zishi 36 | except IOError: 37 | zishi = {} 38 | print('11223333333') 39 | # open("知识库.txt", "w") 40 | return zishi 41 | 42 | 43 | # 储存学习数据 44 | def write(): 45 | with open("zknow.txt", "wt", encoding='utf-8') as out_file: 46 | out_file.write(str(zishi)) 47 | 48 | 49 | # 遍历整个文章的学习 50 | def learns(): 51 | i = 0 52 | while i < len(textarray) - 2: 53 | learn(textarray[i] + ' ' + textarray[i + 1], textarray[i + 2]) 54 | learn(textarray[i], textarray[i + 1]) 55 | i = i + 1 56 | 57 | 58 | # 单个的词语学习 59 | def learn(word, nword): 60 | zikey = zishi.keys() 61 | if word in zikey: 62 | zishikey = zishi[word].keys() 63 | if nword in zishikey: 64 | for j in zishikey: 65 | if j != '$': 66 
| if j != nword: 67 | zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$']) 68 | zishi[word][nword] = (zishi[word][nword] + 1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$']) 69 | zishi[word]['$'] = zishi[word]['$'] + 1 70 | else: 71 | for j in zishikey: 72 | if j != '$': 73 | zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$']) 74 | zishi[word][nword] = (1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$']) 75 | zishi[word]['$'] = zishi[word]['$'] + 1 76 | else: 77 | zishi[word] = {'$': 1, nword: 1} 78 | 79 | 80 | # 分词,以及对标点符号和换行、空格的预处理 81 | def screen(text): 82 | global PUNCTUATION 83 | textarray = jieba.cut(text) # 分词 84 | textarray = '[~]'.join(textarray).split('[~]') # 分词出来的数组好像不是标准的数组。。用这个方法转换成标准的数组 85 | '''ii = -1 86 | while ii < len(textarray) - 2: # 预学习,主要处理空行,空格 87 | ii = ii + 1 88 | textarray[ii].lower() 89 | textarray[ii] = textarray[ii].strip() 90 | if textarray[ii] == '\n': 91 | if textarray[ii + 1] == '\n': 92 | ii = ii + 2 # 学习样本里有空行,代表学习到了下一个文章,下一个文章不应和上一个建立关系 93 | else: 94 | learn(textarray[ii - 1] + ' ', textarray[ii + 1]) 95 | ii = ii + 1 96 | elif textarray[ii] in PUNCTUATION: 97 | learn(textarray[ii - 1] + textarray[ii], textarray[ii + 1]) 98 | learn(textarray[ii], textarray[ii + 1]) 99 | ii = ii + 1''' 100 | return textarray 101 | 102 | 103 | # 根据现有词语和随机数得到下一个词语 104 | def ran(w, w2, r): 105 | zikey = zishi.keys() 106 | # and random.random() < 0.98 107 | if w != '' and (w + ' ' + w2) in zikey: 108 | zishikey = zishi[w + ' ' + w2].keys() 109 | k = int(len(zishikey) == 2) 110 | for i in zishikey: 111 | if i != '$': 112 | if r < float(zishi[w + ' ' + w2][i]): 113 | if i == '\n': 114 | return '', True, k 115 | else: 116 | return i, True, k 117 | else: 118 | r = r - float(zishi[w + ' ' + w2][i]) 119 | else: 120 | if w2 in zikey: 121 | zishikey = zishi[w2].keys() 122 | for i in zishikey: 123 | if i != '$': 124 | if r < float(zishi[w2][i]): 125 | if i == '\n': 126 | return '', False, -1 127 | else: 128 | return i, False, -1 129 | else: 
130 | r = r - float(zishi[w2][i]) 131 | else: 132 | return '~', False, -1 133 | 134 | 135 | # 生成文章 136 | def sen(a, s): 137 | global PUNCTUATION 138 | sent = [] 139 | sent.append(a) 140 | sent.append(s) 141 | fin = '' 142 | double = doublet = kk = kkt = 0 143 | while len(sent) < 10000: # 修改左侧数值可限定文章<词语>数 144 | b, d, k = ran(sent[-2], sent[-1], random.random()) 145 | double += d 146 | doublet += 1 147 | if k != -1: 148 | kk += k 149 | kkt += 1 150 | if b == '': 151 | sent.append('\n') 152 | elif b != '~': 153 | if not ((b in PUNCTUATION) and (sent[-1] in PUNCTUATION)): 154 | # if b != sent[-1]: 155 | sent.append(b) 156 | else: 157 | fin = sent[-2] + ',' + sent[-1] 158 | break 159 | return ''.join(sent), fin, double, doublet, kk, kkt 160 | 161 | 162 | while 1: 163 | print('中文v2版本哦') 164 | print('学习/写作/关闭') 165 | choose = input('') 166 | text = 0 167 | if choose == '学习': 168 | try: 169 | input('在同目录下创建“zbook.txt”') 170 | with open("zbook.txt", "rt", encoding="utf-8") as in_file: 171 | text = in_file.read() 172 | except IOError: 173 | print('没有找到书本') 174 | input('') 175 | finally: 176 | if text == 0: 177 | input('未发现') 178 | exit() 179 | zishi = learned() # 可以继续学习 180 | textarray = screen(text) 181 | print(textarray) 182 | print('一共有' + str(len(textarray)) + '个词。。学习中。。。。') 183 | learns() 184 | input('学习完成') 185 | write() 186 | elif choose == '写作': 187 | zishi = learned() 188 | while True: 189 | print('钦定第一个和第二个词') 190 | print('输入列表查看所有词') 191 | word = input() 192 | if word == '列表': 193 | for i in zishi.keys(): 194 | print(i) 195 | else: 196 | if sum([i.find(word + ' ') == 0 for i in zishi.keys()]) != 0: 197 | while True: 198 | print('输入第二个词语') 199 | print('输入列表查看所有词') 200 | word2 = input() 201 | if word2 == '列表': 202 | for i in zishi.keys(): 203 | if i.find(word + ' ') == 0: 204 | print(i[len(word) + 1:]) 205 | else: 206 | senn, fin, d, dt, k, kt = sen(word, word2) 207 | print(senn) 208 | with open("result.txt", "wt", encoding='utf-8') as out_file: 209 | 
out_file.write(str(senn)) 210 | print('----------------------------------------------') 211 | print('本次生成报告:') 212 | print(' 生成文章结束原因:' + ('词数限制' if fin == '' else '无词语接龙:' + fin)) 213 | print(' 词语由双词生成数量:' + str(d) + '/' + str(dt) + ',' + str(d * 100 / dt) + '%') 214 | print(' 词语由双词生成中单词生成数量:' + str(k) + '/' + str(kt) + ',' + str(k * 100 / kt) + '%') 215 | print('') 216 | print('文章已储存至 result.txt') 217 | print('----------------------------------------------') 218 | print('') 219 | break 220 | break 221 | elif choose == '关闭': 222 | exit() 223 | elif choose == '调试': 224 | while True: 225 | print(ran(input(), input(), random.random())) 226 | --------------------------------------------------------------------------------