├── desktop.ini
├── README.md
├── .gitignore
├── 自动文章学习生成v1.py
└── 自动文章学习生成v2.py


/desktop.ini:
--------------------------------------------------------------------------------
1 | [ViewState]
2 | Mode=
3 | Vid=
4 | FolderType=Generic
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoLearnWritePassage
2 | 自动学习写作文章的算法。。   
3 | 
4 | v1版是根据一个词相互关联  
5 | v2计划优化算法，升级为两个词关联，应该能让文章更流畅
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | zbook.txt
  2 | zknow.txt
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | 
 62 | # Flask stuff:
 63 | instance/
 64 | .webassets-cache
 65 | 
 66 | # Scrapy stuff:
 67 | .scrapy
 68 | 
 69 | # Sphinx documentation
 70 | docs/_build/
 71 | 
 72 | # PyBuilder
 73 | target/
 74 | 
 75 | # Jupyter Notebook
 76 | .ipynb_checkpoints
 77 | 
 78 | # pyenv
 79 | .python-version
 80 | 
 81 | # celery beat schedule file
 82 | celerybeat-schedule
 83 | 
 84 | # SageMath parsed files
 85 | *.sage.py
 86 | 
 87 | # Environments
 88 | .env
 89 | .venv
 90 | env/
 91 | venv/
 92 | ENV/
 93 | env.bak/
 94 | venv.bak/
 95 | 
 96 | # Spyder project settings
 97 | .spyderproject
 98 | .spyproject
 99 | 
100 | # Rope project settings
101 | .ropeproject
102 | 
103 | # mkdocs documentation
104 | /site
105 | 
106 | # mypy
107 | .mypy_cache/
108 | 


--------------------------------------------------------------------------------
/自动文章学习生成v1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # 写文件
  3 | import json
  4 | import random
  5 | import jieba
  6 | 
  7 | '''
  8 | txt
  9 | i:am_0.65|have_0.2|will_0.25/you:are_0.85|like_0.15
 10 | 
 11 | zishi
 12 | [0]i:am_0.65|have_0.2|will_0.25
 13 | [1]you:are_0.85|like_0.15
 14 | 
 15 | zishi 2
 16 | [0][0]i
 17 |    [1]'
 18 |    
 19 | json zishi
 20 | {
 21 | 	"i" : {"$" : 20, "am" : 0.65, "have" : 0.2, "will" : 0.15},
 22 | 	"you" : {"$" : 20, "are" : 0.85, "like" : 0.15}
 23 | 	
 24 | }
 25 | '''
 26 | 
 27 | 
 28 | def learned():
 29 |     try:
 30 |         with open("z知识库.txt", "rt") as zishi_file:
 31 |             zishi = eval(zishi_file.read())
 32 |             return zishi
 33 |     except IOError:
 34 |         zishi = {}
 35 |         print('11223333333')
 36 |         # open("知识库.txt", "w")
 37 |         return zishi
 38 | 
 39 | 
 40 | def write():
 41 |     with open("z知识库.txt", "wt") as out_file:
 42 |         out_file.write(str(zishi))
 43 | 
 44 | 
 45 | def learn():
 46 |     i = -1
 47 |     while i < len(textarray) - 2:
 48 |         i = i + 1
 49 |         zikey = zishi.keys()
 50 |         if textarray[i] in zikey:
 51 |             zishikey = zishi[textarray[i]].keys()
 52 |             if textarray[i + 1] in zishikey:
 53 |                 for j in zishikey:
 54 |                     if j != '$':
 55 |                         if j != textarray[i + 1]:
 56 |                             zishi[textarray[i]][j] = zishi[textarray[i]][j] / (1 + 1 / zishi[textarray[i]]['$'])
 57 |                 zishi[textarray[i]][textarray[i + 1]] = (zishi[textarray[i]][textarray[i + 1]] + 1 /
 58 |                                                          zishi[textarray[i]]['$']) / (1 + 1 / zishi[textarray[i]]['$'])
 59 |                 zishi[textarray[i]]['$'] = zishi[textarray[i]]['$'] + 1
 60 |             else:
 61 |                 for j in zishikey:
 62 |                     if j != '$':
 63 |                         zishi[textarray[i]][j] = zishi[textarray[i]][j] / (1 + 1 / zishi[textarray[i]]['$'])
 64 |                 zishi[textarray[i]][textarray[i + 1]] = (1 / zishi[textarray[i]]['$']) / (
 65 |                             1 + 1 / zishi[textarray[i]]['$'])
 66 |                 zishi[textarray[i]]['$'] = zishi[textarray[i]]['$'] + 1
 67 |         else:
 68 |             zishi[textarray[i]] = {'$': 1, textarray[i + 1]: 1}
 69 | 
 70 | 
 71 | def scrlearn(word, nword):
 72 |     zikey = zishi.keys()
 73 |     if word in zikey:
 74 |         zishikey = zishi[word].keys()
 75 |         if nword in zishikey:
 76 |             for j in zishikey:
 77 |                 if j != '$':
 78 |                     if j != nword:
 79 |                         zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$'])
 80 |             zishi[word][nword] = (zishi[word][nword] + 1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$'])
 81 |             zishi[word]['$'] = zishi[word]['$'] + 1
 82 |         else:
 83 |             for j in zishikey:
 84 |                 if j != '$':
 85 |                     zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$'])
 86 |             zishi[word][nword] = (1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$'])
 87 |             zishi[word]['$'] = zishi[word]['$'] + 1
 88 |     else:
 89 |         zishi[word] = {'$': 1, nword: 1}
 90 | 
 91 | 
 92 | def screen(text):
 93 |     textarray = jieba.cut(text)
 94 |     textarray = '[~]'.join(textarray).split('[~]')
 95 |     ii = -1
 96 |     while ii < len(textarray) - 1:
 97 |         ii = ii + 1
 98 |         textarray[ii].lower()
 99 |         textarray[ii] = textarray[ii].strip()
100 |         if textarray[ii] == '\n':
101 |             if textarray[ii + 1] == '\n':
102 |                 ii = ii + 2
103 |             else:
104 |                 scrlearn(textarray[ii - 1] + ' ', textarray[ii + 1])
105 |                 ii = ii + 1
106 |         if textarray[ii] == ' ':
107 |             scrlearn(textarray[ii - 1] + ' ', textarray[ii + 1])
108 |             ii = ii + 1
109 |     return textarray
110 | 
111 | 
def ran(w, r):
    """Pick the word that follows ``w`` by roulette-wheel selection.

    Walks the successors stored in the global ``zishi[w]`` and subtracts
    each weight from ``r`` until ``r`` falls inside one of them.

    Args:
        w: the current word.
        r: a random number; callers pass ``random.random()``.

    Returns:
        The chosen successor; ``''`` when the chosen successor is the
        newline token; ``' '`` when ``w`` is unknown or has no successors.
    """
    if w not in zishi:
        return ' '

    last = None
    for i in zishi[w]:
        if i == '$':
            continue
        last = i
        weight = float(zishi[w][i])
        if r < weight:
            print(i)
            return '' if i == '\n' else i
        r = r - weight
        print(r)

    if last is None:
        return ' '
    # The stored weights can sum to slightly less than 1 because of float
    # rounding, so r may survive the whole walk.  Previously the function
    # then returned None, which crashed the caller's ''.join(); fall back
    # to the last candidate instead.
    return '' if last == '\n' else last
129 | 
130 | 
def sen(a):
    """Generate text starting from the word ``a``.

    Repeatedly asks ``ran`` for a successor of the last word and appends
    it, until ``ran`` signals a dead end (``' '``) or the list holds 1000
    entries.

    Args:
        a: the first word.

    Returns:
        All collected words joined into one string.
    """
    marks = ('，', '。', '？', '！')
    sent = [a]
    while len(sent) < 1000:
        prev = sent[-1]
        b = ran(prev, random.random())
        if b in marks:
            # A punctuation mark is kept only if something can follow it:
            # either the "previous word + mark" key or the mark itself must
            # be known to ran().  The second probe runs only when the first
            # one fails.
            if (ran(prev + b, random.random()) == ' '
                    and ran(b, random.random()) == ' '):
                break
        elif b == ' ':
            break
        sent.append(b)
    return ''.join(sent)
149 | 
150 | 
# Interactive menu loop: learn from zbook.txt, generate text, or quit.
while 1:
    print('中文版本哦')
    print('学习/写作/关闭')
    choose = input('先')
    # Sentinel: stays 0 unless zbook.txt was read successfully.
    text = 0
    if choose == '学习':
        try:
            # Wait for the user to confirm that zbook.txt has been created.
            input('在同目录下创建“zbook.txt”')
            with open("zbook.txt", "rt", encoding="utf-8") as in_file:
                text = in_file.read()
        except IOError:
            print('没有找到书本')
            input('创建完成按回车继续')
        finally:
            # Runs after both the success and the failure path; when no
            # text was read the program exits here.
            if text == 0:
                input('未发现')
                exit();
            # Load existing knowledge, tokenise the book, learn from it,
            # then save the updated knowledge base.
            zishi = learned()
            textarray = screen(text)
            print(textarray)
            print('一共有' + str(len(textarray)) + '个词。。学习中。。。。')
            learn()
            input('学习完成')
            write()
    elif choose == '写作':
        zishi = learned()
        # Keep asking until the user enters a word that is a known key.
        while 1:
            print('钦定第一个词')
            print('输入列表查看所有词')
            word = input()
            if word == '列表':
                # List every known word.
                for i in zishi.keys(): print(i)
            else:
                if word in zishi.keys():
                    print(' ')
                    print(sen(word))
                    print(' ')
                    break
    elif choose == '关闭':
        exit();
    elif choose == '调试':
        # Debug entry: only prints a marker.
        print('→_→')
195 | # print(text)
196 | # with open("test.txt", "wt") as out_file:
197 | #	out_file.write("该文本会写入到文件中\n看到我了吧！")
198 | 
199 | # Read a file
200 | 


--------------------------------------------------------------------------------
/自动文章学习生成v2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # 写文件
  3 | import random
  4 | import jieba
  5 | 
  6 | # 连续储存两个词语，理论上生成文章更流畅
  7 | '''
  8 | txt
  9 | i:am_0.65|have_0.2|will_0.25/you:are_0.85|like_0.15
 10 | 
 11 | zishi
 12 | [0]i:am_0.65|have_0.2|will_0.25
 13 | [1]you:are_0.85|like_0.15
 14 | 
 15 | zishi 2
 16 | [0][0]i
 17 |    [1]'
 18 |    
 19 | json zishi
 20 | {
 21 | 	"i am" : {"$" : 20, "a" : 0.65, "working" : 0.2, "an" : 0.15},
 22 | 	"you are" : {"$" : 20, "a" : 0.85, "an" : 0.15}
 23 | 	
 24 | }
 25 | '''
 26 | 
 27 | PUNCTUATION = ['，', '。', '？', '！', '：', '；']
 28 | 
 29 | 
 30 | # 拿到之前学习过的数据
 31 | def learned():
 32 |     try:
 33 |         with open("zknow.txt", "rt", encoding='utf-8') as zishi_file:
 34 |             zishi = eval(zishi_file.read())
 35 |             return zishi
 36 |     except IOError:
 37 |         zishi = {}
 38 |         print('11223333333')
 39 |         # open("知识库.txt", "w")
 40 |         return zishi
 41 | 
 42 | 
 43 | # 储存学习数据
 44 | def write():
 45 |     with open("zknow.txt", "wt", encoding='utf-8') as out_file:
 46 |         out_file.write(str(zishi))
 47 | 
 48 | 
 49 | # 遍历整个文章的学习
 50 | def learns():
 51 |     i = 0
 52 |     while i < len(textarray) - 2:
 53 |         learn(textarray[i] + ' ' + textarray[i + 1], textarray[i + 2])
 54 |         learn(textarray[i], textarray[i + 1])
 55 |         i = i + 1
 56 | 
 57 | 
 58 | # 单个的词语学习
 59 | def learn(word, nword):
 60 |     zikey = zishi.keys()
 61 |     if word in zikey:
 62 |         zishikey = zishi[word].keys()
 63 |         if nword in zishikey:
 64 |             for j in zishikey:
 65 |                 if j != '$':
 66 |                     if j != nword:
 67 |                         zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$'])
 68 |             zishi[word][nword] = (zishi[word][nword] + 1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$'])
 69 |             zishi[word]['$'] = zishi[word]['$'] + 1
 70 |         else:
 71 |             for j in zishikey:
 72 |                 if j != '$':
 73 |                     zishi[word][j] = zishi[word][j] / (1 + 1 / zishi[word]['$'])
 74 |             zishi[word][nword] = (1 / zishi[word]['$']) / (1 + 1 / zishi[word]['$'])
 75 |             zishi[word]['$'] = zishi[word]['$'] + 1
 76 |     else:
 77 |         zishi[word] = {'$': 1, nword: 1}
 78 | 
 79 | 
 80 | # 分词，以及对标点符号和换行、空格的预处理
 81 | def screen(text):
 82 |     global PUNCTUATION
 83 |     textarray = jieba.cut(text)  # 分词
 84 |     textarray = '[~]'.join(textarray).split('[~]')  # 分词出来的数组好像不是标准的数组。。用这个方法转换成标准的数组
 85 |     '''ii = -1
 86 |     while ii < len(textarray) - 2:  # 预学习，主要处理空行，空格
 87 |         ii = ii + 1
 88 |         textarray[ii].lower()
 89 |         textarray[ii] = textarray[ii].strip()
 90 |         if textarray[ii] == '\n':
 91 |             if textarray[ii + 1] == '\n':
 92 |                 ii = ii + 2  # 学习样本里有空行，代表学习到了下一个文章，下一个文章不应和上一个建立关系
 93 |             else:
 94 |                 learn(textarray[ii - 1] + ' ', textarray[ii + 1])
 95 |                 ii = ii + 1
 96 |         elif textarray[ii] in PUNCTUATION:
 97 |             learn(textarray[ii - 1] + textarray[ii], textarray[ii + 1])
 98 |             learn(textarray[ii], textarray[ii + 1])
 99 |             ii = ii + 1'''
100 |     return textarray
101 | 
102 | 
103 | # 根据现有词语和随机数得到下一个词语
def _pick_successor(table, r):
    """Roulette-wheel pick over the successor weights stored in ``table``.

    Returns the chosen key (the newline token is mapped to ``''``), or
    ``None`` when ``table`` holds no successor besides the ``'$'`` counter.
    """
    last = None
    for candidate in table:
        if candidate == '$':
            continue
        last = candidate
        weight = float(table[candidate])
        if r < weight:
            break
        r = r - weight
    # When the loop runs out without a hit (the weights can sum to slightly
    # less than 1 because of float rounding), ``last`` is the final
    # candidate and serves as the fallback.
    if last is None:
        return None
    return '' if last == '\n' else last


def ran(w, w2, r):
    """Pick the next word from the last two words and a random number.

    The two-word context ``"w w2"`` is preferred; when it is unknown (or
    ``w`` is empty) the single-word context ``w2`` is used instead.

    Args:
        w: the word before last (may be ``''``).
        w2: the last word.
        r: a random number; callers pass ``random.random()``.

    Returns:
        A tuple ``(word, from_pair, single)``:

        * ``word``: the chosen successor, ``''`` for the newline token, or
          ``'~'`` when no context is known.
        * ``from_pair``: ``True`` when the two-word context was used.
        * ``single``: for the two-word context, ``1`` if it has exactly one
          recorded successor, else ``0``; ``-1`` otherwise.

    Previously the function fell off the end and returned ``None`` when
    ``r`` was not below the cumulative weight sum, which made the caller's
    tuple unpacking fail; it now falls back to the last candidate.
    """
    if w != '' and (w + ' ' + w2) in zishi:
        table = zishi[w + ' ' + w2]
        k = int(len(table) == 2)  # '$' plus exactly one successor
        word = _pick_successor(table, r)
        if word is None:
            return '~', False, -1
        return word, True, k

    if w2 in zishi:
        word = _pick_successor(zishi[w2], r)
        if word is None:
            return '~', False, -1
        return word, False, -1

    return '~', False, -1
133 | 
134 | 
135 | # 生成文章
def sen(a, s):
    """Generate text starting from the two words ``a`` and ``s``.

    Repeatedly asks ``ran`` for a successor of the last two entries until
    ``ran`` reports a dead end (``'~'``) or the list holds 10000 entries.

    Args:
        a: the first word.
        s: the second word.

    Returns:
        A tuple ``(text, fin, double, doublet, kk, kkt)``:

        * ``text``: all collected entries joined into one string.
        * ``fin``: ``''`` if the length limit ended the loop, otherwise the
          last two entries joined by ``'，'``.
        * ``double`` / ``doublet``: picks made from a two-word context /
          total picks.
        * ``kk`` / ``kkt``: sum and count of the third value returned by
          ``ran`` over the picks where it was not ``-1``.
    """
    sent = [a, s]
    fin = ''
    double = doublet = kk = kkt = 0
    while len(sent) < 10000:  # change this limit to cap the number of words
        prev2, prev1 = sent[-2], sent[-1]
        b, d, k = ran(prev2, prev1, random.random())
        double += d
        doublet += 1
        if k != -1:
            kk += k
            kkt += 1

        if b == '~':
            # Dead end: remember where generation stopped.
            fin = prev2 + '，' + prev1
            break
        if b == '':
            sent.append('\n')
        elif b not in PUNCTUATION or prev1 not in PUNCTUATION:
            # Skip the pick when it would put two punctuation marks in a row.
            sent.append(b)
    return ''.join(sent), fin, double, doublet, kk, kkt
160 | 
161 | 
def _percent(part, total):
    """Format ``part * 100 / total`` as text; ``'0'`` when ``total`` is zero."""
    return str(part * 100 / total) if total else '0'


# Interactive menu loop for the v2 (two-word context) generator:
# learn from zbook.txt, generate an article, or quit.
while 1:
    print('中文v2版本哦')
    print('学习/写作/关闭')
    choose = input('')
    # Sentinel: stays 0 unless zbook.txt was read successfully.
    text = 0
    if choose == '学习':
        try:
            # Wait for the user to confirm that zbook.txt has been created.
            input('在同目录下创建“zbook.txt”')
            with open("zbook.txt", "rt", encoding="utf-8") as in_file:
                text = in_file.read()
        except IOError:
            print('没有找到书本')
            input('')
        finally:
            # Runs after both the success and the failure path; when no
            # text was read the program exits here.
            if text == 0:
                input('未发现')
                exit()
            zishi = learned()  # continue from previously learned data
            textarray = screen(text)
            print(textarray)
            print('一共有' + str(len(textarray)) + '个词。。学习中。。。。')
            learns()
            input('学习完成')
            write()
    elif choose == '写作':
        zishi = learned()
        while True:
            print('钦定第一个和第二个词')
            print('输入列表查看所有词')
            word = input()
            if word == '列表':
                for i in zishi.keys():
                    print(i)
            else:
                # Proceed only if some two-word key starts with this word.
                if sum([i.find(word + ' ') == 0 for i in zishi.keys()]) != 0:
                    while True:
                        print('输入第二个词语')
                        print('输入列表查看所有词')
                        word2 = input()
                        if word2 == '列表':
                            # List the second words recorded after ``word``.
                            for i in zishi.keys():
                                if i.find(word + ' ') == 0:
                                    print(i[len(word) + 1:])
                        else:
                            senn, fin, d, dt, k, kt = sen(word, word2)
                            print(senn)
                            with open("result.txt", "wt", encoding='utf-8') as out_file:
                                out_file.write(str(senn))
                            print('----------------------------------------------')
                            print('本次生成报告：')
                            print('  生成文章结束原因：' + ('词数限制' if fin == '' else '无词语接龙：' + fin))
                            # kt is 0 when no word came from a two-word
                            # context; _percent avoids the division by
                            # zero that used to crash the report.
                            print('  词语由双词生成数量：' + str(d) + '/' + str(dt) + '，' + _percent(d, dt) + '%')
                            print('  词语由双词生成中单词生成数量：' + str(k) + '/' + str(kt) + '，' + _percent(k, kt) + '%')
                            print('')
                            print('文章已储存至 result.txt')
                            print('----------------------------------------------')
                            print('')
                            break
                # Return to the main menu whether or not the word was found.
                break
    elif choose == '关闭':
        exit()
    elif choose == '调试':
        # Debug mode: read two words per round and print ran()'s raw result.
        while True:
            print(ran(input(), input(), random.random()))
226 | 


--------------------------------------------------------------------------------