├── .Rbuildignore ├── .gitignore ├── .ipynb_checkpoints ├── demo-checkpoint.ipynb ├── dtm-checkpoint.ipynb ├── gensim-lda-checkpoint.ipynb ├── index-checkpoint.ipynb └── sklearn-lda-checkpoint.ipynb ├── CITATION.bib ├── DESCRIPTION ├── LICENSE.md ├── Makefile ├── NAMESPACE ├── NEWS.md ├── README.Rmd ├── README.md ├── analysis ├── .ipynb_checkpoints │ └── demo-checkpoint.ipynb ├── build-README.R ├── preprocess-demi_gods_and_semi_devils.Rmd ├── push.R ├── viz-demo.Rmd ├── viz-topic.Rmd └── viz-word.Rmd ├── build └── lib │ └── dynamic_topic_modeling │ ├── __init__.py │ ├── _nbdev.py │ ├── dtm.py │ ├── gensim_lda.py │ └── sklearn_lda.py ├── data-raw └── affirmative_modifed.R ├── data ├── affirmative.csv ├── affirmative_modifed.csv ├── negative.csv └── stopwords.txt ├── demo.ipynb ├── dev_history_r_proj.R ├── dist ├── dynamic_topic_modeling-1.0.0-py3-none-any.whl ├── dynamic_topic_modeling-1.0.0.tar.gz ├── dynamic_topic_modeling-1.0.1-py3-none-any.whl ├── dynamic_topic_modeling-1.0.1.tar.gz ├── dynamic_topic_modeling-1.0.2-py3-none-any.whl ├── dynamic_topic_modeling-1.0.2.tar.gz ├── dynamic_topic_modeling-1.1.0-py3-none-any.whl └── dynamic_topic_modeling-1.1.0.tar.gz ├── docs ├── .gitignore ├── Gemfile ├── Gemfile.lock ├── _config.yml ├── _data │ ├── alerts.yml │ ├── definitions.yml │ ├── glossary.yml │ ├── sidebars │ │ └── home_sidebar.yml │ ├── tags.yml │ ├── terms.yml │ └── topnav.yml ├── _includes │ ├── archive.html │ ├── callout.html │ ├── footer.html │ ├── google_analytics.html │ ├── head.html │ ├── head_print.html │ ├── image.html │ ├── important.html │ ├── initialize_shuffle.html │ ├── inline_image.html │ ├── links.html │ ├── note.html │ ├── search_google_custom.html │ ├── search_simple_jekyll.html │ ├── sidebar.html │ ├── tip.html │ ├── toc.html │ ├── topnav.html │ └── warning.html ├── _layouts │ ├── default.html │ ├── default_print.html │ ├── none.html │ ├── page.html │ └── page_print.html ├── css │ ├── bootstrap.min.css │ ├── boxshadowproperties.css │ ├── 
customstyles.css │ ├── font-awesome.min.css │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.svg │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ ├── modern-business.css │ ├── printstyles.css │ ├── syntax.css │ ├── theme-blue.css │ └── theme-green.css ├── dtm.html ├── feed.xml ├── fonts │ ├── FontAwesome.otf │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ ├── fontawesome-webfont.woff │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 ├── gensim-lda.html ├── images │ ├── company_logo.png │ ├── company_logo_big.png │ ├── doc_example.png │ ├── export_example.png │ ├── favicon.ico │ └── workflowarrow.png ├── index.html ├── js │ ├── customscripts.js │ ├── jekyll-search.js │ ├── jquery.ba-throttle-debounce.min.js │ ├── jquery.navgoco.min.js │ ├── jquery.shuffle.min.js │ └── toc.js ├── licenses │ ├── LICENSE │ └── LICENSE-BSD-NAVGOCO.txt ├── sidebar.json ├── sitemap.xml ├── sklearn-lda.html └── tooltips.json ├── dtm.ipynb ├── dynamic_topic_modeling.Rproj ├── dynamic_topic_modeling.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── entry_points.txt ├── not-zip-safe └── top_level.txt ├── dynamic_topic_modeling ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── _nbdev.cpython-37.pyc │ ├── dtm.cpython-37.pyc │ ├── gensim_lda.cpython-37.pyc │ └── sklearn_lda.cpython-37.pyc ├── _nbdev.py ├── dtm.py ├── gensim_lda.py └── sklearn_lda.py ├── figure ├── demo_topic_evolution.png ├── demo_word_evolution.png ├── topic_evolution.png └── word_evolution.png ├── gensim-lda.ipynb ├── index.ipynb ├── index.md ├── model ├── dtm.pkl ├── gensim-lda.pkl └── sklearn-lda.pkl ├── output ├── demo_dict_text.txt ├── demo_model_df.csv ├── demo_topic_df.csv ├── dtm.html ├── dtm_files │ 
└── dtm_32_0.png ├── model_df.csv └── topic_df.csv ├── refs ├── BleiLafferty2006a.pdf └── add.bib ├── settings.ini ├── setup.py ├── sklearn-lda.ipynb └── src └── dtm-win64.exe /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^dev_history_r_proj\.R$ 2 | ^LICENSE\.md$ 3 | ^Makefile$ 4 | ^README\.Rmd$ 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | ^dev_history_r_pkg\.R$ 8 | ^data-raw$ 9 | ^dynamic_topic_modeling\.Rproj$ 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | README.html 5 | dev_history_r_pkg.R 6 | .ipynb_checkpoints 7 | .ipynb_checkpoints/ 8 | commit.Rmd 9 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/gensim-lda-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# default_exp gensim_lda" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# gensim.models.LdaModel\n", 17 | "\n", 18 | "> Run LDA Model using 'gensim'\n", 19 | "\n", 20 | "Run LDA Model using 'gensim'." 
21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "from dynamic_topic_modeling.sklearn_lda import *" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | "Building prefix dict from the default dictionary ...\n", 42 | "Loading model from cache C:\\Users\\LIJIAX~1\\AppData\\Local\\Temp\\jieba.cache\n", 43 | "Loading model cost 1.117 seconds.\n", 44 | "Prefix dict has been built succesfully.\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "affirmative = make_df(\"data/affirmative.csv\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
\n", 61 | "\n", 74 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | "
GroupStudentsContenttext
0第1组正三慕课将分布于世界各地的最优质的教育资源聚集到一起,让任何有学习愿望的人能够低成本的,通常是免...慕课 将 分布 于 世界各地 的 最 优质 的 教育资源 聚集 到 一起 , 让 任何 有 ...
1第1组正二在慕课发展过程中的现阶段,中国最大的慕课平台icourse163的用户人数突破100万,与其...在 慕课 发展 过程 中 的 现阶段 , 中国 最大 的 慕课 平台 icourse163 ...
2第1组正一研究发现,在慕课融入的课堂学习中,学习者情感体验丰富,知识技能以及元认知能力得到提升,思想观...研究 发现 , 在 慕课 融入 的 课堂 学习 中 , 学习者 情感 体验 丰富 , 知识 ...
3第1组正三慕课在保证教育质量的同时,降低提供教育的成本,给社会带来的憧憬。任何人任何时候再任何地方,都...慕课 在 保证 教育 质量 的 同时 , 降低 提供 教育 的 成本 , 给 社会 带来 的...
4第1组正一对方反一辩友也说是可能出现的欢快气氛,传统课堂集体聆听教师单方面赐予的知识,这难道不是一种容...对方 反一 辩友 也 说 是 可能 出现 的 欢快 气氛 , 传统 课堂 集体 聆听 教师 ...
\n", 122 | "
" 123 | ], 124 | "text/plain": [ 125 | " Group Students Content \\\n", 126 | "0 第1组 正三 慕课将分布于世界各地的最优质的教育资源聚集到一起,让任何有学习愿望的人能够低成本的,通常是免... \n", 127 | "1 第1组 正二 在慕课发展过程中的现阶段,中国最大的慕课平台icourse163的用户人数突破100万,与其... \n", 128 | "2 第1组 正一 研究发现,在慕课融入的课堂学习中,学习者情感体验丰富,知识技能以及元认知能力得到提升,思想观... \n", 129 | "3 第1组 正三 慕课在保证教育质量的同时,降低提供教育的成本,给社会带来的憧憬。任何人任何时候再任何地方,都... \n", 130 | "4 第1组 正一 对方反一辩友也说是可能出现的欢快气氛,传统课堂集体聆听教师单方面赐予的知识,这难道不是一种容... \n", 131 | "\n", 132 | " text \n", 133 | "0 慕课 将 分布 于 世界各地 的 最 优质 的 教育资源 聚集 到 一起 , 让 任何 有 ... \n", 134 | "1 在 慕课 发展 过程 中 的 现阶段 , 中国 最大 的 慕课 平台 icourse163 ... \n", 135 | "2 研究 发现 , 在 慕课 融入 的 课堂 学习 中 , 学习者 情感 体验 丰富 , 知识 ... \n", 136 | "3 慕课 在 保证 教育 质量 的 同时 , 降低 提供 教育 的 成本 , 给 社会 带来 的... \n", 137 | "4 对方 反一 辩友 也 说 是 可能 出现 的 欢快 气氛 , 传统 课堂 集体 聆听 教师 ... " 138 | ] 139 | }, 140 | "execution_count": 4, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "affirmative.head()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "stopwords = get_custom_stopwords(\"data/stopwords.txt\", encoding='utf-8') # HIT停用词词典\n", 156 | "max_df = 0.9 # 在超过这一比例的文档中出现的关键词(过于平凡),去除掉。\n", 157 | "min_df = 5 # 在低于这一数量的文档中出现的关键词(过于独特),去除掉。\n", 158 | "n_features = 1000 # 最大提取特征数量\n", 159 | "n_top_words = 20 # 显示主题下关键词的时候,显示多少个\n", 160 | "col_content = \"text\" # 说明其中的文本信息所在列名称" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "raw_documents = affirmative['text'].tolist()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "# 参考 https://blog.csdn.net/kwame211/article/details/78963517\n", 179 | "import jieba\n", 180 | "docs = [[word for word in jieba.cut(document, cut_all=True)] for document in raw_documents]" 181 | ] 182 | }, 183 | { 184 | "cell_type": 
"code", 185 | "execution_count": 9, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stderr", 190 | "output_type": "stream", 191 | "text": [ 192 | "D:\\install\\miniconda\\lib\\site-packages\\scipy\\sparse\\sparsetools.py:21: DeprecationWarning: `scipy.sparse.sparsetools` is deprecated!\n", 193 | "scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.\n", 194 | " _deprecated()\n", 195 | "D:\\install\\miniconda\\lib\\site-packages\\gensim\\models\\doc2vec.py:73: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n", 196 | " from collections import namedtuple, defaultdict, Iterable\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "# 参考 https://radimrehurek.com/gensim/auto_examples/tutorials/run_lda.html#sphx-glr-auto-examples-tutorials-run-lda-py\n", 202 | "from gensim.corpora import Dictionary\n", 203 | "# Create a dictionary representation of the documents.\n", 204 | "dictionary = Dictionary(docs)\n", 205 | "\n", 206 | "# Filter out words that occur less than 5 documents, or more than 90% of the documents.\n", 207 | "dictionary.filter_extremes(no_below=5, no_above=0.9)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "```python\n", 215 | "TypeError: doc2bow expects an array of unicode tokens on input, not a single string\n", 216 | "```\n", 217 | "\n", 218 | "- [x] list 化 https://blog.csdn.net/cg_amaz1ng/article/details/79567583" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 10, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "corpus = [dictionary.doc2bow(doc) for doc in docs]" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 11, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "Number of unique tokens: 1060\n", 240 
| "Number of documents: 617\n" 241 | ] 242 | } 243 | ], 244 | "source": [ 245 | "print('Number of unique tokens: %d' % len(dictionary))\n", 246 | "print('Number of documents: %d' % len(corpus))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 12, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "from gensim.models import LdaModel\n", 256 | "\n", 257 | "# Set training parameters.\n", 258 | "num_topics = 8\n", 259 | "chunksize = 2000\n", 260 | "passes = 20\n", 261 | "iterations = 400\n", 262 | "eval_every = None # Don't evaluate model perplexity, takes too much time.\n", 263 | "\n", 264 | "# Make a index to word dictionary.\n", 265 | "temp = dictionary[0] # This is only to \"load\" the dictionary.\n", 266 | "id2word = dictionary.id2token\n", 267 | "\n", 268 | "model = LdaModel(\n", 269 | " corpus=corpus,\n", 270 | " id2word=id2word,\n", 271 | " chunksize=chunksize,\n", 272 | " alpha='auto',\n", 273 | " eta='auto',\n", 274 | " iterations=iterations,\n", 275 | " num_topics=num_topics,\n", 276 | " passes=passes,\n", 277 | " eval_every=eval_every\n", 278 | ")" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 13, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "Average topic coherence: -1.5715.\n", 291 | "[([(0.045936555, '学习'),\n", 292 | " (0.031208888, '学生'),\n", 293 | " (0.029099885, '课'),\n", 294 | " (0.027711054, '慕'),\n", 295 | " (0.019213945, '可以'),\n", 296 | " (0.015990602, '了'),\n", 297 | " (0.014622693, '课程'),\n", 298 | " (0.013580821, '是'),\n", 299 | " (0.013257743, '课堂'),\n", 300 | " (0.011716728, '在'),\n", 301 | " (0.011205085, '传统'),\n", 302 | " (0.010843861, '和'),\n", 303 | " (0.0103720995, '教学'),\n", 304 | " (0.009336841, '有'),\n", 305 | " (0.008886407, '教师'),\n", 306 | " (0.008839154, '也'),\n", 307 | " (0.008669153, '时间'),\n", 308 | " (0.008140829, '不'),\n", 309 | " (0.0075693075, '上'),\n", 310 | " 
(0.00641935, '对')],\n", 311 | " -1.1190896709229012),\n", 312 | " ([(0.04365886, '学生'),\n", 313 | " (0.028807856, '课'),\n", 314 | " (0.025896464, '慕'),\n", 315 | " (0.022753265, '课堂'),\n", 316 | " (0.020281866, '是'),\n", 317 | " (0.018622143, '可以'),\n", 318 | " (0.017597802, '在'),\n", 319 | " (0.017507399, '互动'),\n", 320 | " (0.01726766, '传统'),\n", 321 | " (0.015887884, '教师'),\n", 322 | " (0.01575285, '视频'),\n", 323 | " (0.014230284, '过程'),\n", 324 | " (0.01345424, '中'),\n", 325 | " (0.012589447, '老师'),\n", 326 | " (0.011443371, '教学'),\n", 327 | " (0.011261093, '更'),\n", 328 | " (0.010024107, '有'),\n", 329 | " (0.009968417, '学习'),\n", 330 | " (0.008570066, '了'),\n", 331 | " (0.008235991, '也')],\n", 332 | " -1.1406268289464787),\n", 333 | " ([(0.041168176, '课'),\n", 334 | " (0.038479604, '慕'),\n", 335 | " (0.028434081, '教育'),\n", 336 | " (0.023424882, '发展'),\n", 337 | " (0.023083135, '教学'),\n", 338 | " (0.017878179, '和'),\n", 339 | " (0.015430756, '是'),\n", 340 | " (0.01427263, '在'),\n", 341 | " (0.012535816, '模式'),\n", 342 | " (0.011448465, '学生'),\n", 343 | " (0.0106322495, '学习'),\n", 344 | " (0.0097915605, '信息'),\n", 345 | " (0.009334094, '课程'),\n", 346 | " (0.008974526, '与'),\n", 347 | " (0.008694211, '技术'),\n", 348 | " (0.008661952, '传统'),\n", 349 | " (0.00844705, '时代'),\n", 350 | " (0.008097915, '我们'),\n", 351 | " (0.007396978, '知识'),\n", 352 | " (0.006707873, '大学')],\n", 353 | " -1.5205310324639612),\n", 354 | " ([(0.042329118, '课'),\n", 355 | " (0.040534813, '慕'),\n", 356 | " (0.02618913, '传统'),\n", 357 | " (0.025423106, '课堂'),\n", 358 | " (0.024865817, '教育'),\n", 359 | " (0.017552653, '是'),\n", 360 | " (0.016350802, '了'),\n", 361 | " (0.013115059, '学生'),\n", 362 | " (0.01122631, '在'),\n", 363 | " (0.010857746, '教学'),\n", 364 | " (0.009673287, '取代'),\n", 365 | " (0.009628494, '我们'),\n", 366 | " (0.00956718, '发展'),\n", 367 | " (0.009007852, '不'),\n", 368 | " (0.00854386, '这'),\n", 369 | " (0.0084056, '有'),\n", 370 | " (0.008190151, '辩'),\n", 371 | " 
(0.0075556887, '并'),\n", 372 | " (0.0071221013, '我方'),\n", 373 | " (0.0070634685, '友')],\n", 374 | " -1.5374061935653567),\n", 375 | " ([(0.033139337, '是'),\n", 376 | " (0.023455435, '课'),\n", 377 | " (0.022710066, '慕'),\n", 378 | " (0.017635174, '不是'),\n", 379 | " (0.01595605, '在'),\n", 380 | " (0.014483565, '不'),\n", 381 | " (0.013575725, '我们'),\n", 382 | " (0.013521834, '都'),\n", 383 | " (0.013344324, '课堂'),\n", 384 | " (0.013198843, '老师'),\n", 385 | " (0.012532463, '学生'),\n", 386 | " (0.012179681, '自学'),\n", 387 | " (0.012018273, '也'),\n", 388 | " (0.011500534, '教学'),\n", 389 | " (0.011475632, '什么'),\n", 390 | " (0.010965755, '您'),\n", 391 | " (0.0108909635, '和'),\n", 392 | " (0.010553134, '发展'),\n", 393 | " (0.01046086, '如果'),\n", 394 | " (0.009452186, '了')],\n", 395 | " -1.7067579259512786),\n", 396 | " ([(0.03234891, '学习'),\n", 397 | " (0.0301957, '学生'),\n", 398 | " (0.02498013, '慕'),\n", 399 | " (0.024742436, '课'),\n", 400 | " (0.0197892, '在'),\n", 401 | " (0.018427694, '教育'),\n", 402 | " (0.016786413, '大学'),\n", 403 | " (0.014967592, '可以'),\n", 404 | " (0.011123596, '教师'),\n", 405 | " (0.010875967, '了'),\n", 406 | " (0.010345973, '情况'),\n", 407 | " (0.009712883, '根据'),\n", 408 | " (0.009105816, '有'),\n", 409 | " (0.008929264, '反'),\n", 410 | " (0.008755676, '课堂'),\n", 411 | " (0.008523766, '与'),\n", 412 | " (0.008233732, '课程'),\n", 413 | " (0.007963589, '自身'),\n", 414 | " (0.0076552206, '自己'),\n", 415 | " (0.0075655184, '任何')],\n", 416 | " -1.8297297601538782),\n", 417 | " ([(0.03539592, '是'),\n", 418 | " (0.0323152, '欠'),\n", 419 | " (0.030349951, '反驳'),\n", 420 | " (0.027505005, '学生'),\n", 421 | " (0.026632769, '反方'),\n", 422 | " (0.026452139, '学习'),\n", 423 | " (0.023567382, '可以'),\n", 424 | " (0.019868724, '有'),\n", 425 | " (0.019855661, '课'),\n", 426 | " (0.018933846, '慕'),\n", 427 | " (0.01650542, '韩'),\n", 428 | " (0.013882062, '课堂'),\n", 429 | " (0.012655453, '方式'),\n", 430 | " (0.011504636, '讨论'),\n", 431 | " (0.011340707, '自己'),\n", 432 | " 
(0.011195794, '这个'),\n", 433 | " (0.011156169, '中'),\n", 434 | " (0.010764056, '传统'),\n", 435 | " (0.010500618, '也'),\n", 436 | " (0.010162511, '氛围')],\n", 437 | " -1.8572670378768736),\n", 438 | " ([(0.03709079, '学生'),\n", 439 | " (0.028628118, '学习'),\n", 440 | " (0.019956885, '反'),\n", 441 | " (0.018679574, '教学'),\n", 442 | " (0.01632546, '教师'),\n", 443 | " (0.016320752, '交流'),\n", 444 | " (0.016138276, '驳'),\n", 445 | " (0.01527514, '是'),\n", 446 | " (0.014922402, '可以'),\n", 447 | " (0.0128476415, '在'),\n", 448 | " (0.011290235, '主动'),\n", 449 | " (0.010199238, '课堂'),\n", 450 | " (0.0096501205, '我'),\n", 451 | " (0.009077662, '与'),\n", 452 | " (0.009076965, '1'),\n", 453 | " (0.008919069, '主动性'),\n", 454 | " (0.008779615, '课'),\n", 455 | " (0.008687917, '不是'),\n", 456 | " (0.008524095, '反驳'),\n", 457 | " (0.008435496, '以')],\n", 458 | " -1.8606663646680894)]\n" 459 | ] 460 | } 461 | ], 462 | "source": [ 463 | "top_topics = model.top_topics(corpus) #, num_words=20)\n", 464 | "\n", 465 | "# Average topic coherence is the sum of topic coherences of all topics, divided by the number of topics.\n", 466 | "avg_topic_coherence = sum([t[1] for t in top_topics]) / num_topics\n", 467 | "print('Average topic coherence: %.4f.' 
% avg_topic_coherence)\n", 468 | "\n", 469 | "from pprint import pprint\n", 470 | "pprint(top_topics)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 14, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "import pickle as pkl" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 15, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "with open(\"model/gensim-lda.pkl\", 'wb') as fp:\n", 489 | " pkl.dump(model, fp)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 16, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "with open(\"model/gensim-lda.pkl\", 'rb') as fp:\n", 499 | " model0 = pkl.load(fp)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 17, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "gensim.models.ldamodel.LdaModel" 511 | ] 512 | }, 513 | "execution_count": 17, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "model0.__class__" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [] 528 | } 529 | ], 530 | "metadata": { 531 | "kernelspec": { 532 | "display_name": "Python 3", 533 | "language": "python", 534 | "name": "python3" 535 | }, 536 | "language_info": { 537 | "codemirror_mode": { 538 | "name": "ipython", 539 | "version": 3 540 | }, 541 | "file_extension": ".py", 542 | "mimetype": "text/x-python", 543 | "name": "python", 544 | "nbconvert_exporter": "python", 545 | "pygments_lexer": "ipython3", 546 | "version": "3.7.3" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 2 551 | } 552 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/index-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#hide\n", 10 | "from dynamic_topic_modeling import *" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# dynamic_topic_modeling\n", 18 | "\n", 19 | "> Run dynamic topic modeling.\n", 20 | "\n", 21 | "\n", 22 | "\n", 23 | "\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "\n", 28 | "\n", 29 | "The goal of ‘wei_lda_debate’ is to build Latent Dirichlet Allocation\n", 30 | "models based on ‘sklearn’ and ‘gensim’ framework, and Dynamic Topic\n", 31 | "Model(Blei and Lafferty 2006) based on ‘gensim’ framework. I decide to\n", 32 | "build a Python package, so this reposority will be updated. The new\n", 33 | "reposority path is\n", 34 | ".\n", 35 | "\n", 36 | "## Install\n", 37 | "\n", 38 | "`pip install dynamic_topic_modeling`\n", 39 | "\n", 40 | "## How to use\n", 41 | "\n", 42 | "\n", 43 | "1. [LDA based on\n", 44 | " sklearn](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/sklearn-lda.ipynb)\n", 45 | "2. [LDA based on\n", 46 | " gensim](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/gensim-lda.ipynb)\n", 47 | "3. [Dynamic Topic\n", 48 | " Modeling](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/dtm.ipynb)\n", 49 | "\n", 50 | "\n", 51 | "

\n", 52 | "\n", 53 | "**Code of Conduct**\n", 54 | "\n", 55 | "

\n", 56 | "\n", 57 | "
\n", 58 | "\n", 59 | "Please note that the `dynamic_topic_modeling` project is released with a\n", 60 | "[Contributor Code of\n", 61 | "Conduct](https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/CODE_OF_CONDUCT.md).
By\n", 62 | "contributing to this project, you agree to abide by its terms.\n", 63 | "\n", 64 | "
\n", 65 | "\n", 66 | "

\n", 67 | "\n", 68 | "**License**\n", 69 | "\n", 70 | "

\n", 71 | "\n", 72 | "
\n", 73 | "\n", 74 | "Apache License © [Jiaxiang Li and Shuyi\n", 75 | "Wang](https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/LICENSE.md)\n", 76 | "\n", 77 | "
\n", 78 | "\n", 79 | "
\n", 80 | "\n", 81 | "
\n", 82 | "\n", 83 | "Blei, David M., and John D. Lafferty. 2006. “Dynamic Topic Models.” In\n", 84 | "*Machine Learning, Proceedings of the Twenty-Third International\n", 85 | "Conference (Icml 2006), Pittsburgh, Pennsylvania, Usa, June 25-29,\n", 86 | "2006*.\n", 87 | "\n", 88 | "
\n", 89 | "\n", 90 | "
" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.7.3" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @software{jiaxiang_li_2020_3660401, 2 | author = {Jiaxiang Li}, 3 | title = {{JiaxiangBU/dynamic_topic_modeling: 4 | dynamic_topic_modeling 1.1.0}}, 5 | month = feb, 6 | year = 2020, 7 | publisher = {Zenodo}, 8 | version = {v1.1.0}, 9 | doi = {10.5281/zenodo.3660401}, 10 | url = {https://doi.org/10.5281/zenodo.3660401} 11 | } 12 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dynamic_topic_modeling 2 | Version: 1.1.0 3 | Title: Run Dynamic Topic Modeling 4 | Description: What the package does (one paragraph). 
5 | Authors@R: 6 | c(person(given = "Jiaxiang", 7 | family = "Li", 8 | role = c("aut", "cre"), 9 | email = "alex.lijiaxiang@foxmail.com", 10 | comment = c(ORCID = "https://orcid.org/0000-0003-3196-6492")), 11 | person(given = "Shuyi", 12 | family = "Wang", 13 | role = "aut"), 14 | person(given = "Svitlana", 15 | family = "Galeshchuk", 16 | role = "aut")) 17 | License: Apache License (>= 2.0) 18 | Encoding: UTF-8 19 | LazyData: true 20 | Roxygen: list(markdown = TRUE) 21 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 
30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright 2020 Jiaxiang Li 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 195 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | README: 2 | Rscript analysis/build-README.R 3 | 4 | index1: 5 | 6 | Rscript ../imp_rmd/update_index_content.R 7 | 8 | index2: 9 | 10 | notedown index.md > index.ipynb 11 | cp README-tmp.md README.md 12 | rm README-tmp.md 13 | 14 | push: 15 | Rscript analysis/push.R 16 | 17 | all: index1 index2 push 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: fake comment so roxygen2 overwrites silently.
2 | exportPattern("^[^\\.]") 3 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # dynamic.topic.modeling 1.1.0 2 | 3 | * Added a `NEWS.md` file to track changes to the package. 4 | * Add citations, and cited repositories. 5 | * Add titles for each notebook 6 | * Add the functions of 'display_topic', 'document_influence_dim', 'topic_distribution', 'visualize_topics', 'make_df'. 7 | * Update desc for release. 8 | * Update visualization for topic evolution 9 | 1. document the function of 'visualize_topics', 10 | 1. keep repo compact, 11 | 1. add rmd for visualization for topic evolutions, 12 | 1. update the word ones, 13 | 1. output the topic evolution figure, 14 | 1. update the word evolution figure, 15 | 1. update dtm model file, 16 | 1. update the dtm model data frame file, 17 | * Update the word evolution. 18 | * Finish topic evolution and output the result. 19 | * Finish the word evolution viz part. 20 | * Finish the function of 'display_topic', add the author of visualization parts, output the word distribution. 21 | * Update keywords. 22 | * Add pypi badge. 23 | * Update makefile 24 | * Update index.md, index.ipynb 25 | * Upload packages 26 | * Update readme with examples and references. 27 | * Update license, add settings.ini, setup.py, index.ipynb, and docs. 28 | * Add the file built for package.
29 | * Copy file from 'wei_lda_debate' 30 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | bibliography: [../learn_nlp/refs/add.bib,refs/add.bib] 4 | --- 5 | 6 | 7 | 8 | ```{r, include = FALSE} 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-", 13 | out.width = "100%" 14 | ) 15 | ``` 16 | 17 | # dynamic_topic_modeling 18 | 19 | 20 | [![PyPI version](https://badge.fury.io/py/dynamic-topic-modeling.svg)](https://badge.fury.io/py/dynamic-topic-modeling) 21 | [![DOI](https://zenodo.org/badge/238671296.svg)](https://zenodo.org/badge/latestdoi/238671296) 22 | 23 | 24 | Dynamic Topic Modeling (DTM)[@Blei2006Dynamic] is an advanced machine learning technique for uncovering the latent topics in a corpus of documents over time. The goal of this project is to provide an easy-to-use Python package for running DTM. This package is built on the frameworks of [sklearn](https://github.com/wshuyi/wei_lda_debate) and [gensim](https://github.com/GSukr/dtmvisual)[@Shuyi_Wang2018;@Svitlana_2019] for Dynamic Topic Modeling. 25 | 26 | To get started, follow the tutorials on our [Jupyter notebooks](https://nbviewer.jupyter.org/github/JiaxiangBU/dynamic_topic_modeling/tree/master/): 27 | 28 | 1. [LDA based on sklearn](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/sklearn-lda.ipynb) 29 | 2. [LDA based on gensim](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/gensim-lda.ipynb) 30 | 3. [Dynamic Topic Modeling](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/dtm.ipynb) 31 | 4. 
[Data Analysis on Demi Gods and Semi Devils using Dynamic Topic Modeling](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/demo.ipynb) 32 | 33 | ## Install 34 | 35 | `pip install dynamic_topic_modeling` 36 | 37 | ## Citations 38 | 39 | 40 | If you use dynamic_topic_modeling, please cite: 41 | 42 | Jiaxiang Li. (2020, February 9). JiaxiangBU/dynamic_topic_modeling: dynamic_topic_modeling 1.1.0 (Version v1.1.0). Zenodo. http://doi.org/10.5281/zenodo.3660401 43 | 44 | ``` 45 | @software{jiaxiang_li_2020_3660401, 46 | author = {Jiaxiang Li}, 47 | title = {{JiaxiangBU/dynamic_topic_modeling: 48 | dynamic_topic_modeling 1.1.0}}, 49 | month = feb, 50 | year = 2020, 51 | publisher = {Zenodo}, 52 | version = {v1.1.0}, 53 | doi = {10.5281/zenodo.3660401}, 54 | url = {https://doi.org/10.5281/zenodo.3660401} 55 | } 56 | ``` 57 | 58 | `r add2pkg::add_disclaimer("Jiaxiang Li;Shuyi Wang;Svitlana Galeshchuk", license_name = "Apache License")` 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # dynamic_topic_modeling 5 | 6 | 7 | 8 | [![PyPI 9 | version](https://badge.fury.io/py/dynamic-topic-modeling.svg)](https://badge.fury.io/py/dynamic-topic-modeling) 10 | [![DOI](https://zenodo.org/badge/238671296.svg)](https://zenodo.org/badge/latestdoi/238671296) 11 | 12 | 13 | Dynamic Topic Modeling (DTM)(Blei and Lafferty 2006) is an advanced 14 | machine learning technique for uncovering the latent topics in a corpus 15 | of documents over time. The goal of this project is to provide an 16 | easy-to-use Python package for running DTM. This package is built on the 17 | frameworks of [sklearn](https://github.com/wshuyi/wei_lda_debate) and 18 | [gensim](https://github.com/GSukr/dtmvisual)(Wang 2018; Svitlana 2019) 19 | for Dynamic Topic Modeling. 
20 | 21 | To get started, follow the tutorials on our [Jupyter 22 | notebooks](https://nbviewer.jupyter.org/github/JiaxiangBU/dynamic_topic_modeling/tree/master/): 23 | 24 | 1. [LDA based on 25 | sklearn](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/sklearn-lda.ipynb) 26 | 2. [LDA based on 27 | gensim](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/gensim-lda.ipynb) 28 | 3. [Dynamic Topic 29 | Modeling](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/dtm.ipynb) 30 | 4. [Data Analysis on Demi Gods and Semi Devils using Dynamic Topic 31 | Modeling](https://nbviewer.jupyter.org/urls/jiaxiangbu.github.io/dynamic_topic_modeling/demo.ipynb) 32 | 33 | ## Install 34 | 35 | `pip install dynamic_topic_modeling` 36 | 37 | ## Citations 38 | 39 | If you use dynamic_topic_modeling, please cite: 40 | 41 | Jiaxiang Li. (2020, February 9). JiaxiangBU/dynamic_topic_modeling: 42 | dynamic_topic_modeling 1.1.0 (Version v1.1.0). Zenodo. 43 | 44 | 45 | @software{jiaxiang_li_2020_3660401, 46 | author = {Jiaxiang Li}, 47 | title = {{JiaxiangBU/dynamic_topic_modeling: 48 | dynamic_topic_modeling 1.1.0}}, 49 | month = feb, 50 | year = 2020, 51 | publisher = {Zenodo}, 52 | version = {v1.1.0}, 53 | doi = {10.5281/zenodo.3660401}, 54 | url = {https://doi.org/10.5281/zenodo.3660401} 55 | } 56 | 57 |

58 | **Code of Conduct** 59 |

60 |
61 | Please note that the `dynamic_topic_modeling` project is released with a 62 | [Contributor Code of 63 | Conduct](https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/CODE_OF_CONDUCT.md).
By 64 | contributing to this project, you agree to abide by its terms. 65 |
66 |

67 | **License** 68 |

69 |
70 | Apache License © [Jiaxiang Li;Shuyi Wang;Svitlana 71 | Galeshchuk](https://github.com/JiaxiangBU/dynamic_topic_modeling/blob/master/LICENSE.md) 72 |
73 | 74 |
75 | 76 |
77 | 78 | Blei, David M., and John D. Lafferty. 2006. “Dynamic Topic Models.” In 79 | *Machine Learning, Proceedings of the Twenty-Third International 80 | Conference (ICML 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 81 | 2006*. 82 | 83 |
84 | 85 |
86 | 87 | Svitlana. 2019. “Dtmvisual: This Package Consists of Functionalities for 88 | Dynamic Topic Modelling and Its Visualization.” GitHub. 2019. 89 | . 90 | 91 |
92 | 93 |
94 | 95 | Wang, Shuyi. 2018. “Wei_lda_debate:” GitHub. 2018. 96 | . 97 | 98 |
99 | 100 |
101 | -------------------------------------------------------------------------------- /analysis/.ipynb_checkpoints/demo-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /analysis/build-README.R: -------------------------------------------------------------------------------- 1 | rmarkdown::render("README.Rmd") 2 | 3 | -------------------------------------------------------------------------------- /analysis/preprocess-demi_gods_and_semi_devils.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(tidyverse) 3 | ``` 4 | 5 | ```{r} 6 | txt <- read_lines("../../imp_rmd//166956.txt", locale = locale(encoding = "UTF-8")) 7 | # file.edit("../../imp_rmd/166956.txt") 8 | ``` 9 | 10 | ```{r} 11 | txt %>% 12 | str_subset("章|节") %>% 13 | head() 14 | ``` 15 | 16 | ```{r} 17 | df <- data.frame(text = txt) 18 | library(magrittr) 19 | df %<>% 20 | mutate(text = text %>% str_remove_all('[:punct:]') %>% str_remove_all("\\s")) 21 | df %<>% 22 | filter(str_length(text)>10) 23 | ``` 24 | 25 | ```{r} 26 | df <- 27 | df %>% 28 | mutate(bin = str_detect(text, "第\\p{Han}{1,5}章")) %>% 29 | mutate(bin = cumsum(bin)) %>% 30 | group_by(bin) %>% 31 | filter(n() > 50) 32 | ``` 33 | 34 | ```{r} 35 | df %>% 36 | select(text, bin) %>% 37 | write_excel_csv("../../imp_rmd/天龙八部.csv") 38 | ``` 39 | 40 | ```{r} 41 | df %>% count(bin) %>% 42 | ungroup() %>% 43 | summarise(min(n)) 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /analysis/push.R: -------------------------------------------------------------------------------- 1 | git2r::add(path = "*README*") 2 | git2r::add(path = "*readme*") 3 | git2r::commit(message = "update readme") 4 | git2r::push(name = 'origin', refspec 
= "refs/heads/master", cred = git2r::cred_token()) 5 | -------------------------------------------------------------------------------- /analysis/viz-demo.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(tidyverse) 3 | ``` 4 | 5 | ```{r} 6 | df <- read_csv("../output/demo_model_df.csv") 7 | dict <- read_tsv("../output/demo_dict_text.txt", skip = 6, col_names = c("word_id", "word_name", "freq")) %>% 8 | select(-freq) 9 | df <- 10 | df %>% 11 | left_join(dict, by = c("word"="word_id")) %>% 12 | mutate(word = word_name) %>% 13 | select(-word_name) 14 | ``` 15 | 16 | ```{r} 17 | df %>% 18 | ggplot() + 19 | aes(x = period, y = weight, color = word) + 20 | geom_line() + 21 | geom_text( 22 | data = function(x) df %>% filter(period == max(period)), 23 | aes(label = word), nudge_x = 0.5 24 | ) + 25 | facet_wrap(~ topicId, nrow = 2) + 26 | theme_classic() + 27 | theme(legend.position = "None") 28 | ggsave("../figure/demo_word_evolution.png", height = 5, width = 20) 29 | knitr::include_graphics("../figure/demo_word_evolution.png") 30 | ``` 31 | 32 | # Topic Evolution 33 | 34 | ```{r} 35 | word_df <- df 36 | topic_df <- read_csv("../output/demo_topic_df.csv") 37 | ``` 38 | 39 | ```{r} 40 | topic_label <- 41 | word_df %>% 42 | # distinct(topicId) 43 | group_by(topicId, word) %>% 44 | summarise(weight = median(weight)) %>% 45 | group_by(topicId) %>% 46 | arrange(desc(abs(weight))) %>% 47 | mutate(row_number = row_number()) %>% 48 | filter(row_number <= 4) %>% 49 | summarise(text = str_flatten(word, ">")) 50 | ``` 51 | 52 | 53 | ```{r} 54 | df <- topic_df %>% 55 | # 也是求文档的均值 56 | group_by(period, topicId) %>% 57 | summarise(distribution = mean(distribution)) %>% 58 | left_join(topic_label, by = "topicId") %>% 59 | mutate(topicId = as.factor(topicId)) %>% 60 | ungroup() 61 | df %>% 62 | ggplot() + 63 | aes(x = period, y = distribution, color = topicId) + 64 | geom_line() + 65 | geom_text( 66 | data = function(x) df %>% 
filter(period == max(period)), 67 | aes(label = text), nudge_x = 0.5 68 | ) + 69 | theme_classic() + 70 | theme(legend.position = "None") + 71 | labs(y = '分布', x = '章节') 72 | ggsave("../figure/demo_topic_evolution.png", height = 5, width = 20) 73 | knitr::include_graphics("../figure/demo_topic_evolution.png") 74 | ``` 75 | 76 | -------------------------------------------------------------------------------- /analysis/viz-topic.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(tidyverse) 3 | ``` 4 | 5 | ```{r} 6 | word_df <- read_csv("../output/model_df.csv") 7 | topic_df <- read_csv("../output/topic_df.csv") 8 | ``` 9 | 10 | ```{r} 11 | topic_label <- 12 | word_df %>% 13 | # distinct(topicId) 14 | group_by(topicId, word) %>% 15 | summarise(weight = median(weight)) %>% 16 | group_by(topicId) %>% 17 | arrange(desc(abs(weight))) %>% 18 | mutate(row_number = row_number()) %>% 19 | filter(row_number <= 4) %>% 20 | summarise(text = str_flatten(word, ">")) 21 | ``` 22 | 23 | 24 | ```{r} 25 | df <- topic_df %>% 26 | # 也是求文档的均值 27 | group_by(period, topicId) %>% 28 | summarise(distribution = mean(distribution)) %>% 29 | left_join(topic_label, by = "topicId") %>% 30 | mutate(topicId = as.factor(topicId)) %>% 31 | ungroup() 32 | df %>% 33 | ggplot() + 34 | aes(x = period, y = distribution, color = topicId) + 35 | geom_line() + 36 | geom_text( 37 | data = function(x) df %>% filter(period == max(period)), 38 | aes(label = text), nudge_x = 0.5 39 | ) + 40 | theme_classic() + 41 | theme(legend.position = "None") 42 | ggsave("../figure/topic_evolution.png", height = 5, width = 20) 43 | knitr::include_graphics("../figure/topic_evolution.png") 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /analysis/viz-word.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(tidyverse) 3 | ``` 4 | 5 | ```{r} 6 | df <- 
read_csv("../output/model_df.csv") 7 | ``` 8 | 9 | ```{r} 10 | df %>% 11 | ggplot() + 12 | aes(x = period, y = weight, color = word) + 13 | geom_line() + 14 | geom_text( 15 | data = function(x) df %>% filter(period == max(period)), 16 | aes(label = word), nudge_x = 0.5 17 | ) + 18 | facet_wrap(~ topicId, nrow = 2) + 19 | theme_classic() + 20 | theme(legend.position = "None") 21 | ggsave("../figure/word_evolution.png", height = 5, width = 20) 22 | knitr::include_graphics("../figure/word_evolution.png") 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /build/lib/dynamic_topic_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.1.0" 2 | -------------------------------------------------------------------------------- /build/lib/dynamic_topic_modeling/_nbdev.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT! 
2 | 3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"] 4 | 5 | index = {"build_docs": "dtm.ipynb", 6 | "build_dict": "dtm.ipynb", 7 | "display_topic": "dtm.ipynb", 8 | "document_influence_dim": "dtm.ipynb", 9 | "topic_distribution": "dtm.ipynb", 10 | "visualize_topics": "dtm.ipynb", 11 | "make_df": "sklearn-lda.ipynb", 12 | "chinese_word_cut": "sklearn-lda.ipynb", 13 | "print_top_words": "sklearn-lda.ipynb", 14 | "get_custom_stopwords": "sklearn-lda.ipynb", 15 | "lda_on_chinese_articles_with_param": "sklearn-lda.ipynb", 16 | "lda_on_chinese_articles": "sklearn-lda.ipynb"} 17 | 18 | modules = ["dtm.py", 19 | "sklearn_lda.py"] 20 | 21 | doc_url = "https://JiaxiangBU.github.io/dynamic_topic_modeling/" 22 | 23 | git_url = "https://github.com/fastai/dynamic_topic_modeling/tree/master/" 24 | 25 | def custom_doc_links(name): return None 26 | -------------------------------------------------------------------------------- /build/lib/dynamic_topic_modeling/dtm.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: dtm.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['build_docs', 'build_dict', 'display_topic', 'document_influence_dim', 'topic_distribution', 4 | 'visualize_topics'] 5 | 6 | # Cell 7 | def build_docs(text): 8 | raw_documents = text.tolist() 9 | # 参考 https://blog.csdn.net/kwame211/article/details/78963517 10 | import jieba 11 | docs = [[word for word in jieba.cut(document, cut_all=True)] for document in raw_documents] 12 | return docs 13 | 14 | # Cell 15 | def build_dict(docs, no_below=5, no_above=0.9): 16 | # 参考 https://radimrehurek.com/gensim/auto_examples/tutorials/run_lda.html#sphx-glr-auto-examples-tutorials-run-lda-py 17 | from gensim.corpora import Dictionary 18 | # Create a dictionary representation of the documents. 19 | dictionary = Dictionary(docs) 20 | 21 | # Filter out words that occur less than 5 documents, or more than 90% of the documents. 
22 | dictionary.filter_extremes(no_below=no_below, no_above=no_above) 23 | return dictionary 24 | 25 | # Cell 26 | # cite https://github.com/GSukr/dtmvisual 27 | import pandas as pd 28 | import matplotlib.pyplot as plt 29 | 30 | def display_topic(timespans, num_topics, model, num_words = 10): 31 | 32 | """ 33 | :param timespans: number od timespans/periods 34 | :param num_topics: number of topics 35 | :param model: DTM trained model 36 | :param num_words: number of words to display for the topicid at the time period 37 | :return: Dataframe with corresponding weight for each top word in each topic of each period 38 | """ 39 | topicId, period, weight, word = [], [], [], [] 40 | for t in range(timespans): 41 | for s in range (num_topics): 42 | topics = model.show_topic(topicid=s, time=t, topn=num_words) 43 | # num_words : int, optional 44 | # DEPRECATED PARAMETER, use `topn` instead. 45 | for i, (word_, w) in enumerate(topics): 46 | topicId.append(s) 47 | period.append(t) 48 | weight.append(w) 49 | word.append(word_) 50 | return pd.DataFrame(list(zip(topicId, period, weight, word)), columns = ['topicId', 'period', 'word', 'weight']) 51 | 52 | # Cell 53 | import pandas as pd 54 | import matplotlib.pyplot as plt 55 | import seaborn as sns 56 | sns.set() 57 | 58 | def document_influence_dim(num_topics, model, time_seq = []): 59 | 60 | """ 61 | function to compute the document influence on a topic: http://users.umiacs.umd.edu/~jbg/nips_tm_workshop/30.pdf 62 | :param num_topics: number of topics 63 | 64 | """ 65 | 66 | doc, topicId, period, distributions=[], [], [], [] 67 | for topic in range(num_topics): 68 | for t in range(len(time_seq)): 69 | for document in range(time_seq[t]): 70 | distribution = round(model.influences_time[t][document][topic], 4) 71 | # print(len(model.influences_time)) 72 | # print(len(model.influences_time[0])) 73 | # print(len(model.influences_time[0][0])) 74 | # 确定好正确的顺序 75 | period.append(t) 76 | doc.append(document) 77 | topicId.append(topic) 78 
| distributions.append(distribution) 79 | return pd.DataFrame(list(zip(doc, topicId, period, distributions)), columns=['document','topicId', 'period','distribution']) 80 | 81 | 82 | 83 | def topic_distribution(num_topics, model, time_seq = []): 84 | 85 | """ 86 | function to compute the topical distribution in a document 87 | :param num_topics: number of topics 88 | 89 | """ 90 | doc, topicId, distributions=[], [], [] 91 | df_dim = document_influence_dim(num_topics = num_topics, model = model, time_seq = time_seq) 92 | for document in range(0, sum(time_seq)): 93 | for topic in range(0, num_topics): 94 | distribution = round(model.gamma_[document][topic], 4) 95 | doc.append(document) 96 | topicId.append(topic) 97 | distributions.append(distribution) 98 | return pd.DataFrame(list(zip(doc, topicId, distributions, df_dim.period)), columns=['document','topicId', 'distribution', 'period']) 99 | 100 | 101 | 102 | def visualize_topics(df): 103 | 104 | """ 105 | function to vizualise mean topic distribution over defined periods. 106 | the topic distribution is defined by the average level by documents. 107 | :param num_topics: number of topics 108 | 109 | """ 110 | fig, ax = plt.subplots(figsize=(30,10)) 111 | df.groupby(['period', 'topicId'], sort=False).mean()['distribution'].unstack().plot(ax=ax,grid=True, linewidth =3.0, sharex=True) 112 | plt.ylabel("Topic Distribution", fontsize=16) 113 | plt.xlabel("Period", fontsize=16) 114 | plt.title("Topic evolution") 115 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title = "Topics", fontsize='large', labelspacing=0.6, fancybox = True) -------------------------------------------------------------------------------- /build/lib/dynamic_topic_modeling/gensim_lda.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: gensim-lda.ipynb (unless otherwise specified). 
2 | 3 | __all__ = [] -------------------------------------------------------------------------------- /build/lib/dynamic_topic_modeling/sklearn_lda.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: sklearn-lda.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['make_df', 'chinese_word_cut', 'print_top_words', 'get_custom_stopwords', 4 | 'lda_on_chinese_articles_with_param', 'lda_on_chinese_articles'] 5 | 6 | # Cell 7 | import pandas as pd 8 | 9 | # Cell 10 | import jieba 11 | 12 | # Cell 13 | def make_df(csv_name, column = 'Content', output_column = 'text'): 14 | '''Use jieba, create data frame.''' 15 | df = pd.read_csv(csv_name) 16 | df = df.dropna(subset=[column]) 17 | df[output_column] = df[column].apply(lambda x: " ".join(jieba.cut(x))) 18 | return df 19 | 20 | # Cell 21 | import pyLDAvis 22 | import pyLDAvis.sklearn 23 | 24 | # Cell 25 | pyLDAvis.enable_notebook() 26 | 27 | # Cell 28 | import pandas as pd 29 | from sklearn.feature_extraction.text import CountVectorizer 30 | import jieba 31 | from sklearn.decomposition import LatentDirichletAllocation 32 | 33 | def chinese_word_cut(mytext): 34 | return " ".join(jieba.cut(mytext)) 35 | 36 | def print_top_words(model, feature_names, n_top_words): 37 | for topic_idx, topic in enumerate(model.components_): 38 | print("Topic #%d:" % topic_idx) 39 | print(" ".join([feature_names[i] 40 | for i in topic.argsort()[:-n_top_words - 1:-1]])) 41 | 42 | # Cell 43 | def get_custom_stopwords(stop_words_file, encoding = 'utf-8'): 44 | with open(stop_words_file, encoding = encoding) as f: 45 | stopwords = f.read() 46 | stopwords_list = stopwords.split('\n') 47 | custom_stopwords_list = [i for i in stopwords_list] 48 | return custom_stopwords_list 49 | 50 | # Cell 51 | def lda_on_chinese_articles_with_param(df, n_topics, 52 | col_content, 53 | stopwords, 54 | n_features, 55 | max_df, 56 | min_df, 57 | n_top_words): 58 | articles_cutted = 
df[col_content].apply(chinese_word_cut) 59 | vect = CountVectorizer(max_df = max_df, 60 | min_df = min_df, 61 | token_pattern=u'(?u)\\b[^\\d\\W]\\w+\\b', 62 | stop_words=frozenset(stopwords)) 63 | tf = vect.fit_transform(articles_cutted) 64 | lda = LatentDirichletAllocation(n_components=n_topics, max_iter=50, 65 | learning_method='online', 66 | learning_offset=50, 67 | random_state=0) 68 | lda.fit(tf) 69 | print_top_words(lda, vect.get_feature_names(), n_top_words) 70 | return lda, tf, vect 71 | 72 | # Cell 73 | def lda_on_chinese_articles(df, n_topics): 74 | return lda_on_chinese_articles_with_param(df, n_topics, 75 | col_content = col_content, 76 | stopwords = stopwords, 77 | n_features = n_features, 78 | max_df = max_df, 79 | min_df = min_df, 80 | n_top_words = n_top_words) -------------------------------------------------------------------------------- /data-raw/affirmative_modifed.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `affirmative_modifed` dataset goes here 2 | 3 | # usethis::use_data(affirmative_modifed, overwrite = TRUE) 4 | library(tidyverse) 5 | affirmative <- readr::read_csv("data/affirmative.csv") 6 | affirmative_modifed <- 7 | affirmative %>% 8 | rename_all(str_to_lower) %>% 9 | mutate( 10 | group = group %>% 11 | str_replace("八", "8") %>% 12 | str_replace("九", "9") %>% 13 | str_replace("十", "10") %>% 14 | str_extract("\\d+") %>% 15 | str_pad(width = 2, pad = "0") %>% 16 | str_c("第", ., "组") 17 | ) 18 | affirmative_modifed %>% 19 | write_excel_csv("data/affirmative_modifed.csv") 20 | -------------------------------------------------------------------------------- /data/stopwords.txt: -------------------------------------------------------------------------------- 1 | ――― 2 | 》), 3 | )÷(1- 4 | ”, 5 | )、 6 | =( 7 | : 8 | → 9 | ℃ 10 | & 11 | * 12 | 一一 13 | ~~~~ 14 | ’ 15 | . 
16 | 『 17 | .一 18 | ./ 19 | -- 20 | 』 21 | =″ 22 | 【 23 | [*] 24 | }> 25 | [⑤]] 26 | [①D] 27 | c] 28 | ng�P 29 | * 30 | // 31 | [ 32 | ] 33 | [②e] 34 | [②g] 35 | ={ 36 | } 37 | ,也 38 | ‘ 39 | A 40 | [①⑥] 41 | [②B] 42 | [①a] 43 | [④a] 44 | [①③] 45 | [③h] 46 | ③] 47 | 1. 48 | -- 49 | [②b] 50 | ’‘ 51 | ××× 52 | [①⑧] 53 | 0:2 54 | =[ 55 | [⑤b] 56 | [②c] 57 | [④b] 58 | [②③] 59 | [③a] 60 | [④c] 61 | [①⑤] 62 | [①⑦] 63 | [①g] 64 | ∈[ 65 | [①⑨] 66 | [①④] 67 | [①c] 68 | [②f] 69 | [②⑧] 70 | [②①] 71 | [①C] 72 | [③c] 73 | [③g] 74 | [②⑤] 75 | [②②] 76 | 一. 77 | [①h] 78 | .数 79 | [] 80 | [①B] 81 | 数/ 82 | [①i] 83 | [③e] 84 | [①①] 85 | [④d] 86 | [④e] 87 | [③b] 88 | [⑤a] 89 | [①A] 90 | [②⑧] 91 | [②⑦] 92 | [①d] 93 | [②j] 94 | 〕〔 95 | ][ 96 | :// 97 | ′∈ 98 | [②④ 99 | [⑤e] 100 | 12% 101 | b] 102 | ... 103 | ................... 104 | …………………………………………………③ 105 | ZXFITL 106 | [③F] 107 | 」 108 | [①o] 109 | ]∧′=[ 110 | ∪φ∈ 111 | ′| 112 | {- 113 | ②c 114 | } 115 | [③①] 116 | R.L. 117 | [①E] 118 | Ψ 119 | -[*]- 120 | ↑ 121 | .日 122 | [②d] 123 | [② 124 | [②⑦] 125 | [②②] 126 | [③e] 127 | [①i] 128 | [①B] 129 | [①h] 130 | [①d] 131 | [①g] 132 | [①②] 133 | [②a] 134 | f] 135 | [⑩] 136 | a] 137 | [①e] 138 | [②h] 139 | [②⑥] 140 | [③d] 141 | [②⑩] 142 | e] 143 | 〉 144 | 】 145 | 元/吨 146 | [②⑩] 147 | 2.3% 148 | 5:0 149 | [①] 150 | :: 151 | [②] 152 | [③] 153 | [④] 154 | [⑤] 155 | [⑥] 156 | [⑦] 157 | [⑧] 158 | [⑨] 159 | …… 160 | ―― 161 | ? 162 | 、 163 | 。 164 | “ 165 | ” 166 | 《 167 | 》 168 | ! 169 | , 170 | : 171 | ; 172 | ? 173 | . 174 | , 175 | . 176 | ' 177 | ? 178 | ・ 179 | ――― 180 | ── 181 | ? 182 | ― 183 | < 184 | > 185 | ( 186 | ) 187 | 〔 188 | 〕 189 | [ 190 | ] 191 | ( 192 | ) 193 | - 194 | + 195 | ~ 196 | × 197 | / 198 | / 199 | ① 200 | ② 201 | ③ 202 | ④ 203 | ⑤ 204 | ⑥ 205 | ⑦ 206 | ⑧ 207 | ⑨ 208 | ⑩ 209 | Ⅲ 210 | В 211 | " 212 | ; 213 | # 214 | @ 215 | γ 216 | μ 217 | φ 218 | φ. 
219 | × 220 | Δ 221 | ■ 222 | ▲ 223 | sub 224 | exp 225 | sup 226 | sub 227 | Lex 228 | # 229 | % 230 | & 231 | ' 232 | + 233 | +ξ 234 | ++ 235 | - 236 | -β 237 | < 238 | <± 239 | <Δ 240 | <λ 241 | <φ 242 | << 243 | = 244 | = 245 | =☆ 246 | =- 247 | > 248 | >λ 249 | _ 250 | ~± 251 | ~+ 252 | [⑤f] 253 | [⑤d] 254 | [②i] 255 | ≈ 256 | [②G] 257 | [①f] 258 | LI 259 | ㈧ 260 | [- 261 | ...... 262 | 〉 263 | [③⑩] 264 | 第二 265 | 一番 266 | 一直 267 | 一个 268 | 一些 269 | 许多 270 | 种 271 | 有的是 272 | 也就是说 273 | 末##末 274 | 啊 275 | 阿 276 | 哎 277 | 哎呀 278 | 哎哟 279 | 唉 280 | 俺 281 | 俺们 282 | 按 283 | 按照 284 | 吧 285 | 吧哒 286 | 把 287 | 罢了 288 | 被 289 | 本 290 | 本着 291 | 比 292 | 比方 293 | 比如 294 | 鄙人 295 | 彼 296 | 彼此 297 | 边 298 | 别 299 | 别的 300 | 别说 301 | 并 302 | 并且 303 | 不比 304 | 不成 305 | 不单 306 | 不但 307 | 不独 308 | 不管 309 | 不光 310 | 不过 311 | 不仅 312 | 不拘 313 | 不论 314 | 不怕 315 | 不然 316 | 不如 317 | 不特 318 | 不惟 319 | 不问 320 | 不只 321 | 朝 322 | 朝着 323 | 趁 324 | 趁着 325 | 乘 326 | 冲 327 | 除 328 | 除此之外 329 | 除非 330 | 除了 331 | 此 332 | 此间 333 | 此外 334 | 从 335 | 从而 336 | 打 337 | 待 338 | 但 339 | 但是 340 | 当 341 | 当着 342 | 到 343 | 得 344 | 的 345 | 的话 346 | 等 347 | 等等 348 | 地 349 | 第 350 | 叮咚 351 | 对 352 | 对于 353 | 多 354 | 多少 355 | 而 356 | 而况 357 | 而且 358 | 而是 359 | 而外 360 | 而言 361 | 而已 362 | 尔后 363 | 反过来 364 | 反过来说 365 | 反之 366 | 非但 367 | 非徒 368 | 否则 369 | 嘎 370 | 嘎登 371 | 该 372 | 赶 373 | 个 374 | 各 375 | 各个 376 | 各位 377 | 各种 378 | 各自 379 | 给 380 | 根据 381 | 跟 382 | 故 383 | 故此 384 | 固然 385 | 关于 386 | 管 387 | 归 388 | 果然 389 | 果真 390 | 过 391 | 哈 392 | 哈哈 393 | 呵 394 | 和 395 | 何 396 | 何处 397 | 何况 398 | 何时 399 | 嘿 400 | 哼 401 | 哼唷 402 | 呼哧 403 | 乎 404 | 哗 405 | 还是 406 | 还有 407 | 换句话说 408 | 换言之 409 | 或 410 | 或是 411 | 或者 412 | 极了 413 | 及 414 | 及其 415 | 及至 416 | 即 417 | 即便 418 | 即或 419 | 即令 420 | 即若 421 | 即使 422 | 几 423 | 几时 424 | 己 425 | 既 426 | 既然 427 | 既是 428 | 继而 429 | 加之 430 | 假如 431 | 假若 432 | 假使 433 | 鉴于 434 | 将 435 | 较 436 | 较之 437 | 叫 438 | 接着 439 | 结果 440 | 借 441 | 紧接着 442 | 进而 443 | 尽 444 | 尽管 445 | 经 446 | 
经过 447 | 就 448 | 就是 449 | 就是说 450 | 据 451 | 具体地说 452 | 具体说来 453 | 开始 454 | 开外 455 | 靠 456 | 咳 457 | 可 458 | 可见 459 | 可是 460 | 可以 461 | 况且 462 | 啦 463 | 来 464 | 来着 465 | 离 466 | 例如 467 | 哩 468 | 连 469 | 连同 470 | 两者 471 | 了 472 | 临 473 | 另 474 | 另外 475 | 另一方面 476 | 论 477 | 嘛 478 | 吗 479 | 慢说 480 | 漫说 481 | 冒 482 | 么 483 | 每 484 | 每当 485 | 们 486 | 莫若 487 | 某 488 | 某个 489 | 某些 490 | 拿 491 | 哪 492 | 哪边 493 | 哪儿 494 | 哪个 495 | 哪里 496 | 哪年 497 | 哪怕 498 | 哪天 499 | 哪些 500 | 哪样 501 | 那 502 | 那边 503 | 那儿 504 | 那个 505 | 那会儿 506 | 那里 507 | 那么 508 | 那么些 509 | 那么样 510 | 那时 511 | 那些 512 | 那样 513 | 乃 514 | 乃至 515 | 呢 516 | 能 517 | 你 518 | 你们 519 | 您 520 | 宁 521 | 宁可 522 | 宁肯 523 | 宁愿 524 | 哦 525 | 呕 526 | 啪达 527 | 旁人 528 | 呸 529 | 凭 530 | 凭借 531 | 其 532 | 其次 533 | 其二 534 | 其他 535 | 其它 536 | 其一 537 | 其余 538 | 其中 539 | 起 540 | 起见 541 | 起见 542 | 岂但 543 | 恰恰相反 544 | 前后 545 | 前者 546 | 且 547 | 然而 548 | 然后 549 | 然则 550 | 让 551 | 人家 552 | 任 553 | 任何 554 | 任凭 555 | 如 556 | 如此 557 | 如果 558 | 如何 559 | 如其 560 | 如若 561 | 如上所述 562 | 若 563 | 若非 564 | 若是 565 | 啥 566 | 上下 567 | 尚且 568 | 设若 569 | 设使 570 | 甚而 571 | 甚么 572 | 甚至 573 | 省得 574 | 时候 575 | 什么 576 | 什么样 577 | 使得 578 | 是 579 | 是的 580 | 首先 581 | 谁 582 | 谁知 583 | 顺 584 | 顺着 585 | 似的 586 | 虽 587 | 虽然 588 | 虽说 589 | 虽则 590 | 随 591 | 随着 592 | 所 593 | 所以 594 | 他 595 | 他们 596 | 他人 597 | 它 598 | 它们 599 | 她 600 | 她们 601 | 倘 602 | 倘或 603 | 倘然 604 | 倘若 605 | 倘使 606 | 腾 607 | 替 608 | 通过 609 | 同 610 | 同时 611 | 哇 612 | 万一 613 | 往 614 | 望 615 | 为 616 | 为何 617 | 为了 618 | 为什么 619 | 为着 620 | 喂 621 | 嗡嗡 622 | 我 623 | 我们 624 | 呜 625 | 呜呼 626 | 乌乎 627 | 无论 628 | 无宁 629 | 毋宁 630 | 嘻 631 | 吓 632 | 相对而言 633 | 像 634 | 向 635 | 向着 636 | 嘘 637 | 呀 638 | 焉 639 | 沿 640 | 沿着 641 | 要 642 | 要不 643 | 要不然 644 | 要不是 645 | 要么 646 | 要是 647 | 也 648 | 也罢 649 | 也好 650 | 一 651 | 一般 652 | 一旦 653 | 一方面 654 | 一来 655 | 一切 656 | 一样 657 | 一则 658 | 依 659 | 依照 660 | 矣 661 | 以 662 | 以便 663 | 以及 664 | 以免 665 | 以至 666 | 以至于 667 | 以致 668 | 抑或 669 | 因 670 | 因此 671 | 因而 672 | 因为 673 | 哟 674 | 用 
675 | 由 676 | 由此可见 677 | 由于 678 | 有 679 | 有的 680 | 有关 681 | 有些 682 | 又 683 | 于 684 | 于是 685 | 于是乎 686 | 与 687 | 与此同时 688 | 与否 689 | 与其 690 | 越是 691 | 云云 692 | 哉 693 | 再说 694 | 再者 695 | 在 696 | 在下 697 | 咱 698 | 咱们 699 | 则 700 | 怎 701 | 怎么 702 | 怎么办 703 | 怎么样 704 | 怎样 705 | 咋 706 | 照 707 | 照着 708 | 者 709 | 这 710 | 这边 711 | 这儿 712 | 这个 713 | 这会儿 714 | 这就是说 715 | 这里 716 | 这么 717 | 这么点儿 718 | 这么些 719 | 这么样 720 | 这时 721 | 这些 722 | 这样 723 | 正如 724 | 吱 725 | 之 726 | 之类 727 | 之所以 728 | 之一 729 | 只是 730 | 只限 731 | 只要 732 | 只有 733 | 至 734 | 至于 735 | 诸位 736 | 着 737 | 着呢 738 | 自 739 | 自从 740 | 自个儿 741 | 自各儿 742 | 自己 743 | 自家 744 | 自身 745 | 综上所述 746 | 总的来看 747 | 总的来说 748 | 总的说来 749 | 总而言之 750 | 总之 751 | 纵 752 | 纵令 753 | 纵然 754 | 纵使 755 | 遵照 756 | 作为 757 | 兮 758 | 呃 759 | 呗 760 | 咚 761 | 咦 762 | 喏 763 | 啐 764 | 喔唷 765 | 嗬 766 | 嗯 767 | 嗳 768 | 正方 769 | 反方 770 | 反驳 771 | 慕课 772 | 辩论 773 | 辩友 774 | 我方 775 | 对方 776 | -------------------------------------------------------------------------------- /dev_history_r_proj.R: -------------------------------------------------------------------------------- 1 | # update template --------------------------------------------------------- 2 | 3 | library(fs) 4 | file_copy("../dev_history/refs/dev_history_r_proj.R", 5 | ".", 6 | overwrite = TRUE) 7 | 8 | # setup ------------------------------------------------------------------- 9 | 10 | library(devtools) 11 | use_git() 12 | 13 | # CTRL + F1 show Git 14 | library(devtools) 15 | use_build_ignore("dev_history_r_proj.R") 16 | library(magrittr) 17 | 18 | # add desc ---------------------------------------------------------------- 19 | 20 | library(usethis) 21 | add2pkg::create_desc() 22 | author_info <- add2pkg::add_me(is_paste = TRUE) 23 | desc_lines <- readr::read_lines("DESCRIPTION") 24 | desc_lines[5] <- author_info 25 | desc_lines %>% readr::write_lines("DESCRIPTION") 26 | # file.edit("DESCRIPTION") 27 | library(tidyverse) 28 | 29 | # add license 
------------------------------------------------------------- 30 | 31 | options(usethis.full_name = "Jiaxiang Li") 32 | use_cc0_license() 33 | 34 | 35 | # add namespace ----------------------------------------------------------- 36 | 37 | use_namespace() 38 | 39 | 40 | # add makefile ------------------------------------------------------------ 41 | 42 | usethis::use_make() 43 | 44 | # coding ------------------------------------------------------------------ 45 | 46 | # prettify ---------------------------------------------------------------- 47 | 48 | if (file.exists("README.Rmd")) { 49 | file.rename("README.Rmd", "README-bak.Rmd") 50 | file.edit("README-bak.Rmd") 51 | } 52 | use_readme_rmd(open = FALSE) 53 | read_lines("README.Rmd")[1:22] %>% 54 | c("") %>% 55 | c('`r add2pkg::add_disclaimer("Jiaxiang Li")`') %>% 56 | write_lines("README.Rmd") 57 | file.remove("README-bak.Rmd") 58 | # file.edit("README.Rmd") 59 | rmarkdown::render("README.Rmd") 60 | rstudioapi::viewer("README.html") 61 | file.remove("README.html") 62 | 63 | # add examlpes ------------------------------------------------------------ 64 | 65 | clipr::read_clip() %>% 66 | str_c("#' ", .) %>% 67 | clipr::write_clip() 68 | 69 | clipr::read_clip() %>% 70 | str_c("#' \\dontrun{", ., "}") %>% 71 | clipr::write_clip() 72 | 73 | 74 | # add dirs ---------------------------------------------------------------- 75 | 76 | library(tidyverse) 77 | library(fs) 78 | list("analysis", "output", "refs") %>% map(dir.create) 79 | 80 | 81 | # add commit -------------------------------------------------------------- 82 | 83 | 84 | git2r::add(path = ".") 85 | glue::glue("Add metadata 86 | 87 | 1. license 88 | 1. readme 89 | 1. namespace 90 | 1. desc 91 | 1. Makefile") %>% 92 | git2r::commit(message = .) 
93 | 94 | git2r::remote_add(name = "origin", 95 | url = glue::glue("https://github.com/JiaxiangBU/{add2pkg::proj_name()}.git")) 96 | 97 | library(git2r) 98 | git2r::push(name = 'origin', refspec = "refs/heads/master", 99 | cred = git2r::cred_token() 100 | ) 101 | 102 | 103 | # delete object ----------------------------------------------------------- 104 | 105 | rm("author_info") 106 | rm("desc_lines") 107 | 108 | # update template --------------------------------------------------------- 109 | 110 | library(fs) 111 | file_copy("dev_history_r_proj.R", "../dev_history/refs/dev_history_r_proj.R", 112 | overwrite = TRUE) 113 | repo <- git2r::repository("../dev_history/") 114 | repo %>% git2r::status() 115 | repo %>% git2r::add(path = ".") 116 | repo %>% git2r::status() 117 | repo %>% git2r::commit(message = "Update r proj dev history.") 118 | repo %>% git2r::push(name = 'origin', refspec = "refs/heads/master", cred = git2r::cred_token()) 119 | rm("repo") 120 | 121 | 122 | # update local ------------------------------------------------------------ 123 | 124 | git2r::status() 125 | git2r::add(path = "dev_history_r_proj.R") 126 | git2r::status() 127 | git2r::commit(message = "Update r proj dev history.") 128 | git2r::push(name = 'origin', refspec = "refs/heads/master", cred = git2r::cred_token()) 129 | -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.0-py3-none-any.whl -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.0.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.0.tar.gz -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.1-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.1-py3-none-any.whl -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.1.tar.gz -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.2-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.2-py3-none-any.whl -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.0.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.0.2.tar.gz -------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.1.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.1.0-py3-none-any.whl 
-------------------------------------------------------------------------------- /dist/dynamic_topic_modeling-1.1.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JiaxiangBU/dynamic_topic_modeling/96088bdc80938f0557b8561933c69e28a4fc20f3/dist/dynamic_topic_modeling-1.1.0.tar.gz -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site/ 2 | -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem 'github-pages', group: :jekyll_plugins 4 | 5 | 6 | # Added at 2019-11-25 10:11:40 -0800 by jhoward: 7 | gem "jekyll", "~> 3.7" 8 | -------------------------------------------------------------------------------- /docs/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (4.2.11.1) 5 | i18n (~> 0.7) 6 | minitest (~> 5.1) 7 | thread_safe (~> 0.3, >= 0.3.4) 8 | tzinfo (~> 1.1) 9 | addressable (2.7.0) 10 | public_suffix (>= 2.0.2, < 5.0) 11 | coffee-script (2.4.1) 12 | coffee-script-source 13 | execjs 14 | coffee-script-source (1.11.1) 15 | colorator (1.1.0) 16 | commonmarker (0.17.13) 17 | ruby-enum (~> 0.5) 18 | concurrent-ruby (1.1.5) 19 | dnsruby (1.61.3) 20 | addressable (~> 2.5) 21 | em-websocket (0.5.1) 22 | eventmachine (>= 0.12.9) 23 | http_parser.rb (~> 0.6.0) 24 | ethon (0.12.0) 25 | ffi (>= 1.3.0) 26 | eventmachine (1.2.7) 27 | execjs (2.7.0) 28 | faraday (0.17.0) 29 | multipart-post (>= 1.2, < 3) 30 | ffi (1.11.3) 31 | forwardable-extended (2.6.0) 32 | gemoji (3.0.1) 33 | github-pages (202) 34 | activesupport (= 4.2.11.1) 35 | github-pages-health-check (= 1.16.1) 36 | jekyll (= 3.8.5) 37 
| jekyll-avatar (= 0.6.0) 38 | jekyll-coffeescript (= 1.1.1) 39 | jekyll-commonmark-ghpages (= 0.1.6) 40 | jekyll-default-layout (= 0.1.4) 41 | jekyll-feed (= 0.11.0) 42 | jekyll-gist (= 1.5.0) 43 | jekyll-github-metadata (= 2.12.1) 44 | jekyll-mentions (= 1.4.1) 45 | jekyll-optional-front-matter (= 0.3.0) 46 | jekyll-paginate (= 1.1.0) 47 | jekyll-readme-index (= 0.2.0) 48 | jekyll-redirect-from (= 0.14.0) 49 | jekyll-relative-links (= 0.6.0) 50 | jekyll-remote-theme (= 0.4.0) 51 | jekyll-sass-converter (= 1.5.2) 52 | jekyll-seo-tag (= 2.5.0) 53 | jekyll-sitemap (= 1.2.0) 54 | jekyll-swiss (= 0.4.0) 55 | jekyll-theme-architect (= 0.1.1) 56 | jekyll-theme-cayman (= 0.1.1) 57 | jekyll-theme-dinky (= 0.1.1) 58 | jekyll-theme-hacker (= 0.1.1) 59 | jekyll-theme-leap-day (= 0.1.1) 60 | jekyll-theme-merlot (= 0.1.1) 61 | jekyll-theme-midnight (= 0.1.1) 62 | jekyll-theme-minimal (= 0.1.1) 63 | jekyll-theme-modernist (= 0.1.1) 64 | jekyll-theme-primer (= 0.5.3) 65 | jekyll-theme-slate (= 0.1.1) 66 | jekyll-theme-tactile (= 0.1.1) 67 | jekyll-theme-time-machine (= 0.1.1) 68 | jekyll-titles-from-headings (= 0.5.1) 69 | jemoji (= 0.10.2) 70 | kramdown (~> 2.3.0) 71 | liquid (= 4.0.0) 72 | listen (= 3.1.5) 73 | mercenary (~> 0.3) 74 | minima (= 2.5.0) 75 | nokogiri (>= 1.10.4, < 2.0) 76 | rouge (= 3.11.0) 77 | terminal-table (~> 1.4) 78 | github-pages-health-check (1.16.1) 79 | addressable (~> 2.3) 80 | dnsruby (~> 1.60) 81 | octokit (~> 4.0) 82 | public_suffix (~> 3.0) 83 | typhoeus (~> 1.3) 84 | html-pipeline (2.12.2) 85 | activesupport (>= 2) 86 | nokogiri (>= 1.4) 87 | http_parser.rb (0.6.0) 88 | i18n (0.9.5) 89 | concurrent-ruby (~> 1.0) 90 | jekyll (3.8.5) 91 | addressable (~> 2.4) 92 | colorator (~> 1.0) 93 | em-websocket (~> 0.5) 94 | i18n (~> 0.7) 95 | jekyll-sass-converter (~> 1.0) 96 | jekyll-watch (~> 2.0) 97 | kramdown (~> 2.3.0) 98 | liquid (~> 4.0) 99 | mercenary (~> 0.3.3) 100 | pathutil (~> 0.9) 101 | rouge (>= 1.7, < 4) 102 | safe_yaml (~> 1.0) 103 | 
jekyll-avatar (0.6.0) 104 | jekyll (~> 3.0) 105 | jekyll-coffeescript (1.1.1) 106 | coffee-script (~> 2.2) 107 | coffee-script-source (~> 1.11.1) 108 | jekyll-commonmark (1.3.1) 109 | commonmarker (~> 0.14) 110 | jekyll (>= 3.7, < 5.0) 111 | jekyll-commonmark-ghpages (0.1.6) 112 | commonmarker (~> 0.17.6) 113 | jekyll-commonmark (~> 1.2) 114 | rouge (>= 2.0, < 4.0) 115 | jekyll-default-layout (0.1.4) 116 | jekyll (~> 3.0) 117 | jekyll-feed (0.11.0) 118 | jekyll (~> 3.3) 119 | jekyll-gist (1.5.0) 120 | octokit (~> 4.2) 121 | jekyll-github-metadata (2.12.1) 122 | jekyll (~> 3.4) 123 | octokit (~> 4.0, != 4.4.0) 124 | jekyll-mentions (1.4.1) 125 | html-pipeline (~> 2.3) 126 | jekyll (~> 3.0) 127 | jekyll-optional-front-matter (0.3.0) 128 | jekyll (~> 3.0) 129 | jekyll-paginate (1.1.0) 130 | jekyll-readme-index (0.2.0) 131 | jekyll (~> 3.0) 132 | jekyll-redirect-from (0.14.0) 133 | jekyll (~> 3.3) 134 | jekyll-relative-links (0.6.0) 135 | jekyll (~> 3.3) 136 | jekyll-remote-theme (0.4.0) 137 | addressable (~> 2.0) 138 | jekyll (~> 3.5) 139 | rubyzip (>= 1.2.1, < 3.0) 140 | jekyll-sass-converter (1.5.2) 141 | sass (~> 3.4) 142 | jekyll-seo-tag (2.5.0) 143 | jekyll (~> 3.3) 144 | jekyll-sitemap (1.2.0) 145 | jekyll (~> 3.3) 146 | jekyll-swiss (0.4.0) 147 | jekyll-theme-architect (0.1.1) 148 | jekyll (~> 3.5) 149 | jekyll-seo-tag (~> 2.0) 150 | jekyll-theme-cayman (0.1.1) 151 | jekyll (~> 3.5) 152 | jekyll-seo-tag (~> 2.0) 153 | jekyll-theme-dinky (0.1.1) 154 | jekyll (~> 3.5) 155 | jekyll-seo-tag (~> 2.0) 156 | jekyll-theme-hacker (0.1.1) 157 | jekyll (~> 3.5) 158 | jekyll-seo-tag (~> 2.0) 159 | jekyll-theme-leap-day (0.1.1) 160 | jekyll (~> 3.5) 161 | jekyll-seo-tag (~> 2.0) 162 | jekyll-theme-merlot (0.1.1) 163 | jekyll (~> 3.5) 164 | jekyll-seo-tag (~> 2.0) 165 | jekyll-theme-midnight (0.1.1) 166 | jekyll (~> 3.5) 167 | jekyll-seo-tag (~> 2.0) 168 | jekyll-theme-minimal (0.1.1) 169 | jekyll (~> 3.5) 170 | jekyll-seo-tag (~> 2.0) 171 | jekyll-theme-modernist (0.1.1) 
172 | jekyll (~> 3.5) 173 | jekyll-seo-tag (~> 2.0) 174 | jekyll-theme-primer (0.5.3) 175 | jekyll (~> 3.5) 176 | jekyll-github-metadata (~> 2.9) 177 | jekyll-seo-tag (~> 2.0) 178 | jekyll-theme-slate (0.1.1) 179 | jekyll (~> 3.5) 180 | jekyll-seo-tag (~> 2.0) 181 | jekyll-theme-tactile (0.1.1) 182 | jekyll (~> 3.5) 183 | jekyll-seo-tag (~> 2.0) 184 | jekyll-theme-time-machine (0.1.1) 185 | jekyll (~> 3.5) 186 | jekyll-seo-tag (~> 2.0) 187 | jekyll-titles-from-headings (0.5.1) 188 | jekyll (~> 3.3) 189 | jekyll-watch (2.2.1) 190 | listen (~> 3.0) 191 | jemoji (0.10.2) 192 | gemoji (~> 3.0) 193 | html-pipeline (~> 2.2) 194 | jekyll (~> 3.0) 195 | kramdown (~> 2.3.0) 196 | liquid (4.0.0) 197 | listen (3.1.5) 198 | rb-fsevent (~> 0.9, >= 0.9.4) 199 | rb-inotify (~> 0.9, >= 0.9.7) 200 | ruby_dep (~> 1.2) 201 | mercenary (0.3.6) 202 | mini_portile2 (2.4.0) 203 | minima (2.5.0) 204 | jekyll (~> 3.5) 205 | jekyll-feed (~> 0.9) 206 | jekyll-seo-tag (~> 2.1) 207 | minitest (5.13.0) 208 | multipart-post (2.1.1) 209 | nokogiri (1.10.8) 210 | mini_portile2 (~> 2.4.0) 211 | octokit (4.14.0) 212 | sawyer (~> 0.8.0, >= 0.5.3) 213 | pathutil (0.16.2) 214 | forwardable-extended (~> 2.6) 215 | public_suffix (3.1.1) 216 | rb-fsevent (0.10.3) 217 | rb-inotify (0.10.0) 218 | ffi (~> 1.0) 219 | rouge (3.11.0) 220 | ruby-enum (0.7.2) 221 | i18n 222 | ruby_dep (1.5.0) 223 | rubyzip (2.0.0) 224 | safe_yaml (1.0.5) 225 | sass (3.7.4) 226 | sass-listen (~> 4.0.0) 227 | sass-listen (4.0.0) 228 | rb-fsevent (~> 0.9, >= 0.9.4) 229 | rb-inotify (~> 0.9, >= 0.9.7) 230 | sawyer (0.8.2) 231 | addressable (>= 2.3.5) 232 | faraday (> 0.8, < 2.0) 233 | terminal-table (1.8.0) 234 | unicode-display_width (~> 1.1, >= 1.1.1) 235 | thread_safe (0.3.6) 236 | typhoeus (1.3.1) 237 | ethon (>= 0.9.0) 238 | tzinfo (1.2.5) 239 | thread_safe (~> 0.1) 240 | unicode-display_width (1.6.0) 241 | 242 | PLATFORMS 243 | ruby 244 | 245 | DEPENDENCIES 246 | github-pages 247 | jekyll (~> 3.7) 248 | 249 | BUNDLED WITH 250 | 
2.0.2 251 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | repository: JiaxiangBU/dynamic_topic_modeling 2 | output: web 3 | topnav_title: dynamic_topic_modeling 4 | site_title: dynamic_topic_modeling 5 | company_name: Jiaxiang Li 6 | description: Run dynamic topic modeling 7 | # Set to false to disable KaTeX math 8 | use_math: true 9 | # Add Google analytics id if you have one and want to use it here 10 | google_analytics: 11 | # See http://nbdev.fast.ai/search for help with adding Search 12 | google_search: 13 | 14 | host: 127.0.0.1 15 | # the preview server used. Leave as is. 16 | port: 4000 17 | # the port where the preview is rendered. 18 | 19 | exclude: 20 | - .idea/ 21 | - .gitignore 22 | - vendor 23 | 24 | exclude: [vendor] 25 | 26 | highlighter: rouge 27 | markdown: kramdown 28 | kramdown: 29 | input: GFM 30 | auto_ids: true 31 | hard_wrap: false 32 | syntax_highlighter: rouge 33 | 34 | collections: 35 | tooltips: 36 | output: false 37 | 38 | defaults: 39 | - 40 | scope: 41 | path: "" 42 | type: "pages" 43 | values: 44 | layout: "page" 45 | comments: true 46 | search: true 47 | sidebar: home_sidebar 48 | topnav: topnav 49 | - 50 | scope: 51 | path: "" 52 | type: "tooltips" 53 | values: 54 | layout: "page" 55 | comments: true 56 | search: true 57 | tooltip: true 58 | 59 | sidebars: 60 | - home_sidebar 61 | permalink: pretty 62 | 63 | theme: jekyll-theme-cayman 64 | baseurl: /dynamic_topic_modeling/ -------------------------------------------------------------------------------- /docs/_data/alerts.yml: -------------------------------------------------------------------------------- 1 | tip: '