├── .gitignore
├── README.md
├── construct_poets_network.py
├── data
│   ├── early_tang_poets.txt
│   ├── high_tang_poets.txt
│   ├── late_tang_poets.txt
│   ├── middle_tang_poets.txt
│   ├── qts_zhs.txt
│   └── qts_zht.txt
├── html
│   ├── early_tang_poets_net.html
│   ├── echarts-all-3.js
│   ├── full_tang_poets_net.html
│   ├── high_tang_poets_net.html
│   ├── html_head.txt
│   ├── html_tail.txt
│   ├── late_tang_poets_net.html
│   └── middle_tang_poets_net.html
├── utils.py
├── visualize_poets_network.py
└── word_level_analyzer.py
/.gitignore:
--------------------------------------------------------------------------------
# CBDB is too large to include in the repo
data/cbdb_sqlite.db
# Ignore intermediate computation results
save/*
# Ignore generated html pages
html/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Quan Tangshi (Complete Tang Poems) Analyzer
This program was originally written for two articles on my WeChat official account, and those articles also walk through its principles and workflow. Before using the program, you are strongly encouraged to read them first:
- [当我们在读唐诗时,我们在读什么?(When we read Tang poetry, what are we actually reading?)](https://mp.weixin.qq.com/s?__biz=MzI0NTUxMjgyOA==&mid=2247483724&idx=1&sn=9fe912aaaa2757eec2634a95931e1c6a&chksm=e94c2e5fde3ba749e4e364644d6b68d004b295a6864606c79f710b4b0e7e5d07ac3e89481012&mpshare=1&scene=1&srcid=0314cTnPXrmiKE1tR18sIV5m&pass_ticket=LmF1XSUkX6AZUuMnsPEO3vBZgEqfwt9frF%2F%2FATtYfAWYcIhzbawA0%2FclwgYNC1u%2F#rd)
- [计算机告诉你,唐朝诗人之间的关系到底是什么样的?(What does a computer say the relationships between Tang poets really looked like?)](https://mp.weixin.qq.com/s?__biz=MzI0NTUxMjgyOA==&mid=2247483750&idx=1&sn=dd883b547a3fc4343a3dcce1abea3719&chksm=e94c2e75de3ba7631ffd7abff8a89ea56fda63b2f3d3bb81fd845ef5fd3e9207b41230900288&mpshare=1&scene=1&srcid=0314HdoeYueFNse6H7j18qfx&pass_ticket=P5NYT1vI3xq6gboRVFuq64N9z2Yp0ADF4pMH3nRnXAhGuoM7eROG8O2lhVg%2BIvoR#rd)

Accordingly, the program offers two main functions:
- word-frequency and word-vector analysis, corresponding to the first article
- construction of the reference network between poets, corresponding to the second article

The master branch supports Python 3 only. The python2 branch (thanks to [carryme9527](https://github.com/carryme9527/poetry_analyzer), whose work this branch largely is) supports Python 2.
The program uses two main directories:
- the data directory stores the Quan Tangshi corpus and the CBDB database
- the html directory stores the resulting social-network web pages

During computation the program dumps some intermediate results into the save directory (created automatically if it does not exist).

The CBDB database is large (400+ MB) and GitHub does not allow uploading files of that size, so please download the standalone database from the [CBDB website](http://projects.iq.harvard.edu/chinesecbdb/%E4%B8%8B%E8%BC%89cbdb%E5%96%AE%E6%A9%9F%E7%89%88) yourself and store it in the data directory under the file name cbdb_sqlite.db. A sketch of this setup follows below.
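
A minimal sketch of the expected layout (the downloaded file name below is hypothetical; only the target path data/cbdb_sqlite.db is fixed by the scripts):
``` shell
mkdir -p data
# rename the downloaded standalone SQLite database to the name the scripts expect
mv ~/Downloads/cbdb_standalone.db data/cbdb_sqlite.db
```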
# Dependencies
The program depends on two Python libraries:
``` shell
pip3 install thulac
pip3 install gensim
```
thulac is used for word segmentation, gensim for word2vec.
Both libraries are only needed for the analysis in the first article. If you only care about building the poets' network, you do not need to install them.

# Basic usage
For **ordinary users**:
Simply open the web pages in the html directory with a browser to explore the network structure; you can drag and zoom freely, which is quite fun.

For **programmers**:
- run `python3 word_level_analyzer.py` to reproduce the results of the first article
- run `python3 construct_poets_network.py` to build the social network and store the results in the save directory
- run `python3 visualize_poets_network.py` to generate the web pages that display the social network and store them in the html directory (see the end-to-end sketch below)
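
The scripts accept a few optional flags (see each script's argparse defaults). A minimal end-to-end run, assuming data/qts_zhs.txt, data/qts_zht.txt and data/cbdb_sqlite.db are in place, might look like:
``` shell
# optional word-level analysis (needs thulac and gensim; uses the simplified-character corpus by default)
python3 word_level_analyzer.py --qts_path data/qts_zhs.txt
# build the reference network, then render the web pages
python3 construct_poets_network.py --qts_path data/qts_zht.txt --cbdb_path data/cbdb_sqlite.db --save_dir save
python3 visualize_poets_network.py --relations_path save/reference_relations.pkl --data_dir data --html_dir html
```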
# Roadmap
I plan to analyze more classical Chinese texts and will push the updated code to this repository as it is ready. Feel free to follow my WeChat official account: mrqianjinsi
--------------------------------------------------------------------------------
/construct_poets_network.py:
--------------------------------------------------------------------------------
import pickle
import argparse
import os
from collections import Counter, defaultdict

from utils import read_qts, get_alter_names_from_CBDB

# TODO extend the list of famous poets
# Duplicate names for these poets are hard to disambiguate automatically in
# CBDB, so their IDs in the BIOG_MAIN table were looked up by hand.
# Note that CBDB uses traditional Chinese characters.
manual_defuzzy_authors_id = {
    '李林甫': 32534, '王建': 92047,
    '李賀': 93012, '張繼': 93495,
    '張旭': 93409, '李紳': 92982}
# Authors removed by hand
manual_deleted_authors = set(['無作', '清江'])
# Alternate names removed by hand; these aliases are common words in Tang poems
manual_deleted_alter_names = {'李林甫': set(['李十']),
                              '李益': set(['李十']),
                              '李世民': set(['李二']),
                              '李嘉祐': set(['李二']),
                              '馬湘': set(['自然']),
                              '高駢': set(['千里']),
                              '孟浩然': set(['浩然']),
                              '李白': set(['太白']),
                              '黃巢': set(['皇帝']),
                              '眉娘': set(['逍遙'])}
# Alternate names missing from CBDB, added by hand
manual_added_alter_names = {
    '李建': set(['李十一']),
    '劉禹錫': set(['劉二十八'])
}

def get_alter_names(qts_file, cbdb_file, save_dir):
    alter_names_file = os.path.join(save_dir, "alternames.pkl")

    if os.path.exists(alter_names_file):
        print("found dumped alternate-names file, loading directly.")
        with open(alter_names_file, 'rb') as f:
            qts_list, authors_filtered_by_CBDB, alter_names_dict = pickle.load(f)
    else:
        print("processing QuanTangShi...")
        # Read the Quan Tangshi and keep the poem texts and authors
        qts_list, authors_set = read_qts(qts_file)
        # Drop the manually excluded authors
        authors_set -= manual_deleted_authors

        alter_names_dict, authors_filtered_by_CBDB = get_alter_names_from_CBDB(cbdb_file, authors_set,
                                                                               manual_defuzzy_authors_id)
        # Remove the unwanted alternate names
        for k, v in manual_deleted_alter_names.items():
            alter_names_dict[k] -= v
        # Add the alternate names missing from CBDB
        for k, v in manual_added_alter_names.items():
            alter_names_dict[k] |= v

        # Dump the results
        with open(alter_names_file, 'wb') as f:
            pickle.dump([qts_list, authors_filtered_by_CBDB, alter_names_dict], f)

    return qts_list, authors_filtered_by_CBDB, alter_names_dict

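# Illustrative shapes of the values returned by get_alter_names (a sketch,
# not real output; the alternate names shown are hypothetical):
#   qts_list                 -> [(author, title, poem), ...]
#   authors_filtered_by_CBDB -> {'李白', '杜甫', ...}
#   alter_names_dict         -> {'李白': {'青蓮居士', '李十二', ...}, ...}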

def get_refer_relations(qts_list, authors_filtered_by_CBDB, alter_names_dict, save_dir):
    reference_relations_file = os.path.join(save_dir, 'reference_relations.pkl')

    if os.path.exists(reference_relations_file):
        print("found dumped reference-relations file, skip calculating.")
        return
    else:
        print("calculating reference relations...")
        reference_relations_counter = Counter()
        reference_relations_text = defaultdict(list)
        # Search author by author...
        for name in authors_filtered_by_CBDB:
            # ...and poem by poem
            for author, title, text in qts_list:
                # Skip poems whose author did not pass the CBDB filter
                if author not in authors_filtered_by_CBDB:
                    continue

                poem = title + ' ' + text
                # Search for the poet's primary name; a single occurrence in
                # the title plus body is enough
                if poem.find(name) != -1:
                    reference_relations_counter[(author, name)] += 1
                    reference_relations_text[(author, name)].append(title)
                    continue
                # Search for alternate names
                alt_names = alter_names_dict[name]
                for alt_name in alt_names:
                    if poem.find(alt_name) != -1:
                        reference_relations_counter[(author, name)] += 1
                        reference_relations_text[(author, name)].append(title)
                        break
        # Dump the results
        with open(reference_relations_file, 'wb') as f:
            pickle.dump([reference_relations_counter, reference_relations_text], f)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--qts_path', type=str, default='data/qts_zht.txt',
                        help='file path of Quan Tangshi')
    parser.add_argument('--cbdb_path', type=str, default='data/cbdb_sqlite.db',
                        help='file path of CBDB')
    parser.add_argument('--save_dir', type=str, default='save',
                        help='directory to pickle intermediate data')
    args = parser.parse_args()

    # Create the save directory if it does not exist
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    qts_list, authors_filtered_by_CBDB, alter_names_dict = get_alter_names(args.qts_path, args.cbdb_path, args.save_dir)
    get_refer_relations(qts_list, authors_filtered_by_CBDB, alter_names_dict, args.save_dir)


if __name__ == '__main__':
    main()
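
# A quick way to inspect the dumped results afterwards (a sketch; assumes the
# default --save_dir of 'save' was used):
#
#   import pickle
#   with open('save/reference_relations.pkl', 'rb') as f:
#       counter, texts = pickle.load(f)
#   print(counter.most_common(10))   # strongest (author, referenced poet) pairs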
--------------------------------------------------------------------------------
/data/early_tang_poets.txt:
--------------------------------------------------------------------------------
王績
王勃
王梵志
宋之問
杜審言
李百藥
李嶠
陳子昂
駱賓王
李賢
魏徵
上官儀
李世民
盧照鄰
蘇味道
楊炯
劉希夷
寒山
崔液
韋承慶
張若虛
沈佺期
喬知之
--------------------------------------------------------------------------------
/data/high_tang_poets.txt:
--------------------------------------------------------------------------------
李治
綦毋潛
高適
崔顥
戎昱
張說
崔國輔
錢起
蘇頲
王昌齡
王之渙
皇甫冉
張巡
崔峒
岑參
丘爲
杜甫
李嘉祐
西鄙人
劉眘虛
孟浩然
祖詠
儲光羲
劉長卿
萬楚
張九齡
劉灣
劉方平
元結
張謂
張旭
薛稷
李白
韓翃
司空曙
王灣
常非月
張繼
王維
李隆基
常建
李頎
柳中庸
賀知章
邱爲
嚴武
嚴識玄
王翰
--------------------------------------------------------------------------------
/data/late_tang_poets.txt:
--------------------------------------------------------------------------------
王駕
章碣
司空圖
張蟲賓
曹鬆
李羣玉
錢珝
羅隱
黃滔
崔珏
秦韜玉
陳玉蘭
許渾
聶夷中
於濆
馬戴
鄭遨
盧汝弼
來鵠
韋莊
李商隱
溫庭筠
令狐楚
司馬劄
貫休
黃巢
張喬
吳融
鄭穀
薛逢
趙嘏
崔塗
劉駕
金昌緒
雍陶
齊己
崔道融
李洞
杜荀鶴
陳陶
杜牧
陸龜蒙
韓偓
方幹
皮日休
曹鄴
孟賓於
唐彥謙
--------------------------------------------------------------------------------
/data/middle_tang_poets.txt:
--------------------------------------------------------------------------------
李賀
白居易
姚合
於鵠
武元衡
韋應物
賈島
趙微明
劉皁
薛濤
何希堯
權德輿
李德裕
韓氏
元稹
郎士元
張籍
顧況
楊凝
韓愈
張繼
張祜
劉禹錫
嚴維
李約
韓琮
李益
施肩吾
柳宗元
呂溫
杜秋娘
耿湋
李端
賈至
李涉
朱慶餘
張潮
胡令能
李紳
鮑溶
孟郊
王建
劉採春
楊巨源
李坤
盧綸
張仲素
王涯
崔護
劉商
鄭錫
戴叔倫
--------------------------------------------------------------------------------
/html/early_tang_poets_net.html:
--------------------------------------------------------------------------------
(ECharts network page; long markup lines elided in this dump)
--------------------------------------------------------------------------------
/html/full_tang_poets_net.html:
--------------------------------------------------------------------------------
(ECharts network page; long markup lines elided in this dump)
--------------------------------------------------------------------------------
/html/high_tang_poets_net.html:
--------------------------------------------------------------------------------
(ECharts network page; long markup lines elided in this dump)
--------------------------------------------------------------------------------
/html/html_head.txt:
--------------------------------------------------------------------------------
(shared opening markup for the ECharts pages; long lines elided in this dump)
--------------------------------------------------------------------------------
/html/late_tang_poets_net.html:
--------------------------------------------------------------------------------
(ECharts network page; long markup lines elided in this dump)
--------------------------------------------------------------------------------
/html/middle_tang_poets_net.html:
--------------------------------------------------------------------------------
(ECharts network page; long markup lines elided in this dump)
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import sqlite3
from collections import defaultdict

# Read the Quan Tangshi corpus
def read_qts(file_name):
    qts_list = []
    authors_set = set()
    # Read the poems line by line
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            text_segs = line.split()
            title = text_segs[1]
            author = text_segs[2]
            poem = text_segs[-1]

            authors_set.add(author)

            # Strip characters that are not Chinese (keep the two punctuation marks)
            valid_char_list = [c for c in poem if '\u4e00' <= c <= '\u9fff' or c == ',' or c == '。']
            validated_poem = ''.join(valid_char_list)
            # Store as (author, title, poem) tuples
            qts_list.append((author, title, validated_poem))

    return qts_list, authors_set

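# A sketch of the input format read_qts assumes: one poem per line, with
# whitespace-separated fields whose 2nd field is the title, 3rd the author,
# and last the poem text. An illustrative (hypothetical) line:
#   0001 靜夜思 李白 床前明月光,疑是地上霜。舉頭望明月,低頭思故鄉。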

# Fetch the poets' alternate names from CBDB
def get_alter_names_from_CBDB(db_file, authors_set, manual_defuzzy_authors_id):
    tang_begin_year = 618  # founding of the Tang dynasty
    tang_end_year = 907    # fall of the Tang dynasty

    # Poets whose IDs were disambiguated by hand
    manual_defuzzy_authors = set(manual_defuzzy_authors_id.keys())

    authors_not_in_CBDB = set()
    fuzzy_authors = set()
    fuzzy_authors_details = {}
    alter_names_dict = defaultdict(set)

    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()
    for author in authors_set:
        # Use the hand-picked ID if we have one
        if author in manual_defuzzy_authors:
            author_id = manual_defuzzy_authors_id[author]
        else:  # otherwise query CBDB
            # Some poets are named differently in the Quan Tangshi and in CBDB,
            # so a fuzzy search works better; for example, "貫休" appears in
            # CBDB as "釋貫休"
            author_pattern = '%' + author
            cursor.execute('SELECT c_personid, c_birthyear, c_deathyear FROM BIOG_MAIN WHERE c_name_chn LIKE ?',
                           (author_pattern,))
            person_info_list = cursor.fetchall()

            # Resolve duplicate names. For the exact strategy, see my WeChat
            # (mrqianjinsi) article 《计算机告诉你,唐朝诗人之间的关系到底是什么样的?》
            candidate_author_ids = []
            for person_id, birth_year, death_year in person_info_list:
                if birth_year and death_year:  # both birth and death year known
                    if birth_year < tang_end_year and death_year > tang_begin_year:
                        # As soon as we find one fully dated person whose life
                        # overlaps the Tang dynasty, stop looking at the others
                        candidate_author_ids = [person_id]
                        break
                elif birth_year or death_year:  # only a birth year or a death year
                    year = birth_year if birth_year else death_year
                    if year > tang_begin_year and year < tang_end_year:
                        candidate_author_ids.append(person_id)

            # Reject empty candidate lists as well as lists with more than one candidate
            if not candidate_author_ids:
                authors_not_in_CBDB.add(author)
                # print("can't find valid items for %s" % author)
                continue
            elif len(candidate_author_ids) > 1:
                fuzzy_authors.add(author)
                fuzzy_authors_details[author] = candidate_author_ids
                # print('fuzzy authors: %s' % author)
                continue

            author_id = candidate_author_ids[0]

        # Look up the poet's alternate names by author_id
        cursor.execute('SELECT c_alt_name_chn FROM ALTNAME_DATA WHERE c_personid=?',
                       (author_id,))
        alt_name_list = cursor.fetchall()
        for alt_name in alt_name_list:
            # Skip single-character alternate names
            if len(alt_name[0]) > 1:
                alter_names_dict[author].add(alt_name[0])

    conn.close()

    # Poets that passed the CBDB filter; only the relations among them are analyzed
    authors_filtered_by_CBDB = authors_set - authors_not_in_CBDB - fuzzy_authors

    return alter_names_dict, authors_filtered_by_CBDB

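# A standalone sketch of the same two-step CBDB lookup, handy for interactive
# experiments (the poet name below is just an example; BIOG_MAIN and
# ALTNAME_DATA are the tables this module already relies on):
#
#   import sqlite3
#   conn = sqlite3.connect('data/cbdb_sqlite.db')
#   cur = conn.cursor()
#   cur.execute('SELECT c_personid FROM BIOG_MAIN WHERE c_name_chn LIKE ?', ('%李白',))
#   person_id = cur.fetchone()[0]
#   cur.execute('SELECT c_alt_name_chn FROM ALTNAME_DATA WHERE c_personid=?', (person_id,))
#   print(cur.fetchall())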
--------------------------------------------------------------------------------
/visualize_poets_network.py:
--------------------------------------------------------------------------------
import pickle
import argparse
import os
import math

# If needed, opencc can convert between traditional and simplified Chinese.
# opencc must be installed on your machine, e.g.:
# opencc = 'opencc -i echart_visualize/poets_network_early.html -o echart_visualize/poets_network_early_zhs.html -c zht2zhs.ini'


# Take the top visualize_range reference relations directly
def get_concerned_relations_by_range(reference_relations_counter, visualize_range):
    # Fetch the strongest reference relations
    relations = reference_relations_counter.most_common(visualize_range)
    max_refer_count = relations[0][1]
    min_refer_count = relations[-1][1]

    return relations, max_refer_count, min_refer_count

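# Illustrative shape of the data handled here (hypothetical counts):
#   reference_relations_counter = Counter({('白居易', '元稹'): 30, ...})
#   get_concerned_relations_by_range(reference_relations_counter, 2)
#     -> ([(('白居易', '元稹'), 30), (('元稹', '白居易'), 25)], 30, 25)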
# Get the reference relations within a given group of poets; suitable for
# drawing the network inside one group
def get_concerned_relations_by_authors(reference_relations_counter, authors):
    # Collect the reference relations within the given group of authors
    relations = []
    max_refer_count = 0
    min_refer_count = float('inf')
    for (referred_by, referred), count in reference_relations_counter.items():
        # Do not count self-references
        if referred_by == referred:
            continue
        if referred_by in authors and referred in authors:
            if count > max_refer_count:
                max_refer_count = count
            if count < min_refer_count:
                min_refer_count = count

            relations.append(((referred_by, referred), count))

    return relations, max_refer_count, min_refer_count

# Plotting every relation can look very crowded; count_to_plot_threshold
# controls the weakest relation that is still drawn: only relations with a
# reference count >= count_to_plot_threshold are shown
def generate_html_page(relations, max_refer_count, min_refer_count, saved_html_file, count_to_plot_threshold=1):
    html_dir = os.path.dirname(saved_html_file)
    html_head_path = os.path.join(html_dir, 'html_head.txt')
    html_tail_path = os.path.join(html_dir, 'html_tail.txt')

    min_link_width = 0.5
    max_link_width = 3.0

    # Reference counts span a wide range; taking the square root compresses
    # the range and makes the plot easier to read
    max_refer_count = math.sqrt(max_refer_count)
    min_refer_count = math.sqrt(min_refer_count)
    # Guard against a zero division when all counts are equal
    if max_refer_count == min_refer_count:
        width_slope = 0.0
    else:
        width_slope = (max_link_width - min_link_width) / (max_refer_count - min_refer_count)
    # Format the links data
    links_text = 'links: [\n'
    links_item_format = """{source: '%s', target: '%s',
            lineStyle:{normal:{width: %f}}},
"""
    filtered_authors = set()
    for (referred_by, referred), count in relations:
        # Skip self-references, which could otherwise produce isolated nodes
        if referred_by == referred:
            continue
        # Skip relations below the threshold
        if count < count_to_plot_threshold:
            continue

        filtered_authors.add(referred_by)
        filtered_authors.add(referred)
        count = math.sqrt(count)
        line_width = min_link_width + width_slope * (count - min_refer_count)
        links_text += links_item_format % (referred_by, referred, line_width)

    links_text += '],\n'

    # Format the node data
    data_text = 'data:[\n'
    data_item_format = "{name: '%s'},\n"
    for author in filtered_authors:
        data_text += data_item_format % author

    data_text += '],\n'

    # Read the head and tail parts of the html page
    with open(html_head_path, 'r', encoding='utf-8') as f:
        head_text = f.read()

    with open(html_tail_path, 'r', encoding='utf-8') as f:
        tail_text = f.read()

    # Concatenate and save as html
    with open(saved_html_file, 'w', encoding='utf-8') as f:
        f.write(head_text + data_text + links_text + tail_text)

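# Illustrative shape of the fragment written between html_head.txt and
# html_tail.txt (the names and width are made up):
#
#   data:[
#   {name: '白居易'},
#   {name: '元稹'},
#   ],
#   links: [
#   {source: '白居易', target: '元稹',
#               lineStyle:{normal:{width: 2.100000}}},
#   ],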
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--relations_path', type=str, default='save/reference_relations.pkl',
                        help='file to load relations data')
    parser.add_argument('--data_dir', type=str, default='data',
                        help='directory to load authors file')
    parser.add_argument('--html_dir', type=str, default='html',
                        help='directory to save html page')

    args = parser.parse_args()

    with open(args.relations_path, 'rb') as f:
        reference_relations_counter, reference_relations_text = pickle.load(f)

    # Draw the top-100 relations across the whole Tang dynasty
    relations, max_refer_count, min_refer_count = get_concerned_relations_by_range(reference_relations_counter, 100)
    saved_html = os.path.join(args.html_dir, 'full_tang_poets_net.html')
    generate_html_page(relations, max_refer_count, min_refer_count, saved_html)

    # Draw the poet networks of the early, high, middle and late Tang periods
    # (poet-name file, network html page, reference-count threshold)
    files_name_array = [('early_tang_poets.txt', 'early_tang_poets_net.html', 1),
                        ('high_tang_poets.txt', 'high_tang_poets_net.html', 2),
                        ('middle_tang_poets.txt', 'middle_tang_poets_net.html', 2),
                        ('late_tang_poets.txt', 'late_tang_poets_net.html', 1)]

    for authors_file_name, html_file_name, threshold in files_name_array:
        authors_file_path = os.path.join(args.data_dir, authors_file_name)
        with open(authors_file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        authors = set(text.split())

        relations, max_refer_count, min_refer_count = get_concerned_relations_by_authors(reference_relations_counter, authors)

        saved_html = os.path.join(args.html_dir, html_file_name)
        generate_html_page(relations, max_refer_count, min_refer_count, saved_html, threshold)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/word_level_analyzer.py:
--------------------------------------------------------------------------------
from collections import Counter, defaultdict
import thulac
import pickle
import os
import argparse

import multiprocessing
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence

# Segment the Quan Tangshi into words
def cut_qts_to_words(qts_file, saved_words_file):
    save_dir = os.path.dirname(saved_words_file)
    dumped_file = os.path.join(save_dir, 'qts_words_stat_result.pkl')

    if os.path.exists(dumped_file) and os.path.exists(saved_words_file):
        print('found preprocessed data, loading directly...')
        with open(dumped_file, 'rb') as f:
            char_counter, author_counter, vocab, word_counter, genre_counter = pickle.load(f)
    else:
        char_counter = Counter()              # character frequencies
        author_counter = Counter()            # number of poems per author
        vocab = set()                         # vocabulary
        word_counter = Counter()              # word frequencies
        genre_counter = defaultdict(Counter)  # one Counter per part of speech

        fid_save = open(saved_words_file, 'w', encoding='utf-8')
        lex_analyzer = thulac.thulac()        # word segmenter
        line_cnt = 0
        with open(qts_file, 'r', encoding='utf-8') as f:
            for line in f:
                text_segs = line.split()
                author = text_segs[2]
                author_counter[author] += 1

                poem = text_segs[-1]
                # Strip characters that are not Chinese (keep the two punctuation marks)
                valid_char_list = [c for c in poem if '\u4e00' <= c <= '\u9fff' or c == ',' or c == '。']
                for char in valid_char_list:
                    char_counter[char] += 1

                regularized_poem = ''.join(valid_char_list)
                word_genre_pairs = lex_analyzer.cut(regularized_poem)

                word_list = []
                for word, genre in word_genre_pairs:
                    word_list.append(word)
                    vocab.add(word)
                    word_counter[word] += 1
                    genre_counter[genre][word] += 1

                save_line = ' '.join(word_list)
                fid_save.write(save_line + '\n')

                if line_cnt % 10 == 0:
                    print('%d poems processed.' % line_cnt)
                line_cnt += 1

        fid_save.close()
        # Dump the statistics
        dumped_data = [char_counter, author_counter, vocab, word_counter, genre_counter]
        with open(dumped_file, 'wb') as f:
            pickle.dump(dumped_data, f)

    return char_counter, author_counter, genre_counter

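# Note: thulac's cut() returns a list of [word, pos_tag] pairs, which is what
# the unpacking above relies on; tags such as 'ns' (place name), 't' (time
# word) and 's' (scene/locative word) are used in print_stat_results below.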
# Train word vectors from the segmented text
def word2vec(words_file):
    save_dir = os.path.dirname(words_file)
    vector_file = os.path.join(save_dir, 'word_vectors.model')

    if os.path.exists(vector_file):
        print('found word vector file, loading directly...')
        model = Word2Vec.load(vector_file)
    else:
        print('calculating word vectors...')
        model = Word2Vec(LineSentence(words_file), size=400, window=3, min_count=10,
                         workers=multiprocessing.cpu_count())
        # Save the model so it does not have to be retrained next time
        model.save(vector_file)

    return model

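# Note: the call above targets gensim 3.x. In gensim >= 4.0 the `size`
# parameter was renamed to `vector_size`, and `model.most_similar(...)`
# (used below) must be written as `model.wv.most_similar(...)`.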
def print_stat_results(char_counter, author_counter, genre_counter, vector_model):
    def print_counter(counter):
        for k, v in counter:
            print(k, v)

    # Poets ranked by number of poems written
    print('\nPoets ranked by number of poems')
    print_counter(author_counter.most_common(10))

    # Character-level analysis
    print('\n\nCharacter-level analysis')
    # Most common characters
    print('\nMost common characters')
    print_counter(char_counter.most_common(12))
    # Seasons
    print('\nSeasons')
    for c in ['春', '夏', '秋', '冬']:
        print(c, char_counter[c])
    # Colors
    print('\nColors')
    colors = ['红', '白', '青', '蓝', '绿', '紫', '黑', '黄']
    for c in colors:
        print(c, char_counter[c])
    # Plants
    print('\nPlants')
    plants = ['梅', '兰', '竹', '菊', '松', '柳', '枫', '桃', '梨', '杏']
    for p in plants:
        print(p, char_counter[p])
    # Animals (Chinese zodiac)
    print('\nAnimals')
    age_animals = ['鼠', '牛', '虎', '兔', '龙', '蛇', '马', '羊', '猴', '鸡', '狗', '猪']
    for a in age_animals:
        print(a, char_counter[a])

    # Word-level analysis
    print('\n\nWord-level analysis')
    # Place names
    print('\nMost common place-name words')
    print_counter(genre_counter['ns'].most_common(10))
    # Time words
    print('\nMost common time words')
    print_counter(genre_counter['t'].most_common(10))
    # Scene/locative words
    print('\nMost common scene words')
    print_counter(genre_counter['s'].most_common(10))


    # Word-vector analysis
    print('\n\nWord-vector analysis')
    # print(vector_model['今日'])
    def print_similar_words(word):
        print('\nWords closest in meaning to "%s"' % word)
        print_counter(vector_model.most_similar(word))

    print_similar_words('天子')
    print_similar_words('寂寞')


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--qts_path', type=str, default='data/qts_zhs.txt',
                        help='file path of Quan Tangshi')
    parser.add_argument('--words_path', type=str, default='save/qts_words_list.txt',
                        help='file path to save Quan Tangshi words data')
    args = parser.parse_args()

    # Create the save directory if it does not exist
    save_dir = os.path.dirname(args.words_path)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    char_counter, author_counter, genre_counter = cut_qts_to_words(args.qts_path, args.words_path)
    vector_model = word2vec(args.words_path)

    print_stat_results(char_counter, author_counter, genre_counter, vector_model)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------