├── .gitignore
├── Graph_built.py
├── README.md
├── dic.txt
├── edge.txt
├── graph.png
├── node.txt
├── relationship_find.py
└── test.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/Graph_built.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import networkx as nx
 5 | import matplotlib.pyplot as plt
 6 | from pylab import mpl
 7 | 
 8 | 
 9 | def Graph_show():
10 |     mpl.rcParams['font.sans-serif'] = ['FangSong'] # 指定默认字体
11 |     mpl.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
12 |     G=nx.Graph()
13 |     # 在NetworkX中，节点可以是任何哈希对象，像一个文本字符串，一幅图像，一个XML对象，甚至是另一个图或任意定制的节点对象
14 |     with open('e:/PY/relationship_find/edge.txt','r') as f:
15 |         for i in f.readlines():
16 |             line=str(i).split()
17 |             if line == []:
18 |                 continue
19 |             if int(line[2])<=50:
20 |                 continue
21 |             G.add_weighted_edges_from([(line[0],line[1],int(line[2]))])
22 |     nx.draw(G,pos=nx.shell_layout(G),node_size=1000,node_color = '#A0CBE2',edge_color='#A0CBE1',with_labels = True,font_size=12)
23 |     plt.show()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/August1s/Relationships-Find-by-Python/ce7f137470ebd7bb70feb661571703ddc2d9f9ce/README.md


--------------------------------------------------------------------------------
/dic.txt:
--------------------------------------------------------------------------------
 1 | ﻿沙瑞金 nr
 2 | 侯亮平 nr
 3 | 李达康 nr
 4 | 易学习 nr
 5 | 欧阳菁 nr
 6 | 陆亦可 nr
 7 | 钟小艾 nr
 8 | 陈岩石 nr
 9 | 季昌明 nr
10 | 赵东来 nr
11 | 陈海 nr
12 | 郑西坡 nr
13 | 梁璐 nr
14 | 吴惠芬 nr
15 | 高育良 nr
16 | 祁同伟 nr
17 | 高小琴 nr
18 | 胡玉贵 nr
19 | 赵瑞龙 nr
20 | 赵立春 nr
21 | 刘新建 nr
22 | 丁义珍 nr
23 | 蔡成功 nr
24 | 田国富 nr
25 | 明白 d
26 | 达康 v
27 | 林城 ns
28 | 孤鹰岭 ns
29 | 吕州 ns
30 | 伯仲 d
31 | 银行卡 n
32 | 师母 n
33 | 老总 n
34 | 老同志 n
35 | 赵公子 n
36 | 师生 n 
37 | 阿庆嫂 nz
38 | 陈老 nz
39 | 安置费 n
40 | 阳光 n 
41 | 师傅 n
42 | 宝宝 n
43 | 程度 nr
44 | 孙连城 nr
45 | 肖钢玉 nr
46 | 郑胜利 nr
47 | 王大路 nr
48 | 林华华 nr
49 | 王文革 nr
50 | 吕梁 nr
51 |  


--------------------------------------------------------------------------------
/edge.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/August1s/Relationships-Find-by-Python/ce7f137470ebd7bb70feb661571703ddc2d9f9ce/edge.txt


--------------------------------------------------------------------------------
/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/August1s/Relationships-Find-by-Python/ce7f137470ebd7bb70feb661571703ddc2d9f9ce/graph.png


--------------------------------------------------------------------------------
/node.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/August1s/Relationships-Find-by-Python/ce7f137470ebd7bb70feb661571703ddc2d9f9ce/node.txt


--------------------------------------------------------------------------------
/relationship_find.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # 基于共现矩阵来确定人物关系
 5 | 
 6 | import jieba
 7 | jieba.load_userdict('e:/PY/relationship_find/dic.txt') #导入自己的字典，不然分词效果不好
 8 | import jieba.posseg as pseg
 9 | import Graph_built
10 | 
11 | 
def cut_word(text):
    """Return the person names found in *text*, in order of appearance.

    The text is segmented with ``pseg.cut``; only tokens whose flag is
    ``'nr'`` and whose word is at least two characters long are kept.
    Everything else (other words, punctuation) is dropped. Each kept name
    also has its counter in the module-level ``Names`` dict incremented.
    """
    found = []
    for token in pseg.cut(text):
        name = token.word
        if token.flag == 'nr' and len(name) >= 2:
            # A name seen for the first time starts counting from zero.
            Names[name] = Names.get(name, 0) + 1
            found.append(name)
    return found
26 | 
27 | 
def namedict_built(text_path='e:/PY/relationship_find/test.txt', top_n=36):
    """Build the name-frequency dict and the per-line name lists.

    Each line of the input text is passed to ``cut_word``, which updates the
    module-level ``Names`` counters. Lines that yield at least two names are
    appended to the module-level ``Lines`` list. Afterwards ``Names`` is
    rebound to a new dict holding only the *top_n* most frequent names,
    ordered by descending count.

    Args:
        text_path: Path of the script text to analyse. Defaults to the
            location the original script used.
        top_n: How many of the most frequent names to keep. Defaults to 36.
    """
    global Names
    # The encoding is left at the platform default, as in the original.
    with open(text_path, 'r') as f:
        for paragraph in f:
            names = cut_word(paragraph)
            # A relationship needs two people, so lines with fewer than two
            # names are not kept.
            if len(names) >= 2:
                Lines.append(names)
    ranked = sorted(Names.items(), key=lambda item: item[1], reverse=True)
    Names = dict(ranked[:top_n])
38 | 
39 | 
def relation_built():
    """Fill the co-occurrence matrix ``relationships`` from ``Lines``.

    ``relationships`` gets one empty row per key of ``Names``. Then, for
    every line in ``Lines``, each ordered pair of different names that both
    have an entry in ``Names`` adds one to ``relationships[a][b]``. A name
    that occurs several times in a line is counted once per occurrence.
    """
    for person in Names:
        relationships[person] = {}

    for names_in_line in Lines:
        # Names that did not survive the frequency cut are ignored.
        tracked = [name for name in names_in_line if Names.get(name)]
        for first in tracked:
            row = relationships[first]
            for second in tracked:
                if second != first:
                    row[second] = row.get(second, 0) + 1
56 | 
def file_built(node_path='e:/PY/relationship_find/node.txt',
               edge_path='e:/PY/relationship_find/edge.txt'):
    """Write the node file and the edge file of the relationship graph.

    The node file gets one ``name count`` line per entry of the
    module-level ``Names`` dict. The edge file gets one
    ``name1 name2 weight`` line per entry of the module-level
    ``relationships`` matrix, so each pair appears once per direction.
    Lines end with CRLF.

    Args:
        node_path: Path of the node file to write. Defaults to the location
            the original script used.
        edge_path: Path of the edge file to write. Defaults to the location
            the original script used.
    """
    # newline='' turns off newline translation, so the explicit '\r\n' is
    # written as is. With the default text mode, Windows expands the '\n'
    # into '\r\n' again and every line would end in '\r\r\n'.
    # The encoding is left at the platform default, as in the original.
    with open(node_path, 'w', newline='') as fn:
        for name, count in Names.items():
            fn.write('{} {}\r\n'.format(name, count))
    with open(edge_path, 'w', newline='') as fe:
        for name, partners in relationships.items():
            for other, weight in partners.items():
                fe.write('{} {} {}\r\n'.format(name, other, weight))
67 |     
68 | 
Names={} # occurrence count of each person name
Lines=[] # one list of names per input line that contains at least two names
relationships={} # 2-D co-occurrence matrix, implemented as a dict of dicts
namedict_built() # fills Lines and rebinds Names to a dict of the most frequent names
relation_built()
file_built()
Graph_built.Graph_show()
76 | 
77 | 
78 | 
79 | 
80 | 
81 |         
82 | 
83 | 


--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/August1s/Relationships-Find-by-Python/ce7f137470ebd7bb70feb661571703ddc2d9f9ce/test.txt


--------------------------------------------------------------------------------