├── .gitignore ├── Graph_built.py ├── README.md ├── dic.txt ├── edge.txt ├── graph.png ├── node.txt ├── relationship_find.py └── test.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Graph_built.py: 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Draw the character co-occurrence graph described by an edge file."""

import networkx as nx
import matplotlib.pyplot as plt
from pylab import mpl


def Graph_show(edge_path='e:/PY/relationship_find/edge.txt', min_weight=50):
    """Read weighted edges from a text file and display them as a graph.

    Each non-blank line of the edge file is expected to hold three
    whitespace-separated fields: ``source target weight``, where
    ``weight`` is an integer.

    Args:
        edge_path: Path of the edge file to read. Defaults to the location
            the original script used.
        min_weight: Edges whose weight is less than or equal to this value
            are left out of the drawing. Defaults to 50.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field of a line is not an integer.
    """
    mpl.rcParams['font.sans-serif'] = ['FangSong']  # default font (able to render CJK labels)
    mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from rendering as a box

    # In NetworkX a node can be any hashable object: a text string, an image,
    # an XML object, another graph, or any custom node object.
    G = nx.Graph()

    edges = []
    # The file is opened with the platform default encoding, as in the
    # original code.
    with open(edge_path, 'r') as f:
        for raw_line in f:
            fields = raw_line.split()
            if not fields:
                # Blank line: nothing to parse.
                continue
            weight = int(fields[2])
            if weight <= min_weight:
                # Weak relationship: omit it to keep the picture readable.
                continue
            edges.append((fields[0], fields[1], weight))
    G.add_weighted_edges_from(edges)

    nx.draw(
        G,
        pos=nx.shell_layout(G),
        node_size=1000,
        node_color='#A0CBE2',
        edge_color='#A0CBE1',
        with_labels=True,
        font_size=12,
    )
    plt.show()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Determine the relationships between characters from a co-occurrence matrix.

import jieba
# Load the custom dictionary first; segmentation quality is poor without it.
jieba.load_userdict('e:/PY/relationship_find/dic.txt')
import jieba.posseg as pseg
import Graph_built


Names = {}          # name -> number of occurrences in the script
Lines = []          # one list of names per input line (only lines with >= 2 names)
relationships = {}  # 2-D co-occurrence matrix stored as a dict of dicts


def cut_word(text):
    """Segment ``text`` and return the person names found in it.

    Only tokens tagged ``'nr'`` with at least two characters are kept;
    every other token is ignored. As a side effect, each kept name's
    counter in the module-level ``Names`` dict is incremented.

    Args:
        text: One line of the script.

    Returns:
        The kept names, in order of appearance (duplicates included).
    """
    found = []
    for token in pseg.cut(text):
        if token.flag != 'nr' or len(token.word) < 2:
            continue
        Names[token.word] = Names.get(token.word, 0) + 1
        found.append(token.word)
    return found


def namedict_built(path='e:/PY/relationship_find/test.txt', top_n=36):
    """Build the name-frequency dict and the per-line name lists.

    Reads the script line by line, appends every line's name list to the
    module-level ``Lines`` when it holds at least two names, and finally
    rebinds the module-level ``Names`` to its ``top_n`` most frequent
    entries.

    Args:
        path: Path of the script text to analyse.
        top_n: How many of the most frequent names to keep.
    """
    global Names
    with open(path, 'r') as f:
        for raw_line in f:
            names_in_line = cut_word(raw_line)
            # A relationship needs two names, so empty and single-name
            # lists are useless.
            if len(names_in_line) >= 2:
                Lines.append(names_in_line)
    ranked = sorted(Names.items(), key=lambda item: item[1], reverse=True)
    Names = dict(ranked[:top_n])


def relation_built():
    """Fill the co-occurrence matrix by walking over ``Lines``.

    For every ordered pair of distinct kept names that appear on the same
    line, ``relationships[name1][name2]`` is incremented. A name that
    occurs several times on a line contributes once per occurrence.
    """
    for key in Names:
        relationships[key] = {}
    for line in Lines:
        # Filter once per line instead of re-checking inside the inner loop.
        kept = [name for name in line if name in Names]
        for name1 in kept:
            row = relationships[name1]
            for name2 in kept:
                if name1 == name2:
                    continue
                row[name2] = row.get(name2, 0) + 1


def file_built(node_path='e:/PY/relationship_find/node.txt',
               edge_path='e:/PY/relationship_find/edge.txt'):
    """Write the node file and the edge file of the relationship graph.

    The node file holds ``name count`` per line; the edge file holds
    ``name1 name2 weight`` per line.

    Args:
        node_path: Destination of the node file.
        edge_path: Destination of the edge file.
    """
    # Write '\n' only: a text-mode file already translates it to the
    # platform line ending. Writing '\r\n' explicitly yields '\r\r\n' on
    # Windows, which reads back as an extra blank line after every record.
    with open(node_path, 'w') as fn:
        fn.writelines(
            '{} {}\n'.format(name, count) for name, count in Names.items()
        )
    with open(edge_path, 'w') as fe:
        for name1, row in relationships.items():
            fe.writelines(
                '{} {} {}\n'.format(name1, name2, weight)
                for name2, weight in row.items()
            )


# The pipeline runs at module level, as in the original script.
namedict_built()  # afterwards Names is a dict: name -> count
relation_built()
file_built()
Graph_built.Graph_show()