├── README.md
├── data
    ├── idf.txt
    └── token_vector.bin
├── keyinfo_extract.py
└── script
    ├── abstract_textrank.py
    ├── keywords_textrank.py
    ├── keywords_tfidf.py
    ├── sentence_similarity.py
    └── textrank.py


/README.md:
--------------------------------------------------------------------------------
 1 | # KeyInfoExtraction
 2 | # 简介
 3 | Self-implemented key information extraction (keywords and abstracts) from text, using algorithms such as TextRank and TF-IDF.    
 4 | 基于Textrank算法的文本摘要抽取与关键词抽取，基于TFIDF算法的关键词抽取。  
 5 | 
 6 | # 使用介绍
 7 | 
 8 |     from keyinfo_extract import KeyInfoExtract  
 9 |     nlp = KeyInfoExtract()  
10 |     text = '''（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）
11 |     央视新闻客户端11月24日消息，2017年8月31日晚，在陕西省榆林市第一医院绥德院区，产妇马茸茸在待产时，从医院五楼坠亡。事发后，医院方面表示，由于家属多次拒绝剖宫产，最终导致产妇难忍疼痛跳楼。但是产妇家属却声称，曾向医生多次提出剖宫产被拒绝。
12 |     事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生。央视记者找到了等待产妇的家属，主治医生，病区主任，以及当时的两位助产师，一位实习医生，希望通过他们的讲述，更准确地还原事情经过。
13 |     产妇待产时坠亡，事件有何疑点。公安机关经过调查，排除他杀可能，初步认定马茸茸为跳楼自杀身亡。马茸茸为何会在医院待产期间跳楼身亡，这让所有人的目光都聚焦到了榆林第一医院，这家在当地人心目中数一数二的大医院。
14 |     就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通？这就需要与之相关的法律法规更加的细化、人性化并且充满温度。用这种温度来消除孕妇对未知的恐惧，来保障医患双方的权益，迎接新生儿平安健康地来到这个世界。'''
15 | 
16 |     keywords_textrank = nlp.extract_keywords_textrank(text, 10)
17 |     keywords_tfidf = nlp.extract_keywords_tfidf(text, 10)
18 |     abstract_textrank = nlp.extract_abstract(text, 3)
19 | 
20 |     print(keywords_tfidf)
21 |      ('产妇', 0.16089872363839283)
22 |      ('医院', 0.10469306102267856)
23 |      ('待产', 0.10192652680535713)
24 |      ('剖宫产', 0.09611770924999999)
25 |      ('家属', 0.09150082801845238)
26 |      ('坠亡', 0.069497286319104)
27 |      ('事件', 0.06781284802178572)
28 |      ('跳楼', 0.061929583872023804)
29 |      ('患者', 0.056677817569285714)
30 |      ('榆林', 0.053159906859523806)
31 |      
32 |     print(keywords_textrank)
33 |     ('产妇', 1.0)
34 |     ('医院', 0.5913681024247537)
35 |     ('家属', 0.5429117450097523)
36 |     ('事件', 0.5252165334872677)
37 |     ('剖宫产', 0.4323518137698726)
38 |     ('患者', 0.42213201850447274)
39 |     ('榆林', 0.3458613813882902)
40 |     ('温度', 0.3433894045919456)
41 |     ('跳楼', 0.3253241303426245)
42 |     ('事情', 0.30329273312129706)
43 |     
44 |     print(abstract_textrank)
45 |      ('就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通', 1.0)
46 |      ('事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生', 0.9999999860476693)
47 |      ('（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）', 0.99999402813924)
48 | 
49 | 


--------------------------------------------------------------------------------
/keyinfo_extract.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: keyinfo_extract.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | from script.keywords_textrank import *
 7 | from script.keywords_tfidf import *
 8 | from script.abstract_textrank import *
 9 | 
class KeyInfoExtract:
    """Single entry point that bundles the three extractors of this project.

    Each public method simply forwards its arguments to one of the extractor
    objects created in ``__init__`` and returns that extractor's result
    unchanged.
    """

    def __init__(self):
        # The (misspelled) attribute names are visible on instances, so they
        # are kept exactly as they are. Construction order is preserved too.
        self.kewords_textanker = TextRank()
        self.kewords_tfidfer = TFIDF()
        self.abstract_textranker = AbstarctTextrank()

    def extract_keywords_textrank(self, text, num_keywords):
        """Return up to ``num_keywords`` keywords of ``text`` ranked by TextRank."""
        ranker = self.kewords_textanker
        keywords = ranker.extract_keywords(text, num_keywords)
        return keywords

    def extract_keywords_tfidf(self, text, num_keywords):
        """Return up to ``num_keywords`` keywords of ``text`` ranked by TF-IDF."""
        scorer = self.kewords_tfidfer
        keywords = scorer.extract_keywords(text, num_keywords)
        return keywords

    def extract_abstract(self, text, num_sentences):
        """Return up to ``num_sentences`` key sentences of ``text``."""
        summarizer = self.abstract_textranker
        sentences = summarizer.extract_abstract(text, num_sentences)
        return sentences
24 | 
25 | 
def test():
    """Demo: run all three extractors on a sample news article and print the results.

    Builds a ``KeyInfoExtract`` instance, extracts 10 TextRank keywords,
    10 TF-IDF keywords and a 3-sentence abstract, then prints them in the
    order TF-IDF keywords, TextRank keywords, abstract.
    """
    nlp = KeyInfoExtract()

    text = '''（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）
    央视新闻客户端11月24日消息，2017年8月31日晚，在陕西省榆林市第一医院绥德院区，产妇马茸茸在待产时，从医院五楼坠亡。事发后，医院方面表示，由于家属多次拒绝剖宫产，最终导致产妇难忍疼痛跳楼。但是产妇家属却声称，曾向医生多次提出剖宫产被拒绝。
    事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生。央视记者找到了等待产妇的家属，主治医生，病区主任，以及当时的两位助产师，一位实习医生，希望通过他们的讲述，更准确地还原事情经过。
    产妇待产时坠亡，事件有何疑点。公安机关经过调查，排除他杀可能，初步认定马茸茸为跳楼自杀身亡。马茸茸为何会在医院待产期间跳楼身亡，这让所有人的目光都聚焦到了榆林第一医院，这家在当地人心目中数一数二的大医院。
    就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通？这就需要与之相关的法律法规更加的细化、人性化并且充满温度。用这种温度来消除孕妇对未知的恐惧，来保障医患双方的权益，迎接新生儿平安健康地来到这个世界。'''

    keywords_textrank = nlp.extract_keywords_textrank(text, 10)
    keywords_tfidf = nlp.extract_keywords_tfidf(text, 10)
    abstract_textrank = nlp.extract_abstract(text, 3)

    print(keywords_tfidf)
    print(keywords_textrank)
    print(abstract_textrank)


# Run the demo only when this file is executed as a script. Without the guard,
# merely importing the module would build every extractor and print the demo
# output as an import side effect.
if __name__ == '__main__':
    test()
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/script/abstract_textrank.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: abstract_textrank.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | 
 7 | from collections import defaultdict
 8 | import jieba.posseg as pseg
 9 | from .textrank import *
10 | from .sentence_similarity import *
11 | import re
12 | 
class AbstarctTextrank:
    """Extractive summarizer that ranks sentences with TextRank.

    Sentences become graph nodes; two sentences are linked when the cosine
    similarity of their word lists reaches ``self.sim_score``.
    """

    def __init__(self):
        self.span = 3  # not read by any method of this class
        self.similer = SimilarityCompute()
        # Sentence-similarity threshold used to decide whether two sentences
        # are connected by an edge.
        self.sim_score = 0.5

    def sentence_split(self, text):
        """Split ``text`` into sentences and tokenize each one.

        Sentences are delimited by ？ ！ 。 ; ； and line breaks; empty pieces
        are dropped. Tokens whose POS flag starts with 'x', 'u', 'p' or 'w'
        are filtered out.

        Returns:
            dict mapping sentence index -> [sentence_text, list_of_words].
        """
        sentence_dict = {}
        sentences = [piece for piece in re.split(r'[？！。;；\n\r]', text) if piece]
        for index, sentence in enumerate(sentences):
            words = [
                token.word
                for token in pseg.cut(sentence)
                if token.flag[0] not in ('x', 'u', 'p', 'w')
            ]
            sentence_dict[index] = [sentence, words]
        return sentence_dict

    def extract_abstract(self, text, num_sentences):
        """Return the ``num_sentences`` highest-ranked sentences of ``text``.

        Returns:
            list of (sentence_text, score) tuples sorted by descending score.
        """
        sentence_dict = self.sentence_split(text)
        g = textrank_graph()
        cm = defaultdict(int)
        # Every ordered pair is visited (including a sentence with itself),
        # which is what the original weighting relies on.
        for s1 in sentence_dict.values():
            for s2 in sentence_dict.values():
                # A sentence without usable tokens has no meaningful vector;
                # it can never reach the threshold, so skip the computation.
                if not s1[1] or not s2[1]:
                    continue
                sim_score = self.similer.similarity_cosine(s1[1], s2[1])
                # Honour the configured threshold instead of a literal 0.5.
                if sim_score >= self.sim_score:
                    cm[(s1[0], s2[0])] += 1
        for terms, w in cm.items():
            g.addEdge(terms[0], terms[1], w)
        nodes_rank = g.rank()
        nodes_rank = sorted(nodes_rank.items(), key=lambda item: item[1], reverse=True)
        return nodes_rank[:num_sentences]
40 | 
41 | 
42 | # def test():
43 | #     text = '''（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）
44 | #     央视新闻客户端11月24日消息，2017年8月31日晚，在陕西省榆林市第一医院绥德院区，产妇马茸茸在待产时，从医院五楼坠亡。事发后，医院方面表示，由于家属多次拒绝剖宫产，最终导致产妇难忍疼痛跳楼。但是产妇家属却声称，曾向医生多次提出剖宫产被拒绝。
45 | #     事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生。央视记者找到了等待产妇的家属，主治医生，病区主任，以及当时的两位助产师，一位实习医生，希望通过他们的讲述，更准确地还原事情经过。
46 | #     产妇待产时坠亡，事件有何疑点。公安机关经过调查，排除他杀可能，初步认定马茸茸为跳楼自杀身亡。马茸茸为何会在医院待产期间跳楼身亡，这让所有人的目光都聚焦到了榆林第一医院，这家在当地人心目中数一数二的大医院。
47 | #     就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通？这就需要与之相关的法律法规更加的细化、人性化并且充满温度。用这种温度来消除孕妇对未知的恐惧，来保障医患双方的权益，迎接新生儿平安健康地来到这个世界。'''
48 | #
49 | #     abstracter = AbstarctTextrank()
50 | #     keysentences = abstracter.extract_abstract(text, 3)
51 | #
52 | #     for sent in keysentences:
53 | #         print(sent)
54 | #     '''
55 | #     ('就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通', 1.0)
56 | #     ('事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生', 0.9999999860476693)
57 | #     ('（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）', 0.99999402813924)
58 | #     '''
59 | #
60 | # test()
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/script/keywords_textrank.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: keywords_textrank.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | 
 7 | from collections import defaultdict
 8 | import jieba.posseg as pseg
 9 | from .textrank import *
10 | 
class TextRank:
    """Keyword extractor based on a word co-occurrence graph ranked by TextRank."""

    def __init__(self):
        # First letters of the POS flags that make a word a keyword candidate.
        self.candi_pos = ['n', 'v', 'a']
        # Co-occurrence window: a word is paired with the span - 1 tokens after it.
        self.span = 5

    def extract_keywords(self, text, num_keywords):
        """Return the ``num_keywords`` best-ranked words of ``text``.

        Returns:
            list of (word, score) tuples sorted by descending score.
        """
        tokens = [[tok.word, tok.flag] for tok in pseg.cut(text)]
        total = len(tokens)

        def is_candidate(token):
            # Candidate = allowed POS prefix and more than one character.
            return token[1][0] in self.candi_pos and len(token[0]) > 1

        cooccurrence = defaultdict(int)
        for position, head in enumerate(tokens):
            if not is_candidate(head):
                continue
            window_end = min(position + self.span, total)
            for other in range(position + 1, window_end):
                tail = tokens[other]
                if is_candidate(tail):
                    cooccurrence[(head[0], tail[0])] += 1

        graph = textrank_graph()
        for (first, second), count in cooccurrence.items():
            graph.addEdge(first, second, count)

        ranked = sorted(graph.rank().items(), key=lambda item: item[1], reverse=True)
        return ranked[:num_keywords]
36 | 
37 | # def test():
38 | #     text = '''（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）
39 | #     央视新闻客户端11月24日消息，2017年8月31日晚，在陕西省榆林市第一医院绥德院区，产妇马茸茸在待产时，从医院五楼坠亡。事发后，医院方面表示，由于家属多次拒绝剖宫产，最终导致产妇难忍疼痛跳楼。但是产妇家属却声称，曾向医生多次提出剖宫产被拒绝。
40 | #     事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生。央视记者找到了等待产妇的家属，主治医生，病区主任，以及当时的两位助产师，一位实习医生，希望通过他们的讲述，更准确地还原事情经过。
41 | #     产妇待产时坠亡，事件有何疑点。公安机关经过调查，排除他杀可能，初步认定马茸茸为跳楼自杀身亡。马茸茸为何会在医院待产期间跳楼身亡，这让所有人的目光都聚焦到了榆林第一医院，这家在当地人心目中数一数二的大医院。
42 | #     就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通？这就需要与之相关的法律法规更加的细化、人性化并且充满温度。用这种温度来消除孕妇对未知的恐惧，来保障医患双方的权益，迎接新生儿平安健康地来到这个世界。'''
43 | #     textranker = TextRank()
44 | #     keywords = textranker.extract_keywords(text, 10)
45 | #
46 | #     for keyword in keywords:
47 | #         print(keyword)
48 | #     '''
49 | #     ('产妇', 1.0)
50 | #     ('医院', 0.5913681024247537)
51 | #     ('家属', 0.5429117450097523)
52 | #     ('事件', 0.5252165334872677)
53 | #     ('剖宫产', 0.4323518137698726)
54 | #     ('患者', 0.42213201850447274)
55 | #     ('榆林', 0.3458613813882902)
56 | #     ('温度', 0.3433894045919456)
57 | #     ('跳楼', 0.3253241303426245)
58 | #     ('事情', 0.30329273312129706)
59 | #     '''
60 | # test()


--------------------------------------------------------------------------------
/script/keywords_tfidf.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: keywords_tfidf.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | import jieba.posseg as pseg
 7 | 
 8 | class TFIDF:
 9 |     def __init__(self):
10 |         self.idf_file = 'data/idf.txt'
11 |         self.idf_dict, self.common_idf = self.load_idf()
12 | 
13 |     def build_wordsdict(self, text):
14 |         word_dict = {}
15 |         candi_words = []
16 |         candi_dict = {}
17 |         for word in pseg.cut(text):
18 |             if word.flag[0] in ['n', 'v', 'a'] and len(word.word) > 1:
19 |                 candi_words.append(word.word)
20 |             if word.word not in word_dict:
21 |                 word_dict[word.word] = 1
22 |             else:
23 |                 word_dict[word.word] += 1
24 |         count_total = sum(word_dict.values())
25 |         for word, word_count in word_dict.items():
26 |             if word in candi_words:
27 |                 candi_dict[word] = word_count/count_total
28 |             else:
29 |                 continue
30 | 
31 |         return candi_dict
32 | 
33 |     def extract_keywords(self, text, num_keywords):
34 |         keywords_dict = {}
35 |         candi_dict = self.build_wordsdict(text)
36 |         for word, word_tf in candi_dict.items():
37 |             word_idf = self.idf_dict.get(word, self.common_idf)
38 |             word_tfidf = word_idf * word_tf
39 |             keywords_dict[word] = word_tfidf
40 |         keywords_dict = sorted(keywords_dict.items(), key=lambda asd:asd[1], reverse=True)
41 | 
42 |         return keywords_dict[:num_keywords]
43 | 
44 |     def load_idf(self):
45 |         idf_dict = {}
46 |         for line in open(self.idf_file):
47 |             word, freq = line.strip().split(' ')
48 |             idf_dict[word] = float(freq)
49 |         common_idf = sum(idf_dict.values())/len(idf_dict)
50 | 
51 |         return idf_dict, common_idf
52 | 
53 | # def test():
54 | #     text = '''（原标题：央视独家采访：陕西榆林产妇坠楼事件在场人员还原事情经过）
55 | #     央视新闻客户端11月24日消息，2017年8月31日晚，在陕西省榆林市第一医院绥德院区，产妇马茸茸在待产时，从医院五楼坠亡。事发后，医院方面表示，由于家属多次拒绝剖宫产，最终导致产妇难忍疼痛跳楼。但是产妇家属却声称，曾向医生多次提出剖宫产被拒绝。
56 | #     事情经过究竟如何，曾引起舆论纷纷，而随着时间的推移，更多的反思也留给了我们，只有解决了这起事件中暴露出的一些问题，比如患者的医疗选择权，人们对剖宫产和顺产的认识问题等，这样的悲剧才不会再次发生。央视记者找到了等待产妇的家属，主治医生，病区主任，以及当时的两位助产师，一位实习医生，希望通过他们的讲述，更准确地还原事情经过。
57 | #     产妇待产时坠亡，事件有何疑点。公安机关经过调查，排除他杀可能，初步认定马茸茸为跳楼自杀身亡。马茸茸为何会在医院待产期间跳楼身亡，这让所有人的目光都聚焦到了榆林第一医院，这家在当地人心目中数一数二的大医院。
58 | #     就这起事件来说，如何保障患者和家属的知情权，如何让患者和医生能够多一份实质化的沟通？这就需要与之相关的法律法规更加的细化、人性化并且充满温度。用这种温度来消除孕妇对未知的恐惧，来保障医患双方的权益，迎接新生儿平安健康地来到这个世界。'''
59 | #     tfidfer = TFIDF()
60 | #     for keyword in tfidfer.extract_keywords(text, 10):
61 | #         print(keyword)
62 | #     '''
63 | #      ('产妇', 0.16089872363839283)
64 | #      ('医院', 0.10469306102267856)
65 | #      ('待产', 0.10192652680535713)
66 | #      ('剖宫产', 0.09611770924999999)
67 | #      ('家属', 0.09150082801845238)
68 | #      ('坠亡', 0.069497286319104)
69 | #      ('事件', 0.06781284802178572)
70 | #      ('跳楼', 0.061929583872023804)
71 | #      ('患者', 0.056677817569285714)
72 | #      ('榆林', 0.053159906859523806)
73 | 
74 | #     '''
75 | # test()
76 | 


--------------------------------------------------------------------------------
/script/sentence_similarity.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: sentence_similarity.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | 
 7 | import gensim
 8 | import numpy as np
 9 | class SimilarityCompute:
10 |     def __init__(self):
11 |         self.embedding_file = 'data/token_vector.bin'
12 |         self.model = gensim.models.KeyedVectors.load_word2vec_format(self.embedding_file, binary=False)
13 | 
14 |     def get_wordvector(self, word):
15 |         try:
16 |             return self.model[word]
17 |         except:
18 |             return np.zeros(200)
19 | 
20 |     def similarity_cosine(self, word_list1,word_list2):
21 |         simalrity = 0
22 |         vector1 = np.zeros(200)
23 |         for word in word_list1:
24 |             vector1 += self.get_wordvector(word)
25 | 
26 |         vector1 = vector1/len(word_list1)
27 |         vector2 = np.zeros(200)
28 | 
29 |         for word in word_list2:
30 |             vector2 += self.get_wordvector(word)
31 | 
32 |         vector2 = vector2/len(word_list2)
33 |         cos1 = np.sum(vector1*vector2)
34 |         cos21 = np.sqrt(sum(vector1**2))
35 |         cos22 = np.sqrt(sum(vector2**2))
36 |         similarity = cos1/float(cos21*cos22)
37 |         return similarity
38 | 


--------------------------------------------------------------------------------
/script/textrank.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # coding: utf-8
 3 | # File: textrank.py
 4 | # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
 5 | # Date: 18-4-17
 6 | from collections import defaultdict
 7 | import sys
 8 | 
 9 | 
class textrank_graph:
    """Undirected weighted graph whose nodes can be scored with TextRank."""

    def __init__(self):
        # node -> list of (node, neighbour, weight) tuples
        self.graph = defaultdict(list)
        self.d = 0.85  # damping factor, conventionally 0.85
        self.min_diff = 1e-5  # convergence threshold

    def addEdge(self, start, end, weight):
        """Add an undirected edge by recording it under both endpoints."""
        self.graph[start].append((start, end, weight))
        self.graph[end].append((end, start, weight))

    def rank(self):
        """Score every node and return the normalized scores.

        Iterates the weighted TextRank update
        ``w(node) = (1 - d) + d * sum(edge_w / outsum(nb) * w(nb))``
        for at most 999 sweeps, stopping early once the total of all node
        weights changes by no more than ``self.min_diff`` between sweeps.

        Returns:
            defaultdict mapping node -> normalized score; the best node gets
            1.0. An empty graph yields an empty mapping.
        """
        # Every node starts with the same weight.
        weight_default = 1.0 / (len(self.graph) or 1.0)
        # Current weight of each node.
        nodeweight_dict = defaultdict(float)
        # Sum of the weights of the edges leaving each node.
        outsum_node_dict = defaultdict(float)
        for node, out_edge in self.graph.items():
            nodeweight_dict[node] = weight_default
            outsum_node_dict[node] = sum((edge[2] for edge in out_edge), 0.0)

        # Fixed visiting order keeps the result deterministic.
        sorted_keys = sorted(self.graph.keys())
        previous_total = 0
        for _ in range(1, 1000):
            for node in sorted_keys:
                s = 0
                for e in self.graph[node]:
                    s += e[2] / outsum_node_dict[e[1]] * nodeweight_dict[e[1]]
                nodeweight_dict[node] = (1 - self.d) + self.d * s
            current_total = sum(nodeweight_dict.values())
            if abs(current_total - previous_total) <= self.min_diff:
                break
            previous_total = current_total

        # Min-max style rescaling (not a z-score): the largest weight maps to
        # exactly 1.0, and min/10 is used as the offset so that the smallest
        # weight stays above zero.
        if nodeweight_dict:
            min_rank = min(nodeweight_dict.values())
            max_rank = max(nodeweight_dict.values())
            for n, w in nodeweight_dict.items():
                nodeweight_dict[n] = (w - min_rank / 10.0) / (max_rank - min_rank / 10.0)

        return nodeweight_dict
66 | 
67 | 


--------------------------------------------------------------------------------