├── dict
    ├── label_dict
    ├── postag_dict
    └── p_eng
├── conf
    └── IE_extraction.conf
├── lib
    ├── get_spo_train.py
    ├── conf_lib.py
    └── get_vocab.py
├── bin
    ├── p_classification
    │   ├── p_model.py
    │   ├── p_infer.py
    │   ├── p_train.py
    │   └── p_data_reader.py
    ├── so_labeling
    │   ├── spo_model.py
    │   ├── spo_train.py
    │   ├── spo_infer.py
    │   └── spo_data_reader.py
    └── evaluation
    │   └── calc_pr.py
├── README.md
└── data
    ├── test_demo_spo.json
    └── test_demo.json


/dict/label_dict:
--------------------------------------------------------------------------------
1 | B-SUB
2 | I-SUB
3 | E-SUB
4 | B-OBJ
5 | I-OBJ
6 | E-OBJ
7 | O
8 | 


--------------------------------------------------------------------------------
/dict/postag_dict:
--------------------------------------------------------------------------------
 1 | n
 2 | f
 3 | s
 4 | t
 5 | nr
 6 | ns
 7 | nt
 8 | nw
 9 | nz
10 | v
11 | vd
12 | vn
13 | a
14 | ad
15 | an
16 | d
17 | m
18 | q
19 | r
20 | p
21 | c
22 | u
23 | xc
24 | w
25 | 


--------------------------------------------------------------------------------
/dict/p_eng:
--------------------------------------------------------------------------------
 1 | 改编自	RP
 2 | 主角	LA
 3 | 丈夫	HB
 4 | 号	PN
 5 | 民族	NAT
 6 | 所属专辑	AL
 7 | 创始人	INVEN
 8 | 毕业院校	GRA
 9 | 总部地点	HQ
10 | 专业代码	SP
11 | 主演	ACT
12 | 董事长	CM
13 | 海拔	AT
14 | 朝代	DY
15 | 导演	DIR
16 | 简称	ABBR
17 | 首都	CP
18 | 注册资本	RG
19 | 出生地	BP
20 | 人口数量	PA
21 | 占地面积	AS
22 | 所在城市	CITY
23 | 上映时间	RS
24 | 父亲	FAR
25 | 出版社	PRESS
26 | 官方语言	OL
27 | 主持人	HOST
28 | 身高	HEIG
29 | 妻子	WIFE
30 | 气候	CLI
31 | 目	BO
32 | 歌手	SING
33 | 修业年限	SD
34 | 作词	LYR
35 | 连载网站	WEB
36 | 祖籍	AP
37 | 面积	AREA
38 | 母亲	MOT
39 | 出品公司	PC
40 | 编剧	WC
41 | 字	CN
42 | 作曲	MB
43 | 邮政编码	PCODE
44 | 制片人	FP
45 | 成立日期	BD
46 | 嘉宾	GUEST
47 | 国籍	NA
48 | 出生日期	BDATE
49 | 作者	WRITER
50 | 


--------------------------------------------------------------------------------
/conf/IE_extraction.conf:
--------------------------------------------------------------------------------
 1 | [DEFAULT]
 2 | work_dir = .
 3 | dict_dir = %(work_dir)s/dict
 4 | data_dir = %(work_dir)s/data
 5 | model_dir = %(work_dir)s/model
 6 | log_dir = %(work_dir)s/log
 7 | 
 8 | [dict_path]
 9 | word_idx_path = %(work_dir)s/dict/word_idx
10 | label_dict_path = %(work_dir)s/dict/p_eng
11 | so_label_dict_path = %(work_dir)s/dict/label_dict
12 | postag_dict_path = %(work_dir)s/dict/postag_dict
13 | 
14 | [model_params]
15 | emb_name='emb'
16 | use_gpu = False
17 | is_sparse = False
18 | is_local = False
19 | word_emb_fixed = False
20 | mix_hidden_lr = 1e-3
21 | cost_threshold = 5
22 | mark_dict_len = 2
23 | word_dim = 128
24 | mark_dim = 5
25 | postag_dim = 20
26 | hidden_dim = 512
27 | depth = 8
28 | pass_num = 100
29 | batch_size = 1000
30 | class_dim = 49
31 | 
32 | [p_model_dir]
33 | train_data_path = %(data_dir)s/train_data.json
34 | test_data_path = %(data_dir)s/dev_data.json
35 | p_model_save_dir = %(model_dir)s/p_model
36 | 
37 | [spo_model_dir]
38 | spo_train_data_path = %(data_dir)s/train_data.p
39 | spo_test_data_path = %(data_dir)s/dev_data.p
40 | spo_model_save_dir = %(model_dir)s/spo_model
41 | 


--------------------------------------------------------------------------------
/lib/get_spo_train.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | ########################################################
 3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #   http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # imitations under the License.
16 | ########################################################
17 | """
18 | This module to generate training data for training a so-labeling model
19 | """
20 | 
21 | import random
22 | import codecs
23 | import sys
24 | import json
25 | reload(sys)
26 | sys.setdefaultencoding('utf-8')
27 | 
28 | def get_p(input_file):
29 |     """
30 |     Generate training data for so labeling model
31 |     """
32 |     with codecs.open(input_file, 'r', 'utf-8') as fr:
33 |         for line in fr:
34 |             try:
35 |                 dic = json.loads(line.decode('utf-8').strip())
36 |             except:
37 |                 continue
38 |             spo_list = dic['spo_list']
39 |             p_list = [item['predicate'] for item in spo_list]
40 |             for p in p_list:
41 |                 print "\t".join([json.dumps(dic, ensure_ascii=False), p]).encode('utf-8')
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     input_file = sys.argv[1]
46 |     get_p(input_file)
47 | 


--------------------------------------------------------------------------------
/lib/conf_lib.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | ########################################################
 3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #   http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # imitations under the License.
16 | ########################################################
17 | """
18 | This module to laod the configuration file
19 | """
20 | 
21 | import json
22 | import os
23 | import sys
24 | import argparse
25 | import ConfigParser
26 | 
27 | 
28 | def load_conf(conf_filename):
29 |     """
30 |     load the conf file
31 |     :param string conf_filename: conf file
32 |     :rtype dict param_conf_dict: conf_dict
33 |     """
34 |     param_conf_dict={}
35 |     cf = ConfigParser.ConfigParser()
36 |     cf.read(conf_filename)
37 |     int_conf_keys = {
38 |                 'model_params': ["cost_threshold", "mark_dict_len", "word_dim",
39 |                     "mark_dim", "postag_dim", "hidden_dim", "depth",
40 |                     "pass_num", "batch_size", "class_dim"]
41 |                 }
42 |     for session_key in int_conf_keys:
43 |         for option_key in int_conf_keys[session_key]:
44 |             try:
45 |                 option_value = cf.get(session_key, option_key)
46 |                 param_conf_dict[option_key] = int(option_value)
47 |             except:
48 |                 raise ValueError("%s--%s is not a integer" % (session_key, option_key))
49 |     str_conf_keys = {
50 |                 'model_params': ['is_sparse', "use_gpu", "emb_name",
51 |                     "is_local", "word_emb_fixed", "mix_hidden_lr"],
52 |                 'p_model_dir': ["test_data_path", "train_data_path",
53 |                     "p_model_save_dir"],
54 |                 'spo_model_dir': ['spo_test_data_path', 'spo_train_data_path', 
55 |                     'spo_model_save_dir'],
56 |                 'dict_path': ["so_label_dict_path", "label_dict_path",
57 |                     "postag_dict_path", "word_idx_path"]
58 |                 }
59 | 
60 |     for session_key in str_conf_keys:
61 |         for option_key in str_conf_keys[session_key]:
62 |             try:
63 |                 param_conf_dict[option_key] = cf.get(session_key, option_key)
64 |             except:
65 |                 raise ValueError("%s no such option %s" % (session_key, option_key))
66 |     return param_conf_dict
67 | 


--------------------------------------------------------------------------------
/lib/get_vocab.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | ########################################################
 3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #   http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # imitations under the License.
16 | ########################################################
17 | """
18 | This module to generate vocabulary list
19 | """
20 | 
21 | import random
22 | import os
23 | import codecs
24 | import sys
25 | import json
26 | reload(sys)
27 | sys.setdefaultencoding('utf-8')
28 | 
29 | def load_word_file(f_input):
30 |     """
31 |     Get all words in files
32 |     :param string: input file
33 |     """
34 |     file_words = {}
35 |     with codecs.open(f_input, 'r', 'utf-8') as fr:
36 |         words = []
37 |         for line in fr:
38 |             try:
39 |                 dic = json.loads(line.decode('utf-8').strip())
40 |                 postag = dic['postag']
41 |                 words = [item["word"].strip() for item in postag]
42 |             except:
43 |                 continue
44 |             for word in words:
45 |                 file_words[word] = file_words.get(word, 0) + 1
46 |     return file_words
47 | 
48 | 
49 | def get_vocab(train_file, dev_file):
50 |     """
51 |     Get vocabulary file from the field 'postag' of files
52 |     :param string: input train data file
53 |     :param string: input dev data file
54 |     """
55 |     word_dic = load_word_file(train_file)
56 |     if len(word_dic) == 0:
57 |         raise ValueError('The length of train word is 0')
58 |     dev_word_dic = load_word_file(dev_file)
59 |     if len(dev_word_dic) == 0:
60 |         raise ValueError('The length of dev word is 0')
61 |     for word in dev_word_dic:
62 |         if word in word_dic:
63 |             word_dic[word] += dev_word_dic[word]
64 |         else:
65 |             word_dic[word] = dev_word_dic[word]
66 |     print '<UNK>'
67 |     vocab_set = set()
68 |     value_list = sorted(word_dic.iteritems(), key=lambda d:d[1], reverse=True)
69 |     for word in value_list[:30000]:
70 |         print word[0]
71 |         vocab_set.add(word[0])
72 | 
73 |     #add predicate in all_50_schemas
74 |     if not os.path.exists('./data/all_50_schemas'):
75 |         raise ValueError("./data/all_50_schemas not found.")
76 |     with codecs.open('./data/all_50_schemas', 'r', 'utf-8') as fr:
77 |         for line in fr:
78 |             dic = json.loads(line.decode('utf-8').strip())
79 |             p = dic['predicate']
80 |             if p not in vocab_set:
81 |                 vocab_set.add(p)
82 |                 print p
83 | 
84 |     
85 | if __name__ == '__main__':
86 |     train_file = sys.argv[1]
87 |     dev_file = sys.argv[2]
88 |     get_vocab(train_file, dev_file)
89 | 


--------------------------------------------------------------------------------
/bin/p_classification/p_model.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to define a neural network
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import sys
 24 | import argparse
 25 | import ConfigParser
 26 | 
 27 | import paddle
 28 | import paddle.fluid as fluid
 29 | 
 30 | 
 31 | def db_lstm(data_reader, word, postag, conf_dict):
 32 |     """
 33 |     Neural network structure definition: stacked bidirectional
 34 |     LSTM and max-pooling
 35 |     """
 36 |     hidden_dim = conf_dict['hidden_dim']
 37 |     depth = conf_dict['depth']
 38 |     label_dict_len = data_reader.get_dict_size('label_dict')
 39 |     word_emb_fixed = True if conf_dict['word_emb_fixed'] == "True" else False
 40 |     emb_distributed = not conf_dict['is_local']
 41 |     # 2 features
 42 |     word_param = fluid.ParamAttr(name=conf_dict['emb_name'],
 43 |                                  trainable=(not word_emb_fixed))
 44 |     pos_param = fluid.ParamAttr(name='pos_emb', trainable=(not word_emb_fixed))
 45 | 
 46 |     conf_dict['is_sparse'] = bool(conf_dict['is_sparse'])
 47 |     word_embedding = fluid.layers.embedding(
 48 |         input=word,
 49 |         size=[data_reader.get_dict_size('wordemb_dict'),
 50 |             conf_dict['word_dim']],
 51 |         dtype='float32',
 52 |         is_distributed=emb_distributed,
 53 |         is_sparse=conf_dict['is_sparse'],
 54 |         param_attr=word_param)
 55 | 
 56 |     postag_embedding = fluid.layers.embedding(
 57 |         input=postag,
 58 |         size=[data_reader.get_dict_size('postag_dict'),
 59 |             conf_dict['postag_dim']],
 60 |         dtype='float32',
 61 |         is_distributed=emb_distributed,
 62 |         is_sparse=conf_dict['is_sparse'],
 63 |         param_attr=pos_param)
 64 | 
 65 |     # embedding
 66 |     emb_layers = [word_embedding, postag_embedding]
 67 | 
 68 |     # input hidden
 69 |     hidden_0_layers = [
 70 |         fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
 71 |         for emb in emb_layers
 72 |     ]
 73 | 
 74 |     hidden_0 = fluid.layers.sums(input=hidden_0_layers)
 75 | 
 76 |     lstm_0 = fluid.layers.dynamic_lstm(
 77 |         input=hidden_0,
 78 |         size=hidden_dim,
 79 |         candidate_activation='relu',
 80 |         gate_activation='sigmoid',
 81 |         cell_activation='sigmoid')
 82 | 
 83 |     # stack L-LSTM and R-LSTM with direct edges
 84 |     input_tmp = [hidden_0, lstm_0]
 85 | 
 86 |     for i in range(1, depth):
 87 |         mix_hidden = fluid.layers.sums(input=[
 88 |             fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
 89 |             fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
 90 |         ])
 91 | 
 92 |         lstm = fluid.layers.dynamic_lstm(
 93 |             input=mix_hidden,
 94 |             size=hidden_dim,
 95 |             candidate_activation='relu',
 96 |             gate_activation='sigmoid',
 97 |             cell_activation='sigmoid',
 98 |             is_reverse=((i % 2) == 1))
 99 | 
100 |         input_tmp = [mix_hidden, lstm]
101 |     
102 |     # max-pooling 
103 |     fc_last = fluid.layers.sequence_pool(input=input_tmp[0], pool_type='max')
104 |     lstm_last = fluid.layers.sequence_pool(input=input_tmp[1][0], pool_type='max')
105 | 
106 |     # output layer
107 |     feature_out = fluid.layers.fc(input=[fc_last, lstm_last],
108 |             size=conf_dict['class_dim'])
109 | 
110 |     return feature_out
111 | 


--------------------------------------------------------------------------------
/bin/so_labeling/spo_model.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to define neural network
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import sys
 24 | import argparse
 25 | import ConfigParser
 26 | 
 27 | import paddle
 28 | import paddle.fluid as fluid
 29 | 
 30 |     
 31 | def db_lstm(data_reader, word, postag, p_word, conf_dict):
 32 |     """
 33 |     Neural network structure definition: Stacked bidirectional 
 34 |     LSTM network
 35 |     """
 36 |     hidden_dim = conf_dict['hidden_dim']
 37 |     depth = conf_dict['depth']
 38 |     label_dict_len = data_reader.get_dict_size('so_label_dict')
 39 |     word_emb_fixed = True if conf_dict['word_emb_fixed'] == "True" else False
 40 |     emb_distributed = not conf_dict['is_local']
 41 |     conf_dict['is_sparse'] = bool(conf_dict['is_sparse'])
 42 |     # 3 features
 43 |     word_param = fluid.ParamAttr(name=conf_dict['emb_name'],
 44 |                                  trainable=(not word_emb_fixed))
 45 |     word_embedding = fluid.layers.embedding(
 46 |         input=word,
 47 |         size=[data_reader.get_dict_size('wordemb_dict'),
 48 |             conf_dict['word_dim']],
 49 |         dtype='float32',
 50 |         is_distributed=emb_distributed,
 51 |         is_sparse=emb_distributed,
 52 |         param_attr=word_param)
 53 |     
 54 | 
 55 |     postag_embedding = fluid.layers.embedding(
 56 |         input=postag,
 57 |         size=[data_reader.get_dict_size('postag_dict'),
 58 |             conf_dict['postag_dim']],
 59 |         dtype='float32',
 60 |         is_distributed=emb_distributed,
 61 |         is_sparse=emb_distributed)
 62 | 
 63 |     
 64 |     p_embedding = fluid.layers.embedding(
 65 |         input=p_word,
 66 |         size=[data_reader.get_dict_size('wordemb_dict'),
 67 |             conf_dict['word_dim']],
 68 |         dtype='float32',
 69 |         is_distributed=emb_distributed,
 70 |         is_sparse=emb_distributed,
 71 |         param_attr=word_param)
 72 | 
 73 |     # embedding
 74 |     emb_layers = [word_embedding, postag_embedding, p_embedding]
 75 |     # input hidden
 76 |     hidden_0_layers = [
 77 |         fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
 78 |         for emb in emb_layers
 79 |     ]
 80 | 
 81 |     hidden_0 = fluid.layers.sums(input=hidden_0_layers)
 82 | 
 83 |     lstm_0 = fluid.layers.dynamic_lstm(
 84 |         input=hidden_0,
 85 |         size=hidden_dim,
 86 |         candidate_activation='relu',
 87 |         gate_activation='sigmoid',
 88 |         cell_activation='sigmoid')
 89 | 
 90 |     # stack L-LSTM and R-LSTM with direct edges
 91 |     input_tmp = [hidden_0, lstm_0]
 92 | 
 93 |     for i in range(1, depth):
 94 |         mix_hidden = fluid.layers.sums(input=[
 95 |             fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
 96 |             fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
 97 |         ])
 98 | 
 99 |         lstm = fluid.layers.dynamic_lstm(
100 |             input=mix_hidden,
101 |             size=hidden_dim,
102 |             candidate_activation='relu',
103 |             gate_activation='sigmoid',
104 |             cell_activation='sigmoid',
105 |             is_reverse=((i % 2) == 1))
106 | 
107 |         input_tmp = [mix_hidden, lstm]
108 |     
109 |     # output
110 |     feature_out = fluid.layers.sums(input=[
111 |         fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'),
112 |         fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh')
113 |     ])
114 | 
115 |     return feature_out
116 | 


--------------------------------------------------------------------------------
/bin/p_classification/p_infer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to infer with a p classification model
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import sys
 24 | import argparse
 25 | import ConfigParser
 26 | import math
 27 | 
 28 | import numpy as np
 29 | import paddle
 30 | import paddle.fluid as fluid
 31 | 
 32 | import p_data_reader
 33 | 
 34 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../lib")))
 35 | import conf_lib
 36 | 
 37 | 
 38 | def predict_infer(conf_dict, data_reader, predict_data_path, \
 39 |         predict_result_path, model_path):
 40 |     """
 41 |     Predict with trained models 
 42 |     """
 43 |     if len(predict_result_path) > 0:
 44 |         result_writer = open(predict_result_path, 'w')
 45 |     else:
 46 |         result_writer = sys.stdout
 47 | 
 48 |     np.set_printoptions(precision=3)
 49 |     if len(model_path) == 0:
 50 |         return
 51 | 
 52 |     place = fluid.CPUPlace()
 53 |     word = fluid.layers.data(
 54 |         name='word_data', shape=[1], dtype='int64', lod_level=1)
 55 |     postag = fluid.layers.data(
 56 |         name='token_pos', shape=[1], dtype='int64', lod_level=1)
 57 |     feeder = fluid.DataFeeder(feed_list=[word, postag], place=place)
 58 |     exe = fluid.Executor(place)
 59 | 
 60 |     test_batch_reader = paddle.batch(
 61 |         paddle.reader.buffered(data_reader.get_predict_reader\
 62 |                 (predict_data_path, need_input=True, need_label=False),
 63 |                 size=8192),
 64 |         batch_size=conf_dict["batch_size"])
 65 |     inference_scope = fluid.core.Scope()
 66 |     with fluid.scope_guard(inference_scope):
 67 |         [inference_program, feed_target_names, fetch_targets] = \
 68 |             fluid.io.load_inference_model(
 69 |                 model_path, exe, params_filename='params')
 70 | 
 71 |         # batch
 72 |         batch_id = 0
 73 |         for data in test_batch_reader():
 74 |             feeder_data = []
 75 |             input_data = []
 76 |             for item in data:
 77 |                 input_dic = json.loads(item[0])
 78 |                 input_data.append(input_dic)
 79 |                 feeder_data.append(item[1:])
 80 |             results = exe.run(inference_program, feed=feeder.feed(feeder_data),
 81 |                               fetch_list=fetch_targets, return_numpy=False)
 82 |             label_scores = np.array(results[0]).tolist()
 83 |             #infer a batch
 84 |             infer_a_batch(label_scores, input_data, result_writer, data_reader)
 85 |             
 86 |             batch_id += 1
 87 | 
 88 | 
 89 | def infer_a_batch(label_scores, input_data, result_writer, data_reader):
 90 |     """Infer the results of a batch"""
 91 |     for sent_idx, label in enumerate(label_scores):
 92 |         p_label = []
 93 |         label = map(float, label)
 94 |         for p_idx, p_score in enumerate(label):
 95 |             if sigmoid(p_score) > 0.5:
 96 |                 p_label.append(data_reader.get_label_output(p_idx))
 97 |         for p in p_label:
 98 |             output_fields = [json.dumps(input_data[sent_idx], ensure_ascii=False), p]
 99 |             result_writer.write('\t'.join(output_fields).encode('utf-8'))
100 |             result_writer.write('\n')
101 | 
102 | 
103 | def sigmoid(x):
104 |     """sigmode function"""
105 |     return math.exp(x) / (1 + math.exp(x))
106 | 
107 | 
108 | def main(conf_dict, model_path, predict_data_path, 
109 |             predict_result_path, use_cuda=False):
110 |     """Predict main function"""
111 |     if use_cuda and not fluid.core.is_compiled_with_cuda():
112 |         return
113 |     data_generator = p_data_reader.RcDataReader(
114 |         wordemb_dict_path=conf_dict['word_idx_path'],
115 |         postag_dict_path=conf_dict['postag_dict_path'],
116 |         label_dict_path=conf_dict['label_dict_path'],
117 |         train_data_list_path=conf_dict['train_data_path'],
118 |         test_data_list_path=conf_dict['test_data_path'])
119 |     
120 |     predict_infer(conf_dict, data_generator, predict_data_path, \
121 |             predict_result_path, model_path)
122 | 
123 | 
124 | if __name__ == '__main__':
125 |     # Load configuration file
126 |     parser = argparse.ArgumentParser()
127 |     parser.add_argument("--conf_path", type=str,
128 |             help="conf_file_path_for_model. (default: %(default)s)",
129 |             required=True)
130 |     parser.add_argument("--model_path", type=str,
131 |             help="model_path", required=True)
132 |     parser.add_argument("--predict_file", type=str,
133 |             help="the_file_to_be_predicted", required=True)
134 |     parser.add_argument("--result_file", type=str,
135 |             default='', help="the_file_of_predicted_results")
136 |     args = parser.parse_args()
137 |     conf_dict = conf_lib.load_conf(args.conf_path)
138 |     model_path = args.model_path
139 |     predict_data_path = args.predict_file
140 |     predict_result_path = args.result_file
141 |     for input_path in [model_path, predict_data_path]:
142 |         if not os.path.exists(input_path):
143 |             raise ValueError("%s not found." % (input_path))
144 |     main(conf_dict, model_path, predict_data_path, predict_result_path)
145 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Information Extraction Baseline System—InfoExtractor
  2 | ## Abstract
  3 | InfoExtractor is an information extraction baseline system based on the Schema constrained Knowledge Extraction dataset(SKED). InfoExtractor adopt a pipeline architecture with a p-classification model and a so-labeling model which are both implemented with PaddlePaddle. The p-classification model is a multi-label classification which employs a stacked Bi-LSTM with max-pooling network, to identify the predicate involved in the given sentence. Then a deep Bi-LSTM-CRF network is adopted with BIEO tagging scheme in the so-labeling model to label the element of subject and object mention, given the predicate which is distinguished in the p-classification model. The F1 value of InfoExtractor on the development set is 0.668.
  4 | 
  5 | ## Getting Started
  6 | ### Environment Requirements
  7 | Paddlepaddle v1.2.0 </br>
  8 | Numpy </br>
  9 | Memory requirement 10G for training and 6G for infering
 10 | 
 11 | ### Step 1: Install paddlepaddle
 12 | For now we’ve only tested on PaddlePaddle Fluid v1.2.0, please install PaddlePaddle firstly and see more details about PaddlePaddle in [PaddlePaddle Homepage](http://www.paddlepaddle.org/).
 13 | 
 14 | ### Step 2: Download the training data, dev data and schema files
 15 | Please download the training data, development data and schema files from [the competition website](http://lic2019.ccf.org.cn/kg), then unzip files and put them in ```./data/``` folder.
 16 | ```
 17 | cd data
 18 | unzip train_data.json.zip 
 19 | unzip dev_data.json.zip
 20 | cd -
 21 | ```
 22 | ### Step 3: Get the vocabulary file
 23 | Obtain high frequency words from the field ‘postag’ of training and dev data, then compose these high frequency words into a vocabulary list.
 24 | ```
 25 | python lib/get_vocab.py ./data/train_data.json ./data/dev_data.json > ./dict/word_idx
 26 | ```
 27 | ### Step 4: Train p-classification model
 28 | First, the classification model is trained to identify predicates in sentences. Note that if you need to change the default hyper-parameters, e.g. hidden layer size or whether to use GPU for training (By default, CPU training is used), etc. Please modify the specific argument in ```./conf/IE_extraction.conf```, then run the following command:
 29 | ```
 30 | python bin/p_classification/p_train.py --conf_path=./conf/IE_extraction.conf
 31 | ```
 32 | The trained p-classification model will be saved in the folder ```./model/p_model```.
 33 | ### Step 5: Train so-labeling model
 34 | After getting the predicates that exist in the sentence, a sequence labeling model is trained to identify the s-o pairs corresponding to the relation that appear in the sentence. </br>
 35 | Before training the so-labeling model, you need to prepare the training data that meets the training model format to train a so-labeling model.
 36 | ```
 37 | python lib/get_spo_train.py  ./data/train_data.json > ./data/train_data.p
 38 | python lib/get_spo_train.py  ./data/dev_data.json > ./data/dev_data.p
 39 | ```
 40 | To train a so labeling model, you can run:
 41 | ```
 42 | python bin/so_labeling/spo_train.py --conf_path=./conf/IE_extraction.conf
 43 | ```
 44 | The trained so-labeling model will be saved in the folder ```./model/spo_model```.
 45 | 
 46 | ### Step 6: Infer with two trained models
 47 | After the training is completed, you can choose a trained model for prediction. The following command is used to predict with the last model. You can also use the development set to select the optimal model for prediction. To do inference by using two trained models with the demo test data (under ```./data/test_demo.json```), please execute the command in two steps:
 48 | ```
 49 | python bin/p_classification/p_infer.py --conf_path=./conf/IE_extraction.conf --model_path=./model/p_model/final/ --predict_file=./data/test_demo.json > ./data/test_demo.p
 50 | python bin/so_labeling/spo_infer.py --conf_path=./conf/IE_extraction.conf --model_path=./model/spo_model/final/ --predict_file=./data/test_demo.p > ./data/test_demo.res
 51 | ```
 52 | The predicted SPO triples will be saved in the folder ```./data/test_demo.res```.
 53 | 
 54 | ## Evaluation
 55 | Precision, Recall and F1 score are used as the basic evaluation metrics to measure the performance of participating systems. After obtaining the predicted triples of the model, you can run the following command. 
 56 | Considering data security, we don't provide the alias dictionary.
 57 | ```
 58 | zip -r ./data/test_demo.res.zip ./data/test_demo.res
 59 | python bin/evaluation/calc_pr.py --golden_file=./data/test_demo_spo.json --predict_file=./data/test_demo.res.zip
 60 | ```
 61 | 
 62 | ## Discussion
 63 | If you have any question, you can submit an issue in github and we will respond periodically. </br>
 64 | 
 65 | 
 66 | ## Copyright and License
 67 | Copyright 2019 Baidu.com, Inc. All Rights Reserved </br>
 68 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may otain a copy of the License at </br>
 69 | ```http://www.apache.org/licenses/LICENSE-2.0``` </br>
 70 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
 71 | 
 72 | # APPENDIX
 73 | In the released dataset, the field ‘postag’ of sentences represents the segmentation and part-of-speech tagging information. The abbreviations of part-of-speech tagging (PosTag) and their corresponding part of speech meanings are shown in the following table. </br>
 74 | In addition, the given segmentation and part-of-speech tagging of the dataset are only references and can be replaced with other segmentation results.</br>
 75 | 
 76 | |POS| Meaning |
 77 | |:---|:---|
 78 | | n |common nouns|
 79 | | f | localizer |
 80 | | s | space |
 81 | | t | time|
 82 | | nr | noun of people|
 83 | | ns | noun of space|
 84 | | nt | noun of tuan|
 85 | | nw | noun of work|
 86 | | nz | other proper noun|
 87 | | v | verbs |
 88 | | vd | verb of adverbs|
 89 | | vn |verb of noun|
 90 | | a | adjective |
 91 | | ad | adjective of adverb|
 92 | | an | adnoun |
 93 | | d | adverbs |
 94 | | m | numeral |
 95 | | q | quantity|
 96 | | r | pronoun |
 97 | | p | prepositions |
 98 | | c | conjunction |
 99 | | u | auxiliary |
100 | | xc | other function word |
101 | | w | punctuations |
102 | 


--------------------------------------------------------------------------------
/bin/p_classification/p_train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to train the relation classification model
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import sys
 24 | import time
 25 | import argparse
 26 | import ConfigParser
 27 | 
 28 | import paddle
 29 | import paddle.fluid as fluid
 30 | import six
 31 | 
 32 | import p_data_reader
 33 | import p_model
 34 | 
 35 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../lib")))
 36 | import conf_lib
 37 | 
 38 | 
 39 | def train(conf_dict, data_reader, use_cuda=False):
 40 |     """
 41 |     Training of p classification model
 42 |     """
 43 |     label_dict_len = data_reader.get_dict_size('label_dict')
 44 |     # input layer
 45 |     word = fluid.layers.data(
 46 |         name='word_data', shape=[1], dtype='int64', lod_level=1)
 47 |     postag = fluid.layers.data(
 48 |         name='token_pos', shape=[1], dtype='int64', lod_level=1)
 49 |     # label
 50 |     target = fluid.layers.data(
 51 |         name='target', shape=[label_dict_len], dtype='float32', lod_level=0)
 52 |     # NN: embedding + lstm + pooling
 53 |     feature_out = p_model.db_lstm(data_reader, word, postag, conf_dict)
 54 |     # loss function for multi-label classification
 55 |     class_cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=feature_out, \
 56 |         label=target)
 57 |     avg_cost = fluid.layers.mean(class_cost)
 58 |     # optimization method
 59 |     sgd_optimizer = fluid.optimizer.AdamOptimizer(
 60 |         learning_rate=2e-3, )
 61 | 
 62 |     sgd_optimizer.minimize(avg_cost)
 63 | 
 64 |     train_batch_reader = paddle.batch(
 65 |         paddle.reader.shuffle(data_reader.get_train_reader(), buf_size=8192),
 66 |         batch_size=conf_dict['batch_size'])
 67 | 
 68 |     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 69 |     feeder = fluid.DataFeeder(feed_list=[word, postag, target], place=place)
 70 |     exe = fluid.Executor(place)
 71 | 
 72 |     save_dirname = conf_dict['p_model_save_dir']
 73 | 
 74 |     def train_loop(main_program, trainer_id=0):
 75 |         """start train"""
 76 |         exe.run(fluid.default_startup_program())
 77 | 
 78 |         start_time = time.time()
 79 |         batch_id = 0
 80 |         for pass_id in six.moves.xrange(conf_dict['pass_num']):
 81 |             pass_start_time = time.time()
 82 |             cost_sum, cost_counter = 0, 0
 83 |             for data in train_batch_reader():
 84 |                 cost = exe.run(main_program, feed=feeder.feed(data), fetch_list=[avg_cost])
 85 |                 cost = cost[0]
 86 |                 cost_sum += cost
 87 |                 cost_counter += 1
 88 |                 if batch_id % 10 == 0 and batch_id != 0:
 89 |                     print >> sys.stderr, "batch %d finished, second per batch: %02f" % (
 90 |                         batch_id, (time.time() - start_time) / batch_id)
 91 | 
 92 |                 # cost expected, training over
 93 |                 if float(cost) < 0.01:
 94 |                     pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
 95 |                     print >> sys.stderr, "%d pass end, cost time: %02f, avg_cost: %f" % (
 96 |                         pass_id, time.time() - pass_start_time, pass_avg_cost)
 97 |                     save_path = os.path.join(save_dirname, 'final')
 98 |                     fluid.io.save_inference_model(save_path, ['word_data', 'token_pos'],
 99 |                                                   [feature_out], exe, params_filename='params')
100 |                     return
101 |                 batch_id = batch_id + 1
102 | 
103 |             # save the model once each pass ends 
104 |             pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
105 |             print >> sys.stderr, "%d pass end, cost time: %02f, avg_cost: %f" % (
106 |                 pass_id, time.time() - pass_start_time, pass_avg_cost)
107 |             save_path = os.path.join(save_dirname, 'pass_%04d-%f' %
108 |                                     (pass_id, pass_avg_cost))
109 |             fluid.io.save_inference_model(save_path, ['word_data', 'token_pos'],
110 |                                           [feature_out], exe, params_filename='params')
111 | 
112 |         else:
113 |             # pass times complete and the training is over
114 |             save_path = os.path.join(save_dirname, 'final')
115 |             fluid.io.save_inference_model(save_path, ['word_data', 'token_pos'],
116 |                                           [feature_out], exe, params_filename='params')
117 |         return
118 | 
119 |     train_loop(fluid.default_main_program())
120 | 
121 | 
122 | def main(conf_dict, use_cuda=False):
123 |     """Train main function"""
124 |     if use_cuda and not fluid.core.is_compiled_with_cuda():
125 |         print >> sys.stderr, 'No GPU'
126 |         return
127 |     data_generator = p_data_reader.RcDataReader(
128 |         wordemb_dict_path=conf_dict['word_idx_path'],
129 |         postag_dict_path=conf_dict['postag_dict_path'],
130 |         label_dict_path=conf_dict['label_dict_path'],
131 |         train_data_list_path=conf_dict['train_data_path'],
132 |         test_data_list_path=conf_dict['test_data_path'])
133 |     
134 |     train(conf_dict, data_generator, use_cuda=use_cuda)
135 | 
136 | 
137 | if __name__ == '__main__':
138 |     # Load the configuration file
139 |     parser = argparse.ArgumentParser()
140 |     parser.add_argument("--conf_path", type=str,
141 |         help="conf_file_path_for_model. (default: %(default)s)",
142 |         required=True)
143 |     args = parser.parse_args()
144 |     conf_dict = conf_lib.load_conf(args.conf_path)
145 |     use_gpu = True if conf_dict.get('use_gpu', 'False') == 'True' else False
146 |     main(conf_dict, use_cuda=use_gpu)
147 | 


--------------------------------------------------------------------------------
/bin/so_labeling/spo_train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to train a so labeling model
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import time
 24 | import sys
 25 | import argparse
 26 | import ConfigParser
 27 | 
 28 | import paddle
 29 | import paddle.fluid as fluid
 30 | import six
 31 | 
 32 | import spo_data_reader
 33 | import spo_model
 34 | 
 35 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../lib")))
 36 | import conf_lib
 37 | 
 38 | 
 39 | 
 40 | def train(conf_dict, data_reader, use_cuda=False):
 41 |     """
 42 |     Training of so labeling model
 43 |     """
 44 |     # input data layer
 45 |     word = fluid.layers.data(
 46 |         name='word_data', shape=[1], dtype='int64', lod_level=1)
 47 |     postag = fluid.layers.data(
 48 |         name='token_pos', shape=[1], dtype='int64', lod_level=1)
 49 |     p_word = fluid.layers.data(
 50 |         name='p_word', shape=[1], dtype='int64', lod_level=1)
 51 |     # label
 52 |     target = fluid.layers.data(
 53 |         name='target', shape=[1], dtype='int64', lod_level=1)
 54 | 
 55 |     # embedding + lstm
 56 |     feature_out = spo_model.db_lstm(data_reader, word, \
 57 |             postag, p_word, conf_dict)
 58 | 
 59 |     # loss function
 60 |     # crf layer
 61 |     mix_hidden_lr = float(conf_dict['mix_hidden_lr'])
 62 |     crf_cost = fluid.layers.linear_chain_crf(
 63 |         input=feature_out,
 64 |         label=target,
 65 |         param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr))
 66 |     avg_cost = fluid.layers.mean(crf_cost)
 67 | 
 68 |     # optimizer
 69 |     sgd_optimizer = fluid.optimizer.AdamOptimizer(
 70 |         learning_rate=2e-3, )
 71 | 
 72 |     sgd_optimizer.minimize(avg_cost)
 73 | 
 74 |     crf_decode = fluid.layers.crf_decoding(
 75 |         input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
 76 | 
 77 |     train_batch_reader = paddle.batch(
 78 |         paddle.reader.shuffle(data_reader.get_train_reader(), buf_size=8192),
 79 |         batch_size=conf_dict['batch_size'])
 80 | 
 81 |     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 82 | 
 83 |     feeder = fluid.DataFeeder(feed_list=[word, postag, p_word, target], place=place)
 84 |     exe = fluid.Executor(place)
 85 | 
 86 |     save_dirname = conf_dict['spo_model_save_dir']
 87 | 
 88 |     def train_loop(main_program, trainer_id=0):
 89 |         """start train loop"""
 90 |         exe.run(fluid.default_startup_program())
 91 | 
 92 |         start_time = time.time()
 93 |         batch_id = 0
 94 |         for pass_id in six.moves.xrange(conf_dict['pass_num']):
 95 |             pass_start_time = time.time()
 96 |             cost_sum, cost_counter = 0, 0
 97 |             for data in train_batch_reader():
 98 |                 cost = exe.run(main_program, feed=feeder.feed(data), fetch_list=[avg_cost])
 99 |                 cost = cost[0]
100 |                 cost_sum += cost
101 |                 cost_counter += 1
102 |                 if batch_id % 10 == 0 and batch_id != 0:
103 |                     print >> sys.stderr, "batch %d finished, second per batch: %02f" % (
104 |                         batch_id, (time.time() - start_time) / batch_id)
105 | 
106 |                 # cost expected, training over
107 |                 if float(cost) < 1:
108 |                     save_path = os.path.join(save_dirname, 'final')
109 |                     fluid.io.save_inference_model(save_path, ['word_data', 'token_dist', 'p_word'],
110 |                                                   [feature_out], exe, params_filename='params')
111 |                     return
112 |                 batch_id = batch_id + 1
113 | 
114 |             # save the model once each pass ends
115 |             pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
116 |             print >> sys.stderr, "%d pass end, cost time: %02f, avg_cost: %f" % (
117 |                     pass_id, time.time() - pass_start_time, pass_avg_cost)
118 |             save_path = os.path.join(save_dirname, 'pass_%04d-%f' %
119 |                                     (pass_id, pass_avg_cost))
120 |             fluid.io.save_inference_model(save_path, ['word_data', 'token_pos', 'p_word'],
121 |                                           [feature_out], exe, params_filename='params')
122 | 
123 |         else:
124 |             # pass times complete and the training is over
125 |             save_path = os.path.join(save_dirname, 'final')
126 |             fluid.io.save_inference_model(save_path, ['word_data', 'token_pos', 'p_word'],
127 |                                           [feature_out], exe, params_filename='params')
128 |         return
129 | 
130 |     train_loop(fluid.default_main_program())
131 | 
132 | 
133 | def main(conf_dict, use_cuda=False):
134 |     """Train main function"""
135 |     if use_cuda and not fluid.core.is_compiled_with_cuda():
136 |         return
137 |     data_generator = spo_data_reader.DataReader(
138 |         wordemb_dict_path=conf_dict['word_idx_path'],
139 |         postag_dict_path=conf_dict['postag_dict_path'],
140 |         label_dict_path=conf_dict['so_label_dict_path'],
141 |         p_eng_dict_path=conf_dict['label_dict_path'],
142 |         train_data_list_path=conf_dict['spo_train_data_path'],
143 |         test_data_list_path=conf_dict['spo_test_data_path'])
144 |     
145 |     train(conf_dict, data_generator, use_cuda=use_cuda)
146 | 
147 | 
148 | if __name__ == '__main__':
149 |     # Load the configuration file
150 |     parser = argparse.ArgumentParser()
151 |     parser.add_argument("--conf_path", type=str,
152 |         help="conf_file_path_for_model. (default: %(default)s)",
153 |         required=True)
154 |     args = parser.parse_args()
155 |     conf_dict = conf_lib.load_conf(args.conf_path)
156 |     use_gpu = True if conf_dict.get('use_gpu', 'False') == 'True' else False
157 |     main(conf_dict, use_cuda=use_gpu)
158 | 


--------------------------------------------------------------------------------
/bin/so_labeling/spo_infer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to infer with a trained so model
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import sys
 24 | import argparse
 25 | import ConfigParser
 26 | import codecs
 27 | 
 28 | import numpy as np
 29 | import paddle
 30 | import paddle.fluid as fluid
 31 | 
 32 | import spo_data_reader
 33 | 
 34 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../lib")))
 35 | import conf_lib
 36 | 
 37 |     
 38 | def predict_infer(conf_dict, data_reader, predict_data_path, \
 39 |         predict_result_path, model_path):
 40 |     """
 41 |     Predict with trained models
 42 |     """
 43 |     if len(predict_result_path) > 0:
 44 |         result_writer = open(predict_result_path, 'w')
 45 |     else:
 46 |         result_writer = sys.stdout
 47 | 
 48 |     np.set_printoptions(precision=3)
 49 |     if len(model_path) == 0:
 50 |         return
 51 | 
 52 |     place = fluid.CPUPlace()
 53 |     word = fluid.layers.data(
 54 |         name='word_data', shape=[1], dtype='int64', lod_level=1)
 55 |     postag = fluid.layers.data(
 56 |         name='token_pos', shape=[1], dtype='int64', lod_level=1)
 57 |     p_word = fluid.layers.data(
 58 |         name='p_word', shape=[1], dtype='int64', lod_level=1)
 59 |     feeder = fluid.DataFeeder(feed_list=[word, postag, p_word], place=place)
 60 |     exe = fluid.Executor(place)
 61 | 
 62 |     test_batch_reader = paddle.batch(
 63 |         paddle.reader.buffered(data_reader.get_predict_reader\
 64 |             (predict_data_path, need_input=True, need_label=False),
 65 |             size=8192),
 66 |         batch_size=conf_dict['batch_size'])
 67 |     inference_scope = fluid.core.Scope()
 68 |     text_spo_dic = {}  #final triples
 69 |     with fluid.scope_guard(inference_scope):
 70 |         [inference_program, feed_target_names, fetch_targets] = \
 71 |                 fluid.io.load_inference_model(
 72 |                 model_path, exe, params_filename='params')
 73 | 
 74 |         # batch
 75 |         batch_id = 0
 76 |         for data in test_batch_reader():
 77 |             feeder_data = []
 78 |             input_data = []
 79 |             for item in data:
 80 |                 feeder_data.append(item[1:])
 81 |                 input_data.append(item[0])
 82 |             results = exe.run(inference_program, feed=feeder.feed(feeder_data),
 83 |                               fetch_list=fetch_targets, return_numpy=False)
 84 |             tag_split_idx = results[0].lod()[0]
 85 |             label_tag_scores = np.array(results[0])
 86 |             # sentence
 87 |             print >> sys.stderr, 'batch_id=', batch_id
 88 |             for sent_idx, tag_idx in enumerate(tag_split_idx[:-1]):
 89 |                 input_sent = input_data[sent_idx].split('\t')[0]
 90 |                 input_p = input_data[sent_idx].split('\t')[1]
 91 |                 tag_scores = label_tag_scores[tag_idx: tag_split_idx[sent_idx + 1]]
 92 |                 # token
 93 |                 tag_list = []
 94 |                 for token_idx, token_tags in enumerate(tag_scores):
 95 |                     tag = data_reader.get_label_output(token_tags)
 96 |                     tag_list.append(tag)
 97 |                 predicted_s_list, predicted_o_list = refine_predict_seq(input_sent, tag_list) 
 98 |                 tag_list_str = json.dumps(tag_list, ensure_ascii=False)
 99 |                 if len(predicted_s_list) == 0 or len(predicted_o_list) == 0:
100 |                     continue
101 |                 else:
102 |                     text = json.loads(input_sent)["text"]
103 |                     predicted_s_list = list(set(predicted_s_list))
104 |                     predicted_o_list = list(set(predicted_o_list))
105 |                     for predicted_s in predicted_s_list:
106 |                         for predicted_o in predicted_o_list:
107 |                             if text not in text_spo_dic:
108 |                                 text_spo_dic[text] = set()
109 |                             text_spo_dic[text].add((predicted_s, input_p, predicted_o))
110 |                     
111 |             batch_id += 1
112 |     output(text_spo_dic, result_writer)
113 | 
114 | 
115 | def refine_predict_seq(input_sent, tag_list):
116 |     """
117 |     Generate s-o list based on the annotation results 
118 |     predicted by the model
119 |     """
120 |     sent_info = json.loads(input_sent)
121 |     word_seq = [item["word"] for item in sent_info["postag"]]
122 |     s_list, o_list= [], []
123 |     token_idx = 0
124 |     while token_idx < len(tag_list):
125 |         if tag_list[token_idx] == 'O':
126 |             token_idx += 1
127 |         elif tag_list[token_idx].endswith('SUB') and tag_list[token_idx].startswith('B'):
128 |             cur_s = word_seq[token_idx]
129 |             token_idx += 1
130 |             while token_idx < len(tag_list) and tag_list[token_idx].endswith('SUB'):
131 |                 cur_s += word_seq[token_idx]
132 |                 token_idx += 1
133 |             s_list.append(cur_s)
134 |         elif tag_list[token_idx].endswith('OBJ') and tag_list[token_idx].startswith('B'):
135 |             cur_o = word_seq[token_idx]
136 |             token_idx += 1
137 |             while token_idx < len(tag_list) and tag_list[token_idx].endswith('OBJ'):
138 |                 cur_o += word_seq[token_idx]
139 |                 token_idx += 1
140 |             o_list.append(cur_o)
141 |         else:
142 |             token_idx += 1
143 |     return s_list, o_list
144 | 
145 | 
146 | def get_schemas(schema_file):
147 |     """"Read the original schema file"""
148 |     schema_dic = {}
149 |     with codecs.open(schema_file, 'r', 'utf-8') as fr:
150 |         for line in fr:
151 |             dic = json.loads(line.strip())
152 |             predicate = dic["predicate"]
153 |             subject_type = dic["subject_type"]
154 |             object_type = dic["object_type"]
155 |             schema_dic[predicate] = (subject_type, object_type)
156 |     return schema_dic
157 | 
158 | 
159 | def output(text_spo_dic, result_writer):
160 |     """
161 |     Output final SPO triples
162 |     """
163 |     schema_dic = {}
164 |     schema_dic = get_schemas('./data/all_50_schemas')
165 |     for text in text_spo_dic:
166 |         text_dic = {"text": text}
167 |         text_dic["spo_list"] = []
168 |         for spo in text_spo_dic[text]:
169 |             dic = {"subject": spo[0], "predicate": spo[1], \
170 |                     "object": spo[2], "subject_type": schema_dic[spo[1]][0], \
171 |                     "object_type": schema_dic[spo[1]][1]}
172 |             text_dic["spo_list"].append(dic)
173 |         result_writer.write(json.dumps(text_dic, ensure_ascii=False).encode('utf-8'))
174 |         result_writer.write('\n')
175 | 
176 | 
177 | def main(conf_dict, model_path, predict_data_path, predict_result_path, \
178 |         use_cuda=False):
179 |     """Predict main function"""
180 |     if use_cuda and not fluid.core.is_compiled_with_cuda():
181 |         return
182 |     data_generator = spo_data_reader.DataReader(
183 |         wordemb_dict_path=conf_dict['word_idx_path'],
184 |         postag_dict_path=conf_dict['postag_dict_path'],
185 |         label_dict_path=conf_dict['so_label_dict_path'],
186 |         p_eng_dict_path=conf_dict['label_dict_path'],
187 |         train_data_list_path=conf_dict['spo_train_data_path'],
188 |         test_data_list_path=conf_dict['spo_test_data_path'])
189 |     
190 |     predict_infer(conf_dict, data_generator, predict_data_path, \
191 |             predict_result_path, model_path)
192 | 
193 | 
194 | if __name__ == '__main__':
195 |     # Load the configuration file
196 |     parser = argparse.ArgumentParser()
197 |     parser.add_argument("--conf_path", type=str,
198 |         help="conf_file_path_for_model. (default: %(default)s)",
199 |         required=True)
200 |     parser.add_argument("--model_path", type=str,
201 |         help="model_path", required=True)
202 |     parser.add_argument("--predict_file", type=str,
203 |         help="the_file_to_be_predicted", required=True)
204 |     parser.add_argument("--result_file", type=str,
205 |         default='', help="the_file_of_predicted_results")
206 | 
207 |     args = parser.parse_args()
208 |     conf_dict = conf_lib.load_conf(args.conf_path)
209 |     model_path = args.model_path
210 |     predict_data_path = args.predict_file
211 |     predict_result_path = args.result_file
212 |     for input_path in [model_path, predict_data_path]:
213 |         if not os.path.exists(input_path):
214 |             raise ValueError("%s not found." % (input_path))
215 | 
216 |     main(conf_dict, model_path, predict_data_path, predict_result_path)
217 | 


--------------------------------------------------------------------------------
/bin/p_classification/p_data_reader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to define a class for p classfication data reader
 19 | """
 20 | 
 21 | import json
 22 | import os
 23 | import codecs
 24 | import sys
 25 | 
 26 | 
 27 | class RcDataReader(object):
 28 |     """
 29 |     class for p classfication data reader
 30 |     """
 31 |     def __init__(self,
 32 |                 wordemb_dict_path,
 33 |                 postag_dict_path,
 34 |                 label_dict_path,
 35 |                 train_data_list_path='',
 36 |                 test_data_list_path=''):
 37 |         self._wordemb_dict_path = wordemb_dict_path
 38 |         self._postag_dict_path = postag_dict_path
 39 |         self._label_dict_path = label_dict_path
 40 |         self.train_data_list_path = train_data_list_path
 41 |         self.test_data_list_path = test_data_list_path
 42 |         self._p_map_eng_dict = {}
 43 |         # load dictionary
 44 |         self._dict_path_dict = {'wordemb_dict': self._wordemb_dict_path,
 45 |                                 'postag_dict': self._postag_dict_path,
 46 |                                 'label_dict': self._label_dict_path}
 47 |         # check if the file exists
 48 |         for input_dict in [wordemb_dict_path, postag_dict_path, \
 49 |                 label_dict_path, train_data_list_path, test_data_list_path]:
 50 |             if not os.path.exists(input_dict):
 51 |                 raise ValueError("%s not found." % (input_dict))
 52 |                 return
 53 | 
 54 |         self._feature_dict = {}
 55 |         self._feature_dict['postag_dict'] = \
 56 |                 self._load_dict_from_file(self._dict_path_dict['postag_dict'])
 57 |         self._feature_dict['wordemb_dict'] = \
 58 |                 self._load_dict_from_file(self._dict_path_dict['wordemb_dict'])
 59 |         self._feature_dict['label_dict'] = \
 60 |                 self._load_label_dict(self._dict_path_dict['label_dict'])
 61 |         self._reverse_dict = {name: self._get_reverse_dict(name) for name in
 62 |                               self._dict_path_dict.keys()}
 63 |         self._reverse_dict['eng_map_p_dict'] = self._reverse_p_eng(self._p_map_eng_dict)
 64 |         self._UNK_IDX = 0
 65 | 
 66 |     def _load_label_dict(self, dict_name):
 67 |         """load label dict from file"""
 68 |         label_dict = {}
 69 |         with codecs.open(dict_name, 'r', 'utf-8') as fr:
 70 |             for idx, line in enumerate(fr):
 71 |                 p, p_eng = line.strip().split('\t')
 72 |                 label_dict[p_eng] = idx
 73 |                 self._p_map_eng_dict[p] = p_eng
 74 |         return label_dict
 75 | 
 76 |     def _load_dict_from_file(self, dict_name, bias=0):
 77 |         """
 78 |         Load vocabulary from file.
 79 |         """
 80 |         dict_result = {}
 81 |         with codecs.open(dict_name, 'r', 'utf-8') as f_dict:
 82 |             for idx, line in enumerate(f_dict):
 83 |                 line = line.strip()
 84 |                 dict_result[line] = idx + bias
 85 |         return dict_result
 86 | 
 87 |     def _cal_mark_slot(self, spo_list, sentence):
 88 |         """
 89 |         Calculate the value of the label 
 90 |         """
 91 |         mark_list = [0] * len(self._feature_dict['label_dict'])
 92 |         for spo in spo_list:
 93 |             predicate = spo['predicate']
 94 |             p_idx = self._feature_dict['label_dict'][self._p_map_eng_dict[predicate]]
 95 |             mark_list[p_idx] = 1
 96 |         return mark_list
 97 | 
 98 |     def _is_valid_input_data(self, input_data):
 99 |         """is the input data valid"""
100 |         try:
101 |             dic = json.loads(input_data)
102 |         except:
103 |             return False
104 |         if "text" not in dic or "postag" not in dic or \
105 |                 type(dic["postag"]) is not list:
106 |             return False
107 |         for item in dic['postag']:
108 |             if "word" not in item or "pos" not in item:
109 |                 return False
110 |         return True
111 |     
112 |     def _get_feed_iterator(self, line, need_input=False, need_label=True):
113 |         # verify that the input format of each line meets the format
114 |         if not self._is_valid_input_data(line):
115 |             print >> sys.stderr, 'Format is error'
116 |             return None
117 |         dic = json.loads(line)
118 |         sentence = dic['text']
119 |         sentence_term_list = [item['word'] for item in dic['postag']]
120 |         sentence_pos_list = [item['pos'] for item in dic['postag']]
121 |         sentence_emb_slot = [self._feature_dict['wordemb_dict'].get(w, self._UNK_IDX) \
122 |                 for w in sentence_term_list]
123 |         sentence_pos_slot = [self._feature_dict['postag_dict'].get(pos, self._UNK_IDX) \
124 |                 for pos in sentence_pos_list]
125 |         if 'spo_list' not in dic:
126 |             label_slot = [0] * len(self._feature_dict['label_dict'])
127 |         else:
128 |             label_slot = self._cal_mark_slot(dic['spo_list'], sentence)
129 |         # verify that the feature is valid
130 |         if len(sentence_emb_slot) == 0 or len(sentence_pos_slot) == 0 \
131 |                 or len(label_slot) == 0:
132 |             return None
133 |         feature_slot = [sentence_emb_slot, sentence_pos_slot]
134 |         input_fields = json.dumps(dic, ensure_ascii=False)
135 |         output_slot = feature_slot
136 |         if need_input:
137 |             output_slot = [input_fields] + output_slot
138 |         if need_label:
139 |             output_slot = output_slot + [label_slot]
140 |         return output_slot
141 | 
142 |     def path_reader(self, data_path, need_input=False, need_label=True):
143 |         """Read data from data_path"""
144 |         self._feature_dict['data_keylist'] = []
145 | 
146 |         def reader():
147 |             """Generator"""
148 |             if os.path.isdir(data_path):
149 |                 input_files = os.listdir(data_path)
150 |                 for data_file in input_files:
151 |                     data_file_path = os.path.join(data_path, data_file)
152 |                     for line in open(data_file_path.strip()):
153 |                         sample_result = self._get_feed_iterator(line.strip(), need_input, need_label)
154 |                         if sample_result is None:
155 |                             continue
156 |                         yield tuple(sample_result)
157 |             elif os.path.isfile(data_path):
158 |                 for line in open(data_path.strip()):
159 |                     sample_result = self._get_feed_iterator(line.strip(), need_input, need_label)
160 |                     if sample_result is None:
161 |                         continue
162 |                     yield tuple(sample_result)
163 | 
164 |         return reader
165 | 
166 |     def get_train_reader(self, need_input=False, need_label=True):
167 |         """Data reader during training"""
168 |         return self.path_reader(self.train_data_list_path, need_input, need_label)
169 | 
170 |     def get_test_reader(self, need_input=True, need_label=True):
171 |         """Data reader during test"""
172 |         return self.path_reader(self.test_data_list_path, need_input, need_label)
173 |     
174 |     def get_predict_reader(self, predict_file_path='', need_input=True, need_label=False):
175 |         """Data reader during predict"""
176 |         return self.path_reader(predict_file_path, need_input, need_label)
177 | 
178 |     def get_dict(self, dict_name):
179 |         """Return dict"""
180 |         if dict_name not in self._feature_dict:
181 |             raise ValueError("dict name %s not found." % (dict_name))
182 |         return self._feature_dict[dict_name]
183 | 
184 |     def get_all_dict_name(self):
185 |         """Get name of all dict"""
186 |         return self._feature_dict.keys()
187 | 
188 |     def get_dict_size(self, dict_name):
189 |         """Return dict length"""
190 |         if dict_name not in self._feature_dict:
191 |             raise ValueError("dict name %s not found." % (dict_name))
192 |         return len(self._feature_dict[dict_name])
193 | 
194 |     def _get_reverse_dict(self, dict_name):
195 |         dict_reverse = {}
196 |         for key, value in self._feature_dict[dict_name].iteritems():
197 |             dict_reverse[value] = key
198 |         return dict_reverse
199 |     
200 |     def _reverse_p_eng(self, dic):
201 |         dict_reverse = {}
202 |         for key, value in dic.iteritems():
203 |             dict_reverse[value] = key
204 |         return dict_reverse
205 | 
206 |     def get_label_output(self, label_idx):
207 |         """Output final label, used during predict and test"""
208 |         dict_name = 'label_dict'
209 |         if len(self._reverse_dict[dict_name]) == 0:
210 |             self._get_reverse_dict(dict_name)
211 |         p_eng = self._reverse_dict[dict_name][label_idx]
212 |         return self._reverse_dict['eng_map_p_dict'][p_eng]
213 | 
214 | 
215 | if __name__ == '__main__':
216 |     # initialize data generator
217 |     data_generator = RcDataReader(
218 |         wordemb_dict_path='./dict/word_idx',
219 |         postag_dict_path='./dict/postag_dict',
220 |         label_dict_path='./dict/p_eng',
221 |         train_data_list_path='./data/train_data.json',
222 |         test_data_list_path='./data/dev_data.json')
223 | 
224 |     # prepare data reader
225 |     ttt = data_generator.get_test_reader()
226 |     for index, features in enumerate(ttt()):
227 |         input_sent, word_idx_list, postag_list, label_list = features
228 |         print input_sent.encode('utf-8')
229 |         print '1st features:', len(word_idx_list), word_idx_list
230 |         print '2nd features:', len(postag_list), postag_list
231 |         print '3rd features:', len(label_list), '\t', label_list
232 | 


--------------------------------------------------------------------------------
/bin/evaluation/calc_pr.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to calculate precision, recall and f1-value 
 19 | of the predicated results.
 20 | """
 21 | import sys
 22 | import json
 23 | import os
 24 | import zipfile
 25 | import traceback
 26 | import argparse
 27 | import ConfigParser
 28 | reload(sys)
 29 | sys.setdefaultencoding('utf-8')
 30 | 
 31 | SUCCESS = 0
 32 | FILE_ERROR = 1
 33 | ENCODING_ERROR = 2
 34 | JSON_ERROR = 3
 35 | SCHEMA_ERROR = 4
 36 | TEXT_ERROR = 5
 37 | CODE_INFO = ['success', 'file_reading_error', 'encoding_error', 'json_parse_error', \
 38 |         'schema_error', 'input_text_not_in_dataset']
 39 | 
 40 | 
 41 | def del_bookname(entity_name):
 42 |     """delete the book name"""
 43 |     if entity_name.startswith(u'《') and entity_name.endswith(u'》'):
 44 |         entity_name = entity_name[1:-1]
 45 |     return entity_name
 46 | 
 47 | 
 48 | def load_predict_result(predict_filename):
 49 |     """Loads the file to be predicted"""
 50 |     predict_result = {}
 51 |     ret_code = SUCCESS
 52 |     try:
 53 |         predict_file_zip = zipfile.ZipFile(predict_filename)
 54 |     except:
 55 |         ret_code = FILE_ERROR
 56 |         return predict_result, ret_code
 57 |     for predict_file in predict_file_zip.namelist():
 58 |         for line in predict_file_zip.open(predict_file):
 59 |             try:
 60 |                 line = line.decode('utf8').strip()
 61 |             except:
 62 |                 ret_code = ENCODING_ERROR
 63 |                 return predict_result, ret_code
 64 |             try:
 65 |                 json_info = json.loads(line)
 66 |             except:
 67 |                 ret_code = JSON_ERROR
 68 |                 return predict_result, ret_code
 69 |             if 'text' not in json_info or 'spo_list' not in json_info:
 70 |                 ret_code = SCHEMA_ERROR
 71 |                 return predict_result, ret_code
 72 |             sent = json_info['text']
 73 |             spo_set = set()
 74 |             for spo_item in json_info['spo_list']:
 75 |                 if type(spo_item) is not dict or 'subject' not in spo_item \
 76 |                         or 'predicate' not in spo_item \
 77 |                         or 'object' not in spo_item or \
 78 |                         not isinstance(spo_item['subject'], basestring) or \
 79 |                         not isinstance(spo_item['object'], basestring):
 80 |                     ret_code = SCHEMA_ERROR
 81 |                     return predict_result, ret_code
 82 |                 s = del_bookname(spo_item['subject'].lower())
 83 |                 o = del_bookname(spo_item['object'].lower())
 84 |                 spo_set.add((s, spo_item['predicate'], o))
 85 |             predict_result[sent] = spo_set
 86 |     return predict_result, ret_code
 87 | 
 88 | 
 89 | def load_test_dataset(golden_filename):
 90 |     """load golden file"""
 91 |     golden_dict = {}
 92 |     ret_code = SUCCESS
 93 |     with open(golden_filename) as gf:
 94 |         for line in gf:
 95 |             try:
 96 |                 line = line.decode('utf8').strip()
 97 |             except:
 98 |                 ret_code = ENCODING_ERROR
 99 |                 return golden_dict, ret_code
100 |             try:
101 |                 json_info = json.loads(line)
102 |             except:
103 |                 ret_code = JSON_ERROR
104 |                 return golden_dict, ret_code
105 |             try:
106 |                 sent = json_info['text']
107 |                 spo_list = json_info['spo_list']
108 |             except:
109 |                 ret_code = SCHEMA_ERROR
110 |                 return golden_dict, ret_code
111 | 
112 |             spo_result = []
113 |             for item in spo_list:
114 |                 o = del_bookname(item['object'].lower())
115 |                 s = del_bookname(item['subject'].lower())
116 |                 spo_result.append((s, item['predicate'], o))
117 |             spo_result = set(spo_result)
118 |             golden_dict[sent] = spo_result
119 |     return golden_dict, ret_code
120 | 
121 | 
122 | def load_dict(dict_filename):
123 |     """load alias dict"""
124 |     alias_dict = {}
125 |     ret_code = SUCCESS
126 |     if dict_filename == "":
127 |         return alias_dict, ret_code
128 |     try:
129 |         with open(dict_filename) as af:
130 |             for line in af:
131 |                 line = line.decode().strip()
132 |                 words = line.split('\t')
133 |                 alias_dict[words[0].lower()] = set()
134 |                 for alias_word in words[1:]:
135 |                     alias_dict[words[0].lower()].add(alias_word.lower())
136 |     except:
137 |         ret_code = FILE_ERROR
138 |     return alias_dict, ret_code
139 | 
140 | 
141 | def is_alias(spo, normalized_predict_spo, alias_dict):
142 |     """whether is alias as the other triples"""
143 |     (s, p, o) = spo
144 |     s_alias_set = alias_dict.get(s, set())
145 |     s_alias_set.add(s)
146 |     o_alias_set = alias_dict.get(o, set())
147 |     o_alias_set.add(o)
148 |     for s_a in s_alias_set:
149 |         for o_a in o_alias_set:
150 |             if (s_a, p, o_a) in normalized_predict_spo:
151 |                 return True
152 |     return False
153 |     
154 | 
155 | def del_duplicate(predict_spo_set, alias_dict):
156 |     """delete synonyms triples in predict result"""
157 |     normalized_predict_spo = set()
158 |     for spo in predict_spo_set:
159 |         if spo not in normalized_predict_spo and \
160 |                 not is_alias(spo, normalized_predict_spo, alias_dict):
161 |             normalized_predict_spo.add(spo)
162 |     return normalized_predict_spo
163 | 
164 | 
165 | def is_recall_correct(golden_spo, predict_spo_set, alias_dict, loc_dict):
166 |     """if the correct spo is recall"""
167 |     if golden_spo in predict_spo_set:
168 |         return True
169 |     (golden_s, golden_p, golden_o) = golden_spo
170 |     loc_golden_o_set = loc_dict.get(golden_o, set())
171 |     for spo in predict_spo_set:
172 |         (s, p, o) = spo
173 |         if p != golden_p:
174 |             continue
175 |         s_alias_set = alias_dict.get(s, set())
176 |         s_alias_set.add(s)
177 |         o_alias_set = alias_dict.get(o, set())
178 |         o_alias_set.add(o)
179 |         if golden_s in s_alias_set and golden_o in o_alias_set:
180 |             return True
181 |         if golden_s in s_alias_set and o in loc_golden_o_set:
182 |             return True
183 |     return False
184 | 
185 | 
186 | def is_spo_correct(spo, golden_spo_set, alias_dict, loc_dict):
187 |     """if the spo is correct"""
188 |     if spo in golden_spo_set:
189 |         return True
190 |     (s, p, o) = spo
191 |     #alias dictionary
192 |     s_alias_set = alias_dict.get(s, set())
193 |     s_alias_set.add(s)
194 |     o_alias_set = alias_dict.get(o, set())
195 |     o_alias_set.add(o)
196 |     for s_a in s_alias_set:
197 |         for o_a in o_alias_set:
198 |             if (s_a, p, o_a) in golden_spo_set:
199 |                 return True
200 |     for golden_spo in golden_spo_set:
201 |         (golden_s, golden_p, golden_o) = golden_spo
202 |         golden_o_set = loc_dict.get(golden_o, set())
203 |         if golden_s in s_alias_set and p == golden_p and o in golden_o_set:
204 |             return True
205 |     return False
206 | 
207 | 
208 | def calc_pr(predict_filename, alias_filename, location_filename, \
209 |         golden_filename):
210 |     """calculate precision, recall, f1"""
211 |     ret_info = {}
212 |     #load location dict
213 |     loc_dict, ret_code = load_dict(location_filename)
214 |     if ret_code != SUCCESS:
215 |         ret_info['errorCode'] = ret_code
216 |         ret_info['errorMsg'] = CODE_INFO[ret_code]
217 |         return ret_info
218 | 
219 |     #load alias dict
220 |     alias_dict, ret_code = load_dict(alias_filename)
221 |     if ret_code != SUCCESS:
222 |         ret_info['errorCode'] = ret_code
223 |         ret_info['errorMsg'] = CODE_INFO[ret_code]
224 |         return ret_info
225 |     #load test dataset
226 |     golden_dict, ret_code = load_test_dataset(golden_filename)
227 |     if ret_code != SUCCESS:
228 |         ret_info['errorCode'] = ret_code
229 |         ret_info['errorMsg'] = CODE_INFO[ret_code]
230 |         return ret_info
231 |     #load predict result
232 |     predict_result, ret_code = load_predict_result(predict_filename)
233 |     if ret_code != SUCCESS:
234 |         ret_info['errorCode'] = ret_code
235 |         ret_info['errorMsg'] = CODE_INFO[ret_code]
236 |         return ret_info
237 |     
238 |     #evaluation
239 |     correct_sum, predict_sum, recall_sum, recall_correct_sum = 0.0, 0.0, 0.0, 0.0
240 |     for sent in golden_dict:
241 |         golden_spo_set = golden_dict[sent]
242 |         predict_spo_set = predict_result.get(sent, set())
243 |         normalized_predict_spo = del_duplicate(predict_spo_set, alias_dict)
244 |         recall_sum += len(golden_spo_set)
245 |         predict_sum += len(normalized_predict_spo)
246 |         for spo in normalized_predict_spo:
247 |             if is_spo_correct(spo, golden_spo_set, alias_dict, loc_dict):
248 |                 correct_sum += 1
249 |         for golden_spo in golden_spo_set:
250 |             if is_recall_correct(golden_spo, predict_spo_set, alias_dict, loc_dict):
251 |                 recall_correct_sum += 1
252 |     print >> sys.stderr, 'correct spo num = ', correct_sum
253 |     print >> sys.stderr, 'submitted spo num = ', predict_sum
254 |     print >> sys.stderr, 'golden set spo num = ', recall_sum
255 |     print >> sys.stderr, 'submitted recall spo num = ', recall_correct_sum
256 |     precision = correct_sum / predict_sum if predict_sum > 0 else 0.0
257 |     recall = recall_correct_sum / recall_sum if recall_sum > 0 else 0.0
258 |     f1 = 2 * precision * recall / (precision + recall) \
259 |             if precision + recall > 0 else 0.0
260 |     precision = round(precision, 4)
261 |     recall = round(recall, 4)
262 |     f1 = round(f1, 4)
263 |     ret_info['errorCode'] = SUCCESS
264 |     ret_info['errorMsg'] = CODE_INFO[SUCCESS]
265 |     ret_info['data'] = []
266 |     ret_info['data'].append({'name': 'precision', 'value': precision})
267 |     ret_info['data'].append({'name': 'recall', 'value': recall})
268 |     ret_info['data'].append({'name': 'f1-score', 'value': f1})
269 |     return ret_info       
270 | 
271 | 
272 | if __name__ == '__main__':
273 |     reload(sys)
274 |     sys.setdefaultencoding('utf-8')
275 |     parser = argparse.ArgumentParser()
276 |     parser.add_argument("--golden_file", type=str,
277 |         help="true spo results", required=True)
278 |     parser.add_argument("--predict_file", type=str,
279 |         help="spo results predicted", required=True)
280 |     parser.add_argument("--loc_file", type=str,
281 |         default='', help="location entities of various granularity")
282 |     parser.add_argument("--alias_file", type=str,
283 |         default='', help="entities alias dictionary")
284 |     args = parser.parse_args()
285 |     golden_filename = args.golden_file
286 |     predict_filename = args.predict_file
287 |     location_filename = args.loc_file
288 |     alias_filename = args.alias_file
289 |     ret_info = calc_pr(predict_filename, alias_filename, location_filename, \
290 |             golden_filename)
291 |     print json.dumps(ret_info)
292 | 


--------------------------------------------------------------------------------
/bin/so_labeling/spo_data_reader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | ########################################################
  3 | # Copyright (c) 2019, Baidu Inc. All rights reserved.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # imitations under the License.
 16 | ########################################################
 17 | """
 18 | This module to define a class for data reader
 19 | """
 20 | 
 21 | import json
 22 | import random
 23 | import os
 24 | import re
 25 | import codecs
 26 | import sys
 27 | 
 28 | 
 29 | class DataReader(object):
 30 |     """
 31 |     class for data reader 
 32 |     """
 33 |     def __init__(self,
 34 |                 wordemb_dict_path,
 35 |                 postag_dict_path,
 36 |                 label_dict_path,
 37 |                 p_eng_dict_path,
 38 |                 train_data_list_path='',
 39 |                 test_data_list_path=''):
 40 |         self._wordemb_dict_path = wordemb_dict_path
 41 |         self._postag_dict_path = postag_dict_path
 42 |         self._label_dict_path = label_dict_path
 43 |         self.train_data_list_path = train_data_list_path
 44 |         self.test_data_list_path = test_data_list_path
 45 |         self._p_map_eng_dict = {}
 46 |         # load dictionary
 47 | 
 48 |         self._dict_path_dict = {'wordemb_dict': self._wordemb_dict_path,
 49 |                                 'postag_dict': self._postag_dict_path,
 50 |                                 'so_label_dict': self._label_dict_path}
 51 |         # check if the file exists
 52 |         for input_dict in [wordemb_dict_path, postag_dict_path, \
 53 |                 label_dict_path, train_data_list_path, test_data_list_path]:
 54 |             if not os.path.exists(input_dict):
 55 |                 raise ValueError("%s not found." % (input_dict))
 56 |                 return
 57 | 
 58 |         self._feature_dict = {}
 59 |         self._feature_dict = {name: self._load_dict_from_file(self._dict_path_dict[name]) \
 60 |                 for name in self._dict_path_dict.keys()}
 61 |         self._p_map_eng_dict = self._load_p_eng_dict(p_eng_dict_path)
 62 |         self._reverse_dict = {name: self._get_reverse_dict(name) for name in
 63 |                               self._dict_path_dict.keys()}
 64 |         self._UNK_IDX = 0
 65 | 
 66 |     def _load_p_eng_dict(self, dict_name):
 67 |         """load label dict from file"""
 68 |         p_eng_dict = {}
 69 |         with codecs.open(dict_name, 'r', 'utf-8') as fr:
 70 |             for idx, line in enumerate(fr):
 71 |                 p, p_eng = line.strip().split('\t')
 72 |                 p_eng_dict[p] = p_eng
 73 |         return p_eng_dict
 74 | 
 75 |     def _load_dict_from_file(self, dict_name, bias=0):
 76 |         """
 77 |         Load vocabulary from file.
 78 |         """
 79 |         dict_result = {}
 80 |         with codecs.open(dict_name, 'r', 'utf-8') as f_dict:
 81 |             for idx, line in enumerate(f_dict):
 82 |                 line = line.strip()
 83 |                 dict_result[line] = idx + bias
 84 |         return dict_result
 85 | 
 86 |     def _add_item_offset(self, token, sentence):
 87 |         """Get the start and end offset of a token in a sentence"""
 88 |         s_pattern = re.compile(re.escape(token), re.I)
 89 |         token_offset_list = []
 90 |         for m in s_pattern.finditer(sentence):
 91 |             token_offset_list.append((m.group(), m.start(), m.end()))
 92 |         return token_offset_list
 93 |     
 94 |     def _cal_item_pos(self, target_offset, idx_list):
 95 |         """Get the index list where the token is located"""
 96 |         target_idx = []
 97 |         for target in target_offset:
 98 |             start, end = target[1], target[2]
 99 |             cur_idx = []
100 |             for i, idx in enumerate(idx_list):
101 |                 if idx >= start and idx < end:
102 |                     cur_idx.append(i)
103 |             if len(cur_idx) > 0:
104 |                 target_idx.append(cur_idx)
105 |         return target_idx
106 |     
107 |     def _get_token_idx(self, sentence_term_list, sentence):
108 |         """Get the start offset of every token"""
109 |         token_idx_list = []
110 |         start_idx = 0
111 |         for sent_term in sentence_term_list:
112 |             if start_idx >= len(sentence):
113 |                 break
114 |             token_idx_list.append(start_idx)
115 |             start_idx += len(sent_term)
116 |         return token_idx_list
117 | 
118 |     def _cal_mark_slot(self, spo_list, sentence, p, token_idx_list):
119 |         """Calculate the value of the label"""
120 |         mark_list = ['O'] * len(token_idx_list)
121 |         for spo in spo_list:
122 |             predicate = spo['predicate']
123 |             if predicate != p:
124 |                 continue
125 |             sub = spo['subject']
126 |             obj = spo['object']
127 |             s_idx_list = self._cal_item_pos(self._add_item_offset(sub, sentence), \
128 |                     token_idx_list)
129 |             o_idx_list = self._cal_item_pos(self._add_item_offset(obj, sentence), \
130 |                     token_idx_list)
131 |             if len(s_idx_list) == 0 or len(o_idx_list) == 0:
132 |                 continue
133 |             for s_idx in s_idx_list:
134 |                 if len(s_idx) == 1:
135 |                     mark_list[s_idx[0]] = 'B-SUB'
136 |                 elif len(s_idx) == 2:
137 |                     mark_list[s_idx[0]] = 'B-SUB'
138 |                     mark_list[s_idx[1]] = 'E-SUB'
139 |                 else:
140 |                     mark_list[s_idx[0]] = 'B-SUB'
141 |                     mark_list[s_idx[-1]] = 'E-SUB'
142 |                     for idx in range(1, len(s_idx) - 1):
143 |                         mark_list[s_idx[idx]] = 'I-SUB'
144 |             for o_idx in o_idx_list:
145 |                 if len(o_idx) == 1:
146 |                     mark_list[o_idx[0]] = 'B-OBJ'
147 |                 elif len(o_idx) == 2:
148 |                     mark_list[o_idx[0]] = 'B-OBJ'
149 |                     mark_list[o_idx[1]] = 'E-OBJ'
150 |                 else:
151 |                     mark_list[o_idx[0]] = 'B-OBJ'
152 |                     mark_list[o_idx[-1]] = 'E-OBJ'
153 |                     for idx in range(1, len(o_idx) - 1):
154 |                         mark_list[o_idx[idx]] = 'I-OBJ'
155 |         return mark_list
156 | 
157 |     def _is_valid_input_data(self, input_line):
158 |         """is the input data valid"""
159 |         try:
160 |             dic, p = input_line.strip().decode('utf-8').split('\t')
161 |             dic = json.loads(dic)
162 |         except:
163 |             return False
164 |         if "text" not in dic or "postag" not in dic or \
165 |                 type(dic["postag"]) is not list:
166 |             return False
167 |         for item in dic['postag']:
168 |             if "word" not in item or "pos" not in item:
169 |                 return False
170 |         return True
171 | 
172 |     def _get_feed_iterator(self, line, need_input=False, need_label=True):
173 |         # verify that the input format of each line meets the format
174 |         if not self._is_valid_input_data(line):
175 |             print >> sys.stderr, 'Format is error'
176 |             return None
177 |         dic, p = line.strip().decode('utf-8').split('\t')
178 |         dic = json.loads(dic)
179 |         sentence = dic['text']
180 |         sentence_term_list = [item['word'] for item in dic['postag']]
181 |         token_idx_list = self._get_token_idx(sentence_term_list, sentence)
182 |         sentence_pos_list = [item['pos'] for item in dic['postag']]
183 |         sentence_emb_slot = [self._feature_dict['wordemb_dict'].get(w, self._UNK_IDX) \
184 |                 for w in sentence_term_list]
185 |         sentence_pos_slot = [self._feature_dict['postag_dict'].get(pos, self._UNK_IDX) \
186 |                 for pos in sentence_pos_list]
187 |         p_emb_slot = [self._feature_dict['wordemb_dict'].get(p, self._UNK_IDX)] * len(sentence_term_list)
188 |         if 'spo_list' not in dic:
189 |             label_slot = [self._feature_dict['so_label_dict']['O']] * \
190 |                     len(sentence_term_list)
191 |         else:
192 |             label_slot = self._cal_mark_slot(dic['spo_list'], sentence, p, token_idx_list)
193 |             label_slot = [self._feature_dict['so_label_dict'][label] for label in label_slot]
194 |         feature_slot = [sentence_emb_slot, sentence_pos_slot, p_emb_slot]
195 |         input_fields = "\t".join([json.dumps(dic, ensure_ascii=False), p])
196 |         output_slot = feature_slot
197 |         #verify the feature is valid or not
198 |         if len(sentence_emb_slot) == 0 or len(sentence_pos_slot) == 0 \
199 |                 or len(label_slot) == 0:
200 |             return None
201 |         if need_input:
202 |             output_slot = [input_fields] + output_slot
203 |         if need_label:
204 |             output_slot = output_slot + [label_slot]
205 |         return output_slot
206 | 
207 |     def path_reader(self, data_path, need_input=False, need_label=True):
208 |         """Read data from data_path"""
209 |         def reader():
210 |             """Generator"""
211 |             if os.path.isdir(data_path):
212 |                 input_files = os.listdir(data_path)
213 |                 for data_file in input_files:
214 |                     data_file_path = os.path.join(data_path, data_file)
215 |                     for line in open(data_file_path.strip()):
216 |                         sample_result = self._get_feed_iterator(line, need_input, need_label)
217 |                         if sample_result is None:
218 |                             continue
219 |                         yield tuple(sample_result)
220 |             elif os.path.isfile(data_path):
221 |                 for line in open(data_path.strip()):
222 |                     sample_result = self._get_feed_iterator(line, need_input, need_label)
223 |                     if sample_result is None:
224 |                         continue
225 |                     yield tuple(sample_result)
226 | 
227 |         return reader
228 | 
229 |     def get_train_reader(self, need_input=False, need_label=True):
230 |         """Data reader during training"""
231 |         return self.path_reader(self.train_data_list_path, need_input, need_label)
232 | 
233 |     def get_test_reader(self, need_input=True, need_label=True):
234 |         """Data reader during test"""
235 |         return self.path_reader(self.test_data_list_path, need_input, need_label)
236 | 
237 |     def get_predict_reader(self, predict_file_path='', \
238 |             need_input=False, need_label=False):
239 |         """Data reader during predict"""
240 |         return self.path_reader(predict_file_path, need_input, need_label)
241 | 
242 |     def get_dict(self, dict_name):
243 |         """Return dict"""
244 |         if dict_name not in self._feature_dict:
245 |             raise ValueError("dict name %s not found." % (dict_name))
246 |         return self._feature_dict[dict_name]
247 | 
248 |     def get_all_dict_name(self):
249 |         """Get name of all dict"""
250 |         return self._feature_dict.keys()
251 | 
252 |     def get_dict_size(self, dict_name):
253 |         """Return dict length"""
254 |         if dict_name not in self._feature_dict:
255 |             raise ValueError("dict name %s not found." % (dict_name))
256 |         return len(self._feature_dict[dict_name])
257 | 
258 |     def _get_reverse_dict(self, dict_name):
259 |         dict_reverse = {}
260 |         for key, value in self._feature_dict[dict_name].iteritems():
261 |             dict_reverse[value] = key
262 |         return dict_reverse
263 | 
264 |     def get_label_output(self, tensor_list):
265 |         """Output final label, used during predict and test"""
266 |         dict_name = 'so_label_dict'
267 |         if len(self._reverse_dict[dict_name]) == 0:
268 |             self._get_reverse_dict(dict_name)
269 | 
270 |         max_idx = tensor_list.argmax()
271 |         return self._reverse_dict[dict_name].get(max_idx, 0)
272 | 
273 | 
274 | if __name__ == '__main__':
275 |     # initialize data generator
276 |     data_generator = DataReader(
277 |         wordemb_dict_path='./dict/word_idx',
278 |         postag_dict_path='./dict/postag_dict',
279 |         label_dict_path='./dict/label_dict',
280 |         p_eng_dict_path='./dict/p_eng',
281 |         train_data_list_path='./data/train_data.p',
282 |         test_data_list_path='./data/dev_data.p')
283 | 
284 |     # prepare data reader
285 |     ttt = data_generator.get_test_reader()
286 |     for index, features in enumerate(ttt()):
287 |         input_sent, word_idx_list, postag_list, p_idx, label_list = features
288 |         print input_sent.encode('utf-8')
289 |         print '1st features:', len(word_idx_list), word_idx_list
290 |         print '2nd features:', len(postag_list), postag_list
291 |         print '3rd features:', len(p_idx), p_idx
292 |         print '4th features:', len(label_list), label_list
293 | 


--------------------------------------------------------------------------------
/data/test_demo_spo.json:
--------------------------------------------------------------------------------
  1 | {"text": "刺跗逍遥蛛，Philodromus spinitarsis （Simon, 1895），为蜘蛛目(Araneae)逍遥蛛科逍遥蛛属的一种蜘蛛", "spo_list": [{"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "蜘蛛目", "subject": "刺跗逍遥蛛"}]}
  2 | {"text": "《清穿之胤禛福晋》是连载在晋江文学城的小说，作者是青玉铭瑄", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "晋江文学城", "subject": "清穿之胤禛福晋"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "青玉铭瑄", "subject": "清穿之胤禛福晋"}]}
  3 | {"text": "《改变人生的经典故事》是2009年中国纺织出版社出版的图书，作者是王蔚", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "中国纺织出版社", "subject": "改变人生的经典故事"}]}
  4 | {"text": "《十七岁的天空》是陈映蓉执导的爱情喜剧同志电影，由杨佑宁、周群达、金勤、季宏全等主演", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "陈映蓉", "subject": "十七岁的天空"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "金勤", "subject": "十七岁的天空"}]}
  5 | {"text": "《激情创造财富》是2011年6月1日新星出版社出版的图书，作者是克里斯多夫·霍华德", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "新星出版社", "subject": "激情创造财富"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "霍华德", "subject": "激情创造财富"}]}
  6 | {"text": "陈匡辉，男，汉族，江西南康人，1964年9月出生，1984年7月参加工作，1989年12月加入中国共产党，大学学历，农学学士，江西农业大学牧医系畜牧专业毕业", "spo_list": [{"predicate": "毕业院校", "object_type": "学校", "subject_type": "人物", "object": "江西农业大学", "subject": "陈匡辉"}, {"predicate": "民族", "object_type": "Text", "subject_type": "人物", "object": "汉族", "subject": "陈匡辉"}, {"predicate": "国籍", "object_type": "国家", "subject_type": "人物", "object": "中国", "subject": "陈匡辉"}]}
  7 | {"text": "黄花大闺女，那个年代，金钱至上的年代，能抛家（富裕的娘家）舍业（已是名角的京韵大鼓行业）跟着二婚的郭德纲，还带个油瓶郭麒麟，王慧，王慧，不但眼毒，还有大智慧啊", "spo_list": [{"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "郭德纲", "subject": "郭麒麟"}]}
  8 | {"text": "朴信惠比崔泰俊大一岁，作为中央大学戏剧系前后辈，二人出道后就是亲近的朋友，后发展成恋人", "spo_list": [{"predicate": "毕业院校", "object_type": "学校", "subject_type": "人物", "object": "中央大学", "subject": "朴信惠"}]}
  9 | {"text": "朴树，1973年出生于南京，父母都是北大教授，但他却唯独喜爱摇滚，出道多年唱歌很多经典歌曲，比如：《平凡之路》，《那些花儿》，《白桦林》，《生如夏花》，《她在睡梦中》等，都是如今歌坛的经典作品，更是因《平凡之路》一曲成名", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "朴树", "subject": "平凡之路"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "朴树", "subject": "那些花儿"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "朴树", "subject": "白桦林"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "朴树", "subject": "她在睡梦中"}]}
 10 | {"text": "《重生之我们都是好孩子》是翻飞的记忆创作的网络小说，发表于17K小说网", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "翻飞的记忆", "subject": "重生之我们都是好孩子"}]}
 11 | {"text": "《孙伯翔谈艺录》是2005年天津人美出版社出版的图书，作者是刘运峰", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "天津人美出版社", "subject": "孙伯翔谈艺录"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "刘运峰", "subject": "孙伯翔谈艺录"}]}
 12 | {"text": "《契兽师》是一部在17k小说网连载的小说", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "17k小说网", "subject": "契兽师"}]}
 13 | {"text": "2贡献徐建光教授师从我国著名的手外科、显微外科专家、中国工程院院士顾玉东教授", "spo_list": [{"predicate": "国籍", "object_type": "国家", "subject_type": "人物", "object": "中国", "subject": "顾玉东"}]}
 14 | {"text": "近两年，公司先后投资1500万元刘新才男，汉族，1963年10月出生", "spo_list": [{"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1963", "subject": "刘新才"}, {"predicate": "民族", "object_type": "Text", "subject_type": "人物", "object": "汉族", "subject": "刘新才"}]}
 15 | {"text": "克吕士科学仪器（上海）有限公司于2015年12月21日在上海市工商局登记成立", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "2015年12月21日", "subject": "克吕士科学仪器（上海）有限公司"}]}
 16 | {"text": "《非常接触》是由尤小刚执导，巍子、邬倩倩、廖京生、盖丽丽、戴娇倩主演的侦探悬疑剧，于2001年6月在北京电视台首播", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "巍子", "subject": "非常接触"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "戴娇倩", "subject": "非常接触"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "廖京生", "subject": "非常接触"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "邬倩倩", "subject": "非常接触"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "尤小刚", "subject": "非常接触"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "盖丽丽", "subject": "非常接触"}]}
 17 | {"text": "涂序生，男，1942年生于景德镇，祖籍江西南昌", "spo_list": [{"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1942", "subject": "涂序生"}, {"predicate": "祖籍", "object_type": "地点", "subject_type": "人物", "object": "江西南昌", "subject": "涂序生"}]}
 18 | {"text": "《冤家路窄：逃婚也有爱》是17k小说网已完结的言情小说，小说作者是萧黎草", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "17k小说网", "subject": "冤家路窄：逃婚也有爱"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "萧黎草", "subject": "冤家路窄：逃婚也有爱"}]}
 19 | {"text": "《河上的爱情》贾樟柯导演2008年在苏州老城拍摄的一部短片", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "贾樟柯", "subject": "河上的爱情"}]}
 20 | {"text": "在如此兼具权威性与流行性的全球华语歌曲排行榜中，同曲不同词的两首歌曲能同时进入全球榜TOP10尚属首次，胡彦斌《男人KTV》以其超人气之势蝉联冠军，其音乐实力绝不容质疑6", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "胡彦斌", "subject": "男人ktv"}]}
 21 | {"text": "《龙虎智多星》是陈勋奇担任导演，李修贤、曾志伟、利智主演的动作犯罪片", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "陈勋奇", "subject": "龙虎智多星"}]}
 22 | {"text": "2009年2月，参演刘江执导，海清、黄海波领衔主演的都市情感剧《媳妇的美好时代》，饰演毛豆豆的爸爸，由此开始被媒体称为“国民父亲”、“温情老爸”", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "黄海波", "subject": "媳妇的美好时代"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "刘江", "subject": "媳妇的美好时代"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "海清", "subject": "媳妇的美好时代"}]}
 23 | {"text": "马庆雄，又名马汉珪，原籍广东潮阳和平镇下寨乡，1927年9月5日出生于泰国", "spo_list": [{"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "广东潮阳和平镇下寨乡", "subject": "马庆雄"}]}
 24 | {"text": "《西乡殿》是日本放送协会（NHK）制作的大河剧，由野田雄介、梶原登城、盆子原诚执导，中园美保担任编剧，铃木亮平主演1，于2018年1月7日首播", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "梶原登城", "subject": "西乡殿"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "日本放送协会", "subject": "西乡殿"}, {"predicate": "编剧", "object_type": "人物", "subject_type": "影视作品", "object": "中园美保", "subject": "西乡殿"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "野田雄介", "subject": "西乡殿"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "盆子原诚", "subject": "西乡殿"}]}
 25 | {"text": "《皇上请休了我》是叶子忻创作的网络小说，发表于起点网", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "叶子忻", "subject": "皇上请休了我"}]}
 26 | {"text": "《爱情合约》是西安天河影视有限公司、陕旅影视集团联出品的都市情感家庭剧，由王军执导，林申、高露等主演1", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "高露", "subject": "爱情合约"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "王军", "subject": "爱情合约"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "西安天河影视有限公司、陕旅影视集团", "subject": "爱情合约"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "林申", "subject": "爱情合约"}]}
 27 | {"text": "《青春烈火》原名雅典娜女神，是由文化中国、强视传媒、博海影视、博纳影业联合出品的一部民国激战年代情感大剧，由叶璇、刘恩佑、莫小棋、巫迪文李蓓蕾领衔主演，著名动作导演谭俏执导，故事背景发生在1932年的上海租界，讲述了一位以“雅典娜”为代号的“叛谍狂花”游走于国仇与家恨之间浴血抗战的传奇故事", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "巫迪文", "subject": "青春烈火"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "谭俏", "subject": "青春烈火"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "博纳", "subject": "青春烈火"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "文化中国", "subject": "青春烈火"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "巫迪文", "subject": "雅典娜女神"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "李蓓蕾", "subject": "雅典娜女神"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "谭俏", "subject": "雅典娜女神"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "文化中国", "subject": "雅典娜女神"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "莫小棋", "subject": "雅典娜女神"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "强视", "subject": "青春烈火"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "莫小棋", "subject": "青春烈火"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "刘恩佑", "subject": "雅典娜女神"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "刘恩佑", "subject": "青春烈火"}]}
 28 | {"text": "《韩信》是由中国台湾导演陈聪明执导，蔡天送 、蔡天琳编剧，杨丽花、陈小咪、司马玉娇、小凤仙、唐美云、青蓉、柯玉枝、潘丽丽、陈亚兰、吴梅芳、吴翠娥主演的历史、传记类电视歌仔戏，《韩信》是1985年的中国台湾拍摄的一部电视歌仔戏", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "司马玉娇", "subject": "韩信"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "陈聪明", "subject": "韩信"}, {"predicate": "编剧", "object_type": "人物", "subject_type": "影视作品", "object": "蔡天送", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "吴翠娥", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "陈小咪", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "吴梅芳", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "青蓉", "subject": "韩信"}, {"predicate": "上映时间", "object_type": "Date", "subject_type": "影视作品", "object": "1985年", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "唐美云", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "陈亚兰", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "柯玉枝", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "潘丽丽", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "小凤仙", "subject": "韩信"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "杨丽花", "subject": "韩信"}, {"predicate": "编剧", "object_type": "人物", "subject_type": "影视作品", "object": "蔡天琳", "subject": "韩信"}]}
 29 | {"text": "《爱的种子》是林子祥的音乐作品，郑国江作曲，收录在《好气连祥》专辑中", "spo_list": [{"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "郑国江", "subject": "爱的种子"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "林子祥", "subject": "爱的种子"}, {"predicate": "所属专辑", "object_type": "音乐专辑", "subject_type": "歌曲", "object": "好气连祥", "subject": "爱的种子"}]}
 30 | {"text": "由本·阿弗莱克执导的新片《逃离德黑兰》就是讲述了FBI假冒成好莱坞电影拍摄剧组解救人质的故事", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "本·阿弗莱克", "subject": "逃离德黑兰"}]}
 31 | {"text": "原唱蒋姗倍现场演唱《红尘情歌》", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "蒋姗倍", "subject": "红尘情歌"}]}
 32 | {"text": "暗妃是由无计春留住创作的，已完结与潇湘书院", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "无计春留住", "subject": "暗妃"}, {"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "潇湘书院", "subject": "暗妃"}]}
 33 | {"text": "《古世》是连载于云中书城的网络小说，作者是未弱", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "未弱", "subject": "古世"}, {"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "云中书城", "subject": "古世"}]}
 34 | {"text": "2016年蒋欣凭借在《欢乐颂》中饰演的“凤凰女”樊胜美再次成为霸占各大网站热搜的话题之王", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "蒋欣", "subject": "欢乐颂"}]}
 35 | {"text": "甜蜜明星夫妻6：林志颖、陈若仪，明星与粉丝之恋", "spo_list": [{"predicate": "丈夫", "object_type": "人物", "subject_type": "人物", "object": "林志颖", "subject": "陈若仪"}]}
 36 | {"text": "《我是传说》是韩国SBS电视台于2010年8月2日首播的月火剧", "spo_list": [{"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "韩国SBS电视台", "subject": "我是传说"}, {"predicate": "上映时间", "object_type": "Date", "subject_type": "影视作品", "object": "2010年8月2日", "subject": "我是传说"}]}
 37 | {"text": "《帝国文明之崛起》是文武不就生所著的一本图书，小说类型是游戏网游，在起点中文网连载", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "起点中文网", "subject": "帝国文明之崛起"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "文武不就生", "subject": "帝国文明之崛起"}]}
 38 | {"text": "杨镛，浙江湖州双林人，研习书画二十余载，喜作花鸟，兼习书法", "spo_list": [{"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "浙江湖州双林", "subject": "杨镛"}]}
 39 | {"text": "威利·考利-斯坦（Willie Cauley-Stein），1993年8月18日出生于美国堪萨斯州斯皮尔维尔（Spearville, KS），美国职业篮球运动员，司职中锋，效力于NBA萨克拉门托国王队", "spo_list": [{"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1993年8月18日", "subject": "威利·考利-斯坦"}, {"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "美国堪萨斯州斯皮尔维尔", "subject": "威利·考利-斯坦"}, {"predicate": "国籍", "object_type": "国家", "subject_type": "人物", "object": "美国", "subject": "威利·考利-斯坦"}]}
 40 | {"text": "《为了爱梦一生》是一首高胜美演唱的歌曲，由陈乐融作词、许冠杰作曲，由陈进兴、尤景仰编曲，收录于专辑《经典金选2 爱上一个不回家的人》中，由上格唱片于1992年3月1日发行", "spo_list": [{"predicate": "作词", "object_type": "人物", "subject_type": "歌曲", "object": "陈乐融", "subject": "为了爱梦一生"}, {"predicate": "所属专辑", "object_type": "音乐专辑", "subject_type": "歌曲", "object": "经典金选2 爱上一个不回家的人", "subject": "为了爱梦一生"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "高胜美", "subject": "为了爱梦一生"}, {"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "许冠杰", "subject": "为了爱梦一生"}]}
 41 | {"text": "蝾螈科：有尾目的1科，通常全变态，偶有童体型，均有肺（个别属退化或残迹状），睾丸分叶，肛腺三对，体内受精", "spo_list": [{"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "有尾目", "subject": "蝾螈科"}]}
 42 | {"text": "《恋爱操作 01》是2007年角川出版社出版的图书，作者是莲川爱", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "角川", "subject": "恋爱操作 01"}]}
 43 | {"text": "《潜能量（管理者掌权的14个关键点）》是2003年中国华侨出版社出版的图书，作者是方略", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "中国华侨出版社", "subject": "潜能量（管理者掌权的14个关键点）"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "方略", "subject": "潜能量（管理者掌权的14个关键点）"}]}
 44 | {"text": "和妈妈李丽珍一起生活的许倚榕，如今也有19岁，她长得亭亭玉立，摆脱老爸许愿基因，越来越像蜜桃老妈，果然女大十八变", "spo_list": [{"predicate": "母亲", "object_type": "人物", "subject_type": "人物", "object": "李丽珍", "subject": "许倚榕"}, {"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "许愿", "subject": "许倚榕"}]}
 45 | {"text": "叶亹（生卒年月不详），字世黾，号龟峰，明代福建诏安县三都宝桥（深桥镇上营村）人", "spo_list": [{"predicate": "字", "object_type": "Text", "subject_type": "历史人物", "object": "世黾", "subject": "叶亹"}, {"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "福建诏安县三都宝桥", "subject": "叶亹"}, {"predicate": "号", "object_type": "Text", "subject_type": "历史人物", "object": "龟峰", "subject": "叶亹"}]}
 46 | {"text": "秦哲为 男，汉族，1936年12月生，甘肃张掖币人", "spo_list": [{"predicate": "民族", "object_type": "Text", "subject_type": "人物", "object": "汉族", "subject": "秦哲为"}, {"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "甘肃张掖", "subject": "秦哲为"}]}
 47 | {"text": "柞新小卷蛾是昆虫纲、鳞翅目、卷蛾科、新小卷蛾亚科、新小卷蛾属的动物", "spo_list": [{"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "鳞翅目", "subject": "卷蛾科"}, {"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "鳞翅目", "subject": "柞新小卷蛾"}]}
 48 | {"text": "盒果藤属，旋花科，茄目，约25种，分布于热带地区，我国有盒果藤1种，产台湾、广东、广西和云南", "spo_list": [{"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "茄目", "subject": "旋花科"}, {"predicate": "目", "object_type": "目", "subject_type": "生物", "object": "茄目", "subject": "盒果藤属"}]}
 49 | {"text": "爱施德是深圳市爱施德股份有限公司的简称，公司成立于1998年6月，注册资本999100000元，是一家致力于全球最新移动通讯产品、数码电子产品的引进和推广的上市公司（股票代码：002416）", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "1998年6月", "subject": "爱施德"}]}
 50 | {"text": "贺聪是葛优的媳妇，年青时刻是个美术老师，是葛优成名前意识的", "spo_list": [{"predicate": "丈夫", "object_type": "人物", "subject_type": "人物", "object": "葛优", "subject": "贺聪"}]}
 51 | {"text": "2009年，亚历克斯·梅金将这个短片重新加工和扩充，拍摄成一部标准长片版《旅馆大堂对面》", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "亚历克斯·梅金", "subject": "旅馆大堂对面"}]}
 52 | {"text": "一位女舞者上场，满脸迷妹相的跟韬韬说：我带来一首你的歌，《我是大主宰》黄子韬：这歌我已经不喜欢了", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "黄子韬", "subject": "我是大主宰"}]}
 53 | {"text": "《林蛙养殖》是2006年中国农业出版社出版的图书，作者是刘学龙", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "刘学龙", "subject": "林蛙养殖"}, {"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "中国农业出版社", "subject": "林蛙养殖"}]}
 54 | {"text": "北京爱瑞森园林绿化有限公司于2012年02月20日在平谷分局登记成立", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "2012年02月20日", "subject": "北京爱瑞森园林绿化有限公司"}]}
 55 | {"text": "基本资料球员:博格道夫  生日:1979年2月26日  身高:185cm  体重:81kg  加盟日期:1995-7-1  球衣号码:999  效力球队:布伦瑞克  场上位置:中场球队阵容前锋  库姆贝拉,墨克尔,兰迪,吉安卢卡", "spo_list": [{"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1979年2月26日", "subject": "博格道夫"}, {"predicate": "身高", "object_type": "Number", "subject_type": "人物", "object": "185cm", "subject": "博格道夫"}]}
 56 | {"text": "郭晓玲，台湾著名企业家郭台铭与林淑如的女儿", "spo_list": [{"predicate": "丈夫", "object_type": "人物", "subject_type": "人物", "object": "郭台铭", "subject": "林淑如"}, {"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "郭台铭", "subject": "郭晓玲"}, {"predicate": "母亲", "object_type": "人物", "subject_type": "人物", "object": "林淑如", "subject": "郭晓玲"}]}
 57 | {"text": "《稍安勿躁》是由宋金笑执导，宋金笑、范逸臣、陈冲、王宏伟等主演的一部粉红色喜剧片，于2012年9月7日全国上映", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "宋金笑", "subject": "稍安勿躁"}, {"predicate": "上映时间", "object_type": "Date", "subject_type": "影视作品", "object": "2012年9月7日", "subject": "稍安勿躁"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "范逸臣", "subject": "稍安勿躁"}]}
 58 | {"text": "江淑娜鲁冰花作词：姚谦　作曲：陈阳", "spo_list": [{"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "陈阳", "subject": "鲁冰花"}, {"predicate": "作词", "object_type": "人物", "subject_type": "歌曲", "object": "姚谦", "subject": "鲁冰花"}]}
 59 | {"text": "2004年再次操刀，徐静蕾自编、自导、自演的电影《一个陌生女人的来信》获得第52届西班牙圣塞巴斯蒂安电影节最佳导演奖", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "徐静蕾", "subject": "一个陌生女人的来信"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "徐静蕾", "subject": "一个陌生女人的来信"}]}
 60 | {"text": "寂寞飞行是江美琪演唱的歌曲，收录在其专辑《想起》中", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "江美琪", "subject": "寂寞飞行"}, {"predicate": "所属专辑", "object_type": "音乐专辑", "subject_type": "歌曲", "object": "想起", "subject": "寂寞飞行"}]}
 61 | {"text": "《一个女人的史诗》是严歌苓所著的一本小说", "spo_list": [{"predicate": "编剧", "object_type": "人物", "subject_type": "影视作品", "object": "严歌苓", "subject": "一个女人的史诗"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "严歌苓", "subject": "一个女人的史诗"}]}
 62 | {"text": "吴标，字召臣，号雨亭，江苏常熟人，清代画家", "spo_list": [{"predicate": "号", "object_type": "Text", "subject_type": "历史人物", "object": "雨亭", "subject": "吴标"}, {"predicate": "字", "object_type": "Text", "subject_type": "历史人物", "object": "召臣", "subject": "吴标"}, {"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "江苏常熟", "subject": "吴标"}]}
 63 | {"text": "《名利你得到》是一首由卢国沾填词，顾嘉辉谱曲，林子祥演唱的歌曲", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "林子祥", "subject": "名利你得到"}, {"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "顾嘉辉", "subject": "名利你得到"}, {"predicate": "作词", "object_type": "人物", "subject_type": "歌曲", "object": "卢国沾", "subject": "名利你得到"}]}
 64 | {"text": "《穿越之灭世神妃》是起点中文网连载的一部古装言情小说，作者是我爱zpp", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "我爱zpp", "subject": "穿越之灭世神妃"}, {"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "起点中文网", "subject": "穿越之灭世神妃"}]}
 65 | {"text": "《战逆九川》是连载于17k小说网的小说，作者是落雨尘缘", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "17k小说网", "subject": "战逆九川"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "落雨尘缘", "subject": "战逆九川"}]}
 66 | {"text": "高晓松就不同了，虽然他的歌在我们青春年少里留下了很深的印象，但现在他真的是变化太大了，无论是歌曲还是在奇葩说中的表现，都和少年时候那个同桌的你作词作曲的人对不上号啊", "spo_list": [{"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "高晓松", "subject": "同桌的你"}, {"predicate": "作词", "object_type": "人物", "subject_type": "歌曲", "object": "高晓松", "subject": "同桌的你"}]}
 67 | {"text": "3、《柳叶刀》是张建栋导演与王学兵第六次合作", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "张建栋", "subject": "柳叶刀"}]}
 68 | {"text": "《没有播出的故事》是2001年中国对外翻译出版公司出版的图书，作者是李仲生", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "李仲生", "subject": "没有播出的故事"}, {"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "中国对外翻译出版公司", "subject": "没有播出的故事"}]}
 69 | {"text": "中国国际企业合作公司成立于1991年2月，是经中华人民共和国国务院批准成立，国务院国资委直属的全民所有制大型涉外企业，现并入中国诚通控股集团，是从事国际经济技术交流、进出口贸易、展览广告及人力资源咨询与服务业务的综合性经济实体", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "1991年2月", "subject": "中国国际企业合作公司"}]}
 70 | {"text": "《歌手》迎来终极踢馆歌手，霍尊带着《伊人如梦》《卷珠帘》来了", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "霍尊", "subject": "卷珠帘"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "霍尊", "subject": "伊人如梦"}]}
 71 | {"text": "近日，刘恺威被媒体拍到与父母和女儿“小糯米”在深湾游艇俱乐部共吃开年饭，全程不见妻子杨幂的踪影，当媒体问及刘恺威为什么杨幂不在场时，刘恺威只回了句“新年快乐，多谢”，就不再回应", "spo_list": [{"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "刘恺威", "subject": "小糯米"}, {"predicate": "丈夫", "object_type": "人物", "subject_type": "人物", "object": "刘恺威", "subject": "杨幂"}]}
 72 | {"text": "校园生活网创始人、站长，互联网爱好者，2011年6月创办济南校园生活网", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "机构", "object": "2011年6月", "subject": "校园生活网"}]}
 73 | {"text": "《使徒行者》是由嘉映影业、邵氏兄弟国际影业、烈火影业等联合出品的警匪片，由文伟鸿执导，王晶、乐易玲监制，关皓月担任编剧，古天乐、张家辉、吴镇宇、佘诗曼、许绍雄、李光洁、张慧雯等主演", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "佘诗曼", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "李光洁", "subject": "使徒行者"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "嘉映影业", "subject": "使徒行者"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "文伟鸿", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "吴镇宇", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "张家辉", "subject": "使徒行者"}, {"predicate": "出品公司", "object_type": "企业", "subject_type": "影视作品", "object": "烈火影业", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "张慧雯", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "许绍雄", "subject": "使徒行者"}, {"predicate": "制片人", "object_type": "人物", "subject_type": "影视作品", "object": "乐易玲", "subject": "使徒行者"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "古天乐", "subject": "使徒行者"}, {"predicate": "编剧", "object_type": "人物", "subject_type": "影视作品", "object": "关皓月", "subject": "使徒行者"}]}
 74 | {"text": "刘一鸣，男，生于1958年，江苏省苏州市相城区阳澄湖人，自小随祖父、父亲学习雕刻", "spo_list": [{"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "江苏省苏州市相城区", "subject": "刘一鸣"}, {"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1958", "subject": "刘一鸣"}]}
 75 | {"text": "《我为书狂》是2004年由新世界出版社出版的一部作品，作者是石涛", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "新世界出版社", "subject": "我为书狂"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "石涛", "subject": "我为书狂"}]}
 76 | {"text": "《末世之超级融合》是在起点中文网上连载的一部科幻幻想小说，作者是龙蛋蛋小坏蛋", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "起点中文网", "subject": "末世之超级融合"}, {"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "起点中文网", "subject": "超级融合"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "龙蛋蛋小坏蛋", "subject": "末世之超级融合"}]}
 77 | {"text": "《丽兹与迪克》是由劳埃德·克莱默执导的传记片，琳赛·洛翰、格兰特·鲍尔等参加演出", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "琳赛·洛翰", "subject": "丽兹与迪克"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "劳埃德·克莱默", "subject": "丽兹与迪克"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "格兰特·鲍尔", "subject": "丽兹与迪克"}]}
 78 | {"text": "《科技统计学》是1994年中国人民大学出版社出版的图书，作者是孙学范", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "中国人民大学出版社", "subject": "科技统计学"}]}
 79 | {"text": "爱到底 之《第六号刘海》  导演：黄子佼  主要演员：  标签：三声有幸 第六号刘海 华山-24  四个小故事组成本片，方文山、黄子佼、陈奕先、九把刀执导，二十多位当红明星首次联手诠释多段式不同风格的爱情故事", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "黄子佼", "subject": "第六号刘海"}]}
 80 | {"text": "华帝燃气热水器华帝燃具股份有限公司成立于2001年11月28日，主要从事生产和销售燃气用具、厨房用具、家用电器及企业自有资产、进出口经营业务，华帝产品已形成燃气灶具、热水器（电热水器、燃气热水器和太阳能热水器）、抽油烟机、消毒柜、橱柜等系列产品为主的500多个品种，燃气灶具连续十一年中国产销量，燃气热水器、抽油烟机分别进入全国行业三强", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "2001年11月28日", "subject": "华帝"}]}
 81 | {"text": "1987年，被人誉为“大熊猫之父”的张和民被公派到美国爱达荷大学攻读野生动物与自然保护区管理硕士学位", "spo_list": [{"predicate": "毕业院校", "object_type": "学校", "subject_type": "人物", "object": "爱达荷大学", "subject": "张和民"}]}
 82 | {"text": "细数出轨风波，首先就要说说文章和姚笛了，可以说瞬时间就击垮了文章，而前途大好的姚笛，更是一落千丈，当然这里少不了白百何还有陈羽凡了，当时的轰炸力也是不小于李小璐和贾乃亮的，这不事隔这么久了，白百何仍在沉寂中", "spo_list": [{"predicate": "丈夫", "object_type": "人物", "subject_type": "人物", "object": "陈羽凡", "subject": "白百何"}]}
 83 | {"text": "《Linux培训教程》是2011年清华大学出版社出版的图书，作者是何明", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "清华大学出版社", "subject": "Linux培训教程"}]}
 84 | {"text": "网友在飞机上偶遇两人周迅和李亚鹏曾经是情侣关系，王菲和李亚鹏曾结为夫妻，周迅与窦鹏曾为情侣，窦鹏的兄弟窦唯与王菲曾为夫妻，但是王菲的女儿窦靖童如今与周迅成了闺蜜，不禁让人直呼：娱乐圈真的太小了", "spo_list": [{"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "窦唯", "subject": "窦靖童"}, {"predicate": "母亲", "object_type": "人物", "subject_type": "人物", "object": "王菲", "subject": "窦靖童"}]}
 85 | {"text": "《中国最美旅游线路》是2012年北京航空航天大学出版社出版的图书", "spo_list": [{"predicate": "出版社", "object_type": "出版社", "subject_type": "书籍", "object": "北京航空航天大学出版社", "subject": "中国最美旅游线路"}]}
 86 | {"text": "《凤囚凰》刘宋篇完结之时，容止计谋跳崖，一则让楚玉认清了对他的爱，二则消除了大魏太后对 其的后顾之忧", "spo_list": [{"predicate": "主角", "object_type": "人物", "subject_type": "网络小说", "object": "容止", "subject": "凤囚凰"}]}
 87 | {"text": "《一见如故》是陶钰玉2014年推出的第三张个人新专辑《因为爱你》里的一首歌曲", "spo_list": [{"predicate": "所属专辑", "object_type": "音乐专辑", "subject_type": "歌曲", "object": "因为爱你", "subject": "一见如故"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "陶钰玉", "subject": "一见如故"}]}
 88 | {"text": "姚庆和 男，1933年11月2日出生，辽宁省黑山县人，核工业中南地勘局高级工程师（研究员级）", "spo_list": [{"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "辽宁省黑山县", "subject": "姚庆和"}, {"predicate": "出生日期", "object_type": "Date", "subject_type": "人物", "object": "1933年11月2日", "subject": "姚庆和"}]}
 89 | {"text": "内容简介  《中国教育学史遗稿》是著名教育学专家陈元晖遗著", "spo_list": [{"predicate": "国籍", "object_type": "国家", "subject_type": "人物", "object": "中国", "subject": "陈元晖"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "陈元晖", "subject": "中国教育学史遗稿"}]}
 90 | {"text": "1997年06月网易公司成立公司正式推出全中文搜索引擎服务", "spo_list": [{"predicate": "成立日期", "object_type": "Date", "subject_type": "企业", "object": "1997年06月", "subject": "网易公司"}]}
 91 | {"text": "《马后炮》是南方电视台经济频道的一档新闻评论的节目，由马志海主持，自2005年开播，广受观众欢迎，播出时间为周一至周五晚上22点56到23点12分", "spo_list": [{"predicate": "主持人", "object_type": "人物", "subject_type": "电视综艺", "object": "马志海", "subject": "马后炮"}]}
 92 | {"text": "《魔熊之雷霆咆哮》是百度文学旗下纵横中文网签约作家木雁创作的一部异世大陆小说，小说已于2014-02-09正式发布", "spo_list": [{"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "木雁", "subject": "魔熊之雷霆咆哮"}, {"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "纵横中文网", "subject": "魔熊之雷霆咆哮"}]}
 93 | {"text": "《手拉手》是由吴建飞演唱的歌曲之一", "spo_list": [{"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "吴建飞", "subject": "手拉手"}]}
 94 | {"text": "钟明，男，广东蕉岭县人，汉族，1967年1月生，学历大学，农业推广硕士，1990年7月参加工作，1990年5月加入中国共产党", "spo_list": [{"predicate": "民族", "object_type": "Text", "subject_type": "人物", "object": "汉族", "subject": "钟明"}, {"predicate": "出生地", "object_type": "地点", "subject_type": "人物", "object": "广东蕉岭", "subject": "钟明"}, {"predicate": "民族", "object_type": "Text", "subject_type": "人物", "object": "汉", "subject": "钟明"}]}
 95 | {"text": "《澳门往事》，本剧是由徐子牧导演，吴建飞 、吕晓霖 、刘述、边熙蕾、崔心心等主演", "spo_list": [{"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "徐子牧", "subject": "澳门往事"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "吕晓霖", "subject": "澳门往事"}, {"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "吴建飞", "subject": "澳门往事"}]}
 96 | {"text": "徐志飞，1949年10月出生，1977年毕业于第二军医大学军医系，1988年获胸心外科硕士学位", "spo_list": [{"predicate": "毕业院校", "object_type": "学校", "subject_type": "人物", "object": "于第二军医大学", "subject": "徐志飞"}]}
 97 | {"text": "该剧讲述的是大唐贞观年间，苏凤父子反唐，薛仁贵元帅奉命征剿却惨遭奸人所害，其子薛丁山请缨，以无敌之势夺得帅印，与母亲及妹妹出发西征的故事", "spo_list": [{"predicate": "父亲", "object_type": "人物", "subject_type": "人物", "object": "薛仁贵", "subject": "薛丁山"}]}
 98 | {"text": "《谓我》是收录在古风网络歌手小曲儿2012年12月15日发行的原创专辑《曲倾天下》中的一首歌曲，由Pavane A·G Orison（小鱼萝莉作词），白蔓作曲1", "spo_list": [{"predicate": "作词", "object_type": "人物", "subject_type": "歌曲", "object": "Pavane A·G Orison", "subject": "谓我"}, {"predicate": "所属专辑", "object_type": "音乐专辑", "subject_type": "歌曲", "object": "曲倾天下", "subject": "谓我"}, {"predicate": "作曲", "object_type": "人物", "subject_type": "歌曲", "object": "白蔓", "subject": "谓我"}, {"predicate": "歌手", "object_type": "人物", "subject_type": "歌曲", "object": "小曲儿", "subject": "谓我"}]}
 99 | {"text": "《魔箭》是连载于17k小说网的异界大陆类型小说，作者是风哈哈", "spo_list": [{"predicate": "连载网站", "object_type": "网站", "subject_type": "网络小说", "object": "17k小说网", "subject": "魔箭"}, {"predicate": "作者", "object_type": "人物", "subject_type": "图书作品", "object": "风哈哈", "subject": "魔箭"}]}
100 | {"text": "《开往名古屋的末班列车》是2012年首播的电视剧，由神道俊浩导演，松井玲奈主演", "spo_list": [{"predicate": "主演", "object_type": "人物", "subject_type": "影视作品", "object": "松井玲奈", "subject": "开往名古屋的末班列车"}, {"predicate": "导演", "object_type": "人物", "subject_type": "影视作品", "object": "神道俊浩", "subject": "开往名古屋的末班列车"}]}
101 | 


--------------------------------------------------------------------------------
/data/test_demo.json:
--------------------------------------------------------------------------------
  1 | {"postag": [{"word": "和", "pos": "p"}, {"word": "妈妈", "pos": "n"}, {"word": "李丽珍", "pos": "nr"}, {"word": "一起", "pos": "d"}, {"word": "生活", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "许倚榕", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "如今", "pos": "t"}, {"word": "也有", "pos": "v"}, {"word": "19岁", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "她", "pos": "r"}, {"word": "长", "pos": "v"}, {"word": "得", "pos": "u"}, {"word": "亭亭玉立", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "摆脱", "pos": "v"}, {"word": "老爸", "pos": "n"}, {"word": "许愿", "pos": "v"}, {"word": "基因", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "越来越", "pos": "d"}, {"word": "像", "pos": "v"}, {"word": "蜜桃", "pos": "n"}, {"word": "老妈", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "果然", "pos": "d"}, {"word": "女大十八变", "pos": "nz"}], "text": "和妈妈李丽珍一起生活的许倚榕，如今也有19岁，她长得亭亭玉立，摆脱老爸许愿基因，越来越像蜜桃老妈，果然女大十八变"}
  2 | {"postag": [{"word": "华帝燃气热水器华帝燃具股份有限公司", "pos": "nt"}, {"word": "成立", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "2001年11月28日", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "主要", "pos": "ad"}, {"word": "从事", "pos": "v"}, {"word": "生产", "pos": "vn"}, {"word": "和", "pos": "c"}, {"word": "销售", "pos": "vn"}, {"word": "燃气", "pos": "n"}, {"word": "用具", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "厨房", "pos": "n"}, {"word": "用具", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "家用电器", "pos": "n"}, {"word": "及", "pos": "c"}, {"word": "企业", "pos": "n"}, {"word": "自有", "pos": "v"}, {"word": "资产", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "进出口", "pos": "vn"}, {"word": "经营", "pos": "vn"}, {"word": "业务", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "华帝", "pos": "nt"}, {"word": "产品", "pos": "n"}, {"word": "已", "pos": "d"}, {"word": "形成", "pos": "v"}, {"word": "燃气", "pos": "n"}, {"word": "灶具", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "热水器", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "电", "pos": "n"}, {"word": "热水器", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "燃气", "pos": "vn"}, {"word": "热水器", "pos": "n"}, {"word": "和", "pos": "c"}, {"word": "太阳能", "pos": "n"}, {"word": "热水器", "pos": "n"}, {"word": "）", "pos": "w"}, {"word": "、", "pos": "w"}, {"word": "抽油烟机", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "消毒柜", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "橱柜", "pos": "n"}, {"word": "等", "pos": "u"}, {"word": "系列", "pos": "n"}, {"word": "产品", "pos": "n"}, {"word": "为主", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "500多个", "pos": "m"}, {"word": "品种", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "燃气", "pos": "n"}, {"word": "灶具", "pos": "n"}, {"word": "连续", "pos": "a"}, {"word": "十一年", "pos": "m"}, {"word": "中国", "pos": "ns"}, {"word": "产", "pos": "v"}, {"word": "销量", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "燃气", "pos": "n"}, {"word": "热水器", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "抽油烟机", "pos": "n"}, {"word": "分别", "pos": "d"}, {"word": "进入", "pos": "v"}, {"word": "全国", "pos": "n"}, {"word": "行业", "pos": "n"}, {"word": "三强", "pos": "n"}], "text": "华帝燃气热水器华帝燃具股份有限公司成立于2001年11月28日，主要从事生产和销售燃气用具、厨房用具、家用电器及企业自有资产、进出口经营业务，华帝产品已形成燃气灶具、热水器（电热水器、燃气热水器和太阳能热水器）、抽油烟机、消毒柜、橱柜等系列产品为主的500多个品种，燃气灶具连续十一年中国产销量，燃气热水器、抽油烟机分别进入全国行业三强"}
  3 | {"postag": [{"word": "细数", "pos": "v"}, {"word": "出轨", "pos": "vn"}, {"word": "风波", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "首先", "pos": "r"}, {"word": "就要", "pos": "v"}, {"word": "说说", "pos": "v"}, {"word": "文章", "pos": "n"}, {"word": "和", "pos": "c"}, {"word": "姚笛", "pos": "nr"}, {"word": "了", "pos": "xc"}, {"word": "，", "pos": "w"}, {"word": "可以说", "pos": "v"}, {"word": "瞬", "pos": "vn"}, {"word": "时间", "pos": "n"}, {"word": "就", "pos": "d"}, {"word": "击", "pos": "v"}, {"word": "垮", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "文章", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "而", "pos": "c"}, {"word": "前途", "pos": "n"}, {"word": "大好", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "姚笛", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "更", "pos": "d"}, {"word": "是", "pos": "v"}, {"word": "一落千丈", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "当然", "pos": "d"}, {"word": "这里", "pos": "r"}, {"word": "少不了", "pos": "v"}, {"word": "白百何", "pos": "nr"}, {"word": "还有", "pos": "v"}, {"word": "陈羽凡", "pos": "nr"}, {"word": "了", "pos": "xc"}, {"word": "，", "pos": "w"}, {"word": "当时", "pos": "t"}, {"word": "的", "pos": "u"}, {"word": "轰炸力", "pos": "n"}, {"word": "也是", "pos": "v"}, {"word": "不", "pos": "d"}, {"word": "小于", "pos": "v"}, {"word": "李小璐", "pos": "nr"}, {"word": "和", "pos": "c"}, {"word": "贾乃亮", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "，", "pos": "w"}, {"word": "这", "pos": "r"}, {"word": "不", "pos": "d"}, {"word": "事", "pos": "n"}, {"word": "隔", "pos": "v"}, {"word": "这么", "pos": "r"}, {"word": "久", "pos": "a"}, {"word": "了", "pos": "xc"}, {"word": "，", "pos": "w"}, {"word": "白百何", "pos": "nr"}, {"word": "仍", "pos": "d"}, {"word": "在", "pos": "p"}, {"word": "沉寂", "pos": "an"}, {"word": "中", "pos": "f"}], "text": "细数出轨风波，首先就要说说文章和姚笛了，可以说瞬时间就击垮了文章，而前途大好的姚笛，更是一落千丈，当然这里少不了白百何还有陈羽凡了，当时的轰炸力也是不小于李小璐和贾乃亮的，这不事隔这么久了，白百何仍在沉寂中"}
  4 | {"postag": [{"word": "《", "pos": "w"}, {"word": "西乡殿", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "日本放送协会", "pos": "nt"}, {"word": "（", "pos": "w"}, {"word": "NHK", "pos": "nt"}, {"word": "）", "pos": "w"}, {"word": "制作", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "大河剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "野田雄介", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "梶原登城", "pos": "ns"}, {"word": "、", "pos": "w"}, {"word": "盆子", "pos": "n"}, {"word": "原诚", "pos": "a"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "中园美保", "pos": "nr"}, {"word": "担任", "pos": "v"}, {"word": "编剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "铃木亮平", "pos": "nr"}, {"word": "主演", "pos": "v"}, {"word": "1", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "于", "pos": "p"}, {"word": "2018年1月7日", "pos": "t"}, {"word": "首播", "pos": "v"}], "text": "《西乡殿》是日本放送协会（NHK）制作的大河剧，由野田雄介、梶原登城、盆子原诚执导，中园美保担任编剧，铃木亮平主演1，于2018年1月7日首播"}
  5 | {"postag": [{"word": "原", "pos": "a"}, {"word": "唱", "pos": "vn"}, {"word": "蒋姗倍", "pos": "nr"}, {"word": "现场", "pos": "s"}, {"word": "演唱", "pos": "v"}, {"word": "《", "pos": "w"}, {"word": "红尘情歌", "pos": "nw"}, {"word": "》", "pos": "w"}], "text": "原唱蒋姗倍现场演唱《红尘情歌》"}
  6 | {"postag": [{"word": "2016年", "pos": "t"}, {"word": "蒋欣", "pos": "nr"}, {"word": "凭借", "pos": "p"}, {"word": "在", "pos": "p"}, {"word": "《", "pos": "w"}, {"word": "欢乐颂", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "中", "pos": "f"}, {"word": "饰演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "“", "pos": "w"}, {"word": "凤凰女", "pos": "nz"}, {"word": "”", "pos": "w"}, {"word": "樊胜美", "pos": "nr"}, {"word": "再次", "pos": "d"}, {"word": "成为", "pos": "v"}, {"word": "霸占", "pos": "v"}, {"word": "各", "pos": "r"}, {"word": "大", "pos": "a"}, {"word": "网站", "pos": "n"}, {"word": "热", "pos": "ad"}, {"word": "搜", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "话题", "pos": "n"}, {"word": "之", "pos": "u"}, {"word": "王", "pos": "n"}], "text": "2016年蒋欣凭借在《欢乐颂》中饰演的“凤凰女”樊胜美再次成为霸占各大网站热搜的话题之王"}
  7 | {"postag": [{"word": "《", "pos": "w"}, {"word": "爱情合约", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "西安天河影视有限公司", "pos": "nt"}, {"word": "、", "pos": "w"}, {"word": "陕旅影视集团联", "pos": "nt"}, {"word": "出品", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "都市情感", "pos": "n"}, {"word": "家庭", "pos": "n"}, {"word": "剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "王军", "pos": "n"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "林申", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "高露", "pos": "nr"}, {"word": "等", "pos": "u"}, {"word": "主演", "pos": "n"}, {"word": "1", "pos": "m"}], "text": "《爱情合约》是西安天河影视有限公司、陕旅影视集团联出品的都市情感家庭剧，由王军执导，林申、高露等主演1"}
  8 | {"postag": [{"word": "校园生活网", "pos": "nz"}, {"word": "创始人", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "站长", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "互联网", "pos": "n"}, {"word": "爱好者", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "2011年", "pos": "t"}, {"word": "6月", "pos": "t"}, {"word": "创办", "pos": "v"}, {"word": "济南", "pos": "ns"}, {"word": "校园生活网", "pos": "nz"}], "text": "校园生活网创始人、站长，互联网爱好者，2011年6月创办济南校园生活网"}
  9 | {"postag": [{"word": "黄花", "pos": "n"}, {"word": "大", "pos": "a"}, {"word": "闺女", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "那个年代", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "金钱", "pos": "n"}, {"word": "至上", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "年代", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "能", "pos": "v"}, {"word": "抛", "pos": "v"}, {"word": "家", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "富裕", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "娘家", "pos": "n"}, {"word": "）", "pos": "w"}, {"word": "舍业", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "已", "pos": "d"}, {"word": "是", "pos": "v"}, {"word": "名角", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "京韵大鼓", "pos": "nz"}, {"word": "行业", "pos": "n"}, {"word": "）", "pos": "w"}, {"word": "跟着", "pos": "v"}, {"word": "二婚", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "郭德纲", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "还", "pos": "d"}, {"word": "带", "pos": "v"}, {"word": "个", "pos": "q"}, {"word": "油", "pos": "n"}, {"word": "瓶", "pos": "n"}, {"word": "郭麒麟", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "王慧", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "王慧", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "不", "pos": "d"}, {"word": "但", "pos": "c"}, {"word": "眼", "pos": "n"}, {"word": "毒", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "还有", "pos": "v"}, {"word": "大智慧", "pos": "n"}, {"word": "啊", "pos": "xc"}], "text": "黄花大闺女，那个年代，金钱至上的年代，能抛家（富裕的娘家）舍业（已是名角的京韵大鼓行业）跟着二婚的郭德纲，还带个油瓶郭麒麟，王慧，王慧，不但眼毒，还有大智慧啊"}
 10 | {"postag": [{"word": "《", "pos": "w"}, {"word": "歌手", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "迎来", "pos": "v"}, {"word": "终极", "pos": "n"}, {"word": "踢馆", "pos": "n"}, {"word": "歌手", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "霍尊", "pos": "nr"}, {"word": "带", "pos": "v"}, {"word": "着", "pos": "u"}, {"word": "《", "pos": "w"}, {"word": "伊人如梦", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "卷珠帘", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "来", "pos": "v"}, {"word": "了", "pos": "xc"}], "text": "《歌手》迎来终极踢馆歌手，霍尊带着《伊人如梦》《卷珠帘》来了"}
 11 | {"postag": [{"word": "《", "pos": "w"}, {"word": "皇上请休了我", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "叶子忻", "pos": "nr"}, {"word": "创作", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "网络", "pos": "n"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "发表", "pos": "v"}, {"word": "于起点网", "pos": "nz"}], "text": "《皇上请休了我》是叶子忻创作的网络小说，发表于起点网"}
 12 | {"postag": [{"word": "2009年", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "亚历克斯", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "梅金", "pos": "nr"}, {"word": "将", "pos": "p"}, {"word": "这个", "pos": "r"}, {"word": "短片", "pos": "n"}, {"word": "重新", "pos": "d"}, {"word": "加工", "pos": "v"}, {"word": "和", "pos": "c"}, {"word": "扩充", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "拍摄", "pos": "v"}, {"word": "成", "pos": "v"}, {"word": "一部", "pos": "m"}, {"word": "标准", "pos": "n"}, {"word": "长片版", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "旅馆大堂对面", "pos": "nw"}, {"word": "》", "pos": "w"}], "text": "2009年，亚历克斯·梅金将这个短片重新加工和扩充，拍摄成一部标准长片版《旅馆大堂对面》"}
 13 | {"postag": [{"word": "《", "pos": "w"}, {"word": "没有播出的故事", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2001年", "pos": "t"}, {"word": "中国对外翻译出版公司", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "李仲生", "pos": "nr"}], "text": "《没有播出的故事》是2001年中国对外翻译出版公司出版的图书，作者是李仲生"}
 14 | {"postag": [{"word": "《", "pos": "w"}, {"word": "魔熊之雷霆咆哮", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "百度文学", "pos": "nz"}, {"word": "旗下", "pos": "n"}, {"word": "纵横中文网", "pos": "nz"}, {"word": "签约", "pos": "vn"}, {"word": "作家", "pos": "n"}, {"word": "木雁", "pos": "n"}, {"word": "创作", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "异世大陆", "pos": "nw"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "小说", "pos": "n"}, {"word": "已", "pos": "d"}, {"word": "于", "pos": "p"}, {"word": "2014", "pos": "t"}, {"word": "-", "pos": "w"}, {"word": "02", "pos": "m"}, {"word": "-", "pos": "w"}, {"word": "09", "pos": "m"}, {"word": "正式", "pos": "ad"}, {"word": "发布", "pos": "v"}], "text": "《魔熊之雷霆咆哮》是百度文学旗下纵横中文网签约作家木雁创作的一部异世大陆小说，小说已于2014-02-09正式发布"}
 15 | {"postag": [{"word": "爱到底", "pos": "v"}, {"word": " ", "pos": "w"}, {"word": "之", "pos": "u"}, {"word": "《", "pos": "w"}, {"word": "第六号刘海", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "  ", "pos": "v"}, {"word": "导演", "pos": "n"}, {"word": "：", "pos": "w"}, {"word": "黄子佼", "pos": "nr"}, {"word": "  ", "pos": "v"}, {"word": "主要", "pos": "a"}, {"word": "演员", "pos": "n"}, {"word": "：", "pos": "w"}, {"word": "  ", "pos": "v"}, {"word": "标签", "pos": "n"}, {"word": "：", "pos": "w"}, {"word": "三声有幸 第六号刘海 华山", "pos": "nw"}, {"word": "-", "pos": "w"}, {"word": "24  ", "pos": "m"}, {"word": "四个", "pos": "m"}, {"word": "小故事", "pos": "n"}, {"word": "组成", "pos": "v"}, {"word": "本片", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "方文山", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "黄子佼", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "陈奕先", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "九把刀", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "二十多位", "pos": "m"}, {"word": "当红", "pos": "a"}, {"word": "明星", "pos": "n"}, {"word": "首次", "pos": "m"}, {"word": "联手", "pos": "vd"}, {"word": "诠释", "pos": "v"}, {"word": "多段式", "pos": "a"}, {"word": "不同", "pos": "a"}, {"word": "风格", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "爱情故事", "pos": "n"}], "text": "爱到底 之《第六号刘海》  导演：黄子佼  主要演员：  标签：三声有幸 第六号刘海 华山-24  四个小故事组成本片，方文山、黄子佼、陈奕先、九把刀执导，二十多位当红明星首次联手诠释多段式不同风格的爱情故事"}
 16 | {"postag": [{"word": "《", "pos": "w"}, {"word": "帝国文明之崛起", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "文武", "pos": "n"}, {"word": "不", "pos": "d"}, {"word": "就", "pos": "d"}, {"word": "生", "pos": "v"}, {"word": "所", "pos": "u"}, {"word": "著", "pos": "u"}, {"word": "的", "pos": "u"}, {"word": "一本", "pos": "m"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "小说", "pos": "n"}, {"word": "类型", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "游戏", "pos": "n"}, {"word": "网游", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "在", "pos": "p"}, {"word": "起点中文网", "pos": "nz"}, {"word": "连载", "pos": "v"}], "text": "《帝国文明之崛起》是文武不就生所著的一本图书，小说类型是游戏网游，在起点中文网连载"}
 17 | {"postag": [{"word": "《", "pos": "w"}, {"word": "青春烈火", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "原名", "pos": "n"}, {"word": "雅典娜女神", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "文化", "pos": "n"}, {"word": "中国", "pos": "ns"}, {"word": "、", "pos": "w"}, {"word": "强视", "pos": "v"}, {"word": "传媒", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "博海影视", "pos": "nt"}, {"word": "、", "pos": "w"}, {"word": "博纳影业", "pos": "nt"}, {"word": "联合", "pos": "vd"}, {"word": "出品", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "民国", "pos": "ns"}, {"word": "激战", "pos": "v"}, {"word": "年代", "pos": "n"}, {"word": "情感", "pos": "n"}, {"word": "大", "pos": "a"}, {"word": "剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "叶璇", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "刘恩佑", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "莫小棋", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "巫迪文", "pos": "nr"}, {"word": "李蓓蕾", "pos": "nr"}, {"word": "领衔主演", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "著名", "pos": "a"}, {"word": "动作", "pos": "n"}, {"word": "导演", "pos": "n"}, {"word": "谭俏", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "故事", "pos": "n"}, {"word": "背景", "pos": "n"}, {"word": "发生", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "1932年", "pos": "t"}, {"word": "的", "pos": "u"}, {"word": "上海", "pos": "ns"}, {"word": "租界", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "讲述", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "一位", "pos": "m"}, {"word": "以", "pos": "p"}, {"word": "“", "pos": "w"}, {"word": "雅典娜", "pos": "nr"}, {"word": "”", "pos": "w"}, {"word": "为", "pos": "v"}, {"word": "代号", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "“", "pos": "w"}, {"word": "叛谍狂花", "pos": "nz"}, {"word": "”", "pos": "w"}, {"word": "游走", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "国", "pos": "n"}, {"word": "仇", "pos": "n"}, {"word": "与", "pos": "c"}, {"word": "家", "pos": "n"}, {"word": "恨", "pos": "vn"}, {"word": "之间", "pos": "f"}, {"word": "浴血", "pos": "vd"}, {"word": "抗战", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "传奇故事", "pos": "n"}], "text": "《青春烈火》原名雅典娜女神，是由文化中国、强视传媒、博海影视、博纳影业联合出品的一部民国激战年代情感大剧，由叶璇、刘恩佑、莫小棋、巫迪文李蓓蕾领衔主演，著名动作导演谭俏执导，故事背景发生在1932年的上海租界，讲述了一位以“雅典娜”为代号的“叛谍狂花”游走于国仇与家恨之间浴血抗战的传奇故事"}
 18 | {"postag": [{"word": "网友", "pos": "n"}, {"word": "在", "pos": "p"}, {"word": "飞机", "pos": "n"}, {"word": "上", "pos": "f"}, {"word": "偶遇", "pos": "v"}, {"word": "两人", "pos": "n"}, {"word": "周迅", "pos": "nr"}, {"word": "和", "pos": "c"}, {"word": "李亚鹏", "pos": "nr"}, {"word": "曾经", "pos": "d"}, {"word": "是", "pos": "v"}, {"word": "情侣", "pos": "n"}, {"word": "关系", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "王菲", "pos": "nr"}, {"word": "和", "pos": "c"}, {"word": "李亚鹏", "pos": "nr"}, {"word": "曾", "pos": "d"}, {"word": "结为夫妻", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "周迅", "pos": "nr"}, {"word": "与", "pos": "c"}, {"word": "窦鹏", "pos": "nr"}, {"word": "曾", "pos": "d"}, {"word": "为", "pos": "p"}, {"word": "情侣", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "窦鹏", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "兄弟", "pos": "n"}, {"word": "窦唯", "pos": "nr"}, {"word": "与", "pos": "c"}, {"word": "王菲", "pos": "nr"}, {"word": "曾", "pos": "d"}, {"word": "为", "pos": "p"}, {"word": "夫妻", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "但是", "pos": "c"}, {"word": "王菲", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "女儿", "pos": "n"}, {"word": "窦靖童", "pos": "nr"}, {"word": "如今", "pos": "t"}, {"word": "与", "pos": "p"}, {"word": "周迅", "pos": "nr"}, {"word": "成", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "闺蜜", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "不", "pos": "d"}, {"word": "禁", "pos": "v"}, {"word": "让", "pos": "v"}, {"word": "人", "pos": "n"}, {"word": "直呼", "pos": "v"}, {"word": "：", "pos": "w"}, {"word": "娱乐圈", "pos": "n"}, {"word": "真", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "太", "pos": "d"}, {"word": "小", "pos": "a"}, {"word": "了", "pos": "xc"}], "text": "网友在飞机上偶遇两人周迅和李亚鹏曾经是情侣关系，王菲和李亚鹏曾结为夫妻，周迅与窦鹏曾为情侣，窦鹏的兄弟窦唯与王菲曾为夫妻，但是王菲的女儿窦靖童如今与周迅成了闺蜜，不禁让人直呼：娱乐圈真的太小了"}
 19 | {"postag": [{"word": "《", "pos": "w"}, {"word": "使徒行者", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "嘉", "pos": "n"}, {"word": "映", "pos": "v"}, {"word": "影业", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "邵氏", "pos": "nr"}, {"word": "兄弟", "pos": "n"}, {"word": "国际", "pos": "n"}, {"word": "影业", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "烈火", "pos": "n"}, {"word": "影业", "pos": "n"}, {"word": "等", "pos": "u"}, {"word": "联合", "pos": "vd"}, {"word": "出品", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "警匪片", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "文伟鸿", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "王晶", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "乐易玲", "pos": "nr"}, {"word": "监制", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "关皓月", "pos": "nr"}, {"word": "担任", "pos": "v"}, {"word": "编剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "古天乐", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "张家辉", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "吴镇宇", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "佘诗曼", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "许绍雄", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "李光洁", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "张慧雯", "pos": "nr"}, {"word": "等", "pos": "u"}, {"word": "主演", "pos": "n"}], "text": "《使徒行者》是由嘉映影业、邵氏兄弟国际影业、烈火影业等联合出品的警匪片，由文伟鸿执导，王晶、乐易玲监制，关皓月担任编剧，古天乐、张家辉、吴镇宇、佘诗曼、许绍雄、李光洁、张慧雯等主演"}
 20 | {"postag": [{"word": "《", "pos": "w"}, {"word": "谓我", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "收录", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "古风", "pos": "n"}, {"word": "网络", "pos": "n"}, {"word": "歌手", "pos": "n"}, {"word": "小曲儿", "pos": "nr"}, {"word": "2012年12月15日", "pos": "t"}, {"word": "发行", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "原创", "pos": "vn"}, {"word": "专辑", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "曲倾天下", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "中", "pos": "f"}, {"word": "的", "pos": "u"}, {"word": "一首", "pos": "m"}, {"word": "歌曲", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "Pavane A", "pos": "nz"}, {"word": "·", "pos": "w"}, {"word": "G Orison", "pos": "nz"}, {"word": "（", "pos": "w"}, {"word": "小鱼", "pos": "n"}, {"word": "萝莉", "pos": "n"}, {"word": "作词", "pos": "vn"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "白蔓", "pos": "n"}, {"word": "作曲", "pos": "v"}, {"word": "1", "pos": "m"}], "text": "《谓我》是收录在古风网络歌手小曲儿2012年12月15日发行的原创专辑《曲倾天下》中的一首歌曲，由Pavane A·G Orison（小鱼萝莉作词），白蔓作曲1"}
 21 | {"postag": [{"word": "《", "pos": "w"}, {"word": "手拉手", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "吴建飞", "pos": "nr"}, {"word": "演唱", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "歌曲", "pos": "n"}, {"word": "之", "pos": "u"}, {"word": "一", "pos": "m"}], "text": "《手拉手》是由吴建飞演唱的歌曲之一"}
 22 | {"postag": [{"word": "《", "pos": "w"}, {"word": "凤囚凰", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "刘宋篇", "pos": "nr"}, {"word": "完结", "pos": "v"}, {"word": "之", "pos": "u"}, {"word": "时", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "容止", "pos": "v"}, {"word": "计谋", "pos": "v"}, {"word": "跳崖", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "一则", "pos": "m"}, {"word": "让", "pos": "v"}, {"word": "楚玉", "pos": "nr"}, {"word": "认清", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "对", "pos": "p"}, {"word": "他", "pos": "r"}, {"word": "的", "pos": "u"}, {"word": "爱", "pos": "vn"}, {"word": "，", "pos": "w"}, {"word": "二则", "pos": "n"}, {"word": "消除", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "大魏", "pos": "ns"}, {"word": "太后", "pos": "n"}, {"word": "对", "pos": "p"}, {"word": " ", "pos": "w"}, {"word": "其", "pos": "r"}, {"word": "的", "pos": "u"}, {"word": "后顾之忧", "pos": "n"}], "text": "《凤囚凰》刘宋篇完结之时，容止计谋跳崖，一则让楚玉认清了对他的爱，二则消除了大魏太后对 其的后顾之忧"}
 23 | {"postag": [{"word": "《", "pos": "w"}, {"word": "丽兹与迪克", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "劳埃德", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "克莱默", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "传记片", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "琳赛", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "洛翰", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "格兰特", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "鲍尔等", "pos": "nr"}, {"word": "参加", "pos": "v"}, {"word": "演出", "pos": "vn"}], "text": "《丽兹与迪克》是由劳埃德·克莱默执导的传记片，琳赛·洛翰、格兰特·鲍尔等参加演出"}
 24 | {"postag": [{"word": "《", "pos": "w"}, {"word": "契兽师", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "一部", "pos": "m"}, {"word": "在", "pos": "p"}, {"word": "17k小说网", "pos": "nz"}, {"word": "连载", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "小说", "pos": "n"}], "text": "《契兽师》是一部在17k小说网连载的小说"}
 25 | {"postag": [{"word": "基本", "pos": "a"}, {"word": "资料", "pos": "n"}, {"word": "球员", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "博格道夫  ", "pos": "nr"}, {"word": "生日", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "1979年2月26日", "pos": "t"}, {"word": "  ", "pos": "v"}, {"word": "身高", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "185cm  ", "pos": "m"}, {"word": "体重", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "81kg  ", "pos": "m"}, {"word": "加盟", "pos": "vn"}, {"word": "日期", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "1995", "pos": "m"}, {"word": "-", "pos": "w"}, {"word": "7", "pos": "m"}, {"word": "-", "pos": "w"}, {"word": "1  ", "pos": "m"}, {"word": "球衣", "pos": "n"}, {"word": "号码", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "999  ", "pos": "m"}, {"word": "效力", "pos": "v"}, {"word": "球队", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "布伦瑞克  ", "pos": "ns"}, {"word": "场上", "pos": "s"}, {"word": "位置", "pos": "n"}, {"word": ":", "pos": "w"}, {"word": "中场", "pos": "n"}, {"word": "球队", "pos": "n"}, {"word": "阵容", "pos": "n"}, {"word": "前锋", "pos": "n"}, {"word": "  ", "pos": "v"}, {"word": "库姆贝拉", "pos": "nr"}, {"word": ",", "pos": "w"}, {"word": "墨克尔", "pos": "nr"}, {"word": ",", "pos": "w"}, {"word": "兰迪", "pos": "nr"}, {"word": ",", "pos": "w"}, {"word": "吉安卢卡", "pos": "nr"}], "text": "基本资料球员:博格道夫  生日:1979年2月26日  身高:185cm  体重:81kg  加盟日期:1995-7-1  球衣号码:999  效力球队:布伦瑞克  场上位置:中场球队阵容前锋  库姆贝拉,墨克尔,兰迪,吉安卢卡"}
 26 | {"postag": [{"word": "《", "pos": "w"}, {"word": "澳门往事", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "本剧", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "徐子牧", "pos": "nr"}, {"word": "导演", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "吴建飞", "pos": "nr"}, {"word": " ", "pos": "w"}, {"word": "、", "pos": "w"}, {"word": "吕晓霖", "pos": "nr"}, {"word": " ", "pos": "w"}, {"word": "、", "pos": "w"}, {"word": "刘述", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "边熙蕾", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "崔心心", "pos": "nr"}, {"word": "等", "pos": "u"}, {"word": "主演", "pos": "n"}], "text": "《澳门往事》，本剧是由徐子牧导演，吴建飞 、吕晓霖 、刘述、边熙蕾、崔心心等主演"}
 27 | {"postag": [{"word": "中国国际企业合作公司", "pos": "nt"}, {"word": "成立", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "1991年", "pos": "t"}, {"word": "2月", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "经", "pos": "p"}, {"word": "中华人民共和国国务院", "pos": "nt"}, {"word": "批准", "pos": "v"}, {"word": "成立", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "国务院国资委", "pos": "nt"}, {"word": "直属", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "全民", "pos": "n"}, {"word": "所有制", "pos": "n"}, {"word": "大型", "pos": "a"}, {"word": "涉外", "pos": "vn"}, {"word": "企业", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "现", "pos": "t"}, {"word": "并入", "pos": "v"}, {"word": "中国诚通控股集团", "pos": "nt"}, {"word": "，", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "从事", "pos": "v"}, {"word": "国际", "pos": "n"}, {"word": "经济", "pos": "n"}, {"word": "技术", "pos": "n"}, {"word": "交流", "pos": "vn"}, {"word": "、", "pos": "w"}, {"word": "进出口", "pos": "vn"}, {"word": "贸易", "pos": "vn"}, {"word": "、", "pos": "w"}, {"word": "展览", "pos": "vn"}, {"word": "广告", "pos": "n"}, {"word": "及", "pos": "c"}, {"word": "人力资源", "pos": "n"}, {"word": "咨询", "pos": "vn"}, {"word": "与", "pos": "c"}, {"word": "服务", "pos": "vn"}, {"word": "业务", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "综合性", "pos": "n"}, {"word": "经济", "pos": "n"}, {"word": "实体", "pos": "n"}], "text": "中国国际企业合作公司成立于1991年2月，是经中华人民共和国国务院批准成立，国务院国资委直属的全民所有制大型涉外企业，现并入中国诚通控股集团，是从事国际经济技术交流、进出口贸易、展览广告及人力资源咨询与服务业务的综合性经济实体"}
 28 | {"postag": [{"word": "朴信惠", "pos": "nr"}, {"word": "比", "pos": "p"}, {"word": "崔泰俊", "pos": "nr"}, {"word": "大一岁", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "作为", "pos": "v"}, {"word": "中央大学", "pos": "nt"}, {"word": "戏剧", "pos": "n"}, {"word": "系", "pos": "n"}, {"word": "前", "pos": "f"}, {"word": "后辈", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "二人", "pos": "n"}, {"word": "出道", "pos": "v"}, {"word": "后", "pos": "f"}, {"word": "就是", "pos": "v"}, {"word": "亲近", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "朋友", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "后", "pos": "f"}, {"word": "发展", "pos": "v"}, {"word": "成", "pos": "v"}, {"word": "恋人", "pos": "n"}], "text": "朴信惠比崔泰俊大一岁，作为中央大学戏剧系前后辈，二人出道后就是亲近的朋友，后发展成恋人"}
 29 | {"postag": [{"word": "《", "pos": "w"}, {"word": "穿越之灭世神妃", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "起点中文网", "pos": "nz"}, {"word": "连载", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "古装", "pos": "n"}, {"word": "言情", "pos": "n"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "我", "pos": "r"}, {"word": "爱", "pos": "v"}, {"word": "zpp", "pos": "nz"}], "text": "《穿越之灭世神妃》是起点中文网连载的一部古装言情小说，作者是我爱zpp"}
 30 | {"postag": [{"word": "刘一鸣", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "生于", "pos": "v"}, {"word": "1958年", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "江苏省", "pos": "ns"}, {"word": "苏州市", "pos": "ns"}, {"word": "相城区", "pos": "ns"}, {"word": "阳澄湖", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "自小", "pos": "d"}, {"word": "随", "pos": "p"}, {"word": "祖父", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "父亲", "pos": "n"}, {"word": "学习", "pos": "v"}, {"word": "雕刻", "pos": "v"}], "text": "刘一鸣，男，生于1958年，江苏省苏州市相城区阳澄湖人，自小随祖父、父亲学习雕刻"}
 31 | {"postag": [{"word": "《", "pos": "w"}, {"word": "古世", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "连载", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "云中书城", "pos": "ns"}, {"word": "的", "pos": "u"}, {"word": "网络", "pos": "n"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "未", "pos": "d"}, {"word": "弱", "pos": "a"}], "text": "《古世》是连载于云中书城的网络小说，作者是未弱"}
 32 | {"postag": [{"word": "吴标", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "字", "pos": "n"}, {"word": "召", "pos": "v"}, {"word": "臣", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "号雨亭", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "江苏", "pos": "ns"}, {"word": "常熟", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "清代", "pos": "t"}, {"word": "画家", "pos": "n"}], "text": "吴标，字召臣，号雨亭，江苏常熟人，清代画家"}
 33 | {"postag": [{"word": "2", "pos": "m"}, {"word": "贡献", "pos": "n"}, {"word": "徐建光", "pos": "nr"}, {"word": "教授", "pos": "n"}, {"word": "师从", "pos": "v"}, {"word": "我", "pos": "r"}, {"word": "国", "pos": "n"}, {"word": "著名", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "手外科", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "显微外科", "pos": "nz"}, {"word": "专家", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "中国工程院", "pos": "nt"}, {"word": "院士", "pos": "n"}, {"word": "顾玉东", "pos": "nr"}, {"word": "教授", "pos": "n"}], "text": "2贡献徐建光教授师从我国著名的手外科、显微外科专家、中国工程院院士顾玉东教授"}
 34 | {"postag": [{"word": "《", "pos": "w"}, {"word": "战逆九川", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "连载", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "17k小说网", "pos": "nz"}, {"word": "的", "pos": "u"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "落", "pos": "v"}, {"word": "雨", "pos": "n"}, {"word": "尘缘", "pos": "n"}], "text": "《战逆九川》是连载于17k小说网的小说，作者是落雨尘缘"}
 35 | {"postag": [{"word": "《", "pos": "w"}, {"word": "末世之超级融合", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "起点", "pos": "n"}, {"word": "中文", "pos": "nz"}, {"word": "网上", "pos": "s"}, {"word": "连载", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "科幻", "pos": "n"}, {"word": "幻想小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "龙蛋", "pos": "nr"}, {"word": "蛋小坏蛋", "pos": "v"}], "text": "《末世之超级融合》是在起点中文网上连载的一部科幻幻想小说，作者是龙蛋蛋小坏蛋"}
 36 | {"postag": [{"word": "《", "pos": "w"}, {"word": "林蛙养殖", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2006年", "pos": "t"}, {"word": "中国农业出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "刘学龙", "pos": "nr"}], "text": "《林蛙养殖》是2006年中国农业出版社出版的图书，作者是刘学龙"}
 37 | {"postag": [{"word": "柞新小卷蛾", "pos": "nz"}, {"word": "是", "pos": "v"}, {"word": "昆虫", "pos": "n"}, {"word": "纲", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "鳞翅目", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "卷蛾科", "pos": "nz"}, {"word": "、", "pos": "w"}, {"word": "新", "pos": "a"}, {"word": "小卷蛾", "pos": "nz"}, {"word": "亚科", "pos": "nz"}, {"word": "、", "pos": "w"}, {"word": "新", "pos": "a"}, {"word": "小卷蛾", "pos": "nz"}, {"word": "属", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "动物", "pos": "n"}], "text": "柞新小卷蛾是昆虫纲、鳞翅目、卷蛾科、新小卷蛾亚科、新小卷蛾属的动物"}
 38 | {"postag": [{"word": "贺聪", "pos": "nr"}, {"word": "是", "pos": "v"}, {"word": "葛优", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "媳妇", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "年青", "pos": "a"}, {"word": "时刻", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "个", "pos": "q"}, {"word": "美术", "pos": "n"}, {"word": "老师", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "葛优", "pos": "nr"}, {"word": "成名", "pos": "v"}, {"word": "前", "pos": "f"}, {"word": "意识", "pos": "n"}, {"word": "的", "pos": "u"}], "text": "贺聪是葛优的媳妇，年青时刻是个美术老师，是葛优成名前意识的"}
 39 | {"postag": [{"word": "《", "pos": "w"}, {"word": "河上的爱情", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "贾樟柯", "pos": "nr"}, {"word": "导演", "pos": "n"}, {"word": "2008年", "pos": "t"}, {"word": "在", "pos": "p"}, {"word": "苏州", "pos": "ns"}, {"word": "老城", "pos": "n"}, {"word": "拍摄", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "短片", "pos": "n"}], "text": "《河上的爱情》贾樟柯导演2008年在苏州老城拍摄的一部短片"}
 40 | {"postag": [{"word": "《", "pos": "w"}, {"word": "稍安勿躁", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "宋金笑", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "宋金笑", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "范逸臣", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "陈冲", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "王宏伟", "pos": "nr"}, {"word": "等", "pos": "u"}, {"word": "主演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "粉红色", "pos": "n"}, {"word": "喜剧片", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "于", "pos": "p"}, {"word": "2012年9月7日", "pos": "t"}, {"word": "全国", "pos": "n"}, {"word": "上映", "pos": "v"}], "text": "《稍安勿躁》是由宋金笑执导，宋金笑、范逸臣、陈冲、王宏伟等主演的一部粉红色喜剧片，于2012年9月7日全国上映"}
 41 | {"postag": [{"word": "该剧", "pos": "r"}, {"word": "讲述", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "是", "pos": "v"}, {"word": "大唐", "pos": "ns"}, {"word": "贞观", "pos": "t"}, {"word": "年间", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "苏凤", "pos": "nr"}, {"word": "父子", "pos": "n"}, {"word": "反", "pos": "v"}, {"word": "唐", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "薛仁贵", "pos": "nr"}, {"word": "元帅", "pos": "n"}, {"word": "奉命", "pos": "v"}, {"word": "征", "pos": "v"}, {"word": "剿", "pos": "v"}, {"word": "却", "pos": "d"}, {"word": "惨遭", "pos": "v"}, {"word": "奸人", "pos": "n"}, {"word": "所", "pos": "u"}, {"word": "害", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "其", "pos": "r"}, {"word": "子", "pos": "n"}, {"word": "薛丁山", "pos": "nr"}, {"word": "请缨", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "以", "pos": "p"}, {"word": "无敌", "pos": "a"}, {"word": "之", "pos": "u"}, {"word": "势", "pos": "n"}, {"word": "夺得", "pos": "v"}, {"word": "帅印", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "与", "pos": "p"}, {"word": "母亲", "pos": "n"}, {"word": "及", "pos": "c"}, {"word": "妹妹", "pos": "n"}, {"word": "出发", "pos": "v"}, {"word": "西征", "pos": "vn"}, {"word": "的", "pos": "u"}, {"word": "故事", "pos": "n"}], "text": "该剧讲述的是大唐贞观年间，苏凤父子反唐，薛仁贵元帅奉命征剿却惨遭奸人所害，其子薛丁山请缨，以无敌之势夺得帅印，与母亲及妹妹出发西征的故事"}
 42 | {"postag": [{"word": "《", "pos": "w"}, {"word": "为了爱梦一生", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "一首", "pos": "m"}, {"word": "高胜美", "pos": "nr"}, {"word": "演唱", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "歌曲", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "陈乐融", "pos": "nr"}, {"word": "作词", "pos": "v"}, {"word": "、", "pos": "w"}, {"word": "许冠杰", "pos": "nr"}, {"word": "作曲", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "陈进兴", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "尤景仰", "pos": "nr"}, {"word": "编曲", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "收录", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "专辑", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "经典金选2 爱上一个不回家的人", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "中", "pos": "f"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "上", "pos": "f"}, {"word": "格", "pos": "n"}, {"word": "唱片", "pos": "n"}, {"word": "于", "pos": "p"}, {"word": "1992年3月1日", "pos": "t"}, {"word": "发行", "pos": "v"}], "text": "《为了爱梦一生》是一首高胜美演唱的歌曲，由陈乐融作词、许冠杰作曲，由陈进兴、尤景仰编曲，收录于专辑《经典金选2 爱上一个不回家的人》中，由上格唱片于1992年3月1日发行"}
 43 | {"postag": [{"word": "《", "pos": "w"}, {"word": "马后炮", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "南方电视台经济频道", "pos": "nt"}, {"word": "的", "pos": "u"}, {"word": "一档", "pos": "m"}, {"word": "新闻", "pos": "n"}, {"word": "评论", "pos": "vn"}, {"word": "的", "pos": "u"}, {"word": "节目", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "马志海", "pos": "nr"}, {"word": "主持", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "自", "pos": "p"}, {"word": "2005年", "pos": "t"}, {"word": "开播", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "广", "pos": "ad"}, {"word": "受", "pos": "v"}, {"word": "观众", "pos": "n"}, {"word": "欢迎", "pos": "vn"}, {"word": "，", "pos": "w"}, {"word": "播出", "pos": "vn"}, {"word": "时间", "pos": "n"}, {"word": "为", "pos": "v"}, {"word": "周一", "pos": "t"}, {"word": "至", "pos": "p"}, {"word": "周五", "pos": "t"}, {"word": "晚上", "pos": "t"}, {"word": "22点", "pos": "t"}, {"word": "56", "pos": "m"}, {"word": "到", "pos": "v"}, {"word": "23点12分", "pos": "t"}], "text": "《马后炮》是南方电视台经济频道的一档新闻评论的节目，由马志海主持，自2005年开播，广受观众欢迎，播出时间为周一至周五晚上22点56到23点12分"}
 44 | {"postag": [{"word": "蝾螈科", "pos": "n"}, {"word": "：", "pos": "w"}, {"word": "有", "pos": "v"}, {"word": "尾", "pos": "n"}, {"word": "目的", "pos": "n"}, {"word": "1科", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "通常", "pos": "d"}, {"word": "全", "pos": "a"}, {"word": "变态", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "偶", "pos": "d"}, {"word": "有", "pos": "v"}, {"word": "童", "pos": "n"}, {"word": "体型", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "均有", "pos": "v"}, {"word": "肺", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "个别", "pos": "a"}, {"word": "属", "pos": "v"}, {"word": "退化", "pos": "vn"}, {"word": "或", "pos": "c"}, {"word": "残迹状", "pos": "n"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "睾丸", "pos": "n"}, {"word": "分", "pos": "v"}, {"word": "叶", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "肛腺", "pos": "nz"}, {"word": "三", "pos": "m"}, {"word": "对", "pos": "q"}, {"word": "，", "pos": "w"}, {"word": "体内", "pos": "s"}, {"word": "受精", "pos": "v"}], "text": "蝾螈科：有尾目的1科，通常全变态，偶有童体型，均有肺（个别属退化或残迹状），睾丸分叶，肛腺三对，体内受精"}
 45 | {"postag": [{"word": "近两年", "pos": "f"}, {"word": "，", "pos": "w"}, {"word": "公司", "pos": "n"}, {"word": "先后", "pos": "d"}, {"word": "投资", "pos": "v"}, {"word": "1500万元", "pos": "m"}, {"word": "刘新才", "pos": "nr"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "汉族", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "1963年", "pos": "t"}, {"word": "10月", "pos": "t"}, {"word": "出生", "pos": "v"}], "text": "近两年，公司先后投资1500万元刘新才男，汉族，1963年10月出生"}
 46 | {"postag": [{"word": "由", "pos": "p"}, {"word": "本", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "阿弗莱克", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "新片", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "逃离德黑兰", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "就是", "pos": "v"}, {"word": "讲述", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "FBI", "pos": "nt"}, {"word": "假冒", "pos": "v"}, {"word": "成", "pos": "v"}, {"word": "好莱坞", "pos": "ns"}, {"word": "电影", "pos": "n"}, {"word": "拍摄", "pos": "vn"}, {"word": "剧组", "pos": "n"}, {"word": "解救", "pos": "v"}, {"word": "人质", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "故事", "pos": "n"}], "text": "由本·阿弗莱克执导的新片《逃离德黑兰》就是讲述了FBI假冒成好莱坞电影拍摄剧组解救人质的故事"}
 47 | {"postag": [{"word": "《", "pos": "w"}, {"word": "潜能量（管理者掌权的14个关键点）", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2003年", "pos": "t"}, {"word": "中国华侨出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "方略", "pos": "n"}], "text": "《潜能量（管理者掌权的14个关键点）》是2003年中国华侨出版社出版的图书，作者是方略"}
 48 | {"postag": [{"word": "高晓松", "pos": "nr"}, {"word": "就", "pos": "d"}, {"word": "不同", "pos": "a"}, {"word": "了", "pos": "xc"}, {"word": "，", "pos": "w"}, {"word": "虽然", "pos": "c"}, {"word": "他", "pos": "r"}, {"word": "的", "pos": "u"}, {"word": "歌", "pos": "n"}, {"word": "在", "pos": "p"}, {"word": "我们", "pos": "r"}, {"word": "青春年少", "pos": "n"}, {"word": "里", "pos": "f"}, {"word": "留下", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "很", "pos": "d"}, {"word": "深", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "印象", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "但", "pos": "c"}, {"word": "现在", "pos": "t"}, {"word": "他", "pos": "r"}, {"word": "真", "pos": "a"}, {"word": "的", "pos": "u"}, {"word": "是", "pos": "v"}, {"word": "变化", "pos": "vn"}, {"word": "太", "pos": "d"}, {"word": "大", "pos": "a"}, {"word": "了", "pos": "xc"}, {"word": "，", "pos": "w"}, {"word": "无", "pos": "c"}, {"word": "论", "pos": "c"}, {"word": "是", "pos": "v"}, {"word": "歌曲", "pos": "n"}, {"word": "还是", "pos": "c"}, {"word": "在", "pos": "p"}, {"word": "奇葩说", "pos": "v"}, {"word": "中", "pos": "f"}, {"word": "的", "pos": "u"}, {"word": "表现", "pos": "vn"}, {"word": "，", "pos": "w"}, {"word": "都", "pos": "d"}, {"word": "和", "pos": "p"}, {"word": "少年", "pos": "n"}, {"word": "时候", "pos": "n"}, {"word": "那个", "pos": "r"}, {"word": "同桌", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "你", "pos": "r"}, {"word": "作词", "pos": "v"}, {"word": "作曲", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "人", "pos": "n"}, {"word": "对", "pos": "p"}, {"word": "不", "pos": "d"}, {"word": "上", "pos": "v"}, {"word": "号", "pos": "n"}, {"word": "啊", "pos": "xc"}], "text": "高晓松就不同了，虽然他的歌在我们青春年少里留下了很深的印象，但现在他真的是变化太大了，无论是歌曲还是在奇葩说中的表现，都和少年时候那个同桌的你作词作曲的人对不上号啊"}
 49 | {"postag": [{"word": "《", "pos": "w"}, {"word": "激情创造财富", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2011年6月1日", "pos": "t"}, {"word": "新星出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "克里斯多夫", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "霍华德", "pos": "nr"}], "text": "《激情创造财富》是2011年6月1日新星出版社出版的图书，作者是克里斯多夫·霍华德"}
 50 | {"postag": [{"word": "《", "pos": "w"}, {"word": "韩信", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "中国", "pos": "ns"}, {"word": "台湾", "pos": "ns"}, {"word": "导演", "pos": "n"}, {"word": "陈聪明", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "蔡天送", "pos": "nr"}, {"word": " ", "pos": "w"}, {"word": "、", "pos": "w"}, {"word": "蔡天琳", "pos": "nr"}, {"word": "编剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "杨丽花", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "陈小咪", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "司马玉娇", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "小凤仙", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "唐美云", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "青蓉", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "柯玉枝", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "潘丽丽", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "陈亚兰", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "吴梅芳", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "吴翠娥", "pos": "nr"}, {"word": "主演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "历史", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "传记类", "pos": "n"}, {"word": "电视", "pos": "n"}, {"word": "歌仔戏", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "韩信", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "1985年", "pos": "t"}, {"word": "的", "pos": "u"}, {"word": "中国", "pos": "ns"}, {"word": "台湾", "pos": "ns"}, {"word": "拍摄", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "电视", "pos": "n"}, {"word": "歌仔戏", "pos": "n"}], "text": "《韩信》是由中国台湾导演陈聪明执导，蔡天送 、蔡天琳编剧，杨丽花、陈小咪、司马玉娇、小凤仙、唐美云、青蓉、柯玉枝、潘丽丽、陈亚兰、吴梅芳、吴翠娥主演的历史、传记类电视歌仔戏，《韩信》是1985年的中国台湾拍摄的一部电视歌仔戏"}
 51 | {"postag": [{"word": "涂序生", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "1942年", "pos": "t"}, {"word": "生于", "pos": "v"}, {"word": "景德镇", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "祖籍", "pos": "n"}, {"word": "江西", "pos": "ns"}, {"word": "南昌", "pos": "ns"}], "text": "涂序生，男，1942年生于景德镇，祖籍江西南昌"}
 52 | {"postag": [{"word": "爱施德", "pos": "nr"}, {"word": "是", "pos": "v"}, {"word": "深圳市爱施德股份有限公司", "pos": "nt"}, {"word": "的", "pos": "u"}, {"word": "简称", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "公司", "pos": "n"}, {"word": "成立", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "1998年", "pos": "t"}, {"word": "6月", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "注册资本", "pos": "nz"}, {"word": "999100000元", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "一家", "pos": "m"}, {"word": "致力于", "pos": "v"}, {"word": "全球", "pos": "n"}, {"word": "最新", "pos": "a"}, {"word": "移动", "pos": "vn"}, {"word": "通讯", "pos": "n"}, {"word": "产品", "pos": "n"}, {"word": "、", "pos": "w"}, {"word": "数码", "pos": "n"}, {"word": "电子", "pos": "n"}, {"word": "产品", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "引进", "pos": "vn"}, {"word": "和", "pos": "c"}, {"word": "推广", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "上市公司", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "股票", "pos": "n"}, {"word": "代码", "pos": "n"}, {"word": "：", "pos": "w"}, {"word": "002416", "pos": "nz"}, {"word": "）", "pos": "w"}], "text": "爱施德是深圳市爱施德股份有限公司的简称，公司成立于1998年6月，注册资本999100000元，是一家致力于全球最新移动通讯产品、数码电子产品的引进和推广的上市公司（股票代码：002416）"}
 53 | {"postag": [{"word": "姚庆和", "pos": "nr"}, {"word": " ", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "1933年11月2日", "pos": "t"}, {"word": "出生", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "辽宁省", "pos": "ns"}, {"word": "黑山县", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "核工业中南地勘局", "pos": "nt"}, {"word": "高级", "pos": "a"}, {"word": "工程师", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "研究员级", "pos": "n"}, {"word": "）", "pos": "w"}], "text": "姚庆和 男，1933年11月2日出生，辽宁省黑山县人，核工业中南地勘局高级工程师（研究员级）"}
 54 | {"postag": [{"word": "《", "pos": "w"}, {"word": "我为书狂", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2004年", "pos": "t"}, {"word": "由", "pos": "p"}, {"word": "新世界出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一部", "pos": "m"}, {"word": "作品", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "石涛", "pos": "nr"}], "text": "《我为书狂》是2004年由新世界出版社出版的一部作品，作者是石涛"}
 55 | {"postag": [{"word": "马庆雄", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "又", "pos": "d"}, {"word": "名", "pos": "v"}, {"word": "马汉珪", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "原籍", "pos": "n"}, {"word": "广东", "pos": "ns"}, {"word": "潮阳", "pos": "ns"}, {"word": "和平镇", "pos": "ns"}, {"word": "下寨乡", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "1927年9月5日", "pos": "t"}, {"word": "出生", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "泰国", "pos": "ns"}], "text": "马庆雄，又名马汉珪，原籍广东潮阳和平镇下寨乡，1927年9月5日出生于泰国"}
 56 | {"postag": [{"word": "《", "pos": "w"}, {"word": "科技统计学", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "1994年", "pos": "t"}, {"word": "中国人民大学出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "孙学范", "pos": "nr"}], "text": "《科技统计学》是1994年中国人民大学出版社出版的图书，作者是孙学范"}
 57 | {"postag": [{"word": "克吕士", "pos": "nr"}, {"word": "科学", "pos": "n"}, {"word": "仪器", "pos": "n"}, {"word": "（", "pos": "w"}, {"word": "上海", "pos": "ns"}, {"word": "）有限公司", "pos": "nt"}, {"word": "于", "pos": "p"}, {"word": "2015年12月21日", "pos": "t"}, {"word": "在", "pos": "p"}, {"word": "上海市", "pos": "ns"}, {"word": "工商", "pos": "n"}, {"word": "局", "pos": "n"}, {"word": "登记", "pos": "v"}, {"word": "成立", "pos": "v"}], "text": "克吕士科学仪器（上海）有限公司于2015年12月21日在上海市工商局登记成立"}
 58 | {"postag": [{"word": "《", "pos": "w"}, {"word": "一见如故", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "陶钰玉", "pos": "nr"}, {"word": "2014年", "pos": "t"}, {"word": "推出", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "第三张", "pos": "m"}, {"word": "个人", "pos": "n"}, {"word": "新专辑", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "因为爱你", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "里", "pos": "f"}, {"word": "的", "pos": "u"}, {"word": "一首", "pos": "m"}, {"word": "歌曲", "pos": "n"}], "text": "《一见如故》是陶钰玉2014年推出的第三张个人新专辑《因为爱你》里的一首歌曲"}
 59 | {"postag": [{"word": "内容", "pos": "n"}, {"word": "简介", "pos": "n"}, {"word": "  ", "pos": "v"}, {"word": "《", "pos": "w"}, {"word": "中国教育学史遗稿", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "著名", "pos": "a"}, {"word": "教育学", "pos": "n"}, {"word": "专家", "pos": "n"}, {"word": "陈元晖", "pos": "nr"}, {"word": "遗著", "pos": "n"}], "text": "内容简介  《中国教育学史遗稿》是著名教育学专家陈元晖遗著"}
 60 | {"postag": [{"word": "叶亹", "pos": "nr"}, {"word": "（", "pos": "w"}, {"word": "生卒年月", "pos": "n"}, {"word": "不详", "pos": "a"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "字", "pos": "n"}, {"word": "世黾", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "号", "pos": "n"}, {"word": "龟峰", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "明代", "pos": "t"}, {"word": "福建", "pos": "ns"}, {"word": "诏安县", "pos": "ns"}, {"word": "三都宝桥", "pos": "ns"}, {"word": "（", "pos": "w"}, {"word": "深桥镇", "pos": "ns"}, {"word": "上营村", "pos": "ns"}, {"word": "）", "pos": "w"}, {"word": "人", "pos": "n"}], "text": "叶亹（生卒年月不详），字世黾，号龟峰，明代福建诏安县三都宝桥（深桥镇上营村）人"}
 61 | {"postag": [{"word": "暗妃", "pos": "nr"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "无计春", "pos": "nr"}, {"word": "留住", "pos": "v"}, {"word": "创作", "pos": "vn"}, {"word": "的", "pos": "u"}, {"word": "，", "pos": "w"}, {"word": "已", "pos": "d"}, {"word": "完结", "pos": "v"}, {"word": "与", "pos": "p"}, {"word": "潇湘书院", "pos": "nt"}], "text": "暗妃是由无计春留住创作的，已完结与潇湘书院"}
 62 | {"postag": [{"word": "江淑娜鲁冰花", "pos": "nr"}, {"word": "作词", "pos": "v"}, {"word": "：", "pos": "w"}, {"word": "姚谦", "pos": "nr"}, {"word": "　", "pos": "w"}, {"word": "作曲", "pos": "vn"}, {"word": "：", "pos": "w"}, {"word": "陈阳", "pos": "nr"}], "text": "江淑娜鲁冰花作词：姚谦　作曲：陈阳"}
 63 | {"postag": [{"word": "2009年", "pos": "t"}, {"word": "2月", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "参演", "pos": "v"}, {"word": "刘江", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "海清", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "黄海波", "pos": "nr"}, {"word": "领衔主演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "都市情感", "pos": "n"}, {"word": "剧", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "媳妇的美好时代", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "饰演", "pos": "v"}, {"word": "毛豆豆", "pos": "nz"}, {"word": "的", "pos": "u"}, {"word": "爸爸", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由此", "pos": "d"}, {"word": "开始", "pos": "v"}, {"word": "被", "pos": "p"}, {"word": "媒体", "pos": "n"}, {"word": "称为", "pos": "v"}, {"word": "“", "pos": "w"}, {"word": "国民", "pos": "n"}, {"word": "父亲", "pos": "n"}, {"word": "”", "pos": "w"}, {"word": "、", "pos": "w"}, {"word": "“", "pos": "w"}, {"word": "温情", "pos": "n"}, {"word": "老爸", "pos": "n"}, {"word": "”", "pos": "w"}], "text": "2009年2月，参演刘江执导，海清、黄海波领衔主演的都市情感剧《媳妇的美好时代》，饰演毛豆豆的爸爸，由此开始被媒体称为“国民父亲”、“温情老爸”"}
 64 | {"postag": [{"word": "《", "pos": "w"}, {"word": "孙伯翔谈艺录", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2005年", "pos": "t"}, {"word": "天津", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "美出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "刘运峰", "pos": "nr"}], "text": "《孙伯翔谈艺录》是2005年天津人美出版社出版的图书，作者是刘运峰"}
 65 | {"postag": [{"word": "徐志飞", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "1949年", "pos": "t"}, {"word": "10月", "pos": "t"}, {"word": "出生", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "1977年", "pos": "t"}, {"word": "毕业", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "第二军医大学", "pos": "nt"}, {"word": "军医", "pos": "n"}, {"word": "系", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1988年", "pos": "t"}, {"word": "获", "pos": "v"}, {"word": "胸", "pos": "n"}, {"word": "心", "pos": "n"}, {"word": "外科", "pos": "n"}, {"word": "硕士学位", "pos": "n"}], "text": "徐志飞，1949年10月出生，1977年毕业于第二军医大学军医系，1988年获胸心外科硕士学位"}
 66 | {"postag": [{"word": "《", "pos": "w"}, {"word": "非常接触", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "由", "pos": "p"}, {"word": "尤小刚", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "巍子", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "邬倩倩", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "廖京生", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "盖丽丽", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "戴娇倩", "pos": "nr"}, {"word": "主演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "侦探", "pos": "n"}, {"word": "悬疑", "pos": "n"}, {"word": "剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "于", "pos": "p"}, {"word": "2001年", "pos": "t"}, {"word": "6月", "pos": "t"}, {"word": "在", "pos": "p"}, {"word": "北京电视台", "pos": "nt"}, {"word": "首播", "pos": "v"}], "text": "《非常接触》是由尤小刚执导，巍子、邬倩倩、廖京生、盖丽丽、戴娇倩主演的侦探悬疑剧，于2001年6月在北京电视台首播"}
 67 | {"postag": [{"word": "《", "pos": "w"}, {"word": "龙虎智多星", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "陈勋奇", "pos": "nr"}, {"word": "担任", "pos": "v"}, {"word": "导演", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "李修贤", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "曾志伟", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "利智", "pos": "nr"}, {"word": "主演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "动作", "pos": "n"}, {"word": "犯罪片", "pos": "n"}], "text": "《龙虎智多星》是陈勋奇担任导演，李修贤、曾志伟、利智主演的动作犯罪片"}
 68 | {"postag": [{"word": "《", "pos": "w"}, {"word": "中国最美旅游线路", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2012年", "pos": "t"}, {"word": "北京航空航天大学出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}], "text": "《中国最美旅游线路》是2012年北京航空航天大学出版社出版的图书"}
 69 | {"postag": [{"word": "《", "pos": "w"}, {"word": "名利你得到", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "一首", "pos": "m"}, {"word": "由", "pos": "p"}, {"word": "卢国沾", "pos": "nr"}, {"word": "填词", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "顾嘉辉", "pos": "nr"}, {"word": "谱曲", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "林子祥", "pos": "nr"}, {"word": "演唱", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "歌曲", "pos": "n"}], "text": "《名利你得到》是一首由卢国沾填词，顾嘉辉谱曲，林子祥演唱的歌曲"}
 70 | {"postag": [{"word": "1987年", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "被", "pos": "p"}, {"word": "人", "pos": "n"}, {"word": "誉为", "pos": "v"}, {"word": "“", "pos": "w"}, {"word": "大熊猫", "pos": "n"}, {"word": "之", "pos": "u"}, {"word": "父", "pos": "n"}, {"word": "”", "pos": "w"}, {"word": "的", "pos": "u"}, {"word": "张和民", "pos": "nr"}, {"word": "被", "pos": "p"}, {"word": "公派", "pos": "n"}, {"word": "到", "pos": "v"}, {"word": "美国爱达荷大学", "pos": "nt"}, {"word": "攻读", "pos": "v"}, {"word": "野生动物", "pos": "n"}, {"word": "与", "pos": "c"}, {"word": "自然保护区", "pos": "n"}, {"word": "管理", "pos": "v"}, {"word": "硕士学位", "pos": "n"}], "text": "1987年，被人誉为“大熊猫之父”的张和民被公派到美国爱达荷大学攻读野生动物与自然保护区管理硕士学位"}
 71 | {"postag": [{"word": "甜蜜", "pos": "a"}, {"word": "明星", "pos": "n"}, {"word": "夫妻", "pos": "n"}, {"word": "6", "pos": "m"}, {"word": "：", "pos": "w"}, {"word": "林志颖", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "陈若仪", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "明星", "pos": "n"}, {"word": "与", "pos": "c"}, {"word": "粉丝", "pos": "n"}, {"word": "之", "pos": "u"}, {"word": "恋", "pos": "n"}], "text": "甜蜜明星夫妻6：林志颖、陈若仪，明星与粉丝之恋"}
 72 | {"postag": [{"word": "陈匡辉", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "汉族", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "江西", "pos": "ns"}, {"word": "南康", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1964年", "pos": "t"}, {"word": "9月", "pos": "t"}, {"word": "出生", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "1984年", "pos": "t"}, {"word": "7月", "pos": "t"}, {"word": "参加", "pos": "v"}, {"word": "工作", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1989年", "pos": "t"}, {"word": "12月", "pos": "t"}, {"word": "加入", "pos": "v"}, {"word": "中国共产党", "pos": "nt"}, {"word": "，", "pos": "w"}, {"word": "大学", "pos": "n"}, {"word": "学历", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "农学", "pos": "n"}, {"word": "学士", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "江西农业大学牧医系畜牧专业", "pos": "nt"}, {"word": "毕业", "pos": "v"}], "text": "陈匡辉，男，汉族，江西南康人，1964年9月出生，1984年7月参加工作，1989年12月加入中国共产党，大学学历，农学学士，江西农业大学牧医系畜牧专业毕业"}
 73 | {"postag": [{"word": "《", "pos": "w"}, {"word": "改变人生的经典故事", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2009年", "pos": "t"}, {"word": "中国纺织出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "王蔚", "pos": "nr"}], "text": "《改变人生的经典故事》是2009年中国纺织出版社出版的图书，作者是王蔚"}
 74 | {"postag": [{"word": "郭晓玲", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "台湾", "pos": "ns"}, {"word": "著名", "pos": "a"}, {"word": "企业家", "pos": "n"}, {"word": "郭台铭", "pos": "nr"}, {"word": "与", "pos": "p"}, {"word": "林淑如", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "女儿", "pos": "n"}], "text": "郭晓玲，台湾著名企业家郭台铭与林淑如的女儿"}
 75 | {"postag": [{"word": "1997年", "pos": "t"}, {"word": "06月", "pos": "t"}, {"word": "网易公司", "pos": "nt"}, {"word": "成立", "pos": "v"}, {"word": "公司", "pos": "n"}, {"word": "正式", "pos": "ad"}, {"word": "推出", "pos": "v"}, {"word": "全", "pos": "a"}, {"word": "中文", "pos": "nz"}, {"word": "搜索引擎", "pos": "n"}, {"word": "服务", "pos": "vn"}], "text": "1997年06月网易公司成立公司正式推出全中文搜索引擎服务"}
 76 | {"postag": [{"word": "一位", "pos": "m"}, {"word": "女", "pos": "a"}, {"word": "舞者", "pos": "n"}, {"word": "上场", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "满脸", "pos": "d"}, {"word": "迷", "pos": "v"}, {"word": "妹", "pos": "n"}, {"word": "相", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "跟", "pos": "p"}, {"word": "韬韬", "pos": "nr"}, {"word": "说", "pos": "v"}, {"word": "：", "pos": "w"}, {"word": "我", "pos": "r"}, {"word": "带来", "pos": "v"}, {"word": "一首", "pos": "m"}, {"word": "你的歌", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "我是大主宰", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "黄子韬", "pos": "nr"}, {"word": "：", "pos": "w"}, {"word": "这", "pos": "r"}, {"word": "歌", "pos": "n"}, {"word": "我", "pos": "r"}, {"word": "已经", "pos": "d"}, {"word": "不", "pos": "d"}, {"word": "喜欢", "pos": "v"}, {"word": "了", "pos": "xc"}], "text": "一位女舞者上场，满脸迷妹相的跟韬韬说：我带来一首你的歌，《我是大主宰》黄子韬：这歌我已经不喜欢了"}
 77 | {"postag": [{"word": "《", "pos": "w"}, {"word": "一个女人的史诗", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "严歌苓", "pos": "nr"}, {"word": "所", "pos": "u"}, {"word": "著", "pos": "u"}, {"word": "的", "pos": "u"}, {"word": "一本", "pos": "m"}, {"word": "小说", "pos": "n"}], "text": "《一个女人的史诗》是严歌苓所著的一本小说"}
 78 | {"postag": [{"word": "3", "pos": "m"}, {"word": "、", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "柳叶刀", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "张建栋", "pos": "nr"}, {"word": "导演", "pos": "n"}, {"word": "与", "pos": "p"}, {"word": "王学兵", "pos": "nr"}, {"word": "第六次", "pos": "m"}, {"word": "合作", "pos": "vn"}], "text": "3、《柳叶刀》是张建栋导演与王学兵第六次合作"}
 79 | {"postag": [{"word": "钟明", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "广东", "pos": "ns"}, {"word": "蕉岭县", "pos": "ns"}, {"word": "人", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "汉族", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "1967年", "pos": "t"}, {"word": "1月", "pos": "t"}, {"word": "生", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "学历", "pos": "n"}, {"word": "大学", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "农业", "pos": "n"}, {"word": "推广", "pos": "vn"}, {"word": "硕士", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1990年", "pos": "t"}, {"word": "7月", "pos": "t"}, {"word": "参加", "pos": "v"}, {"word": "工作", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1990年", "pos": "t"}, {"word": "5月", "pos": "t"}, {"word": "加入", "pos": "v"}, {"word": "中国共产党", "pos": "nt"}], "text": "钟明，男，广东蕉岭县人，汉族，1967年1月生，学历大学，农业推广硕士，1990年7月参加工作，1990年5月加入中国共产党"}
 80 | {"postag": [{"word": "《", "pos": "w"}, {"word": "我是传说", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "韩国SBS电视台", "pos": "nt"}, {"word": "于", "pos": "p"}, {"word": "2010年8月2日", "pos": "t"}, {"word": "首播", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "月火剧", "pos": "n"}], "text": "《我是传说》是韩国SBS电视台于2010年8月2日首播的月火剧"}
 81 | {"postag": [{"word": "北京爱瑞森园林绿化有限公司", "pos": "nt"}, {"word": "于", "pos": "p"}, {"word": "2012年", "pos": "t"}, {"word": "02月20日", "pos": "t"}, {"word": "在", "pos": "p"}, {"word": "平谷", "pos": "ns"}, {"word": "分局", "pos": "n"}, {"word": "登记", "pos": "v"}, {"word": "成立", "pos": "v"}], "text": "北京爱瑞森园林绿化有限公司于2012年02月20日在平谷分局登记成立"}
 82 | {"postag": [{"word": "《", "pos": "w"}, {"word": "Linux培训教程", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2011年", "pos": "t"}, {"word": "清华大学出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "何明", "pos": "nr"}], "text": "《Linux培训教程》是2011年清华大学出版社出版的图书，作者是何明"}
 83 | {"postag": [{"word": "近日", "pos": "t"}, {"word": "，", "pos": "w"}, {"word": "刘恺威", "pos": "nr"}, {"word": "被", "pos": "p"}, {"word": "媒体", "pos": "n"}, {"word": "拍", "pos": "v"}, {"word": "到", "pos": "v"}, {"word": "与", "pos": "p"}, {"word": "父母", "pos": "n"}, {"word": "和", "pos": "c"}, {"word": "女儿", "pos": "n"}, {"word": "“", "pos": "w"}, {"word": "小糯米", "pos": "nr"}, {"word": "”", "pos": "w"}, {"word": "在", "pos": "p"}, {"word": "深湾", "pos": "ns"}, {"word": "游艇", "pos": "n"}, {"word": "俱乐部", "pos": "n"}, {"word": "共", "pos": "d"}, {"word": "吃开", "pos": "v"}, {"word": "年", "pos": "q"}, {"word": "饭", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "全程", "pos": "n"}, {"word": "不见", "pos": "v"}, {"word": "妻子", "pos": "n"}, {"word": "杨幂", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "踪影", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "当", "pos": "p"}, {"word": "媒体", "pos": "n"}, {"word": "问及", "pos": "v"}, {"word": "刘恺威", "pos": "nr"}, {"word": "为什么", "pos": "r"}, {"word": "杨幂", "pos": "nr"}, {"word": "不在场", "pos": "v"}, {"word": "时", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "刘恺威", "pos": "nr"}, {"word": "只", "pos": "d"}, {"word": "回", "pos": "v"}, {"word": "了", "pos": "u"}, {"word": "句", "pos": "q"}, {"word": "“", "pos": "w"}, {"word": "新年快乐", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "多谢", "pos": "v"}, {"word": "”", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "就", "pos": "d"}, {"word": "不再", "pos": "d"}, {"word": "回应", "pos": "v"}], "text": "近日，刘恺威被媒体拍到与父母和女儿“小糯米”在深湾游艇俱乐部共吃开年饭，全程不见妻子杨幂的踪影，当媒体问及刘恺威为什么杨幂不在场时，刘恺威只回了句“新年快乐，多谢”，就不再回应"}
 84 | {"postag": [{"word": "威利", "pos": "nr"}, {"word": "·", "pos": "w"}, {"word": "考利", "pos": "nr"}, {"word": "-", "pos": "w"}, {"word": "斯坦", "pos": "ns"}, {"word": "（", "pos": "w"}, {"word": "Willie Cauley-Stein", "pos": "nz"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "1993年8月18日", "pos": "t"}, {"word": "出生", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "美国", "pos": "ns"}, {"word": "堪萨斯州", "pos": "ns"}, {"word": "斯皮尔维尔", "pos": "ns"}, {"word": "（", "pos": "w"}, {"word": "Spearville", "pos": "nz"}, {"word": ",", "pos": "w"}, {"word": " ", "pos": "w"}, {"word": "KS", "pos": "nz"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "美国", "pos": "ns"}, {"word": "职业", "pos": "n"}, {"word": "篮球", "pos": "n"}, {"word": "运动员", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "司职", "pos": "v"}, {"word": "中锋", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "效力", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "NBA", "pos": "nz"}, {"word": "萨克拉门托国王队", "pos": "nt"}], "text": "威利·考利-斯坦（Willie Cauley-Stein），1993年8月18日出生于美国堪萨斯州斯皮尔维尔（Spearville, KS），美国职业篮球运动员，司职中锋，效力于NBA萨克拉门托国王队"}
 85 | {"postag": [{"word": "朴树", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "1973年", "pos": "t"}, {"word": "出生于", "pos": "v"}, {"word": "南京", "pos": "ns"}, {"word": "，", "pos": "w"}, {"word": "父母", "pos": "n"}, {"word": "都是", "pos": "v"}, {"word": "北大", "pos": "nt"}, {"word": "教授", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "但", "pos": "c"}, {"word": "他", "pos": "r"}, {"word": "却", "pos": "d"}, {"word": "唯独", "pos": "d"}, {"word": "喜爱", "pos": "v"}, {"word": "摇滚", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "出道", "pos": "v"}, {"word": "多年", "pos": "m"}, {"word": "唱歌", "pos": "v"}, {"word": "很多", "pos": "a"}, {"word": "经典", "pos": "a"}, {"word": "歌曲", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "比如", "pos": "v"}, {"word": "：", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "平凡之路", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "那些花儿", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "白桦林", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "生如夏花", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "《", "pos": "w"}, {"word": "她在睡梦中", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "等", "pos": "u"}, {"word": "，", "pos": "w"}, {"word": "都是", "pos": "v"}, {"word": "如今", "pos": "t"}, {"word": "歌坛", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "经典", "pos": "a"}, {"word": "作品", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "更", "pos": "d"}, {"word": "是", "pos": "v"}, {"word": "因", "pos": "p"}, {"word": "《", "pos": "w"}, {"word": "平凡之路", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "一曲成名", "pos": "nw"}], "text": "朴树，1973年出生于南京，父母都是北大教授，但他却唯独喜爱摇滚，出道多年唱歌很多经典歌曲，比如：《平凡之路》，《那些花儿》，《白桦林》，《生如夏花》，《她在睡梦中》等，都是如今歌坛的经典作品，更是因《平凡之路》一曲成名"}
 86 | {"postag": [{"word": "《", "pos": "w"}, {"word": "重生之我们都是好孩子", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "翻飞", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "记忆", "pos": "n"}, {"word": "创作", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "网络", "pos": "n"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "发表", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "17K小说网", "pos": "nz"}], "text": "《重生之我们都是好孩子》是翻飞的记忆创作的网络小说，发表于17K小说网"}
 87 | {"postag": [{"word": "《", "pos": "w"}, {"word": "爱的种子", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "林子祥", "pos": "nr"}, {"word": "的", "pos": "u"}, {"word": "音乐", "pos": "n"}, {"word": "作品", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "郑国江", "pos": "nr"}, {"word": "作曲", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "收录", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "《", "pos": "w"}, {"word": "好气连祥", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "专辑", "pos": "n"}, {"word": "中", "pos": "f"}], "text": "《爱的种子》是林子祥的音乐作品，郑国江作曲，收录在《好气连祥》专辑中"}
 88 | {"postag": [{"word": "秦哲为", "pos": "nr"}, {"word": " ", "pos": "w"}, {"word": "男", "pos": "a"}, {"word": "，", "pos": "w"}, {"word": "汉族", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "1936年", "pos": "t"}, {"word": "12月", "pos": "t"}, {"word": "生", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "甘肃", "pos": "ns"}, {"word": "张掖币", "pos": "nz"}, {"word": "人", "pos": "n"}], "text": "秦哲为 男，汉族，1936年12月生，甘肃张掖币人"}
 89 | {"postag": [{"word": "《", "pos": "w"}, {"word": "清穿之胤禛福晋", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "连载", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "晋江文学城", "pos": "nt"}, {"word": "的", "pos": "u"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "青玉铭瑄", "pos": "nt"}], "text": "《清穿之胤禛福晋》是连载在晋江文学城的小说，作者是青玉铭瑄"}
 90 | {"postag": [{"word": "《", "pos": "w"}, {"word": "恋爱操作 01", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2007年", "pos": "t"}, {"word": "角川出版社", "pos": "nt"}, {"word": "出版", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "图书", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "莲川爱", "pos": "nr"}], "text": "《恋爱操作 01》是2007年角川出版社出版的图书，作者是莲川爱"}
 91 | {"postag": [{"word": "刺跗逍遥蛛", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "Philodromus spinitarsis", "pos": "nz"}, {"word": " ", "pos": "w"}, {"word": "（", "pos": "w"}, {"word": "Simon", "pos": "nr"}, {"word": ",", "pos": "w"}, {"word": " ", "pos": "w"}, {"word": "1895", "pos": "m"}, {"word": "）", "pos": "w"}, {"word": "，", "pos": "w"}, {"word": "为", "pos": "p"}, {"word": "蜘蛛目", "pos": "n"}, {"word": "(", "pos": "w"}, {"word": "Araneae", "pos": "nz"}, {"word": ")", "pos": "w"}, {"word": "逍遥蛛科逍遥蛛", "pos": "nz"}, {"word": "属", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "一种", "pos": "m"}, {"word": "蜘蛛", "pos": "n"}], "text": "刺跗逍遥蛛，Philodromus spinitarsis （Simon, 1895），为蜘蛛目(Araneae)逍遥蛛科逍遥蛛属的一种蜘蛛"}
 92 | {"postag": [{"word": "《", "pos": "w"}, {"word": "冤家路窄：逃婚也有爱", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "17k小说网", "pos": "nz"}, {"word": "已", "pos": "d"}, {"word": "完结", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "言情小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "小说", "pos": "n"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "萧黎草", "pos": "nr"}], "text": "《冤家路窄：逃婚也有爱》是17k小说网已完结的言情小说，小说作者是萧黎草"}
 93 | {"postag": [{"word": "杨镛", "pos": "nr"}, {"word": "，", "pos": "w"}, {"word": "浙江", "pos": "ns"}, {"word": "湖州", "pos": "ns"}, {"word": "双林人", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "研习", "pos": "v"}, {"word": "书画", "pos": "n"}, {"word": "二十余载", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "喜", "pos": "v"}, {"word": "作", "pos": "v"}, {"word": "花鸟", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "兼", "pos": "v"}, {"word": "习", "pos": "v"}, {"word": "书法", "pos": "n"}], "text": "杨镛，浙江湖州双林人，研习书画二十余载，喜作花鸟，兼习书法"}
 94 | {"postag": [{"word": "在", "pos": "p"}, {"word": "如此", "pos": "r"}, {"word": "兼具", "pos": "v"}, {"word": "权威性", "pos": "n"}, {"word": "与", "pos": "c"}, {"word": "流行性", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "全球", "pos": "n"}, {"word": "华语", "pos": "nz"}, {"word": "歌曲", "pos": "n"}, {"word": "排行榜", "pos": "n"}, {"word": "中", "pos": "f"}, {"word": "，", "pos": "w"}, {"word": "同", "pos": "a"}, {"word": "曲", "pos": "n"}, {"word": "不同", "pos": "a"}, {"word": "词", "pos": "n"}, {"word": "的", "pos": "u"}, {"word": "两首", "pos": "m"}, {"word": "歌曲", "pos": "n"}, {"word": "能", "pos": "v"}, {"word": "同时", "pos": "d"}, {"word": "进入", "pos": "v"}, {"word": "全球", "pos": "n"}, {"word": "榜", "pos": "n"}, {"word": "TOP10", "pos": "m"}, {"word": "尚", "pos": "d"}, {"word": "属", "pos": "v"}, {"word": "首次", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "胡彦斌", "pos": "nr"}, {"word": "《", "pos": "w"}, {"word": "男人KTV", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "以", "pos": "p"}, {"word": "其", "pos": "p"}, {"word": "超人气", "pos": "n"}, {"word": "之", "pos": "u"}, {"word": "势", "pos": "n"}, {"word": "蝉联", "pos": "v"}, {"word": "冠军", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "其", "pos": "r"}, {"word": "音乐", "pos": "n"}, {"word": "实力", "pos": "n"}, {"word": "绝不", "pos": "d"}, {"word": "容", "pos": "v"}, {"word": "质疑", "pos": "vn"}, {"word": "6", "pos": "m"}], "text": "在如此兼具权威性与流行性的全球华语歌曲排行榜中，同曲不同词的两首歌曲能同时进入全球榜TOP10尚属首次，胡彦斌《男人KTV》以其超人气之势蝉联冠军，其音乐实力绝不容质疑6"}
 95 | {"postag": [{"word": "2004年", "pos": "t"}, {"word": "再次", "pos": "d"}, {"word": "操刀", "pos": "v"}, {"word": "，", "pos": "w"}, {"word": "徐静蕾", "pos": "nr"}, {"word": "自编", "pos": "v"}, {"word": "、", "pos": "w"}, {"word": "自导", "pos": "v"}, {"word": "、", "pos": "w"}, {"word": "自演", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "电影", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "一个陌生女人的来信", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "获得", "pos": "v"}, {"word": "第52届", "pos": "m"}, {"word": "西班牙", "pos": "ns"}, {"word": "圣塞巴斯蒂安电影节", "pos": "nz"}, {"word": "最佳", "pos": "a"}, {"word": "导演", "pos": "n"}, {"word": "奖", "pos": "n"}], "text": "2004年再次操刀，徐静蕾自编、自导、自演的电影《一个陌生女人的来信》获得第52届西班牙圣塞巴斯蒂安电影节最佳导演奖"}
 96 | {"postag": [{"word": "盒果藤属", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "旋花科", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "茄目", "pos": "nz"}, {"word": "，", "pos": "w"}, {"word": "约", "pos": "d"}, {"word": "25种", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "分布", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "热带", "pos": "n"}, {"word": "地区", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "我", "pos": "r"}, {"word": "国", "pos": "n"}, {"word": "有", "pos": "v"}, {"word": "盒果藤", "pos": "nz"}, {"word": "1种", "pos": "m"}, {"word": "，", "pos": "w"}, {"word": "产", "pos": "v"}, {"word": "台湾", "pos": "ns"}, {"word": "、", "pos": "w"}, {"word": "广东", "pos": "ns"}, {"word": "、", "pos": "w"}, {"word": "广西", "pos": "ns"}, {"word": "和", "pos": "c"}, {"word": "云南", "pos": "ns"}], "text": "盒果藤属，旋花科，茄目，约25种，分布于热带地区，我国有盒果藤1种，产台湾、广东、广西和云南"}
 97 | {"postag": [{"word": "《", "pos": "w"}, {"word": "十七岁的天空", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "陈映蓉", "pos": "nr"}, {"word": "执导", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "爱情", "pos": "n"}, {"word": "喜剧", "pos": "n"}, {"word": "同志", "pos": "n"}, {"word": "电影", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "杨佑宁", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "周群达", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "金勤", "pos": "nr"}, {"word": "、", "pos": "w"}, {"word": "季宏", "pos": "nr"}, {"word": "全等", "pos": "n"}, {"word": "主演", "pos": "v"}], "text": "《十七岁的天空》是陈映蓉执导的爱情喜剧同志电影，由杨佑宁、周群达、金勤、季宏全等主演"}
 98 | {"postag": [{"word": "寂寞", "pos": "a"}, {"word": "飞行", "pos": "vn"}, {"word": "是", "pos": "v"}, {"word": "江美琪", "pos": "nr"}, {"word": "演唱", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "歌曲", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "收录", "pos": "v"}, {"word": "在", "pos": "p"}, {"word": "其", "pos": "r"}, {"word": "专辑", "pos": "n"}, {"word": "《", "pos": "w"}, {"word": "想起", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "中", "pos": "f"}], "text": "寂寞飞行是江美琪演唱的歌曲，收录在其专辑《想起》中"}
 99 | {"postag": [{"word": "《", "pos": "w"}, {"word": "魔箭", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "连载", "pos": "v"}, {"word": "于", "pos": "p"}, {"word": "17k小说网", "pos": "nz"}, {"word": "的", "pos": "u"}, {"word": "异界", "pos": "n"}, {"word": "大陆", "pos": "n"}, {"word": "类型", "pos": "n"}, {"word": "小说", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "作者", "pos": "n"}, {"word": "是", "pos": "v"}, {"word": "风", "pos": "n"}, {"word": "哈哈", "pos": "xc"}], "text": "《魔箭》是连载于17k小说网的异界大陆类型小说，作者是风哈哈"}
100 | {"postag": [{"word": "《", "pos": "w"}, {"word": "开往名古屋的末班列车", "pos": "nw"}, {"word": "》", "pos": "w"}, {"word": "是", "pos": "v"}, {"word": "2012年", "pos": "t"}, {"word": "首播", "pos": "v"}, {"word": "的", "pos": "u"}, {"word": "电视剧", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "由", "pos": "p"}, {"word": "神道", "pos": "n"}, {"word": "俊浩", "pos": "n"}, {"word": "导演", "pos": "n"}, {"word": "，", "pos": "w"}, {"word": "松井玲奈", "pos": "nr"}, {"word": "主演", "pos": "v"}], "text": "《开往名古屋的末班列车》是2012年首播的电视剧，由神道俊浩导演，松井玲奈主演"}
101 | 


--------------------------------------------------------------------------------