# -*- coding:utf-8 -*-
# Author: Li Pengfei (李鹏飞)
# Blog: https://www.lookfor404.com/
# Write-up of this code: https://www.lookfor404.com/用隐马尔可夫模型hmm做命名实体识别-ner系列二/
# GitHub project: https://github.com/lipengfei-558/hmm_ner_organization
import os

try:
    # The README lists jieba as optional and nothing in this module calls it,
    # so a missing installation must not prevent the module from importing.
    import jieba
except ImportError:
    jieba = None


class OrgRecognize(object):
    """Recognize organization names by HMM role tagging plus pattern matching.

    Each word of a pre-segmented sentence is tagged with one hidden state
    (a single-letter role); the resulting tag string is then searched for
    known organization patterns loaded from ``nt.pattern.txt``.
    """

    # Directory holding the data files; may be overridden per instance.
    data_dir = "./data"

    def __init__(self, input_sentence, data_dir=None):
        """Load the model files and remember the sentence to analyse.

        :param input_sentence: list of words (the coarse segmentation result)
        :param data_dir: directory containing the data files; defaults to
            the class-level ``data_dir`` ("./data")
        """
        if data_dir is not None:
            self.data_dir = data_dir
        self.hidden_states = ["A", "B", "C", "D", "F", "G", "I", "J", "K", "L", "M", "P", "S", "W", "X", "Z"]
        self.observed_states = self.get_observed_states(sentence=input_sentence)
        self.initial_vector = self.load_initial_vector()
        # "transision_matrix" is the historical (misspelled) attribute name;
        # it is kept for existing callers and aliased to the correct spelling.
        self.transision_matrix = self.load_transition_matrix(hidden_states=self.hidden_states)
        self.transition_matrix = self.transision_matrix
        self.emission_matrix = self.load_emission_matrix(hidden_states=self.hidden_states)

    def _read_lines(self, filename):
        """Return the non-blank, stripped lines of a data file as native strings."""
        path = os.path.join(self.data_dir, filename)
        lines = []
        with open(path, "rb") as handle:
            for raw in handle:
                raw = raw.strip()
                if not raw:
                    # Blank lines carry no record; skipping them avoids empty
                    # patterns and IndexError on short records.
                    continue
                if not isinstance(raw, str):
                    # Python 3 yields bytes here; Python 2 already has str.
                    raw = raw.decode("utf-8")
                lines.append(raw)
        return lines

    def _load_nested_table(self, filename, hidden_states):
        """Read "state,key,value" records into {state: {key: value}}."""
        table = {state: {} for state in hidden_states}
        for line in self._read_lines(filename):
            fields = line.split(",")
            # setdefault tolerates a state that is absent from hidden_states.
            table.setdefault(fields[0], {})[fields[1]] = fields[2]
        return table

    def load_patterns(self):
        """
        Read the organization-name pattern strings.
        :return: list whose elements are pattern strings
        """
        return self._read_lines("nt.pattern.txt")

    def load_transition_matrix(self, hidden_states):
        """
        Load the state transition matrix.
        :param hidden_states: list of hidden states
        :return: dict: key is the source state, value is a dict whose key is
            the target state and whose value is the probability (as a string)
        """
        return self._load_nested_table("transition_probability.txt", hidden_states)

    def load_initial_vector(self):
        """
        Load the initial probability vector.
        :return: dict: key is the hidden state, value is the probability
            (third field of each record, kept as a string)
        """
        vector = {}
        for line in self._read_lines("initial_vector.txt"):
            fields = line.split(",")
            vector[fields[0]] = fields[2]
        return vector

    def load_emission_matrix(self, hidden_states):
        """
        Load the emission matrix.
        :param hidden_states: list of hidden states
        :return: dict: key is the hidden state, value is a dict whose key is
            the observed word and whose value is the probability (as a string)
        """
        return self._load_nested_table("emit_probability.txt", hidden_states)

    def get_observed_states(self, sentence):
        """Return the observation sequence; the sentence is used unchanged."""
        return sentence

    def viterbi(self, observation, hidden_states, initial_probability, transition_probability, emit_probability):
        """
        Tag every observed word with a hidden state.
        :param observation: coarse segmentation result (list of words)
        :param hidden_states: hidden state labels; every returned tag is one of them
        :param initial_probability: initial state vector
        :param transition_probability: state transition matrix
        :param emit_probability: emission matrix
        :return: list with one tag per observed word ([] for empty input)

        Probabilities may be numbers or numeric strings. A missing state,
        transition or emission entry is treated as probability 0.
        """
        if not observation:
            return []

        # Scores for the first word: initial probability * emission probability.
        scores = {}
        for state in hidden_states:
            emissions = emit_probability.get(state, {})
            if observation[0] in emissions:
                scores[state] = float(initial_probability.get(state, 0)) * float(emissions[observation[0]])
            else:
                scores[state] = 0
        history = [scores]  # one score table per observed word

        # Remaining words: best predecessor score * transition * emission.
        for word in observation[1:]:
            previous = history[-1]
            scores = {}
            for current_state in hidden_states:
                emissions = emit_probability.get(current_state, {})
                if word in emissions:
                    best_incoming = max(
                        previous[x] * float(transition_probability.get(x, {}).get(current_state, 0))
                        for x in hidden_states
                    )
                    scores[current_state] = best_incoming * float(emissions[word])
                else:
                    scores[current_state] = 0
            history.append(scores)

        # NOTE(review): tags are the per-position best-scoring state; no
        # backpointers are followed, so this is not a full Viterbi path
        # decode. Kept as-is because the published results depend on it.
        # Ties resolve to the earliest state in hidden_states.
        return [max(hidden_states, key=step.get) for step in history]

    def get_organization(self, observation, sequence, patterns):
        """
        Extract the recognized organization names.
        :param observation: word sequence
        :param sequence: tag sequence (one single-character tag per word)
        :param patterns: pattern strings
        :return: list of organization names, in pattern order

        Only the first occurrence of each pattern is reported, and
        overlapping patterns each yield their own entry.
        """
        # Tags are single characters, so an offset into the joined tag string
        # is also an index into the word list.
        tag_sequence_str = "".join(sequence)
        org_indices = []
        for pattern in patterns:
            if not pattern:
                continue  # an empty pattern would match everywhere
            start_index = tag_sequence_str.find(pattern)
            if start_index != -1:
                org_indices.append((start_index, start_index + len(pattern)))
        return ["".join(observation[start:end]) for start, end in org_indices]


def main():
    """Run the demo sentence through the recognizer and print the result."""
    sentence = ["始##始", "中海油", "集团", "在", "哪里", "末##末"]
    orgrecog = OrgRecognize(sentence)
    # Reuse the matrices loaded by the constructor instead of reading the
    # data files a second time.
    tag_sequence = orgrecog.viterbi(
        observation=sentence,
        hidden_states=orgrecog.hidden_states,
        initial_probability=orgrecog.initial_vector,
        transition_probability=orgrecog.transision_matrix,
        emit_probability=orgrecog.emission_matrix,
    )
    print(tag_sequence)
    patterns = orgrecog.load_patterns()
    results = orgrecog.get_organization(observation=sentence, sequence=tag_sequence, patterns=patterns)
    if not results:
        print("未识别到机构名")
        print(tag_sequence)
    else:
        for result in results:
            print(result)


if __name__ == '__main__':
    main()
详细说明文档,可前往我的博客围观:[用隐马尔可夫模型(HMM)做命名实体识别——NER系列(二)](https://www.lookfor404.com/%e7%94%a8%e9%9a%90%e9%a9%ac%e5%b0%94%e5%8f%af%e5%a4%ab%e6%a8%a1%e5%9e%8bhmm%e5%81%9a%e5%91%bd%e5%90%8d%e5%ae%9e%e4%bd%93%e8%af%86%e5%88%ab-ner%e7%b3%bb%e5%88%97%e4%ba%8c/) 11 | 12 | ## 3.使用说明 13 | 14 | 首先,运行以下脚本: 15 | 16 | `python generate_datas.py` 17 | 18 | 会在`./data`下生成`transition_probability.txt`,`emit_probability.txt`以及`initial_vector.txt` 19 | 20 | 然后,运行: 21 | 22 | `python OrgRecognize.py` 23 | 24 | 就可以了,不出意外,“中海油集团在哪里”这句话,会识别出“中海油集团”这个机构实体。 25 | 26 | ## 4.参考资料 27 | 28 | - 张华平, 刘群. 基于角色标注的中国人名自动识别研究[J]. 计算机学报, 2004, 27(1):85-91. 29 | - 俞鸿魁, 张华平, 刘群. 基于角色标注的中文机构名识别[C]// Advances in Computation of Oriental Languages--Proceedings of the, International Conference on Computer Processing of Oriental Languages. 2003. 30 | - 俞鸿魁, 张华平, 刘群,等. 基于层叠隐马尔可夫模型的中文命名实体识别[J]. 通信学报, 2006, 27(2):87-94. 31 | - 码农场文章:[层叠HMM-Viterbi角色标注模型下的机构名识别](http://www.hankcs.com/nlp/ner/place-name-recognition-model-of-the-stacked-hmm-viterbi-role-labeling.html) 32 | -------------------------------------------------------------------------------- /data/nt.pattern.txt: -------------------------------------------------------------------------------- 1 | CCCCCCCCD 2 | CCCCCCCD 3 | CCCCCCD 4 | CCCCCCGD 5 | CCCCCCICCCCD 6 | CCCCCCPD 7 | CCCCCD 8 | CCCCCDD 9 | CCCCCGCCD 10 | CCCCCICCCCCD 11 | CCCCCPCCD 12 | CCCCCWDWD 13 | CCCCD 14 | CCCCDCCD 15 | CCCCDCD 16 | CCCCDD 17 | CCCCID 18 | CCCCPCD 19 | CCCD 20 | CCCDCCCD 21 | CCCDCCD 22 | CCCDCD 23 | CCCDD 24 | CCCDICFPD 25 | CCCFCFFCD 26 | CCCGD 27 | CCCGID 28 | CCCGJCD 29 | CCCID 30 | CCCJCCD 31 | CCCJD 32 | CCCLGCD 33 | CCCMD 34 | CCCPCCCD 35 | CCCPCCD 36 | CCCPCD 37 | CCCPD 38 | CCD 39 | CCDCCCCCCD 40 | CCDCCCCD 41 | CCDCCCD 42 | CCDCCCDD 43 | CCDCCD 44 | CCDCD 45 | CCDCDD 46 | CCDCGCD 47 | CCDCGCDID 48 | CCDCGCDPD 49 | CCDCGGDD 50 | CCDCID 51 | CCDCJCCD 52 | CCDCJCCDD 53 | CCDD 54 | CCDDD 55 | CCDFIDGD 56 | CCDGCCD 57 | CCDICD 58 | CCDID 59 | CCDJCD 60 | CCDPCD 61 | CCDPJD 62 | 
CCFCCD 63 | CCFD 64 | CCGCCCD 65 | CCGCCD 66 | CCGCD 67 | CCGCDCD 68 | CCGCDCMD 69 | CCGD 70 | CCGGCD 71 | CCGID 72 | CCGIDD 73 | CCGJD 74 | CCGWGWD 75 | CCICCD 76 | CCICD 77 | CCICIFD 78 | CCICJPD 79 | CCID 80 | CCIDCD 81 | CCIDD 82 | CCIID 83 | CCJCCCD 84 | CCJCCD 85 | CCJCD 86 | CCJCFD 87 | CCJD 88 | CCJID 89 | CCJJMJD 90 | CCKID 91 | CCLD 92 | CCMD 93 | CCMMPDCD 94 | CCPCCD 95 | CCPCD 96 | CCPD 97 | CCPDCD 98 | CCPPD 99 | CCWCWD 100 | CCWGWCCD 101 | CCWGWD 102 | CD 103 | CDCCCCCCD 104 | CDCCCCD 105 | CDCCCD 106 | CDCCD 107 | CDCCDD 108 | CDCCJD 109 | CDCD 110 | CDCDD 111 | CDCGD 112 | CDCGPCCD 113 | CDCJD 114 | CDCLD 115 | CDCWIWD 116 | CDD 117 | CDDCCD 118 | CDDCCDD 119 | CDDCD 120 | CDDD 121 | CDFD 122 | CDFPCCD 123 | CDGCD 124 | CDGCICD 125 | CDGD 126 | CDICD 127 | CDID 128 | CDILLCCD 129 | CDJCCD 130 | CDJCD 131 | CDJD 132 | CDJLD 133 | CDLGCD 134 | CDLJD 135 | CDMCD 136 | CDPCCCCD 137 | CDPCCD 138 | CDPD 139 | CDPPD 140 | CFCCD 141 | CFCPD 142 | CFD 143 | CFPD 144 | CGCCCD 145 | CGCCD 146 | CGCD 147 | CGCDCD 148 | CGCDD 149 | CGD 150 | CGDCD 151 | CGDD 152 | CGDDCCD 153 | CGDDD 154 | CGDDID 155 | CGDJD 156 | CGDMD 157 | CGFD 158 | CGGCCCD 159 | CGGCCD 160 | CGGCD 161 | CGGD 162 | CGGGD 163 | CGGGDD 164 | CGGICD 165 | CGGJD 166 | CGICD 167 | CGID 168 | CGIJD 169 | CGJD 170 | CGMD 171 | CGPJD 172 | CICCCCD 173 | CICCD 174 | CICD 175 | CICDCD 176 | CICDD 177 | CICWGWD 178 | CID 179 | CIDD 180 | CIGCD 181 | CIGD 182 | CIID 183 | CILCD 184 | CIMD 185 | CJCCCCCD 186 | CJCCCD 187 | CJCCCDD 188 | CJCCD 189 | CJCCMD 190 | CJCD 191 | CJCDD 192 | CJCGCCD 193 | CJCGPJD 194 | CJCMD 195 | CJCPCCCD 196 | CJCPD 197 | CJD 198 | CJDCCCCD 199 | CJDCCJD 200 | CJDCD 201 | CJDD 202 | CJDFD 203 | CJDPD 204 | CJFCD 205 | CJFD 206 | CJGD 207 | CJGLD 208 | CJGPCJD 209 | CJID 210 | CJJCCD 211 | CJJD 212 | CJJJD 213 | CJJLD 214 | CJKD 215 | CJLCCD 216 | CJMCD 217 | CJMD 218 | CJPD 219 | CJWCCWCGJD 220 | CJWD 221 | CJWPMWCGD 222 | CKCD 223 | CKD 224 | CKJCDCD 225 | CKJPD 226 | CLCCCD 
227 | CLCCD 228 | CLCCGCD 229 | CLCD 230 | CLD 231 | CLDFD 232 | CLID 233 | CLPCD 234 | CMCD 235 | CMCDD 236 | CMCGD 237 | CMD 238 | CMDCD 239 | CMDD 240 | CMMD 241 | CMMDCCD 242 | CMPD 243 | CPCCCCCCCD 244 | CPCCCCD 245 | CPCCCD 246 | CPCCD 247 | CPCD 248 | CPCDD 249 | CPCPD 250 | CPD 251 | CPDCCD 252 | CPDCD 253 | CPDD 254 | CPDGD 255 | CPDWGWD 256 | CPGCD 257 | CPGD 258 | CPID 259 | CPJCD 260 | CPJD 261 | CPJPD 262 | CPMD 263 | CPPD 264 | CWCD 265 | CWCGWCCD 266 | CWCWD 267 | CWDWDD 268 | CWGWCCD 269 | CWGWCD 270 | CWPWD 271 | DCCCCCD 272 | DCCCCD 273 | DCCCCDCCD 274 | DCCCD 275 | DCCD 276 | DCD 277 | DCDD 278 | DCGCD 279 | DCJD 280 | DCPD 281 | DD 282 | DDCCD 283 | DDCD 284 | DDD 285 | DDICCD 286 | DFD 287 | DGCCD 288 | DGCD 289 | DGD 290 | DGDCD 291 | DGDD 292 | DGDPD 293 | DGGD 294 | DICCCD 295 | DICD 296 | DID 297 | DIICD 298 | DJCCD 299 | DJCD 300 | DJD 301 | DLCCD 302 | DLCD 303 | DLD 304 | DMCD 305 | DMD 306 | DMMCD 307 | DPD 308 | DPMMCCD 309 | FCCCCCD 310 | FCCCCD 311 | FCCCD 312 | FCCCPCD 313 | FCCD 314 | FCCGD 315 | FCCID 316 | FCCPD 317 | FCCWGWD 318 | FCD 319 | FCDCD 320 | FCDD 321 | FCDFD 322 | FCFCD 323 | FCFPD 324 | FCGCCD 325 | FCGCD 326 | FCGD 327 | FCID 328 | FCIJJD 329 | FCJCD 330 | FCJD 331 | FCPD 332 | FCPGCD 333 | FCWGWD 334 | FD 335 | FDCD 336 | FDD 337 | FDFD 338 | FDGCCD 339 | FDID 340 | FDLCD 341 | FFCCD 342 | FFCD 343 | FFCKFCCD 344 | FFCLLD 345 | FFD 346 | FFFD 347 | FFGCCD 348 | FFGD 349 | FFJCD 350 | FFJD 351 | FFJPCD 352 | FFPD 353 | FGCCD 354 | FGCD 355 | FGCGCGCJCD 356 | FGD 357 | FGDD 358 | FGFD 359 | FGJCCD 360 | FICCD 361 | FICD 362 | FICDD 363 | FICGD 364 | FICID 365 | FID 366 | FIDCD 367 | FIDD 368 | FIFPD 369 | FIID 370 | FIJCD 371 | FIJD 372 | FJCCD 373 | FJCD 374 | FJCDD 375 | FJD 376 | FJDCD 377 | FJDD 378 | FJGD 379 | FJJCCD 380 | FJJCD 381 | FJJCLCD 382 | FJJD 383 | FJJJCCD 384 | FJJJD 385 | FJJJICCD 386 | FJJLJLCD 387 | FJPJD 388 | FKCD 389 | FKCJD 390 | FLD 391 | FLPCD 392 | FPCCCD 393 | FPCD 394 | FPD 395 | FPFD 
396 | FPFDD 397 | FPID 398 | FPJCCD 399 | FPJCD 400 | FPPCD 401 | FPPD 402 | FPPDLD 403 | FWCCCWCD 404 | FWCCCWD 405 | FWDWD 406 | FWFD 407 | FWFWCCCWD 408 | FWGJCD 409 | FWGWCD 410 | GCCCCCCCD 411 | GCCCCCCD 412 | GCCCCCD 413 | GCCCCCDCD 414 | GCCCCCDD 415 | GCCCCD 416 | GCCCCDCCD 417 | GCCCCDD 418 | GCCCCGD 419 | GCCCCJD 420 | GCCCCPD 421 | GCCCCWDWD 422 | GCCCD 423 | GCCCDCCCD 424 | GCCCDCCCDD 425 | GCCCDCCD 426 | GCCCDCD 427 | GCCCDD 428 | GCCCDDJD 429 | GCCCDID 430 | GCCCDMCD 431 | GCCCDPD 432 | GCCCDWGCDWD 433 | GCCCFCD 434 | GCCCGD 435 | GCCCICD 436 | GCCCID 437 | GCCCJCD 438 | GCCCJD 439 | GCCCJGD 440 | GCCCLD 441 | GCCCMD 442 | GCCCPCCD 443 | GCCCWDWD 444 | GCCD 445 | GCCDCCCCD 446 | GCCDCCCD 447 | GCCDCCCDCD 448 | GCCDCCD 449 | GCCDCD 450 | GCCDCID 451 | GCCDCJCD 452 | GCCDCPCD 453 | GCCDD 454 | GCCDDCCCD 455 | GCCDDCCD 456 | GCCDDD 457 | GCCDFD 458 | GCCDGCCD 459 | GCCDGD 460 | GCCDGGDCD 461 | GCCDID 462 | GCCDJCD 463 | GCCDJD 464 | GCCDLDD 465 | GCCDLJCD 466 | GCCDMJD 467 | GCCDMJMMCD 468 | GCCDMJMMD 469 | GCCDMMD 470 | GCCDPD 471 | GCCFCD 472 | GCCFDD 473 | GCCFJPD 474 | GCCFPD 475 | GCCGCCCD 476 | GCCGCCD 477 | GCCGCD 478 | GCCGCDD 479 | GCCGD 480 | GCCGGCGD 481 | GCCGGDD 482 | GCCICCDCCD 483 | GCCICD 484 | GCCID 485 | GCCIDD 486 | GCCJCCCD 487 | GCCJCCCID 488 | GCCJCCD 489 | GCCJCD 490 | GCCJCJD 491 | GCCJD 492 | GCCJICD 493 | GCCJID 494 | GCCJPCD 495 | GCCJPD 496 | GCCKD 497 | GCCLCCD 498 | GCCLCD 499 | GCCLCGCD 500 | GCCLD 501 | GCCMCD 502 | GCCMD 503 | GCCMPD 504 | GCCPCCCCD 505 | GCCPCCCID 506 | GCCPCCD 507 | GCCPCD 508 | GCCPD 509 | GCCPDD 510 | GCCPFWCJD 511 | GCCPJD 512 | GCCWCCWCD 513 | GCCWCDWCD 514 | GCCWDWCCD 515 | GCCWDWD 516 | GCD 517 | GCDCCCCD 518 | GCDCCCCPD 519 | GCDCCCD 520 | GCDCCD 521 | GCDCCDCD 522 | GCDCCDD 523 | GCDCCDID 524 | GCDCCJCD 525 | GCDCCJD 526 | GCDCD 527 | GCDCDD 528 | GCDCDICD 529 | GCDCGCD 530 | GCDCGD 531 | GCDCGMCD 532 | GCDCID 533 | GCDCJCD 534 | GCDCJD 535 | GCDCLDD 536 | GCDCMCD 537 | GCDCMD 538 | GCDCMDCD 539 
| GCDCMDD 540 | GCDCMDID 541 | GCDCPD 542 | GCDD 543 | GCDDCD 544 | GCDDD 545 | GCDDMCD 546 | GCDFD 547 | GCDFGCD 548 | GCDFWFD 549 | GCDGCCCCCD 550 | GCDGCCD 551 | GCDGCD 552 | GCDGD 553 | GCDGDD 554 | GCDGGD 555 | GCDGLCCD 556 | GCDGLJPCD 557 | GCDICCCCD 558 | GCDICCD 559 | GCDICD 560 | GCDID 561 | GCDIDD 562 | GCDJCCD 563 | GCDJCD 564 | GCDJCDGPD 565 | GCDJD 566 | GCDJJD 567 | GCDKCDCD 568 | GCDLCCCD 569 | GCDLD 570 | GCDLGCCCCD 571 | GCDLGCD 572 | GCDLPD 573 | GCDMCD 574 | GCDMCDD 575 | GCDMD 576 | GCDMDD 577 | GCDMJD 578 | GCDPCD 579 | GCDPD 580 | GCDWFWD 581 | GCDWGWCD 582 | GCDWGWD 583 | GCFCCD 584 | GCFCCJFGDD 585 | GCFCD 586 | GCFD 587 | GCFDD 588 | GCFFD 589 | GCFID 590 | GCFJCCD 591 | GCFPCD 592 | GCFPD 593 | GCFWGCCD 594 | GCFWGCCDD 595 | GCFWGJCD 596 | GCGCCCD 597 | GCGCCD 598 | GCGCD 599 | GCGCID 600 | GCGCLD 601 | GCGCPPCCD 602 | GCGD 603 | GCGDD 604 | GCGGCD 605 | GCGGCGD 606 | GCGGD 607 | GCGICD 608 | GCGID 609 | GCGJCCD 610 | GCGPCCD 611 | GCICCCCD 612 | GCICCCD 613 | GCICCD 614 | GCICD 615 | GCICDD 616 | GCID 617 | GCIDD 618 | GCIDID 619 | GCIFCCD 620 | GCIID 621 | GCIJCD 622 | GCIJD 623 | GCIJICD 624 | GCIPCD 625 | GCIPD 626 | GCIWGIIWD 627 | GCJCCCCD 628 | GCJCCCD 629 | GCJCCD 630 | GCJCD 631 | GCJCGD 632 | GCJCID 633 | GCJCIID 634 | GCJCPD 635 | GCJD 636 | GCJDCCD 637 | GCJDCD 638 | GCJDD 639 | GCJDID 640 | GCJFD 641 | GCJGD 642 | GCJICD 643 | GCJID 644 | GCJJCCD 645 | GCJJCD 646 | GCJJD 647 | GCJJGD 648 | GCJKCD 649 | GCJLCCD 650 | GCJMD 651 | GCJPCCGJLFD 652 | GCJPD 653 | GCJWCCJCD 654 | GCKCCD 655 | GCKD 656 | GCLCCCD 657 | GCLCCD 658 | GCLCD 659 | GCLD 660 | GCLDD 661 | GCLGGCD 662 | GCMCCD 663 | GCMCD 664 | GCMD 665 | GCMDD 666 | GCMPCD 667 | GCMPMD 668 | GCPCCCCD 669 | GCPCCCD 670 | GCPCCD 671 | GCPCCDD 672 | GCPCD 673 | GCPCDD 674 | GCPCKCD 675 | GCPD 676 | GCPDCCD 677 | GCPDD 678 | GCPFD 679 | GCPICCCD 680 | GCPJCCD 681 | GCPJCD 682 | GCPJD 683 | GCPJDCD 684 | GCPJJCD 685 | GCPJJDD 686 | GCPJPD 687 | GCPPCCD 688 | GCPPD 689 | GCPPPD 
690 | GCWCWCJD 691 | GCWCWD 692 | GCWDWCDD 693 | GCWDWD 694 | GCWGWDD 695 | GD 696 | GDCCCCCCD 697 | GDCCCCCD 698 | GDCCCCD 699 | GDCCCCPD 700 | GDCCCD 701 | GDCCCDD 702 | GDCCCGCCD 703 | GDCCCJCD 704 | GDCCCJD 705 | GDCCCJDCD 706 | GDCCD 707 | GDCCDCD 708 | GDCCDCDD 709 | GDCCDD 710 | GDCCID 711 | GDCCJD 712 | GDCCPCD 713 | GDCD 714 | GDCDCCD 715 | GDCDCD 716 | GDCDD 717 | GDCDICD 718 | GDCDPD 719 | GDCFD 720 | GDCGCCD 721 | GDCGD 722 | GDCGPPCCD 723 | GDCID 724 | GDCIDD 725 | GDCJCCD 726 | GDCJD 727 | GDCLD 728 | GDCMD 729 | GDCPD 730 | GDCPID 731 | GDCPJD 732 | GDD 733 | GDDCCCCD 734 | GDDCCCD 735 | GDDCCD 736 | GDDCD 737 | GDDCDD 738 | GDDCFD 739 | GDDCFDCD 740 | GDDCMD 741 | GDDD 742 | GDDDCD 743 | GDDID 744 | GDDPPD 745 | GDDPPLD 746 | GDFCCD 747 | GDFCD 748 | GDFD 749 | GDFFD 750 | GDFGD 751 | GDGCCCD 752 | GDGCCD 753 | GDGCD 754 | GDGD 755 | GDGDCD 756 | GDGDD 757 | GDGDFID 758 | GDGJCCD 759 | GDGMD 760 | GDICCD 761 | GDICD 762 | GDID 763 | GDIDCD 764 | GDIDD 765 | GDIGCD 766 | GDIID 767 | GDIPCD 768 | GDJCCCD 769 | GDJCCD 770 | GDJCD 771 | GDJD 772 | GDJICD 773 | GDJJD 774 | GDJJJD 775 | GDJPCD 776 | GDJPDD 777 | GDLCCCCCD 778 | GDLCID 779 | GDLD 780 | GDLJD 781 | GDLJDD 782 | GDMCD 783 | GDMD 784 | GDMDCD 785 | GDMDD 786 | GDMJD 787 | GDMJMMD 788 | GDMPD 789 | GDPCCCCCD 790 | GDPCCD 791 | GDPCD 792 | GDPD 793 | GDPGCD 794 | GDPID 795 | GDPJCD 796 | GDPJD 797 | GDPPD 798 | GDPPJD 799 | GDWDWCCD 800 | GDWDWCCDD 801 | GDWDWD 802 | GDWFWD 803 | GDWGWD 804 | GFCCCCCD 805 | GFCCCCD 806 | GFCCCCJD 807 | GFCCCD 808 | GFCCCID 809 | GFCCD 810 | GFCCID 811 | GFCCDD 812 | GFCCFCD 813 | GFCCPD 814 | GFCCPGD 815 | GFCD 816 | GFCDCD 817 | GFCDD 818 | GFCID 819 | GFCJCD 820 | GFCJD 821 | GFCPCCD 822 | GFCPCD 823 | GFCPD 824 | GFCPJD 825 | GFCPJPD 826 | GFD 827 | GFDCCCD 828 | GFDCD 829 | GFDD 830 | GFFCCD 831 | GFFCD 832 | GFFD 833 | GFFPCGCD 834 | GFGCD 835 | GFGCID 836 | GFGD 837 | GFGJCD 838 | GFICCD 839 | GFICD 840 | GFID 841 | GFIICD 842 | GFJCCCD 843 | GFJCCD 844 | 
GFJCD 845 | GFJCDCD 846 | GFJD 847 | GFJJCCD 848 | GFJJD 849 | GFJJJCCD 850 | GFJJLJCLCD 851 | GFLD 852 | GFLPD 853 | GFMCD 854 | GFPCD 855 | GFPD 856 | GFPJCD 857 | GFPJD 858 | GFPJPD 859 | GFPPCCCD 860 | GFPPD 861 | GFWCJCPCCCWCCD 862 | GFWGWCD 863 | GGCCCCCD 864 | GGCCCCD 865 | GGCCCD 866 | GGCCCICD 867 | GGCCCID 868 | GGCCCWDWD 869 | GGCCD 870 | GGCCDCD 871 | GGCCDD 872 | GGCCGCD 873 | GGCCGD 874 | GGCCGJD 875 | GGCCJCD 876 | GGCCJD 877 | GGCD 878 | GGCDCCCCCD 879 | GGCDCCD 880 | GGCDCD 881 | GGCDD 882 | GGCDJD 883 | GGCFCCFCPD 884 | GGCFD 885 | GGCFJD 886 | GGCGCCCD 887 | GGCGCD 888 | GGCGD 889 | GGCGGD 890 | GGCICLCD 891 | GGCID 892 | GGCIJCD 893 | GGCJCCD 894 | GGCJCD 895 | GGCJD 896 | GGCJDDCD 897 | GGCJJCCD 898 | GGCJJD 899 | GGCJPCICCCD 900 | GGCJPD 901 | GGCLCD 902 | GGCLD 903 | GGCMD 904 | GGCPCCD 905 | GGCPCD 906 | GGCPD 907 | GGD 908 | GGDCCCD 909 | GGDCCD 910 | GGDCD 911 | GGDD 912 | GGDDCCD 913 | GGDDCD 914 | GGDDD 915 | GGDFCD 916 | GGDFD 917 | GGDGD 918 | GGDID 919 | GGDJCD 920 | GGDJD 921 | GGDJJD 922 | GGDPPJD 923 | GGFCCCD 924 | GGFCCD 925 | GGFCD 926 | GGFD 927 | GGFDD 928 | GGFFCD 929 | GGFFD 930 | GGFFDCD 931 | GGFFDD 932 | GGFGD 933 | GGFJCCD 934 | GGFJD 935 | GGFJDD 936 | GGFJJD 937 | GGFLD 938 | GGFPCFPCD 939 | GGGCCCCD 940 | GGGCCCD 941 | GGGCCD 942 | GGGCD 943 | GGGCDD 944 | GGGCGCD 945 | GGGCGD 946 | GGGCID 947 | GGGCJD 948 | GGGD 949 | GGGDCD 950 | GGGDD 951 | GGGFD 952 | GGGGCD 953 | GGGGD 954 | GGGGFJD 955 | GGGGICD 956 | GGGGJD 957 | GGGGJPD 958 | GGGGLD 959 | GGGGPCD 960 | GGGGPPD 961 | GGGICD 962 | GGGID 963 | GGGIDID 964 | GGGIGCJD 965 | GGGIJD 966 | GGGJCD 967 | GGGJD 968 | GGGJJCJD 969 | GGGJJD 970 | GGGJPCCD 971 | GGGLD 972 | GGGMD 973 | GGGPJD 974 | GGGWICWD 975 | GGICCCCD 976 | GGICCCD 977 | GGICCD 978 | GGICCGD 979 | GGICCLD 980 | GGICCPCCD 981 | GGICD 982 | GGICGCCCD 983 | GGICID 984 | GGICJD 985 | GGID 986 | GGIDCD 987 | GGIDD 988 | GGIFD 989 | GGIFJCD 990 | GGIFPD 991 | GGIGCCD 992 | GGIGD 993 | GGIICD 994 | GGIID 995 | 
GGIIPID 996 | GGIJCCD 997 | GGIJD 998 | GGIPCD 999 | GGIPD 1000 | GGIPDD 1001 | GGJCCCD 1002 | GGJCCD 1003 | GGJCCPCJCCD 1004 | GGJCD 1005 | GGJCWDWD 1006 | GGJD 1007 | GGJGCCCD 1008 | GGJGCCD 1009 | GGJGD 1010 | GGJJD 1011 | GGJJPCD 1012 | GGJLD 1013 | GGJPD 1014 | GGJPDD 1015 | GGKD 1016 | GGKGD 1017 | GGLCCCD 1018 | GGLCD 1019 | GGLCDD 1020 | GGLCJD 1021 | GGLCPD 1022 | GGLD 1023 | GGLFD 1024 | GGLID 1025 | GGLJD 1026 | GGLLFD 1027 | GGLPD 1028 | GGMCD 1029 | GGMCDD 1030 | GGMD 1031 | GGMJCD 1032 | GGMLD 1033 | GGMPCCD 1034 | GGPCCCD 1035 | GGPCCD 1036 | GGPCD 1037 | GGPCJCD 1038 | GGPD 1039 | GGPFD 1040 | GGPICD 1041 | GGPJCCCCD 1042 | GGPJCD 1043 | GGPJCDD 1044 | GGPJD 1045 | GGPLD 1046 | GGPPCCD 1047 | GGPPCD 1048 | GGPPD 1049 | GGPPJJD 1050 | GGPPPCD 1051 | GGWPCGWPJD 1052 | GICCCCCCD 1053 | GICCCCCD 1054 | GICCCCD 1055 | GICCCD 1056 | GICCCDD 1057 | GICCCJCD 1058 | GICCD 1059 | GICCDD 1060 | GICCJD 1061 | GICCLDD 1062 | GICCPD 1063 | GICD 1064 | GICDCCCCD 1065 | GICDCCD 1066 | GICDCD 1067 | GICDD 1068 | GICDLPD 1069 | GICDWCCWD 1070 | GICGCCCCD 1071 | GICGCCD 1072 | GICGCJICD 1073 | GICGD 1074 | GICGGD 1075 | GICGMMD 1076 | GICGPCJD 1077 | GICICCD 1078 | GICICD 1079 | GICID 1080 | GICIGD 1081 | GICIID 1082 | GICJCCD 1083 | GICJCD 1084 | GICJD 1085 | GICPCCCCD 1086 | GICPD 1087 | GICPICD 1088 | GICPJD 1089 | GID 1090 | GIDCCCJCD 1091 | GIDCCD 1092 | GIDCD 1093 | GIDD 1094 | GIDDD 1095 | GIDICCD 1096 | GIDID 1097 | GIDLPCD 1098 | GIFCCD 1099 | GIFD 1100 | GIFICD 1101 | GIFWFD 1102 | GIGCCD 1103 | GIGCD 1104 | GIGCGCD 1105 | GIGCJD 1106 | GIGCPD 1107 | GIGD 1108 | GIGGD 1109 | GIGICD 1110 | GIGID 1111 | GIGJPCD 1112 | GIICCCCD 1113 | GIICCD 1114 | GIICD 1115 | GIID 1116 | GIIGD 1117 | GIIID 1118 | GIIJCCCD 1119 | GIIJCD 1120 | GIJCCCCCD 1121 | GIJCCCCD 1122 | GIJCCCD 1123 | GIJCCD 1124 | GIJCD 1125 | GIJCPD 1126 | GIJD 1127 | GIJDD 1128 | GIJID 1129 | GIJJCCD 1130 | GIJJCD 1131 | GIJLD 1132 | GIJPD 1133 | GIJPDCD 1134 | GIKD 1135 | GILCCCCDD 1136 | GILCCD 1137 
| GILCD 1138 | GILD 1139 | GILID 1140 | GILPMD 1141 | GIMCCD 1142 | GIMCD 1143 | GIMD 1144 | GIMJCD 1145 | GIMJD 1146 | GIMPCCD 1147 | GIPCCCCD 1148 | GIPCCCD 1149 | GIPCCD 1150 | GIPCD 1151 | GIPCMD 1152 | GIPD 1153 | GIPDCD 1154 | GIPDD 1155 | GIPICD 1156 | GIPJCCD 1157 | GIPJCD 1158 | GIPPCD 1159 | GIPPD 1160 | GIWDCCWCD 1161 | GIWDWD 1162 | GIWGWCD 1163 | GJCCCCCD 1164 | GJCCCCD 1165 | GJCCCD 1166 | GJCCCDCDCD 1167 | GJCCCDD 1168 | GJCCD 1169 | GJCCDCD 1170 | GJCCDD 1171 | GJCCFD 1172 | GJCCGJPD 1173 | GJCCICCD 1174 | GJCCJCD 1175 | GJCCJD 1176 | GJCD 1177 | GJCDCCD 1178 | GJCDCJCCD 1179 | GJCDD 1180 | GJCDJCD 1181 | GJCDPD 1182 | GJCGCD 1183 | GJCGD 1184 | GJCGPJCCD 1185 | GJCICCCD 1186 | GJCICD 1187 | GJCID 1188 | GJCJCCD 1189 | GJCJCD 1190 | GJCJD 1191 | GJCJJCCCCD 1192 | GJCJJCD 1193 | GJCJPD 1194 | GJCJPPCD 1195 | GJCLD 1196 | GJCLJCCCD 1197 | GJCMD 1198 | GJCPD 1199 | GJCPJD 1200 | GJCPPD 1201 | GJD 1202 | GJDCCCD 1203 | GJDCCD 1204 | GJDCD 1205 | GJDD 1206 | GJDICD 1207 | GJDID 1208 | GJDLCD 1209 | GJDPCD 1210 | GJFCCD 1211 | GJFCD 1212 | GJFD 1213 | GJFFD 1214 | GJFGD 1215 | GJFICD 1216 | GJGCD 1217 | GJGD 1218 | GJGPCD 1219 | GJICCCD 1220 | GJICCD 1221 | GJICD 1222 | GJID 1223 | GJIID 1224 | GJJCCCD 1225 | GJJCCD 1226 | GJJCCDD 1227 | GJJCD 1228 | GJJCJCCCD 1229 | GJJCJCCD 1230 | GJJCPCD 1231 | GJJD 1232 | GJJDCD 1233 | GJJDD 1234 | GJJFCCD 1235 | GJJFD 1236 | GJJGD 1237 | GJJJCD 1238 | GJJJD 1239 | GJJJICD 1240 | GJJJJCCD 1241 | GJJJJD 1242 | GJJPCCCD 1243 | GJJPCCD 1244 | GJJPCID 1245 | GJJPPD 1246 | GJLCCCCD 1247 | GJLCD 1248 | GJLCDD 1249 | GJLD 1250 | GJMCCD 1251 | GJMD 1252 | GJPCCCCD 1253 | GJPCCCD 1254 | GJPCCD 1255 | GJPCD 1256 | GJPCDD 1257 | GJPCJCD 1258 | GJPCLCD 1259 | GJPCMD 1260 | GJPD 1261 | GJPDD 1262 | GJPGCCD 1263 | GJPGD 1264 | GJPICCD 1265 | GJPICD 1266 | GJPICDD 1267 | GJPJCCD 1268 | GJPJD 1269 | GJPJPD 1270 | GJPLCD 1271 | GJPPJD 1272 | GKCCCD 1273 | GKCCD 1274 | GKCCPD 1275 | GKCD 1276 | GKCDCD 1277 | GKCDD 1278 | GKCDJCD 1279 | 
GKCJCD 1280 | GKCMD 1281 | GKD 1282 | GKDD 1283 | GKJJD 1284 | GLCCCCCCD 1285 | GLCCCCD 1286 | GLCCCD 1287 | GLCCD 1288 | GLCCDD 1289 | GLCCJCCCD 1290 | GLCCJCCD 1291 | GLCD 1292 | GLCDD 1293 | GLCDGCCD 1294 | GLCGCJCD 1295 | GLCGD 1296 | GLCGDD 1297 | GLCJD 1298 | GLCJJCCCCCD 1299 | GLCLD 1300 | GLCMD 1301 | GLCPCCD 1302 | GLCPD 1303 | GLD 1304 | GLDCD 1305 | GLDCMD 1306 | GLDCMDCD 1307 | GLDCMDD 1308 | GLDD 1309 | GLDDCKCD 1310 | GLFCD 1311 | GLFCFD 1312 | GLFGCD 1313 | GLGCD 1314 | GLGD 1315 | GLGPJD 1316 | GLICCD 1317 | GLICD 1318 | GLID 1319 | GLJCCCD 1320 | GLJCCD 1321 | GLJCD 1322 | GLJCICCD 1323 | GLJD 1324 | GLJFCD 1325 | GLJGD 1326 | GLJICCD 1327 | GLJID 1328 | GLJJD 1329 | GLJPCCD 1330 | GLJPCICD 1331 | GLJPJCCD 1332 | GLJWGWCD 1333 | GLLCCCD 1334 | GLLCID 1335 | GLPCCCD 1336 | GLPCCD 1337 | GLPCD 1338 | GLPCDD 1339 | GLPCPCCD 1340 | GLPD 1341 | GLPDD 1342 | GLPGCD 1343 | GLPJD 1344 | GLPLJCCCD 1345 | GLPLJCD 1346 | GLPPCCCCD 1347 | GLPPCCD 1348 | GLPPCD 1349 | GMCCCCD 1350 | GMCCCD 1351 | GMCCD 1352 | GMCCID 1353 | GMCD 1354 | GMCDCCCD 1355 | GMCDCCD 1356 | GMCDCD 1357 | GMCDD 1358 | GMCDMCD 1359 | GMCGD 1360 | GMCJCD 1361 | GMCMD 1362 | GMCMJD 1363 | GMD 1364 | GMDCD 1365 | GMDD 1366 | GMDICD 1367 | GMDID 1368 | GMGJCD 1369 | GMGJJD 1370 | GMICD 1371 | GMID 1372 | GMIPJCCD 1373 | GMJCCD 1374 | GMJCD 1375 | GMJD 1376 | GMJDD 1377 | GMJICCCD 1378 | GMJMJFCD 1379 | GMJPCD 1380 | GMJPLCCD 1381 | GMLD 1382 | GMLDCD 1383 | GMLGCD 1384 | GMLID 1385 | GMLLD 1386 | GMMCCCD 1387 | GMMD 1388 | GMMGD 1389 | GMMLCCD 1390 | GMMPCD 1391 | GMMPD 1392 | GMPCCD 1393 | GMPCD 1394 | GMPD 1395 | GMPDCD 1396 | GMPDD 1397 | GMPJCD 1398 | GPCCCCCCD 1399 | GPCCCCD 1400 | GPCCCCID 1401 | GPCCCD 1402 | GPCCD 1403 | GPCCDCCD 1404 | GPCCDD 1405 | GPCCDDD 1406 | GPCD 1407 | GPCDCCD 1408 | GPCDCD 1409 | GPCDD 1410 | GPCFDCCD 1411 | GPCFDD 1412 | GPCGD 1413 | GPCICCD 1414 | GPCID 1415 | GPCIJD 1416 | GPCJCCCD 1417 | GPCJCCD 1418 | GPCJCD 1419 | GPCPID 1420 | GPCWDWCD 1421 | GPD 1422 
| GPDCCD 1423 | GPDCD 1424 | GPDD 1425 | GPFCCD 1426 | GPFCD 1427 | GPFD 1428 | GPFFCD 1429 | GPGCCCD 1430 | GPGD 1431 | GPGJCJCCCCD 1432 | GPGPJD 1433 | GPICCCCD 1434 | GPICCCD 1435 | GPICCD 1436 | GPICD 1437 | GPID 1438 | GPIDCD 1439 | GPIDD 1440 | GPJCCCCCD 1441 | GPJCCCD 1442 | GPJCCD 1443 | GPJCD 1444 | GPJCDD 1445 | GPJCJCCD 1446 | GPJD 1447 | GPJDCCD 1448 | GPJDCD 1449 | GPJDD 1450 | GPJFICD 1451 | GPJFID 1452 | GPJGD 1453 | GPJJCCD 1454 | GPJJCD 1455 | GPJLCD 1456 | GPJWDWD 1457 | GPLCWCWCWD 1458 | GPLD 1459 | GPLJCCD 1460 | GPMJCGD 1461 | GPMMD 1462 | GPMPCCD 1463 | GPPCCCCD 1464 | GPPCCCD 1465 | GPPCCD 1466 | GPPCD 1467 | GPPCDCCD 1468 | GPPCDD 1469 | GPPCLD 1470 | GPPD 1471 | GPPDCD 1472 | GPPDCDD 1473 | GPPDD 1474 | GPPGCD 1475 | GPPICCD 1476 | GPPID 1477 | GPPJCD 1478 | GPPJD 1479 | GPPJDD 1480 | GPPJJCCCCD 1481 | GPPLD 1482 | GPPPCCD 1483 | GPPPCKCCD 1484 | GPPPPCCD 1485 | GWCPWD 1486 | GWCWCCCD 1487 | GWCWCD 1488 | GWCWD 1489 | GWCWPJCD 1490 | GWD 1491 | GWFCD 1492 | GWGCCCD 1493 | GWGCCD 1494 | GWGCCWCD 1495 | GWGCD 1496 | GWGCWD 1497 | GWGD 1498 | GWGID 1499 | GWGWCCCCD 1500 | GWGWCCCD 1501 | GWGWCD 1502 | GWGWICD 1503 | GWGWLCD 1504 | GWICD 1505 | GWICWD 1506 | GWIWD 1507 | GWJWD 1508 | GWLJWCD 1509 | GWPD 1510 | GWPJD 1511 | ICCCCCCD 1512 | ICCCCCD 1513 | ICCCCD 1514 | ICCCCDD 1515 | ICCCD 1516 | ICCD 1517 | ICCDCCD 1518 | ICCDCD 1519 | ICCDD 1520 | ICCGCCD 1521 | ICCGCIPD 1522 | ICCGD 1523 | ICCJD 1524 | ICCPD 1525 | ICCWDWCD 1526 | ICD 1527 | ICDD 1528 | ICDID 1529 | ICFD 1530 | ICGCCCD 1531 | ICGCD 1532 | ICGFD 1533 | ICGGCD 1534 | ICGLCMD 1535 | ICICD 1536 | ICID 1537 | ICIGD 1538 | ICJCD 1539 | ICJD 1540 | ICJJD 1541 | ICLJCD 1542 | ICMCCCCD 1543 | ICMD 1544 | ICPCD 1545 | ICPD 1546 | ICPPD 1547 | ICWGWCD 1548 | ICWGWD 1549 | ICWGWDCD 1550 | ID 1551 | IDCCCCD 1552 | IDCCCD 1553 | IDCCD 1554 | IDCCGJID 1555 | IDCCICD 1556 | IDCCICDID 1557 | IDCD 1558 | IDCDCD 1559 | IDCDD 1560 | IDCFCD 1561 | IDCGD 1562 | IDCICD 1563 | IDCID 1564 | IDCJD 1565 
| IDCPCCCCCCD 1566 | IDD 1567 | IDGCCCD 1568 | IDGCD 1569 | IDID 1570 | IDIDD 1571 | IDJCD 1572 | IDKCD 1573 | IDPD 1574 | IDWCWCCDD 1575 | IFD 1576 | IFWGWCD 1577 | IGCCCD 1578 | IGCCCDD 1579 | IGCCD 1580 | IGCD 1581 | IGCDCD 1582 | IGCDD 1583 | IGCGCCD 1584 | IGCGCD 1585 | IGCID 1586 | IGCJD 1587 | IGCPD 1588 | IGCWJWD 1589 | IGD 1590 | IGDD 1591 | IGFCCD 1592 | IGFCD 1593 | IGFD 1594 | IGGCD 1595 | IGID 1596 | IGJD 1597 | IGLCD 1598 | IGLD 1599 | IGPCD 1600 | IGPCDD 1601 | IICCCD 1602 | IICCD 1603 | IICD 1604 | IICGD 1605 | IID 1606 | IIGD 1607 | IIGJCJCD 1608 | IIIGCD 1609 | IIPCD 1610 | IJCCCCD 1611 | IJCCCD 1612 | IJCCD 1613 | IJCD 1614 | IJD 1615 | IJDCCD 1616 | IJGCD 1617 | IJGD 1618 | IJJCD 1619 | IJJD 1620 | IJJJCD 1621 | IJPCDD 1622 | IJWCFIWGD 1623 | IJWCFWD 1624 | IJWCPWGD 1625 | IKCCCD 1626 | ILCD 1627 | ILD 1628 | ILPCD 1629 | ILPMD 1630 | IMCCD 1631 | IMCD 1632 | IMD 1633 | IMPD 1634 | IPCCCD 1635 | IPCCD 1636 | IPCCID 1637 | IPCCJD 1638 | IPCD 1639 | IPCID 1640 | IPCJD 1641 | IPCPD 1642 | IPD 1643 | IPFCD 1644 | IPID 1645 | IPIJD 1646 | IPJCGD 1647 | IPJD 1648 | IPPCD 1649 | JCCCCCCD 1650 | JCCCCCD 1651 | JCCCCD 1652 | JCCCD 1653 | JCCCJCD 1654 | JCCD 1655 | JCCID 1656 | JCCJD 1657 | JCCMCD 1658 | JCD 1659 | JCDCCD 1660 | JCDCD 1661 | JCDD 1662 | JCDID 1663 | JCFCD 1664 | JCGCCCCD 1665 | JCGCCCD 1666 | JCGCCD 1667 | JCGCD 1668 | JCGD 1669 | JCGJGD 1670 | JCICCCD 1671 | JCID 1672 | JCIDD 1673 | JCJCCCD 1674 | JCJCCD 1675 | JCJCD 1676 | JCJD 1677 | JCJDD 1678 | JCJFD 1679 | JCJJPCD 1680 | JCJPID 1681 | JCJWGWD 1682 | JCLD 1683 | JCMD 1684 | JCMPD 1685 | JCPJCID 1686 | JCPJJCD 1687 | JCPPCCCD 1688 | JD 1689 | JDCD 1690 | JDCMD 1691 | JDD 1692 | JDGD 1693 | JDID 1694 | JDJD 1695 | JDMD 1696 | JFCD 1697 | JFD 1698 | JGCCCD 1699 | JGCD 1700 | JGD 1701 | JGDCJD 1702 | JGGD 1703 | JGPD 1704 | JICCCD 1705 | JICD 1706 | JID 1707 | JIDD 1708 | JIID 1709 | JIJD 1710 | JILD 1711 | JJCCCD 1712 | JJCCD 1713 | JJCCPGD 1714 | JJCD 1715 | JJD 1716 | JJDCJD 1717 | 
JJDD 1718 | JJGCCD 1719 | JJGD 1720 | JJICD 1721 | JJID 1722 | JJJCCCD 1723 | JJJCD 1724 | JJJCFCCCD 1725 | JJJD 1726 | JJJGD 1727 | JJMCID 1728 | JJPCD 1729 | JJPD 1730 | JJPPJLCD 1731 | JJWFWCCJJD 1732 | JJWGWCD 1733 | JJWGWCDD 1734 | JKCD 1735 | JKD 1736 | JLCCD 1737 | JLCCDD 1738 | JLCCJD 1739 | JLCD 1740 | JLCDD 1741 | JLCMD 1742 | JLCMDD 1743 | JLD 1744 | JLDD 1745 | JLGCJD 1746 | JLGJCCCJD 1747 | JLJD 1748 | JMCD 1749 | JMD 1750 | JMJD 1751 | JMPD 1752 | JPCCD 1753 | JPCD 1754 | JPCMD 1755 | JPCMDPD 1756 | JPD 1757 | JPDCCCD 1758 | JPDD 1759 | JPDGCD 1760 | JPFCCD 1761 | JPFD 1762 | JPICD 1763 | JPID 1764 | JPIID 1765 | JPJD 1766 | JPJJCCCFPCD 1767 | JPMD 1768 | JPMDCCD 1769 | JPMDD 1770 | JPPJD 1771 | JPPJLCD 1772 | KCCCCCD 1773 | KCCCCD 1774 | KCCCCDCD 1775 | KCCCD 1776 | KCCCDCD 1777 | KCCCDD 1778 | KCCCDDCCCD 1779 | KCCCGD 1780 | KCCD 1781 | KCCDCCD 1782 | KCCDCD 1783 | KCCJD 1784 | KCCJDID 1785 | KCCPD 1786 | KCD 1787 | KCDCCCCD 1788 | KCDCCD 1789 | KCDCD 1790 | KCDD 1791 | KCDICD 1792 | KCDJD 1793 | KCGCCCD 1794 | KCGCCCDD 1795 | KCGCCD 1796 | KCGCD 1797 | KCGD 1798 | KCGGGD 1799 | KCICD 1800 | KCID 1801 | KCIDCD 1802 | KCJCD 1803 | KCJD 1804 | KCKCD 1805 | KCMD 1806 | KCMDCD 1807 | KCPD 1808 | KCWGWD 1809 | KD 1810 | KDCCCD 1811 | KDCD 1812 | KDD 1813 | KDICD 1814 | KDLCCPD 1815 | KFCD 1816 | KFCDD 1817 | KFD 1818 | KFWFD 1819 | KGCCCD 1820 | KGCCD 1821 | KGCD 1822 | KGCDCCD 1823 | KGD 1824 | KGDD 1825 | KGGD 1826 | KGJPD 1827 | KICCD 1828 | KICD 1829 | KICDD 1830 | KID 1831 | KIDCCD 1832 | KIDJCD 1833 | KIGID 1834 | KIMCD 1835 | KIMD 1836 | KIWGWD 1837 | KJCCD 1838 | KJCD 1839 | KJD 1840 | KJDD 1841 | KJICCD 1842 | KJJD 1843 | KJJDCD 1844 | KJJJD 1845 | KJPD 1846 | KLCCD 1847 | KLD 1848 | KMCCJCCD 1849 | KMCD 1850 | KMCDD 1851 | KMD 1852 | KMDCD 1853 | KMDD 1854 | KMMD 1855 | KMMMD 1856 | KPCCCD 1857 | KPCCD 1858 | KPCD 1859 | KPD 1860 | KPDD 1861 | LCCCCD 1862 | LCCCD 1863 | LCCD 1864 | LCCDD 1865 | LCCDJCCD 1866 | LCCGD 1867 | LCCGID 1868 | LCCID 
1869 | LCCPCD 1870 | LCCWGWD 1871 | LCD 1872 | LCDCCD 1873 | LCDCD 1874 | LCDCDD 1875 | LCDCDIGCD 1876 | LCDD 1877 | LCDFD 1878 | LCDGDD 1879 | LCDGID 1880 | LCDID 1881 | LCDLD 1882 | LCDLDCD 1883 | LCDLDD 1884 | LCDMCDD 1885 | LCDPD 1886 | LCGD 1887 | LCGDD 1888 | LCICCWGWD 1889 | LCID 1890 | LCIGD 1891 | LCJCD 1892 | LCJD 1893 | LCLD 1894 | LCMCCD 1895 | LCMCDD 1896 | LCMCID 1897 | LCMCMD 1898 | LCMD 1899 | LCMJCICD 1900 | LCMJD 1901 | LCPCJCD 1902 | LCPD 1903 | LCPMD 1904 | LCPPCD 1905 | LD 1906 | LDCCD 1907 | LDCD 1908 | LDCLCD 1909 | LDCLCDCD 1910 | LDCPD 1911 | LDD 1912 | LDDD 1913 | LDLCCCCD 1914 | LFCD 1915 | LFCFD 1916 | LFD 1917 | LFPPPCCD 1918 | LGCD 1919 | LGD 1920 | LGGCCCD 1921 | LGGCD 1922 | LGJCD 1923 | LGJLCD 1924 | LGJLD 1925 | LICCCD 1926 | LICCD 1927 | LICD 1928 | LICLD 1929 | LID 1930 | LIGD 1931 | LIPCCCD 1932 | LIWGWCCCD 1933 | LJCCCCD 1934 | LJCCCCWGWD 1935 | LJCCCD 1936 | LJCCD 1937 | LJCCDCCCD 1938 | LJCCDCCD 1939 | LJCCDCD 1940 | LJCCDID 1941 | LJCCDJCD 1942 | LJCD 1943 | LJCDD 1944 | LJCGD 1945 | LJCJJD 1946 | LJCWCWJWCWJD 1947 | LJD 1948 | LJDCCD 1949 | LJDCD 1950 | LJDD 1951 | LJDJPD 1952 | LJDJPDD 1953 | LJDJPDID 1954 | LJDJPMDD 1955 | LJFJJCLCD 1956 | LJGD 1957 | LJID 1958 | LJJCD 1959 | LJJD 1960 | LJLD 1961 | LJMD 1962 | LJPCD 1963 | LKCD 1964 | LLCD 1965 | LLD 1966 | LLPD 1967 | LMCCFCCD 1968 | LMCD 1969 | LMD 1970 | LMID 1971 | LPCCCCCD 1972 | LPCCCD 1973 | LPCCD 1974 | LPCD 1975 | LPCDD 1976 | LPCFPPD 1977 | LPCGCCCD 1978 | LPCGCCD 1979 | LPCGCCDCCD 1980 | LPCGD 1981 | LPCGDDPD 1982 | LPD 1983 | LPDD 1984 | LPDDD 1985 | LPICD 1986 | LPID 1987 | LPJD 1988 | LPMDCCD 1989 | LPPJD 1990 | MCCCD 1991 | MCCD 1992 | MCCPD 1993 | MCD 1994 | MCDCCD 1995 | MCDCCDCD 1996 | MCDCCDD 1997 | MCDCD 1998 | MCDCGD 1999 | MCDD 2000 | MCDFD 2001 | MCDFDD 2002 | MCDLCD 2003 | MCDPPD 2004 | MCGCD 2005 | MCICD 2006 | MCID 2007 | MCIDWGWD 2008 | MCJD 2009 | MCLD 2010 | MCPD 2011 | MD 2012 | MDD 2013 | MFD 2014 | MGD 2015 | MGJD 2016 | MGJJD 2017 | MICCD 
2018 | MICD 2019 | MID 2020 | MIDCCD 2021 | MJCCD 2022 | MJCD 2023 | MJD 2024 | MJDD 2025 | MLCD 2026 | MLD 2027 | MLGD 2028 | MLGGD 2029 | MMCCD 2030 | MMCD 2031 | MMD 2032 | MMMD 2033 | MMPD 2034 | MPCCD 2035 | MPCD 2036 | MPD 2037 | MPDCD 2038 | MPJPD 2039 | MPPD 2040 | PCCCCCCD 2041 | PCCCCCD 2042 | PCCCCD 2043 | PCCCD 2044 | PCCCDD 2045 | PCCD 2046 | PCCDD 2047 | PCCGJGD 2048 | PCCID 2049 | PCCIDD 2050 | PCD 2051 | PCDCD 2052 | PCDCJCD 2053 | PCDD 2054 | PCDFCCCD 2055 | PCDID 2056 | PCGCCD 2057 | PCGCD 2058 | PCGD 2059 | PCID 2060 | PCJCD 2061 | PCJGD 2062 | PCPCCD 2063 | PCPD 2064 | PD 2065 | PDCCD 2066 | PDD 2067 | PDDD 2068 | PFCCD 2069 | PFCDD 2070 | PFCJCD 2071 | PFD 2072 | PFFCD 2073 | PFPCD 2074 | PGCD 2075 | PGCJD 2076 | PGD 2077 | PGDCICD 2078 | PGJD 2079 | PICCD 2080 | PICD 2081 | PICDD 2082 | PID 2083 | PIFD 2084 | PIJCCD 2085 | PIJD 2086 | PJCCCDD 2087 | PJCCD 2088 | PJCD 2089 | PJD 2090 | PJDCD 2091 | PJDD 2092 | PJFD 2093 | PJGD 2094 | PJICCCPCD 2095 | PJID 2096 | PJJD 2097 | PJJDD 2098 | PJJPD 2099 | PJLPCD 2100 | PJPCD 2101 | PJPD 2102 | PLD 2103 | PLPCD 2104 | PMJCD 2105 | PPCCCDCD 2106 | PPCD 2107 | PPCJCCD 2108 | PPD 2109 | PPDCD 2110 | PPFCCD 2111 | PPFCD 2112 | PPGCID 2113 | PPGD 2114 | PPGJCCD 2115 | PPICCD 2116 | PPIGD 2117 | PPJCD 2118 | PPJD 2119 | PPJJD 2120 | PPMD 2121 | PPPCPD 2122 | PPPD 2123 | PPPWGWCCD 2124 | -------------------------------------------------------------------------------- /data/nt.tr.txt: -------------------------------------------------------------------------------- 1 | ,A,B,C,D,F,G,I,J,K,L,M,P,S,W,X,Z 2 | A,0,0,19945,883,2013,58781,3290,1582,19254,1422,282,944,0,0,0,0 3 | B,3013,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125708 4 | C,0,0,25949,57230,142,1908,850,1603,53,169,260,834,0,144,0,0 5 | D,0,109511,4389,6473,135,1018,476,229,28,105,177,120,0,59,4586,0 6 | F,0,0,971,2666,138,127,92,163,5,7,5,87,0,48,0,0 7 | G,0,0,24497,42962,1283,5178,3104,2782,182,1415,756,1173,0,140,0,0 8 | 
# -*- coding:utf-8 -*-
# Author: Li Pengfei
# Blog: https://www.lookfor404.com/
# Code walkthrough: https://www.lookfor404.com/用隐马尔可夫模型hmm做命名实体识别-ner系列二/
# GitHub project: https://github.com/lipengfei-558/hmm_ner_organization
"""Generate the HMM parameter files used for organization-name recognition.

Three comma-separated text files are produced from the raw count files:

* ``initial_vector.txt``          -- one line per state: state,count,probability
* ``transition_probability.txt``  -- one line per pair: state1,state2,probability
* ``emit_probability.txt``        -- one line per pair:
  hidden_state,observed_state,probability

Files are opened with the platform default encoding, exactly as before.
"""


def _parse_number(text):
    """Convert a count read from a data file to ``int`` (or ``float``).

    Replaces the former ``eval`` calls: data files must never be able to
    execute code.  Raises ``ValueError`` for anything that is not a number.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def _ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 if the
    denominator is zero (no observations means probability 0)."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def _iter_tag_counts(path):
    """Yield ``(observed_state, tag, count)`` for every tag/count pair in a
    dictionary file.

    Each line is split on single spaces; the first field is the observed
    state and the remaining fields are alternating ``tag count`` pairs.

    :raises ValueError: if a line has a tag without a count, or a count that
        is not a number
    """
    with open(path, mode="r") as dict_file:
        for line_number, line in enumerate(dict_file, 1):
            fields = line.strip().split(" ")
            observed_state = fields[0]
            tags_and_freq = fields[1:]
            if len(tags_and_freq) % 2:
                raise ValueError(
                    "%s:%d: expected alternating 'tag count' pairs, got %r"
                    % (path, line_number, tags_and_freq))
            for tag, freq in zip(tags_and_freq[0::2], tags_and_freq[1::2]):
                yield observed_state, tag, _parse_number(freq)


def genertate_initial_vector(hidden_states, input_path="./data/nt.txt",
                             output_path="./data/initial_vector.txt"):
    """Generate the initial probability vector (pi) file.

    Every output line has the form ``state,count,probability`` where
    ``count`` is the total number of occurrences of the state in
    ``input_path`` and ``probability`` is ``count`` divided by the total over
    all states (0 when the input contains no counts at all).

    :param hidden_states: list of hidden-state tags
    :param input_path: dictionary file with ``word tag count ...`` lines
    :param output_path: file to write
    :return: None
    :raises KeyError: if the input uses a tag that is not in ``hidden_states``
    """
    totals = {state: 0 for state in hidden_states}
    grand_total = 0  # sum of all counts over all states
    for _, tag, count in _iter_tag_counts(input_path):
        totals[tag] += count
        grand_total += count
    with open(output_path, mode="w") as output_file:
        for state, total in totals.items():
            output_file.write(
                "%s,%d,%f\n" % (state, total, _ratio(total, grand_total)))
    print("generated %s" % output_path)


# Correctly spelled alias; the original (misspelled) name stays available so
# existing callers keep working.
generate_initial_vector = genertate_initial_vector


def generate_transition_probability(hidden_states,
                                    input_path="./data/nt.tr.txt",
                                    output_path="./data/transition_probability.txt"):
    """Generate the transition probability matrix file.

    ``input_path`` is a comma-separated count matrix: a header line (skipped)
    followed by one row per source state, ``state,count,count,...``.  The
    i-th count of a row is taken to belong to ``hidden_states[i]``, so the
    column order of the file must match ``hidden_states``.

    Every output line has the form ``state1,state2,probability``; a row whose
    counts sum to zero yields probability 0.0 for every target state.

    :param hidden_states: list of hidden-state tags, in column order
    :param input_path: transition count matrix file
    :param output_path: file to write
    :return: None
    """
    rows = []
    with open(input_path, mode="r") as count_file:
        next(count_file, None)  # skip the header line
        for line in count_file:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines (e.g. a trailing newline)
            fields = stripped.split(",")
            first_state = fields[0]
            counts = [_parse_number(number) for number in fields[1:]]
            row_total = sum(counts)
            for index, second_state in enumerate(hidden_states):
                probability = _ratio(counts[index], row_total)
                rows.append((first_state, second_state, str(probability)))
    # Write only after the whole input parsed successfully.
    with open(output_path, mode="w") as output_file:
        for row in rows:
            output_file.write("%s,%s,%s\n" % row)
    print("generated %s" % output_path)


def generate_emit_probability(initial_freq, input_path="./data/nt.txt",
                              output_path="./data/emit_probability.txt"):
    """Generate the emission matrix file.

    Every output line has the form ``hidden_state,observed_state,probability``
    where the probability is the count of the pair in ``input_path`` divided
    by the total frequency of the hidden state.

    :param initial_freq: dict mapping hidden-state tag to its total frequency
    :param input_path: dictionary file with ``word tag count ...`` lines
    :param output_path: file to write
    :return: None
    :raises KeyError: if the input uses a tag missing from ``initial_freq``
    """
    rows = [
        (tag, observed_state, str(_ratio(count, initial_freq[tag])))
        for observed_state, tag, count in _iter_tag_counts(input_path)
    ]
    # Write only after the whole input parsed successfully.
    with open(output_path, mode="w") as output_file:
        for row in rows:
            output_file.write("%s,%s,%s\n" % row)
    print("generated %s" % output_path)


def get_initial_freq(path="./data/initial_vector.txt"):
    """Read the frequency of every tag from the initial-vector file.

    Lines that do not consist of exactly three comma-separated fields are
    ignored.

    :param path: file written by ``genertate_initial_vector``
    :return: dict mapping tag to its frequency (int)
    """
    result = {}
    with open(path, mode="r") as vector_file:
        for line in vector_file:
            fields = line.strip().split(",")
            if len(fields) == 3:
                result[fields[0]] = int(fields[1])
    return result


if __name__ == '__main__':
    hidden_states = ["A", "B", "C", "D", "F", "G", "I", "J",
                     "K", "L", "M", "P", "S", "W", "X", "Z"]
    # Order matters: the emission matrix needs the frequencies stored in the
    # initial-vector file.
    genertate_initial_vector(hidden_states)
    generate_transition_probability(hidden_states)
    generate_emit_probability(get_initial_freq())