├── LICENSE
├── README.md
├── __init__.py
├── db.sqlite3
├── event_extractor
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-36.pyc
│   ├── data
│   │   ├── .DS_Store
│   │   ├── en
│   │   │   ├── .DS_Store
│   │   │   ├── dev.json
│   │   │   ├── event_schema.json
│   │   │   ├── test.json
│   │   │   └── train.json
│   │   └── zh
│   │       ├── .DS_Store
│   │       ├── dev.json
│   │       ├── event_schema.json
│   │       ├── test.json
│   │       └── train.json
│   ├── dataprocess
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── data_loader.cpython-36.pyc
│   │   │   └── process_en.cpython-36.pyc
│   │   ├── data_loader.py
│   │   ├── process_en.py
│   │   └── process_zh.py
│   ├── db.sqlite3
│   ├── model
│   │   ├── __init__.py
│   │   └── dgcnn.py
│   ├── predict
│   │   ├── __init__.py
│   │   └── event_pipeline.py
│   ├── settings.py
│   ├── test.py
│   ├── train
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   └── eval.cpython-36.pyc
│   │   ├── eval.py
│   │   └── train.py
│   ├── urls.py
│   ├── views.py
│   └── wsgi.py
├── manage.py
└── requirements.txt
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 chenking2020 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # event_extract_master 2 | I adapted the DGCNN from Su Jianlin's (苏神) triple-extraction algorithm to the event extraction task and ported it from Keras to PyTorch, which I am more used to. The data-loading layer is written so that further languages can be plugged in easily. The project covers event extraction, a Django service, and both train and predict endpoints. 3 | 4 | Event extraction currently supports two languages, Chinese and English: the Chinese data uses the Baidu event extraction competition dataset, the English data uses the ACE 2005 dataset. 5 | 6 | A few sample records are placed under the data folder, for reference only. 7 | 8 | Install the dependencies: 9 | pip3 install -r requirements.txt -i https://mirrors.ustc.edu.cn/pypi/web/simple 10 | 11 | # Start the service 12 | 13 | python3 manage.py runserver 0.0.0.0:8000 14 | 15 | # API calls 16 | 17 | Training: 18 | http://localhost/apis/create_train 19 | 20 | Prediction: 21 | http://localhost/apis/event_extract 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/__init__.py -------------------------------------------------------------------------------- /db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/db.sqlite3 -------------------------------------------------------------------------------- /event_extractor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/__init__.py -------------------------------------------------------------------------------- /event_extractor/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /event_extractor/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/data/.DS_Store -------------------------------------------------------------------------------- /event_extractor/data/en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/data/en/.DS_Store -------------------------------------------------------------------------------- /event_extractor/data/en/dev.json: -------------------------------------------------------------------------------- 1 | {"text": "New Questions About Attacking Iraq; Is Torturing Terrorists Necessary?", "id": "202005100", "event_list": []} 2 | {"text": "Welcome back.", "id": "202005101", "event_list": []} 3 | {"text": "Orders went out today to deploy 17,000 U.S. Army soldiers in the Persian Gulf region.", "id": "202005102", "event_list": [{"event_type": "Movement:Transport", "trigger": "deploy", "trigger_start_index": 5, "class": "Movement", "arguments": [{"argument_start_index": 6, "role": "Artifact", "argument": "17,000 U.S.
Army soldiers", "alias": []}, {"argument_start_index": 11, "role": "Destination", "argument": "the Persian Gulf region", "alias": []}]}]} 4 | {"text": "The army's entire first Calvary division based at Fort Hood, Texas, would join the quarter million U.S. forces already in the region.", "id": "202005103", "event_list": []} 5 | {"text": "We're talking about possibilities of full scale war with former Congressman Tom Andrews, Democrat of Maine.", "id": "202005104", "event_list": [{"event_type": "Conflict:Attack", "trigger": "war", "trigger_start_index": 8, "class": "Conflict", "arguments": []}, {"event_type": "Personnel:End-Position", "trigger": "former", "trigger_start_index": 10, "class": "Personnel", "arguments": [{"argument_start_index": 10, "role": "Person", "argument": "former Congressman Tom Andrews", "alias": []}, {"argument_start_index": 17, "role": "Entity", "argument": "Maine", "alias": []}, {"argument_start_index": 11, "role": "Position", "argument": "Congressman", "alias": []}, {"argument_start_index": 10, "role": "Time-Within", "argument": "former", "alias": []}]}]} 6 | {"text": "He's now national director of Win Without War, and former Congressman Bob Dornan, Republican of California.", "id": "202005105", "event_list": [{"event_type": "Personnel:End-Position", "trigger": "former", "trigger_start_index": 11, "class": "Personnel", "arguments": [{"argument_start_index": 11, "role": "Person", "argument": "former Congressman Bob Dornan", "alias": []}, {"argument_start_index": 18, "role": "Entity", "argument": "California", "alias": []}, {"argument_start_index": 12, "role": "Position", "argument": "Congressman", "alias": []}, {"argument_start_index": 11, "role": "Time-Within", "argument": "former", "alias": []}]}]} 7 | {"text": "Bob, one of the reasons I think so many Americans are worried about this war and so many people around the world don't want to go is there have been a lot of problems with credibility from this administration.", "id": "202005106", "event_list": [{"event_type": "Conflict:Attack", "trigger": "war", "trigger_start_index": 15, "class": "Conflict", "arguments": []}]} 8 | -------------------------------------------------------------------------------- /event_extractor/data/en/event_schema.json: -------------------------------------------------------------------------------- 1 | {"event_type": "Movement:Transport", "role_list": [{"role": "Vehicle"}, {"role": "Artifact"}, {"role": "Destination"}, {"role": "Agent"}, {"role": "Time-At-Beginning"}, {"role": "Time-Within"}, {"role": "Origin"}, {"role": "Time-At-End"}, {"role": "Time-Starting"}, {"role": "Time-Ending"}, {"role": "Time-After"}, {"role": "Time-Holds"}, {"role": "Victim"}, {"role": "Place"}, {"role": "Time-Before"}]} 2 | {"event_type": "Personnel:Elect", "role_list": [{"role": "Person"}, {"role": "Position"}, {"role": "Entity"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-Starting"}, {"role": "Time-Holds"}, {"role": "Time-Before"}, {"role": "Time-At-Beginning"}]} 3 | {"event_type": "Personnel:Start-Position", "role_list": [{"role": "Person"}, {"role": "Entity"}, {"role": "Position"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-At-Beginning"}, {"role": "Time-After"}, {"role": "Time-Starting"}, {"role": "Time-Holds"}, {"role": "Time-Before"}]} 4 | {"event_type": "Personnel:Nominate", "role_list": [{"role": "Person"}, {"role": "Agent"}, {"role": "Time-Within"}, {"role": "Position"}]} 5 | {"event_type": "Personnel:End-Position", "role_list": [{"role": "Person"}, {"role": "Entity"}, 
{"role": "Position"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Time-Ending"}, {"role": "Time-Holds"}, {"role": "Time-Before"}, {"role": "Time-Starting"}, {"role": "Time-After"}, {"role": "Time-At-End"}]} 6 | {"event_type": "Conflict:Attack", "role_list": [{"role": "Attacker"}, {"role": "Place"}, {"role": "Target"}, {"role": "Time-Within"}, {"role": "Time-Ending"}, {"role": "Instrument"}, {"role": "Time-Holds"}, {"role": "Time-At-Beginning"}, {"role": "Victim"}, {"role": "Time-After"}, {"role": "Time-Starting"}, {"role": "Time-Before"}, {"role": "Time-At-End"}, {"role": "Agent"}]} 7 | {"event_type": "Contact:Meet", "role_list": [{"role": "Place"}, {"role": "Entity"}, {"role": "Time-Within"}, {"role": "Time-Holds"}, {"role": "Time-At-Beginning"}, {"role": "Time-Starting"}, {"role": "Time-Ending"}, {"role": "Time-After"}]} 8 | {"event_type": "Life:Marry", "role_list": [{"role": "Person"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Time-Holds"}, {"role": "Time-Before"}]} 9 | {"event_type": "Contact:Phone-Write", "role_list": [{"role": "Entity"}, {"role": "Time-Within"}, {"role": "Time-Before"}, {"role": "Place"}, {"role": "Time-Holds"}, {"role": "Time-Starting"}, {"role": "Time-After"}]} 10 | {"event_type": "Transaction:Transfer-Money", "role_list": [{"role": "Giver"}, {"role": "Recipient"}, {"role": "Money"}, {"role": "Beneficiary"}, {"role": "Time-Holds"}, {"role": "Time-Within"}, {"role": "Time-Starting"}, {"role": "Place"}, {"role": "Time-Before"}, {"role": "Time-After"}]} 11 | {"event_type": "Justice:Sue", "role_list": [{"role": "Plaintiff"}, {"role": "Defendant"}, {"role": "Crime"}, {"role": "Adjudicator"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-Holds"}]} 12 | {"event_type": "Conflict:Demonstrate", "role_list": [{"role": "Place"}, {"role": "Entity"}, {"role": "Time-Within"}, {"role": "Time-Starting"}, {"role": "Time-At-End"}]} 13 | {"event_type": "Business:End-Org", "role_list": [{"role": "Place"}, {"role": "Org"}, {"role": "Time-Within"}, {"role": "Time-After"}, {"role": "Time-Holds"}, {"role": "Time-At-Beginning"}]} 14 | {"event_type": "Life:Injure", "role_list": [{"role": "Victim"}, {"role": "Agent"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Instrument"}]} 15 | {"event_type": "Life:Die", "role_list": [{"role": "Victim"}, {"role": "Agent"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Instrument"}, {"role": "Time-Before"}, {"role": "Time-After"}, {"role": "Person"}, {"role": "Time-Starting"}, {"role": "Time-Holds"}, {"role": "Time-Ending"}, {"role": "Time-At-Beginning"}]} 16 | {"event_type": "Justice:Arrest-Jail", "role_list": [{"role": "Person"}, {"role": "Agent"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Crime"}, {"role": "Time-Holds"}, {"role": "Time-Ending"}, {"role": "Time-Starting"}, {"role": "Time-Before"}, {"role": "Time-At-Beginning"}]} 17 | {"event_type": "Transaction:Transfer-Ownership", "role_list": [{"role": "Artifact"}, {"role": "Buyer"}, {"role": "Seller"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Time-Before"}, {"role": "Beneficiary"}, {"role": "Price"}, {"role": "Time-At-Beginning"}, {"role": "Time-Ending"}]} 18 | {"event_type": "Business:Start-Org", "role_list": [{"role": "Org"}, {"role": "Agent"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-Starting"}, {"role": "Time-Before"}, {"role": "Time-After"}]} 19 | {"event_type": "Justice:Execute", "role_list": [{"role": "Agent"}, {"role": "Place"}, {"role": "Person"}, {"role": "Time-After"}, {"role": "Time-Within"}, 
{"role": "Crime"}, {"role": "Time-At-Beginning"}]} 20 | {"event_type": "Justice:Trial-Hearing", "role_list": [{"role": "Defendant"}, {"role": "Crime"}, {"role": "Adjudicator"}, {"role": "Prosecutor"}, {"role": "Place"}, {"role": "Time-Starting"}, {"role": "Time-Within"}, {"role": "Time-At-End"}, {"role": "Time-Holds"}]} 21 | {"event_type": "Justice:Sentence", "role_list": [{"role": "Adjudicator"}, {"role": "Defendant"}, {"role": "Sentence"}, {"role": "Crime"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-Starting"}, {"role": "Time-At-End"}]} 22 | {"event_type": "Life:Be-Born", "role_list": [{"role": "Place"}, {"role": "Person"}, {"role": "Time-Within"}, {"role": "Time-Holds"}]} 23 | {"event_type": "Justice:Charge-Indict", "role_list": [{"role": "Adjudicator"}, {"role": "Defendant"}, {"role": "Crime"}, {"role": "Prosecutor"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Time-Before"}, {"role": "Time-Ending"}]} 24 | {"event_type": "Justice:Convict", "role_list": [{"role": "Defendant"}, {"role": "Crime"}, {"role": "Adjudicator"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Time-At-Beginning"}]} 25 | {"event_type": "Business:Declare-Bankruptcy", "role_list": [{"role": "Org"}, {"role": "Time-After"}, {"role": "Place"}, {"role": "Time-Within"}, {"role": "Time-At-Beginning"}]} 26 | {"event_type": "Justice:Release-Parole", "role_list": [{"role": "Person"}, {"role": "Entity"}, {"role": "Time-Within"}, {"role": "Place"}, {"role": "Crime"}, {"role": "Time-After"}]} 27 | {"event_type": "Justice:Fine", "role_list": [{"role": "Entity"}, {"role": "Money"}, {"role": "Crime"}, {"role": "Adjudicator"}, {"role": "Place"}, {"role": "Time-Within"}]} 28 | {"event_type": "Justice:Pardon", "role_list": [{"role": "Defendant"}, {"role": "Adjudicator"}, {"role": "Place"}, {"role": "Time-At-End"}]} 29 | {"event_type": "Justice:Appeal", "role_list": [{"role": "Adjudicator"}, {"role": "Plaintiff"}, {"role": "Place"}, {"role": "Crime"}, {"role": "Time-Within"}, {"role": "Time-Holds"}]} 30 | {"event_type": "Business:Merge-Org", "role_list": [{"role": "Org"}, {"role": "Time-Ending"}]} 31 | {"event_type": "Justice:Extradite", "role_list": [{"role": "Destination"}, {"role": "Origin"}, {"role": "Person"}, {"role": "Agent"}, {"role": "Time-Within"}]} 32 | {"event_type": "Life:Divorce", "role_list": [{"role": "Person"}, {"role": "Time-Within"}, {"role": "Place"}]} 33 | {"event_type": "Justice:Acquit", "role_list": [{"role": "Defendant"}, {"role": "Adjudicator"}, {"role": "Crime"}, {"role": "Time-Within"}]} 34 | -------------------------------------------------------------------------------- /event_extractor/data/en/test.json: -------------------------------------------------------------------------------- 1 | {"text": "Energy regulator named new head of Britain's FSA finance watchdog", "id": "202005100", "event_list": []} 2 | {"text": "LONDON, April 1 (AFP)", "id": "202005101", "event_list": []} 3 | {"text": "British Chancellor of the Exchequer Gordon Brown on Tuesday named the current head of the country's energy regulator as the new chairman of finance watchdog the Financial Services Authority (FSA).", "id": "202005102", "event_list": [{"event_type": "Personnel:Nominate", "trigger": "named", "trigger_start_index": 9, "class": "Personnel", "arguments": [{"argument_start_index": 10, "role": "Person", "argument": "the current head of the country's energy regulator", "alias": []}, {"argument_start_index": 21, "role": "Position", "argument": "he new chairman of finance watchdog the Financial 
Services Authority", "alias": []}, {"argument_start_index": 8, "role": "Time-Within", "argument": "Tuesday", "alias": []}]}]} 4 | {"text": "Former senior banker Callum McCarthy begins what is one of the most important jobs in London's financial world in September, when incumbent Howard Davies steps down.", "id": "202005103", "event_list": [{"event_type": "Personnel:Start-Position", "trigger": "begins", "trigger_start_index": 5, "class": "Personnel", "arguments": [{"argument_start_index": 0, "role": "Person", "argument": "Former senior banker Callum McCarthy", "alias": []}, {"argument_start_index": 8, "role": "Position", "argument": "one of the most important jobs in London's financial world", "alias": []}, {"argument_start_index": 20, "role": "Time-Within", "argument": "September", "alias": []}]}, {"event_type": "Personnel:End-Position", "trigger": "Former", "trigger_start_index": 0, "class": "Personnel", "arguments": [{"argument_start_index": 0, "role": "Person", "argument": "Former senior banker Callum McCarthy", "alias": []}]}, {"event_type": "Personnel:End-Position", "trigger": "steps down", "trigger_start_index": 26, "class": "Personnel", "arguments": [{"argument_start_index": 23, "role": "Person", "argument": "incumbent Howard Davies", "alias": []}, {"argument_start_index": 15, "role": "Place", "argument": "London", "alias": []}, {"argument_start_index": 8, "role": "Position", "argument": "one of the most important jobs in London's financial world", "alias": []}, {"argument_start_index": 20, "role": "Time-Within", "argument": "September", "alias": []}]}]} 5 | {"text": "Davies is leaving to become chairman of the London School of Economics, one of the best-known parts of the University of London.", "id": "202005104", "event_list": [{"event_type": "Personnel:End-Position", "trigger": "leaving", "trigger_start_index": 2, "class": "Personnel", "arguments": [{"argument_start_index": 0, "role": "Person", "argument": "Davies", "alias": []}]}, {"event_type": "Personnel:Start-Position", "trigger": "become", "trigger_start_index": 4, "class": "Personnel", "arguments": [{"argument_start_index": 0, "role": "Person", "argument": "Davies", "alias": []}, {"argument_start_index": 7, "role": "Entity", "argument": "the London School of Economics", "alias": []}, {"argument_start_index": 5, "role": "Position", "argument": "chairman of the London School of Economics, one of the best-known parts of the University of London", "alias": []}]}]} 6 | {"text": "Brown said McCarthy would bring to the FSA \"an unrivalled combination of experience of the public sector, experience as a senior practitioner in the financial services industry, and chairman of a highly successful regulator.\"", "id": "202005105", "event_list": []} 7 | -------------------------------------------------------------------------------- /event_extractor/data/en/train.json: -------------------------------------------------------------------------------- 1 | {"text": "New Questions About Attacking Iraq; Is Torturing Terrorists Necessary?", "id": "202005100", "event_list": []} 2 | {"text": "Well, we'll debate that later on in the show.", "id": "202005101", "event_list": []} 3 | {"text": "We'll have a couple of experts come out, so I'll withhold my comments until then.", "id": "202005102", "event_list": []} 4 | {"text": "Even as the secretary of homeland security was putting his people on high alert last month, a 30-foot Cuban patrol boat with four heavily armed men landed on American shores, utterly undetected by the Coast Guard Secretary Ridge 
now leads.", "id": "202005103", "event_list": [{"event_type": "Movement:Transport", "trigger": "landed", "trigger_start_index": 27, "class": "Movement", "arguments": [{"argument_start_index": 17, "role": "Vehicle", "argument": "a 30-foot Cuban patrol boat with four heavily armed men", "alias": []}, {"argument_start_index": 23, "role": "Artifact", "argument": "four heavily armed men", "alias": []}, {"argument_start_index": 29, "role": "Destination", "argument": "American shores", "alias": []}]}]} 5 | -------------------------------------------------------------------------------- /event_extractor/data/zh/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/data/zh/.DS_Store -------------------------------------------------------------------------------- /event_extractor/data/zh/dev.json: -------------------------------------------------------------------------------- 1 | {"text": "消失的“外企光环”,5月份在华裁员900余人,香饽饽变“臭”了", "id": "cba11b5059495e635b4f95e7484b2684", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 15, "arguments": [{"argument_start_index": 17, "role": "裁员人数", "argument": "900余人", "alias": []}, {"argument_start_index": 10, "role": "时间", "argument": "5月份", "alias": []}], "class": "组织关系"}]} 2 | {"text": "前两天,被称为 “ 仅次于苹果的软件服务商 ” 的 Oracle( 甲骨文 )公司突然宣布在中国裁员。。", "id": "b90900665eee74f658eed125a321ee06", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 48, "arguments": [{"argument_start_index": 0, "role": "时间", "argument": "前两天", "alias": []}, {"argument_start_index": 4, "role": "裁员方", "argument": "被称为 “ 仅次于苹果的软件服务商 ” 的 Oracle( 甲骨文 )公司", "alias": []}], "class": "组织关系"}]} 3 | {"text": "不仅仅是中国IT企业在裁员,为何500强的甲骨文也发生了全球裁员", "id": "c970d67bb8d3e57db77c4dbbdfbe9769", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 11, "arguments": [{"argument_start_index": 4, "role": "裁员方", "argument": "中国IT企业", "alias": []}], "class": "组织关系"}, {"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 30, "arguments": [{"argument_start_index": 16, "role": "裁员方", "argument": "500强的甲骨文", "alias": []}], "class": "组织关系"}]} 4 | {"text": "据猛龙随队记者Josh Lewenberg报道,消息人士透露,猛龙已将前锋萨加巴-科纳特裁掉。此前他与猛龙签下了一份Exhibit 10合同。在被裁掉后,科纳特下赛季大概率将前往猛龙的发展联盟球队效力。", "id": "8a440ade8a8bac469e0357cc519ec9c0", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁掉", "trigger_start_index": 44, "arguments": [{"argument_start_index": 31, "role": "裁员方", "argument": "猛龙", "alias": []}], "class": "组织关系"}, {"event_type": "组织关系-加盟", "trigger": "签下", "trigger_start_index": 53, "arguments": [{"argument_start_index": 35, "role": "加盟者", "argument": "前锋萨加巴-科纳特", "alias": []}, {"argument_start_index": 51, "role": "所加盟组织", "argument": "猛龙", "alias": []}], "class": "组织关系"}, {"event_type": "组织关系-裁员", "trigger": "裁掉", "trigger_start_index": 73, "arguments": [{"argument_start_index": 31, "role": "裁员方", "argument": "猛龙", "alias": []}], "class": "组织关系"}]} 5 | -------------------------------------------------------------------------------- /event_extractor/data/zh/event_schema.json: -------------------------------------------------------------------------------- 1 | {"event_type": "财经/交易-出售/收购", "role_list": [{"role": "时间"}, {"role": "出售方"}, {"role": "交易物"}, {"role": "出售价格"}, {"role": "收购方"}], "id": "804336473abe8b8124d00876a5387151", "class": "财经/交易"} 2 | {"event_type": "财经/交易-跌停", "role_list": [{"role": 
"时间"}, {"role": "跌停股票"}], "id": "29a8f7417bf8867ddb8521f647a828d8", "class": "财经/交易"} 3 | {"event_type": "财经/交易-加息", "role_list": [{"role": "时间"}, {"role": "加息幅度"}, {"role": "加息机构"}], "id": "1fa24bacf1a6ac2e00171018d96fa629", "class": "财经/交易"} 4 | {"event_type": "财经/交易-降价", "role_list": [{"role": "时间"}, {"role": "降价方"}, {"role": "降价物"}, {"role": "降价幅度"}], "id": "2eda5c994fc93888fd7432131a40ab78", "class": "财经/交易"} 5 | {"event_type": "财经/交易-降息", "role_list": [{"role": "时间"}, {"role": "降息幅度"}, {"role": "降息机构"}], "id": "f401096ac991ca80a5a1b7a64ab31e93", "class": "财经/交易"} 6 | {"event_type": "财经/交易-融资", "role_list": [{"role": "时间"}, {"role": "跟投方"}, {"role": "领投方"}, {"role": "融资轮次"}, {"role": "融资金额"}, {"role": "融资方"}], "id": "2135e350cd9cf62bcfe703d91c14c154", "class": "财经/交易"} 7 | {"event_type": "财经/交易-上市", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "上市企业"}, {"role": "融资金额"}], "id": "e66035ff3b3a4c641f90bfa2dd035ac5", "class": "财经/交易"} 8 | {"event_type": "财经/交易-涨价", "role_list": [{"role": "时间"}, {"role": "涨价幅度"}, {"role": "涨价物"}, {"role": "涨价方"}], "id": "721b14f5ade1427f808c86d31c3fc7bb", "class": "财经/交易"} 9 | {"event_type": "财经/交易-涨停", "role_list": [{"role": "时间"}, {"role": "涨停股票"}], "id": "a4d265039f08a3754d649edf3f814496", "class": "财经/交易"} 10 | {"event_type": "产品行为-发布", "role_list": [{"role": "时间"}, {"role": "发布产品"}, {"role": "发布方"}], "id": "141fdae3c2a8fab0b2fc0a684271a89a", "class": "产品行为"} 11 | {"event_type": "产品行为-获奖", "role_list": [{"role": "时间"}, {"role": "获奖人"}, {"role": "奖项"}, {"role": "颁奖机构"}], "id": "72f2c1bf804f1eb6e2a4effbbcf4c20a", "class": "产品行为"} 12 | {"event_type": "产品行为-上映", "role_list": [{"role": "时间"}, {"role": "上映方"}, {"role": "上映影视"}], "id": "dc6e6ba46a5a358eb6d7b9fb3e215d83", "class": "产品行为"} 13 | {"event_type": "产品行为-下架", "role_list": [{"role": "时间"}, {"role": "下架产品"}, {"role": "被下架方"}, {"role": "下架方"}], "id": "2f098d2e17e7c0ac513a200844706099", "class": "产品行为"} 14 | {"event_type": "产品行为-召回", "role_list": [{"role": "时间"}, {"role": "召回内容"}, {"role": "召回方"}], "id": "c9d9762406d4bb9d4e0b32e91918b2db", "class": "产品行为"} 15 | {"event_type": "交往-道歉", "role_list": [{"role": "时间"}, {"role": "道歉对象"}, {"role": "道歉者"}], "id": "cf30e3b17f49ab98bed0e0fae3656cf9", "class": "交往"} 16 | {"event_type": "交往-点赞", "role_list": [{"role": "时间"}, {"role": "点赞方"}, {"role": "点赞对象"}], "id": "5a6148bb18072b4339b354550ef77383", "class": "交往"} 17 | {"event_type": "交往-感谢", "role_list": [{"role": "时间"}, {"role": "致谢人"}, {"role": "被感谢人"}], "id": "596349b6f65a2b03c7136d6ef9666eeb", "class": "交往"} 18 | {"event_type": "交往-会见", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "会见主体"}, {"role": "会见对象"}], "id": "24a7b25ddc749c77469ef55a28e4b4b5", "class": "交往"} 19 | {"event_type": "交往-探班", "role_list": [{"role": "时间"}, {"role": "探班主体"}, {"role": "探班对象"}], "id": "d34f1f657d9dcb297e33df4f549f16e9", "class": "交往"} 20 | {"event_type": "竞赛行为-夺冠", "role_list": [{"role": "时间"}, {"role": "冠军"}, {"role": "夺冠赛事"}], "id": "7727c52785855f7eb17e39d6f80afcf7", "class": "竞赛行为"} 21 | {"event_type": "竞赛行为-晋级", "role_list": [{"role": "时间"}, {"role": "晋级方"}, {"role": "晋级赛事"}], "id": "5dba3f7656533fe81a35c6e097ac6b69", "class": "竞赛行为"} 22 | {"event_type": "竞赛行为-禁赛", "role_list": [{"role": "时间"}, {"role": "禁赛时长"}, {"role": "被禁赛人员"}, {"role": "禁赛机构"}], "id": "c000e7efed7fc59ea164b9f4e529b4a0", "class": "竞赛行为"} 23 | {"event_type": "竞赛行为-胜负", "role_list": [{"role": "时间"}, {"role": "败者"}, {"role": "胜者"}, {"role": "赛事名称"}], "id": "200c443685340cb6ed5cff000107be8c", "class": "竞赛行为"} 24 | {"event_type": "竞赛行为-退赛", 
"role_list": [{"role": "时间"}, {"role": "退赛赛事"}, {"role": "退赛方"}], "id": "a8b2c77cb9f138db759e4b7d95564d9c", "class": "竞赛行为"} 25 | {"event_type": "竞赛行为-退役", "role_list": [{"role": "时间"}, {"role": "退役者"}], "id": "24e001c61bd7c0c843e77d91124680d4", "class": "竞赛行为"} 26 | {"event_type": "人生-产子/女", "role_list": [{"role": "时间"}, {"role": "产子者"}, {"role": "出生者"}], "id": "790edd7a17da7f7baf15d449de248ec7", "class": "人生"} 27 | {"event_type": "人生-出轨", "role_list": [{"role": "时间"}, {"role": "出轨方"}, {"role": "出轨对象"}], "id": "1e94e9b3c3b97b42e59d8a61bd79677a", "class": "人生"} 28 | {"event_type": "人生-订婚", "role_list": [{"role": "时间"}, {"role": "订婚主体"}], "id": "d8a3fad5037e54bf68d27356baa7da6b", "class": "人生"} 29 | {"event_type": "人生-分手", "role_list": [{"role": "时间"}, {"role": "分手双方"}], "id": "215b95ed112714ac6c7b01ccd1793bbf", "class": "人生"} 30 | {"event_type": "人生-怀孕", "role_list": [{"role": "时间"}, {"role": "怀孕者"}], "id": "54bbc19bb262b4130e74fd44518cf6dc", "class": "人生"} 31 | {"event_type": "人生-婚礼", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "参礼人员"}, {"role": "结婚双方"}], "id": "6a83ab7fc3246e74cb9f0077a4fd1b1a", "class": "人生"} 32 | {"event_type": "人生-结婚", "role_list": [{"role": "时间"}, {"role": "结婚双方"}], "id": "8179656fa2c0070c76cf41f239213557", "class": "人生"} 33 | {"event_type": "人生-离婚", "role_list": [{"role": "时间"}, {"role": "离婚双方"}], "id": "36715f5329fb4b9a8179abdea20bfbc4", "class": "人生"} 34 | {"event_type": "人生-庆生", "role_list": [{"role": "时间"}, {"role": "生日方"}, {"role": "生日方年龄"}, {"role": "庆祝方"}], "id": "cd265afb02c1782acbad38f14d8d67a4", "class": "人生"} 35 | {"event_type": "人生-求婚", "role_list": [{"role": "时间"}, {"role": "求婚者"}, {"role": "求婚对象"}], "id": "b06504d289fe290eadf2f7581fdd624d", "class": "人生"} 36 | {"event_type": "人生-失联", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "失联者"}], "id": "82e09024db77677249ef5777a2e57ef9", "class": "人生"} 37 | {"event_type": "人生-死亡", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "死者年龄"}, {"role": "死者"}], "id": "4c7a5f3e391033022c0c33577ba08764", "class": "人生"} 38 | {"event_type": "司法行为-罚款", "role_list": [{"role": "时间"}, {"role": "罚款对象"}, {"role": "执法机构"}, {"role": "罚款金额"}], "id": "4e49f8058a8dce6d047e50549a9c472f", "class": "司法行为"} 39 | {"event_type": "司法行为-拘捕", "role_list": [{"role": "时间"}, {"role": "拘捕者"}, {"role": "被拘捕者"}], "id": "d7cc52de2ee66116ba70fb94dc5d23fa", "class": "司法行为"} 40 | {"event_type": "司法行为-举报", "role_list": [{"role": "时间"}, {"role": "举报发起方"}, {"role": "举报对象"}], "id": "31a589aa7ff5dd41148c865c738b2d50", "class": "司法行为"} 41 | {"event_type": "司法行为-开庭", "role_list": [{"role": "时间"}, {"role": "开庭法院"}, {"role": "开庭案件"}], "id": "660adc190000e771efa85c53e0854fa6", "class": "司法行为"} 42 | {"event_type": "司法行为-立案", "role_list": [{"role": "时间"}, {"role": "立案机构"}, {"role": "立案对象"}], "id": "b0f6c47d166f67958235f35123f6c298", "class": "司法行为"} 43 | {"event_type": "司法行为-起诉", "role_list": [{"role": "时间"}, {"role": "被告"}, {"role": "原告"}], "id": "6346b33d56eb75869dda66de7d756232", "class": "司法行为"} 44 | {"event_type": "司法行为-入狱", "role_list": [{"role": "时间"}, {"role": "入狱者"}, {"role": "刑期"}], "id": "62a364a7880cf6a1500c003890da053f", "class": "司法行为"} 45 | {"event_type": "司法行为-约谈", "role_list": [{"role": "时间"}, {"role": "约谈对象"}, {"role": "约谈发起方"}], "id": "f38e66fcfc3022e8a0a0e98ad8069e0f", "class": "司法行为"} 46 | {"event_type": "灾害/意外-爆炸", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "e553741f9b44264ac15ee83fc45fc093", "class": "灾害/意外"} 47 | {"event_type": "灾害/意外-车祸", "role_list": [{"role": "时间"}, 
{"role": "地点"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "65156b790422d2bc8ca371297390c98c", "class": "灾害/意外"} 48 | {"event_type": "灾害/意外-地震", "role_list": [{"role": "时间"}, {"role": "死亡人数"}, {"role": "震级"}, {"role": "震源深度"}, {"role": "震中"}, {"role": "受伤人数"}], "id": "c1d05ed76a6caae74a0e6d5082d405bc", "class": "灾害/意外"} 49 | {"event_type": "灾害/意外-洪灾", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "8dd6740b165e5924d1fd40de40368eb9", "class": "灾害/意外"} 50 | {"event_type": "灾害/意外-起火", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "ddfbbcbfecb90f546f60b94cf14769e1", "class": "灾害/意外"} 51 | {"event_type": "灾害/意外-坍/垮塌", "role_list": [{"role": "时间"}, {"role": "坍塌主体"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "e1ad3ed83b77ebee1c17b67cbf423e12", "class": "灾害/意外"} 52 | {"event_type": "灾害/意外-袭击", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "袭击对象"}, {"role": "死亡人数"}, {"role": "袭击者"}, {"role": "受伤人数"}], "id": "aefe734675163030d0663324fff9d632", "class": "灾害/意外"} 53 | {"event_type": "灾害/意外-坠机", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "死亡人数"}, {"role": "受伤人数"}], "id": "a067c7d1074db1fc2ff513defa3d0a4e", "class": "灾害/意外"} 54 | {"event_type": "组织关系-裁员", "role_list": [{"role": "时间"}, {"role": "裁员方"}, {"role": "裁员人数"}], "id": "c5a4d0a926631a9d3d627239b3b2f3df", "class": "组织关系"} 55 | {"event_type": "组织关系-辞/离职", "role_list": [{"role": "时间"}, {"role": "离职者"}, {"role": "原所属组织"}], "id": "b817673c7e431d7f768ac3cd6e731912", "class": "组织关系"} 56 | {"event_type": "组织关系-加盟", "role_list": [{"role": "时间"}, {"role": "加盟者"}, {"role": "所加盟组织"}], "id": "3eb18fa2114856a1b0b727e785a740d2", "class": "组织关系"} 57 | {"event_type": "组织关系-解雇", "role_list": [{"role": "时间"}, {"role": "解雇方"}, {"role": "被解雇人员"}], "id": "e66aa69af22e02bac5f0521cb6155abc", "class": "组织关系"} 58 | {"event_type": "组织关系-解散", "role_list": [{"role": "时间"}, {"role": "解散方"}], "id": "7c2d0577044ae75b781dba1041a37295", "class": "组织关系"} 59 | {"event_type": "组织关系-解约", "role_list": [{"role": "时间"}, {"role": "被解约方"}, {"role": "解约方"}], "id": "0339725f84d17f8da489740b75bdf3a4", "class": "组织关系"} 60 | {"event_type": "组织关系-停职", "role_list": [{"role": "时间"}, {"role": "所属组织"}, {"role": "停职人员"}], "id": "67469db003c52240685d356a9d57c984", "class": "组织关系"} 61 | {"event_type": "组织关系-退出", "role_list": [{"role": "时间"}, {"role": "退出方"}, {"role": "原所属组织"}], "id": "be1bea2d91bfb93a50d76f134982e822", "class": "组织关系"} 62 | {"event_type": "组织行为-罢工", "role_list": [{"role": "时间"}, {"role": "所属组织"}, {"role": "罢工人数"}, {"role": "罢工人员"}], "id": "67d5da3d943a41a6a4d9978f7f459e11", "class": "组织行为"} 63 | {"event_type": "组织行为-闭幕", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "活动名称"}], "id": "ed20f344513cdbdd8481a9288009dc61", "class": "组织行为"} 64 | {"event_type": "组织行为-开幕", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "活动名称"}], "id": "56a4c074cead7b0774e62b4f8ef185b2", "class": "组织行为"} 65 | {"event_type": "组织行为-游行", "role_list": [{"role": "时间"}, {"role": "地点"}, {"role": "游行组织"}, {"role": "游行人数"}], "id": "3d59a1e3effc59dd8cb52562973c4f24", "class": "组织行为"} 66 | -------------------------------------------------------------------------------- /event_extractor/data/zh/test.json: -------------------------------------------------------------------------------- 1 | {"text": "7岁男孩地震遇难,父亲等来噩耗失声痛哭,网友:希望不再有伤亡", "id": "6eab49e6643d5a5f50037be79937d7cb"} 2 | {"text": "4月29日上午,衡阳祁东县人民法院公开开庭审理了被告人郭三平贪污、受贿、恶势力“保护伞”一案,此案系衡阳市首例恶势力“保护伞”案。", "id": "913087c6a608f7ce55ef04b41dbfcbff"} 3 | 
{"text": "Uber宣布在新一轮裁员中裁减350名员工", "id": "22b7b7070a1659e5b65258864730eb1f"} 4 | {"text": "今日共39股涨停(不包括未开板新股及ST),22股封板未遂,封板率为64%;山鼎设计、天津普林、生意宝3连板,还有7只2连板。", "id": "18ad1a228d782a7a542285bdd725eba6"} 5 | -------------------------------------------------------------------------------- /event_extractor/data/zh/train.json: -------------------------------------------------------------------------------- 1 | {"text": "雀巢裁员4000人:时代抛弃你时,连招呼都不会打!", "id": "409389c96efe78d6af1c86e0450fd2d7", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 2, "arguments": [{"argument_start_index": 0, "role": "裁员方", "argument": "雀巢", "alias": []}, {"argument_start_index": 4, "role": "裁员人数", "argument": "4000人", "alias": []}], "class": "组织关系"}]} 2 | {"text": "美国“未来为”子公司大幅度裁员,这是为什么呢?任正非正式回应", "id": "5aec2b5b759c5f8f42f9c0156eb3c924", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 13, "arguments": [{"argument_start_index": 0, "role": "裁员方", "argument": "美国“未来为”子公司", "alias": []}], "class": "组织关系"}]} 3 | {"text": "这一全球巨头“凉凉” “捅刀”华为后 裁员5000 现市值缩水800亿", "id": "82c4db0b0b209565485a1776b6f1b580", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 19, "arguments": [{"argument_start_index": 21, "role": "裁员人数", "argument": "5000", "alias": []}], "class": "组织关系"}]} 4 | {"text": "被证实将再裁员1800人 AT&T在为落后的经营模式买单", "id": "1f0eac3455f94c9d93dbacc92bbf4aec", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 5, "arguments": [{"argument_start_index": 7, "role": "裁员人数", "argument": "1800人", "alias": []}, {"argument_start_index": 13, "role": "裁员方", "argument": "AT&T", "alias": []}], "class": "组织关系"}]} 5 | {"text": "又一网约车巨头倒下:三个月裁员835名员工,滴滴又该何去何从", "id": "d5b9a75b8d1dd37f07667ed72bf69c4f", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 13, "arguments": [{"argument_start_index": 15, "role": "裁员人数", "argument": "835名员工", "alias": []}], "class": "组织关系"}]} 6 | {"text": "8月20日消息,据腾讯新闻《一线》报道,知情人士表示,为了控制成本支出,蔚来计划将美国分公司的人员规模除自动驾驶业务相关人员外,减少至200人左右。截至美国时间8月16日,蔚来位于美国硅谷的分公司已裁减100名员工。", "id": "e0bf1fc191ed8fe21d92628126cf4cd0", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁减", "trigger_start_index": 99, "arguments": [{"argument_start_index": 80, "role": "时间", "argument": "8月16日", "alias": []}, {"argument_start_index": 86, "role": "裁员方", "argument": "蔚来", "alias": []}, {"argument_start_index": 101, "role": "裁员人数", "argument": "100名员工", "alias": []}], "class": "组织关系"}]} 7 | {"text": "最近,一位前便利蜂员工就因公司违规裁员,将便利蜂所在的公司虫极科技(北京)有限公司告上法庭。", "id": "8cc3728844e456fd1c30f4df17cb179b", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 17, "arguments": [{"argument_start_index": 21, "role": "裁员方", "argument": "便利蜂所在的公司虫极科技(北京)有限公司", "alias": []}], "class": "组织关系"}, {"event_type": "司法行为-起诉", "trigger": "告上法庭", "trigger_start_index": 41, "arguments": [{"argument_start_index": 0, "role": "时间", "argument": "最近", "alias": []}, {"argument_start_index": 3, "role": "原告", "argument": "一位前便利蜂员工", "alias": []}, {"argument_start_index": 21, "role": "被告", "argument": "便利蜂所在的公司虫极科技(北京)有限公司", "alias": []}], "class": "司法行为"}]} 8 | {"text": "思科上海大规模裁员 人均可获赔100万 官方澄清事实", "id": "0b624aa1a02202c0defd572e3a1f942c", "event_list": [{"event_type": "组织关系-裁员", "trigger": "裁员", "trigger_start_index": 7, "arguments": [{"argument_start_index": 0, "role": "裁员方", "argument": "思科上海", "alias": []}], "class": "组织关系"}]} 9 | -------------------------------------------------------------------------------- 
/event_extractor/dataprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/dataprocess/__init__.py -------------------------------------------------------------------------------- /event_extractor/dataprocess/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/dataprocess/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /event_extractor/dataprocess/__pycache__/data_loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/dataprocess/__pycache__/data_loader.cpython-36.pyc -------------------------------------------------------------------------------- /event_extractor/dataprocess/__pycache__/process_en.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/dataprocess/__pycache__/process_en.cpython-36.pyc -------------------------------------------------------------------------------- /event_extractor/dataprocess/data_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys, os 4 | 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | 7 | import json 8 | import numpy as np 9 | import importlib 10 | import random 11 | import math 12 | 13 | 14 | def load_sentences(path, lang, seq_len): 15 | global processor_module 16 | processor_module = importlib.import_module('event_extractor.dataprocess.process_{}'.format(lang)) 17 | sentences = [] 18 | with open(path, "r") as f: 19 | for line in f: 20 | line_dict = json.loads(line.strip()) 21 | word_sentence, seg_sentence = processor_module.get_seg_features(line_dict["text"]) 22 | if len(word_sentence) > seq_len: 23 | continue 24 | line_dict["words"] = word_sentence 25 | line_dict["segs"] = seg_sentence 26 | sentences.append(line_dict) 27 | return sentences 28 | 29 | 30 | def create_dico(item_list): 31 | assert type(item_list) is list 32 | dico = {} 33 | for items in item_list: 34 | for item in items: 35 | if item not in dico: 36 | dico[item] = 1 37 | else: 38 | dico[item] += 1 39 | return dico 40 | 41 | 42 | def create_mapping(dico): 43 | sorted_items = sorted(dico.items(), key=lambda x: (-x[1], x[0])) 44 | id_to_item = {i: v[0] for i, v in enumerate(sorted_items)} 45 | item_to_id = {v: k for k, v in id_to_item.items()} 46 | return item_to_id, id_to_item 47 | 48 | 49 | def word_mapping(sentences): 50 | words = [[x.lower() for x in s["words"]] for s in sentences] 51 | word_dico = create_dico(words) 52 | word_dico["<PAD>"] = 100000001  # special padding token -> id 0 (token names assumed; the originals were stripped) 53 | word_dico['<UNK>'] = 100000000  # special unknown token -> id 1 (cf. word2id.get(word, 1) in prepare_dataset) 54 | word_to_id, id_to_word = create_mapping(word_dico) 55 | print("Found %i unique words (%i in total)" % ( 56 | len(word_dico), sum(len(x) for x in words) 57 | )) 58 | 59 | return word_dico, word_to_id, id_to_word 60 | 61 | 62 | def seg_mapping(sentences): 63 | segs = [[x.lower() for x in s["segs"]]
for s in sentences] 64 | seg_dico = create_dico(segs) 65 | seg_dico["<PAD>"] = 100000001  # special padding token -> id 0 (token names assumed, as in word_mapping) 66 | seg_dico['<UNK>'] = 100000000  # special unknown token -> id 1 67 | seg_to_id, id_to_seg = create_mapping(seg_dico) 68 | print("Found %i unique segs (%i in total)" % ( 69 | len(seg_dico), sum(len(x) for x in segs) 70 | )) 71 | return seg_dico, seg_to_id, id_to_seg 72 | 73 | 74 | def load_schema(schema_file): 75 | id2eventtype, eventtype2id, id2role, role2id = {}, {}, {}, {} 76 | with open(schema_file, "r") as f: 77 | for line in f: 78 | line_dict = json.loads(line) 79 | event_type = line_dict["event_type"] 80 | if event_type not in eventtype2id: 81 | event_type_id = len(eventtype2id) 82 | id2eventtype[event_type_id] = event_type 83 | eventtype2id[event_type] = event_type_id 84 | 85 | for role_dict in line_dict["role_list"]: 86 | role_name = role_dict["role"] 87 | if role_name not in role2id: 88 | role_id = len(role2id) 89 | role2id[role_name] = role_id 90 | id2role[role_id] = role_name 91 | return id2eventtype, eventtype2id, id2role, role2id 92 | 93 | 94 | def seq_padding(X, padding=0): 95 | L = [len(x) for x in X] 96 | ML = max(L) 97 | return np.array([ 98 | np.concatenate([x, [padding] * (ML - len(x))]) if len(x) < ML else x for x in X 99 | ]) 100 | 101 | 102 | def prepare_dataset(sentences, eventtype2id, role2id, word2id, seg2id): 103 | features = [] 104 | for idx, sentence_info in enumerate(sentences): 105 | text_words = sentence_info['words'] 106 | text_segs = sentence_info["segs"] 107 | text = sentence_info["text"] 108 | s1, s2 = np.zeros((len(text_words), len(eventtype2id))), np.zeros((len(text_words), len(eventtype2id))) 109 | for event_info in sentence_info["event_list"]: 110 | trigger_word = event_info["trigger"] 111 | trigger_word_split, _ = processor_module.get_seg_features(trigger_word) 112 | event_type = event_info["event_type"] 113 | trigger_start_index = event_info["trigger_start_index"] 114 | s1[trigger_start_index][eventtype2id[event_type]] = 1 115 | s2[trigger_start_index + len(trigger_word_split) - 1][eventtype2id[event_type]] = 1 116 | 117 | if s1.max() == 0: 118 | continue 119 | 120 | for event_info in sentence_info["event_list"]: 121 | trigger_word = event_info["trigger"] 122 | trigger_word_split, _ = processor_module.get_seg_features(trigger_word) 123 | trigger_start_index = event_info["trigger_start_index"] 124 | k1 = trigger_start_index 125 | k2 = trigger_start_index + len(trigger_word_split) - 1 126 | 127 | o1, o2 = np.zeros((len(text_words), len(role2id))), np.zeros((len(text_words), len(role2id))) 128 | for event_argument in event_info["arguments"]: 129 | argument_word = event_argument["argument"] 130 | argument_word_split, _ = processor_module.get_seg_features(argument_word) 131 | argument_role = event_argument["role"] 132 | 133 | argument_start_index = event_argument["argument_start_index"] 134 | 135 | o1[argument_start_index][role2id[argument_role]] = 1 136 | o2[argument_start_index + len(argument_word_split) - 1][role2id[argument_role]] = 1 137 | features.append( 138 | [text, [word2id.get(word, 1) for word in text_words], [seg2id.get(seg, 1) for seg in text_segs], s1, 139 | s2, [k1], [k2], o1, o2]) 140 | return features 141 | 142 | 143 | class BatchManager(object): 144 | def __init__(self, data, batch_size, num_events, num_roles, is_sorted=True): 145 | self.batch_data = self.sort_and_pad(data, batch_size, num_events, num_roles, is_sorted) 146 | self.len_data = len(self.batch_data) 147 | 148 | def sort_and_pad(self, data, batch_size, num_events, num_roles, is_sorted): 149 | num_batch =
int(math.ceil(len(data) / batch_size)) 150 | if is_sorted: 151 | sorted_data = sorted(data, key=lambda x: len(x[1])) 152 | else: 153 | sorted_data = data 154 | batch_data = list() 155 | for i in range(num_batch): 156 | batch_data.append(self.pad_data(sorted_data[i * batch_size: (i + 1) * batch_size], num_events, num_roles)) 157 | return batch_data 158 | 159 | @staticmethod 160 | def pad_data(data, num_events, num_roles): 161 | TEXTS, T1, T2, S1, S2, K1, K2, O1, O2 = [], [], [], [], [], [], [], [], [] 162 | for line in data: 163 | text, t1, t2, s1, s2, k1, k2, o1, o2 = line 164 | TEXTS.append(text) 165 | T1.append(t1) 166 | T2.append(t2) 167 | S1.append(s1) 168 | S2.append(s2) 169 | K1.append(k1) 170 | K2.append(k2) 171 | O1.append(o1) 172 | O2.append(o2) 173 | 174 | T1 = seq_padding(T1) 175 | T2 = seq_padding(T2) 176 | S1 = seq_padding(S1, np.zeros(num_events)) 177 | S2 = seq_padding(S2, np.zeros(num_events)) 178 | O1 = seq_padding(O1, np.zeros(num_roles)) 179 | O2 = seq_padding(O2, np.zeros(num_roles)) 180 | K1, K2 = np.array(K1), np.array(K2) 181 | return [TEXTS, T1, T2, S1, S2, K1, K2, O1, O2] 182 | 183 | def iter_batch(self, shuffle=False): 184 | if shuffle: 185 | random.shuffle(self.batch_data) 186 | for idx in range(self.len_data): 187 | yield self.batch_data[idx] 188 | -------------------------------------------------------------------------------- /event_extractor/dataprocess/process_en.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | 4 | def get_seg_features(text): 5 | word_sentence = [] 6 | seg_sentence = [] 7 | for word in nltk.word_tokenize(text): 8 | word = word.lower() 9 | word_sentence.append(word) 10 | seg_sentence.append(word) 11 | 12 | return word_sentence, seg_sentence 13 | 14 | 15 | def replace_first_substr(sentence, word): 16 | start_pos = sentence.index(word) 17 | start_str = sentence[:start_pos] 18 | end_str = sentence[start_pos + len(word):] 19 | return start_str + "-" * len(word) + end_str 20 | 21 | 22 | def joint_output_str(sentence, i, j): 23 | word_sentence = [] 24 | for word in nltk.word_tokenize(sentence): 25 | word_sentence.append(word) 26 | return " ".join(word_sentence[i:j + 1]) 27 | -------------------------------------------------------------------------------- /event_extractor/dataprocess/process_zh.py: -------------------------------------------------------------------------------- 1 | import jieba 2 | 3 | 4 | def get_seg_features(text): 5 | word_sentence = [] 6 | seg_sentence = [] 7 | for word in jieba.cut(text): 8 | word = word.lower() 9 | for ch in word: 10 | word_sentence.append(ch) 11 | seg_sentence.append(word) 12 | 13 | return word_sentence, seg_sentence 14 | 15 | 16 | def joint_output_str(sentence, i, j): 17 | return sentence[i:j + 1] 18 | -------------------------------------------------------------------------------- /event_extractor/db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/db.sqlite3 -------------------------------------------------------------------------------- /event_extractor/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/model/__init__.py -------------------------------------------------------------------------------- 
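Taken together, data_loader.py turns the JSON files into padded training batches: load_sentences tokenizes each record with the language-specific processor above, word_mapping/seg_mapping build the vocabularies, load_schema reads the event-type and role inventories, prepare_dataset emits per-sentence features (token ids, trigger-span labels s1/s2, a trigger span k1/k2, argument-span labels o1/o2), and BatchManager sorts, batches, and pads them. The train/train.py that wires this up is not shown in this section; a minimal sketch of the same flow, with illustrative paths and hyper-parameters (not the repo's defaults):

from event_extractor.dataprocess import data_loader

lang, seq_len, batch_size = "zh", 160, 32  # illustrative settings

# tokenize and filter sentences, then build vocabularies and the label schema
sentences = data_loader.load_sentences("event_extractor/data/{}/train.json".format(lang), lang, seq_len)
_, word2id, id2word = data_loader.word_mapping(sentences)
_, seg2id, id2seg = data_loader.seg_mapping(sentences)
id2eventtype, eventtype2id, id2role, role2id = data_loader.load_schema(
    "event_extractor/data/{}/event_schema.json".format(lang))

# build per-sentence features and batch them with padding
features = data_loader.prepare_dataset(sentences, eventtype2id, role2id, word2id, seg2id)
batches = data_loader.BatchManager(features, batch_size, len(eventtype2id), len(role2id))
for texts, t1, t2, s1, s2, k1, k2, o1, o2 in batches.iter_batch(shuffle=True):
    pass  # each padded batch feeds EventModule.forward(t1, t2, s1, s2, k1, k2, o1, o2) in model/dgcnn.py below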
/event_extractor/model/dgcnn.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 4 | import torch 5 | import torch.nn as nn 6 | import torch.autograd as autograd 7 | import numpy as np 8 | import json 9 | 10 | 11 | class EventModule(nn.Module): 12 | def __init__(self, params, word_size, seg_size, num_events, num_roles): 13 | super(EventModule, self).__init__() 14 | self.params = params 15 | self.word_embeds = nn.Embedding(word_size, self.params["emb_dim"]) 16 | self.seg_embeds = nn.Embedding(seg_size, self.params["emb_dim"]) 17 | self.s1_position_embeds = nn.Embedding(self.params["seq_len"], self.params["emb_dim"]) 18 | self.k1_position_embeds = nn.Embedding(self.params["seq_len"], self.params["emb_dim"]) 19 | self.k2_position_embeds = nn.Embedding(self.params["seq_len"], self.params["emb_dim"]) 20 | self.dropout = nn.Dropout(p=self.params["drop_out"]) 21 | 22 | self.dgc_m1 = DilatedGatedConv1d(self.params["emb_dim"], 1) 23 | self.dgc_m2 = DilatedGatedConv1d(self.params["emb_dim"], 2) 24 | self.dgc_m3 = DilatedGatedConv1d(self.params["emb_dim"], 5) 25 | self.dgc_m4 = DilatedGatedConv1d(self.params["emb_dim"], 1) 26 | self.dgc_m5 = DilatedGatedConv1d(self.params["emb_dim"], 2) 27 | self.dgc_m6 = DilatedGatedConv1d(self.params["emb_dim"], 5) 28 | self.dgc_m7 = DilatedGatedConv1d(self.params["emb_dim"], 1) 29 | self.dgc_m8 = DilatedGatedConv1d(self.params["emb_dim"], 2) 30 | self.dgc_m9 = DilatedGatedConv1d(self.params["emb_dim"], 5) 31 | self.dgc_m10 = DilatedGatedConv1d(self.params["emb_dim"], 1) 32 | self.dgc_m11 = DilatedGatedConv1d(self.params["emb_dim"], 1) 33 | self.dgc_m12 = DilatedGatedConv1d(self.params["emb_dim"], 1) 34 | 35 | self.pn1_dense = Dense(self.params["emb_dim"], word_size) 36 | self.pn1_out_dense = Dense(word_size, 1, "sigmoid") 37 | 38 | self.pn2_dense = Dense(self.params["emb_dim"], word_size) 39 | self.pn2_out_dense = Dense(word_size, 1, "sigmoid") 40 | 41 | self.trigger_attention = SelfAttention(8, 16, self.params["emb_dim"]) 42 | 43 | self.trigger_h_conv = nn.Conv1d(in_channels=8 * 16 + self.params["emb_dim"], out_channels=word_size, 44 | kernel_size=3, padding=1) 45 | 46 | self.ps_dense = Dense(word_size, 1, "sigmoid") 47 | self.ps1_dense = Dense(word_size, num_events, "sigmoid") 48 | self.ps2_dense = Dense(word_size, num_events, "sigmoid") 49 | 50 | self.pc_dense = Dense(self.params["emb_dim"], word_size) 51 | self.pc_out_dense = Dense(word_size, num_roles, "sigmoid") 52 | 53 | self.k_gru_model = nn.GRU(input_size=self.params["emb_dim"], hidden_size=self.params["emb_dim"], 54 | bidirectional=True, 55 | batch_first=True) 56 | 57 | self.argument_attention = SelfAttention(8, 16, self.params["emb_dim"]) 58 | 59 | self.argument_h_conv = nn.Conv1d(in_channels=8 * 16 + self.params["emb_dim"] * 3, out_channels=word_size, 60 | kernel_size=3, padding=1) 61 | 62 | self.po_dense = Dense(word_size, 1, "sigmoid") 63 | self.po1_dense = Dense(word_size, num_roles, "sigmoid") 64 | self.po2_dense = Dense(word_size, num_roles, "sigmoid") 65 | 66 | self.s1_loss_model = nn.BCELoss(reduction="none") 67 | self.s2_loss_model = nn.BCELoss(reduction="none") 68 | 69 | self.o1_loss_model = nn.BCELoss(reduction="none") 70 | self.o2_loss_model = nn.BCELoss(reduction="none") 71 | 72 | self.word_size = word_size 73 | self.seg_size = seg_size 74 | self.num_events = num_events 75 | self.num_roles = num_roles 76 | 77 | self.batch_size = 1 78 | 
self.word_seq_length = 1 79 | 80 | def set_batch_seq_size(self, sentence): 81 | tmp = sentence.size() 82 | self.batch_size = tmp[0] 83 | self.word_seq_length = tmp[1] 84 | 85 | def rand_init_seg_embedding(self): 86 | nn.init.uniform_(self.seg_embeds.weight, -0.25, 0.25) 87 | 88 | def rand_init_word_embedding(self): 89 | nn.init.uniform_(self.word_embeds.weight, -0.25, 0.25) 90 | 91 | def rand_init_s1_position_embedding(self): 92 | nn.init.uniform_(self.s1_position_embeds.weight, -0.25, 0.25) 93 | 94 | def rand_init_k1_position_embedding(self): 95 | nn.init.uniform_(self.k1_position_embeds.weight, -0.25, 0.25) 96 | 97 | def rand_init_k2_position_embedding(self): 98 | nn.init.uniform_(self.k2_position_embeds.weight, -0.25, 0.25) 99 | 100 | def set_optimizer(self): 101 | if self.params["update"] == 'sgd': 102 | optimizer = torch.optim.SGD(self.parameters(), lr=self.params["lr"], momentum=self.params["momentum"]) 103 | elif self.params["update"] == 'adam': 104 | optimizer = torch.optim.Adam(self.parameters(), lr=self.params["lr"]) 105 | return optimizer 106 | 107 | def to_scalar(self, var): 108 | return var.view(-1).data.tolist()[0] 109 | 110 | def clip_grad_norm(self): 111 | nn.utils.clip_grad_norm_(self.parameters(), self.params["clip_grad"]) 112 | 113 | def adjust_learning_rate(self, optimizer): 114 | torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer) 115 | # for param_group in optimizer.param_groups: 116 | # param_group['lr'] = lr 117 | 118 | def save_checkpoint(self, state, data_map, params, filename): 119 | with open(filename + '_map.json', 'w') as f: 120 | f.write(json.dumps(data_map, ensure_ascii=False)) 121 | with open(filename + '_params.json', 'w') as f: 122 | f.write(json.dumps(params, ensure_ascii=False)) 123 | torch.save(state, filename + '.model') 124 | 125 | def load_checkpoint_file(self, model_path): 126 | checkpoint_file = torch.load(model_path, map_location=lambda storage, loc: storage) 127 | self.load_state_dict(checkpoint_file['state_dict']) 128 | 129 | def seq_maxpool(self, x): 130 | """seq has shape [None, seq_len, s_size] and 131 | mask has shape [None, seq_len, 1]; first mask out the padded 132 | positions, then max-pool over the sequence dimension. 133 | """ 134 | seq, mask = x 135 | seq_out = seq - (1 - mask) * 1e10 136 | return torch.max(seq_out, 1, keepdim=True) 137 | 138 | def seq_gather(self, x): 139 | """seq has shape [None, seq_len, s_size] and idxs has shape 140 | [None, 1]; select the idxs[i]-th vector from the i-th sequence of seq, 141 | producing an output of shape [None, s_size]. 142 | """ 143 | seq, idxs = x 144 | # batch_idxs = [idxs for _ in range(seq.shape[0])] 145 | idxs = torch.unsqueeze(idxs.long(), -1) 146 | batch_idxs = idxs.expand(idxs.shape[0], idxs.shape[1], seq.shape[-1]) 147 | seq_index = torch.gather(seq, 1, batch_idxs) 148 | seq_index_out = seq_index.view(seq_index.shape[0], seq_index.shape[-1]) 149 | return seq_index_out 150 | 151 | def position_id(self, x): 152 | if isinstance(x, list) and len(x) == 2: 153 | x, r = x 154 | else: 155 | r = 0 156 | 157 | pid = torch.unsqueeze(torch.arange(x.shape[1]), 0).repeat([x.shape[0], 1]) 158 | return torch.abs(pid - r) 159 | 160 | def trigger_model_forward(self, t1, t2): 161 | t1 = torch.LongTensor(t1) 162 | t2 = torch.LongTensor(t2) 163 | 164 | self.set_batch_seq_size(t1) 165 | 166 | mask_temp = torch.unsqueeze(t1, 2) 167 | mask_temp0 = torch.zeros(mask_temp.size()) 168 | mask_temp1 = torch.ones(mask_temp.size()) 169 | mask = torch.where(mask_temp > 0, mask_temp1, mask_temp0).float() 170 | 171 | pid = self.position_id(t1) 172 | pv = self.s1_position_embeds(pid) 173 | 174 | t1_embed = self.word_embeds(t1) # 0: padding, 1: unk 175 |
t2_embed = self.seg_embeds(t2) 176 | t_embed = t1_embed + t2_embed + pv 177 | t_dropout = self.dropout(t_embed) 178 | 179 | # t = [t, mask] 180 | t_mask = t_dropout * mask 181 | t_dg1 = self.dgc_m1(t_mask, mask) 182 | t_dg2 = self.dgc_m2(t_dg1, mask) 183 | t_dg3 = self.dgc_m3(t_dg2, mask) 184 | t_dg4 = self.dgc_m4(t_dg3, mask) 185 | t_dg5 = self.dgc_m5(t_dg4, mask) 186 | t_dg6 = self.dgc_m6(t_dg5, mask) 187 | t_dg7 = self.dgc_m7(t_dg6, mask) 188 | t_dg8 = self.dgc_m8(t_dg7, mask) 189 | t_dg9 = self.dgc_m9(t_dg8, mask) 190 | t_dg10 = self.dgc_m10(t_dg9, mask) 191 | t_dg11 = self.dgc_m11(t_dg10, mask) 192 | t_dgout = self.dgc_m12(t_dg11, mask) 193 | 194 | pn1 = self.pn1_dense(t_dgout) 195 | pn1_out = self.pn1_out_dense(pn1) 196 | 197 | pn2 = self.pn2_dense(t_dgout) 198 | pn2_out = self.pn2_out_dense(pn2) 199 | 200 | h = self.trigger_attention([t_dgout, t_dgout, t_dgout, mask]) 201 | h_cat = torch.cat((t_dgout, h), dim=-1) 202 | 203 | h_conv = self.trigger_h_conv(h_cat.permute(0, 2, 1)).permute(0, 2, 1) 204 | 205 | ps = self.ps_dense(h_conv) 206 | ps1 = self.ps1_dense(h_conv) 207 | ps2 = self.ps2_dense(h_conv) 208 | 209 | ps1_out = ps * ps1 * pn1_out 210 | ps2_out = ps * ps2 * pn2_out 211 | return ps1_out, ps2_out, pn1_out, pn2_out, t_dgout, mask 212 | 213 | def argument_model_forward(self, k1, k2, pn1_out, pn2_out, t_dgout, mask): 214 | k1 = torch.LongTensor(k1) 215 | k2 = torch.LongTensor(k2) 216 | 217 | t_max, _ = self.seq_maxpool([t_dgout, mask]) 218 | pc = self.pc_dense(t_max) 219 | pc_out = self.pc_out_dense(pc) 220 | 221 | def get_k_inter(x, n=6): 222 | seq, k1, k2 = x 223 | k_inter = [torch.round(k2 - (k2 - k1) * (a / (n - 1.))) for a in np.arange(n)] 224 | k_inter_gather = [self.seq_gather([seq, k]) for k in k_inter] 225 | k_inter_unsqueeze = [torch.unsqueeze(k, 1) for k in k_inter_gather] 226 | k_inter_out = torch.cat(tuple(k_inter_unsqueeze), 1) 227 | return k_inter_out 228 | 229 | k = get_k_inter([t_dgout, k1, k2]) 230 | k_gru, _ = self.k_gru_model(k) 231 | k_last = k_gru[:, -1, :] 232 | k1v = self.k1_position_embeds(self.position_id([t_dgout, k1])) 233 | k2v = self.k2_position_embeds(self.position_id([t_dgout, k2])) 234 | kv = torch.cat((k1v, k2v), dim=-1) 235 | k_out = torch.unsqueeze(k_last, 1) + kv 236 | 237 | h_o = self.argument_attention([t_dgout, t_dgout, t_dgout, mask]) 238 | h_o_cat = torch.cat((t_dgout, h_o, k_out), dim=-1) 239 | h_o_conv = self.argument_h_conv(h_o_cat.permute(0, 2, 1)).permute(0, 2, 1) 240 | po = self.po_dense(h_o_conv) 241 | po1 = self.po1_dense(h_o_conv) 242 | po2 = self.po2_dense(h_o_conv) 243 | po1_out = po * po1 * pc_out * pn1_out 244 | po2_out = po * po2 * pc_out * pn2_out 245 | return po1_out, po2_out 246 | 247 | def forward(self, t1, t2, s1, s2, k1, k2, o1, o2): 248 | 249 | s1 = torch.LongTensor(s1) 250 | s2 = torch.LongTensor(s2) 251 | 252 | o1 = torch.LongTensor(o1) 253 | o2 = torch.LongTensor(o2) 254 | 255 | ps1_out, ps2_out, pn1_out, pn2_out, t_dgout, mask = self.trigger_model_forward(t1, t2) 256 | 257 | po1_out, po2_out = self.argument_model_forward(k1, k2, pn1_out, pn2_out, t_dgout, mask) 258 | 259 | # s1_expand = torch.unsqueeze(s1, 2) 260 | # s2_expand = torch.unsqueeze(s2, 2) 261 | 262 | s1_loss = torch.sum(self.s1_loss_model(ps1_out, s1.float()), 2, keepdim=True) 263 | s1_loss_sum = torch.sum(s1_loss * mask) / torch.sum(mask) 264 | s2_loss = torch.sum(self.s2_loss_model(ps2_out, s2.float()), 2, keepdim=True) 265 | s2_loss_sum = torch.sum(s2_loss * mask) / torch.sum(mask) 266 | 267 | o1_loss = torch.sum(self.o1_loss_model(po1_out,
        s1_loss = torch.sum(self.s1_loss_model(ps1_out, s1.float()), 2, keepdim=True)
        s1_loss_sum = torch.sum(s1_loss * mask) / torch.sum(mask)
        s2_loss = torch.sum(self.s2_loss_model(ps2_out, s2.float()), 2, keepdim=True)
        s2_loss_sum = torch.sum(s2_loss * mask) / torch.sum(mask)

        o1_loss = torch.sum(self.o1_loss_model(po1_out, o1.float()), 2, keepdim=True)
        o1_loss_sum = torch.sum(o1_loss * mask) / torch.sum(mask)
        o2_loss = torch.sum(self.o2_loss_model(po2_out, o2.float()), 2, keepdim=True)
        o2_loss_sum = torch.sum(o2_loss * mask) / torch.sum(mask)

        loss = (s1_loss_sum + s2_loss_sum) + (o1_loss_sum + o2_loss_sum)

        return loss


class Dense(nn.Module):
    '''
    Dense layer: a linear projection followed by an activation.
    '''

    def __init__(self, input_size, out_size, activation="relu"):
        super(Dense, self).__init__()
        self.linear_layer = nn.Linear(input_size, out_size)
        if activation == "sigmoid":
            self.active_layer = nn.Sigmoid()
        else:
            self.active_layer = nn.ReLU()

    def forward(self, input_tensor):
        linear_result = self.linear_layer(input_tensor)
        return self.active_layer(linear_result)


class DilatedGatedConv1d(nn.Module):
    '''
    Dilated gated 1-D convolution.
    '''

    def __init__(self, dim, dilation_rate):
        super(DilatedGatedConv1d, self).__init__()
        self.dim = dim
        self.conv1d = nn.Conv1d(in_channels=dim, out_channels=dim * 2,
                                kernel_size=3, dilation=dilation_rate, padding=dilation_rate)
        self.dropout_dgc = nn.Dropout(p=0.1)

    def _gate(self, x):
        s, h1 = x
        g1, h_inner = h1[:, :, :self.dim], h1[:, :, self.dim:]
        g2 = self.dropout_dgc(g1)
        g = torch.sigmoid(g2)
        return g * s + (1 - g) * h_inner

    def forward(self, seq, mask):
        convert_seq = seq.permute(0, 2, 1)
        h = self.conv1d(convert_seq).permute(0, 2, 1)
        seq_gate = self._gate([seq, h])
        seq_out = seq_gate * mask
        return seq_out


class SelfAttention(nn.Module):
    """Multi-head self-attention.
    """

    def __init__(self, nb_head, size_per_head, input_size):
        super(SelfAttention, self).__init__()
        self.nb_head = nb_head
        self.size_per_head = size_per_head
        self.out_dim = nb_head * size_per_head
        q_in_dim = input_size
        k_in_dim = input_size
        v_in_dim = input_size
        self.q_linear = nn.Linear(q_in_dim, self.out_dim)
        self.k_linear = nn.Linear(k_in_dim, self.out_dim)
        self.v_linear = nn.Linear(v_in_dim, self.out_dim)

    def mask(self, x, mask, mode='mul'):
        if mask is None:
            return x
        else:
            for _ in range(x.dim() - mask.dim()):
                mask = torch.unsqueeze(mask, mask.dim())
            if mode == 'mul':
                return x * mask
            else:
                return x - (1 - mask) * 1e10

    def forward(self, inputs):
        q, k, v = inputs[:3]
        v_mask, q_mask = None, None
        if len(inputs) > 3:
            v_mask = inputs[3]
        if len(inputs) > 4:
            q_mask = inputs[4]
        # linear projections
        qw1 = self.q_linear(q)
        kw1 = self.k_linear(k)
        vw1 = self.v_linear(v)
        # reshape to separate the heads
        qw2 = qw1.reshape(-1, qw1.shape[1], self.nb_head, self.size_per_head)
        kw2 = kw1.reshape(-1, kw1.shape[1], self.nb_head, self.size_per_head)
        vw2 = vw1.reshape(-1, vw1.shape[1], self.nb_head, self.size_per_head)
        # move the head dimension forward
        qw = qw2.permute(0, 2, 1, 3)
        kw = kw2.permute(0, 2, 1, 3)
        vw = vw2.permute(0, 2, 1, 3)
        # Attention
        a1 = torch.matmul(qw, kw.permute(0, 1, 3, 2)) / self.size_per_head ** 0.5
        a2 = a1.permute(0, 3, 2, 1)
        a3 = self.mask(a2, v_mask, 'add')
        a4 = a3.permute(0, 3, 2, 1)
        a = torch.softmax(a4, dim=-1)
        # assemble the output
        o1 = torch.matmul(a, vw)
        o2 = o1.permute(0, 2, 1, 3)
        o3 = o2.reshape(-1, o2.shape[1], self.out_dim)
        o = self.mask(o3, q_mask, 'mul')
        return o
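

if __name__ == "__main__":
    # Added illustrative smoke test (not part of the original file): checks
    # that SelfAttention preserves the [batch, seq_len, input_size] shape when
    # input_size == nb_head * size_per_head, as the model configures it.
    attn = SelfAttention(nb_head=8, size_per_head=16, input_size=128)
    x = torch.rand(2, 10, 128)
    print(attn([x, x, x]).shape)  # expected: torch.Size([2, 10, 128])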
--------------------------------------------------------------------------------
/event_extractor/predict/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/predict/__init__.py
--------------------------------------------------------------------------------
/event_extractor/predict/event_pipeline.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import json
import importlib
import numpy as np
import re


class EventExtractor(object):
    def __init__(self, lang):
        self.lang = lang
        self.max_word_count = 500

        self.processor_module = importlib.import_module('event_extractor.dataprocess.process_{}'.format(lang))

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        arg_path = os.path.join(BASE_DIR, "checkpoint", "event_params.json")
        map_path = os.path.join(BASE_DIR, "checkpoint", "event_map.json")
        model_path = os.path.join(BASE_DIR, "checkpoint", "event.model")

        with open(arg_path, 'r') as f:
            jd = json.load(f)
            self.params = jd['params']
        with open(map_path, "r") as f:
            self.event_map = json.load(f)

        # self.params["batch_size"] = 1

        event_module = importlib.import_module(
            "event_extractor.model.{}".format(self.params["model_name"]))
        self.event_model = event_module.EventModule(self.params, len(self.event_map["word_to_id"]),
                                                    len(self.event_map["seg_to_id"]),
                                                    len(self.event_map["eventtype2id"]), len(self.event_map["role2id"]))

        self.event_model.load_checkpoint_file(model_path)
        self.event_model.eval()

    def get_event_result(self, input_json):
        # NOTE: the ASCII full stop must be escaped here; the original pattern
        # used a bare ".", which matches any character and split the text apart.
        sentences = re.split("。|?|!|;|;|,|\\.|\\?", input_json["text"])
        event_result = {"id": input_json["id"], "event_list": []}

        for sentence in sentences:
            word_sentence, seg_sentence = self.processor_module.get_seg_features(sentence)
            t1 = [[self.event_map["word_to_id"].get(word, 1) for word in word_sentence]]
            t2 = [[self.event_map["seg_to_id"].get(seg, 1) for seg in seg_sentence]]
            ps1_out, ps2_out, pn1_out, pn2_out, t_dgout, mask = self.event_model.trigger_model_forward(t1, t2)
            ps1_out_array = ps1_out.detach().numpy()
            ps2_out_array = ps2_out.detach().numpy()
            k1s = np.where(ps1_out_array > 0.4)[1]
            k2s = np.where(ps2_out_array > 0.3)[1]
            for i in k1s:
                j = k2s[k2s >= i]
                if len(j) > 0:
                    j = j[0]
                    event_info = {}
                    event_info["trigger"] = self.processor_module.joint_output_str(sentence, i, j)
                    k1s_t = ps1_out_array[0][i]
                    k2s_t = ps2_out_array[0][j]
                    k1s_t_max_index = np.argmax(k1s_t)
                    k2s_t_max_index = np.argmax(k2s_t)
                    if k1s_t_max_index == k2s_t_max_index:
                        event_info["event_type"] = self.event_map["id2eventtype"][str(k1s_t_max_index)]
                        event_info["arguments"] = []
                        his_argument = []
                        po1_out, po2_out = self.event_model.argument_model_forward([i], [j], pn1_out, pn2_out,
                                                                                   t_dgout, mask)

                        po1_out_array = po1_out.detach().numpy()
                        po2_out_array = po2_out.detach().numpy()
                        k1o = np.where(po1_out_array > 0.3)[1]
                        k2o = np.where(po2_out_array > 0.2)[1]
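                        # Decode argument spans the same way as triggers: pair
                        # each candidate start m with the nearest end n >= m and
                        # keep the pair only when both positions agree on the
                        # role label.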
                        for m in k1o:
                            n = k2o[k2o >= m]
                            if len(n) > 0:
                                n = n[0]
                                k1o_a = po1_out_array[0][m]
                                k2o_a = po2_out_array[0][n]
                                k1o_a_max_index = np.argmax(k1o_a)
                                k2o_a_max_index = np.argmax(k2o_a)
                                argument_key = "{}_{}".format(m, n)
                                if k1o_a_max_index == k2o_a_max_index and argument_key not in his_argument:
                                    his_argument.append(argument_key)
                                    event_info["arguments"].append(
                                        {"role": self.event_map["id2role"][str(k1o_a_max_index)],
                                         "argument": self.processor_module.joint_output_str(sentence, m, n)})
                        event_result["event_list"].append(event_info)

        return event_result


if __name__ == '__main__':
    # The original bound both extractors to the same name, so the Chinese model
    # was overwritten before it was ever used; keep one instance per language.
    model_zh = EventExtractor(lang="zh")
    model_en = EventExtractor(lang="en")
    # lang = input("input the language:")
    # text = input("input the text:")
    print(model_zh.get_event_result(
        {
            "text": "朱隽表奏了孙坚、刘备的功勋。刘备因朝中无人说情,被封为中山府安喜县县尉。不久,督邮来到安喜。刘备因没有向督邮送钱被督邮陷害。刘备几次到馆驿求见督邮,都被看门人拦住,拒于门外。",
            "id": "0aaab8985832ef9d062eab12ec82f6cf"}))

    print(model_en.get_event_result(
        {
            "text": "About 500 militants attacked the village of Masteri in the West Darfur state on Saturday afternoon, More than 60 people have been killed and another 60 injured in violence in Sudan's West Darfur region.",
            "id": "0aaab8985832ef9d062eab12ec82f6cf"}))
--------------------------------------------------------------------------------
/event_extractor/settings.py:
--------------------------------------------------------------------------------
"""
Django settings for the event_extractor project.

Generated by 'django-admin startproject' using Django 1.11.7.

For more information on this file, see
https://docs.djangoproject.com/en/1.11/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.11/ref/settings/
"""

import os

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'dyl__+t5bgbw1+t$mopa16koo*w#)q3nudlwv*r((ftp-1bdm)'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = ['*']

# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'corsheaders',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'corsheaders.middleware.CorsMiddleware',
    'django.middleware.common.CommonMiddleware',
    # 'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'event_extractor.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [os.path.join(BASE_DIR, 'templates')],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'event_extractor.wsgi.application'

# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}

# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]

# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/

STATIC_URL = '/html/'

CORS_ALLOW_CREDENTIALS = True
CORS_ORIGIN_ALLOW_ALL = True
CORS_ORIGIN_WHITELIST = ()

CORS_ALLOW_METHODS = (
    'DELETE',
    'GET',
    'OPTIONS',
    'PATCH',
    'POST',
    'PUT',
    'VIEW',
)

CORS_ALLOW_HEADERS = (
    'accept',
    'accept-encoding',
    'authorization',
    'content-type',
    'dnt',
    'origin',
    'user-agent',
    'x-csrftoken',
    'x-requested-with',
)
--------------------------------------------------------------------------------
/event_extractor/test.py:
--------------------------------------------------------------------------------
import json

file_path = "/Users/mac/Desktop/dev_data.json"
with open(file_path, "r") as f:
    for line in f:
        line_dict = json.loads(line)
        subjects = []
        for spo in line_dict["spo_list"]:
            subject = spo["subject"]
            if subject not in subjects:
                subjects.append(subject)
        if len(subjects) > 1:
            print(line)
--------------------------------------------------------------------------------
/event_extractor/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/train/__init__.py
--------------------------------------------------------------------------------
/event_extractor/train/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/train/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/event_extractor/train/__pycache__/eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenking2020/event_extract_master/6b8d470d2caa5ec6785eae07bca04e66fb3734b7/event_extractor/train/__pycache__/eval.cpython-36.pyc
--------------------------------------------------------------------------------
/event_extractor/train/eval.py:
--------------------------------------------------------------------------------
import numpy as np


def evaluate(event_model, dev_manager):
    event_model.eval()

    # Micro-averaged counts with a small epsilon to avoid division by zero:
    # A = true positives, B = predicted positives, C = gold positives.
    A, B, C = 1e-10, 1e-10, 1e-10

    for batch in dev_manager.iter_batch(shuffle=True):
        text, t1, t2, s1, s2, k1, k2, o1, o2 = batch
        ps1_out, ps2_out, pn1_out, pn2_out, t_dgout, mask = event_model.trigger_model_forward(t1, t2)
        po1_out, po2_out = event_model.argument_model_forward(k1, k2, pn1_out, pn2_out, t_dgout, mask)

        s1_pre = ps1_out.detach().numpy()
        s1_pre = np.where(s1_pre > 0.4, 1, 0)
        s1 = s1.astype(int)
        A += np.sum(s1 & s1_pre)
        B += np.sum(s1_pre)
        C += np.sum(s1)

        s2_pre = ps2_out.detach().numpy()
        s2_pre = np.where(s2_pre > 0.3, 1, 0)
        s2 = s2.astype(int)
        A += np.sum(s2 & s2_pre)
        B += np.sum(s2_pre)
        C += np.sum(s2)

        o1_pre = po1_out.detach().numpy()
        o1_pre = np.where(o1_pre > 0.3, 1, 0)
        o1 = o1.astype(int)
        A += np.sum(o1 & o1_pre)
        B += np.sum(o1_pre)
        C += np.sum(o1)

        o2_pre = po2_out.detach().numpy()
        o2_pre = np.where(o2_pre > 0.2, 1, 0)
        o2 = o2.astype(int)
        A += np.sum(o2 & o2_pre)
        B += np.sum(o2_pre)
        C += np.sum(o2)

    return 2 * A / (B + C), A / B, A / C  # f1, precision, recall
--------------------------------------------------------------------------------
/event_extractor/train/train.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import sys, os

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from event_extractor.dataprocess import data_loader
from event_extractor.train.eval import evaluate
import importlib
import time


class TrainProcess(object):
    def __init__(self, params):
        self.params = params

    def load_data(self):
        # TODO: data is read from local files for now and should later come
        # from the database; the train/dev split also follows the local files
        # for now and should later be produced automatically.
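        # Expected layout: data/<lang>/train.json, data/<lang>/dev.json and
        # data/<lang>/event_schema.json, matching the samples shipped under data/.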
"train.json"), 22 | self.params["lang"], self.params["seq_len"]) 23 | self.all_dev_sentences = data_loader.load_sentences(os.path.join(data_path, "dev.json"), self.params["lang"], 24 | self.params["seq_len"]) 25 | 26 | _w, self.word_to_id, self.id_to_word = data_loader.word_mapping(self.all_train_sentences) 27 | _s, self.seg_to_id, self.id_to_seg = data_loader.seg_mapping(self.all_train_sentences) 28 | 29 | self.id2eventtype, self.eventtype2id, self.id2role, self.role2id = data_loader.load_schema( 30 | os.path.join(data_path, "event_schema.json")) 31 | 32 | train_data = data_loader.prepare_dataset(self.all_train_sentences, self.eventtype2id, self.role2id, 33 | self.word_to_id, self.seg_to_id) 34 | dev_data = data_loader.prepare_dataset(self.all_dev_sentences, self.eventtype2id, self.role2id, 35 | self.word_to_id, self.seg_to_id) 36 | 37 | self.train_manager = data_loader.BatchManager(train_data, self.params["batch_size"], len(self.eventtype2id), 38 | len(self.role2id), is_sorted=True) 39 | self.dev_manager = data_loader.BatchManager(dev_data, self.params["batch_size"], len(self.eventtype2id), 40 | len(self.role2id), is_sorted=True) 41 | 42 | def train(self): 43 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 44 | model_path = os.path.join(BASE_DIR, "checkpoint") 45 | 46 | event_module = importlib.import_module( 47 | "event_extractor.model.{}".format(self.params["model_name"])) 48 | event_model = event_module.EventModule(self.params, len(self.word_to_id), len(self.seg_to_id), 49 | len(self.eventtype2id), len(self.role2id)) 50 | event_model.rand_init_word_embedding() 51 | event_model.rand_init_seg_embedding() 52 | event_model.rand_init_s1_position_embedding() 53 | event_model.rand_init_k1_position_embedding() 54 | event_model.rand_init_k2_position_embedding() 55 | 56 | optimizer = event_model.set_optimizer() 57 | # tot_length = len(self.all_train_sentences) 58 | print("has train data: {}".format(len(self.all_train_sentences))) 59 | print("has dev data: {}".format(len(self.all_dev_sentences))) 60 | 61 | best_f1 = float('-inf') 62 | best_f1_epoch = 0 63 | start_time = time.time() 64 | patience_count = 0 65 | 66 | for epoch_idx in range(self.params["epoch"]): 67 | event_model.train() 68 | print("-------------------------------------------------------------------------------------") 69 | epoch_loss = 0 70 | iter_step = 0 71 | for batch in self.train_manager.iter_batch(shuffle=True): 72 | text, t1, t2, s1, s2, k1, k2, o1, o2 = batch 73 | iter_step += 1 74 | step_start_time = time.time() 75 | event_model.zero_grad() 76 | loss = event_model(t1, t2, s1, s2, k1, k2, o1, o2) 77 | epoch_loss += event_model.to_scalar(loss) 78 | loss.backward() 79 | # event_model.clip_grad_norm() 80 | optimizer.step() 81 | print("epoch: %s, current step: %s, current loss: %.4f time use: %s" % ( 82 | epoch_idx, iter_step, loss / len(t1), 83 | time.time() - step_start_time)) 84 | epoch_loss /= iter_step 85 | # update lr 86 | event_model.adjust_learning_rate(optimizer) 87 | 88 | f1, p, r = evaluate(event_model, self.dev_manager) 89 | print("dev: f1: {}, p: {}, r: {}".format(f1, p, r)) 90 | 91 | if f1 >= best_f1: 92 | best_f1 = f1 93 | best_f1_epoch = epoch_idx 94 | patience_count = 0 95 | print('best average f1: %.4f in epoch_idx: %d , saving...' 

                try:
                    event_model.save_checkpoint({
                        'epoch': epoch_idx,
                        'state_dict': event_model.state_dict(),
                        'optimizer': optimizer.state_dict()}, {
                        'word_to_id': self.word_to_id,
                        'id_to_word': self.id_to_word,
                        'seg_to_id': self.seg_to_id,
                        'id_to_seg': self.id_to_seg,
                        "id2eventtype": self.id2eventtype,
                        "eventtype2id": self.eventtype2id,
                        "id2role": self.id2role,
                        "role2id": self.role2id
                    }, {'params': self.params},
                        os.path.join(model_path, 'event'))
                except Exception as inst:
                    print(inst)
            else:
                patience_count += 1
                print(
                    'poor current average f1: %.4f, best average f1: %.4f in epoch_idx: %d' % (
                        f1, best_f1, best_f1_epoch))

            print('epoch: ' + str(epoch_idx) + '\t in ' + str(self.params["epoch"]) + ' take: ' + str(
                time.time() - start_time) + ' s')

            if patience_count >= self.params["patience"] and epoch_idx >= self.params["least_iters"]:
                break


if __name__ == '__main__':
    # train_d = TrainProcess(
    #     {"task_name": "event_test", "lang": "zh", "model_name": "dgcnn", "batch_size": 32, "epochs": 200,
    #      "seq_len": 500, "emb_dim": 128, "drop_out": 0.25,
    #      "update": "adam", "lr": 0.0001, "lr_decay": 0.05, "clip_grad": 5, "epoch": 500, "patience": 15,
    #      "least_iters": 50, "gpu": -1})
    # train_d.load_data()
    # train_d.train()

    train_d = TrainProcess(
        {"task_name": "event_test", "lang": "en", "model_name": "dgcnn", "batch_size": 32, "epochs": 200,
         "seq_len": 500, "emb_dim": 128, "drop_out": 0.25,
         "update": "adam", "lr": 0.0001, "lr_decay": 0.05, "clip_grad": 5, "epoch": 500, "patience": 15,
         "least_iters": 50, "gpu": -1})
    train_d.load_data()
    train_d.train()
--------------------------------------------------------------------------------
/event_extractor/urls.py:
--------------------------------------------------------------------------------
"""event_extractor URL Configuration

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/1.11/topics/http/urls/
Examples:
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  url(r'^$', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  url(r'^$', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.conf.urls import url, include
    2. Add a URL to urlpatterns:  url(r'^blog/', include('blog.urls'))
"""
from django.conf.urls import url
from . import views
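
# Both endpoints accept POST with a JSON body; see views.create_train and
# views.event_extract for the expected fields.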

urlpatterns = [
    url(r'^apis/create_train$', views.create_train),
    url(r'^apis/event_extract$', views.event_extract),
]
--------------------------------------------------------------------------------
/event_extractor/views.py:
--------------------------------------------------------------------------------
import sys, os

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from django.http import HttpResponse, JsonResponse
import json
from event_extractor.train.train import TrainProcess
from event_extractor.predict.event_pipeline import EventExtractor

lang_model = {}


def index(request):
    return HttpResponse("index page")


def create_train(request):
    if request.method != "POST":
        return JsonResponse({"success": "false", "message": "requests must be sent via POST!"})
    req_body = request.body.decode("utf-8")
    receive_json = json.loads(req_body)
    params = receive_json["params"]
    algo_name = params["algo_type"]
    result_dict = {}
    result_dict["success"] = "true"
    result_dict["message"] = "success"
    result_dict["result_list"] = {}
    if algo_name == "entity":
        # try:
        entity_instance = TrainProcess(params)
        entity_instance.load_data()
        entity_instance.train()
        # except:
        #     result_dict["message"] = "failed!"
    ret_obj = JsonResponse(result_dict, safe=False)
    return ret_obj


def event_extract(request):
    if request.method != "POST":
        return JsonResponse({"success": "false", "message": "requests must be sent via POST!"})
    req_body = request.body.decode("utf-8")
    receive_json = json.loads(req_body)
    lang = receive_json["language"]
    model_id = receive_json["model_id"]
    if lang not in lang_model:
        # NOTE: the original passed model_id=model_id here, but
        # EventExtractor.__init__ only accepts `lang`, which raised a
        # TypeError; model_id is currently read but unused.
        lang_model[lang] = EventExtractor(lang=lang)
    result_dict = {}
    result_dict["success"] = "true"
    result_dict["message"] = "success"
    result_dict["result_list"] = lang_model[lang].get_event_result(receive_json)

    ret_obj = JsonResponse(result_dict, safe=False)
    return ret_obj
--------------------------------------------------------------------------------
/event_extractor/wsgi.py:
--------------------------------------------------------------------------------
"""
WSGI config for the event_extractor project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "event_extractor.settings")

application = get_wsgi_application()
--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import os
import sys

if __name__ == "__main__":
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "event_extractor.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing to avoid masking other
        # exceptions on Python 2.
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        raise
    execute_from_command_line(sys.argv)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
nltk==3.2.4
jieba==0.42.1
Django==2.1.7
django-cors-headers==3.2.1
torch==1.3.0
tqdm==4.48.0
--------------------------------------------------------------------------------