├── .gitattributes ├── .gitignore ├── .idea ├── ee-2019-baseline-master.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md ├── all_chars_me.json ├── ccksDemo ├── .idea │ ├── ccksDemo.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── __pycache__ │ └── app.cpython-35.pyc ├── app.py ├── best_model.weights ├── static │ ├── ccks.ico │ ├── ccks.png │ ├── description.png │ ├── logo.jpg │ ├── register-login.css │ ├── style1.css │ └── test.html.bak ├── templates │ └── test.html └── utils │ ├── __pycache__ │ └── extract_entity.cpython-35.pyc │ ├── all_chars_me.json │ ├── best_model.weights │ ├── classes.json │ ├── event_type_entity_extract_train.csv │ ├── extract_entity.py │ └── random_order_train.json ├── classes.json ├── ee-2019-baseline-master ├── .idea │ ├── ee-2019-baseline-master.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── best_model.weights ├── data │ ├── event_type_entity_extract_eval.csv │ └── event_type_entity_extract_train.csv ├── ee.py └── result.txt ├── random_order_train.json └── 运行结果.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | *.js linguist-language=python 2 | *.css linguist-language=python 3 | *.html linguist-language=python 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /venv 2 | -------------------------------------------------------------------------------- /.idea/ee-2019-baseline-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | write 78 | def 79 | test 80 | test_data 81 | D 82 | D[ 83 | train_D 84 | train_model 85 | Input 86 | Embedding 87 | expand_dims 88 | class2id 89 | Attention 90 | add_loss 91 | argmax 92 | Concatenate 93 | weights 94 | Evaluate 95 | data_generator 96 | 97 | 98 | 99 | 101 | 102 | 107 | 108 | 109 | 110 | 111 | true 112 | DEFINITION_ORDER 113 | 114 | 115 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 176 | 177 | 178 | 179 | 180 | 193 | 194 | 195 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 1558920358166 225 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 261 | 262 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | -------------------------------------------------------------------------------- /ccksDemo/__pycache__/app.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/__pycache__/app.cpython-35.pyc -------------------------------------------------------------------------------- /ccksDemo/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, request, flash, jsonify, redirect, url_for, session 2 | from utils import extract_entity 3 | 4 | app = Flask(__name__) 5 | 6 | 7 | @app.route('/', methods=['GET', 'POST']) 8 | def test(): 9 | if request.method == 'GET': 10 | result = 'NaN' 11 | return render_template('test.html', result=result) 12 | else: 13 | comments = request.form.get('comments') 14 | print(comments) 15 | lst = comments.split(',', 1) 16 | 17 | print(lst) 18 | 19 | s = u'"%s"' % (extract_entity.extract_entity_self(lst[0].replace('\t', ''), lst[1])) 20 | print(s) 21 | return render_template('test.html', result=s) 22 | 23 | 24 | if __name__ == '__main__': 25 | app.run() 26 | -------------------------------------------------------------------------------- /ccksDemo/best_model.weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/best_model.weights -------------------------------------------------------------------------------- /ccksDemo/static/ccks.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/static/ccks.ico -------------------------------------------------------------------------------- /ccksDemo/static/ccks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/static/ccks.png -------------------------------------------------------------------------------- /ccksDemo/static/description.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/static/description.png -------------------------------------------------------------------------------- /ccksDemo/static/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/static/logo.jpg -------------------------------------------------------------------------------- /ccksDemo/static/register-login.css: -------------------------------------------------------------------------------- 1 | /* 2 | create by zrong.me 3 | 请大家关注我的个人微博 说Z先生爱你 4 | 也请大家关注我的个人网站 zrong.me 5 | */ 6 | *{ 7 | padding: 0; 8 | margin: 0; 9 | list-style: none; 10 | text-decoration: none; 11 | } 12 | html,body{ 13 | height: 100%; 14 | width: 100%; 15 | font-family: 'Helvetica Neue',Helvetica,'PingFang SC','Hiragino Sans GB','Microsoft YaHei',Arial,sans-serif; 16 | color: #555; 17 | font-size: 15px; 18 | line-height: 1.7; 19 | } 20 | input:focus{ 21 | outline: none; 22 | } 23 | canvas{ 24 | display:block; 25 | vertical-align:bottom; 26 | } 27 | #box{ 28 | width: 100%; 29 | height: 100%; 30 | background-color: #F7FAFC; 31 | background-image: url(''); 32 | background-size: cover; 33 | background-position: 50% 50%; 34 | background-repeat: no-repeat; 35 | position: fixed; 36 | top: 0; 37 | left: 0; 38 | right: 0; 39 | bottom: 0; 40 | z-index: 1; 41 | } 42 | .cent-box{ 43 | width: 300px; 44 | height: 440px; 45 | vertical-align: middle; 46 | white-space: normal; 47 | margin: 0 auto; 48 | position: absolute; 49 | z-index: 2; 50 | left: 50%; 51 | top: 50%; 52 | margin-left: -150px; 53 | margin-top: -220px; 54 | } 55 | .register-box{ 56 | height: 490px; 57 | margin-top: -270px; 58 | } 59 | .cent-box-header{ 60 | text-align: center; 61 | } 62 | .hide{ 63 | font: 0/0 a; 64 | color: transparent; 65 | text-shadow: none; 66 | background-color: transparent; 67 | border: 0; 68 | } 69 | .cent-box-header .main-title{ 70 | width: 80px; 71 | height: 74px; 72 | margin: 0 auto; 73 | background: url('../images/logo220.png') no-repeat; 74 | background-size: contain; 75 | } 76 | .cent-box-header .sub-title{ 77 | margin: 30px 0 20px; 78 | font-weight: 400; 79 | font-size: 18px; 80 | line-height: 1; 81 | } 82 | .clearfix:before{ 83 | content: ''; 84 | display: table; 85 | } 86 | .index-tab{ 87 | text-align: center; 88 | font-size: 18px; 89 | margin-bottom: 10px; 90 | } 91 | .index-tab .index-slide-nav{ 92 | display: inline-block; 93 | position: relative; 94 | } 95 | .index-tab .index-slide-nav a{ 96 | float: left; 97 | width: 4em; 98 | line-height: 35px; 99 | opacity: 0.7; 100 | -webkit-transition: opacity .15s,color .15s; 101 | transition: opacity .15s,color .15s; 102 | color: #555; 103 | } 104 | .index-tab .index-slide-nav a:hover{ 105 | color: #0f88eb; 106 | opacity: 1; 107 | } 108 | .index-tab .index-slide-nav a.active{ 109 | opacity: 1; 110 | color: #0f88eb; 111 | } 112 | .slide-bar{ 113 | position: absolute; 114 | left: 0; 115 | bottom: 0; 116 | margin: 0 .8em; 117 | width: 2.4em; 118 | height: 2px; 119 | background: #0f88eb; 120 | } 121 | .slide-bar1{ 122 | left: 4em; 123 | } 124 | .form{ 125 | float: none; 126 | margin: auto; 127 | text-align: left; 128 | width: 300px; 129 | } 130 | .form .group{ 131 | padding: 1px 0; 132 | border: 1px solid #d5d5d5; 133 | border-radius: 3px; 134 | } 135 | .form .group .group-ipt{ 136 | position: relative; 137 | margin: 0; 138 | overflow: hidden; 139 | } 140 | .form .group .group-ipt input{ 141 | padding: 1em .8em; 142 | width: 100%; 143 | box-sizing: border-box; 144 | border: 0; 145 | border-radius: 0; 146 | box-shadow: none; 147 | background: rgba(255,255,255,0.5); 148 | font-family: 'Microsoft Yahei'; 149 | color: #666; 150 | position: relative; 151 | } 152 | #password,#verify,#user,#password1{ 153 | border-top: 1px solid #e8e8e8; 154 | } 155 | .imgcode{ 156 | width: 95px; 157 | position: absolute; 158 | right: 0; 159 | top: 2px; 160 | cursor: pointer; 161 | height: 40px; 162 | } 163 | .button{ 164 | margin-top:18px; 165 | } 166 | #button{ 167 | width: 100%; 168 | background: #0f88eb; 169 | box-shadow: none; 170 | border: 0; 171 | border-radius: 3px; 172 | line-height: 41px; 173 | color: #fff; 174 | display: block; 175 | font-size: 15px; 176 | cursor: pointer; 177 | font-family: 'Microsoft Yahei'; 178 | } 179 | #button:hover{ 180 | background: #80c3f7; 181 | } 182 | .remember{ 183 | margin-top: 10px; 184 | line-height: 30px; 185 | } 186 | .remember label{ 187 | display: block; 188 | } 189 | .remember-me{ 190 | font-size: 14px; 191 | float: left; 192 | position: relative; 193 | cursor: pointer; 194 | } 195 | .icon{ 196 | width: 11px; 197 | height: 11px; 198 | display: block; 199 | border: 1px solid #ccc; 200 | float: left; 201 | margin-top: 8px; 202 | margin-right: 5px; 203 | cursor: pointer; 204 | } 205 | .zt{ 206 | width: 9px; 207 | height: 9px; 208 | background: #0f88eb; 209 | margin: 1px; 210 | display: block; 211 | } 212 | #remember-me{ 213 | position: absolute; 214 | left: 0; 215 | top: 8px; 216 | opacity: 0; 217 | cursor: pointer; 218 | } 219 | .forgot-password{ 220 | float: right; 221 | font-size: 14px; 222 | } 223 | .forgot-password a{ 224 | color: #555; 225 | } 226 | .forgot-password a:hover{ 227 | text-decoration: underline; 228 | } 229 | .footer{ 230 | position: fixed; 231 | width: 100%; 232 | height: 40px; 233 | bottom: 0; 234 | left: 0; 235 | text-align: center; 236 | color: #999; 237 | z-index: 2; 238 | padding-bottom: 10px; 239 | font-size: 13px; 240 | } 241 | .footer a{ 242 | color: #666; 243 | text-decoration: underline; 244 | } 245 | -------------------------------------------------------------------------------- /ccksDemo/static/style1.css: -------------------------------------------------------------------------------- 1 | @charset "utf-8"; 2 | /* CSS Document */ 3 | /* 重置开始 */ 4 | *{ padding:0; margin:0;} 5 | blockquote, body, button, dd, dl, dt, fieldset, form, h1, h2, h3, h4, h5, h6, hr, input, legend, li, ol, p, pre, td, textarea, th, ul { 6 | margin: 0; 7 | padding: 0 8 | } 9 | body, button, input, select, textarea { 10 | font-family: "微软雅黑"; 11 | } 12 | 13 | address, cite, dfn, em, i, var { 14 | font-style: normal 15 | } 16 | code, kbd, pre, samp { 17 | font-family: courier new, courier, monospace 18 | } 19 | small { 20 | font-size: 12px 21 | } 22 | ol, ul { 23 | list-style: none; 24 | } 25 | a { 26 | text-decoration: none; 27 | cursor: pointer 28 | } 29 | a:hover { 30 | text-decoration: none 31 | } 32 | sup { 33 | vertical-align: text-top 34 | } 35 | sub { 36 | vertical-align: text-bottom 37 | } 38 | legend { 39 | color: #000 40 | } 41 | fieldset{ 42 | border: 0; 43 | display:block; 44 | } 45 | button, input, select, textarea { 46 | font-size: 100% 47 | } 48 | table { 49 | border-spacing: 0; 50 | border-collapse: collapse 51 | } 52 | article, aside, audio, details, figcaption, figure, footer, header, hgroup, mark, menu, nav, section, summary, time, video { 53 | display: block; 54 | margin: 0; 55 | padding: 0 56 | } 57 | mark { 58 | background: #ff0 59 | } 60 | a:active, a:focus { 61 | outline: 0; 62 | zoom:expression(this.style.zoom=1, this.hideFocus=true) 63 | } 64 | /*重置结束*/ 65 | .wapper_box{ min-width:1200px; overflow:hidden; background:#eeeeee;} 66 | /*头部开始*/ 67 | .header_box{ width:100%; 68 | height:116px;} 69 | .header_box1{ width:100%; 70 | height:116px; 71 | background:#fefefe; 72 | position:absolute; 73 | left:0; 74 | top:0; 75 | box-shadow:0px 4px 14px #666; 76 | z-index:600; } 77 | .header_box2{ width:1200px; 78 | margin:0 auto;} 79 | .logo_box{ display:block; 80 | margin-top:20px; 81 | float:left;} 82 | .nav_box{ width:902px; 83 | height:116px; 84 | float:left; 85 | margin-left:50px; 86 | margin-top:50px;} 87 | .nav_box .nav_1{float:right; position:relative;} 88 | .nav_but{ font-size:16px; 89 | color:black; 90 | margin-left:22px; 91 | padding:48px 14px;} 92 | .nav_but:hover{ background:#003d79; 93 | color:white;} 94 | .nav_slide{ width:150px; 95 | position:absolute; 96 | left:22px; 97 | display:none; 98 | top:66px; 99 | z-index:800;} 100 | .nav_slide1{ width:140px; 101 | background:white; 102 | border-bottom:3px solid #e0e2e3;} 103 | .nav_slide a{ padding:17px 20px 17px 15px; 104 | text-align:center; 105 | margin-left:20px; 106 | width:64px; 107 | color:#8c8c8c; 108 | display:block; 109 | position:relative; 110 | background:url(../images/siade_bg.png) no-repeat left center; 111 | border-bottom:1px solid #eeeeee; } 112 | .on{} 113 | .nav_slide span{ position:absolute; 114 | right:-31px; 115 | top:20px; 116 | display:none;} 117 | .nav_slide a:hover{ color:#003d79; 118 | background:none;} 119 | .nav_slide a:hover span{ display:block;} 120 | /*头部结束*/ 121 | /***********************************首页banner开始**********/ 122 | .banner_box { width:100%; height:100%; margin:0 auto; position: relative; overflow:hidden; background:url(../images/banner_shadow.png) no-repeat center 616px;} 123 | .banner_pic li img{ display:block; width:100%; height:100%px;} 124 | /*数字按钮样式*/ 125 | .banner_box .num { overflow:hidden; height: 25px; position: absolute; bottom:10px; right:24%; zoom:1; z-index:3 } 126 | .banner_box .num li { width: 20px; height: 20px; line-height: 20px; text-align: center; font-weight: 400; font-family: "微软雅黑", Arial; color: #FFFFFF; background: #afaeae; margin-right: 10px; border-radius:50%; cursor:pointer; float: left; } 127 | .banner_box .num li.on { background: #003d79; } /*当前项*/ 128 | 129 | /*上一个 下一个*/ 130 | .banner_box .prev, 131 | .banner_box .next { display: none; width: 40px; height: 100px; background: url(../images/btn.png) no-repeat; position: absolute; top:42%;} 132 | .banner_box .prev { left:2%; } 133 | .banner_box .next { right:2%; background-position: right; } 134 | /**************************首页banner图结束***************/ 135 | /****************************************************************************************************************************************************/ 136 | /***************************二级页banner图开始************/ 137 | .banner_box1 { width:100%; height:313px; margin:0 auto; position: relative; overflow:hidden; background:url(../images/banner_shadow.png) no-repeat center 616px;} 138 | .banner_pic1 li img{ display:block; width:100%; height:313px;} 139 | /*数字按钮样式*/ 140 | .banner_box1 .num { overflow:hidden; height: 25px; position: absolute; bottom:10px; right:24%; zoom:1; z-index:3 } 141 | .banner_box1 .num li { width: 20px; height: 20px; line-height: 20px; text-align: center; font-weight: 400; font-family: "微软雅黑", Arial; color: #FFFFFF; background: #afaeae; margin-right: 10px; border-radius:50%; cursor:pointer; float: left; } 142 | .banner_box1 .num li.on { background: #003d79; } /*当前项*/ 143 | 144 | /*上一个 下一个*/ 145 | .banner_box1 .prev, 146 | .banner_box1 .next { display: none; width: 40px; height: 100px; background: url(../images/btn.png) no-repeat; position: absolute; top:42%;} 147 | .banner_box1 .prev { left:2%; } 148 | .banner_box1 .next { right:2%; background-position: right; } 149 | /***************************二级页banner图结束************/ 150 | /****************************************************************************************************************************************************/ 151 | /***********************底部开始*************************/ 152 | .footer_box{ width:100%; 153 | height:44px; 154 | background:#696969;} 155 | .footer_box p{ width:1200px; margin:0 auto;text-align:center; line-height:42px; color:white;} 156 | /*********************底部结束*************************/ 157 | /****************************************************************************************************************************************************/ 158 | /************************首页内容开始************************************/ 159 | .content_box{ width:80%; 160 | margin:0 auto;} 161 | .content_box_top{ width:1200px; 162 | height:100px; 163 | margin:30px auto 35px;} 164 | .content_box_top1{ width:1200px; 165 | height:0px; 166 | margin:30px auto 35px;} 167 | .content_box_top li{ width:180px; text-align:center; float:left; margin:0 60px;} 168 | .content_box_top li a{ width:82px; height:82px; display:block; margin:0 auto;} 169 | .content_box_top li img{ border-radius:100px; 170 | -moz-border-radius:100px; 171 | -webkit-border-radius:100px; 172 | -mos-border-radius:100px; 173 | -o-border-radius:100px; 174 | transition:All 1s ease-in-out; 175 | -ms-transition:All 1s ease-in-out; 176 | -webkit-transition:All 1s ease-in-out; 177 | -moz-transition:All 1s ease-in-out; 178 | -o-transition:All 1s ease-in-out;} 179 | .content_box_top li:hover img{ border:1px solid #003d79; 180 | transform:rotateY(360deg); 181 | -webkit-transform:rotateY(360deg); 182 | -moz-transform:rotateY(360deg); 183 | -o-transform:rotateY(360deg); 184 | -ms-transform:rotateY(360deg);} 185 | .obj_title{ font-size:16px; 186 | color:#666666;} 187 | .content_box_top li:hover .obj_title{ color:#003d79;} 188 | .content_box_top li p{ font-size:12px; 189 | color:#666666; 190 | line-height:18px;} 191 | .content_box_bot{ width:95%; 192 | height:400px; 193 | background:#f9f9f9; 194 | margin:0 auto 34px; 195 | padding:20px 30px; 196 | overflow:hidden; 197 | border:1px red solid; 198 | box-shadow:0 0 6px #CCC; 199 | border:1px solid white;} 200 | .content_box_bot1{ width:100%; 201 | height:800px; 202 | background:#f9f9f9; 203 | margin:0 auto 34px; 204 | padding:20px 30px; 205 | overflow:hidden; 206 | border:1px red solid; 207 | box-shadow:0 0 6px #CCC; 208 | border:1px solid white;} 209 | 210 | 211 | .content_box_bot_l{ width:300px; 212 | height:330px; 213 | float:left;} 214 | .content_box_bot_r{ width:10px; 215 | height:100%; 216 | float:right;} 217 | .info_news{ width:300px; 218 | height:160px; 219 | margin-bottom:6px; 220 | position:relative;} 221 | .info_news_but{ overflow:hidden; border-bottom:1px solid #e6e6e6; height:38px; margin-bottom:6px;} 222 | .info_news_but p{ font-size:16px; 223 | text-align:center; 224 | line-height:38px; 225 | float:left; 226 | width:96px; 227 | height:38px;} 228 | .on{ background:#e6e6e6; color:#003d79;} 229 | .on1{ color:#003d79;} 230 | .info_news_text{ width:300px; 231 | height:120px;} 232 | .info_news_text1{ position:absolute; left:0; top:45px; z-index:30;} 233 | .info_news_text2{ position:absolute; left:0; top:45px; display:none; z-index:20;} 234 | .info_news_text li{ overflow:hidden; height:28px; line-height:28px; width:552px;} 235 | .info_news_text li a{ color:#666666;} 236 | .info_news_text li:hover a{ color:#232323; text-decoration:underline;} 237 | .info_news_text li img{ float:left; margin-top:12px; margin-right:4px} 238 | .info_news_text li p{ float:left; overflow:hidden; width:300px; font-size:12px;} 239 | .info_news_text li span.date{ float:right; display:block;} 240 | .search_obj_title{ font-size:16px; color:#005e11; margin-top:10px;} 241 | .search_obj_title span{ font-size:12px; color:#999999; margin-left:8px;} 242 | .search_obj{width:560px; 243 | height:160px;} 244 | .search_obj1{ width:540px; 245 | height:106px; 246 | margin-top:20px; 247 | overflow:hidden;} 248 | .search_obj a{ width:161px; 249 | height:46px; 250 | display:block; 251 | float:left; 252 | margin:5px 8px; 253 | text-align:center; 254 | line-height:46px; 255 | border:1px solid #e6e6e6; 256 | color:#b9b9b9; 257 | background:url(../images/but_bg.png) repeat-x;} 258 | .search_obj a:hover{ box-shadow:0 0 4px #CCC; 259 | -webkit-box-shadow:0 0 4px #CCC; 260 | -o-box-shadow:0 0 4px #CCC; 261 | -mos-box-shadow:0 0 4px #CCC;} 262 | .content_box_bot_r{ width:496px; 263 | height:250px; 264 | float:right;} 265 | .jianjie_box{ width:600px; 266 | height:130px;} 267 | .jianjie_title{ font-size:16px; color:#003d79;} 268 | .jianjie_title span{ font-size:12px; color:#999999; margin-left:6px;} 269 | .jianjie_box1{ width:495px; 270 | height:96px; 271 | margin-top:16px; 272 | overflow:hidden;} 273 | .jianjie_box1 img{ float:left; margin-top:6px;} 274 | .jianjie_box1 a{ float:left; 275 | display:block; 276 | width:364px; 277 | color:#919191; 278 | margin-left:10px; 279 | font-size:12px; 280 | line-height:24px; } 281 | .jianjie_box1 a span{ color:#003d79;} 282 | .jianjie_box1 a:hover{ text-decoration:underline;} 283 | .zichan_box{ width:496px; 284 | height:103px; 285 | margin-top:40px;} 286 | .zichan_box p{ font-size:16px; 287 | color:#005e11; 288 | margin-top:20px;} 289 | .zichan_box span{ font-size:12px; 290 | color:#999999; 291 | margin-left:6px;} 292 | .zichan_box a{ display:block; 293 | width:496px; 294 | height:64px; 295 | font-size:12px; 296 | margin-top:10px; 297 | line-height:24px; 298 | color:#909090;} 299 | .zichan_box a span{ color:#005e11;} 300 | .zichan_box a:hover{ text-decoration:underline;} 301 | .info_search{ width:1200px; 302 | height:56px; 303 | overflow:hidden;} 304 | .info_search_l{ width:440px;} 305 | .info_search_l li{ float:left; margin-right:10px;} 306 | .info_search_l select{ width:120px;} 307 | .info_search_r{ width:234px; 308 | float:right; 309 | overflow:hidden; 310 | height:32px;} 311 | .info_search_r a{ line-height:32px; 312 | display:block; 313 | height:32px; 314 | float:left; 315 | margin-left:30px; 316 | color:#727272;} 317 | .info_search_r a:hover{ text-decoration:underline; color:#005e11;} 318 | .info_search_r a span{ display:block; 319 | float:right; 320 | margin-left:8px;} 321 | /************************首页内容结束************************************/ 322 | /****************************************************************************************************************************************************/ 323 | /*************************关于华企开始**********************************/ 324 | .content_box1{ width:100%; 325 | position:absolute; 326 | top:398px; 327 | z-index:595; 328 | left:0;} 329 | .content_box2{ width:1200px; 330 | overflow:hidden; 331 | margin:0 auto;} 332 | .menu_box{ width:207px; float:left;} 333 | .menu_box_title{ width:207px; 334 | height:83px; 335 | color:white; 336 | font-size:24px; 337 | text-align:center; 338 | padding-top:20px; 339 | background:#003d79;} 340 | .menu_box_title span{ display:block; font-size:20px; color:#ffffff;} 341 | .menu_box1{ width:207px;} 342 | .menu_box1 li{ width:207px; 343 | height:46px; 344 | font-size:16px; 345 | margin:2px 0; 346 | text-align:center; 347 | line-height:46px;} 348 | .menu_box1 li a{ color:#003d79; 349 | display:block; 350 | background:#cacaca; 351 | width:207px; 352 | height:46px;} 353 | .menu_box1 li a:hover{ background:#003d79; color:white;} 354 | .content_boxL{ width:978px; 355 | float:right; 356 | overflow:hidden;} 357 | .content_boxL_nav{ width:978px; 358 | height:56px; 359 | line-height:56px; 360 | margin-top:47px; 361 | overflow:hidden; 362 | border-bottom:1px solid #cacaca;} 363 | .content_boxL_nav_title{ float:left; 364 | font-size:18px; 365 | color:#363636;} 366 | .content_boxL_nav_R{ float:right;} 367 | .content_boxL_nav_R a{ color:#363636;} 368 | .content_boxL_nav_R a:hover{ text-decoration:underline;} 369 | .content_boxL_text{ width:945px; margin:0 auto 80px;} 370 | .content_boxL_pic{ display:block;margin:50px auto;} 371 | .content_boxL_text p{ text-indent:2em; font:14px/24px "微软雅黑"; color:#666666; margin:30px 0;} 372 | .content_boxL_text p.yewu_p{ text-indent:0em; font:14px/24px "微软雅黑"; color:#666666; margin:30px 0;} 373 | .content_boxL_text p .yewu_title{ font-weight:600;} 374 | .clear{ clear:both;} 375 | /*************************关于华企结束**********************************/ 376 | /*************************新闻中心开始*****************************/ 377 | .content_boxL_list{ width:978px; margin:30px auto; overflow:hidden;} 378 | .content_boxL_list li{ border-bottom:1px dashed #d1d1d1; line-height:45px; overflow:hidden; width:958px;} 379 | .content_boxL_list_info{ padding-left:30px; 380 | color:#656565; 381 | width:948px; 382 | background:url(../images/info_icon2.png) no-repeat left center;} 383 | .content_boxL_list_info:hover{ text-decoration:underline;} 384 | .yewu_date{ float:right;} 385 | .fenye_box{ width:500px; margin:0 auto 80px;} 386 | .fenye_box a{ color:#848383; font-size:14px; padding:5px 14px; background:#CCC; margin:0 5px} 387 | .fenye_box a:hover{ color:white; background:#0b651b; text-decoration:underline;} 388 | .xqy_news_title{ font-size:20px; 389 | text-align:center; 390 | padding-bottom:20px; 391 | border-bottom:1px dashed #d1d1d1;} 392 | .xqy_news_title span{ font-size:12px;} 393 | .news_xqy_pic{ margin:0 auto; display:block;} 394 | .xqy_news_title1{ width:958px; margin:0 auto;} 395 | /*************************新闻中心结束*****************************/ 396 | /*************************成功案例开始*****************************/ 397 | .cgal_list{ width:978px; 398 | margin:30px auto;} 399 | .cgal_list li{ width:958px; overflow:hidden; height:220px; margin:20px 0;} 400 | .cgal_pic{ float:left; display:block;} 401 | .cgan_xqy{ width:614px; height:220px; float:right;} 402 | .cgan_xqy p{ margin-top:0; line-height:28px;} 403 | .cgan_xqy a:hover{ text-decoration:underline;} 404 | /*************************成功案例结束*****************************/ 405 | /*************************联系我们开始*****************************/ 406 | .lxwm_box{ width:978px; 407 | margin:40px auto 80px; 408 | overflow:hidden;} 409 | .lxwm_pic{ display:block; float:left;} 410 | .lxwm_info{ width:360px; float:right;} 411 | .lxwm_info p{ font-size:14px; padding-left:30px; width:324px; line-height:36px; color:#a5a5a5;} 412 | .lxwm_info1{ background:url(../images/lxwm_icon1.png) no-repeat left center;} 413 | .lxwm_info2{ background:url(../images/lxwm_icon2.png) no-repeat left center;} 414 | .lxwm_info3{ background:url(../images/lxwm_icon3.png) no-repeat left center;} 415 | .lxwm_info4{ background:url(../images/lxwm_icon4.png) no-repeat left center;} 416 | .lxwm_info5{ background:url(../images/lxwm_icon5.png) no-repeat left center;} 417 | /*************************联系我们结束*****************************/ 418 | /*************************留言板开始*****************************/ 419 | .lyb_box{ width:800px; margin:50px auto 100px; overflow:hidden;} 420 | .lyb_box1{ float:left; text-align:right;} 421 | .lyb_box1 p{ margin:14px 0;} 422 | .lyb_box2{ float:left;} 423 | .lyb_box2 li{ margin:10px 0;} 424 | .lyb_box2 input{ width:426px;} 425 | .lyb_box2 textarea{ width:600px; height:200px;} 426 | .lyb_box2 input.submit{ background:#005e11; 427 | color:white; 428 | height:30px; 429 | line-height:30px; 430 | text-align:center; 431 | border:none; 432 | border-radius:4px; 433 | -webkit-border-radius:4px; 434 | -o-border-radius:4px; 435 | -ms-border-radius:4px;} 436 | /*************************留言板结束*****************************/ 437 | /*************************浮屏开始*****************/ 438 | .fuping_box{ width:150px; 439 | height:400px; 440 | background:#5685cf; 441 | position:fixed; 442 | z-index:600; 443 | left:1%; 444 | bottom:24%;} 445 | /**************************浮屏结束***************/ 446 | 447 | 448 | .content{ 449 | margin: 20px 0; 450 | } 451 | 452 | 453 | 454 | * { 455 | padding: 0; 456 | margin: 0; 457 | } 458 | 459 | .process-parent1 { 460 | margin-top: 20px; 461 | margin-left:70px; 462 | position: absolute; 463 | width: 250px; 464 | border: 1px solid #ccc; 465 | height: 20px; 466 | border-radius: 3px; 467 | } 468 | 469 | 470 | .info1{ 471 | position: absolute; 472 | margin-top:20px; 473 | margin-left:3px; 474 | background: #fff; 475 | width: 70px; 476 | height: 20px; 477 | } 478 | 479 | .process-parent2 { 480 | margin-top:50px; 481 | position: absolute; 482 | margin-left:70px; 483 | width: 250px; 484 | border: 1px solid #ccc; 485 | height: 20px; 486 | border-radius: 3px; 487 | 488 | } 489 | .info2{ 490 | position: absolute; 491 | margin-top:50px; 492 | margin-left:3px; 493 | background: #fff; 494 | width: 70px; 495 | height: 20px; 496 | } 497 | .process-parent3 { 498 | margin-top:80px; 499 | position: absolute; 500 | margin-left:70px; 501 | width: 250px; 502 | border: 1px solid #ccc; 503 | height: 20px; 504 | border-radius: 3px; 505 | 506 | } 507 | .info3{ 508 | position: absolute; 509 | margin-top:80px; 510 | margin-left:3px; 511 | background: #fff; 512 | width: 70px; 513 | height: 20px; 514 | } 515 | .process-parent4 { 516 | position: absolute; 517 | margin-top:110px; 518 | margin-left:70px; 519 | width: 250px; 520 | border: 1px solid #ccc; 521 | height: 20px; 522 | border-radius: 3px; 523 | 524 | } 525 | .info4{ 526 | position: absolute; 527 | margin-top:110px; 528 | margin-left:3px; 529 | background: #fff; 530 | width: 70px; 531 | height: 20px; 532 | } 533 | .process-parent5 { 534 | margin-top:140px; 535 | margin-left:70px; 536 | position: absolute; 537 | width: 250px; 538 | border: 1px solid #ccc; 539 | height: 20px; 540 | border-radius: 3px; 541 | 542 | } 543 | .info5{ 544 | position: absolute; 545 | margin-top:140px; 546 | margin-left:3px; 547 | background: #fff; 548 | width: 70px; 549 | height: 20px; 550 | } 551 | .process-parent6 { 552 | margin-top:170px; 553 | position: absolute; 554 | margin-left:70px; 555 | width: 250px; 556 | border: 1px solid #ccc; 557 | height: 20px; 558 | border-radius: 3px; 559 | 560 | } 561 | .info6{ 562 | position: absolute; 563 | margin-top:170px; 564 | margin-left:3px; 565 | background: #fff; 566 | width: 70px; 567 | height: 20px; 568 | } 569 | .process-parent7 { 570 | margin-top:200px; 571 | margin-left:70px; 572 | position: absolute; 573 | width: 250px; 574 | border: 1px solid #ccc; 575 | height: 20px; 576 | border-radius: 3px; 577 | 578 | } 579 | .info7{ 580 | position: absolute; 581 | margin-top:200px; 582 | margin-left:3px; 583 | background: #fff; 584 | width: 70px; 585 | height: 20px; 586 | } 587 | .process-parent8 { 588 | position: absolute; 589 | margin-top:230px; 590 | margin-left:70px; 591 | width: 250px; 592 | border: 1px solid #ccc; 593 | height: 20px; 594 | border-radius: 3px; 595 | 596 | } 597 | .info8{ 598 | position: absolute; 599 | margin-top:230px; 600 | margin-left:3px; 601 | background: #fff; 602 | width: 70px; 603 | height: 20px; 604 | } 605 | .process-parent9 { 606 | margin-top:260px; 607 | position: absolute; 608 | margin-left:70px; 609 | width: 250px; 610 | border: 1px solid #ccc; 611 | height: 20px; 612 | border-radius: 3px; 613 | 614 | } 615 | .info9{ 616 | position: absolute; 617 | margin-top:260px; 618 | margin-left:3px; 619 | background: #fff; 620 | width: 70px; 621 | height: 20px; 622 | } 623 | .process-parent10 { 624 | margin-top:290px; 625 | position: absolute; 626 | margin-left:70px; 627 | width: 250px; 628 | border: 1px solid #ccc; 629 | height: 20px; 630 | border-radius: 3px; 631 | 632 | } 633 | .info10{ 634 | position: absolute; 635 | margin-top:290px; 636 | margin-left:3px; 637 | background: #fff; 638 | width: 70px; 639 | height: 20px; 640 | } 641 | .process-parent11 { 642 | position: absolute; 643 | margin-top:320px; 644 | margin-left:70px; 645 | width: 250px; 646 | border: 1px solid #ccc; 647 | height: 20px; 648 | border-radius: 3px; 649 | 650 | } 651 | .info11{ 652 | position: absolute; 653 | margin-top:320px; 654 | margin-left:3px; 655 | background: #fff; 656 | width: 70px; 657 | height: 20px; 658 | } 659 | .process-parent12 { 660 | position: absolute; 661 | margin-top:350px; 662 | width: 250px; 663 | margin-left:70px; 664 | border: 1px solid #ccc; 665 | height: 20px; 666 | border-radius: 3px; 667 | 668 | } 669 | .info12{ 670 | position: absolute; 671 | margin-top:350px; 672 | margin-left:3px; 673 | background: #fff; 674 | width: 70px; 675 | height: 20px; 676 | } 677 | .submit{ 678 | position: absolute; 679 | margin-top:400px; 680 | margin-left:100px; 681 | height: 60px; 682 | width: 100px; 683 | border-radius: 3px; 684 | } 685 | 686 | .reset{ 687 | position: absolute; 688 | margin-top:400px; 689 | margin-left:250px; 690 | height: 60px; 691 | width: 100px; 692 | border-radius: 3px; 693 | } 694 | .scoreConfirm{ 695 | position: absolute; 696 | margin-top:100px; 697 | margin-left:10px; 698 | height: 100px; 699 | width: 200px; 700 | border-radius: 3px; 701 | } 702 | .process-son { 703 | position: relative; 704 | background: green; 705 | height: 100%; 706 | font-size: 8px; 707 | text-align: center; 708 | color: #fff; 709 | } 710 | 711 | .bg-success{ 712 | background: green; 713 | color: green; 714 | } 715 | .progress-bar .bg-warning { 716 | background: yellow; 717 | } 718 | .progress-bar, .bg-danger{ 719 | background: red; 720 | } 721 | 722 | .recommed_course{ 723 | position: absolute; 724 | margin-top:100px; 725 | margin-left:200px; 726 | } 727 | 728 | .recommed_person{ 729 | position: absolute; 730 | margin-top:100px; 731 | margin-left:1200px; 732 | } -------------------------------------------------------------------------------- /ccksDemo/static/test.html.bak: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 面向金融领域的事件提取 10 | 11 | 93 | 94 | 95 | 96 |
97 |

面向金融领域的事件主体提取服务

98 | 99 | 100 |

待检测文本:

101 | 102 | 103 | 104 | 105 | 106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /ccksDemo/templates/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 面向金融领域的事件提取 10 | 11 | 12 | 255 | 256 | 257 |
258 |
259 |

面向金融领域的事件主体提取服务

260 | 261 |
262 | 263 | 264 | 265 | 270 | 271 | 272 |
266 |
267 | 268 |
269 |

事件主体

{{ result }}

273 |
274 |
275 |
276 | 277 | 278 | 279 | 280 | -------------------------------------------------------------------------------- /ccksDemo/utils/__pycache__/extract_entity.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/utils/__pycache__/extract_entity.cpython-35.pyc -------------------------------------------------------------------------------- /ccksDemo/utils/best_model.weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiefan-guo/CCKS2019_subject_extraction/4b4913470244f72665cef0149ec5e3ebde2b0115/ccksDemo/utils/best_model.weights -------------------------------------------------------------------------------- /ccksDemo/utils/classes.json: -------------------------------------------------------------------------------- 1 | [{"0": "\u8d44\u91d1\u8d26\u6237\u98ce\u9669", "1": "\u6d89\u5acc\u6b3a\u8bc8", "2": "\u4e1a\u7ee9\u4e0b\u6ed1", "3": "\u4fe1\u6279\u8fdd\u89c4", "4": "\u6d89\u5acc\u4f20\u9500", "5": "\u4ea4\u6613\u8fdd\u89c4", "6": "\u8d22\u52a1\u9020\u5047", "7": "\u8bc4\u7ea7\u8c03\u6574", "8": "\u91cd\u7ec4\u5931\u8d25", "9": "\u5b9e\u63a7\u4eba\u80a1\u4e1c\u53d8\u66f4", "10": "\u4e0d\u80fd\u5c65\u804c", "11": "\u6d89\u5acc\u8fdd\u6cd5", "12": "\u6d89\u5acc\u975e\u6cd5\u96c6\u8d44", "13": "\u8d44\u4ea7\u8d1f\u9762", "14": "\u6b47\u4e1a\u505c\u4e1a", "15": "\u63d0\u73b0\u56f0\u96be", "16": "\u9ad8\u7ba1\u8d1f\u9762", "17": "\u6295\u8bc9\u7ef4\u6743", "18": "\u5931\u8054\u8dd1\u8def", "19": "\u4ea7\u54c1\u8fdd\u89c4", "20": "\u516c\u53f8\u80a1\u5e02\u5f02\u5e38"}, {"\u91cd\u7ec4\u5931\u8d25": 8, "\u6b47\u4e1a\u505c\u4e1a": 14, "\u4e1a\u7ee9\u4e0b\u6ed1": 2, "\u4ea7\u54c1\u8fdd\u89c4": 19, "\u4fe1\u6279\u8fdd\u89c4": 3, "\u8d44\u4ea7\u8d1f\u9762": 13, "\u63d0\u73b0\u56f0\u96be": 15, "\u4ea4\u6613\u8fdd\u89c4": 5, "\u6295\u8bc9\u7ef4\u6743": 17, "\u8d22\u52a1\u9020\u5047": 6, "\u5b9e\u63a7\u4eba\u80a1\u4e1c\u53d8\u66f4": 9, "\u8d44\u91d1\u8d26\u6237\u98ce\u9669": 0, "\u6d89\u5acc\u975e\u6cd5\u96c6\u8d44": 12, "\u4e0d\u80fd\u5c65\u804c": 10, "\u516c\u53f8\u80a1\u5e02\u5f02\u5e38": 20, "\u9ad8\u7ba1\u8d1f\u9762": 16, "\u5931\u8054\u8dd1\u8def": 18, "\u6d89\u5acc\u8fdd\u6cd5": 11, "\u6d89\u5acc\u4f20\u9500": 4, "\u6d89\u5acc\u6b3a\u8bc8": 1, "\u8bc4\u7ea7\u8c03\u6574": 7}] -------------------------------------------------------------------------------- /ccksDemo/utils/extract_entity.py: -------------------------------------------------------------------------------- 1 | from keras.layers import * 2 | from keras.models import Model,load_model 3 | import keras.backend as K 4 | from keras.callbacks import Callback 5 | from keras.optimizers import Adam 6 | import json 7 | from tqdm import tqdm 8 | import os, re 9 | import numpy as np 10 | import pandas as pd 11 | 12 | mode = 0 13 | min_count = 2 14 | char_size = 128 15 | maxlen = 256 16 | 17 | # 读取数据,排除“其他”类型 18 | D = pd.read_csv('./utils/event_type_entity_extract_train.csv', encoding='utf-8', header=None) 19 | D = D[D[2] != u'其他'] 20 | D = D[D[1].str.len() <= maxlen] 21 | 22 | train_data = [] 23 | for t,c,n in zip(D[1], D[2], D[3]): 24 | """ 25 | t:整个事件 26 | c:谓语 27 | n:主语 28 | """ 29 | start = t.find(n) 30 | if start != -1: 31 | """ 32 | 在事件中可以找到主语就将其append到train_data 33 | """ 34 | train_data.append((t, c, n)) 35 | 36 | id2char, char2id = json.load(open('./utils/all_chars_me.json')) 37 | id2class, class2id = json.load(open('./utils/classes.json')) 38 | random_order = json.load(open('./utils/random_order_train.json')) 39 | 40 | def seq_padding(X, padding=0): 41 | L = [len(x) for x in X] 42 | ML = max(L) 43 | return np.array([ 44 | np.concatenate([x, [padding] * (ML - len(x))]) if len(x) < ML else x for x in X 45 | ]) 46 | 47 | class Attention(Layer): 48 | """多头注意力机制 49 | """ 50 | def __init__(self, nb_head, size_per_head, **kwargs): 51 | self.nb_head = nb_head 52 | self.size_per_head = size_per_head 53 | self.out_dim = nb_head * size_per_head 54 | super(Attention, self).__init__(**kwargs) 55 | def build(self, input_shape): 56 | q_in_dim = input_shape[0][-1] 57 | k_in_dim = input_shape[1][-1] 58 | v_in_dim = input_shape[2][-1] 59 | self.q_kernel = self.add_weight(name='q_kernel', 60 | shape=(q_in_dim, self.out_dim), 61 | initializer='glorot_normal') 62 | self.k_kernel = self.add_weight(name='k_kernel', 63 | shape=(k_in_dim, self.out_dim), 64 | initializer='glorot_normal') 65 | self.v_kernel = self.add_weight(name='w_kernel', 66 | shape=(v_in_dim, self.out_dim), 67 | initializer='glorot_normal') 68 | def mask(self, x, mask, mode='mul'): 69 | if mask is None: 70 | return x 71 | else: 72 | for _ in range(K.ndim(x) - K.ndim(mask)): 73 | # ndim以整数形式返回张量中的轴数。 74 | mask = K.expand_dims(mask, K.ndim(mask)) 75 | if mode == 'mul': 76 | return x * mask 77 | else: 78 | return x - (1 - mask) * 1e10 79 | def call(self, inputs): 80 | q, k, v = inputs[:3] 81 | v_mask, q_mask = None, None 82 | if len(inputs) > 3: 83 | v_mask = inputs[3] 84 | if len(inputs) > 4: 85 | q_mask = inputs[4] 86 | # 线性变化 87 | qw = K.dot(q, self.q_kernel) 88 | kw = K.dot(k, self.k_kernel) 89 | vw = K.dot(v, self.v_kernel) 90 | # 形状变换 91 | qw = K.reshape(qw, (-1, K.shape(qw)[1], self.nb_head, self.size_per_head)) 92 | kw = K.reshape(kw, (-1, K.shape(kw)[1], self.nb_head, self.size_per_head)) 93 | vw = K.reshape(vw, (-1, K.shape(vw)[1], self.nb_head, self.size_per_head)) 94 | # 维度置换 95 | qw = K.permute_dimensions(qw, (0, 2, 1, 3)) 96 | kw = K.permute_dimensions(kw, (0, 2, 1, 3)) 97 | vw = K.permute_dimensions(vw, (0, 2, 1, 3)) 98 | # Attention 99 | a = K.batch_dot(qw, kw, [3, 3]) / self.size_per_head**0.5 100 | a = K.permute_dimensions(a, (0, 3, 2, 1)) 101 | a = self.mask(a, v_mask, 'add') 102 | a = K.permute_dimensions(a, (0, 3, 2, 1)) 103 | a = K.softmax(a) 104 | # 完成输出 105 | o = K.batch_dot(a, vw, [3, 2]) 106 | o = K.permute_dimensions(o, (0, 2, 1, 3)) 107 | o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim)) 108 | o = self.mask(o, q_mask, 'mul') 109 | return o 110 | def compute_output_shape(self, input_shape): 111 | return (input_shape[0][0], input_shape[0][1], self.out_dim) 112 | 113 | class data_generator: 114 | def __init__(self, data, batch_size=64): 115 | self.data = data 116 | self.batch_size = batch_size 117 | self.steps = len(self.data) // self.batch_size 118 | if len(self.data) % self.batch_size != 0: 119 | self.steps += 1 120 | def __len__(self): 121 | return self.steps 122 | def __iter__(self): 123 | while True: 124 | idxs = list(range(len(self.data))) 125 | np.random.shuffle(idxs) 126 | X, C, S1, S2 = [], [], [], [] 127 | for i in idxs: 128 | d = self.data[i] 129 | # 遍历每一个数据元组(事件,谓语,主语) 130 | text = d[0] 131 | x = [char2id.get(c, 1) for c in text] 132 | c = class2id[d[1]] 133 | s1, s2 = np.zeros(len(text)), np.zeros(len(text)) 134 | start = text.find(d[2]) 135 | end = start + len(d[2]) - 1 136 | s1[start] = 1 137 | s2[end] = 1 138 | X.append(x) 139 | C.append([c]) 140 | S1.append(s1) 141 | S2.append(s2) 142 | if len(X) == self.batch_size or i == idxs[-1]: 143 | X = seq_padding(X) 144 | C = seq_padding(C) 145 | S1 = seq_padding(S1) 146 | S2 = seq_padding(S2) 147 | yield [X, C, S1, S2], None 148 | X, C, S1, S2 = [], [], [], [] 149 | 150 | 151 | dev_data = [train_data[j] for i, j in enumerate(random_order) if i % 9 == mode] 152 | train_data = [train_data[j] for i, j in enumerate(random_order) if i % 9 != mode] 153 | 154 | x_in = Input(shape=(None,)) # 待识别句子输入 155 | c_in = Input(shape=(1,)) # 事件类型 156 | s1_in = Input(shape=(None,)) # 实体左边界(标签) 157 | s2_in = Input(shape=(None,)) # 实体右边界(标签) 158 | 159 | x, c, s1, s2 = x_in, c_in, s1_in, s2_in 160 | x_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x) 161 | # Lamda : 将任意表达式封装为 Layer 对象。 162 | x = Embedding(len(id2char)+2, char_size)(x) 163 | # 参数:输入数据的最大下标(字典长度),全连接嵌入的维度 164 | # Embedding 将正整数(索引值)转换为固定尺寸的稠密向量。 例如: [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]] 165 | # 该层只能用作模型中的第一层。 166 | c = Embedding(len(class2id), char_size)(c) 167 | c = Lambda(lambda x: x[0] * 0 + x[1])([x, c]) 168 | x = Add()([x, c]) 169 | x = Dropout(0.2)(x) 170 | # Dropout 将 Dropout 应用于输入。 171 | x = Lambda(lambda x: x[0] * x[1])([x, x_mask]) 172 | x = Bidirectional(CuDNNLSTM(char_size//2, return_sequences=True))(x) 173 | # CuDNNLSTM 由 CuDNN 支持的快速 LSTM 实现。只能以 TensorFlow 后端运行在 GPU 上 174 | # Bidirectional RNN 的双向封装器,对序列进行前向和后向计算。 175 | x = Lambda(lambda x: x[0] * x[1])([x, x_mask]) 176 | x = Bidirectional(CuDNNLSTM(char_size//2, return_sequences=True))(x) 177 | x = Lambda(lambda x: x[0] * x[1])([x, x_mask]) 178 | 179 | xo = x 180 | x = Attention(8, 16)([x, x, x, x_mask, x_mask]) 181 | x = Lambda(lambda x: x[0] + x[1])([xo, x]) 182 | 183 | x = Concatenate()([x, c]) 184 | # Concatenate Concatenate 层的函数式接口。 185 | 186 | x1 = Dense(char_size, use_bias=False, activation='tanh')(x) 187 | # Dense 全连接层。 188 | ps1 = Dense(1, use_bias=False)(x1) 189 | ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps1, x_mask]) 190 | x2 = Dense(char_size, use_bias=False, activation='tanh')(x) 191 | ps2 = Dense(1, use_bias=False)(x2) 192 | ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps2, x_mask]) 193 | 194 | model = Model([x_in, c_in], [ps1, ps2]) 195 | 196 | train_model = Model([x_in, c_in, s1_in, s2_in], [ps1, ps2]) 197 | 198 | loss1 = K.mean(K.categorical_crossentropy(s1_in, ps1, from_logits=True)) 199 | # categorical_crossentropy输出张量与目标张量之间的分类交叉熵。 200 | # mean 张量在某一指定轴的均值。 201 | loss2 = K.mean(K.categorical_crossentropy(s2_in, ps2, from_logits=True)) 202 | loss = loss1 + loss2 203 | print(loss) 204 | train_model.add_loss(loss) 205 | # 指定自定义的损失函数,通过调用 self.add_loss(loss_tensor) 206 | train_model.compile(optimizer=Adam(1e-3)) 207 | # compile用于配置训练模型。 optimizer: 字符串(优化器名)或者优化器实例。 208 | train_model.summary() 209 | 210 | def extract_entity(text_in, c_in): 211 | """解码函数,应自行添加更多规则,保证解码出来的是一个公司名 212 | """ 213 | if c_in not in class2id: 214 | return 'NaN' 215 | _x = [char2id.get(c, 1) for c in text_in] 216 | _x = np.array([_x]) 217 | _c = np.array([[class2id[c_in]]]) 218 | _ps1, _ps2 = model.predict([_x, _c]) #为输入样本生成输出预测。 219 | start = _ps1[0].argmax() 220 | # 返回指定轴的最大值的索引。keras.backend.argmax(x, axis=-1) x: 张量或变量。 axis: 执行归约操作的轴。 221 | end = _ps2[0][start:].argmax() + start 222 | return text_in[start: end+1] 223 | 224 | 225 | class Evaluate(Callback): 226 | def __init__(self): 227 | self.ACC = [] 228 | self.best = 0. 229 | def on_epoch_end(self, epoch, logs=None): 230 | acc = self.evaluate() 231 | self.ACC.append(acc) 232 | if acc > self.best: 233 | self.best = acc 234 | train_model.save_weights('best_model.weights') 235 | # model.save_weights(filepath) 将模型权重存储为 HDF5 文件。 236 | print('acc: %.4f, best acc: %.4f\n' % (acc, self.best)) 237 | def evaluate(self): 238 | A = 1e-10 239 | for d in tqdm(iter(dev_data)): 240 | R = extract_entity(d[0], d[1]) 241 | if R == d[2]: 242 | A += 1 243 | return A / len(dev_data) 244 | 245 | 246 | evaluator = Evaluate() 247 | train_D = data_generator(train_data) 248 | 249 | train_model.fit_generator(train_D.__iter__(), 250 | steps_per_epoch=len(train_D), 251 | epochs=1, 252 | callbacks=[evaluator] 253 | ) 254 | 255 | train_model.load_weights('best_model.weights') 256 | 257 | 258 | def extract_entity_self(text_in, c_in): 259 | 260 | if c_in not in class2id: 261 | return 'NaN' 262 | _x = [char2id.get(c, 1) for c in text_in] 263 | _x = np.array([_x]) 264 | _c = np.array([[class2id[c_in]]]) 265 | _ps1, _ps2 = model.predict([_x, _c]) #为输入样本生成输出预测。 266 | start = _ps1[0].argmax() 267 | # 返回指定轴的最大值的索引。keras.backend.argmax(x, axis=-1) x: 张量或变量。 axis: 执行归约操作的轴。 268 | end = _ps2[0][start:].argmax() + start 269 | return text_in[start: end+1] 270 | 271 | 272 | # 四川双马(000935)待注资产再“缩水” 盈利预测下滑广电运通(002152)减持股份退出神州控股争夺战,业绩下滑 -------------------------------------------------------------------------------- /classes.json: -------------------------------------------------------------------------------- 1 | [{"0": "\u8d44\u91d1\u8d26\u6237\u98ce\u9669", "1": "\u6d89\u5acc\u6b3a\u8bc8", "2": "\u4e1a\u7ee9\u4e0b\u6ed1", "3": "\u4fe1\u6279\u8fdd\u89c4", "4": "\u6d89\u5acc\u4f20\u9500", "5": "\u4ea4\u6613\u8fdd\u89c4", "6": "\u8d22\u52a1\u9020\u5047", "7": "\u8bc4\u7ea7\u8c03\u6574", "8": "\u91cd\u7ec4\u5931\u8d25", "9": "\u5b9e\u63a7\u4eba\u80a1\u4e1c\u53d8\u66f4", "10": "\u4e0d\u80fd\u5c65\u804c", "11": "\u6d89\u5acc\u8fdd\u6cd5", "12": "\u6d89\u5acc\u975e\u6cd5\u96c6\u8d44", "13": "\u8d44\u4ea7\u8d1f\u9762", "14": "\u6b47\u4e1a\u505c\u4e1a", "15": "\u63d0\u73b0\u56f0\u96be", "16": "\u9ad8\u7ba1\u8d1f\u9762", "17": "\u6295\u8bc9\u7ef4\u6743", "18": "\u5931\u8054\u8dd1\u8def", "19": "\u4ea7\u54c1\u8fdd\u89c4", "20": "\u516c\u53f8\u80a1\u5e02\u5f02\u5e38"}, {"\u91cd\u7ec4\u5931\u8d25": 8, "\u6b47\u4e1a\u505c\u4e1a": 14, "\u4e1a\u7ee9\u4e0b\u6ed1": 2, "\u4ea7\u54c1\u8fdd\u89c4": 19, "\u4fe1\u6279\u8fdd\u89c4": 3, "\u8d44\u4ea7\u8d1f\u9762": 13, "\u63d0\u73b0\u56f0\u96be": 15, "\u4ea4\u6613\u8fdd\u89c4": 5, "\u6295\u8bc9\u7ef4\u6743": 17, "\u8d22\u52a1\u9020\u5047": 6, "\u5b9e\u63a7\u4eba\u80a1\u4e1c\u53d8\u66f4": 9, "\u8d44\u91d1\u8d26\u6237\u98ce\u9669": 0, "\u6d89\u5acc\u975e\u6cd5\u96c6\u8d44": 12, "\u4e0d\u80fd\u5c65\u804c": 10, "\u516c\u53f8\u80a1\u5e02\u5f02\u5e38": 20, "\u9ad8\u7ba1\u8d1f\u9762": 16, "\u5931\u8054\u8dd1\u8def": 18, "\u6d89\u5acc\u8fdd\u6cd5": 11, "\u6d89\u5acc\u4f20\u9500": 4, "\u6d89\u5acc\u6b3a\u8bc8": 1, "\u8bc4\u7ea7\u8c03\u6574": 7}] -------------------------------------------------------------------------------- /ee-2019-baseline-master/.idea/ee-2019-baseline-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /ee-2019-baseline-master/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /ee-2019-baseline-master/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ee-2019-baseline-master/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | random_order_train 78 | 79 | 80 | 81 | 86 | 87 | 88 | 89 | 90 | true 91 | DEFINITION_ORDER 92 | 93 | 94 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 |