├── README.md
├── csv
│   ├── managers.csv
│   └── stocks.csv
├── cypher
│   └── build.cypher
├── db
│   └── ongdb-enterprise-1.0.4-qabot-demo.7z
├── images
│   ├── QABot.png
│   └── txt
│       └── license
├── ppt
│   ├── tt
│   └── 问答机器人框架.pptx
└── python
    ├── fetch_managers.py
    ├── fetch_stocks.py
    └── system_constant.py

/README.md:
--------------------------------------------------------------------------------

# Graph QABot Demo

## 1. Software Installation
### 1.1 Install ONgDB
- Download ONgDB; either the Community or the Enterprise edition works
```url
https://github.com/graphfoundation/ongdb/releases/tag/1.0.4
```

### 1.2 Install the APOC and OLAB plugins
>After downloading, place the plugin JARs in the /plugins folder.

- Download APOC
```url
# download the "all" build
https://github.com/graphfoundation/ongdb-apoc/releases/tag/3.4.0.10
```

- Download OLAB
```url
https://github.com/ongdb-contrib/ongdb-lab-apoc/releases/tag/1.0.0
```

### 1.3 Update the configuration and start ONgDB
- Edit the configuration file
```shell
# conf/ongdb.conf
dbms.security.procedures.unrestricted=apoc.*,olab.*,custom.*
apoc.import.file.enabled=true
```

- Start the graph database node
```shell
# start ONgDB on Windows
bin\ongdb.bat console
```

## 2. Fetching the Data
### 2.1 Shanghai and Shenzhen stocks
```python
import tushare as ts
import system_constant as sys_cnt
import os

# initialize the Tushare pro API
pro = ts.pro_api(sys_cnt.tushare_token())


# fetch basic stock information
def run():
    # list all stocks that are currently listed and trading normally
    data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
    # write to ../csv/stocks.csv (GBK-encoded, Windows path separators)
    data.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\stocks.csv',
                encoding='GBK',
                columns=['ts_code', 'symbol', 'name', 'area', 'industry', 'list_date'],
                index=False)


if __name__ == '__main__':
    run()
```

### 2.2 Listed-company management
```python
import tushare as ts
import pandas as pd
import system_constant as sys_cnt
import os
import time

# initialize the Tushare pro API
pro = ts.pro_api(sys_cnt.tushare_token())


# fetch the management records of every listed company
def run():
    # list all stocks that are currently listed and trading normally
    data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
    tss = data['ts_code']
    result = []
    ct = 0
    bk_ct = 100000  # safety cap on the number of companies fetched
    for code in tss:
        time.sleep(0.5)  # throttle requests to respect the Tushare rate limit
        # fetch all manager records for a single company
        df = pro.stk_managers(ts_code=code)
        result.append(df)
        ct += 1
        if ct > bk_ct:
            break
    df_merge = pd.concat(result)
    print(df_merge)
    df_merge.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\managers.csv',
                    encoding='GBK',
                    columns=['ts_code', 'ann_date', 'name', 'gender', 'lev', 'title', 'edu', 'national', 'birthday',
                             'begin_date', 'end_date'],
                    index=False)


if __name__ == '__main__':
    run()
```

## 3. Designing the Graph Data Model
>Import the JSON below into the [Graphene graph data modeling tool](https://ongdb-contrib.github.io/graphene/app/).

```json
{"data":{"nodes":[{"x":613,"y":284,"color":"#E52B50","label":"股票代码","properties":[{"id":"a2aeffb2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"afb48f90","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a1b949a0","isSelected":false,"isNode":true},{"x":678,"y":444,"color":"#AB274F","label":"股票","properties":[{"id":"a4a8beb4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a9bf77ad","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a5844bbd","isSelected":false,"isNode":true},{"x":413,"y":368,"color":"#3B7A57","label":"股票名称","properties":[{"id":"a08f768e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a09d0ab9","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a78c159d","isSelected":false,"isNode":true},{"x":426,"y":475,"color":"#FF7E00","label":"地域","properties":[{"id":"a59d20a1","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a28d19bb","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a49f8ebc","isSelected":false,"isNode":true},{"x":481,"y":557,"color":"#FF033E","label":"行业","properties":[{"id":"aea1c5b2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"aaa4b082","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a381f688","isSelected":false,"isNode":true},{"x":714,"y":569,"color":"#AF002A","label":"上市日期","properties":[{"id":"a284f1aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a0b444a4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ac81ffb4","isSelected":false,"isNode":true},{"x":834,"y":380,"color":"#E32636","label":"高管","properties":[{"id":"a4b02384","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"a2ade2ad","isSelected":false,"isNode":true},{"x":890,"y":266,"color":"#C46210","label":"性别","properties":[{"id":"ad9cab82","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a2aa0f86","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"af9a0f97","isSelected":false,"isNode":true},{"x":977,"y":447,"color":"#E52B50","label":"学历","properties":[{"id":"a090f498","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGene
rated":true,"isSystem":true,"description":""},{"id":"a8a0e1b4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ad931890","isSelected":false,"isNode":true},{"x":1001,"y":180,"color":"#AB274F","label":"性别_别名","properties":[{"id":"a3980888","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"aa8b5199","isSelected":false,"isNode":true}],"edges":[{"startNodeId":"a5844bbd","endNodeId":"a1b949a0","middlePointOffset":[0,0],"properties":[{"id":"a29766a0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票代码","id":"a5934a81","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a78c159d","middlePointOffset":[0,0],"properties":[{"id":"a196c08e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票名称","id":"acaecfb9","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a49f8ebc","middlePointOffset":[0,0],"properties":[{"id":"a88ae1a4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"地域","id":"aa94de98","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a381f688","middlePointOffset":[0,0],"properties":[{"id":"a493c4a1","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"所属行业","id":"aead0aa2","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"ac81ffb4","middlePointOffset":[0,0],"properties":[{"id":"aebda5bd","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"上市日期","id":"a19f3281","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"a5844bbd","middlePointOffset":[0,0],"properties":[{"id":"adaf73aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"任职于","id":"a3bcec8c","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"af9a0f97","middlePointOffset":[0,0],"properties":[{"id":"aca28680","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"性别","id":"ae973eb2","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"ad931890","middlePointOffset":[0,0],"properties":[{"id":"a7af3fb0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"学历","id":"aca71396","isSelected":false,"isEdge":true},{"startNodeId":"af9a0f97","endNodeId":"aa8b5199","middlePointOffset":[5.5,-6],"properties":[{"id":"a692c7a2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"别名","id":"a9a6bb89","isSelected":false,"isEdge":true}]}} 109 | ``` 110 | 111 | ## 四、构建图数据 112 | ### 4.1 设计索引和约束 113 | ```cypher 114 | CREATE CONSTRAINT ON (n:股票代码) ASSERT (n.value) IS NODE KEY; 115 | CREATE CONSTRAINT 
ON (n:股票) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:股票名称) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:地域) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:行业) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:上市日期) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY;
CREATE INDEX ON :高管(value);
CREATE CONSTRAINT ON (n:性别) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:学历) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:性别_别名) ASSERT (n.value) IS NODE KEY;
```

### 4.2 Load the data
>Place the generated CSV files in the /import directory and convert them to UTF-8-BOM (Notepad++ can do the conversion).

```cypher
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.symbol AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:股票代码 {value:tval})
MERGE (f)-[:股票代码]->(t);

LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.name AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:股票名称 {value:tval})
MERGE (f)-[:股票名称]->(t);

LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.area AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:地域 {value:tval})
MERGE (f)-[:地域]->(t);

LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.industry AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:行业 {value:tval})
MERGE (f)-[:所属行业]->(t);

LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.list_date AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:上市日期 {value:TOINTEGER(tval)})
MERGE (f)-[:上市日期]->(t);

LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:股票 {value:tval})
MERGE (f)-[:任职于]->(t);

LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.gender AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:性别 {value:tval})
MERGE (f)-[:性别]->(t);

LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.edu AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:学历 {value:tval})
MERGE (f)-[:学历]->(t);

WITH ['女性','女'] AS list
UNWIND list AS wd
WITH wd
MATCH (n:性别) WHERE n.value='F' WITH n,wd
MERGE (t:性别_别名 {value:wd})
MERGE (n)-[:别名]->(t);

WITH ['男性','男'] AS list
UNWIND list AS wd
WITH wd
MATCH (n:性别) WHERE n.value='M' WITH n,wd
MERGE (t:性别_别名 {value:wd})
MERGE (n)-[:别名]->(t);
```
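After the load completes, a few sanity-check queries help confirm the graph looks as expected. This is a minimal sketch using only the labels and relationship types built above; the counts will vary with your Tushare snapshot.

```cypher
// Optional sanity checks after the import
MATCH (s:股票) RETURN COUNT(s) AS stocks;
MATCH (m:高管)-[:任职于]->(:股票) RETURN COUNT(DISTINCT m) AS activeManagers;
MATCH (:性别 {value:'F'})-[:别名]->(a:性别_别名) RETURN COLLECT(a.value) AS femaleAliases;
```
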
## 5. QA Component Configuration
### 5.1 Configure the graph data schema
```sql
CALL apoc.custom.asFunction(
'inference.search.qabot',
'RETURN \'{"graph":{"nodes":[{"properties_filter":[],"id":"1","labels":["股票代码"]},{"properties_filter":[],"id":"2","labels":["股票"]},{"properties_filter":[],"id":"3","labels":["股票名称"]},{"properties_filter":[],"id":"4","labels":["地域"]},{"properties_filter":[],"id":"5","labels":["行业"]},{"properties_filter":[],"id":"6","labels":["上市日期"]},{"properties_filter":[],"id":"7","labels":["高管"]},{"properties_filter":[],"id":"8","labels":["性别"]},{"properties_filter":[],"id":"9","labels":["学历"]},{"properties_filter":[],"id":"10","labels":["性别_别名"]}],"relationships":[{"startNode":"2","properties_filter":[],"id":"1","type":"股票代码","endNode":"1"},{"startNode":"2","properties_filter":[],"id":"2","type":"股票名称","endNode":"3"},{"startNode":"2","properties_filter":[],"id":"3","type":"地域","endNode":"4"},{"startNode":"2","properties_filter":[],"id":"4","type":"所属行业","endNode":"5"},{"startNode":"2","properties_filter":[],"id":"5","type":"上市日期","endNode":"6"},{"startNode":"7","properties_filter":[],"id":"6","type":"任职于","endNode":"2"},{"startNode":"7","properties_filter":[],"id":"7","type":"性别","endNode":"8"},{"startNode":"7","properties_filter":[],"id":"8","type":"学历","endNode":"9"},{"startNode":"8","properties_filter":[],"id":"9","type":"别名","endNode":"10"}]}}\' AS graphDataSchema',
'STRING',
NULL,
false,
'搜索时图数据扩展模式(schema)的动态获取'
);
```

### 5.2 Configure entity weight rules
```sql
CALL apoc.custom.asFunction(
'inference.weight.qabot',
'RETURN \'{"LABEL":{"股票":12,"高管":11}}\' AS weight',
'STRING',
NULL,
false,
'本体权重'
);
```

### 5.3 Configure entity matching rules
```sql
CALL apoc.custom.asFunction(
'inference.match.qabot',
'RETURN \'{"股票名称":"value","地域":"value","上市日期":"value","性别_别名":"value","高管":"value","学历":"value","行业":"value","股票代码":"value"}\' AS nodeHitsRules',
'STRING',
NULL,
false,
'实体匹配规则'
);
```

### 5.4 Configure intent matching rules
>`CONTAINS` mode, `EQUALS` mode, and `OTHER` mode (word-list intersection)
```sql
CALL apoc.custom.asFunction(
'inference.intended.qabot',
'RETURN \'[{"label":"上市日期","return_var_alias":"n1","sort":1,"list":["是什么时候","什么时候"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"学历","return_var_alias":"n2","sort":2,"list":["什么学历"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"高管","return_var_alias":"n3","sort":3,"list":["高管有多少位","高管都有哪些","高管有多少个"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票","return_var_alias":"n4","sort":4,"list":["哪些上市公司","有多少家上市公司","哪个公司","有多少家","公司有哪些","公司有多少家","股票有哪些","股票有多少支","股票有多少个","股票代码?","股票?"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"行业","return_var_alias":"n5","sort":5,"list":["什么行业","同一个行业嘛"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票名称","return_var_alias":"n6","sort":6,"list":["股票名称?"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n7","sort":7,"list":["性别"],"parse_mode":"OTHER","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n8","sort":8,"list":["查询性别"],"parse_mode":"EQUALS","order_by_field":null,"order_by_type":null}]\' AS intendedIntent',
'STRING',
NULL,
false,
'预期意图'
);
```
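Since these configurations live inside JSON strings, it is easy to miss a malformed rule. A quick check is to parse the registered function output back into rows; a minimal sketch, assuming the functions above were registered in this database:

```cypher
// Optional: list the configured intent rules to verify the JSON round-trips
WITH apoc.convert.fromJsonList(custom.inference.intended.qabot()) AS rules
UNWIND rules AS rule
RETURN rule.label AS label, rule.parse_mode AS mode, rule.list AS phrases
ORDER BY rule.sort;
```
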
### 5.5 Configure time and page-number parsing rules
```sql
CALL apoc.custom.asFunction(
'inference.parseadd.qabot',
'WITH $time AS time,$page AS page,$entityRecognitionHit AS entityRecognitionHit WITH entityRecognitionHit,REDUCE(l=\'\',e IN time.list | l+\'({var}.value>=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[0],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\' AND {var}.value<=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[1],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\') OR \') AS timeFilter, REDUCE(l=\'\',e IN page.list | l+\'{var}.value\'+e[0]+e[1]+\' AND \') AS pageFilter CALL apoc.case([size(timeFilter)>4,\'RETURN SUBSTRING($timeFilter,0,size($timeFilter)-4) AS timeFilter\'],\'RETURN "" AS timeFilter\',{timeFilter:timeFilter}) YIELD value WITH entityRecognitionHit,value.timeFilter AS timeFilter,pageFilter CALL apoc.case([size(pageFilter)>5,\'RETURN SUBSTRING($pageFilter,0,size($pageFilter)-5) AS pageFilter\'],\'RETURN "" AS pageFilter\',{pageFilter:pageFilter}) YIELD value WITH entityRecognitionHit,timeFilter,value.pageFilter AS pageFilter WITH entityRecognitionHit,timeFilter,pageFilter CALL apoc.case([timeFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'上市日期\\\',[{category:\\\'node\\\',labels:[\\\'上市日期\\\'],properties_filter:[{value:$timeFilter}]}]]]) AS time\'],\'RETURN {} AS time\',{timeFilter:timeFilter}) YIELD value WITH value.time AS time,pageFilter,entityRecognitionHit CALL apoc.case([pageFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'文章页数\\\',[{category:\\\'node\\\',labels:[\\\'文章页数\\\'],properties_filter:[{value:$pageFilter}]}]]]) AS page\'],\'RETURN {} AS page\',{pageFilter:pageFilter}) YIELD value WITH value.page AS page,time,entityRecognitionHit RETURN apoc.map.setKey({},\'entities\',apoc.map.merge(apoc.map.merge(entityRecognitionHit.entities,time),page)) AS entityRecognitionHit',
'MAP',
[['entityRecognitionHit','MAP'],['time','MAP'],['page','MAP']],
false,
'问答解析的内容增加到entityRecognitionHit'
);
```

### 5.6 Configure operator rules
```sql
CALL apoc.custom.asFunction(
'inference.operators.qabot',
'RETURN \'[{"keywords":["最多","最大"],"operator":{"sort": 1,"agg_operator_field": "value","agg_operator": "MAX","agg_operator_type": "NODE"}},{"keywords":["最小","最少"],"operator":{"sort": 2,"agg_operator_field": "value","agg_operator": "MIN","agg_operator_type": "NODE"}},{"keywords":["平均"],"operator":{"sort": 3,"agg_operator_field": "value","agg_operator": "AVG","agg_operator_type": "NODE"}},{"keywords":["有多少","有多少只","有多少支","多少只","多少支","多少","一共"],"operator":{"sort": 5,"agg_operator_field": "value","agg_operator": "COUNT","agg_operator_type": "NODE"}}]\' AS weight',
'STRING',
NULL,
false,
'配置算子规则'
);
```

### 5.7 Operator parsing module configuration
```sql
CALL apoc.custom.asFunction(
'inference.operators.parse',
'WITH $query AS query,custom.inference.operators.qabot() AS list WITH query,apoc.convert.fromJsonList(list) AS list UNWIND list AS map WITH map,REDUCE(l=[],em IN apoc.coll.sortMaps(REDUCE(l=[],e IN map.keywords | l+{wd:e,len:LENGTH(e)}),\'len\')| l+em.wd) AS keywords,query UNWIND keywords AS keyword WITH map,keyword,query CALL apoc.case([query CONTAINS keyword,\'RETURN $keyword AS hit\'],\'RETURN \\\' \\\' AS hit\',{keyword:keyword}) YIELD value WITH map,keyword,query,REPLACE(query,value.hit,\' \') AS trim_query,value.hit AS hit WITH query,trim_query,map.operator AS operator ORDER BY map.operator.sort ASC WITH COLLECT({query:query,trim_query:trim_query,operator:operator}) AS list WITH FILTER(e IN list WHERE e.query<>e.trim_query)[0] AS map,list WITH map,list[0] AS smap CALL apoc.case([map IS NOT NULL,\'RETURN $map AS map\'],\'RETURN $smap AS map\',{map:map,smap:smap}) YIELD value WITH value.map AS map CALL apoc.case([map.trim_query=map.query,\'RETURN {} AS operator\'],\'RETURN $operator AS operator\',{trim_query:map.trim_query,operator:map.operator}) YIELD value RETURN map.query AS query,map.trim_query AS trim_query,value.operator AS operator',
'MAP',
[['query','STRING']],
false,
'算子解析'
);
```
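A quick way to confirm the operator configuration is wired up is to run the parser on a question that contains one of the configured keywords. This sketch uses a sample question from section 8; `有多少` should resolve to a COUNT operator:

```cypher
// Check which aggregation operator a question triggers
RETURN custom.inference.operators.parse(LOWER('山西的上市公司有多少家?')) AS oper;
```
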
## 6. Configuring the Dictionary
### 6.1 Install dictionary dependencies
>Download the folder below, place it under the root directory, and set the Python executable path and the dictionary location.
```url
https://github.com/ongdb-contrib/ongdb-lab-apoc/tree/1.0/nlp
```

```shell
# nlp/nlp.properties
defined.dic.path=nlp/dic/dynamic/dynamic.dic
python.commands.path=E:\\software\\anaconda3\\python.exe
```

### 6.2 Configure terms
```cypher
//dic,dynamic,dynamic.dic
//terms used by the intent configuration
WITH custom.inference.intended.qabot() AS str
WITH apoc.convert.fromJsonList(str) AS list
UNWIND list AS map
WITH map.label AS label,map.list AS list,map
WHERE UPPER(map.parse_mode)<>'CONTAINS' AND UPPER(map.parse_mode)<>'EQUALS'
WITH apoc.coll.union([label],list) AS list
UNWIND list AS wd
WITH collect(DISTINCT wd) AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

//terms from the graph data schema
WITH custom.inference.search.qabot() AS str
WITH apoc.convert.fromJsonMap(str).graph AS graph
WITH graph.relationships AS relationships,graph.nodes AS nodes
WITH REDUCE(l=[],e IN relationships | l+e.type) AS relationships,REDUCE(l=[],e IN nodes | l+e.labels[0]) AS nodes
WITH apoc.coll.union(relationships,nodes) AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

//terms from the entity matching rules
WITH custom.inference.match.qabot() AS str
WITH olab.map.keys(apoc.convert.fromJsonMap(str)) AS list
UNWIND list AS lb
WITH lb
WHERE lb<>'性别' AND lb<>'上市日期'
CALL apoc.cypher.run('MATCH (n:'+lb+') WHERE NOT n.value CONTAINS \' \' RETURN COLLECT(DISTINCT n.value) AS list',{}) YIELD value
WITH value.list AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

//additional custom terms
RETURN olab.nlp.userdic.add('dynamic.dic',['测试','胡永乐','性别'],true,'UTF-8') AS words;
```

### 6.3 Hot-reload the dictionary
```cypher
RETURN olab.nlp.userdic.refresh();
```

## 7. Installing the QA Procedures
### 7.1 QA answers
#### 7.1.1 Intent via string matching and word-list intersection (no classifier model)
```sql
// 1. The question
WITH LOWER('火力发电行业博士学历的男性高管有多少位?') AS query
// 2. Custom configuration: graph data schema / ontology weights / entity matching rules / expected intents
WITH query,
custom.inference.search.qabot() AS graphDataSchema,
custom.inference.weight.qabot() AS weight,
custom.inference.match.qabot() AS nodeHitsRules,
//the expected-intent definition supports a sort parameter
custom.inference.intended.qabot() AS intendedIntent,
custom.inference.operators.parse(query) AS oper
// 3. Custom query parsing: time expressions / page numbers
WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
// 4. Strip the time words from the query
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
// 5. Segment the query after the time words are stripped
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.hanlp.standard.segment(query) AS words
// 6. Keep only nouns that are not pure digits
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
// 7. Entity recognition
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
// 8. Build the weighted search queue
WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
// 9. Merge the custom parsing results into entityRecognitionHit
WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
// 10. Intent recognition
WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema
WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
// 11. Graph-context semantic parsing
WITH operator,graphDataSchema,intentSchema,intendedIntent,
olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
// 12. Query generation (no skip parameter set; each query extracts 100 results)
WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
// 13. Run the query (value is a MAP; its key count is at most the number of parsed intent classes)
CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
WITH olab.map.keys(value) AS keys,value
UNWIND keys AS key
WITH apoc.map.get(value,key) AS n
CALL apoc.case([apoc.coll.contains(['NODE'],apoc.meta.cypher.type(n)),'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result'],'WITH $n AS n RETURN TOSTRING(n) AS result',{n:n,sml:':'}) YIELD value
RETURN value.result AS result;
```

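When a question returns nothing, it is usually the segmentation or entity-recognition stage that missed a term. The early stages can be probed in isolation; a minimal sketch, assuming the OLAB plugin and the section 6 dictionary are loaded:

```cypher
// Probe the early pipeline stages: segment a question and keep the noun words
WITH LOWER('火力发电行业博士学历的男性高管有多少位?') AS query
WITH olab.hanlp.standard.segment(query) AS words
RETURN [mp IN words WHERE mp.nature STARTS WITH 'n' | mp.word] AS nouns;
```

The same pipeline is registered below as the procedure `custom.qabot`.
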
```sql
CALL apoc.custom.asProcedure(
'qabot',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR 
mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH olab.map.keys(value) AS keys,value UNWIND keys AS key WITH apoc.map.get(value,key) AS n CALL apoc.case([apoc.coll.contains([\'NODE\'],apoc.meta.cypher.type(n)),\'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result\'],\'WITH $n AS n RETURN TOSTRING(n) AS result\',{n:n,sml:\':\'}) YIELD value RETURN value.result AS result;', 399 | 'READ', 400 | [['result','STRING']], 401 | [['ask','STRING']], 402 | '问答机器人' 403 | ); 404 | ``` 405 | 406 | ```sql 407 | CALL custom.qabot('火力发电行业博士学历的男性高管有多少位?') YIELD result RETURN result; 408 | ``` 409 | 410 | #### 7.1.2 带分类模型的查询方式【分类模型解意图】 411 | ```json 412 | 问题:胡永乐是男性还是女性? 
问题类别:查询性别
```
```sql
// 1. The question; the class label comes from an external classifier model
WITH LOWER('胡永乐是男性还是女性?') AS query,'查询性别' AS classifier_type
// 2. Custom configuration: graph data schema / ontology weights / entity matching rules / expected intents
WITH classifier_type,query,
custom.inference.search.qabot() AS graphDataSchema,
custom.inference.weight.qabot() AS weight,
custom.inference.match.qabot() AS nodeHitsRules,
//the expected-intent definition supports a sort parameter
custom.inference.intended.qabot() AS intendedIntent,
custom.inference.operators.parse(query) AS oper
// 3. Custom query parsing: time expressions / page numbers
WITH classifier_type,oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
// 4. Strip the time words from the query
WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
// 5. Segment the query after the time words are stripped
WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.hanlp.standard.segment(query) AS words
// 6. Keep only nouns that are not pure digits
WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
// 7. Entity recognition
WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
// 8. Build the weighted search queue
WITH classifier_type,oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
// 9. Merge the custom parsing results into entityRecognitionHit
WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
// 10. Intent recognition (the classifier label is passed as the fifth argument)
WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent,classifier_type)) AS intentSchema
WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
// 11. Graph-context semantic parsing
WITH operator,graphDataSchema,intentSchema,intendedIntent,
olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
// 12. Query generation (no skip parameter set; each query extracts 100 results)
WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
// 13. Run the query (value is a MAP; its key count is at most the number of parsed intent classes)
CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
WITH olab.map.keys(value) AS keys,value
UNWIND keys AS key
WITH apoc.map.get(value,key) AS n
CALL apoc.case([apoc.coll.contains(['NODE'],apoc.meta.cypher.type(n)),'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result'],'WITH $n AS n RETURN TOSTRING(n) AS result',{n:n,sml:':'}) YIELD value
RETURN value.result AS result;
```

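The only difference from 7.1.1 is the fifth argument to `olab.intent.schema.parse`: the class label predicted by an external classifier. To see what the classifier path produces before running the full pipeline, the intent schema can be inspected directly; a sketch using only functions registered above:

```cypher
// Inspect the intent schema produced when a classifier label is supplied
WITH LOWER('胡永乐是男性还是女性?') AS query,'查询性别' AS classifier_type
WITH query,classifier_type,
     custom.inference.search.qabot() AS graphDataSchema,
     custom.inference.intended.qabot() AS intendedIntent,
     olab.hanlp.standard.segment(query) AS segs
WITH graphDataSchema,intendedIntent,query,classifier_type,
     [mp IN segs WHERE mp.nature STARTS WITH 'n' | mp.word] AS words
RETURN olab.intent.schema.parse(graphDataSchema,query,words,intendedIntent,classifier_type) AS intentSchema;
```
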
### 7.2 Generated Cypher for a question
```sql
// 1. The question
WITH LOWER('火力发电行业博士学历的男性高管有多少位?') AS query
// 2. Custom configuration: graph data schema / ontology weights / entity matching rules / expected intents
WITH query,
custom.inference.search.qabot() AS graphDataSchema,
custom.inference.weight.qabot() AS weight,
custom.inference.match.qabot() AS nodeHitsRules,
//the expected-intent definition supports a sort parameter
custom.inference.intended.qabot() AS intendedIntent,
custom.inference.operators.parse(query) AS oper
// 3. Custom query parsing: time expressions / page numbers
WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
// 4. Strip the time words from the query
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
// 5. Segment the query after the time words are stripped
WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.hanlp.standard.segment(query) AS words
// 6. Keep only nouns that are not pure digits
WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
// 7. Entity recognition
WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
// 8. Build the weighted search queue
WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
// 9. Merge the custom parsing results into entityRecognitionHit
WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
// 10. Intent recognition
WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema
WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
// 11. Graph-context semantic parsing
WITH operator,graphDataSchema,intentSchema,intendedIntent,
olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
// 12. Query generation (no skip parameter set; each query extracts 100 results)
WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
RETURN cypher;
```

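The final step is a plain string rewrite on the generated statement, deduplicating the returned variable. As an illustration only (the input string here is hypothetical, not a real generated query):

```cypher
// The RETURN n -> RETURN DISTINCT n post-processing is a simple string rewrite
RETURN REPLACE('MATCH (n:股票) RETURN n','RETURN n','RETURN DISTINCT n') AS rewritten;
```
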
```sql
CALL apoc.custom.asProcedure(
'qabot.cypher',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher RETURN cypher;',
'READ',
[['cypher','STRING']],
[['ask','STRING']],
'问答机器人:生成查询语句'
);
```

```sql
CALL custom.qabot.cypher('火力发电行业博士学历的男性高管有多少位?') YIELD cypher RETURN cypher;
```

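With `custom.qabot.cypher` registered, the generated statement can also be executed in the same query by feeding it into `apoc.cypher.run`; a small sketch using another sample question from section 8:

```cypher
// Generate the Cypher for a question and run it in one go
CALL custom.qabot.cypher('山西都有哪些上市公司?') YIELD cypher
CALL apoc.cypher.run(cypher,{}) YIELD value
RETURN value;
```
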
### 7.3 QA reasoning graph
```sql
// 1. The question
WITH LOWER('火力发电行业博士学历的男性高管有多少位?') AS query
// 2. Custom configuration: graph data schema / ontology weights / entity matching rules / expected intents
WITH query,
custom.inference.search.qabot() AS graphDataSchema,
custom.inference.weight.qabot() AS weight,
custom.inference.match.qabot() AS nodeHitsRules,
//the expected-intent definition supports a sort parameter
custom.inference.intended.qabot() AS intendedIntent
// 3. Custom query parsing: time expressions / page numbers
WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,
olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page
// 4. Strip the time words from the query
WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
// 5. Segment the query after the time words are stripped
WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.hanlp.standard.segment(query) AS words
// 6. Keep only nouns that are not pure digits
WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
// 7. Entity recognition
WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
// 8. Build the weighted search queue
WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
// 9. Merge the custom parsing results into entityRecognitionHit
WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
// 10. Intent recognition
WITH graphDataSchema,intendedIntent,words,entityRecognitionHit,
apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema
WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
// 11. Graph-context semantic parsing
WITH graphDataSchema,intentSchema,intendedIntent,
olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
// 12. Query generation (no skip parameter set; each query extracts 100 results)
WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),'',-1,10,{}) AS cypher
// 13. Run the query (value is a MAP; its key count is at most the number of parsed intent classes)
CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
WITH value.graph AS graph
UNWIND graph AS path
RETURN path;
```

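Instead of returning whole paths, the reasoning graph can be flattened into node and relationship lists, which is often more convenient for a frontend. A minimal sketch, assuming `custom.qabot.graph` is installed as below:

```cypher
// Flatten the reasoning paths into node/relationship lists
CALL custom.qabot.graph('火力发电行业博士学历的男性高管有多少位?') YIELD path
RETURN nodes(path) AS ns, relationships(path) AS rs;
```
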
```sql
CALL apoc.custom.asProcedure(
'qabot.graph',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),\'\',-1,10,{}) AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH value.graph AS graph UNWIND graph AS path RETURN path;',
'READ',
[['path','PATH']],
[['ask','STRING']],
'问答机器人:问答推理图谱'
);
```

```sql
CALL custom.qabot.graph('火力发电行业博士学历的男性高管有多少位?') YIELD path RETURN path;
```

### 7.4 Generate a recommended-question list
```cypher
// 1. The question
WITH LOWER('2023年三月六日上市的股票代码?') AS query
// 2. Custom configuration: graph data schema / ontology weights / entity matching rules / expected intents
WITH query,query AS raw_query,
custom.inference.search.qabot() AS graphDataSchema,
custom.inference.weight.qabot() AS weight,
custom.inference.match.qabot() AS nodeHitsRules,
//the expected-intent definition supports a sort parameter
custom.inference.intended.qabot() AS intendedIntent,
custom.inference.operators.parse(query) AS oper
// 3. Custom query parsing: time expressions / page numbers
WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
// 4. Strip the time words from the query
WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
// 5. Segment the query after the time words are stripped
WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
olab.hanlp.standard.segment(query) AS words
// 6. Keep only nouns that are not pure digits
WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
// 7. Entity recognition
WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
// 8. Build the weighted search queue
WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit
// 9. Replace recognized entity mentions with their labels and score the rewrite
WITH raw_query,entityRecognitionHit.entities AS map
WITH raw_query,olab.map.keys(map) AS keys,map
WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps
WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps
WITH raw_query,olab.replace(raw_query,reps) AS re_query
WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1
RETURN raw_query,re_query,score ORDER BY score DESC
```

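The recommendation score is simply the edit-distance similarity between the raw question and its label-generalized rewrite. As an illustration of the scoring call alone (both strings here are hypothetical, and the exact semantics of `olab.editDistance` are assumed from its use above):

```cypher
// Score a raw question against a label-generalized rewrite
RETURN olab.editDistance('2023年三月六日上市的股票代码?','上市日期上市的股票代码?') AS score;
```
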
```cypher
CALL apoc.custom.asProcedure(
'qabot.recommend_list',
'WITH LOWER($qa) AS query WITH query,query AS raw_query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.4}) AS entityRecognitionHits WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit WITH raw_query,entityRecognitionHit.entities AS map WITH raw_query,olab.map.keys(map) AS keys,map WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps WITH raw_query,olab.replace(raw_query,reps) AS re_query WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1 AND score>0.6 RETURN DISTINCT raw_query,re_query,score ORDER BY score DESC LIMIT 10',
'READ',
[['raw_query','STRING'],['re_query','STRING'],['score','NUMBER']],
[['qa','STRING']],
'推荐问题列表:自动推荐'
);
```
```cypher
CALL custom.qabot.recommend_list('2023年三月六日上市的股票代码?') YIELD raw_query,re_query,score RETURN raw_query,re_query,score
```

## 8. Configuring Sample Questions
```cypher
WITH '[{"qa":"火力发电行业博士学历的男性高管有多少位?","label":"学历"},{"qa":"山西都有哪些上市公司?","label":"地域"},{"qa":"富奥股份的高管都是什么学历?","label":"学历"},{"qa":"中国宝安属于什么行业?","label":"股票"},{"qa":"建筑工程行业有多少家上市公司?","label":"行业"},{"qa":"刘卫国是哪个公司的高管?","label":"高管"},{"qa":"美丽生态上市时间是什么时候?","label":"时间"},{"qa":"山西的上市公司有多少家?","label":"地域"},{"qa":"博士学历的高管都有哪些?","label":"学历"},{"qa":"上市公司是博士学历的高管有多少个?","label":"学历"},{"qa":"刘卫国是什么学历?","label":"高管"},{"qa":"富奥股份的男性高管有多少个?","label":"高管"},{"qa":"同在火力发电行业的上市公司有哪些?","label":"行业"},{"qa":"同在火力发电行业的上市公司有多少家?","label":"行业"},{"qa":"大悦城和荣盛发展是同一个行业嘛?","label":"股票"},{"qa":"同在河北的上市公司有哪些?","label":"股票"},{"qa":"神州高铁是什么时候上市的?","label":"时间"},{"qa":"火力发电行业男性高管有多少个?","label":"高管"},{"qa":"2023年三月六日上市的股票代码?","label":"股票"},{"qa":"2023年三月六日上市的股票有哪些?","label":"股票"},{"qa":"2023年三月六日上市的股票有多少个?","label":"股票"},{"qa":"胡永乐是什么性别?","label":"性别"}]' AS list
WITH apoc.convert.fromJsonList(list) AS list
UNWIND list AS map
WITH map
MERGE (n:DEMO_QA {qa:map.qa,label:map.label});
```

## 9. Running the Graph QABot Page
![QABot](images/QABot.png)

--------------------------------------------------------------------------------
/cypher/build.cypher:
--------------------------------------------------------------------------------

//indexes and constraints
CREATE CONSTRAINT ON (n:股票代码) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:股票) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:股票名称) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:地域) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:行业) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:上市日期) ASSERT (n.value) IS NODE KEY; 8 | CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY; 9 | CREATE INDEX ON :高管(value); 10 | CREATE CONSTRAINT ON (n:性别) ASSERT (n.value) IS NODE KEY; 11 | CREATE CONSTRAINT ON (n:学历) ASSERT (n.value) IS NODE KEY; 12 | CREATE CONSTRAINT ON (n:性别_别名) ASSERT (n.value) IS NODE KEY; 13 | 14 | //构建数据 15 | //将生成的CSV数据,放置在/import目录下,CSV转为UTF-8-BOM格式(可以使用Notepad++转格式) 16 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row 17 | WITH row.ts_code AS fval,row.symbol AS tval 18 | WHERE tval IS NOT NULL AND tval<>'' 19 | MERGE (f:股票 {value:fval}) 20 | MERGE (t:股票代码 {value:tval}) 21 | MERGE (f)-[:股票代码]->(t); 22 | 23 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row 24 | WITH row.ts_code AS fval,row.name AS tval 25 | WHERE tval IS NOT NULL AND tval<>'' 26 | MERGE (f:股票 {value:fval}) 27 | MERGE (t:股票名称 {value:tval}) 28 | MERGE (f)-[:股票名称]->(t); 29 | 30 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row 31 | WITH row.ts_code AS fval,row.area AS tval 32 | WHERE tval IS NOT NULL AND tval<>'' 33 | MERGE (f:股票 {value:fval}) 34 | MERGE (t:地域 {value:tval}) 35 | MERGE (f)-[:地域]->(t); 36 | 37 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row 38 | WITH row.ts_code AS fval,row.industry AS tval 39 | WHERE tval IS NOT NULL AND tval<>'' 40 | MERGE (f:股票 {value:fval}) 41 | MERGE (t:行业 {value:tval}) 42 | MERGE (f)-[:所属行业]->(t); 43 | 44 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row 45 | WITH row.ts_code AS fval,row.list_date AS tval 46 | WHERE tval IS NOT NULL AND tval<>'' 47 | MERGE (f:股票 {value:fval}) 48 | MERGE (t:上市日期 {value:TOINTEGER(tval)}) 49 | MERGE (f)-[:上市日期]->(t); 50 | 51 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row 52 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval 53 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='') 54 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name} 55 | MERGE (t:股票 {value:tval}) 56 | MERGE (f)-[:任职于]->(t); 57 | 58 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row 59 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.gender AS tval 60 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='') 61 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name} 62 | MERGE (t:性别 {value:tval}) 63 | MERGE (f)-[:性别]->(t); 64 | 65 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row 66 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.edu AS tval 67 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='') 68 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name} 69 | MERGE (t:学历 {value:tval}) 70 | MERGE (f)-[:学历]->(t); 71 | 72 | WITH ['女性','女'] AS list 73 | UNWIND list AS wd 74 | WITH wd 75 | MATCH (n:性别) WHERE n.value='F' WITH n,wd 76 | MERGE (t:性别_别名 {value:wd}) 77 | MERGE (n)-[:别名]->(t); 78 | 79 | WITH ['男性','男'] AS list 80 | UNWIND list AS wd 81 | WITH wd 82 | MATCH (n:性别) WHERE n.value='M' WITH n,wd 83 | MERGE (t:性别_别名 {value:wd}) 84 | MERGE (n)-[:别名]->(t); 85 | 86 | //配置图数据模型 87 | CALL apoc.custom.asFunction( 88 | 'inference.search.qabot', 89 | 'RETURN 
\'{"graph":{"nodes":[{"properties_filter":[],"id":"1","labels":["股票代码"]},{"properties_filter":[],"id":"2","labels":["股票"]},{"properties_filter":[],"id":"3","labels":["股票名称"]},{"properties_filter":[],"id":"4","labels":["地域"]},{"properties_filter":[],"id":"5","labels":["行业"]},{"properties_filter":[],"id":"6","labels":["上市日期"]},{"properties_filter":[],"id":"7","labels":["高管"]},{"properties_filter":[],"id":"8","labels":["性别"]},{"properties_filter":[],"id":"9","labels":["学历"]},{"properties_filter":[],"id":"10","labels":["性别_别名"]}],"relationships":[{"startNode":"2","properties_filter":[],"id":"1","type":"股票代码","endNode":"1"},{"startNode":"2","properties_filter":[],"id":"2","type":"股票名称","endNode":"3"},{"startNode":"2","properties_filter":[],"id":"3","type":"地域","endNode":"4"},{"startNode":"2","properties_filter":[],"id":"4","type":"所属行业","endNode":"5"},{"startNode":"2","properties_filter":[],"id":"5","type":"上市日期","endNode":"6"},{"startNode":"7","properties_filter":[],"id":"6","type":"任职于","endNode":"2"},{"startNode":"7","properties_filter":[],"id":"7","type":"性别","endNode":"8"},{"startNode":"7","properties_filter":[],"id":"8","type":"学历","endNode":"9"},{"startNode":"8","properties_filter":[],"id":"9","type":"别名","endNode":"10"}]}}\' AS graphDataSchema', 90 | 'STRING', 91 | NULL, 92 | false, 93 | '搜索时图数据扩展模式(schema)的动态获取' 94 | ); 95 | 96 | //配置实体权重规则 97 | CALL apoc.custom.asFunction( 98 | 'inference.weight.qabot', 99 | 'RETURN \'{"LABEL":{"股票":12,"高管":11}}\' AS weight', 100 | 'STRING', 101 | NULL, 102 | false, 103 | '本体权重' 104 | ); 105 | 106 | //配置实体搜索规则 107 | CALL apoc.custom.asFunction( 108 | 'inference.match.qabot', 109 | 'RETURN \'{"股票名称":"value","地域":"value","上市日期":"value","性别_别名":"value","高管":"value","学历":"value","行业":"value","股票代码":"value"}\' AS nodeHitsRules', 110 | 'STRING', 111 | NULL, 112 | false, 113 | '实体匹配规则' 114 | ); 115 | 116 | //配置意图匹配规则 117 | CALL apoc.custom.asFunction( 118 | 'inference.intended.qabot', 119 | 'RETURN \'[{"label":"上市日期","return_var_alias":"n1","sort":1,"list":["是什么时候","什么时候"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"学历","return_var_alias":"n2","sort":2,"list":["什么学历"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"高管","return_var_alias":"n3","sort":3,"list":["高管有多少位","高管都有哪些","高管有多少个"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票","return_var_alias":"n4","sort":4,"list":["哪些上市公司","有多少家上市公司","哪个公司","有多少家","公司有哪些","公司有多少家","股票有哪些","股票有多少支","股票有多少个","股票代码?","股票?"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"行业","return_var_alias":"n5","sort":5,"list":["什么行业","同一个行业嘛"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票名称","return_var_alias":"n6","sort":6,"list":["股票名称?"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n7","sort":7,"list":["性别"],"parse_mode":"OTHER","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n8","sort":8,"list":["查询性别"],"parse_mode":"EQUALS","order_by_field":null,"order_by_type":null}]\' AS intendedIntent', 120 | 'STRING', 121 | NULL, 122 | false, 123 | '预期意图' 124 | ); 125 | 126 | //配置时间页码处理等规则 127 | CALL apoc.custom.asFunction( 128 | 'inference.parseadd.qabot', 129 | 'WITH $time AS time,$page AS page,$entityRecognitionHit AS entityRecognitionHit WITH entityRecognitionHit,REDUCE(l=\'\',e IN time.list | l+\'({var}.value>=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[0],\'yyyy-MM-dd 
// Configure parsing rules for time and page-number expressions
CALL apoc.custom.asFunction(
'inference.parseadd.qabot',
'WITH $time AS time,$page AS page,$entityRecognitionHit AS entityRecognitionHit WITH entityRecognitionHit,REDUCE(l=\'\',e IN time.list | l+\'({var}.value>=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[0],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\' AND {var}.value<=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[1],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\') OR \') AS timeFilter, REDUCE(l=\'\',e IN page.list | l+\'{var}.value\'+e[0]+e[1]+\' AND \') AS pageFilter CALL apoc.case([size(timeFilter)>4,\'RETURN SUBSTRING($timeFilter,0,size($timeFilter)-4) AS timeFilter\'],\'RETURN "" AS timeFilter\',{timeFilter:timeFilter}) YIELD value WITH entityRecognitionHit,value.timeFilter AS timeFilter,pageFilter CALL apoc.case([size(pageFilter)>5,\'RETURN SUBSTRING($pageFilter,0,size($pageFilter)-5) AS pageFilter\'],\'RETURN "" AS pageFilter\',{pageFilter:pageFilter}) YIELD value WITH entityRecognitionHit,timeFilter,value.pageFilter AS pageFilter WITH entityRecognitionHit,timeFilter,pageFilter CALL apoc.case([timeFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'上市日期\\\',[{category:\\\'node\\\',labels:[\\\'上市日期\\\'],properties_filter:[{value:$timeFilter}]}]]]) AS time\'],\'RETURN {} AS time\',{timeFilter:timeFilter}) YIELD value WITH value.time AS time,pageFilter,entityRecognitionHit CALL apoc.case([pageFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'文章页数\\\',[{category:\\\'node\\\',labels:[\\\'文章页数\\\'],properties_filter:[{value:$pageFilter}]}]]]) AS page\'],\'RETURN {} AS page\',{pageFilter:pageFilter}) YIELD value WITH value.page AS page,time,entityRecognitionHit RETURN apoc.map.setKey({},\'entities\',apoc.map.merge(apoc.map.merge(entityRecognitionHit.entities,time),page)) AS entityRecognitionHit',
'MAP',
[['entityRecognitionHit','MAP'],['time','MAP'],['page','MAP']],
false,
'Adds the parsed question content to entityRecognitionHit'
);

// Configure aggregation operator rules
CALL apoc.custom.asFunction(
'inference.operators.qabot',
'RETURN \'[{"keywords":["最多","最大"],"operator":{"sort": 1,"agg_operator_field": "value","agg_operator": "MAX","agg_operator_type": "NODE"}},{"keywords":["最小","最少"],"operator":{"sort": 2,"agg_operator_field": "value","agg_operator": "MIN","agg_operator_type": "NODE"}},{"keywords":["平均"],"operator":{"sort": 3,"agg_operator_field": "value","agg_operator": "AVG","agg_operator_type": "NODE"}},{"keywords":["有多少","有多少只","有多少支","多少只","多少支","多少","一共"],"operator":{"sort": 5,"agg_operator_field": "value","agg_operator": "COUNT","agg_operator_type": "NODE"}}]\' AS weight',
'STRING',
NULL,
false,
'Aggregation operator rules'
);
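// Illustrative check (not part of the original script): the operator rules are stored
// as a JSON string, so they can be decoded the same way inference.operators.parse does below:
RETURN apoc.convert.fromJsonList(custom.inference.operators.qabot()) AS operators;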
// Configure the operator parsing module
CALL apoc.custom.asFunction(
'inference.operators.parse',
'WITH $query AS query,custom.inference.operators.qabot() AS list WITH query,apoc.convert.fromJsonList(list) AS list UNWIND list AS map WITH map,REDUCE(l=[],em IN apoc.coll.sortMaps(REDUCE(l=[],e IN map.keywords | l+{wd:e,len:LENGTH(e)}),\'len\')| l+em.wd) AS keywords,query UNWIND keywords AS keyword WITH map,keyword,query CALL apoc.case([query CONTAINS keyword,\'RETURN $keyword AS hit\'],\'RETURN \\\' \\\' AS hit\',{keyword:keyword}) YIELD value WITH map,keyword,query,REPLACE(query,value.hit,\' \') AS trim_query,value.hit AS hit WITH query,trim_query,map.operator AS operator ORDER BY map.operator.sort ASC WITH COLLECT({query:query,trim_query:trim_query,operator:operator}) AS list WITH FILTER(e IN list WHERE e.query<>e.trim_query)[0] AS map,list WITH map,list[0] AS smap CALL apoc.case([map IS NOT NULL,\'RETURN $map AS map\'],\'RETURN $smap AS map\',{map:map,smap:smap}) YIELD value WITH value.map AS map CALL apoc.case([map.trim_query=map.query,\'RETURN {} AS operator\'],\'RETURN $operator AS operator\',{trim_query:map.trim_query,operator:map.operator}) YIELD value RETURN map.query AS query,map.trim_query AS trim_query,value.operator AS operator',
'MAP',
[['query','STRING']],
false,
'Operator parsing'
);

//dic,dynamic,dynamic.dic
// Words from the intent configuration
WITH custom.inference.intended.qabot() AS str
WITH apoc.convert.fromJsonList(str) AS list
UNWIND list AS map
WITH map.label AS label,map.list AS list,map
WHERE UPPER(map.parse_mode)<>'CONTAINS' AND UPPER(map.parse_mode)<>'EQUALS'
WITH apoc.coll.union([label],list) AS list
UNWIND list AS wd
WITH collect(DISTINCT wd) AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

// Words from the graph data model
WITH custom.inference.search.qabot() AS str
WITH apoc.convert.fromJsonMap(str).graph AS graph
WITH graph.relationships AS relationships,graph.nodes AS nodes
WITH REDUCE(l=[],e IN relationships | l+e.type) AS relationships,REDUCE(l=[],e IN nodes | l+e.labels[0]) AS nodes
WITH apoc.coll.union(relationships,nodes) AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

// Words from the entity matching rules
WITH custom.inference.match.qabot() AS str
WITH olab.map.keys(apoc.convert.fromJsonMap(str)) AS list
UNWIND list AS lb
WITH lb
WHERE lb<>'性别' AND lb<>'上市日期'
CALL apoc.cypher.run('MATCH (n:'+lb+') WHERE NOT n.value CONTAINS \' \' RETURN COLLECT(DISTINCT n.value) AS list',{}) YIELD value
WITH value.list AS list
RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;

RETURN olab.nlp.userdic.add('dynamic.dic',['测试','胡永乐','性别'],true,'UTF-8') AS words;

RETURN olab.nlp.userdic.refresh();
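// Illustrative call (not part of the original script): the parser returns the question,
// the question with the operator keyword stripped, and the matched aggregation operator,
// i.e. a map of the shape {query, trim_query, operator} per the registration above:
RETURN custom.inference.operators.parse('火力发电行业男性高管有多少个?') AS parsed;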
// Install the QA module stored procedures
// QA answers
CALL apoc.custom.asProcedure(
'qabot',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH olab.map.keys(value) AS keys,value UNWIND keys AS key WITH apoc.map.get(value,key) AS n CALL apoc.case([apoc.coll.contains([\'NODE\'],apoc.meta.cypher.type(n)),\'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result\'],\'WITH $n AS n RETURN TOSTRING(n) AS result\',{n:n,sml:\':\'}) YIELD value RETURN value.result AS result;',
'READ',
[['result','STRING']],
[['ask','STRING']],
'QA bot'
);
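// Example invocation (not part of the original script); the question is one of the
// DEMO_QA samples configured at the end of this file:
CALL custom.qabot('富奥股份的高管都是什么学历?') YIELD result RETURN result;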
// Cypher generated for a QA answer
CALL apoc.custom.asProcedure(
'qabot.cypher',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher RETURN cypher;',
'READ',
[['cypher','STRING']],
[['ask','STRING']],
'QA bot: generate the query statement'
);

// QA inference graph
CALL apoc.custom.asProcedure(
'qabot.graph',
'WITH LOWER($ask) AS query WITH query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH graphDataSchema,intendedIntent,words,entityRecognitionHit, apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH graphDataSchema,intentSchema,intendedIntent, olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),\'\',-1,10,{}) AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH value.graph AS graph UNWIND graph AS path RETURN path;',
'READ',
[['path','PATH']],
[['ask','STRING']],
'QA bot: inference graph'
);

// Generate the recommended-question list
CALL apoc.custom.asProcedure(
'qabot.recommend_list',
'WITH LOWER($qa) AS query WITH query,query AS raw_query, custom.inference.search.qabot() AS graphDataSchema, custom.inference.weight.qabot() AS weight, custom.inference.match.qabot() AS nodeHitsRules, custom.inference.intended.qabot() AS intendedIntent, custom.inference.operators.parse(query) AS oper WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent, olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, olab.hanlp.standard.segment(query) AS words WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page, EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words, olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.4}) AS entityRecognitionHits WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit WITH raw_query,entityRecognitionHit.entities AS map WITH raw_query,olab.map.keys(map) AS keys,map WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps WITH raw_query,olab.replace(raw_query,reps) AS re_query WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1 AND score>0.6 RETURN DISTINCT raw_query,re_query,score ORDER BY score DESC LIMIT 10',
'READ',
[['raw_query','STRING'],['re_query','STRING'],['score','NUMBER']],
[['qa','STRING']],
'Recommended question list: automatic recommendation'
);
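// Example invocations (illustrative, not part of the original script); the sample
// question is taken from the DEMO_QA list configured below:
CALL custom.qabot.cypher('山西都有哪些上市公司?') YIELD cypher RETURN cypher;
CALL custom.qabot.graph('山西都有哪些上市公司?') YIELD path RETURN path;
CALL custom.qabot.recommend_list('山西都有哪些上市公司?') YIELD raw_query,re_query,score
RETURN raw_query,re_query,score;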
// Configure sample QA pairs
WITH '[{"qa":"火力发电行业博士学历的男性高管有多少位?","label":"学历"},{"qa":"山西都有哪些上市公司?","label":"地域"},{"qa":"富奥股份的高管都是什么学历?","label":"学历"},{"qa":"中国宝安属于什么行业?","label":"股票"},{"qa":"建筑工程行业有多少家上市公司?","label":"行业"},{"qa":"刘卫国是哪个公司的高管?","label":"高管"},{"qa":"美丽生态上市时间是什么时候?","label":"时间"},{"qa":"山西的上市公司有多少家?","label":"地域"},{"qa":"博士学历的高管都有哪些?","label":"学历"},{"qa":"上市公司是博士学历的高管有多少个?","label":"学历"},{"qa":"刘卫国是什么学历?","label":"高管"},{"qa":"富奥股份的男性高管有多少个?","label":"高管"},{"qa":"同在火力发电行业的上市公司有哪些?","label":"行业"},{"qa":"同在火力发电行业的上市公司有多少家?","label":"行业"},{"qa":"大悦城和荣盛发展是同一个行业嘛?","label":"股票"},{"qa":"同在河北的上市公司有哪些?","label":"股票"},{"qa":"神州高铁是什么时候上市的?","label":"时间"},{"qa":"火力发电行业男性高管有多少个?","label":"高管"},{"qa":"2023年三月六日上市的股票代码?","label":"股票"},{"qa":"2023年三月六日上市的股票有哪些?","label":"股票"},{"qa":"2023年三月六日上市的股票有多少个?","label":"股票"},{"qa":"胡永乐是什么性别?","label":"性别"}]' AS list
WITH apoc.convert.fromJsonList(list) AS list
UNWIND list AS map
WITH map
MERGE (n:DEMO_QA {qa:map.qa,label:map.label});
--------------------------------------------------------------------------------
/db/ongdb-enterprise-1.0.4-qabot-demo.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/db/ongdb-enterprise-1.0.4-qabot-demo.7z
--------------------------------------------------------------------------------
/images/QABot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/images/QABot.png
--------------------------------------------------------------------------------
/images/txt:
--------------------------------------------------------------------------------
1 | 2 |
--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /ppt/tt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ppt/问答机器人框架.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/ppt/问答机器人框架.pptx -------------------------------------------------------------------------------- /python/fetch_managers.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import pandas as pd 3 | import system_constant as sys_cnt 4 | import os 5 | import time 6 | 7 | # 初始化pro接口 8 | pro = ts.pro_api(sys_cnt.tushare_token()) 9 | 10 | 11 | # 获取股票相关信息 12 | def run(): 13 | # 查询当前所有正常上市交易的股票列表 14 | data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date') 15 | tss = data['ts_code'] 16 | result = [] 17 | ct = 0 18 | bk_ct = 100000 19 | for code in tss: 20 | time.sleep(0.5) 21 | # 获取单个公司高管全部数据 22 | df = pro.stk_managers(ts_code=code) 23 | result.append(df) 24 | ct += 1 25 | if ct > bk_ct: 26 | break 27 | df_merge = pd.concat(result) 28 | print(df_merge) 29 | df_merge.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\managers.csv', 30 | encoding='GBK', 31 | columns=['ts_code', 'ann_date', 'name', 'gender', 'lev', 'title', 'edu', 'national', 'birthday', 32 | 'begin_date', 'end_date'], 33 | index=False) 34 | 35 | 36 | if __name__ == '__main__': 37 | run() 38 | -------------------------------------------------------------------------------- /python/fetch_stocks.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import system_constant as sys_cnt 3 | import os 4 | 5 | # 初始化pro接口 6 | pro = ts.pro_api(sys_cnt.tushare_token()) 7 | 8 | 9 | # 获取股票相关信息 10 | def run(): 11 | # 查询当前所有正常上市交易的股票列表 12 | data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date') 13 | data.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\stocks.csv', 14 | encoding='GBK', 15 | columns=['ts_code', 'symbol', 'name', 'area', 'industry', 'list_date'], 16 | index=False) 17 | 18 | 19 | 
if __name__ == '__main__': 20 | run() 21 | -------------------------------------------------------------------------------- /python/system_constant.py: -------------------------------------------------------------------------------- 1 | # 在https://tushare.pro/网站注册账号并获取token 2 | def tushare_token(): 3 | return 'YOUR TOKEN' 4 | --------------------------------------------------------------------------------