├── README.md
├── csv
    ├── managers.csv
    └── stocks.csv
├── cypher
    └── build.cypher
├── db
    └── ongdb-enterprise-1.0.4-qabot-demo.7z
├── images
    ├── QABot.png
    └── txt
├── license
├── ppt
    ├── tt
    └── 问答机器人框架.pptx
└── python
    ├── fetch_managers.py
    ├── fetch_stocks.py
    └── system_constant.py


/README.md:
--------------------------------------------------------------------------------
  1 | # Graph QABot Demo
  2 | 
  3 | ## 一、软件安装
  4 | ### 1.1 安装ONgDB
  5 | - 下载ONgDB，下载社区版和企业版都可以
  6 | ```url
  7 | https://github.com/graphfoundation/ongdb/releases/tag/1.0.4
  8 | ```
  9 | 
 10 | ### 1.2 安装APOC和OLAB组件
 11 | >将组件下载后放置在<ONGDB_HOME>/plugins文件夹下面。
 12 | 
 13 | - 下载APOC
 14 | ```url
 15 | # 下载all版本
 16 | https://github.com/graphfoundation/ongdb-apoc/releases/tag/3.4.0.10
 17 | ```
 18 | 
 19 | - 下载OLAB
 20 | ```url
 21 | https://github.com/ongdb-contrib/ongdb-lab-apoc/releases/tag/1.0.0
 22 | ```
 23 | 
 24 | ### 1.3 修改配置并启动ONgDB
 25 | - 修改配置文件
 26 | ```shell
 27 | # conf/ongdb.conf
 28 | dbms.security.procedures.unrestricted=apoc.*,olab.*,custom.*
 29 | apoc.import.file.enabled=true
 30 | ```
 31 | 
 32 | - 启动图数据库节点
 33 | ```shell
 34 | # Windows启动ONgDB
 35 | bin\ongdb.bat console
 36 | ```
 37 | 
 38 | ## 二、获取数据
 39 | ### 2.1 沪深股票
 40 | ```python
 41 | import tushare as ts
 42 | import system_constant as sys_cnt
 43 | import os
 44 | 
 45 | # 初始化pro接口
 46 | pro = ts.pro_api(sys_cnt.tushare_token())
 47 | 
 48 | 
 49 | # 获取股票相关信息
 50 | def run():
 51 |     # 查询当前所有正常上市交易的股票列表
 52 |     data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
 53 |     data.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\stocks.csv',
 54 |               encoding='GBK',
 55 |               columns=['ts_code', 'symbol', 'name', 'area', 'industry', 'list_date'],
 56 |               index=False)
 57 | 
 58 | 
 59 | if __name__ == '__main__':
 60 |     run()
 61 | ```
 62 | 
 63 | ### 2.2 上市公司管理层
 64 | ```python
 65 | import tushare as ts
 66 | import pandas as pd
 67 | import system_constant as sys_cnt
 68 | import os
 69 | import time
 70 | 
 71 | # 初始化pro接口
 72 | pro = ts.pro_api(sys_cnt.tushare_token())
 73 | 
 74 | 
 75 | # 获取股票相关信息
 76 | def run():
 77 |     # 查询当前所有正常上市交易的股票列表
 78 |     data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
 79 |     tss = data['ts_code']
 80 |     result = []
 81 |     ct = 0
 82 |     bk_ct = 100000
 83 |     for code in tss:
 84 |         time.sleep(0.5)
 85 |         # 获取单个公司高管全部数据
 86 |         df = pro.stk_managers(ts_code=code)
 87 |         result.append(df)
 88 |         ct += 1
 89 |         if ct > bk_ct:
 90 |             break
 91 |     df_merge = pd.concat(result)
 92 |     print(df_merge)
 93 |     df_merge.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\managers.csv',
 94 |                     encoding='GBK',
 95 |                     columns=['ts_code', 'ann_date', 'name', 'gender', 'lev', 'title', 'edu', 'national', 'birthday',
 96 |                              'begin_date', 'end_date'],
 97 |                     index=False)
 98 | 
 99 | 
100 | if __name__ == '__main__':
101 |     run()
102 | ```
103 | 
104 | ## 三、设计图数据模型
105 | >将下面的JSON格式数据导入，[Graphene图数据建模工具](https://ongdb-contrib.github.io/graphene/app/)。
106 | 
107 | ```json
108 | {"data":{"nodes":[{"x":613,"y":284,"color":"#E52B50","label":"股票代码","properties":[{"id":"a2aeffb2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"afb48f90","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a1b949a0","isSelected":false,"isNode":true},{"x":678,"y":444,"color":"#AB274F","label":"股票","properties":[{"id":"a4a8beb4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a9bf77ad","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a5844bbd","isSelected":false,"isNode":true},{"x":413,"y":368,"color":"#3B7A57","label":"股票名称","properties":[{"id":"a08f768e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a09d0ab9","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a78c159d","isSelected":false,"isNode":true},{"x":426,"y":475,"color":"#FF7E00","label":"地域","properties":[{"id":"a59d20a1","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a28d19bb","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a49f8ebc","isSelected":false,"isNode":true},{"x":481,"y":557,"color":"#FF033E","label":"行业","properties":[{"id":"aea1c5b2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"aaa4b082","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a381f688","isSelected":false,"isNode":true},{"x":714,"y":569,"color":"#AF002A","label":"上市日期","properties":[{"id":"a284f1aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a0b444a4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ac81ffb4","isSelected":false,"isNode":true},{"x":834,"y":380,"color":"#E32636","label":"高管","properties":[{"id":"a4b02384","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"a2ade2ad","isSelected":false,"isNode":true},{"x":890,"y":266,"color":"#C46210","label":"性别","properties":[{"id":"ad9cab82","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a2aa0f86","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"af9a0f97","isSelected":false,"isNode":true},{"x":977,"y":447,"color":"#E52B50","label":"学历","properties":[{"id":"a090f498","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a8a0e1b4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ad931890","isSelected":false,"isNode":true},{"x":1001,"y":180,"color":"#AB274F","label":"性别_别名","properties":[{"id":"a3980888","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"aa8b5199","isSelected":false,"isNode":true}],"edges":[{"startNodeId":"a5844bbd","endNodeId":"a1b949a0","middlePointOffset":[0,0],"properties":[{"id":"a29766a0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票代码","id":"a5934a81","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a78c159d","middlePointOffset":[0,0],"properties":[{"id":"a196c08e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票名称","id":"acaecfb9","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a49f8ebc","middlePointOffset":[0,0],"properties":[{"id":"a88ae1a4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"地域","id":"aa94de98","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"a381f688","middlePointOffset":[0,0],"properties":[{"id":"a493c4a1","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"所属行业","id":"aead0aa2","isSelected":false,"isEdge":true},{"startNodeId":"a5844bbd","endNodeId":"ac81ffb4","middlePointOffset":[0,0],"properties":[{"id":"aebda5bd","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"上市日期","id":"a19f3281","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"a5844bbd","middlePointOffset":[0,0],"properties":[{"id":"adaf73aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"任职于","id":"a3bcec8c","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"af9a0f97","middlePointOffset":[0,0],"properties":[{"id":"aca28680","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"性别","id":"ae973eb2","isSelected":false,"isEdge":true},{"startNodeId":"a2ade2ad","endNodeId":"ad931890","middlePointOffset":[0,0],"properties":[{"id":"a7af3fb0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"学历","id":"aca71396","isSelected":false,"isEdge":true},{"startNodeId":"af9a0f97","endNodeId":"aa8b5199","middlePointOffset":[5.5,-6],"properties":[{"id":"a692c7a2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"别名","id":"a9a6bb89","isSelected":false,"isEdge":true}]}}
109 | ```
110 | 
111 | ## 四、构建图数据
112 | ### 4.1 设计索引和约束
113 | ```cypher
114 | CREATE CONSTRAINT ON (n:股票代码) ASSERT (n.value) IS NODE KEY;
115 | CREATE CONSTRAINT ON (n:股票) ASSERT (n.value) IS NODE KEY;
116 | CREATE CONSTRAINT ON (n:股票名称) ASSERT (n.value) IS NODE KEY;
117 | CREATE CONSTRAINT ON (n:地域) ASSERT (n.value) IS NODE KEY;
118 | CREATE CONSTRAINT ON (n:行业) ASSERT (n.value) IS NODE KEY;
119 | CREATE CONSTRAINT ON (n:上市日期) ASSERT (n.value) IS NODE KEY;
120 | CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY;
121 | CREATE INDEX ON :高管(value);
122 | CREATE CONSTRAINT ON (n:性别) ASSERT (n.value) IS NODE KEY;
123 | CREATE CONSTRAINT ON (n:学历) ASSERT (n.value) IS NODE KEY;
124 | CREATE CONSTRAINT ON (n:性别_别名) ASSERT (n.value) IS NODE KEY;
125 | ```
126 | 
127 | ### 4.2 构建数据
128 | >将生成的CSV数据，放置在<ONGDB_HOME>/import目录下，CSV转为UTF-8-BOM格式（可以使用Notepad++转格式）。
129 | 
130 | ```cypher
131 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
132 | WITH row.ts_code AS fval,row.symbol AS tval
133 | WHERE tval IS NOT NULL AND tval<>''
134 | MERGE (f:股票 {value:fval})
135 | MERGE (t:股票代码 {value:tval})
136 | MERGE (f)-[:股票代码]->(t);
137 | 
138 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
139 | WITH row.ts_code AS fval,row.name AS tval
140 | WHERE tval IS NOT NULL AND tval<>''
141 | MERGE (f:股票 {value:fval})
142 | MERGE (t:股票名称 {value:tval})
143 | MERGE (f)-[:股票名称]->(t);
144 | 
145 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
146 | WITH row.ts_code AS fval,row.area AS tval
147 | WHERE tval IS NOT NULL AND tval<>''
148 | MERGE (f:股票 {value:fval})
149 | MERGE (t:地域 {value:tval})
150 | MERGE (f)-[:地域]->(t);
151 | 
152 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
153 | WITH row.ts_code AS fval,row.industry AS tval
154 | WHERE tval IS NOT NULL AND tval<>''
155 | MERGE (f:股票 {value:fval})
156 | MERGE (t:行业 {value:tval})
157 | MERGE (f)-[:所属行业]->(t);
158 | 
159 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
160 | WITH row.ts_code AS fval,row.list_date AS tval
161 | WHERE tval IS NOT NULL AND tval<>''
162 | MERGE (f:股票 {value:fval})
163 | MERGE (t:上市日期 {value:TOINTEGER(tval)})
164 | MERGE (f)-[:上市日期]->(t);
165 | 
166 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
167 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval
168 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
169 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
170 | MERGE (t:股票 {value:tval})
171 | MERGE (f)-[:任职于]->(t);
172 | 
173 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
174 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.gender AS tval
175 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
176 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
177 | MERGE (t:性别 {value:tval})
178 | MERGE (f)-[:性别]->(t);
179 | 
180 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
181 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.edu AS tval
182 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
183 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
184 | MERGE (t:学历 {value:tval})
185 | MERGE (f)-[:学历]->(t);
186 | 
187 | WITH ['女性','女'] AS list
188 | UNWIND list AS wd
189 | WITH wd
190 | MATCH (n:性别) WHERE n.value='F' WITH n,wd
191 | MERGE (t:性别_别名 {value:wd})
192 | MERGE (n)-[:别名]->(t);
193 | 
194 | WITH ['男性','男'] AS list
195 | UNWIND list AS wd
196 | WITH wd
197 | MATCH (n:性别) WHERE n.value='M' WITH n,wd
198 | MERGE (t:性别_别名 {value:wd})
199 | MERGE (n)-[:别名]->(t);
200 | ```
201 | 
202 | ## 五、问答组件配置
203 | ### 5.1 配置图数据模型
204 | ```sql
205 | CALL apoc.custom.asFunction(
206 | 'inference.search.qabot',
207 | 'RETURN \'{"graph":{"nodes":[{"properties_filter":[],"id":"1","labels":["股票代码"]},{"properties_filter":[],"id":"2","labels":["股票"]},{"properties_filter":[],"id":"3","labels":["股票名称"]},{"properties_filter":[],"id":"4","labels":["地域"]},{"properties_filter":[],"id":"5","labels":["行业"]},{"properties_filter":[],"id":"6","labels":["上市日期"]},{"properties_filter":[],"id":"7","labels":["高管"]},{"properties_filter":[],"id":"8","labels":["性别"]},{"properties_filter":[],"id":"9","labels":["学历"]},{"properties_filter":[],"id":"10","labels":["性别_别名"]}],"relationships":[{"startNode":"2","properties_filter":[],"id":"1","type":"股票代码","endNode":"1"},{"startNode":"2","properties_filter":[],"id":"2","type":"股票名称","endNode":"3"},{"startNode":"2","properties_filter":[],"id":"3","type":"地域","endNode":"4"},{"startNode":"2","properties_filter":[],"id":"4","type":"所属行业","endNode":"5"},{"startNode":"2","properties_filter":[],"id":"5","type":"上市日期","endNode":"6"},{"startNode":"7","properties_filter":[],"id":"6","type":"任职于","endNode":"2"},{"startNode":"7","properties_filter":[],"id":"7","type":"性别","endNode":"8"},{"startNode":"7","properties_filter":[],"id":"8","type":"学历","endNode":"9"},{"startNode":"8","properties_filter":[],"id":"9","type":"别名","endNode":"10"}]}}\' AS graphDataSchema',
208 | 'STRING',
209 | NULL,
210 | false,
211 | '搜索时图数据扩展模式(schema)的动态获取'
212 | );
213 | ```
214 | 
215 | ### 5.2 配置实体权重规则
216 | ```sql
217 | CALL apoc.custom.asFunction(
218 | 'inference.weight.qabot',
219 | 'RETURN \'{"LABEL":{"股票":12,"高管":11}}\' AS weight',
220 | 'STRING',
221 | NULL,
222 | false,
223 | '本体权重'
224 | );
225 | ```
226 | 
227 | ### 5.3 配置实体搜索规则
228 | ```sql
229 | CALL apoc.custom.asFunction(
230 | 'inference.match.qabot',
231 | 'RETURN \'{"股票名称":"value","地域":"value","上市日期":"value","性别_别名":"value","高管":"value","学历":"value","行业":"value","股票代码":"value"}\' AS nodeHitsRules',
232 | 'STRING',
233 | NULL,
234 | false,
235 | '实体匹配规则'
236 | );
237 | ```
238 | 
239 | ### 5.4 配置意图匹配规则
240 | >`CONTAINS`模式、`EQUALS`模式、`OTHER`词列表求交集模式
241 | ```sql
242 | CALL apoc.custom.asFunction(
243 | 'inference.intended.qabot',
244 | 'RETURN \'[{"label":"上市日期","return_var_alias":"n1","sort":1,"list":["是什么时候","什么时候"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"学历","return_var_alias":"n2","sort":2,"list":["什么学历"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"高管","return_var_alias":"n3","sort":3,"list":["高管有多少位","高管都有哪些","高管有多少个"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票","return_var_alias":"n4","sort":4,"list":["哪些上市公司","有多少家上市公司","哪个公司","有多少家","公司有哪些","公司有多少家","股票有哪些","股票有多少支","股票有多少个","股票代码？","股票？"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"行业","return_var_alias":"n5","sort":5,"list":["什么行业","同一个行业嘛"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票名称","return_var_alias":"n6","sort":6,"list":["股票名称？"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n7","sort":7,"list":["性别"],"parse_mode":"OTHER","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n8","sort":8,"list":["查询性别"],"parse_mode":"EQUALS","order_by_field":null,"order_by_type":null}]\' AS intendedIntent',
245 | 'STRING',
246 | NULL,
247 | false,
248 | '预期意图'
249 | );
250 | ```
251 | 
252 | ### 5.5 配置时间页码处理等规则
253 | ```sql
254 | CALL apoc.custom.asFunction(
255 | 'inference.parseadd.qabot',
256 | 'WITH $time AS time,$page AS page,$entityRecognitionHit AS entityRecognitionHit WITH entityRecognitionHit,REDUCE(l=\'\',e IN time.list | l+\'({var}.value>=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[0],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\' AND {var}.value<=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[1],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\') OR \') AS timeFilter, REDUCE(l=\'\',e IN page.list | l+\'{var}.value\'+e[0]+e[1]+\' AND \') AS pageFilter CALL apoc.case([size(timeFilter)>4,\'RETURN SUBSTRING($timeFilter,0,size($timeFilter)-4) AS timeFilter\'],\'RETURN "" AS timeFilter\',{timeFilter:timeFilter}) YIELD value WITH entityRecognitionHit,value.timeFilter AS timeFilter,pageFilter CALL apoc.case([size(pageFilter)>5,\'RETURN SUBSTRING($pageFilter,0,size($pageFilter)-5) AS pageFilter\'],\'RETURN "" AS pageFilter\',{pageFilter:pageFilter}) YIELD value WITH entityRecognitionHit,timeFilter,value.pageFilter AS pageFilter WITH entityRecognitionHit,timeFilter,pageFilter CALL apoc.case([timeFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'上市日期\\\',[{category:\\\'node\\\',labels:[\\\'上市日期\\\'],properties_filter:[{value:$timeFilter}]}]]]) AS time\'],\'RETURN {} AS time\',{timeFilter:timeFilter}) YIELD value WITH value.time AS time,pageFilter,entityRecognitionHit CALL apoc.case([pageFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'文章页数\\\',[{category:\\\'node\\\',labels:[\\\'文章页数\\\'],properties_filter:[{value:$pageFilter}]}]]]) AS page\'],\'RETURN {} AS page\',{pageFilter:pageFilter}) YIELD value WITH value.page AS page,time,entityRecognitionHit RETURN apoc.map.setKey({},\'entities\',apoc.map.merge(apoc.map.merge(entityRecognitionHit.entities,time),page)) AS entityRecognitionHit',
257 | 'MAP',
258 | [['entityRecognitionHit','MAP'],['time','MAP'],['page','MAP']],
259 | false,
260 | '问答解析的内容增加到entityRecognitionHit'
261 | );
262 | ```
263 | 
264 | ### 5.6 配置算子规则
265 | ```sql
266 | CALL apoc.custom.asFunction(
267 | 'inference.operators.qabot',
268 | 'RETURN \'[{"keywords":["最多","最大"],"operator":{"sort": 1,"agg_operator_field": "value","agg_operator": "MAX","agg_operator_type": "NODE"}},{"keywords":["最小","最少"],"operator":{"sort": 2,"agg_operator_field": "value","agg_operator": "MIN","agg_operator_type": "NODE"}},{"keywords":["平均"],"operator":{"sort": 3,"agg_operator_field": "value","agg_operator": "AVG","agg_operator_type": "NODE"}},{"keywords":["有多少","有多少只","有多少支","多少只","多少支","多少","一共"],"operator":{"sort": 5,"agg_operator_field": "value","agg_operator": "COUNT","agg_operator_type": "NODE"}}]\' AS weight',
269 | 'STRING',
270 | NULL,
271 | false,
272 | '配置算子规则'
273 | );
274 | ```
275 | 
276 | ### 5.7 算子解析模块配置
277 | ```sql
278 | CALL apoc.custom.asFunction(
279 | 'inference.operators.parse',
280 | 'WITH $query AS query,custom.inference.operators.qabot() AS list WITH query,apoc.convert.fromJsonList(list) AS list UNWIND list AS map WITH map,REDUCE(l=[],em IN apoc.coll.sortMaps(REDUCE(l=[],e IN map.keywords | l+{wd:e,len:LENGTH(e)}),\'len\')| l+em.wd) AS keywords,query UNWIND keywords AS keyword WITH map,keyword,query CALL apoc.case([query CONTAINS keyword,\'RETURN $keyword AS hit\'],\'RETURN \\\' \\\' AS hit\',{keyword:keyword}) YIELD value WITH map,keyword,query,REPLACE(query,value.hit,\' \') AS trim_query,value.hit AS hit WITH query,trim_query,map.operator AS operator ORDER BY map.operator.sort ASC WITH COLLECT({query:query,trim_query:trim_query,operator:operator}) AS list WITH FILTER(e IN list WHERE e.query<>e.trim_query)[0] AS map,list WITH map,list[0] AS smap CALL apoc.case([map IS NOT NULL,\'RETURN $map AS map\'],\'RETURN $smap AS map\',{map:map,smap:smap}) YIELD value WITH value.map AS map CALL apoc.case([map.trim_query=map.query,\'RETURN {} AS operator\'],\'RETURN $operator AS operator\',{trim_query:map.trim_query,operator:map.operator}) YIELD value RETURN map.query AS query,map.trim_query AS trim_query,value.operator AS operator',
281 | 'MAP',
282 | [['query','STRING']],
283 | false,
284 | '算子解析'
285 | );
286 | ```
287 | 
288 | ## 六、配置词库
289 | ### 6.1 安装词库依赖
290 | >下载下面的文件夹，放置在<ONGDB_HOME>根目录下，并设置Python的执行路径和词典位置。
291 | ```url
292 | https://github.com/ongdb-contrib/ongdb-lab-apoc/tree/1.0/nlp
293 | ```
294 | 
295 | ```shell
296 | # nlp/nlp.properties
297 | defined.dic.path=nlp/dic/dynamic/dynamic.dic
298 | python.commands.path=E:\\software\\anaconda3\\python.exe
299 | ```
300 | 
301 | ### 6.2 配置词
302 | ```cypher
303 | //dic,dynamic,dynamic.dic
304 | //意图配置相关词
305 | WITH custom.inference.intended.qabot() AS str
306 | WITH apoc.convert.fromJsonList(str) as list
307 | UNWIND list AS map
308 | WITH map.label AS label,map.list as list,map
309 | WHERE UPPER(map.parse_mode)<>'CONTAINS' AND UPPER(map.parse_mode)<>'EQUALS'
310 | WITH apoc.coll.union([label],list) as list
311 | UNWIND list AS wd
312 | WITH collect(DISTINCT wd) AS list
313 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
314 | 
315 | //图数据模型相关词
316 | WITH custom.inference.search.qabot() AS str
317 | WITH apoc.convert.fromJsonMap(str).graph AS graph
318 | WITH graph.relationships AS relationships,graph.nodes AS nodes
319 | WITH REDUCE(l=[],e IN relationships | l+e.type) AS relationships,REDUCE(l=[],e IN nodes | l+e.labels[0]) AS nodes
320 | WITH apoc.coll.union(relationships,nodes) AS list
321 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
322 | 
323 | //实体匹配规则相关词
324 | WITH custom.inference.match.qabot() AS str
325 | WITH olab.map.keys(apoc.convert.fromJsonMap(str)) AS list
326 | UNWIND list AS lb
327 | WITH lb
328 | WHERE lb<>'性别' AND lb<>'上市日期'
329 | CALL apoc.cypher.run('MATCH (n:'+lb+') WHERE NOT n.value CONTAINS \' \' RETURN COLLECT(DISTINCT n.value) AS list',{}) YIELD value
330 | WITH value.list AS list
331 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
332 | 
333 | RETURN olab.nlp.userdic.add('dynamic.dic',['测试','胡永乐','性别'],true,'UTF-8') AS words;
334 | ```
335 | 
336 | ### 6.3 词库热更新
337 | ```cypher
338 | RETURN olab.nlp.userdic.refresh();
339 | ```
340 | 
341 | ## 七、安装问答模块存储过程
342 | ### 7.1 问答结果
343 | #### 7.1.1 意图字符匹配和列表求交集【非分类模型解意图】
344 | ```sql
345 | // 1.搜索语句
346 | WITH LOWER('火力发电行业博士学历的男性高管有多少位？') AS query
347 | // 2.个性化配置：图数据模型/本体权重/实体匹配规则/预期意图
348 | WITH query,
349 |      custom.inference.search.qabot() AS graphDataSchema,
350 |      custom.inference.weight.qabot() AS weight,
351 |      custom.inference.match.qabot() AS nodeHitsRules,
352 |      //预期意图定义中支持设置一个排序参数
353 |      custom.inference.intended.qabot() AS intendedIntent,
354 |      custom.inference.operators.parse(query) AS oper
355 | // 3.个性化语句解析：解析时间/解析页面
356 | WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
357 |      olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
358 | // 4.从查询语句中过滤时间词
359 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
360 |      olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
361 | // 5.过滤时间词后进行分词
362 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
363 |      olab.hanlp.standard.segment(query) AS words
364 | // 6.分词后结果只保留名词且不能是纯数字
365 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
366 |      EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
367 | // 7.实体识别
368 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
369 |      olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
370 | // 8.生成权重搜索队列
371 | WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
372 | CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
373 | WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
374 | // 9.将个性化语句解析结果增加到entityRecognitionHit
375 | WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
376 | // 10.意图识别
377 | WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
378 |      apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema
379 | WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
380 | // 11.图上下文语义解析
381 | WITH operator,graphDataSchema,intentSchema,intendedIntent,
382 |      olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
383 | // 12.查询转换【不设置skip参数,每个查询抽取100条结果】
384 | WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
385 | WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
386 | // 13.执行查询【value返回为一个MAP，MAP的KEY SIZE小于等于解析后返回意图类别个数】
387 | CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
388 | WITH olab.map.keys(value) AS keys,value
389 | UNWIND keys AS key
390 | WITH apoc.map.get(value,key) AS n
391 | CALL apoc.case([apoc.coll.contains(['NODE'],apoc.meta.cypher.type(n)),'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result'],'WITH $n AS n RETURN TOSTRING(n) AS result',{n:n,sml:'：'}) YIELD value
392 | RETURN value.result AS result;
393 | ```
394 | 
395 | ```sql
396 | CALL apoc.custom.asProcedure(
397 | 'qabot',
398 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH olab.map.keys(value) AS keys,value UNWIND keys AS key WITH apoc.map.get(value,key) AS n CALL apoc.case([apoc.coll.contains([\'NODE\'],apoc.meta.cypher.type(n)),\'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result\'],\'WITH $n AS n RETURN TOSTRING(n) AS result\',{n:n,sml:\'：\'}) YIELD value RETURN value.result AS result;',
399 | 'READ',
400 | [['result','STRING']],
401 | [['ask','STRING']],
402 | '问答机器人'
403 | );
404 | ```
405 | 
406 | ```sql
407 | CALL custom.qabot('火力发电行业博士学历的男性高管有多少位？') YIELD result RETURN result;
408 | ```
409 | 
410 | #### 7.1.2 带分类模型的查询方式【分类模型解意图】
411 | ```json
412 | 问题：胡永乐是男性还是女性？
413 | 问题类别：查询性别
414 | ```
415 | ```sql
416 | // 1.搜索语句
417 | WITH LOWER('胡永乐是男性还是女性？') AS query,'查询性别' AS classifier_type
418 | // 2.个性化配置：图数据模型/本体权重/实体匹配规则/预期意图
419 | WITH classifier_type,query,
420 |      custom.inference.search.qabot() AS graphDataSchema,
421 |      custom.inference.weight.qabot() AS weight,
422 |      custom.inference.match.qabot() AS nodeHitsRules,
423 |      //预期意图定义中支持设置一个排序参数
424 |      custom.inference.intended.qabot() AS intendedIntent,
425 |      custom.inference.operators.parse(query) AS oper
426 | // 3.个性化语句解析：解析时间/解析页面
427 | WITH classifier_type,oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
428 |      olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
429 | // 4.从查询语句中过滤时间词
430 | WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
431 |      olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
432 | // 5.过滤时间词后进行分词
433 | WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
434 |      olab.hanlp.standard.segment(query) AS words
435 | // 6.分词后结果只保留名词且不能是纯数字
436 | WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
437 |      EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
438 | // 7.实体识别
439 | WITH classifier_type,oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
440 |      olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
441 | // 8.生成权重搜索队列
442 | WITH classifier_type,oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
443 | CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
444 | WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
445 | // 9.将个性化语句解析结果增加到entityRecognitionHit
446 | WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
447 | // 10.意图识别
448 | WITH classifier_type,oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
449 |      apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent,classifier_type)) AS intentSchema
450 | WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
451 | // 11.图上下文语义解析
452 | WITH operator,graphDataSchema,intentSchema,intendedIntent,
453 |      olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
454 | // 12.查询转换【不设置skip参数,每个查询抽取100条结果】
455 | WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
456 | WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
457 | // 13.执行查询【value返回为一个MAP，MAP的KEY SIZE小于等于解析后返回意图类别个数】
458 | CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
459 | WITH olab.map.keys(value) AS keys,value
460 | UNWIND keys AS key
461 | WITH apoc.map.get(value,key) AS n
462 | CALL apoc.case([apoc.coll.contains(['NODE'],apoc.meta.cypher.type(n)),'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result'],'WITH $n AS n RETURN TOSTRING(n) AS result',{n:n,sml:'：'}) YIELD value
463 | RETURN value.result AS result;
464 | ```
465 | 
466 | ### 7.2 问答结果Cypher
467 | ```sql
468 | // 1.搜索语句
469 | WITH LOWER('火力发电行业博士学历的男性高管有多少位？') AS query
470 | // 2.个性化配置：图数据模型/本体权重/实体匹配规则/预期意图
471 | WITH query,
472 |      custom.inference.search.qabot() AS graphDataSchema,
473 |      custom.inference.weight.qabot() AS weight,
474 |      custom.inference.match.qabot() AS nodeHitsRules,
475 |      //预期意图定义中支持设置一个排序参数
476 |      custom.inference.intended.qabot() AS intendedIntent,
477 |      custom.inference.operators.parse(query) AS oper
478 | // 3.个性化语句解析：解析时间/解析页面
479 | WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
480 |      olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
481 | // 4.从查询语句中过滤时间词
482 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
483 |      olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
484 | // 5.过滤时间词后进行分词
485 | WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
486 |      olab.hanlp.standard.segment(query) AS words
487 | // 6.分词后结果只保留名词且不能是纯数字
488 | WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
489 |      EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
490 | // 7.实体识别
491 | WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
492 |      olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
493 | // 8.生成权重搜索队列
494 | WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
495 | CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
496 | WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
497 | // 9.将个性化语句解析结果增加到entityRecognitionHit
498 | WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
499 | // 10.意图识别
500 | WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,
501 |      apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema
502 | WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
503 | // 11.图上下文语义解析
504 | WITH operator,graphDataSchema,intentSchema,intendedIntent,
505 |      olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
506 | // 12.查询转换【不设置skip参数,每个查询抽取100条结果】
507 | WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher
508 | WITH REPLACE(cypher,'RETURN n','RETURN DISTINCT n') AS cypher
509 | RETURN cypher;
510 | ```
511 | 
512 | ```sql
513 | CALL apoc.custom.asProcedure(
514 | 'qabot.cypher',
515 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher RETURN cypher;',
516 | 'READ',
517 | [['cypher','STRING']],
518 | [['ask','STRING']],
519 | '问答机器人：生成查询语句'
520 | );
521 | ```
522 | 
523 | ```sql
524 | CALL custom.qabot.cypher('火力发电行业博士学历的男性高管有多少位？') YIELD cypher RETURN cypher;
525 | ```
526 | 
527 | ### 7.3 问答推理图谱
528 | ```sql
529 | // 1.搜索语句
530 | WITH LOWER('火力发电行业博士学历的男性高管有多少位？') AS query
531 | // 2.个性化配置：图数据模型/本体权重/实体匹配规则/预期意图
532 | WITH query,
533 |      custom.inference.search.qabot() AS graphDataSchema,
534 |      custom.inference.weight.qabot() AS weight,
535 |      custom.inference.match.qabot() AS nodeHitsRules,
536 |      //预期意图定义中支持设置一个排序参数
537 |      custom.inference.intended.qabot() AS intendedIntent
538 | // 3.个性化语句解析：解析时间/解析页面
539 | WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,
540 |      olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page
541 | // 4.从查询语句中过滤时间词
542 | WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
543 |      olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
544 | // 5.过滤时间词后进行分词
545 | WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
546 |      olab.hanlp.standard.segment(query) AS words
547 | // 6.分词后结果只保留名词且不能是纯数字
548 | WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
549 |      EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
550 | // 7.实体识别
551 | WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
552 |      olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
553 | // 8.生成权重搜索队列
554 | WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
555 | CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
556 | WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1
557 | // 9.将个性化语句解析结果增加到entityRecognitionHit
558 | WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit
559 | // 10.意图识别
560 | WITH graphDataSchema,intendedIntent,words,entityRecognitionHit,
561 |      apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema
562 | WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes)
563 | // 11.图上下文语义解析
564 | WITH graphDataSchema,intentSchema,intendedIntent,
565 |      olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema
566 | // 12.查询转换【不设置skip参数,每个查询抽取100条结果】
567 | WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),'',-1,10,{}) AS cypher
568 | // 13.执行查询【value返回为一个MAP，MAP的KEY SIZE小于等于解析后返回意图类别个数】
569 | CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10
570 | WITH value.graph AS graph
571 | UNWIND graph AS path
572 | RETURN path;
573 | ```
574 | 
575 | ```sql
576 | CALL apoc.custom.asProcedure(
577 | 'qabot.graph',
578 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),\'\',-1,10,{}) AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH value.graph AS graph UNWIND graph AS path RETURN path;',
579 | 'READ',
580 | [['path','PATH']],
581 | [['ask','STRING']],
582 | '问答机器人：问答推理图谱'
583 | );
584 | ```
585 | 
586 | ```sql
587 | CALL custom.qabot.graph('火力发电行业博士学历的男性高管有多少位？') YIELD path RETURN path;
588 | ```
589 | 
590 | ### 7.4 生成推荐问题列表
591 | ```cypher
592 | // 1.搜索语句
593 | WITH LOWER('2023年三月六日上市的股票代码？') AS query
594 | // 2.个性化配置：图数据模型/本体权重/实体匹配规则/预期意图
595 | WITH query,query AS raw_query,
596 |      custom.inference.search.qabot() AS graphDataSchema,
597 |      custom.inference.weight.qabot() AS weight,
598 |      custom.inference.match.qabot() AS nodeHitsRules,
599 |      //预期意图定义中支持设置一个排序参数
600 |      custom.inference.intended.qabot() AS intendedIntent,
601 |      custom.inference.operators.parse(query) AS oper
602 | // 3.个性化语句解析：解析时间/解析页面
603 | WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,
604 |      olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page
605 | // 4.从查询语句中过滤时间词
606 | WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
607 |      olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:' '})) AS query
608 | // 5.过滤时间词后进行分词
609 | WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
610 |      olab.hanlp.standard.segment(query) AS words
611 | // 6.分词后结果只保留名词且不能是纯数字
612 | WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,
613 |      EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH 'n' AND olab.string.matchCnEn(mp.word)<>'') OR mp.nature='uw')| m.word) AS words
614 | // 7.实体识别
615 | WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,
616 |      olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,'EXACT',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits
617 | // 8.生成权重搜索队列
618 | WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits
619 | CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value
620 | WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit
621 | WITH raw_query,entityRecognitionHit.entities AS map
622 | WITH raw_query,olab.map.keys(map) AS keys,map
623 | WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps
624 | WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps
625 | WITH raw_query,olab.replace(raw_query,reps) AS re_query
626 | WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1
627 | RETURN raw_query,re_query,score ORDER BY score DESC
628 | ```
629 | 
630 | ```cypher
631 | CALL apoc.custom.asProcedure(
632 |  'qabot.recommend_list',
633 |  'WITH LOWER($qa) AS query WITH query,query AS raw_query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.4}) AS entityRecognitionHits WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit WITH raw_query,entityRecognitionHit.entities AS map WITH raw_query,olab.map.keys(map) AS keys,map WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps WITH raw_query,olab.replace(raw_query,reps) AS re_query WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1 AND score>0.6 RETURN DISTINCT raw_query,re_query,score ORDER BY score DESC LIMIT 10',
634 |  'READ',
635 |  [['raw_query','STRING'],['re_query','STRING'],['score','NUMBER']],
636 |  [['qa','STRING']],
637 |  '推荐问题列表：自动推荐'
638 | );
639 | ```
640 | ```cypher
641 | CALL custom.qabot.recommend_list('2023年三月六日上市的股票代码？') YIELD raw_query,re_query,score RETURN raw_query,re_query,score
642 | ```
643 | 
644 | ## 八、配置样例问答
645 | ```cypher
646 | WITH '[{"qa":"火力发电行业博士学历的男性高管有多少位？","label":"学历"},{"qa":"山西都有哪些上市公司？","label":"地域"},{"qa":"富奥股份的高管都是什么学历？","label":"学历"},{"qa":"中国宝安属于什么行业？","label":"股票"},{"qa":"建筑工程行业有多少家上市公司？","label":"行业"},{"qa":"刘卫国是哪个公司的高管？","label":"高管"},{"qa":"美丽生态上市时间是什么时候？","label":"时间"},{"qa":"山西的上市公司有多少家？","label":"地域"},{"qa":"博士学历的高管都有哪些？","label":"学历"},{"qa":"上市公司是博士学历的高管有多少个？","label":"学历"},{"qa":"刘卫国是什么学历？","label":"高管"},{"qa":"富奥股份的男性高管有多少个？","label":"高管"},{"qa":"同在火力发电行业的上市公司有哪些？","label":"行业"},{"qa":"同在火力发电行业的上市公司有多少家？","label":"行业"},{"qa":"大悦城和荣盛发展是同一个行业嘛？","label":"股票"},{"qa":"同在河北的上市公司有哪些？","label":"股票"},{"qa":"神州高铁是什么时候上市的？","label":"时间"},{"qa":"火力发电行业男性高管有多少个？","label":"高管"},{"qa":"2023年三月六日上市的股票代码？","label":"股票"},{"qa":"2023年三月六日上市的股票有哪些？","label":"股票"},{"qa":"2023年三月六日上市的股票有多少个？","label":"股票"},{"qa":"胡永乐是什么性别？","label":"性别"}]' AS list
647 | WITH apoc.convert.fromJsonList(list) AS list
648 | UNWIND list AS map
649 | WITH map
650 | MERGE (n:DEMO_QA {qa:map.qa,label:map.label});
651 | ```
652 | 
653 | ## 九、运行Graph QABot页面
654 | ![QABot](images/QABot.png)
655 | 
656 | 


--------------------------------------------------------------------------------
/cypher/build.cypher:
--------------------------------------------------------------------------------
  1 | //设计索引和约束
  2 | CREATE CONSTRAINT ON (n:股票代码) ASSERT (n.value) IS NODE KEY;
  3 | CREATE CONSTRAINT ON (n:股票) ASSERT (n.value) IS NODE KEY;
  4 | CREATE CONSTRAINT ON (n:股票名称) ASSERT (n.value) IS NODE KEY;
  5 | CREATE CONSTRAINT ON (n:地域) ASSERT (n.value) IS NODE KEY;
  6 | CREATE CONSTRAINT ON (n:行业) ASSERT (n.value) IS NODE KEY;
  7 | CREATE CONSTRAINT ON (n:上市日期) ASSERT (n.value) IS NODE KEY;
  8 | CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY;
  9 | CREATE INDEX ON :高管(value);
 10 | CREATE CONSTRAINT ON (n:性别) ASSERT (n.value) IS NODE KEY;
 11 | CREATE CONSTRAINT ON (n:学历) ASSERT (n.value) IS NODE KEY;
 12 | CREATE CONSTRAINT ON (n:性别_别名) ASSERT (n.value) IS NODE KEY;
 13 | 
 14 | //构建数据
 15 | //将生成的CSV数据，放置在<ONGDB_HOME>/import目录下，CSV转为UTF-8-BOM格式（可以使用Notepad++转格式）
 16 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
 17 | WITH row.ts_code AS fval,row.symbol AS tval
 18 | WHERE tval IS NOT NULL AND tval<>''
 19 | MERGE (f:股票 {value:fval})
 20 | MERGE (t:股票代码 {value:tval})
 21 | MERGE (f)-[:股票代码]->(t);
 22 | 
 23 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
 24 | WITH row.ts_code AS fval,row.name AS tval
 25 | WHERE tval IS NOT NULL AND tval<>''
 26 | MERGE (f:股票 {value:fval})
 27 | MERGE (t:股票名称 {value:tval})
 28 | MERGE (f)-[:股票名称]->(t);
 29 | 
 30 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
 31 | WITH row.ts_code AS fval,row.area AS tval
 32 | WHERE tval IS NOT NULL AND tval<>''
 33 | MERGE (f:股票 {value:fval})
 34 | MERGE (t:地域 {value:tval})
 35 | MERGE (f)-[:地域]->(t);
 36 | 
 37 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
 38 | WITH row.ts_code AS fval,row.industry AS tval
 39 | WHERE tval IS NOT NULL AND tval<>''
 40 | MERGE (f:股票 {value:fval})
 41 | MERGE (t:行业 {value:tval})
 42 | MERGE (f)-[:所属行业]->(t);
 43 | 
 44 | LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
 45 | WITH row.ts_code AS fval,row.list_date AS tval
 46 | WHERE tval IS NOT NULL AND tval<>''
 47 | MERGE (f:股票 {value:fval})
 48 | MERGE (t:上市日期 {value:TOINTEGER(tval)})
 49 | MERGE (f)-[:上市日期]->(t);
 50 | 
 51 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
 52 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval
 53 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
 54 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
 55 | MERGE (t:股票 {value:tval})
 56 | MERGE (f)-[:任职于]->(t);
 57 | 
 58 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
 59 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.gender AS tval
 60 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
 61 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
 62 | MERGE (t:性别 {value:tval})
 63 | MERGE (f)-[:性别]->(t);
 64 | 
 65 | LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
 66 | WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.edu AS tval
 67 | WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
 68 | MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
 69 | MERGE (t:学历 {value:tval})
 70 | MERGE (f)-[:学历]->(t);
 71 | 
 72 | WITH ['女性','女'] AS list
 73 | UNWIND list AS wd
 74 | WITH wd
 75 | MATCH (n:性别) WHERE n.value='F' WITH n,wd
 76 | MERGE (t:性别_别名 {value:wd})
 77 | MERGE (n)-[:别名]->(t);
 78 | 
 79 | WITH ['男性','男'] AS list
 80 | UNWIND list AS wd
 81 | WITH wd
 82 | MATCH (n:性别) WHERE n.value='M' WITH n,wd
 83 | MERGE (t:性别_别名 {value:wd})
 84 | MERGE (n)-[:别名]->(t);
 85 | 
 86 | //配置图数据模型
 87 | CALL apoc.custom.asFunction(
 88 | 'inference.search.qabot',
 89 | 'RETURN \'{"graph":{"nodes":[{"properties_filter":[],"id":"1","labels":["股票代码"]},{"properties_filter":[],"id":"2","labels":["股票"]},{"properties_filter":[],"id":"3","labels":["股票名称"]},{"properties_filter":[],"id":"4","labels":["地域"]},{"properties_filter":[],"id":"5","labels":["行业"]},{"properties_filter":[],"id":"6","labels":["上市日期"]},{"properties_filter":[],"id":"7","labels":["高管"]},{"properties_filter":[],"id":"8","labels":["性别"]},{"properties_filter":[],"id":"9","labels":["学历"]},{"properties_filter":[],"id":"10","labels":["性别_别名"]}],"relationships":[{"startNode":"2","properties_filter":[],"id":"1","type":"股票代码","endNode":"1"},{"startNode":"2","properties_filter":[],"id":"2","type":"股票名称","endNode":"3"},{"startNode":"2","properties_filter":[],"id":"3","type":"地域","endNode":"4"},{"startNode":"2","properties_filter":[],"id":"4","type":"所属行业","endNode":"5"},{"startNode":"2","properties_filter":[],"id":"5","type":"上市日期","endNode":"6"},{"startNode":"7","properties_filter":[],"id":"6","type":"任职于","endNode":"2"},{"startNode":"7","properties_filter":[],"id":"7","type":"性别","endNode":"8"},{"startNode":"7","properties_filter":[],"id":"8","type":"学历","endNode":"9"},{"startNode":"8","properties_filter":[],"id":"9","type":"别名","endNode":"10"}]}}\' AS graphDataSchema',
 90 | 'STRING',
 91 | NULL,
 92 | false,
 93 | '搜索时图数据扩展模式(schema)的动态获取'
 94 | );
 95 | 
 96 | //配置实体权重规则
 97 | CALL apoc.custom.asFunction(
 98 | 'inference.weight.qabot',
 99 | 'RETURN \'{"LABEL":{"股票":12,"高管":11}}\' AS weight',
100 | 'STRING',
101 | NULL,
102 | false,
103 | '本体权重'
104 | );
105 | 
106 | //配置实体搜索规则
107 | CALL apoc.custom.asFunction(
108 | 'inference.match.qabot',
109 | 'RETURN \'{"股票名称":"value","地域":"value","上市日期":"value","性别_别名":"value","高管":"value","学历":"value","行业":"value","股票代码":"value"}\' AS nodeHitsRules',
110 | 'STRING',
111 | NULL,
112 | false,
113 | '实体匹配规则'
114 | );
115 | 
116 | //配置意图匹配规则
117 | CALL apoc.custom.asFunction(
118 | 'inference.intended.qabot',
119 | 'RETURN \'[{"label":"上市日期","return_var_alias":"n1","sort":1,"list":["是什么时候","什么时候"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"学历","return_var_alias":"n2","sort":2,"list":["什么学历"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"高管","return_var_alias":"n3","sort":3,"list":["高管有多少位","高管都有哪些","高管有多少个"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票","return_var_alias":"n4","sort":4,"list":["哪些上市公司","有多少家上市公司","哪个公司","有多少家","公司有哪些","公司有多少家","股票有哪些","股票有多少支","股票有多少个","股票代码？","股票？"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"行业","return_var_alias":"n5","sort":5,"list":["什么行业","同一个行业嘛"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"股票名称","return_var_alias":"n6","sort":6,"list":["股票名称？"],"parse_mode":"CONTAINS","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n7","sort":7,"list":["性别"],"parse_mode":"OTHER","order_by_field":null,"order_by_type":null},{"label":"性别","return_var_alias":"n8","sort":8,"list":["查询性别"],"parse_mode":"EQUALS","order_by_field":null,"order_by_type":null}]\' AS intendedIntent',
120 | 'STRING',
121 | NULL,
122 | false,
123 | '预期意图'
124 | );
125 | 
126 | //配置时间页码处理等规则
127 | CALL apoc.custom.asFunction(
128 | 'inference.parseadd.qabot',
129 | 'WITH $time AS time,$page AS page,$entityRecognitionHit AS entityRecognitionHit WITH entityRecognitionHit,REDUCE(l=\'\',e IN time.list | l+\'({var}.value>=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[0],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\' AND {var}.value<=\'+TOINTEGER(apoc.date.convertFormat(e.detail.time[1],\'yyyy-MM-dd HH:mm:ss\',\'yyyyMMdd\'))+\') OR \') AS timeFilter, REDUCE(l=\'\',e IN page.list | l+\'{var}.value\'+e[0]+e[1]+\' AND \') AS pageFilter CALL apoc.case([size(timeFilter)>4,\'RETURN SUBSTRING($timeFilter,0,size($timeFilter)-4) AS timeFilter\'],\'RETURN "" AS timeFilter\',{timeFilter:timeFilter}) YIELD value WITH entityRecognitionHit,value.timeFilter AS timeFilter,pageFilter CALL apoc.case([size(pageFilter)>5,\'RETURN SUBSTRING($pageFilter,0,size($pageFilter)-5) AS pageFilter\'],\'RETURN "" AS pageFilter\',{pageFilter:pageFilter}) YIELD value WITH entityRecognitionHit,timeFilter,value.pageFilter AS pageFilter WITH entityRecognitionHit,timeFilter,pageFilter CALL apoc.case([timeFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'上市日期\\\',[{category:\\\'node\\\',labels:[\\\'上市日期\\\'],properties_filter:[{value:$timeFilter}]}]]]) AS time\'],\'RETURN {} AS time\',{timeFilter:timeFilter}) YIELD value WITH value.time AS time,pageFilter,entityRecognitionHit CALL apoc.case([pageFilter<>"",\'RETURN apoc.map.setPairs({},[[\\\'文章页数\\\',[{category:\\\'node\\\',labels:[\\\'文章页数\\\'],properties_filter:[{value:$pageFilter}]}]]]) AS page\'],\'RETURN {} AS page\',{pageFilter:pageFilter}) YIELD value WITH value.page AS page,time,entityRecognitionHit RETURN apoc.map.setKey({},\'entities\',apoc.map.merge(apoc.map.merge(entityRecognitionHit.entities,time),page)) AS entityRecognitionHit',
130 | 'MAP',
131 | [['entityRecognitionHit','MAP'],['time','MAP'],['page','MAP']],
132 | false,
133 | '问答解析的内容增加到entityRecognitionHit'
134 | );
135 | 
136 | //配置算子规则
137 | CALL apoc.custom.asFunction(
138 | 'inference.operators.qabot',
139 | 'RETURN \'[{"keywords":["最多","最大"],"operator":{"sort": 1,"agg_operator_field": "value","agg_operator": "MAX","agg_operator_type": "NODE"}},{"keywords":["最小","最少"],"operator":{"sort": 2,"agg_operator_field": "value","agg_operator": "MIN","agg_operator_type": "NODE"}},{"keywords":["平均"],"operator":{"sort": 3,"agg_operator_field": "value","agg_operator": "AVG","agg_operator_type": "NODE"}},{"keywords":["有多少","有多少只","有多少支","多少只","多少支","多少","一共"],"operator":{"sort": 5,"agg_operator_field": "value","agg_operator": "COUNT","agg_operator_type": "NODE"}}]\' AS weight',
140 | 'STRING',
141 | NULL,
142 | false,
143 | '配置算子规则'
144 | );
145 | 
146 | //算子解析模块配置
147 | CALL apoc.custom.asFunction(
148 | 'inference.operators.parse',
149 | 'WITH $query AS query,custom.inference.operators.qabot() AS list WITH query,apoc.convert.fromJsonList(list) AS list UNWIND list AS map WITH map,REDUCE(l=[],em IN apoc.coll.sortMaps(REDUCE(l=[],e IN map.keywords | l+{wd:e,len:LENGTH(e)}),\'len\')| l+em.wd) AS keywords,query UNWIND keywords AS keyword WITH map,keyword,query CALL apoc.case([query CONTAINS keyword,\'RETURN $keyword AS hit\'],\'RETURN \\\' \\\' AS hit\',{keyword:keyword}) YIELD value WITH map,keyword,query,REPLACE(query,value.hit,\' \') AS trim_query,value.hit AS hit WITH query,trim_query,map.operator AS operator ORDER BY map.operator.sort ASC WITH COLLECT({query:query,trim_query:trim_query,operator:operator}) AS list WITH FILTER(e IN list WHERE e.query<>e.trim_query)[0] AS map,list WITH map,list[0] AS smap CALL apoc.case([map IS NOT NULL,\'RETURN $map AS map\'],\'RETURN $smap AS map\',{map:map,smap:smap}) YIELD value WITH value.map AS map CALL apoc.case([map.trim_query=map.query,\'RETURN {} AS operator\'],\'RETURN $operator AS operator\',{trim_query:map.trim_query,operator:map.operator}) YIELD value RETURN map.query AS query,map.trim_query AS trim_query,value.operator AS operator',
150 | 'MAP',
151 | [['query','STRING']],
152 | false,
153 | '算子解析'
154 | );
155 | 
156 | 
157 | //dic,dynamic,dynamic.dic
158 | //意图配置相关词
159 | WITH custom.inference.intended.qabot() AS str
160 | WITH apoc.convert.fromJsonList(str) as list
161 | UNWIND list AS map
162 | WITH map.label AS label,map.list as list,map
163 | WHERE UPPER(map.parse_mode)<>'CONTAINS' AND UPPER(map.parse_mode)<>'EQUALS'
164 | WITH apoc.coll.union([label],list) as list
165 | UNWIND list AS wd
166 | WITH collect(DISTINCT wd) AS list
167 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
168 | 
169 | //图数据模型相关词
170 | WITH custom.inference.search.qabot() AS str
171 | WITH apoc.convert.fromJsonMap(str).graph AS graph
172 | WITH graph.relationships AS relationships,graph.nodes AS nodes
173 | WITH REDUCE(l=[],e IN relationships | l+e.type) AS relationships,REDUCE(l=[],e IN nodes | l+e.labels[0]) AS nodes
174 | WITH apoc.coll.union(relationships,nodes) AS list
175 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
176 | 
177 | //实体匹配规则相关词
178 | WITH custom.inference.match.qabot() AS str
179 | WITH olab.map.keys(apoc.convert.fromJsonMap(str)) AS list
180 | UNWIND list AS lb
181 | WITH lb
182 | WHERE lb<>'性别' AND lb<>'上市日期'
183 | CALL apoc.cypher.run('MATCH (n:'+lb+') WHERE NOT n.value CONTAINS \' \' RETURN COLLECT(DISTINCT n.value) AS list',{}) YIELD value
184 | WITH value.list AS list
185 | RETURN olab.nlp.userdic.add('dynamic.dic',list,true,'UTF-8') AS words;
186 | 
187 | RETURN olab.nlp.userdic.add('dynamic.dic',['测试','胡永乐','性别'],true,'UTF-8') AS words;
188 | 
189 | RETURN olab.nlp.userdic.refresh();
190 | 
191 | //安装问答模块存储过程
192 | //问答结果
193 | CALL apoc.custom.asProcedure(
194 | 'qabot',
195 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH oper,operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH olab.map.keys(value) AS keys,value UNWIND keys AS key WITH apoc.map.get(value,key) AS n CALL apoc.case([apoc.coll.contains([\'NODE\'],apoc.meta.cypher.type(n)),\'WITH $n AS n,LABELS($n) AS lbs WITH lbs[0] AS label,n.value AS value RETURN label+$sml+UPPER(TOSTRING(value)) AS result\'],\'WITH $n AS n RETURN TOSTRING(n) AS result\',{n:n,sml:\'：\'}) YIELD value RETURN value.result AS result;',
196 | 'READ',
197 | [['result','STRING']],
198 | [['ask','STRING']],
199 | '问答机器人'
200 | );
201 | 
202 | 
203 | //问答结果Cypher
204 | CALL apoc.custom.asProcedure(
205 | 'qabot.cypher',
206 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH oper,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,operator,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH operator,graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper.query,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH operator,graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),intentSchema,-1,10,{},operator) AS cypher WITH REPLACE(cypher,\'RETURN n\',\'RETURN DISTINCT n\') AS cypher RETURN cypher;',
207 | 'READ',
208 | [['cypher','STRING']],
209 | [['ask','STRING']],
210 | '问答机器人：生成查询语句'
211 | );
212 | 
213 | //问答推理图谱
214 | CALL apoc.custom.asProcedure(
215 | 'qabot.graph',
216 | 'WITH LOWER($ask) AS query WITH query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent WITH query AS oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(query) AS time,olab.nlp.pagenum.parse(query) AS page WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH oper,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH oper,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.5}) AS entityRecognitionHits WITH oper,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH oper,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit LIMIT 1 WITH oper,graphDataSchema,intendedIntent,words,custom.inference.parseadd.qabot(entityRecognitionHit,time,page).entityRecognitionHit AS entityRecognitionHit WITH graphDataSchema,intendedIntent,words,entityRecognitionHit,  apoc.convert.toJson(olab.intent.schema.parse(graphDataSchema,oper,words,intendedIntent)) AS intentSchema WHERE SIZE(apoc.convert.fromJsonList(intendedIntent))>SIZE(apoc.convert.fromJsonMap(intentSchema).graph.nodes) WITH graphDataSchema,intentSchema,intendedIntent,  olab.semantic.schema(graphDataSchema,intentSchema,apoc.convert.toJson(entityRecognitionHit)) AS semantic_schema WITH olab.semantic.cypher(apoc.convert.toJson(semantic_schema),\'\',-1,10,{}) AS cypher CALL apoc.cypher.run(cypher,{}) YIELD value WITH value SKIP 0 LIMIT 10 WITH value.graph AS graph UNWIND graph AS path RETURN path;',
217 | 'READ',
218 | [['path','PATH']],
219 | [['ask','STRING']],
220 | '问答机器人：问答推理图谱'
221 | );
222 | 
223 | //生成推荐问题列表
224 | CALL apoc.custom.asProcedure(
225 |  'qabot.recommend_list',
226 |  'WITH LOWER($qa) AS query WITH query,query AS raw_query,  custom.inference.search.qabot() AS graphDataSchema,  custom.inference.weight.qabot() AS weight,  custom.inference.match.qabot() AS nodeHitsRules,  custom.inference.intended.qabot() AS intendedIntent,  custom.inference.operators.parse(query) AS oper WITH raw_query,oper.query AS query,oper.operator AS operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,  olab.nlp.timeparser(oper.query) AS time,olab.nlp.pagenum.parse(oper.query) AS page WITH raw_query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.replace(query,REDUCE(l=[],mp IN time.list | l+{raw:mp.text,rep:\' \'})) AS query WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  olab.hanlp.standard.segment(query) AS words WITH raw_query,operator,query,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,  EXTRACT(m IN FILTER(mp IN words WHERE (mp.nature STARTS WITH \'n\' AND olab.string.matchCnEn(mp.word)<>\'\') OR mp.nature=\'uw\')| m.word) AS words WITH raw_query,query,operator,graphDataSchema,weight,nodeHitsRules,intendedIntent,time,page,words,  olab.entity.recognition(graphDataSchema,nodeHitsRules,NULL,\'EXACT\',words,{isMergeLabelHit:true,labelMergeDis:0.4}) AS entityRecognitionHits WITH raw_query,query,operator,graphDataSchema,weight,intendedIntent,time,page,words,entityRecognitionHits CALL olab.entity.ptmd.queue(graphDataSchema,entityRecognitionHits,weight) YIELD value WITH raw_query,query,operator,graphDataSchema,intendedIntent,time,page,words,value AS entityRecognitionHit WITH raw_query,entityRecognitionHit.entities AS map WITH raw_query,olab.map.keys(map) AS keys,map WITH raw_query,REDUCE(l=[],key IN keys | l+{raw:key,rep:FILTER(e IN apoc.map.get(map,key,NULL,FALSE) WHERE SIZE(key)>2)[0].labels[0]}) AS reps WITH raw_query,FILTER(e IN reps WHERE e.rep IS NOT NULL) AS reps WITH raw_query,olab.replace(raw_query,reps) AS re_query WITH raw_query,re_query,olab.editDistance(raw_query,re_query) AS score WHERE score<1 AND score>0.6 RETURN DISTINCT raw_query,re_query,score ORDER BY score DESC LIMIT 10',
227 |  'READ',
228 |  [['raw_query','STRING'],['re_query','STRING'],['score','NUMBER']],
229 |  [['qa','STRING']],
230 |  '推荐问题列表：自动推荐'
231 | );
232 | 
233 | //配置样例问答
234 | WITH '[{"qa":"火力发电行业博士学历的男性高管有多少位？","label":"学历"},{"qa":"山西都有哪些上市公司？","label":"地域"},{"qa":"富奥股份的高管都是什么学历？","label":"学历"},{"qa":"中国宝安属于什么行业？","label":"股票"},{"qa":"建筑工程行业有多少家上市公司？","label":"行业"},{"qa":"刘卫国是哪个公司的高管？","label":"高管"},{"qa":"美丽生态上市时间是什么时候？","label":"时间"},{"qa":"山西的上市公司有多少家？","label":"地域"},{"qa":"博士学历的高管都有哪些？","label":"学历"},{"qa":"上市公司是博士学历的高管有多少个？","label":"学历"},{"qa":"刘卫国是什么学历？","label":"高管"},{"qa":"富奥股份的男性高管有多少个？","label":"高管"},{"qa":"同在火力发电行业的上市公司有哪些？","label":"行业"},{"qa":"同在火力发电行业的上市公司有多少家？","label":"行业"},{"qa":"大悦城和荣盛发展是同一个行业嘛？","label":"股票"},{"qa":"同在河北的上市公司有哪些？","label":"股票"},{"qa":"神州高铁是什么时候上市的？","label":"时间"},{"qa":"火力发电行业男性高管有多少个？","label":"高管"},{"qa":"2023年三月六日上市的股票代码？","label":"股票"},{"qa":"2023年三月六日上市的股票有哪些？","label":"股票"},{"qa":"2023年三月六日上市的股票有多少个？","label":"股票"},{"qa":"胡永乐是什么性别？","label":"性别"}]' AS list
235 | WITH apoc.convert.fromJsonList(list) AS list
236 | UNWIND list AS map
237 | WITH map
238 | MERGE (n:DEMO_QA {qa:map.qa,label:map.label});
239 | 
240 | 


--------------------------------------------------------------------------------
/db/ongdb-enterprise-1.0.4-qabot-demo.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/db/ongdb-enterprise-1.0.4-qabot-demo.7z


--------------------------------------------------------------------------------
/images/QABot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/images/QABot.png


--------------------------------------------------------------------------------
/images/txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/ppt/tt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/ppt/问答机器人框架.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ongdb-contrib/graph-qabot-demo/46eae129c5f4242d559eddf2b4ff5d078d6c83aa/ppt/问答机器人框架.pptx


--------------------------------------------------------------------------------
/python/fetch_managers.py:
--------------------------------------------------------------------------------
 1 | import tushare as ts
 2 | import pandas as pd
 3 | import system_constant as sys_cnt
 4 | import os
 5 | import time
 6 | 
 7 | # 初始化pro接口
 8 | pro = ts.pro_api(sys_cnt.tushare_token())
 9 | 
10 | 
11 | # 获取股票相关信息
12 | def run():
13 |     # 查询当前所有正常上市交易的股票列表
14 |     data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
15 |     tss = data['ts_code']
16 |     result = []
17 |     ct = 0
18 |     bk_ct = 100000
19 |     for code in tss:
20 |         time.sleep(0.5)
21 |         # 获取单个公司高管全部数据
22 |         df = pro.stk_managers(ts_code=code)
23 |         result.append(df)
24 |         ct += 1
25 |         if ct > bk_ct:
26 |             break
27 |     df_merge = pd.concat(result)
28 |     print(df_merge)
29 |     df_merge.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\managers.csv',
30 |                     encoding='GBK',
31 |                     columns=['ts_code', 'ann_date', 'name', 'gender', 'lev', 'title', 'edu', 'national', 'birthday',
32 |                              'begin_date', 'end_date'],
33 |                     index=False)
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     run()
38 | 


--------------------------------------------------------------------------------
/python/fetch_stocks.py:
--------------------------------------------------------------------------------
 1 | import tushare as ts
 2 | import system_constant as sys_cnt
 3 | import os
 4 | 
 5 | # 初始化pro接口
 6 | pro = ts.pro_api(sys_cnt.tushare_token())
 7 | 
 8 | 
 9 | # 获取股票相关信息
10 | def run():
11 |     # 查询当前所有正常上市交易的股票列表
12 |     data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
13 |     data.to_csv(path_or_buf=os.getcwd().replace('python', 'csv') + '\\stocks.csv',
14 |               encoding='GBK',
15 |               columns=['ts_code', 'symbol', 'name', 'area', 'industry', 'list_date'],
16 |               index=False)
17 | 
18 | 
19 | if __name__ == '__main__':
20 |     run()
21 | 


--------------------------------------------------------------------------------
/python/system_constant.py:
--------------------------------------------------------------------------------
1 | # 在https://tushare.pro/网站注册账号并获取token
2 | def tushare_token():
3 |     return 'YOUR TOKEN'
4 | 


--------------------------------------------------------------------------------