├── .gitignore ├── LICENSE ├── README.md └── query_analysis ├── Dockerfile ├── README.md ├── conf ├── __init__.py └── settings.py ├── dict ├── __init__.py ├── animal │ ├── __init__.py │ └── animal.csv ├── common │ ├── __init__.py │ ├── adjective.csv │ ├── adverb.csv │ ├── auxiliary.csv │ ├── degree.csv │ ├── directional.csv │ ├── honorific.csv │ ├── interj.csv │ ├── modals.csv │ ├── numeral.csv │ ├── prefix_unsual.csv │ ├── prep.csv │ ├── pronoun.csv │ ├── quantifier.csv │ └── stop_words.csv ├── dict.py ├── opera │ ├── __init__.py │ └── opera.csv ├── sight │ ├── __init__.py │ └── sight.csv ├── story │ ├── __init__.py │ └── story.csv └── vehicle │ ├── __init__.py │ └── vehicle.csv ├── handler ├── __init__.py ├── base.py └── interpreter.py ├── init.py ├── lib ├── __init__.py ├── scene.py └── status.py ├── main.py ├── makefile ├── nlu ├── __init__.py ├── animal.py ├── battery.py ├── dance.py ├── display.py ├── entertainment.py ├── expand_instruction.py ├── media │ ├── __init__.py │ ├── cartoon.py │ ├── comic.py │ ├── common.py │ ├── opera.py │ ├── picture_book.py │ └── story.py ├── mode.py ├── motion.py ├── music.py ├── nlu_framework.py ├── phone.py ├── photo.py ├── profile.py ├── recognition.py ├── rule.py ├── sight.py ├── smart │ ├── __init__.py │ ├── smart_home_aircleaner.py │ ├── smart_home_airconditioner.py │ ├── smart_home_common.py │ ├── smart_home_curtain.py │ ├── smart_home_light.py │ └── smart_home_tv.py ├── store_location.py ├── trick.py ├── vehicle.py ├── volume.py └── xiaoyi.py ├── requirements ├── scene ├── __init__.py ├── dance.py ├── phone.py ├── scene_framework.py └── trick.py ├── server ├── __init__.py ├── semantic_service.py └── server.py ├── supervisord.conf ├── supervisord_docker.conf ├── test ├── __init__.py ├── conf │ ├── animal.yaml │ ├── battery.yaml │ ├── dance.yaml │ ├── display.yaml │ ├── entertainment.yaml │ ├── extend_instruction.yaml │ ├── mode.yaml │ ├── motion.yaml │ ├── photo.yaml │ ├── profile.yaml │ ├── recognition.yaml │ ├── 
sight.yaml │ ├── story.yaml │ ├── vehicle.yaml │ ├── volume.yaml │ └── xiaoyi.yaml └── nlu_test.py ├── tools ├── __init__.py └── dict_maker.py ├── usage.py └── utils ├── __init__.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .settings/org.eclipse.core.resources.prefs 3 | .pydevproject 4 | *.swp 5 | .DS_Store 6 | .project 7 | .editorconfig 8 | *.orig 9 | .idea/ 10 | *./ 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RobotAIEngine 2 | 儿童机器人AI引擎,包括自然语言理解、对话生成、多轮对话、状态跟踪、智能推荐以及第三方服务的集成。 3 | -------------------------------------------------------------------------------- /query_analysis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM hub.c.163.com/netease_comb/centos:7 2 | MAINTAINER xiaoyi 3 | 4 | # 更新yum源 5 | RUN yum makecache fast && yum -y update glibc 6 | 7 | # 安装常用软件 8 | RUN yum install -y openssh-server vim tar wget curl rsync bzip2 iptables tcpdump less telnet net-tools lsof 9 | RUN yum -y install epel-release 10 | RUN yum -y install python-pip 11 | RUN yum -y install git 12 | RUN yum -y install gcc 13 | #RUN yum -y install gcc-c++ 14 | RUN yum -y install python-devel 15 | RUN yum -y install make automake 16 | RUN yum -y install redis 17 | RUN yum clean all 18 | 19 | RUN mkdir -p /data/www/query_analysis 20 | RUN mkdir -p /data/logs 21 | ADD . 
/data/www/query_analysis 22 | 23 | RUN pip install --upgrade pip 24 | RUN pip install supervisor 25 | RUN pip install tornado 26 | RUN pip install redis 27 | RUN pip install PyYAML 28 | RUN pip install regex 29 | 30 | WORKDIR /data/www/query_analysis 31 | ENTRYPOINT ["supervisord", "-n", "-c", "supervisord_docker.conf"] 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /query_analysis/README.md: -------------------------------------------------------------------------------- 1 | # QueryAnalysis # 2 | 3 | 当前业界文本语义信息抽取基本是使用正则表达式来完成的,正则表达式本身并不提供管理与测试功能。 4 | 在进行基于正则的文本语义抽取的时候,还需要做大量的基础工作,来便于编写、管理和测试正则表达式。 5 | 本项目提供大量底层与框架支持,便于使用者编写、管理与测试大批量的正则表达式,以支持高效稳定的语义抽取服务。 6 | 7 | ## 本项目由3块组成 ## 8 | * 1.词典(用于组织需要被正则模板使用的底层词库) 9 | * 2.正则语义模板(根据不同业务编写的一组正则表达式) 10 | * 3.框架与lib包(用于加载正则表达式完成对输入文本的语义抽取) 11 | 12 | ## 1.词典模块 ## 13 | 词典模块位于dict目录 14 | 需要公共使用的词典放到common目录里,其它各语义需要使用的词典放到各自的目录中 15 | 词典模块中的词典文件使用csv文件格式 16 | 词典内容支持多列形式,支持对词典中每个词添加一个或多个属性(词典中用逗号分隔) 17 | 编写好的词典需要在/dict/dict.py中加载,供正则模板使用 18 | 方法如下: 19 | 方法一:加载不带属性的词典 20 | > stop_words = WordsDict( 21 | > './dict/common/stop_words.csv') 22 | 23 | 方法二:加载带属性的词典(属性在词典中用逗号分隔) 24 | > animal_name = WordsDict( 25 | > './dict/animal/animal.csv', 26 | > property_name_list=['arid'], 27 | > group_name='animal') 28 | 29 | property_name_list参数:依次指定词典中逗号分隔的第n个属性在返回结果中的字段名 30 | group_name参数:指定被命中的词在返回结果字典中的key名 31 | 如:“我要看老虎”这句话中的“老虎”在词典中定义如下: 32 | >老虎,1 33 | >被正则命中后取得的返回结果中就会包含如下信息: 34 | >{'arid': '1', 'animal': '老虎'} 35 | 36 | ## 2.正则语义模板 ## 37 | 正则语义模板位于/nlu目录 38 | media目录用于存放需要播放的内容的语义模板,如相声、戏曲、动画、故事等,这类播放类语义的结构较为类似,故放在同一目录下 39 | 使用/nlu/media/common提取公共部分,在各模块内部只完成各自不同的信息即可。 40 | smart目录下是智能硬件相关的模块。 41 | 42 | 在/nlu目录下的其它模块,每个模块均代表1种语义 43 | 每个正则语义模块下有2个需要添加的内容: 44 | 1.service属性,这个属性指明这个类是代表什么语义的,在输入文本命中本类中包含的正则语义后, 45 | 返回的信息里会包含这个service属性。 46 | 2.nlu.rule.Rule对象,这个对象中包含的正则表达式信息,框架会用来对输入文本进行匹配。 47 | 48 | ## 3.框架与lib包 ## 49 | 当包含有正则表达式的语义类完成之后,需要把这个类注册到框架中去,方法如下: 50 | > import nlu.animal as animal 51 | > from 
nlu.nlu_framework import Nlu_Framework 52 | > Nlu_Framework.register(animal.Animal) 53 | 这样框架就可以用Animal类里的Rule对象所指定的正则变量来处理输入文本了 54 | 55 | 使用框架匹配正则模块如下: 56 | > match_dict_list = Nlu_Framework.match('我想看老虎') 57 | > print match_dict_list[0].items() 58 | 59 | 输出如下: 60 | > [('operation', 'query'), ('service', 'animal'), 61 | > ('parameters', {'arid': '1', 'rule': '5', 'animal': '\xe8\x80\x81\xe8\x99\x8e'})] 62 | 63 | lib包里提供了一些编写正则语义时常用的函数 64 | > attach_name函数:对指定正则附加一个名字属性,当命中这个正则时会返回这个属性 65 | > attach_perperty函数:对指定的正则表达式附加一个属性,当命中此正则时返回这个属性 66 | > e函数:表示正则里的可出现也可不出现(({})?) 67 | > o函数:表示正则里的或条件(({a|b})) 68 | > r函数:表示正则里的重复 69 | 70 | # Docker支持 # 71 | 本项目支持在docker容器中运行 72 | 方法1(自制镜像): 73 | * 1.进入本目录 74 | * 2.docker build -t query_analysis . 75 | * 3.docker run --rm --name query_analysis --net=host query_analysis 76 | 77 | 方法2(直接下载镜像): 78 | * 1.docker pull hub.c.163.com/yufeiok/query_analysis:latest 79 | * 2.docker run --rm --name query_analysis --net=host hub.c.163.com/yufeiok/query_analysis 80 | * 3.curl '127.0.0.1:8700/interpreter/info?speech=%E6%88%91%E8%A6%81%E7%9C%8B%E8%80%81%E8%99%8E&robot_code=0'(测试) 81 | 82 | 这就可以直接在本机的8700端口进行测试了,返回结果如下: 83 | curl '127.0.0.1:8700/interpreter/info?speech=%E6%88%91%E8%A6%81%E7%9C%8B%E8%80%81%E8%99%8E&robot_code=0' (发送文本:我要看老虎) 84 | 85 | 返回结果: 86 | {"msg": "OK", "body": {"operation": "query", "service": "animal", "parameters": {"arid": "1", "rule": "5", "animal": "\u8001\u864e"}}, "code": 0} 87 | 88 | # 使用方法 # 89 | * 1.编写用于提取语义的类,如下所示: 90 | > usage.py 91 | > class Test(object): 92 | > # 标识是test领域(这个service字段必须存在,命中本类中正则时,会输出这个字段) 93 | > service = 'test' 94 | > # 表示抓取2个字长度的信息,输出字段为user_name 95 | > name = range_tag(2, 'user_name') 96 | > # 正则规则:我的名字是小明 97 | > name_case1 = '我的名字是' + name 98 | > # 生成规则对象(附加的参数会在输出结果中被输出,operation代表具体的操作) 99 | > rule_case1 = Rule(attach_perperty(name_case1, {'operation': 'query', 'rule': 1})) 100 | 101 | * 2.把本类注册到NLU框架中 102 | > from nlu.nlu_framework import Nlu_Framework 103 | > 
Nlu_Framework.register(Test) 104 | 105 | * 3.使用规则来处理输入文本 106 | > match_dict_list = Nlu_Framework.match('我的名字是小明') 107 | > for k, v in match_dict_list[0].items(): 108 | > print '{} : {}'.format(k, v) 109 | 110 | * 4.输出结果如下: 111 | > operation : query 112 | > service : test 113 | > parameters : {'user_name': '\xe5\xb0\x8f\xe6\x98\x8e', 'rule': '1'} 114 | -------------------------------------------------------------------------------- /query_analysis/conf/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/11/12 10 | """ -------------------------------------------------------------------------------- /query_analysis/conf/settings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要:配置信息 8 | 创 建 者:余菲 9 | 创建日期:16/11/12 10 | """ 11 | 12 | # 喜马拉雅token请求 13 | API_XMLY_TOKEN = "http://api.ximalaya.com/oauth2/secure_access_token" 14 | 15 | # 喜马拉雅track请求 16 | API_XMLY_TRACK = "http://api.ximalaya.com/openapi-gateway-app/search/tracks" 17 | 18 | -------------------------------------------------------------------------------- /query_analysis/dict/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/5/21 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/animal/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/6/5 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/animal/animal.csv: -------------------------------------------------------------------------------- 1 | 老虎,1 狮子,2 猪,3 -------------------------------------------------------------------------------- /query_analysis/dict/common/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/5/21 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/common/adjective.csv: -------------------------------------------------------------------------------- 1 | 哀愁 2 | 哀伤 3 | 哀痛 4 | 矮 5 | 矮小 6 | 爱国 7 | 碍事 8 | 暧昧 9 | 安定 10 | 安分 11 | 安静 12 | 安乐 13 | 安谧 14 | 安宁 15 | 安全 16 | 安稳 17 | 安闲 18 | 安详 19 | 安心 20 | 安逸 21 | 暗 22 | 暗淡 23 | 黯淡 24 | 肮脏 25 | 昂贵 26 | 拗口 27 | 傲慢 28 | 奥妙 29 | 拔尖 30 | 霸道 31 | 白 32 | 白净 33 | 般配 34 | 薄 35 | 薄情 36 | 薄弱 37 | 饱 38 | 饱满 39 | 宝贵 40 | 保暖 41 | 保险 42 | 抱歉 43 | 暴烈 44 | 暴虐 45 | 暴躁 46 | 卑鄙 47 | 卑贱 48 | 卑劣 49 | 卑怯 50 | 卑下 51 | 悲哀 52 | 悲惨 53 | 悲观 54 | 悲凉 55 | 悲伤 56 | 悲痛 57 | 悲壮 58 | 被动 59 | 笨 60 | 笨重 61 | 笨拙 62 | 逼真 63 | 闭塞 64 | 蔽塞 65 | 便利 66 | 便宜 67 | 标致 68 | 憋闷 69 | 憋气 70 | 别扭 71 | 别致 72 | 蹩脚 73 | 不安 74 | 不便 75 | 不错 76 | 不公 77 | 不和 78 | 不赖 79 | 不力 80 | 不满 81 | 不妥 82 | 不幸 83 | 不周 84 | 残暴 85 | 残酷 86 | 残忍 87 | 惭愧 88 | 惨 89 | 惨重 90 | 灿烂 91 | 仓促 92 | 苍白 93 | 苍老 94 | 苍凉 95 | 嘈杂 96 | 草率 97 | 诧异 98 | 差 99 | 差劲 100 | 馋 101 | 缠绵 102 | 缠手 103 | 昌盛 104 | 猖獗 105 | 猖狂 106 | 长 107 | 长寿 108 | 长远 109 | 常见 110 | 常用 111 | 敞亮 112 | 怅惘 113 | 畅快 114 | 畅通 115 | 潮湿 116 | 彻底 117 | 沉 118 | 沉静 119 | 沉闷 120 | 沉默 121 | 沉稳 122 | 沉重 123 | 沉着 124 | 陈旧 125 | 称心 126 | 称职 127 | 成功 128 | 成熟 129 | 诚恳 130 | 诚朴 131 | 诚实 132 | 吃亏 133 | 吃力 134 | 吃香 135 | 迟 136 | 迟钝 137 | 迟缓 138 | 充分 139 | 充沛 140 | 充实 141 | 充裕 142 
| 充足 143 | 崇高 144 | 抽象 145 | 惆怅 146 | 稠密 147 | 丑 148 | 丑陋 149 | 臭 150 | 出名 151 | 出色 152 | 出众 153 | 纯 154 | 纯洁 155 | 纯净 156 | 纯朴 157 | 纯真 158 | 纯正 159 | 淳朴 160 | 醇厚 161 | 蠢 162 | 蠢笨 163 | 慈祥 164 | 刺耳 165 | 刺眼 166 | 聪慧 167 | 聪明 168 | 聪颖 169 | 从容 170 | 凑巧 171 | 粗 172 | 粗暴 173 | 粗糙 174 | 粗犷 175 | 粗陋 176 | 粗鲁 177 | 粗浅 178 | 粗率 179 | 粗俗 180 | 粗心 181 | 粗野 182 | 粗壮 183 | 脆弱 184 | 大 185 | 大胆 186 | 大度 187 | 大方 188 | 呆板 189 | 歹毒 190 | 单薄 191 | 单纯 192 | 单调 193 | 胆大 194 | 胆怯 195 | 胆小 196 | 淡 197 | 淡漠 198 | 倒霉 199 | 到家 200 | 得劲 201 | 得力 202 | 得体 203 | 得意 204 | 低 205 | 低沉 206 | 低级 207 | 低廉 208 | 低劣 209 | 低下 210 | 地道 211 | 典型 212 | 典雅 213 | 刁钻 214 | 丢脸 215 | 丢人 216 | 懂事 217 | 懂行 218 | 动人 219 | 动听 220 | 陡 221 | 毒辣 222 | 独到 223 | 独特 224 | 端正 225 | 端庄 226 | 短 227 | 短暂 228 | 对 229 | 对称 230 | 对路 231 | 钝 232 | 多 233 | 多情 234 | 多心 235 | 恶毒 236 | 恶劣 237 | 恶心 238 | 饿 239 | 发达 240 | 乏 241 | 乏味 242 | 烦 243 | 烦乱 244 | 烦恼 245 | 烦琐 246 | 烦躁 247 | 繁华 248 | 繁忙 249 | 繁荣 250 | 繁琐 251 | 繁杂 252 | 繁重 253 | 反常 254 | 反感 255 | 方便 256 | 放荡 257 | 放松 258 | 肥 259 | 肥大 260 | 肥胖 261 | 肥沃 262 | 费工 263 | 费解 264 | 费劲 265 | 费事 266 | 分散 267 | 纷乱 268 | 愤慨 269 | 愤怒 270 | 丰富 271 | 丰满 272 | 丰美 273 | 丰饶 274 | 丰盛 275 | 丰硕 276 | 丰裕 277 | 风流 278 | 疯狂 279 | 锋利 280 | 肤浅 281 | 伏贴 282 | 浮浅 283 | 浮躁 284 | 腐败 285 | 腐朽 286 | 负责 287 | 复杂 288 | 富 289 | 富丽 290 | 富饶 291 | 富有 292 | 富裕 293 | 富足 294 | 干 295 | 干脆 296 | 干净 297 | 干练 298 | 干燥 299 | 甘美 300 | 甘甜 301 | 尴尬 302 | 感动 303 | 感慨 304 | 感人 305 | 刚烈 306 | 刚正 307 | 刚直 308 | 高 309 | 高傲 310 | 高大 311 | 高贵 312 | 高级 313 | 高明 314 | 高尚 315 | 高兴 316 | 高雅 317 | 高涨 318 | 革命 319 | 耿直 320 | 工整 321 | 公道 322 | 公平 323 | 公正 324 | 恭敬 325 | 巩固 326 | 孤单 327 | 孤独 328 | 孤立 329 | 孤僻 330 | 古 331 | 古板 332 | 古怪 333 | 古老 334 | 古朴 335 | 鼓 336 | 固定 337 | 固执 338 | 乖僻 339 | 乖巧 340 | 怪 341 | 怪僻 342 | 关切 343 | 管用 344 | 光 345 | 光彩 346 | 光滑 347 | 光溜 348 | 光荣 349 | 广 350 | 广博 351 | 广泛 352 | 广阔 353 | 诡怪 354 | 贵重 355 | 果断 356 | 过分 357 | 过火 358 | 过硬 359 | 害臊 360 | 害羞 361 | 憨厚 362 | 含糊 363 | 含混 364 | 含蓄 365 | 寒碜 366 | 寒酸 367 
| 豪放 368 | 豪华 369 | 豪爽 370 | 好 371 | 好看 372 | 好客 373 | 好奇 374 | 好强 375 | 好受 376 | 好笑 377 | 好学 378 | 浩大 379 | 合法 380 | 合理 381 | 合群 382 | 合适 383 | 合算 384 | 和蔼 385 | 和缓 386 | 和睦 387 | 和气 388 | 和谐 389 | 和煦 390 | 黑 391 | 黑暗 392 | 狠 393 | 红 394 | 红润 395 | 宏伟 396 | 洪亮 397 | 厚 398 | 厚道 399 | 糊涂 400 | 华贵 401 | 华丽 402 | 华美 403 | 滑 404 | 滑稽 405 | 滑润 406 | 划算 407 | 坏 408 | 欢 409 | 欢畅 410 | 欢快 411 | 欢乐 412 | 缓和 413 | 缓慢 414 | 荒凉 415 | 荒谬 416 | 荒唐 417 | 慌 418 | 慌乱 419 | 慌张 420 | 黄 421 | 恍惚 422 | 灰心 423 | 诙谐 424 | 辉煌 425 | 晦涩 426 | 浑 427 | 浑厚 428 | 浑浊 429 | 混沌 430 | 混乱 431 | 豁达 432 | 豁朗 433 | 活 434 | 活泼 435 | 活跃 436 | 火暴 437 | 机警 438 | 机灵 439 | 机智 440 | 积极 441 | 激昂 442 | 激动 443 | 激进 444 | 激烈 445 | 及时 446 | 吉利 447 | 急 448 | 急促 449 | 急躁 450 | 集中 451 | 挤 452 | 寂寞 453 | 假 454 | 尖 455 | 尖刻 456 | 尖锐 457 | 奸猾 458 | 奸诈 459 | 坚定 460 | 坚固 461 | 坚决 462 | 坚强 463 | 坚硬 464 | 艰巨 465 | 艰苦 466 | 艰难 467 | 艰深 468 | 艰险 469 | 俭朴 470 | 检点 471 | 简单 472 | 简短 473 | 简洁 474 | 简练 475 | 简陋 476 | 简略 477 | 简明 478 | 简易 479 | 见效 480 | 贱 481 | 健康 482 | 健全 483 | 健谈 484 | 健忘 485 | 健壮 486 | 僵 487 | 僵硬 488 | 娇 489 | 娇媚 490 | 娇嫩 491 | 娇娆 492 | 娇小 493 | 娇羞 494 | 娇艳 495 | 骄傲 496 | 骄横 497 | 焦急 498 | 狡猾 499 | 狡黠 500 | 矫健 501 | 杰出 502 | 洁净 503 | 结实 504 | 解馋 505 | 解恨 506 | 矜持 507 | 紧 508 | 紧凑 509 | 紧迫 510 | 紧俏 511 | 紧张 512 | 谨慎 513 | 尽心 514 | 近 515 | 惊诧 516 | 惊慌 517 | 惊奇 518 | 惊人 519 | 惊喜 520 | 惊讶 521 | 精 522 | 精辟 523 | 精彩 524 | 精干 525 | 精悍 526 | 精练 527 | 精美 528 | 精密 529 | 精明 530 | 精巧 531 | 精确 532 | 精心 533 | 精湛 534 | 精致 535 | 景气 536 | 净 537 | 静 538 | 窘促 539 | 窘迫 540 | 久 541 | 旧 542 | 拘谨 543 | 拘束 544 | 沮丧 545 | 具体 546 | 剧烈 547 | 娟秀 548 | 绝对 549 | 绝望 550 | 倔强 551 | 均匀 552 | 俊美 553 | 俊俏 554 | 峻峭 555 | 开化 556 | 开阔 557 | 开朗 558 | 开明 559 | 开通 560 | 开胃 561 | 开心 562 | 慷慨 563 | 苛刻 564 | 科学 565 | 可爱 566 | 可悲 567 | 可耻 568 | 可恶 569 | 可观 570 | 可贵 571 | 可敬 572 | 可靠 573 | 可口 574 | 可怜 575 | 可怕 576 | 可体 577 | 可惜 578 | 可笑 579 | 可信 580 | 可行 581 | 可疑 582 | 渴 583 | 刻板 584 | 刻薄 585 | 刻苦 586 | 客观 587 | 客气 588 | 肯定 589 | 恳切 590 | 空 591 | 空泛 592 | 
空旷 593 | 空虚 594 | 恐怖 595 | 恐慌 596 | 恐惧 597 | 枯涩 598 | 枯燥 599 | 苦 600 | 苦闷 601 | 苦恼 602 | 苦涩 603 | 快 604 | 快活 605 | 快乐 606 | 宽 607 | 宽敞 608 | 宽绰 609 | 宽大 610 | 宽广 611 | 宽厚 612 | 宽阔 613 | 宽松 614 | 宽心 615 | 宽裕 616 | 狂 617 | 狂热 618 | 狂妄 619 | 魁梧 620 | 愧疚 621 | 困 622 | 困惑 623 | 困倦 624 | 困难 625 | 阔 626 | 阔绰 627 | 阔气 628 | 邋遢 629 | 辣 630 | 辣手 631 | 蓝 632 | 懒 633 | 懒惰 634 | 懒散 635 | 烂 636 | 滥 637 | 狼狈 638 | 浪漫 639 | 劳累 640 | 牢 641 | 牢固 642 | 牢靠 643 | 老 644 | 老成 645 | 老练 646 | 老实 647 | 潦草 648 | 乐观 649 | 累 650 | 冷 651 | 冷淡 652 | 冷静 653 | 冷酷 654 | 冷漠 655 | 冷僻 656 | 冷清 657 | 理想 658 | 厉害 659 | 利索 660 | 连贯 661 | 廉洁 662 | 凉 663 | 凉快 664 | 凉爽 665 | 亮 666 | 亮堂 667 | 辽阔 668 | 烈 669 | 凛冽 670 | 吝啬 671 | 伶俐 672 | 灵便 673 | 灵活 674 | 灵敏 675 | 灵巧 676 | 灵验 677 | 凌乱 678 | 零乱 679 | 零散 680 | 零碎 681 | 流畅 682 | 流利 683 | 流行 684 | 笼统 685 | 隆重 686 | 鲁莽 687 | 露骨 688 | 绿 689 | 乱 690 | 落后 691 | 麻利 692 | 麻木 693 | 马虎 694 | 蛮横 695 | 满 696 | 漫长 697 | 慢 698 | 忙 699 | 忙碌 700 | 盲目 701 | 莽撞 702 | 毛糙 703 | 矛盾 704 | 茂密 705 | 冒失 706 | 没趣 707 | 美 708 | 美好 709 | 美丽 710 | 美满 711 | 美妙 712 | 闷 713 | 朦胧 714 | 猛 715 | 猛烈 716 | 迷人 717 | 迷惘 718 | 秘密 719 | 密集 720 | 密切 721 | 勉强 722 | 腼腆 723 | 苗条 724 | 渺茫 725 | 渺小 726 | 妙 727 | 民主 728 | 敏感 729 | 敏捷 730 | 敏锐 731 | 名贵 732 | 明白 733 | 明达 734 | 明快 735 | 明朗 736 | 明亮 737 | 明媚 738 | 明确 739 | 明晰 740 | 明显 741 | 明智 742 | 模糊 743 | 摩登 744 | 陌生 745 | 默契 746 | 耐寒 747 | 耐磨 748 | 耐热 749 | 耐心 750 | 耐用 751 | 难 752 | 难得 753 | 难过 754 | 难堪 755 | 难受 756 | 难听 757 | 难忘 758 | 难为情 759 | 难闻 760 | 内疚 761 | 内向 762 | 内行 763 | 嫩 764 | 能干 765 | 年轻 766 | 宁静 767 | 浓 768 | 浓厚 769 | 浓密 770 | 浓郁 771 | 努力 772 | 暖和 773 | 懦弱 774 | 偶然 775 | 庞大 776 | 庞杂 777 | 蓬松 778 | 皮实 779 | 疲惫 780 | 疲乏 781 | 疲倦 782 | 疲劳 783 | 疲软 784 | 僻静 785 | 偏 786 | 偏僻 787 | 偏心 788 | 片面 789 | 漂亮 790 | 贫乏 791 | 贫瘠 792 | 贫困 793 | 贫穷 794 | 频繁 795 | 平 796 | 平安 797 | 平常 798 | 平淡 799 | 平等 800 | 平凡 801 | 平和 802 | 平衡 803 | 平滑 804 | 平缓 805 | 平缓 806 | 平静 807 | 平均 808 | 平坦 809 | 平稳 810 | 平庸 811 | 平整 812 | 泼辣 813 | 朴实 814 | 朴素 815 | 迫切 816 | 破 817 | 破旧 818 
| 破烂 819 | 破碎 820 | 铺张 821 | 普遍 822 | 普及 823 | 普通 824 | 凄惨 825 | 凄怆 826 | 凄凉 827 | 齐备 828 | 齐全 829 | 奇怪 830 | 奇妙 831 | 奇特 832 | 奇异 833 | 崎岖 834 | 气愤 835 | 恰当 836 | 牵强 837 | 谦卑 838 | 谦恭 839 | 谦和 840 | 谦虚 841 | 谦逊 842 | 虔诚 843 | 浅 844 | 浅薄 845 | 浅陋 846 | 浅显 847 | 浅易 848 | 纤巧 849 | 纤弱 850 | 纤细 851 | 强 852 | 强大 853 | 强悍 854 | 强横 855 | 强劲 856 | 强烈 857 | 强硬 858 | 强壮 859 | 抢手 860 | 憔悴 861 | 巧 862 | 巧妙 863 | 俏皮 864 | 切题 865 | 怯懦 866 | 怯弱 867 | 惬意 868 | 亲 869 | 亲密 870 | 亲昵 871 | 亲切 872 | 亲热 873 | 勤 874 | 勤奋 875 | 勤俭 876 | 勤恳 877 | 勤快 878 | 勤劳 879 | 青 880 | 轻 881 | 轻薄 882 | 轻浮 883 | 轻快 884 | 轻狂 885 | 轻巧 886 | 轻柔 887 | 轻率 888 | 轻松 889 | 轻微 890 | 轻闲 891 | 轻盈 892 | 清 893 | 清白 894 | 清楚 895 | 清脆 896 | 清淡 897 | 清高 898 | 清洁 899 | 清静 900 | 清苦 901 | 清廉 902 | 清凉 903 | 清贫 904 | 清爽 905 | 清晰 906 | 清闲 907 | 清新 908 | 清醒 909 | 清秀 910 | 清雅 911 | 清正 912 | 晴和 913 | 晴朗 914 | 穷 915 | 穷苦 916 | 穷困 917 | 曲折 918 | 屈才 919 | 全 920 | 全面 921 | 缺德 922 | 确切 923 | 确凿 924 | 绕嘴 925 | 热 926 | 热诚 927 | 热乎 928 | 热火 929 | 热烈 930 | 热门 931 | 热闹 932 | 热情 933 | 热心 934 | 仁慈 935 | 认真 936 | 任性 937 | 荣幸 938 | 容易 939 | 融洽 940 | 柔和 941 | 柔嫩 942 | 柔韧 943 | 柔软 944 | 柔弱 945 | 柔顺 946 | 肉麻 947 | 如意 948 | 入神 949 | 入神 950 | 入时 951 | 软 952 | 软和 953 | 软弱 954 | 锐利 955 | 弱 956 | 洒脱 957 | 散 958 | 散漫 959 | 散漫 960 | 扫兴 961 | 沙哑 962 | 傻 963 | 善良 964 | 伤感 965 | 伤心 966 | 上进 967 | 上相 968 | 少 969 | 少见 970 | 奢侈 971 | 奢华 972 | 深 973 | 深奥 974 | 深沉 975 | 深厚 976 | 深刻 977 | 深切 978 | 深入 979 | 深远 980 | 深重 981 | 神秘 982 | 神妙 983 | 神奇 984 | 神气 985 | 神圣 986 | 审慎 987 | 慎重 988 | 生动 989 | 生僻 990 | 生气 991 | 生涩 992 | 生硬 993 | 省力 994 | 失常 995 | 失望 996 | 湿 997 | 湿润 998 | 时髦 999 | 实惠 1000 | 实际 1001 | 实用 1002 | 实在 1003 | 世故 1004 | 适时 1005 | 适用 1006 | 适中 1007 | 守旧 1008 | 瘦 1009 | 瘦弱 1010 | 瘦削 1011 | 瘦小 1012 | 舒畅 1013 | 舒服 1014 | 舒适 1015 | 舒坦 1016 | 舒心 1017 | 疏松 1018 | 熟 1019 | 熟练 1020 | 衰弱 1021 | 帅 1022 | 帅气 1023 | 爽口 1024 | 爽快 1025 | 爽快 1026 | 爽直 1027 | 顺 1028 | 顺畅 1029 | 顺当 1030 | 顺耳 1031 | 顺口 1032 | 顺利 1033 | 顺手 1034 | 顺心 1035 | 顺眼 1036 | 嘶哑 1037 | 死 1038 | 
死板 1039 | 松 1040 | 松弛 1041 | 松脆 1042 | 松快 1043 | 松软 1044 | 松散 1045 | 松懈 1046 | 酥 1047 | 酥脆 1048 | 酥软 1049 | 俗 1050 | 肃静 1051 | 肃穆 1052 | 素净 1053 | 素雅 1054 | 随便 1055 | 随和 1056 | 随意 1057 | 碎 1058 | 琐碎 1059 | 踏实 1060 | 太平 1061 | 贪婪 1062 | 坦诚 1063 | 坦荡 1064 | 坦率 1065 | 淘气 1066 | 特别 1067 | 特殊 1068 | 疼痛 1069 | 体面 1070 | 天真 1071 | 恬淡 1072 | 恬静 1073 | 甜美 1074 | 甜蜜 1075 | 挑剔 1076 | 调皮 1077 | 贴切 1078 | 贴题 1079 | 贴心 1080 | 挺拔 1081 | 挺括 1082 | 通顺 1083 | 通俗 1084 | 统一 1085 | 痛 1086 | 痛苦 1087 | 痛快 1088 | 痛心 1089 | 头痛 1090 | 投缘 1091 | 透 1092 | 透彻 1093 | 透明 1094 | 突出 1095 | 突然 1096 | 团结 1097 | 颓废 1098 | 拖沓 1099 | 妥当 1100 | 妥善 1101 | 妥帖 1102 | 歪 1103 | 外向 1104 | 外行 1105 | 弯 1106 | 弯曲 1107 | 完备 1108 | 完好 1109 | 完美 1110 | 完善 1111 | 完整 1112 | 顽固 1113 | 顽皮 1114 | 顽强 1115 | 晚 1116 | 惋惜 1117 | 婉转 1118 | 婉转 1119 | 旺盛 1120 | 危急 1121 | 危险 1122 | 威风 1123 | 威武 1124 | 威严 1125 | 微薄 1126 | 微妙 1127 | 微弱 1128 | 微小 1129 | 为难 1130 | 伟大 1131 | 卫生 1132 | 温存 1133 | 温和 1134 | 温暖 1135 | 温热 1136 | 温柔 1137 | 温顺 1138 | 温馨 1139 | 文静 1140 | 文明 1141 | 文雅 1142 | 紊乱 1143 | 稳 1144 | 稳当 1145 | 稳定 1146 | 稳固 1147 | 稳妥 1148 | 稳重 1149 | 窝囊 1150 | 龌龊 1151 | 无聊 1152 | 无情 1153 | 无知 1154 | 妩媚 1155 | 武断 1156 | 希罕 1157 | 稀 1158 | 稀薄 1159 | 稀罕 1160 | 稀奇 1161 | 稀疏 1162 | 喜人 1163 | 喜悦 1164 | 细 1165 | 细密 1166 | 细腻 1167 | 细弱 1168 | 细微 1169 | 细小 1170 | 细心 1171 | 细致 1172 | 狭隘 1173 | 狭小 1174 | 狭窄 1175 | 下贱 1176 | 下流 1177 | 吓人 1178 | 先进 1179 | 鲜 1180 | 鲜美 1181 | 鲜明 1182 | 鲜嫩 1183 | 鲜艳 1184 | 闲 1185 | 闲散 1186 | 贤惠 1187 | 贤淑 1188 | 咸 1189 | 显赫 1190 | 显眼 1191 | 显要 1192 | 显著 1193 | 险恶 1194 | 险峻 1195 | 险要 1196 | 现实 1197 | 相近 1198 | 相似 1199 | 相像 1200 | 香 1201 | 香甜 1202 | 详尽 1203 | 详实 1204 | 详细 1205 | 祥和 1206 | 响 1207 | 响亮 1208 | 像样 1209 | 消沉 1210 | 消极 1211 | 消瘦 1212 | 萧条 1213 | 潇洒 1214 | 嚣张 1215 | 小 1216 | 小气 1217 | 小心 1218 | 协调 1219 | 邪 1220 | 邪恶 1221 | 斜 1222 | 懈怠 1223 | 心虚 1224 | 辛苦 1225 | 辛劳 1226 | 辛勤 1227 | 辛酸 1228 | 欣慰 1229 | 欣喜 1230 | 新 1231 | 新奇 1232 | 新鲜 1233 | 新颖 1234 | 兴奋 1235 | 兴隆 1236 | 兴盛 1237 | 兴旺 1238 | 醒目 1239 | 幸福 1240 | 
幸运 1241 | 凶 1242 | 凶残 1243 | 凶恶 1244 | 凶悍 1245 | 凶狠 1246 | 凶猛 1247 | 凶险 1248 | 雄厚 1249 | 雄伟 1250 | 雄壮 1251 | 羞怯 1252 | 羞涩 1253 | 秀丽 1254 | 秀美 1255 | 秀气 1256 | 秀雅 1257 | 虚 1258 | 虚荣 1259 | 虚弱 1260 | 虚伪 1261 | 虚心 1262 | 绚烂 1263 | 绚丽 1264 | 寻常 1265 | 迅猛 1266 | 迅速 1267 | 压抑 1268 | 雅致 1269 | 严 1270 | 严格 1271 | 严谨 1272 | 严峻 1273 | 严厉 1274 | 严密 1275 | 严肃 1276 | 严整 1277 | 严重 1278 | 炎热 1279 | 眼热 1280 | 眼熟 1281 | 艳 1282 | 洋气 1283 | 痒 1284 | 妖媚 1285 | 妖艳 1286 | 妖冶 1287 | 遥远 1288 | 要好 1289 | 要紧 1290 | 要强 1291 | 耀眼 1292 | 野 1293 | 野蛮 1294 | 一般 1295 | 遗憾 1296 | 疑惑 1297 | 异常 1298 | 抑郁 1299 | 意外 1300 | 阴暗 1301 | 阴毒 1302 | 阴冷 1303 | 阴凉 1304 | 阴森 1305 | 阴险 1306 | 殷切 1307 | 殷勤 1308 | 殷实 1309 | 淫荡 1310 | 淫秽 1311 | 隐蔽 1312 | 英俊 1313 | 英明 1314 | 英武 1315 | 英勇 1316 | 硬 1317 | 硬朗 1318 | 庸俗 1319 | 臃肿 1320 | 勇敢 1321 | 勇猛 1322 | 踊跃 1323 | 用功 1324 | 用心 1325 | 优厚 1326 | 优美 1327 | 优柔 1328 | 优秀 1329 | 优异 1330 | 优裕 1331 | 优越 1332 | 忧愁 1333 | 忧伤 1334 | 忧郁 1335 | 幽暗 1336 | 幽静 1337 | 幽默 1338 | 悠久 1339 | 悠闲 1340 | 悠扬 1341 | 犹豫 1342 | 油腻 1343 | 友好 1344 | 有力 1345 | 有利 1346 | 有趣 1347 | 有限 1348 | 有效 1349 | 有益 1350 | 幼小 1351 | 幼稚 1352 | 诱人 1353 | 迂腐 1354 | 愉快 1355 | 愚笨 1356 | 愚蠢 1357 | 愚昧 1358 | 郁闷 1359 | 郁热 1360 | 郁悒 1361 | 冤 1362 | 渊博 1363 | 原始 1364 | 圆 1365 | 圆滑 1366 | 圆满 1367 | 圆润 1368 | 圆熟 1369 | 远 1370 | 远大 1371 | 悦耳 1372 | 匀称 1373 | 匀净 1374 | 匀整 1375 | 杂 1376 | 杂乱 1377 | 在理 1378 | 在行 1379 | 脏乱 1380 | 糟糕 1381 | 早 1382 | 燥热 1383 | 扎实 1384 | 扎手 1385 | 窄 1386 | 窄小 1387 | 仗义 1388 | 胀 1389 | 珍贵 1390 | 真 1391 | 真诚 1392 | 真切 1393 | 真实 1394 | 真挚 1395 | 镇定 1396 | 镇静 1397 | 狰狞 1398 | 整 1399 | 整洁 1400 | 整齐 1401 | 正 1402 | 正常 1403 | 正规 1404 | 正经 1405 | 正派 1406 | 正确 1407 | 正式 1408 | 正统 1409 | 正直 1410 | 郑重 1411 | 知心 1412 | 执著 1413 | 直 1414 | 直观 1415 | 直接 1416 | 直率 1417 | 值钱 1418 | 质朴 1419 | 中看 1420 | 中肯 1421 | 中听 1422 | 中用 1423 | 忠诚 1424 | 忠厚 1425 | 忠实 1426 | 重 1427 | 重要 1428 | 周到 1429 | 周密 1430 | 周全 1431 | 周详 1432 | 主动 1433 | 主观 1434 | 专断 1435 | 专横 1436 | 专心 1437 | 专一 1438 | 庄严 1439 | 庄重 1440 | 壮 1441 | 壮观 1442 | 
壮健 1443 | 壮丽 1444 | 壮烈 1445 | 壮美 1446 | 壮实 1447 | 准 1448 | 准确 1449 | 准时 1450 | 拙劣 1451 | 茁壮 1452 | 卓越 1453 | 着急 1454 | 仔细 1455 | 紫 1456 | 自爱 1457 | 自傲 1458 | 自卑 1459 | 自大 1460 | 自负 1461 | 自豪 1462 | 自觉 1463 | 自满 1464 | 自然 1465 | 自如 1466 | 自私 1467 | 自由 1468 | 自在 1469 | 走俏 1470 | 足 1471 | 尊贵 -------------------------------------------------------------------------------- /query_analysis/dict/common/adverb.csv: -------------------------------------------------------------------------------- 1 | 挨个 2 | 安然 3 | 按理 4 | 按期 5 | 按时 6 | 暗暗 7 | 暗中 8 | 暗自 9 | 傲然 10 | 白 11 | 白白 12 | 保不定 13 | 备不住 14 | 倍加 15 | 被迫 16 | 本 17 | 本当 18 | 本来 19 | 比较 20 | 必 21 | 必定 22 | 必将 23 | 必然 24 | 必须 25 | 毕竟 26 | 便 27 | 别 28 | 秉公 29 | 并 30 | 并肩 31 | 并排 32 | 不 33 | 不定 34 | 不断 35 | 不妨 36 | 不过 37 | 不见得 38 | 不禁 39 | 不愧 40 | 不巧 41 | 不慎 42 | 不胜 43 | 不时 44 | 不意 45 | 不由得 46 | 不再 47 | 不曾 48 | 不住 49 | 才 50 | 草草 51 | 刹时 52 | 差点儿 53 | 长期 54 | 常 55 | 常常 56 | 常年 57 | 彻夜 58 | 趁便 59 | 趁机 60 | 趁势 61 | 趁早 62 | 成倍 63 | 成天 64 | 成心 65 | 诚然 66 | 乘机 67 | 乘势 68 | 乘隙 69 | 乘兴 70 | 迟迟 71 | 迟早 72 | 初 73 | 初步 74 | 初次 75 | 处处 76 | 匆匆 77 | 从 78 | 从此 79 | 从来 80 | 从速 81 | 从头 82 | 从未 83 | 从中 84 | 存心 85 | 大大 86 | 大抵 87 | 大都 88 | 大多 89 | 大概 90 | 大举 91 | 大力 92 | 大约 93 | 大致 94 | 单 95 | 单单 96 | 单独 97 | 当场 98 | 当即 99 | 当面 100 | 当然 101 | 当真 102 | 当众 103 | 倒 104 | 到处 105 | 到底 106 | 登时 107 | 低声 108 | 的确 109 | 顶多 110 | 定期 111 | 定然 112 | 定向 113 | 动不动 114 | 陡然 115 | 都 116 | 独 117 | 独独 118 | 独个儿 119 | 独力 120 | 独自 121 | 断然 122 | 顿然 123 | 顿时 124 | 多半 125 | 多方 126 | 多么 127 | 而后 128 | 而今 129 | 凡 130 | 凡是 131 | 反 132 | 反倒 133 | 反而 134 | 反复 135 | 反过来 136 | 反正 137 | 泛泛 138 | 仿佛 139 | 飞速 140 | 非 141 | 非常 142 | 分别 143 | 分批 144 | 分期 145 | 分头 146 | 分外 147 | 纷纷 148 | 奋力 149 | 奋勇 150 | 改日 151 | 改天 152 | 概 153 | 赶紧 154 | 赶快 155 | 赶忙 156 | 赶巧 157 | 敢情 158 | 刚 159 | 刚刚 160 | 刚好 161 | 刚巧 162 | 高度 163 | 高声 164 | 高速 165 | 格外 166 | 根本 167 | 更 168 | 更加 169 | 更为 170 | 公然 171 | 共 172 | 共同 173 | 共总 174 | 姑且 175 | 固然 176 | 故意 177 | 乖乖 178 | 怪 179 | 怪不得 180 | 光 181 | 果然 
182 | 果真 183 | 过 184 | 过多 185 | 过于 186 | 悍然 187 | 好 188 | 好歹 189 | 好好 190 | 好容易 191 | 好生 192 | 好在 193 | 何必 194 | 何尝 195 | 何等 196 | 何妨 197 | 何苦 198 | 何以 199 | 很 200 | 很快 201 | 狠狠 202 | 狠命 203 | 恨不得 204 | 忽 205 | 忽地 206 | 忽而 207 | 忽然 208 | 胡乱 209 | 互 210 | 互相 211 | 还 212 | 还是 213 | 缓缓 214 | 会心 215 | 活活 216 | 火速 217 | 或许 218 | 或者 219 | 霍地 220 | 基本上 221 | 及早 222 | 极 223 | 极度 224 | 极端 225 | 极力 226 | 极其 227 | 极为 228 | 即 229 | 即将 230 | 即刻 231 | 即时 232 | 即兴 233 | 急剧 234 | 急忙 235 | 几乎 236 | 既 237 | 加倍 238 | 间或 239 | 简直 240 | 健步 241 | 渐 242 | 渐次 243 | 渐渐 244 | 将 245 | 将近 246 | 将要 247 | 交互 248 | 交替 249 | 较 250 | 较为 251 | 皆 252 | 接连 253 | 竭诚 254 | 竭力 255 | 届时 256 | 借故 257 | 仅 258 | 仅仅 259 | 紧紧 260 | 谨 261 | 尽 262 | 尽 263 | 尽管 264 | 尽可能 265 | 尽快 266 | 尽力 267 | 尽量 268 | 尽情 269 | 尽先 270 | 进一步 271 | 近 272 | 近来 273 | 经常 274 | 径直 275 | 径自 276 | 净 277 | 竞相 278 | 竟 279 | 竟然 280 | 迥然 281 | 究竟 282 | 久久 283 | 久已 284 | 就 285 | 就便 286 | 就此 287 | 就地 288 | 就近 289 | 就势 290 | 就手 291 | 就要 292 | 居然 293 | 决 294 | 决计 295 | 决心 296 | 决意 297 | 绝 298 | 均 299 | 可巧 300 | 刻意 301 | 恐怕 302 | 口头 303 | 快 304 | 快 305 | 快步 306 | 快快 307 | 快速 308 | 快要 309 | 牢牢 310 | 老 311 | 老是 312 | 冷不防 313 | 冷眼 314 | 历来 315 | 厉声 316 | 立即 317 | 立刻 318 | 立时 319 | 立体 320 | 连连 321 | 连忙 322 | 连年 323 | 连日 324 | 连声 325 | 连夜 326 | 谅必 327 | 临场 328 | 临床 329 | 临时 330 | 另 331 | 另外 332 | 另行 333 | 陆续 334 | 屡 335 | 屡次 336 | 屡屡 337 | 略 338 | 略微 339 | 轮番 340 | 马上 341 | 蛮 342 | 满心 343 | 慢慢 344 | 贸然 345 | 没 346 | 没有 347 | 每每 348 | 猛不防 349 | 猛地 350 | 猛然 351 | 明明 352 | 莫 353 | 莫不 354 | 莫非 355 | 蓦地 356 | 蓦然 357 | 默默 358 | 难道 359 | 难怪 360 | 偶 361 | 偶尔 362 | 偏 363 | 偏偏 364 | 偏巧 365 | 飘然 366 | 拼命 367 | 拼死 368 | 频频 369 | 平素 370 | 凭空 371 | 颇 372 | 齐声 373 | 其实 374 | 岂 375 | 起初 376 | 起码 377 | 起先 378 | 恰 379 | 恰好 380 | 恰恰 381 | 恰巧 382 | 千万 383 | 潜心 384 | 强行 385 | 悄悄 386 | 悄然 387 | 悄声 388 | 切切 389 | 且 390 | 亲笔 391 | 亲耳 392 | 亲口 393 | 亲身 394 | 亲手 395 | 亲眼 396 | 亲自 397 | 轻轻 398 | 轻易 399 | 权且 400 | 全 401 | 全都 402 | 全力 403 | 全然 404 | 全速 405 | 却 406 | 确 407 | 冉冉 
408 | 任情 409 | 任意 410 | 仍 411 | 仍旧 412 | 仍然 413 | 日渐 414 | 日趋 415 | 日夜 416 | 日益 417 | 如期 418 | 如实 419 | 如数 420 | 霎时 421 | 擅自 422 | 尚 423 | 尚未 424 | 稍 425 | 稍顷 426 | 稍稍 427 | 稍微 428 | 稍为 429 | 稍许 430 | 舍命 431 | 深深 432 | 甚 433 | 甚至 434 | 生来 435 | 时不时 436 | 时常 437 | 时而 438 | 时刻 439 | 时时 440 | 实际上 441 | 始终 442 | 势必 443 | 事先 444 | 适才 445 | 誓死 446 | 首次 447 | 首先 448 | 书面 449 | 倏地 450 | 率先 451 | 顺便 452 | 顺带 453 | 顺脚 454 | 顺势 455 | 私下 456 | 私自 457 | 死 458 | 死死 459 | 似乎 460 | 伺机 461 | 肆意 462 | 素常 463 | 素来 464 | 随处 465 | 随地 466 | 随后 467 | 随机 468 | 随即 469 | 随口 470 | 随身 471 | 随时 472 | 随手 473 | 遂 474 | 索性 475 | 太 476 | 特 477 | 特别 478 | 特此 479 | 特地 480 | 特意 481 | 提早 482 | 挺 483 | 通常 484 | 通共 485 | 通盘 486 | 通通 487 | 同年 488 | 同期 489 | 同时 490 | 统共 491 | 统统 492 | 偷偷 493 | 徒步 494 | 徒手 495 | 团团 496 | 婉言 497 | 万分 498 | 万万 499 | 往往 500 | 微微 501 | 巍然 502 | 唯 503 | 惟 504 | 惟独 505 | 委实 506 | 未 507 | 未必 508 | 未尝 509 | 未免 510 | 未曾 511 | 稳步 512 | 无不 513 | 无偿 514 | 无处 515 | 无非 516 | 无故 517 | 无怪 518 | 无奈 519 | 无私 520 | 无暇 521 | 无须 522 | 无疑 523 | 毋庸 524 | 勿 525 | 务必 526 | 务期 527 | 务须 528 | 悉心 529 | 瞎 530 | 先 531 | 先后 532 | 先期 533 | 先是 534 | 险些 535 | 现 536 | 相当 537 | 相对 538 | 相互 539 | 相继 540 | 向来 541 | 协力 542 | 欣然 543 | 新近 544 | 信手 545 | 兴许 546 | 行将 547 | 幸而 548 | 幸好 549 | 幸亏 550 | 休 551 | 须 552 | 徐徐 553 | 蓄意 554 | 旋即 555 | 压根儿 556 | 眼看 557 | 也 558 | 也许 559 | 业已 560 | 一 561 | 一边 562 | 一并 563 | 一旦 564 | 一道 565 | 一定 566 | 一度 567 | 一概 568 | 一共 569 | 一骨碌 570 | 一贯 571 | 一举 572 | 一块儿 573 | 一连 574 | 一律 575 | 一面 576 | 一齐 577 | 一起 578 | 一气 579 | 一时 580 | 一同 581 | 一味 582 | 一向 583 | 一心 584 | 一再 585 | 一直 586 | 依次 587 | 依法 588 | 已 589 | 已经 590 | 义务 591 | 亦 592 | 益发 593 | 毅然 594 | 因故 595 | 迎面 596 | 迎头 597 | 硬 598 | 硬是 599 | 永 600 | 永不 601 | 永世 602 | 永远 603 | 尤 604 | 尤其 605 | 尤为 606 | 由此 607 | 由衷 608 | 犹 609 | 有点 610 | 有时 611 | 有意 612 | 又 613 | 预 614 | 预先 615 | 愈 616 | 愈发 617 | 愈加 618 | 愈益 619 | 原 620 | 原来 621 | 原先 622 | 远远 623 | 怨不得 624 | 约 625 | 约略 626 | 约莫 627 | 越 628 | 越发 629 | 越加 630 | 越来越 631 | 越是 632 | 匀速 
633 | 再 634 | 再次 635 | 再度 636 | 再三 637 | 再行 638 | 在 639 | 暂 640 | 暂且 641 | 暂时 642 | 早日 643 | 早已 644 | 早早 645 | 择优 646 | 曾 647 | 曾经 648 | 照例 649 | 照样 650 | 照直 651 | 真 652 | 真正 653 | 整天 654 | 整整 655 | 正 656 | 正要 657 | 正在 658 | 执意 659 | 直 660 | 直截 661 | 只 662 | 只不过 663 | 只顾 664 | 只管 665 | 只好 666 | 只身 667 | 只是 668 | 指不定 669 | 至 670 | 至此 671 | 至多 672 | 至今 673 | 至少 674 | 终 675 | 终归 676 | 终究 677 | 终年 678 | 终于 679 | 衷心 680 | 重 681 | 重点 682 | 重新 683 | 骤 684 | 骤然 685 | 逐步 686 | 逐个 687 | 逐级 688 | 逐渐 689 | 逐日 690 | 逐一 691 | 主要 692 | 专 693 | 专程 694 | 专门 695 | 转瞬 696 | 转眼 697 | 谆谆 698 | 准 699 | 准保 700 | 酌情 701 | 着实 702 | 着意 703 | 自 704 | 自动 705 | 自发 706 | 自古 707 | 自然 708 | 自行 709 | 总 710 | 总共 711 | 总归 712 | 总是 713 | 总算 714 | 纵情 715 | 足 716 | 足以 717 | 足足 718 | 最 719 | 最终 -------------------------------------------------------------------------------- /query_analysis/dict/common/auxiliary.csv: -------------------------------------------------------------------------------- 1 | 的 2 | 了 3 | 着 4 | 地 5 | 等 6 | 所 7 | 得 8 | 之 9 | 呢 10 | 吧 11 | 吗 12 | 过 13 | 呀 14 | 啦 15 | 等等 16 | 分之 17 | 啊 18 | 来 19 | 哩 20 | 嘛 21 | 似的 22 | 也 23 | 的话 24 | 底 25 | 呵 26 | 而已 27 | 似地 28 | 是的 29 | 罢 30 | 也好 31 | 罢了 32 | 矣 33 | 乎 34 | 么 35 | 呐 36 | 咧 37 | 哪 38 | 呗 39 | 者 40 | 喽 41 | 哇 42 | 着呢 43 | 也罢 44 | 哉 45 | 罗 46 | 般 47 | 哟 48 | 阿 49 | 耶 50 | 呕 51 | 噢 52 | 呦 53 | 啵 54 | 嘞 55 | 咯 56 | 哈 57 | 不 58 | 哎 59 | 哎呀 60 | 哎哟 61 | 咦 62 | 不成 63 | 得了 64 | 来着 65 | 了得 66 | 着哩 67 | 着呐 68 | 来的 69 | 便了 70 | 好了 71 | 再说 72 | 来听 73 | 来听听 74 | 好吗 75 | 好不 76 | 好不好 77 | 可以吗 78 | 不行吗 79 | 行不行 80 | 行不行啊 81 | 就不行吗 -------------------------------------------------------------------------------- /query_analysis/dict/common/degree.csv: -------------------------------------------------------------------------------- 1 | 绝对 2 | 肯定 3 | 当然 4 | 必须 5 | 必然 6 | 最 7 | 最为 8 | 顶 9 | 极 10 | 极其 11 | 极端 12 | 极度 13 | 极为 14 | 格外 15 | 分外 16 | 忒 17 | 尽 18 | 那么 19 | 特 20 | 特别 21 | 尤其 22 | 尤为 23 | 很 24 | 蛮 25 | 非常 26 | 十分 27 | 十足 28 | 更 29 | 更加 30 | 更其 31 
| 越 32 | 越发 33 | 越加 34 | 过于 35 | 完全 36 | 甚为 37 | 颇 38 | 挺 39 | 还 40 | 相当 41 | 愈 42 | 愈加 43 | 愈来愈 44 | 越来越 45 | 进一步 46 | 一点也 47 | 一点都 48 | 稍 49 | 稍微 50 | 略微 51 | 稍为 52 | 稍加 53 | 略 54 | 略略 55 | 多少 56 | 几乎 57 | 点 58 | 有点 59 | 微乎其微 60 | 些许 61 | 较 62 | 比较 63 | 较为 64 | 半点 65 | 再 66 | 继续 67 | 接着 68 | 随便 69 | 赶紧 70 | 赶快 71 | 立即 72 | 立刻 73 | 马上 -------------------------------------------------------------------------------- /query_analysis/dict/common/directional.csv: -------------------------------------------------------------------------------- 1 | 来 2 | 起来 3 | 出 4 | 出来 5 | 去 6 | 回来 7 | 过 8 | 过去 9 | 下来 10 | 过来 11 | 下 12 | 下去 13 | 上 14 | 回去 15 | 上来 16 | 起 17 | 出去 18 | 进来 19 | 上去 20 | 进 21 | 进去 22 | 入 23 | 以来 24 | 开 25 | 回 26 | 开来 27 | 中 28 | 到 -------------------------------------------------------------------------------- /query_analysis/dict/common/honorific.csv: -------------------------------------------------------------------------------- 1 | 请 2 | 麻烦 3 | 求 4 | 劳驾 5 | 能否 6 | 要求 7 | 要 8 | 让 -------------------------------------------------------------------------------- /query_analysis/dict/common/interj.csv: -------------------------------------------------------------------------------- 1 | 哎 2 | 呵 3 | 嗯 4 | 噢 5 | 哟 6 | 哼 7 | 喂 8 | 哎呀 9 | 嘿 10 | 好了 11 | 唔 12 | 咦 13 | 嗨 14 | 咳 15 | 嗬 16 | 呃 17 | 哎哟 18 | 嗳 19 | 啊呀 20 | 喏 21 | 好的 22 | 好啦 23 | 真烦 -------------------------------------------------------------------------------- /query_analysis/dict/common/modals.csv: -------------------------------------------------------------------------------- 1 | 能 2 | 能不能 3 | 能够 4 | 会 5 | 会不会 6 | 可 7 | 可能 8 | 可以 9 | 得以 10 | 愿意 11 | 乐意 12 | 情愿 13 | 肯 14 | 要 15 | 愿 16 | 想要 17 | 要想 18 | 敢 19 | 敢于 20 | 乐于 21 | 应 22 | 应该 23 | 应当 24 | 得 25 | 该 26 | 当 27 | 须得 28 | 理当 29 | 值得 30 | 便于 31 | 难于 32 | 望 33 | 希望 34 | 想 35 | 咋不给 36 | 怎么不 37 | 那 38 | 给 -------------------------------------------------------------------------------- /query_analysis/dict/common/numeral.csv: 
-------------------------------------------------------------------------------- 1 | 一 2 | 两 3 | 三 4 | 几 5 | 一些 6 | 二 7 | 四 8 | 五 9 | 第一 10 | 多 11 | 第二 12 | 十 13 | 六 14 | 有些 15 | 半 16 | 八 17 | 第三 18 | 七 19 | 万 20 | 百 21 | 二十 22 | 九 23 | 之一 24 | 多少 25 | 三十 26 | 千 27 | 数 28 | 十二 29 | 五十 30 | 十五 31 | 四十 32 | 一百 33 | 十一 34 | 十九 35 | 几十 36 | 若干 37 | 一半 38 | 十八 39 | 亿 40 | 无数 41 | 一点 42 | 六十 43 | 第四 44 | 十三 45 | 许多 46 | 十七 47 | 十几 48 | 十六 49 | 零 50 | 好几 51 | 七十 52 | 八十 53 | 十四 54 | 二百 55 | 很多 56 | 一千 57 | 俩 58 | 三百 59 | 好多 60 | 一个 61 | 一下 62 | 余 63 | 十万 64 | 五百 65 | 几百 66 | 几千 67 | 三千 68 | 一万 69 | 百万 70 | 第五 71 | 四百 72 | 好些 73 | 些 74 | 九十 75 | 多数 76 | 二千 77 | 千万 78 | 多半 79 | 多种 80 | 两千 81 | 五千 82 | 大量 83 | 第六 84 | 双 85 | 大多数 86 | 八百 87 | 几个 88 | 丙 89 | 点 90 | 第七 91 | 多次 92 | 六百 93 | 甲 94 | 四千 95 | 乙 96 | 千百万 97 | 不少 98 | 三万 99 | 一二 100 | 七百 101 | 几万 102 | 五万 103 | 千百 104 | 两万 105 | 第八 106 | 几十万 107 | 一百万 108 | 二万 109 | 上百 110 | 少数 111 | 少量 112 | 亿万 113 | 六千 114 | 第十 115 | 仨 116 | 上千 117 | 大多 118 | 大批 -------------------------------------------------------------------------------- /query_analysis/dict/common/prefix_unsual.csv: -------------------------------------------------------------------------------- 1 | 今天 2 | 现在 -------------------------------------------------------------------------------- /query_analysis/dict/common/prep.csv: -------------------------------------------------------------------------------- 1 | 从 2 | 自 3 | 自从 4 | 于 5 | 打 6 | 到 7 | 往 8 | 在 9 | 当 10 | 朝 11 | 向 12 | 顺着 13 | 沿着 14 | 随着 15 | 按 16 | 照 17 | 按照 18 | 依 19 | 依照 20 | 本着 21 | 经过 22 | 通过 23 | 根据 24 | 以 25 | 凭 26 | 为 27 | 为了 28 | 为着 29 | 因 30 | 由于 31 | 因为 32 | 对 33 | 对于 34 | 把 35 | 向 36 | 跟 37 | 与 38 | 同 39 | 给 40 | 关于 41 | 除 42 | 除了 43 | 除去 44 | 除非 45 | 被 46 | 叫 47 | 让 48 | 给 49 | 比 50 | 和 51 | 同 -------------------------------------------------------------------------------- /query_analysis/dict/common/pronoun.csv: 
-------------------------------------------------------------------------------- 1 | 小忆 2 | 小姨 3 | 机器人 4 | 机器 5 | 小主人 6 | 我 7 | 我们 8 | 咱 9 | 咱们 10 | 你 11 | 你们 12 | 您 13 | 您们 14 | 他 15 | 他们 16 | 她 17 | 她们 18 | 它 19 | 它们 20 | 姐姐 21 | 妹 22 | 弟 23 | 哥 24 | 叔 25 | 舅 26 | 姑 27 | 婶 28 | 姥 29 | 姥爷 30 | 爷 31 | 奶 32 | 爸 33 | 妈 34 | 娘 35 | 老子 36 | 大爷 37 | 哥们 38 | 姐们 39 | 俺 40 | 俺们 41 | 本 42 | 本人 43 | 本身 44 | 彼 45 | 彼此 46 | 别的 47 | 别人 48 | 此 49 | 此间 50 | 大伙儿 51 | 大家 52 | 多少 53 | 二者 54 | 该 55 | 各 56 | 各处 57 | 各地 58 | 各个 59 | 各国 60 | 各界 61 | 各人 62 | 各位 63 | 各样 64 | 各种 65 | 各自 66 | 各族 67 | 何 68 | 何处 69 | 何时 70 | 后者 71 | 几时 72 | 两者 73 | 每 74 | 某 75 | 某个 76 | 某些 77 | 哪 78 | 哪边 79 | 哪儿 80 | 哪个 81 | 哪里 82 | 哪些 83 | 哪样 84 | 那 85 | 那边 86 | 那儿 87 | 那个 88 | 那会儿 89 | 那里 90 | 那么 91 | 那么些 92 | 那么样 93 | 那时 94 | 那些 95 | 那样 96 | 旁人 97 | 其 98 | 其他 99 | 其它 100 | 其余 101 | 其中 102 | 前者 103 | 人家 104 | 任何 105 | 如此 106 | 如何 107 | 啥 108 | 甚么 109 | 什么 110 | 什么样 111 | 谁 112 | 他人 113 | 为何 114 | 为什么 115 | 一切 116 | 有的 117 | 有些 118 | 怎 119 | 怎么 120 | 怎么办 121 | 怎么样 122 | 怎样 123 | 咋 124 | 这 125 | 这边 126 | 这儿 127 | 这个 128 | 这会儿 129 | 这里 130 | 这么 131 | 这么点儿 132 | 这么些 133 | 这么样 134 | 这时 135 | 这些 136 | 这样 137 | 诸位 138 | 自 139 | 自个儿 140 | 自各儿 141 | 自己 142 | 自家 143 | 自身 144 | 自我 -------------------------------------------------------------------------------- /query_analysis/dict/common/quantifier.csv: -------------------------------------------------------------------------------- 1 | 个 2 | 种 3 | 次 4 | 条 5 | 位 6 | 只 7 | 块 8 | 元 9 | 些 10 | 项 11 | 张 12 | 层 13 | 段 14 | 米 15 | 件 16 | 座 17 | 场 18 | 句 19 | 点 20 | 支 21 | 双 22 | 颗 23 | 届 24 | 批 25 | 吨 26 | 亩 27 | 篇 28 | 斤 29 | 名 30 | 把 31 | 克 32 | 分 33 | 里 34 | 分钟 35 | 群 36 | 秒 37 | 根 38 | 公里 39 | 片 40 | 厘米 41 | 份 42 | 辆 43 | 棵 44 | 口 45 | 本 46 | 封 47 | 遍 48 | 岁 49 | 副 50 | 回 51 | 公斤 52 | 页 53 | 部 54 | 幅 55 | 阵 56 | 顿 57 | 大批 58 | 台 59 | 道 60 | 间 61 | 对 62 | 朵 63 | 声 64 | 股 65 | 尺 66 | 卷 67 | 门 68 | 下 69 | 枚 70 | 头 71 | 度 72 | 毫升 73 | 首 74 | 毫米 75 | 架 76 | 套 77 | 匹 78 | 堆 
79 | 寸 80 | 艘 81 | 丈 82 | 千米 83 | 步 84 | 小时 85 | 家 86 | 幢 87 | 排 88 | 番 89 | 所 90 | 平方米 91 | 升 92 | 串 93 | 伏 94 | 盏 95 | 章 96 | 趟 97 | 两 98 | 粒 99 | 毫克 100 | 伏特 101 | 千克 102 | 公顷 103 | 杯 104 | 类 105 | 株 106 | 枝 107 | 安培 108 | 平方公里 109 | 滴 110 | 节 111 | 等 112 | 摩 113 | 种种 114 | 一下 115 | 册 116 | 户 117 | 微米 118 | 立方米 119 | 出 120 | 级 121 | 期 122 | 组 123 | 欧姆 124 | 重 125 | 行 126 | 担 127 | 角 128 | 公尺 129 | 束 130 | 个个 131 | 平方 132 | 扇 133 | 摩尔 134 | 好几个 135 | 千瓦 136 | 起 137 | 面 138 | 簇 139 | 具 140 | 列 141 | 辈 142 | 摄氏度 143 | 石 144 | 包 145 | 顶 146 | 付 147 | 样 148 | 帮 149 | 笔 150 | 缕 151 | 号 152 | 尊 153 | 盘 154 | 半截 155 | 大群 156 | 桩 157 | 斗 158 | 磅 159 | 代 160 | 发 161 | 毛 162 | 团 163 | 瓣 164 | 眼 165 | 人次 166 | 焦耳 167 | 捆 168 | 碗 169 | 瓶 170 | 圈 171 | 光年 172 | 盆 173 | 库仑 174 | 栋 175 | 一点 176 | 海里 177 | 成 178 | 堵 -------------------------------------------------------------------------------- /query_analysis/dict/dict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
class WordsDict(object):
    """
    Word list loaded from a CSV dictionary file and rendered as a regex
    alternation.

    The first column of every row is the word itself; any further columns can
    be stored as named properties of that word.
    """
    prefix = ''
    suffix = ''

    def __init__(self, file_name, property_name_list=None,
                 group_name=None, prefex='', suffix=''):
        """
        Load a CSV file and initialise the word list (and optional properties).

        :param file_name: path of the CSV file to read
        :param property_name_list: names for the columns after the word; when
            empty or None only the words are loaded
        :param group_name: regex group name used by ``join_all`` (optional)
        :param prefex: text placed before the joined words in ``join_all``
            (the misspelt keyword is kept so existing callers keep working)
        :param suffix: text placed after the joined words in ``join_all``
        :return:
        """
        self.words_list = []
        self.property = {}
        self.group_name = group_name
        self.prefix = prefex
        self.suffix = suffix

        if not property_name_list:
            self.load_from_file(file_name, group_name)
            return

        self.load_from_file_with_perperty(file_name,
                                          property_name_list,
                                          group_name)

    @staticmethod
    def _iter_rows(file_name):
        """
        Yield every non-blank line of the file with surrounding whitespace
        (including any '\\r' / '\\n' line ending) removed.
        """
        # strip() already drops '\r\n', so the old 'rU' open mode (removed in
        # Python 3.11) is not needed.
        with open(file_name) as f:
            for line in f:
                row = line.strip()
                if row:
                    yield row

    def load_from_file(self, file_name, group_name=None):
        """
        Load a dictionary that contains words only, one per line.

        Blank lines are ignored so that no empty alternative ends up in the
        regex built by ``join_all``.

        :param file_name: path of the file to read
        :param group_name: group name stored on the instance (replaces the
            current one, also when left as None)
        :return:
        """
        self.words_list = list(self._iter_rows(file_name))
        self.group_name = group_name

    def load_from_file_with_perperty(self, file_name, property_name_list, group_name):
        """
        Load a dictionary whose rows are ``word,value1,value2,...``.

        The values are paired with ``property_name_list`` in order; surplus
        values or surplus names are dropped (``zip`` semantics). Blank lines
        and rows with an empty word are skipped.

        :param file_name: path of the CSV file to read
        :param property_name_list: names of the columns following the word
        :param group_name: group name stored on the instance
        :return:
        """
        words_list = []
        properties = {}
        for row in self._iter_rows(file_name):
            fields = row.split(',')
            word = fields[0]
            if not word:
                continue
            words_list.append(word)
            properties[word] = dict(zip(property_name_list, fields[1:]))
        self.words_list = words_list
        self.property = properties
        self.group_name = group_name

    @property
    def join_all(self):
        """
        Return the words as a regex alternation.

        Without a group name the result is a plain group, e.g. ``(a|b)``;
        with a group name it is a named capturing group, e.g.
        ``(?P<name>(a|b))``. ``prefix`` and ``suffix`` are inserted verbatim
        right after the opening and right before the closing parenthesis.

        :return: the regex fragment as a string
        """
        joined = '|'.join(self.words_list)
        if not self.group_name:
            return '({}{}{})'.format(self.prefix, joined, self.suffix)
        return '(?P<{}>({}{}{}))'.format(
            self.group_name, self.prefix, joined, self.suffix)

    def set_group_name(self, group_name):
        """
        Set the group name used by ``join_all``.

        :param group_name: new group name
        :return:
        """
        self.group_name = group_name
Nlu_Framework.register_dict('vehicle', vehicle_name) 142 | Nlu_Framework.register_dict('sight', sight_name) 143 | -------------------------------------------------------------------------------- /query_analysis/dict/opera/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:17/2/25 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/opera/opera.csv: -------------------------------------------------------------------------------- 1 | 戏曲,0 2 | 京剧,1 3 | 越剧,2 4 | 黄梅戏,3 5 | 沪剧,4 6 | 昆曲,5 7 | 二人转,6 8 | 豫剧,7 9 | 河南坠子,8 -------------------------------------------------------------------------------- /query_analysis/dict/sight/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/6/19 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/sight/sight.csv: -------------------------------------------------------------------------------- 1 | 雪山,18 2 | 自由女神像,20 3 | 埃菲尔铁塔,24 4 | 地球,30 5 | 热气球,31 6 | 龙卷风,53 7 | 太阳系,57 8 | 闪电,60 9 | 烟花,62 10 | 火焰,63 11 | 火,63 12 | 火山,76 -------------------------------------------------------------------------------- /query_analysis/dict/story/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/6/5 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/vehicle/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/6/11 10 | """ -------------------------------------------------------------------------------- /query_analysis/dict/vehicle/vehicle.csv: -------------------------------------------------------------------------------- 1 | 出租车,4 2 | 摩托车,5 3 | 摩托,5 4 | 自行车,6 5 | 卡车,7 6 | 巴士,5 7 | 公交车,5 8 | 大巴,8 9 | 警车,9 10 | 直升机,23 11 | 挖掘机,26 12 | 火箭,32 13 | 救护车,41 14 | 急救车,41 15 | 吊车,42 16 | 越野车,51 17 | 轮船,69 18 | 拖拉机,80 19 | 消防车,88 20 | 坦克,91 21 | 小轿车,94 -------------------------------------------------------------------------------- /query_analysis/handler/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/9/3 10 | """ -------------------------------------------------------------------------------- /query_analysis/handler/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2014,小忆机器人 5 | All rights reserved. 
def exception_control(func):
    '''Decorator that turns a handler method's outcome into a JSON reply.

    The wrapped method is called with ``self`` only. Its return value is sent
    through ``self.send_json`` with ``E_SUCC``/"OK". Missing arguments and
    assertion failures are reported with ``E_PARAM``; any other exception is
    logged and reported with ``E_INTER``. ``tornado.web.HTTPError`` is
    re-raised untouched.
    '''
    @functools.wraps(func)
    def wrapper(self):
        '''Run the wrapped method and send the resulting JSON reply.
        '''
        try:
            payload = func(self)
        except (MissingArgumentError, AssertionError) as ex:
            reply = (None, E_PARAM, str(ex))
        except tornado.web.HTTPError:
            raise
        except Exception as ex:
            text = str(ex)
            request = self.request
            # Requests carrying uploaded files are logged by URI only; for the
            # rest the request body is appended.
            if request.files:
                log_msg = request.uri
            else:
                log_msg = "%s %s" % (request.uri, request.body)
            logging.error(log_msg, exc_info=True)
            reply = (None, E_INTER, text)
        else:
            reply = (payload, E_SUCC, "OK")
        self.send_json(*reply)
    return wrapper
class BaseInnerAPIHandler(BaseHandler):
    '''Handler base class for internal APIs.

    Subclasses implement ``_get`` / ``_post``; the public ``get`` / ``post``
    are wrapped by ``exception_control``.
    '''

    def get_argument(
            self, name, default=tornado.web.RequestHandler._ARG_DEFAULT,
            strip=True):
        '''Return the request argument, UTF-8 encoding it when it is unicode.
        '''
        raw = super(BaseInnerAPIHandler, self).get_argument(name, default, strip)
        return raw.encode('utf-8') if isinstance(raw, unicode) else raw

    def render_json(self, jsonable):
        '''Finish the request with ``jsonable`` serialized as JSON.

        The object is passed through ``force_utf8_new`` first and dumped with
        ``json_default`` as the fallback serializer.
        '''
        self.set_header("Content-Type", "application/json")
        payload = json.dumps(force_utf8_new(jsonable), default=json_default)
        self.finish(payload)

    def send_json(self, body, code, msg=""):
        '''Send the ``{'code', 'msg', 'body'}`` reply via ``render_json``.
        '''
        envelope = {'code': code, 'msg': msg, 'body': body}
        self.render_json(envelope)

    def _get(self):
        # Default: GET is not allowed unless a subclass overrides this.
        raise HTTPError(405)

    def _post(self):
        # Default: POST is not allowed unless a subclass overrides this.
        raise HTTPError(405)

    @exception_control
    def get(self):
        return self._get()

    @exception_control
    def post(self):
        return self._post()
def json_default(obj):
    '''Fallback serializer for ``json.dumps(..., default=json_default)``.

    ``datetime.datetime`` values become ``'YYYY-MM-DD HH:MM:SS'`` strings and
    ``datetime.date`` values become ``'YYYY-MM-DD'`` strings.

    :param obj: the object ``json`` could not serialize on its own
    :return: a string representation of the date/time value
    :raises TypeError: for every other type, as the ``default`` hook of the
        ``json`` module is expected to do (previously such objects were
        silently emitted as ``null`` while plain dates were rejected)
    '''
    # datetime must be tested first: it is a subclass of date.
    if isinstance(obj, datetime.datetime):
        return obj.strftime('%Y-%m-%d %H:%M:%S')
    if isinstance(obj, datetime.date):
        return obj.strftime('%Y-%m-%d')
    raise TypeError('%r is not JSON serializable' % (obj,))
@route("/interpreter/info")
class Interpreter(BaseInnerAPIHandler):
    """
    Handler for the general (non-scene) semantic template lookup.
    """
    def _get(self):
        """
        Read the ``speech`` and ``robot_code`` request arguments, ask the
        semantic service for the matching info and return it after passing
        it through ``force_utf8_new``.

        :return: the converted result of ``service.get_semantic_info``
        """
        # Argument order matters: a missing ``speech`` is reported first.
        speech = self.get_argument('speech')
        robot_code = self.get_argument('robot_code')
        return force_utf8_new(service.get_semantic_info(robot_code, speech))
class Application(tornado.web.Application):

    '''
    Tornado application wired with the auto-registered route handlers.
    '''

    def __init__(self):
        '''
        Build the settings from the command-line options and register every
        handler collected in ``auto_route_handlers``.
        '''
        project_path = options.project_path
        settings = dict(
            xsrf_cookies=False,
            site_title='demo',
            debug=options.debug,
            static_path=os.path.join(project_path, 'static'),
            template_path=os.path.join(project_path, 'tpl'),
        )
        handlers = auto_route_handlers
        logging.info("----> %s", handlers)
        tornado.web.Application.__init__(self, handlers, **settings)

    def log_request(self, handler):
        '''Log one finished request.

        The line holds the status code, the handler's request summary and the
        request time multiplied by 1000. Status below 400 is logged at info
        level, below 500 at warning level, everything else at error level.

        @handler: request handler
        '''
        status = handler.get_status()
        elapsed = 1000.0 * handler.request.request_time()
        line = '%d %s %.2f' % (status, handler._request_summary(), elapsed)
        if status >= 500:
            emit = logging.error
        elif status >= 400:
            emit = logging.warning
        else:
            emit = logging.info
        emit(line)
now < deadline and (ioloop._callbacks or ioloop._timeouts): 84 | ioloop.add_timeout(now + 1, stop_loop) 85 | else: 86 | # 处理完现有的 callback 和 timeout 后 87 | ioloop.stop() 88 | logging.info('Shutdown!') 89 | 90 | stop_loop() 91 | 92 | 93 | def main(): 94 | ''' main 函数 95 | ''' 96 | # 开启 search_engin_server 97 | ioloop = tornado.ioloop.IOLoop.instance() 98 | server = tornado.httpserver.HTTPServer(Application(), xheaders=True) 99 | server.listen(options.port) 100 | 101 | def sig_handler(sig, _): 102 | ''' 信号接收函数 103 | ''' 104 | logging.warn("Caught signal: %s", sig) 105 | shutdown(ioloop, server) 106 | 107 | signal.signal(signal.SIGTERM, sig_handler) 108 | signal.signal(signal.SIGINT, sig_handler) 109 | ioloop.start() 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | 115 | -------------------------------------------------------------------------------- /query_analysis/lib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/12/10 10 | """ -------------------------------------------------------------------------------- /query_analysis/lib/scene.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
class RobotScene(object):
    """Scene state of a robot, stored in a Redis hash keyed by robot code."""

    def __init__(self):
        pass

    @staticmethod
    def get_scene(robot_code):
        """Return all fields of the robot's scene hash.

        :param robot_code: robot code (the Redis key)
        :return: result of ``HGETALL`` on that key
        """
        return scene_redis.hgetall(robot_code)

    @staticmethod
    def set_scene_name(robot_code, scene_name):
        """Store the scene name in the ``name`` field of the robot's hash.

        :param robot_code: robot code
        :param scene_name: scene name to store
        """
        RobotScene.set_scene_kv(robot_code, 'name', scene_name)

    @staticmethod
    def get_scene_name(robot_code):
        """Return the ``name`` field of the robot's scene hash.

        :param robot_code: robot code
        """
        return RobotScene.get_scene_kv(robot_code, 'name')

    @staticmethod
    def clear_scene_name(robot_code):
        """Delete the robot's scene key.

        This removes the whole hash (name and every other stored field),
        not only the ``name`` field.

        :param robot_code: robot code
        """
        scene_redis.delete(robot_code)

    @staticmethod
    def set_scene_kv(robot_code, key, value):
        """Set one field of the robot's scene hash.

        :param robot_code: robot code
        :param key: hash field
        :param value: value to store
        """
        scene_redis.hset(robot_code, key, value)

    @staticmethod
    def get_scene_kv(robot_code, key):
        """Return one field of the robot's scene hash.

        :param robot_code: robot code
        :param key: hash field
        """
        return scene_redis.hget(robot_code, key)
class RobotStatus(object):
    """Robot status kept as JSON entries in the Redis list ``status_<code>``."""

    def __init__(self):
        pass

    @staticmethod
    def get_robot_status(robot_code):
        """Return the most recently pushed status of the robot.

        :param robot_code: robot code
        :return: the decoded JSON at the head of the list, or ``None`` when
            the head entry is missing or empty
        """
        raw = status_redis.lindex('status_{}'.format(robot_code), 0)
        if not raw:
            return None
        return json.loads(raw)

    @staticmethod
    def set_robot_status(robot_code, status_info):
        """Push a new status onto the head of the robot's status list.

        :param robot_code: robot code
        :param status_info: JSON-serialisable status information
        """
        key = 'status_{}'.format(robot_code)
        payload = json.dumps(status_info)
        status_redis.lpush(key, payload)

    @staticmethod
    def clear_robot_status(robot_code):
        """Delete the robot's whole status list.

        :param robot_code: robot code
        """
        key = 'status_{}'.format(robot_code)
        status_redis.delete(key)
import re
import time

import nlu.animal as animal
import nlu.battery as battery
import nlu.dance as dance
import nlu.display as display
import nlu.entertainment as entertainment
import nlu.expand_instruction as expand_instruction
import nlu.mode as mode
import nlu.motion as motion
import nlu.photo as photo
import nlu.profile as profile
import nlu.recognition as recognition
import nlu.media.story as story
import nlu.sight as sight
# NOTE(review): "vehichel" looks like a typo of "vehicle"; the module is
# imported again under the right name below. Kept to avoid changing imports.
import nlu.vehicle as vehichel
import nlu.volume as volume
import nlu.xiaoyi as xiaoyi
import nlu.media.opera as opera
import nlu.store_location as store_location
from nlu import music
from nlu import phone
from nlu import trick
from nlu import vehicle

from nlu.nlu_framework import Nlu_Framework

# Manual smoke-test script: register the domains to try out, then match one
# sentence. Only the music domain is currently enabled; uncomment a line to
# enable another domain.
# Nlu_Framework.register(animal.Animal)
# Nlu_Framework.register(battery.Battery)
# Nlu_Framework.register(dance.Dance)
# Nlu_Framework.register(display.Display)
# Nlu_Framework.register(entertainment.Entertainment)
# Nlu_Framework.register(photo.Photo)
# Nlu_Framework.register(recognition.Recognition)
# Nlu_Framework.register(xiaoyi.XiaoYi)
# Nlu_Framework.register(story.Story)
# Nlu_Framework.register(expand_instruction.ExtendInstruction)
# Nlu_Framework.register(volume.Volume)
# Nlu_Framework.register(mode.Mode)
# Nlu_Framework.register(motion.Motion)
# Nlu_Framework.register(profile.Profile)
# Nlu_Framework.register(vehicle.Vehicle)
# Nlu_Framework.register(sight.Sight)
Nlu_Framework.register(music.Music)
# Nlu_Framework.register(phone.Phone)
# Nlu_Framework.register(trick.Trick)
# Nlu_Framework.register(store_location.StoreLocation)

# Disabled experiment: build a YAML test fixture from ./test/test.csv.
# def load_from_file(file_name):
#     """
#     Load a word list straight from a file (words only, no attributes).
#     :param file_name:
#     :param group_name:
#     :return:
#     """
#     with open(file_name) as f:
#         test_list = [line.strip().replace('\n', '') for line in f if line and not line.startswith('=')]
#     return test_list
#
# test_list = load_from_file('./test/test.csv')
#
# import yaml
# # list1 = []
# # for test_sentence in test_list:
# #     match_dict_list = Nlu_Framework.match(test_sentence)
# #     # print str(test_sentence)
# #     dict1 = {"input": test_sentence,
# #              "output": match_dict_list[0]}
# #     list1.append(dict1)
# #     # print force_utf8_new(match_dict_list[0])
# #
# # list1 = force_utf8_new(list1)
# #
# # print yaml.safe_dump_all(list1, allow_unicode=True, encoding='utf-8')
# f = open('./test/test.yaml')
# input = yaml.safe_load_all(f)
# for i in input:
#     print force_utf8_new(i)

# Disabled timing harness around the match call below.
# print 'start'
# a = time.time()
# for i in range(1000):
match_dict_list = Nlu_Framework.match('我要听刘德华的冰雨')
# print(x) with a single argument prints the same text on Python 2 as the
# old ``print x`` statement did, and is also valid Python 3.
print(match_dict_list)
# b = time.time()
# print a - b
# print match_dict_list


# a = re.match('(小忆我问你)(?P(.)+?)(放在|在)(哪里|什么地方|什么位置)', '小忆我问你手机放在哪里').groups()
# for i in a:
#     print i
class Animal(object):
    # NLU rules of the animal domain, built by concatenating regex fragments.
    # NOTE(review): o/e/attach_name/attach_perperty/range_tag come from
    # utils.utils, which is not visible here; presumably o() = alternation,
    # e() = optional group, attach_*() = tag a fragment with a name or
    # properties -- confirm against utils.utils.

    # Marks these rules as belonging to the "animal" domain.
    service = 'animal'

    # Dictionary-backed fragments. Each assignment shadows the imported
    # dictionary object with its ``join_all`` value inside the class body.
    animal = animal_name.join_all
    pronoun = pronoun.join_all
    stop_words = stop_words.join_all
    modals = modals.join_all

    # Hand-written word groups.
    want = '(要|想|想要)'
    query1 = '(查|查询|看|显示)(一下)?'
    query2 = '(长|什么)'
    query3 = '(样|外表|形状)'
    query4 = '(显示|找|搜索|学习|展览|展示|展出|放|表演|播|陈现|陈放|放映|给|出|画|拿)(一下)?'

    roar = '(叫|叫声)'
    append = '(加|增加|添加|加上)'
    how = '(如何|怎么)'
    call = '(是|叫)'

    # Generic animal categories, tagged 'type'; all_animal combines the
    # dictionary animal names with these categories.
    animal_type = '(虫|鸟)'
    animal_type = attach_name(animal_type, 'type')
    all_animal = o(animal, animal_type)

    # "老虎" -- a bare animal name; the rule carries {'status': 'animal'}
    # (per the original note it only applies once in the animal scene).
    case_1 = attach_perperty(all_animal, {'operation': 'query', 'rule': 1})
    rule_1 = Rule(case_1, {'status': 'animal'})

    # "老虎长什么样" -- what does a tiger look like
    case_2 = all_animal + any_w + '({})?'.format(query2) + any_w + query3 + any_w
    case_2 = attach_perperty(case_2, {'operation': 'query', 'rule': 2})
    rule_2 = Rule(case_2)

    # "我现在想要看虫" -- I want to look at insects now
    case_3 = e(pronoun) + e(stop_words) + want + any_w + query1 + any_w + animal_type + any_w
    case_3 = attach_perperty(case_3, {'operation': 'query', 'rule': 3})
    rule_3 = Rule(case_3)

    # "给我显示个大老虎看看" -- show me a big tiger
    # NOTE(review): stop_words is mandatory at the end here, unlike the
    # e(stop_words) used elsewhere -- confirm this is intended.
    case_4 = e(modals) + e(pronoun) + query4 + any_w + all_animal + stop_words
    case_4 = attach_perperty(case_4, {'operation': 'query', 'rule': 4})
    rule_4 = Rule(case_4)

    # "我要个大老虎看看" -- I want a big tiger to look at
    case_5 = pronoun + want + any_w + all_animal + any_w
    case_5 = attach_perperty(case_5, {'operation': 'query', 'rule': 5})
    rule_5 = Rule(case_5)

    # "什么是老虎" -- what is a tiger
    case_6 = query2 + call + all_animal
    case_6 = attach_perperty(case_6, {'operation': 'query', 'rule': 6})
    rule_6 = Rule(case_6)

    # "老虎怎么叫" -- how does a tiger roar
    case_7 = any_w + all_animal + any_w + how + roar + any_w
    case_7 = attach_perperty(case_7, {'operation': 'roar', 'rule': 7})
    rule_7 = Rule(case_7)

    # "再加一只老虎" -- add one more tiger
    tag = range_tag(2, 'tag')
    case_8 = e(pronoun) + e(stop_words) + append + tag + all_animal + e(stop_words)
    case_8 = attach_perperty(case_8, {'operation': 'append', 'rule': 8})
    # NOTE(review): the "{1,3}" quantifier is appended as raw text; it only
    # repeats the whole case if attach_perperty returns a grouped pattern --
    # confirm.
    rule_8 = Rule(case_8 + "{1,3}")
class Battery(object):
    # NLU rules of the battery domain (asking for the remaining charge).
    # NOTE(review): o/r/e/attach_perperty come from utils.utils, not visible
    # here; presumably o() = alternation, r(x, m, n) = repeat m..n times,
    # e() = optional -- confirm.

    # Marks these rules as belonging to the "battery" domain.
    service = 'battery'

    # Dictionary-backed fragments (each shadows the imported dictionary
    # object with its ``join_all`` value inside the class body).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all

    # Filler words allowed before / after / inside the core phrase.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # Core vocabulary: query verb, "how much", "remaining", "battery".
    query = '(查|查询|告诉我)(下)?'
    howmuch = '(多少|百分之多少|百分之几|几个|几格)'
    left = '(还)?(剩|剩余|剩下|有)' + e(howmuch)
    power = '(的)?(电量|电|电池)'

    # Remaining-charge patterns, e.g. "电量", "查电量", "告诉我电量",
    # "还剩多少电量", "电量剩多少".
    battery_case1 = prefix_0_5 + e(query) + left + power + postfix_0_3  # [query] remaining battery
    battery_case2 = prefix_0_5 + e(query) + power + left + postfix_0_3  # [query] battery remaining
    battery_case3 = e(query) + power  # battery

    battery_sentence = o(battery_case1, battery_case2, battery_case3)
    rule_battery_sentence = Rule(attach_perperty(r(battery_sentence, 1, 3), {'operation': 'get', 'rule': 1}))
class Dance(object):
    # NLU rules of the dance domain, built by concatenating regex fragments.
    # NOTE(review): o/r/e/attach_perperty come from utils.utils, not visible
    # here; presumably o() = alternation, r(x, m, n) = repeat m..n times,
    # e() = optional -- confirm.

    # Marks these rules as belonging to the "dance" domain.
    service = 'dance'

    # Dictionary-backed fragments (each shadows the imported dictionary
    # object with its ``join_all`` value inside the class body).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all

    # Filler words allowed before / after / inside the core phrase.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)
    infix_0_2 = r(infix, 0, 2)

    # Adverbs modifying the action ("quickly").
    hurry = '(快|快点|赶紧|赶快)'

    # Dance trigger words. "唱" (sing) is included because the logs contain
    # several "sing a dance" phrasings.
    move = e(hurry) + '(跳|来|唱)' + e(numeral) +e(quantifier)
    dance = e('把|将') + '(舞|舞蹈|跳舞)'
    see = '(看|看看|观看|查看|瞧|瞅|瞄|观赏|欣赏)' + e(quantifier)
    postures = e(adjective) + '(舞姿|舞蹈)'

    # Dance names, types and tags.
    dance_name = '(杰克逊|小苹果|生日快乐)'

    dance_type1 = '机械舞'
    dance_type2 = '街舞'
    dance_type3 = '芭蕾舞'
    dance_type4 = '现代舞'
    dance_type5 = '国标舞'
    dance_type6 = '摩登舞'
    dance_type7 = '华尔兹'
    dance_type8 = '探戈'
    dance_type9 = '狐步'
    dance_type10 = '快步'
    dance_type11 = '维也纳华尔兹'
    dance_type12 = '拉丁舞'
    dance_type13 = '伦巴'
    dance_type14 = '恰恰恰'
    dance_type15 = '桑巴'
    dance_type16 = '斗牛'
    dance_type17 = '牛仔舞'
    dance_type18 = '民族舞'
    dance_type19 = '扇子舞'
    dance_type20 = '秧歌'
    dance_type21 = '手绢花'
    dance_type22 = '伞舞'
    dance_type23 = '孔雀舞'
    dance_type24 = '竹竿舞'
    dance_type25 = '儿童舞'
    dance_type26 = '丝带舞'
    dance_type27 = '踢踏舞'
    dance_type28 = '爵士舞'
    dance_type29 = '钢管舞'
    dance_type30 = '广场舞'

    # Tag every dance type with its numeric 'type' id.
    dance_type1 = attach_perperty(dance_type1, {'type': 1})
    dance_type2 = attach_perperty(dance_type2, {'type': 2})
    dance_type3 = attach_perperty(dance_type3, {'type': 3})
    dance_type4 = attach_perperty(dance_type4, {'type': 4})
    dance_type5 = attach_perperty(dance_type5, {'type': 5})
    dance_type6 = attach_perperty(dance_type6, {'type': 6})
    dance_type7 = attach_perperty(dance_type7, {'type': 7})
    dance_type8 = attach_perperty(dance_type8, {'type': 8})
    dance_type9 = attach_perperty(dance_type9, {'type': 9})
    dance_type10 = attach_perperty(dance_type10, {'type': 10})
    dance_type11 = attach_perperty(dance_type11, {'type': 11})
    dance_type12 = attach_perperty(dance_type12, {'type': 12})
    dance_type13 = attach_perperty(dance_type13, {'type': 13})
    dance_type14 = attach_perperty(dance_type14, {'type': 14})
    dance_type15 = attach_perperty(dance_type15, {'type': 15})
    dance_type16 = attach_perperty(dance_type16, {'type': 16})
    dance_type17 = attach_perperty(dance_type17, {'type': 17})
    dance_type18 = attach_perperty(dance_type18, {'type': 18})
    dance_type19 = attach_perperty(dance_type19, {'type': 19})
    dance_type20 = attach_perperty(dance_type20, {'type': 20})
    dance_type21 = attach_perperty(dance_type21, {'type': 21})
    dance_type22 = attach_perperty(dance_type22, {'type': 22})
    dance_type23 = attach_perperty(dance_type23, {'type': 23})
    dance_type24 = attach_perperty(dance_type24, {'type': 24})
    dance_type25 = attach_perperty(dance_type25, {'type': 25})
    dance_type26 = attach_perperty(dance_type26, {'type': 26})
    dance_type27 = attach_perperty(dance_type27, {'type': 27})
    dance_type28 = attach_perperty(dance_type28, {'type': 28})
    dance_type29 = attach_perperty(dance_type29, {'type': 29})
    dance_type30 = attach_perperty(dance_type30, {'type': 30})

    dance_type = o(dance_type1, dance_type2, dance_type3, dance_type4, dance_type5, dance_type6,
                   dance_type7, dance_type8, dance_type9, dance_type10, dance_type11, dance_type12,
                   dance_type13, dance_type14, dance_type15, dance_type16, dance_type17,
                   dance_type18, dance_type19, dance_type20, dance_type21, dance_type22,
                   dance_type23, dance_type24, dance_type25, dance_type26, dance_type27,
                   dance_type28, dance_type29, dance_type30)

    # Phrasings that do not fit a general rule.
    dance_unusual = '(来到舞蹈)'

    # Dance sentence patterns; each rule is the case repeated 1..3 times and
    # tagged with operation 'action' plus its rule number.
    dance_case1 = prefix_0_5 + move + dance + postfix_0_3  # move + dance ("跳 舞")
    rule_dance_case1 = Rule(attach_perperty(r(dance_case1, 1, 3), {'operation': 'action', 'rule': 1}))
    dance_case2 = prefix_0_5 + dance + move + postfix_0_3  # dance + move ("舞 来")
    rule_dance_case2 = Rule(attach_perperty(r(dance_case2, 1, 3), {'operation': 'action', 'rule': 2}))
    dance_case3 = prefix_0_5 + move + dance_name + '(的)?' + e(dance) + postfix_0_3  # move + name ("跳 小苹果")
    rule_dance_case3 = Rule(attach_perperty(r(dance_case3, 1, 3), {'operation': 'action', 'rule': 3}))
    dance_case4 = prefix_0_5 + dance_name + '(的)?' + e(dance) + move + postfix_0_3  # name + move ("小苹果 来")
    rule_dance_case4 = Rule(attach_perperty(r(dance_case4, 1, 3), {'operation': 'action', 'rule': 4}))
    dance_case5 = prefix_0_5 + move + dance_type + e(dance) + postfix_0_3  # move + type ("跳 机械舞")
    rule_dance_case5 = Rule(attach_perperty(r(dance_case5, 1, 3), {'operation': 'action', 'rule': 5}))
    dance_case6 = prefix_0_5 + dance_type + e(dance) + move + postfix_0_3  # type + move ("机械舞 来")
    rule_dance_case6 = Rule(attach_perperty(r(dance_case6, 1, 3), {'operation': 'action', 'rule': 6}))
    dance_case7 = prefix_0_5 + see + '(你(的)?)' + postures + postfix_0_3  # see + your + posture ("看 你的 舞姿")
    rule_dance_case7 = Rule(attach_perperty(r(dance_case7, 1, 3), {'operation': 'action', 'rule': 7}))
    dance_case8 = prefix_0_5 + dance + '(你)?' + modals + move + postfix_0_3  # dance + modal + move ("舞 会不会 跳")
    rule_dance_case8 = Rule(attach_perperty(r(dance_case8, 1, 3), {'operation': 'action', 'rule': 8}))
    # NOTE(review): this case starts with postfix_0_3 where every other case
    # uses prefix_0_5 -- confirm this is intended.
    dance_case9 = postfix_0_3 + dance_unusual + postfix_0_3  # special-case phrasing
    rule_dance_case9 = Rule(attach_perperty(r(dance_case9, 1, 3), {'operation': 'action', 'rule': 9}))

    # Dance scene: "can you dance?" -> scene 'dance', node 'query'.
    dance_case10 = '(小忆)?你会跳舞吗'
    rule_dance_case10 = Rule(attach_perperty(dance_case10, {'operation': 'action', 'rule': 10, 'scene': 'dance', 'node': 'query'}))

    # Dance scene: "which dances can you do?" -> scene 'dance', node 'get'.
    dance_case11 = '(小忆)?你会跳什么舞'
    rule_dance_case11 = Rule(attach_perperty(dance_case11, {'operation': 'action', 'rule': 11, 'scene': 'dance', 'node': 'get'}))
class Display(object):
    # NLU rules of the display domain (screen brightness and size).
    # NOTE(review): o/r/e/attach_perperty come from utils.utils, not visible
    # here; presumably o() = alternation, r(x, m, n) = repeat m..n times,
    # e() = optional -- confirm.

    # Marks these rules as belonging to the "display" domain.
    service = 'display'

    # Dictionary-backed fragments (each shadows the imported dictionary
    # object with its ``join_all`` value inside the class body).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all

    # Filler words allowed before / after / inside the core phrase.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # Adjustment verbs; the "increase"/"decrease" verbs carry a direction.
    change_up = "(增|加|升|扩)" + e(numeral) + e(quantifier)
    change_up = attach_perperty(change_up, {'direction': 'up'})
    change_down = "(减|降|缩)" + e(numeral) + e(quantifier)
    change_down = attach_perperty(change_down, {'direction': 'down'})
    change = "(调|调整|弄|搞|放|变|整)"

    # Direction: up (generic, brightness-specific, size-specific).
    up = "(亮|大|高|强|重)" + e(numeral) + e(quantifier)
    up = attach_perperty(up, {'direction': 'up'})
    light_up = "(亮|白)" + e(numeral) + e(quantifier)
    light_up = attach_perperty(light_up, {'direction': 'up'})
    size_up = "(大|高|宽|胖)" + e(numeral) + e(quantifier)
    size_up = attach_perperty(size_up, {'direction': 'up'})

    # Direction: down.
    down = "(暗|小|低|弱|轻|低沉)" + e(numeral) + e(quantifier)
    down = attach_perperty(down, {'direction': 'down'})
    light_down = "(暗|黑)" + e(numeral) + e(quantifier)
    light_down = attach_perperty(light_down, {'direction': 'down'})
    size_down = "(小|矮|窄|瘦)" + e(numeral) + e(quantifier)
    size_down = attach_perperty(size_down, {'direction': 'down'})

    # Direction: to the maximum (step 'toend').
    maximum = e("(到|至)") + "(最大|最高|最强|最重|最响|最亮)"
    maximum = attach_perperty(maximum, {'direction': 'up', 'step': 'toend'})
    light_maximum = e("(到|至)") + "(最亮)"
    light_maximum = attach_perperty(light_maximum, {'direction': 'up', 'step': 'toend'})
    size_maximum = e("(到|至)")+ "(最大)"
    size_maximum = attach_perperty(size_maximum, {'direction': 'up', 'step': 'toend'})

    # Direction: to the minimum (step 'toend').
    minimum = e("(到|至)") + "(最小|最低|最弱|最轻)"
    minimum = attach_perperty(minimum, {'direction': 'down', 'step': 'toend'})
    light_minimum = e("(到|至)") + "(最暗)"
    light_minimum = attach_perperty(light_minimum, {'direction': 'down', 'step': 'toend'})
    size_minimum = e("(到|至)") + "(最小)"
    size_minimum = attach_perperty(size_minimum, {'direction': 'down', 'step': 'toend'})

    # Direction summaries.
    direction = o(up, down, maximum, minimum)  # e.g. "a bit stronger"
    light_direction = o(light_up, light_down, light_maximum, light_minimum)  # e.g. "a bit brighter"
    size_direction = o(size_up, size_down, size_maximum, size_minimum)  # e.g. "a bit bigger"

    # Adjustment verb + generic direction.
    change_case1 = o(change_up, change_down) + e(degree) + e(direction)  # increase [heavier]
    change_case2 = e(change) + e(degree) + e(degree) + direction  # [adjust] bigger
    change_case3 = "(往|向|朝)" + e(degree) + e(degree) + direction + e(change)  # towards bigger [adjust]
    change_direction = o(change_case1, change_case2, change_case3)

    # Adjustment verb + brightness direction.
    change_light_case1 = e(o(change, change_up, change_down)) + e(degree) + e(degree) + light_direction  # [adjust] darker
    change_light_case2 = "(往|向|朝)" + e(degree) + e(degree) + light_direction + e(o(change, change_up, change_down))  # towards darker [adjust]
    change_light_direction = o(change_light_case1, change_light_case2)

    # Adjustment verb + size direction.
    change_size_case1 = e(o(change, change_up, change_down)) + e(degree) + e(degree) + size_direction  # [adjust] bigger
    change_size_case2 = "(往|向|朝)" + e(degree) + e(degree) + size_direction + e(o(change, change_up, change_down))  # towards bigger [adjust]
    change_size_direction = o(change_size_case1, change_size_case2)

    # Nouns for the thing being adjusted: screen, brightness, size.
    display = "(把|将)?" + "(显示|屏|屏幕|图像|图片|界面)" + e(numeral) + e(quantifier)
    light = "(把|将)?" + "(亮度)" + e(numeral) + e(quantifier)
    size = "(把|将)?" + "(大小|尺寸)" + e(numeral) + e(quantifier)

    # Brightness phrasings.
    display_light1 = prefix_0_5 + r(display,0, 2) + light + e(infix) + change_direction + postfix_0_3  # [screen] brightness adjust-up
    display_light2 = prefix_0_5 + r(display,0, 2) + e(infix) + change_direction + light + postfix_0_3  # [screen] adjust-up brightness
    display_light3 = prefix_0_5 + r(display,0, 2) + e(infix) + change_light_direction + postfix_0_3  # [screen] make-brighter
    display_light4 = prefix_0_5 + change_direction + r(display, 0, 2) + light + postfix_0_3  # adjust-up [screen] brightness
    display_light_sentence = o(display_light1, display_light2, display_light3, display_light4)
    display_light_sentence = attach_perperty(display_light_sentence, {'operation': 'light', 'rule': 1})
    rule_display_light_sentence = Rule(r(display_light_sentence, 1, 3))

    # Size phrasings, e.g. "屏幕再调大点", "请你把屏幕调到最小可以吗",
    # "把屏幕调整更大一点".
    display_size1 = prefix_0_5 + r(display, 0, 2) + size + e(infix) + change_direction + postfix_0_3  # [screen] size adjust-up
    display_size2 = prefix_0_5 + r(display, 0, 2) + e(infix) + change_direction + size + postfix_0_3  # [screen] adjust-up size
    display_size3 = prefix_0_5 + r(display, 0, 2) + e(infix) + change_size_direction + postfix_0_3  # [screen] make-bigger
    display_size4 = prefix_0_5 + change_direction + r(display, 0, 2) + size + postfix_0_3  # adjust-up [screen] size
    display_size_sentence = o(display_size1, display_size2, display_size3, display_size4)
    display_size_sentence = attach_perperty(display_size_sentence, {'operation': 'size', 'rule': 2})
    rule_display_size_sentence = Rule(r(display_size_sentence, 1, 3))
class Entertainment(object):
    # NLU rules of the entertainment domain (starting games / programs).
    # NOTE(review): o/r/e/range_tag/attach_* come from utils.utils, not
    # visible here; presumably o() = alternation, r(x, m, n) = repeat m..n
    # times, e() = optional, range_tag(n, name) = named free-text capture --
    # confirm.

    # Marks these rules as belonging to the "entertainment" domain.
    service = 'entertainment'

    # Dictionary-backed fragments (each shadows the imported dictionary
    # object with its ``join_all`` value inside the class body).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all
    stop_words = stop_words.join_all

    # Filler words allowed before / after / inside the core phrase.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # Core vocabulary: play/open verb, game noun, free-form game name.
    play = '(玩|玩儿|打开|启动)'
    game = '(的)?' + '(游戏|程序|软件)'
    name = range_tag(12, 'name')

    # Known game names, each tagged with its id and captured as 'name'.
    names_1 = '认知'
    names_1 = attach_perperty(names_1, {'id':1})
    names_2 = '英语'
    names_2 = attach_perperty(names_2, {'id':2})
    names = o(names_1, names_2)
    names = attach_name(names, 'name')

    study = '(学习)'

    # Game sentence patterns; every rule is tagged operation 'start'.
    game_case1 = prefix_0_5 + play + e(name) + e(study) + game + postfix_0_3  # play [xxx] game
    rule_game_case1 = Rule(attach_perperty(r(game_case1, 1, 3), {'operation': 'start', 'rule': 1}))
    game_case2 = prefix_0_5 + play + names + e(study) + game + postfix_0_3  # play <known name> game
    rule_game_case2 = Rule(attach_perperty(r(game_case2, 1, 3), {'operation': 'start', 'rule': 2}))
    game_case3 = names + game  # <known name> game
    rule_game_case3 = Rule(attach_perperty(r(game_case3, 1, 3), {'operation': 'start', 'rule': 3}))
    game_case4 = names + study  # <known name> study, e.g. "英语 学习"
    rule_game_case4 = Rule(attach_perperty(r(game_case4, 1, 3), {'operation': 'start', 'rule': 4}))
class ExtendInstruction(object):
    # NLU rules for generic control utterances: stop, yes, no, "won't tell".
    # NOTE(review): o/r/e/attach_perperty come from utils.utils, not visible
    # here; presumably o() = alternation, r(x, m, n) = repeat m..n times,
    # e() = optional -- confirm.

    # Marks these rules as belonging to the "extend_instruction" domain.
    service = 'extend_instruction'

    # Dictionary-backed fragments (each shadows the imported dictionary
    # object with its ``join_all`` value inside the class body).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all

    # Filler words allowed before / after / inside the core phrase.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    # Activities that can be interrupted (play, talk, sing, tell a story...).
    play = '(播放|播|说话|说|唱歌|唱|歌唱|讲故事|讲|听歌|听故事|听|跳舞|跳)'

    # Words that signal "stop".
    do_not = '(不要|不想|不准|不想要|不行|不|别)'
    stop = '(停一停|停一下|停止|暂停|停下|停|打住|闭嘴|安静|打断|shut up|stop)'
    get = '(知道|明白|理解|了解)'

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # Stop patterns.
    # negation + activity, e.g. "请你不要播放了", "别说了"
    stop_case1 = prefix_0_5 + do_not + e(degree) + play + postfix_0_3
    # stop word + activity
    stop_case2 = prefix_0_5 + stop + play + postfix_0_3
    # bare stop / "got it", e.g. "麻烦你停一下好吗", "停停停", "停啊停啊停啊"
    stop_case3 = prefix_0_5 + o(stop, get) + postfix_0_3
    stop_sentence = r(o(stop_case1, stop_case2, stop_case3), 1, 3)
    rule_stop_sentence = Rule(attach_perperty(stop_sentence, {'operation': 'stop', 'rule': 1}))

    # "Yes" patterns, e.g. "绝对正确啊", "相当可以啊", "挺好", "对对对".
    yes = '(对|好|可以|行|正确|那还用说)'
    yes_case1 = e(adjective) + yes + r(auxiliary, 0, 3)
    yes_sentence = r(yes_case1, 1, 3)
    rule_yes_sentence = Rule(attach_perperty(yes_sentence, {'operation': 'yes', 'rule': 2}))

    # "No" patterns, e.g. "完全不对", "不行", "不是很对啊", "不行不行".
    not_str = '(不是|不|没|没有)'
    no_case1 = not_str + yes
    no_case2 = prefix_0_5 + not_str + e(degree) + yes + postfix_0_3
    no_sentence = r(o(no_case1, no_case2), 1, 3)
    rule_no_sentence = Rule(attach_perperty(no_sentence, {'operation': 'no', 'rule': 3}))

    # "I won't tell you" patterns, e.g. "我不告诉你", "不想告诉你",
    # "就不告诉你".
    tell_not = e(adverb) + "(不想|不)"
    tell = "(告诉)"
    wont_tell_case1 = prefix_0_5 + tell_not + e(degree) + tell + e(pronoun) + postfix_0_3
    rule_wont_tell_sentence = Rule(attach_perperty(wont_tell_case1, {'operation': 'wonttell', 'rule': 4}))
class Cartoon(object):
    """
    Semantic rules for the cartoon domain.

    All word-class fragments (``me``, ``want``, ``see``, ``tell``, ``tag`` ...)
    come from the star import of nlu.media.common.
    """
    # Service identifier for the cartoon domain.
    service = 'cartoon'

    # Words that name the media type (cartoon / animation).
    show_obj = '(动画片|动画|卡通片|卡通|儿童动画)'

    # "I want to listen to / watch XX".
    case_1 = e(me) + e(want) + e(again) + e(adverb) + see + e(tag) + e('的') + show_obj + any_w
    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))

    # "Please play XX for me".
    case_2 = e(ask) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + any_w
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))

    # "Xiaoyi, can you play XX for me?"
    case_3 = e(robot) + can + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))

    # "Can I ask you to open XX for me?"
    case_4 = e(me) + can + e(ask) + e(robot) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))

    # "Please help me open XX" -- here ask/robot/give/me are all mandatory.
    case_5 = ask + robot + give + me + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_5 = Rule(attach_perperty(case_5, {'operation': 'play', 'rule': 5}))
class Comic(object):
    """
    Semantic rules for the comic (crosstalk / joke) domain.

    All word-class fragments (``me``, ``want``, ``see``, ``tell``, ``tag`` ...)
    come from the star import of nlu.media.common.
    """
    # Service identifier for the comic domain (the original comment said "mode").
    service = 'comic'

    # Words that name the media type (crosstalk / joke).
    show_obj = '(相声|段子)'

    # "I want to listen to XX".
    case_1 = e(me) + e(want) + e(again) + e(adverb) + see + e(tag) + e('的') + show_obj + any_w
    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))

    # "Please play XX for me".
    case_2 = e(ask) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + any_w
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))

    # "Xiaoyi, can you play crosstalk for me?"
    case_3 = e(robot) + can + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))

    # "Can I ask you to open crosstalk for me?"
    case_4 = e(me) + can + e(ask) + e(robot) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))

    # "Please help me open crosstalk" -- ask/robot/give/me are all mandatory.
    case_5 = ask + robot + give + me + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_5 = Rule(attach_perperty(case_5, {'operation': 'play', 'rule': 5}))
# Shared building blocks for the "play media" domains (comic, story, opera ...).
# Each imported dictionary name is rebound to its join_all form, so modules
# doing ``from nlu.media.common import *`` receive the rebound values.
pronoun = pronoun.join_all
modals = modals.join_all
prep = prep.join_all
degree = degree.join_all
honorific = honorific.join_all
interj = interj.join_all
prefix_unsual = prefix_unsual.join_all
auxiliary = auxiliary.join_all
quantifier = quantifier.join_all
numeral = numeral.join_all
adjective = adjective.join_all
adverb = adverb.join_all
stop_words = stop_words.join_all
# "want" / "would like" / "need".
want = '(要|想要|想|需要)'

# Filler fragments allowed before / after / inside the key words.
# NOTE(review): o / r / e are utils.utils helpers not visible here --
# presumably alternation, bounded repetition and "optional"; confirm.
prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
postfix = o(auxiliary, prep, pronoun)
infix = o(prep, pronoun, degree)

prefix_0_5 = r(prefix, 0, 5)
postfix_0_3 = r(postfix, 0, 3)

# Intent verbs; only one intent is supported: play.
see = '(听|看)' + e('一') + e('个')
tell = '(讲|说|播放|播|来|看|打开)' + e('一') + e('个|段')

# Qualifiers whose meaning cannot be pinned down are all captured as TAG.
tag = range_tag(12, 'tag')

# Ways of addressing the robot.
robot = '(你|机器人|小忆)'

# First-person words.
me = '(我|我们|咱|咱们|俺)'

# "again" / "more".
again = '(再|又|多)'

# Request words ("please", "let", "ask" ...).
ask = '(请|让|叫|要|要求|麻烦)'

# "for" / "help" words.
give = '(给|对|帮忙|帮助|帮|为)'

# "can" / "may".
can = '(能|可以)'

# "you tell": robot word followed by the tell verb.
you_say = robot + tell
class Opera(object):
    """
    Semantic rules for the opera domain.

    Word-class fragments (``me``, ``want``, ``see``, ``tell``, ``tag`` ...)
    come from the star import of nlu.media.common.
    """
    # Service identifier for the opera domain (the original comment said "mode").
    service = 'opera'

    # Names used for opera, taken from the opera_name dictionary.
    show_obj = opera_name.join_all

    # "I want to listen to XX".
    case_1 = e(me) + e(want) + e(again) + e(adverb) + see + e(tag) + e('的') + show_obj + any_w
    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))

    # "Please play XX for me".
    case_2 = e(ask) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + any_w
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))

    # "Xiaoyi, can you play opera for me?"
    case_3 = e(robot) + can + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))

    # "Can I ask you to open opera for me?"
    case_4 = e(me) + can + e(ask) + e(robot) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))

    # "Please help me open opera" -- ask/robot/give/me are all mandatory.
    case_5 = ask + robot + give + me + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_5 = Rule(attach_perperty(case_5, {'operation': 'play', 'rule': 5}))
class PictureBook(object):
    """
    Semantic rules for the picture-book domain.

    Word-class fragments (``me``, ``want``, ``see``, ``tell``, ``tag`` ...)
    come from the star import of nlu.media.common.
    """
    # Service identifier for the picture-book domain.
    service = 'picturebook'

    # Ways the word "picture book" shows up in the input.
    # NOTE(review): the extra entries look like speech-recognition
    # near-homophones of the first one -- confirm. The original listed one of
    # them twice; a repeated alternative can never change a match, so the
    # duplicate is dropped.
    show_obj = '(绘本|会呗|会吧|快本|会呢|会的|会吗)'

    # "I want to listen to XX".
    case_1 = e(me) + e(want) + e(again) + e(adverb) + see + e(tag) + e('的') + show_obj + any_w
    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))

    # "Please play XX for me".
    case_2 = e(ask) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + any_w
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))

    # "Xiaoyi, can you play a picture book for me?"
    case_3 = e(robot) + can + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))

    # "Can I ask you to open a picture book for me?"
    case_4 = e(me) + can + e(ask) + e(robot) + e(give) + e(me) + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))

    # "Please help me open a picture book" -- ask/robot/give/me are mandatory.
    case_5 = ask + robot + give + me + tell + e(tag) + e('的') + show_obj + e(stop_words)
    rule_5 = Rule(attach_perperty(case_5, {'operation': 'play', 'rule': 5}))
class Story(object):
    """Semantic rules for the story domain.

    Every attribute is a regex fragment assembled at class-definition time;
    ``rule_N`` wraps ``case_N`` in a ``Rule`` tagged with operation 'play'.
    There is no case_7 / rule_7 in this table.

    NOTE(review): ``o``, ``e``, ``attach_name`` and ``range_tag`` are
    utils.utils helpers not visible here -- presumably alternation, optional,
    named capture and a bounded free-text capture; confirm.
    """
    # Service identifier for the story domain.
    service = 'story'

    story_name = story_name.join_all
    stop_words = stop_words.join_all
    pronoun = pronoun.join_all
    can_words = modals.join_all
    adverb = adverb.join_all
    ask = honorific.join_all
    want = '(要|想要|想|需要)'

    # Intent verbs; only one intent is supported: play.
    listen = '(听|看)(一)?(个)?'
    tell = '(讲|说|播|播放|来|看)(一)?(个|段)?'

    # Story genre, attached under the name 'genre'.
    story_type = '(童话|寓言|神话|传奇|成语|睡前|名人|益智|历史|民间|爱国|人物|动物|儿童小说|战争)'
    story_type = attach_name(story_type, 'genre')

    # A story whose name is not in the dictionary.
    unknow_story_name = range_tag(6, 'unknow_story_name') + "的"

    # Qualifiers whose meaning cannot be pinned down are all captured as TAG.
    tag = range_tag(12, 'tag')

    # Mood of the story (humorous / satirical); 'sence' is the emitted key.
    sence = '(幽默|讽刺)'
    sence = attach_name(sence, 'sence')

    # Intended audience of the story.
    audience = '(儿童|3岁|4岁|5岁|6岁|7岁|8岁)'
    audience = attach_name(audience, 'audience')

    # Words that mark an utterance as belonging to the story domain.
    story = '(故事|寓言|童话)'

    # Saying the story title directly (stray trailing ';' removed).
    title_case = o(listen, tell) + story_name + stop_words

    # Ways of addressing the robot.
    robot = '(你|机器人|小忆)'

    # First-person words.
    me = '(我|我们|咱|咱们|俺)'

    # "again" / "more".
    again = '(再|又|多)'

    # Optional measure word ("a" / "one").
    an = e('个|一个')

    # "you tell": robot word followed by the tell verb.
    you_say = robot + tell

    # "(I want to) listen to a story".
    case_1 = e(me) + e(want) + e(again) + e(adverb) + listen + e(you_say) \
        + o(story, story_name) + e(stop_words)
    case_2 = e(me) + e(want) + e(again) + e(adverb) + listen + e(you_say) \
        + o(story_type, sence, audience, story_name) + e('的') + story + any_w
    case_3 = e(me) + e(want) + e(again) + e(adverb) + listen + e(you_say) \
        + tag + e('的') + story_name + any_w

    # "<story name>, I want to listen".
    case_4 = story_name + me + e(want) + listen

    # "Can I ask you to tell me a story".
    case_5 = e(me) + e(can_words) + e(ask) + e(robot) + e(adverb) + e('来') \
        + e('给') + e(me) + e(again) + tell + an \
        + o(story, story_name) + any_w

    case_6 = e(me) + e(can_words) + e(ask) + e(robot) + e(adverb) + e('来') \
        + e('给') + e(me) + e(again) + tell + an \
        + any_w + o(story_type, sence, audience, story_name, unknow_story_name) + e('的') \
        + story + any_w

    # "Can you tell the story of Little Red Riding Hood?"
    case_8 = e(robot) + e(can_words) + e(adverb) + e('给') + e(me) + e(again) \
        + tell + an + e(o(story_type, sence, audience, story_name, unknow_story_name)) \
        + e('的') + story + any_w

    # "The story of Little Red Riding Hood -- can you tell it?"
    case_9 = story_name + e('的') + e(story) + e(robot) + e(can_words) + e(tell) + e(stop_words)

    # "The story of Little Red Riding Hood -- tell it to me".
    case_10 = story_name + e('的') + e(story) + e(ask) + e('为|给') + e(me) \
        + tell + e(stop_words)

    # "Then tell a story".
    case_11 = e(can_words) + e('请') + tell + '(个|一个)' \
        + o(story_type, sence, audience, story_name) + any_w

    # "Not good -- switch to another story".
    case_12 = any_w + e(robot) + e(adverb) + '(换|变|再找)' \
        + an + e(tag) + e(story_type) + e('的') + story + any_w

    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))
    rule_5 = Rule(attach_perperty(case_5, {'operation': 'play', 'rule': 5}))
    rule_6 = Rule(attach_perperty(case_6, {'operation': 'play', 'rule': 6}))
    rule_8 = Rule(attach_perperty(case_8, {'operation': 'play', 'rule': 8}))
    rule_9 = Rule(attach_perperty(case_9, {'operation': 'play', 'rule': 9}))
    rule_10 = Rule(attach_perperty(case_10, {'operation': 'play', 'rule': 10}))
    rule_11 = Rule(attach_perperty(case_11, {'operation': 'play', 'rule': 11}))
    rule_12 = Rule(attach_perperty(case_12, {'operation': 'play', 'rule': 12}))
class Mode(object):
    """Semantic rules for the run-mode domain (enter / quit / query a mode).

    Every attribute is a regex fragment assembled at class-definition time;
    the two ``rule_*`` attributes wrap the finished patterns in ``Rule``.

    NOTE(review): ``o``, ``r``, ``e`` and ``attach_perperty`` are utils.utils
    helpers not visible here -- presumably alternation, bounded repetition,
    optional, and "tag this fragment with key/value properties"; confirm.
    """
    # Service identifier for the mode domain.
    service = 'mode'

    # Word-class fragments taken from the shared dictionaries (join_all form).
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = prefix_unsual.join_all
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all

    # Filler fragments allowed before / after / inside the key words.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # Actions: enter or quit a mode.
    action_enter = "(进入|进|启动|开启)" + e(numeral) + e(quantifier)
    action_enter = attach_perperty(action_enter, {'operation': 'enter'})
    action_quit = "(退出|退)" + e(numeral) + e(quantifier)
    action_quit = attach_perperty(action_quit, {'operation': 'quit'})
    action = o(action_enter, action_quit)

    # Mode names, each tagged with the 'name' it stands for.
    mode_show = "(展会|展览|参展|展厅|展示)"
    mode_show = attach_perperty(mode_show, {'name': 'show'})
    mode_dance = "(跳舞|舞蹈)"
    mode_dance = attach_perperty(mode_dance, {'name': 'dance'})
    # The original also listed the second dance word here (copy-paste from
    # mode_dance). It could never select 'emotion' because the dance branch is
    # tried first in action_mode, so the stray alternative is removed.
    mode_emotion = "(表情)"
    mode_emotion = attach_perperty(mode_emotion, {'name': 'emotion'})
    mode_attract = "(招揽|招来|招徕|揽客)"
    mode_attract = attach_perperty(mode_attract, {'name': 'attract'})
    mode_term = '(运行|运新|运型)?' + '(模式|方式|形式|样式|招式|状态|情景|表演)'
    action_mode = '(把|将)?' + o(mode_show, mode_dance, mode_emotion, mode_attract) + mode_term

    # Query vocabulary.
    query = '(查询|查|显示)' + e(numeral) + e(quantifier)
    current = '(当前|现在|目前|眼下)' + e('的')
    what = e('是') + '(什么|哪个|多少|怎样|如何|什么样)'

    # Enter / quit phrasings.
    action_case1 = prefix_0_5 + action + action_mode + postfix_0_3  # action, then "<mode> mode"
    action_case2 = prefix_0_5 + action_mode + action + postfix_0_3  # "<mode> mode", then action
    action_case3 = prefix_0_5 + mode_term + action_quit + postfix_0_3  # "run mode", then quit
    action_sentence = o(action_case1, action_case2, action_case3)
    rule_action_sentence = Rule(attach_perperty(action_sentence, {'rule': '1'}))

    # Query phrasings.
    get_case1 = prefix_0_5 + query + e(current) + mode_term + postfix_0_3  # "show [current] mode"
    get_case2 = prefix_0_5 + current + what + mode_term + postfix_0_3  # "now is which run mode"
    get_case3 = prefix_0_5 + current + mode_term + what + postfix_0_3  # "current run mode is which"
    get_sentence = o(get_case1, get_case2, get_case3)
    rule_get_sentence = Rule(attach_perperty(get_sentence, {'attribute': 'name', 'operation': 'get', 'rule': '2'}))
class Motion(object):
    """Semantic rules for the motion domain (head gestures, moving, turning).

    Every attribute is a regex fragment assembled at class-definition time;
    the ``rule_case*`` attributes wrap the finished patterns in ``Rule``.

    NOTE(review): ``o``, ``e`` and ``attach_perperty`` are utils.utils helpers
    not visible here -- presumably alternation, optional, and "tag this
    fragment with key/value properties"; confirm.
    """
    # Service identifier for the motion domain.
    service = 'motion'

    pronoun = pronoun.join_all
    stop_words = stop_words.join_all
    modals = modals.join_all

    # Ways of addressing the robot, request words, and the object marker.
    robot = '(你|机器人|小忆)'
    ask = '(请|让|要|要求|麻烦)'
    word1 = '(将|把)'

    # Head / body gestures, each tagged with an action code and an emotion id.
    up = '(抬头|抬起头|看|抬起来)'
    up = attach_perperty(up, {'code': 'lookUp', 'emotion': 12})
    down = '(低头|低下头|低个头|向下|低下去)'
    down = attach_perperty(down, {'code': 'lookDown', 'emotion': 10})
    shake = '(摇头|摇下头|摇个头|转头)'
    shake = attach_perperty(shake, {'code': 'shakeHead', 'emotion': 8})
    nod = '(点(下|个)?头)'
    nod = attach_perperty(nod, {'code': 'nod', 'emotion': 21})
    twist = '(扭下|扭个|动下|动个)'
    twist = attach_perperty(twist, {'code': 'twist', 'emotion': 12})
    stand = "站(直|好|稳|住|定)"
    stand = attach_perperty(stand, {'code': 'stand', 'emotion': 12})

    # Movement verbs, each allowed once or twice in a row.
    # NOTE(review): if these patterns are compiled as byte strings under
    # Python 2, `{1,2}` would apply to the last UTF-8 byte only -- confirm that
    # Rule works on unicode.
    run = '跑{1,2}'
    run = attach_perperty(run, {'operation': 'run'})
    fly = '飞{1,2}'
    fly = attach_perperty(fly, {'operation': 'fly'})
    jump = '跳{1,2}'
    jump = attach_perperty(jump, {'operation': 'jump'})
    swim = '游{1,2}'
    swim = attach_perperty(swim, {'operation': 'swim'})
    turn = '转{1,2}'
    turn = attach_perperty(turn, {'operation': 'turn'})

    move = o(run, fly, jump, swim, turn)

    # Speed qualifiers.
    fast = '(全速|快点|快)'
    fast = attach_perperty(fast, {'speed': 'fast'})
    slow = '(慢速|慢点|缓慢|慢)'
    slow = attach_perperty(slow, {'speed': 'slow'})
    speed = o(fast, slow)

    # "towards".
    will = '(向|朝)'

    # Directions (attribute names keep the original 'dirction' spelling).
    dirction_1 = '上'
    dirction_1 = attach_perperty(dirction_1, {'direction': 'up'})
    dirction_2 = '下'
    dirction_2 = attach_perperty(dirction_2, {'direction': 'down'})
    dirction_3 = '前'
    dirction_3 = attach_perperty(dirction_3, {'direction': 'forth'})
    dirction_4 = '后'
    dirction_4 = attach_perperty(dirction_4, {'direction': 'back'})
    dirction_5 = '左'
    dirction_5 = attach_perperty(dirction_5, {'direction': 'left'})
    dirction_6 = '右'
    dirction_6 = attach_perperty(dirction_6, {'direction': 'right'})
    dirction = o(dirction_1, dirction_2, dirction_3, dirction_4, dirction_5, dirction_6)
    # From here on 'dirction' already carries the optional "towards" word.
    dirction = e(will) + dirction

    # "Quickly look up".
    action_sentence1 = e(robot) + e(speed) + e(dirction) + o(up, down, shake, nod, twist) + e(stop_words)
    action_sentence1 = attach_perperty(action_sentence1, {'operation': 'action', 'rule': 1})

    # "Please raise your head".
    action_sentence2 = e(robot) + e(ask) + e(robot) + e(word1) + e('头') + o(up, down, shake, nod, twist) + e(stop_words)
    action_sentence2 = attach_perperty(action_sentence2, {'operation': 'action', 'rule': 2})

    # "Quickly X!" (X is a movement verb).
    move_sentence1 = e(speed) + move + e(stop_words)
    move_sentence1 = attach_perperty(move_sentence1, {'rule': 3})

    # "You X".
    move_sentence2 = e(modals) + e(pronoun) + move + e(stop_words)
    move_sentence2 = attach_perperty(move_sentence2, {'rule': 4})

    # "Quickly turn left"; 'action' is built from the same verbs as 'move'.
    action = o(run, fly, jump, swim, turn)
    action_sentence3 = e(ask) + e(robot) + e(speed) + e(will) + e(stop_words) + dirction + e(stop_words) + action + e(stop_words)
    action_sentence3 = attach_perperty(action_sentence3, {'rule': 5})

    # "Turn faster".
    action_sentence4 = e(ask) + e(robot) + action + e(speed) + e(stop_words)
    action_sentence4 = attach_perperty(action_sentence4, {'rule': 6})

    # "Stand up straight".
    action_sentence5 = e(ask) + e(robot) + stand + e(speed) + e(stop_words)
    action_sentence5 = attach_perperty(action_sentence5, {'rule': 7})

    rule_case1 = Rule(o(action_sentence1, action_sentence2))
    rule_case2 = Rule(action_sentence3)
    rule_case3 = Rule(action_sentence4)
    rule_case4 = Rule(o(move_sentence1, move_sentence2))
    rule_case5 = Rule(action_sentence5)
class Music(object):
    """Semantic rules for the music domain.

    Every attribute is a regex fragment assembled at class-definition time;
    ``rule_N`` wraps ``case_N`` in a ``Rule`` tagged with operation 'play'.

    NOTE(review): ``o``, ``e`` and ``range_tag`` are utils.utils helpers not
    visible here -- presumably alternation, optional and a bounded free-text
    capture; confirm.
    """
    # Service identifier for the music domain.
    service = 'music'

    # NOTE(review): story_name is rebound here but not used by any case below;
    # it looks carried over from the story rules -- confirm before removing.
    story_name = story_name.join_all
    stop_words = stop_words.join_all
    pronoun = pronoun.join_all
    can_words = modals.join_all
    adverb = adverb.join_all
    ask = honorific.join_all
    give = '(给)'
    want = '(要|想要|想|需要)'

    # Intent verbs; only one intent is supported: play.
    listen = '(听|来)(一)?(首)?'
    sing = '(唱|播放|播|来|看|放|听)'

    # Artist name, captured as 'artist'.
    artist = range_tag(4, 'artist')

    # Song title, captured as 'song'.
    music_name = range_tag(8, 'song')

    # Words that mean "music" / "song".
    music = '(音乐|儿歌|歌曲|歌)'

    # Ways of addressing the robot.
    robot = '(你|机器人|小忆)'

    # First-person words.
    me = '(我|我们|咱|咱们|俺)'

    # "again" / "more".
    again = '(再|又|多)'

    # Optional measure word for songs.
    an = e('((一)?(首|个))')

    # "you sing": robot word followed by the sing verb.
    you_sing = robot + sing

    # "I (want to) listen to music"; note that 'me' is mandatory in cases 1-2.
    case_1 = me + e(want) + listen + e(you_sing) \
        + o(an) + o(music, music_name) + e(stop_words)
    case_2 = me + e(want) + listen + e(you_sing) + e(artist) + e('的') + o(music, music_name)
    case_3 = sing + music_name

    # "(For me) sing a song by <artist>".
    case_4 = e(give) + e(me) + sing + an + artist + '的歌'

    rule_1 = Rule(attach_perperty(case_1, {'operation': 'play', 'rule': 1}))
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'play', 'rule': 2}))
    rule_3 = Rule(attach_perperty(case_3, {'operation': 'play', 'rule': 3}))
    rule_4 = Rule(attach_perperty(case_4, {'operation': 'play', 'rule': 4}))
class Nlu_Framework(object):
    """
    NLU framework: a registry of per-service rules plus the matcher that runs
    a query string against them.

    All state lives on the class (``service_map``, ``word_dict_map``); every
    method is a staticmethod.
    """
    # service name -> list of rule objects collected by service_init().
    service_map = defaultdict(list)

    # "<service>_<group name>" -> the property mapping of a registered word dict.
    word_dict_map = {}

    duplicate_key_re = re.compile(r"<([^>]*)>")

    @staticmethod
    def register(service_module):
        """
        Register a service object with the framework.

        :param service_module: object exposing ``service`` (its name) and
            ``Rule`` instances as attributes
        :return: None
        """
        Nlu_Framework.service_map[service_module.service] = \
            Nlu_Framework.service_init(service_module)

    @staticmethod
    def register_dict(bussiness, word_dict):
        """
        Register a word dictionary under a service so that extra attributes of
        a captured value can be merged into the match result.

        :param bussiness: service name
        :param word_dict: dictionary object exposing ``group_name`` and ``property``
        :return: None
        """
        assert word_dict.group_name, "word_dict don't have group name"
        key = "{}_{}".format(bussiness, word_dict.group_name)
        Nlu_Framework.word_dict_map[key] = word_dict.property

    @staticmethod
    def service_init(service_module):
        """
        Collect every ``Rule`` instance found among the attributes of
        service_module.

        :param service_module: object whose attributes are scanned via dir()
        :return: list of Rule objects, in dir() order
        """
        attributes = (getattr(service_module, name) for name in dir(service_module))
        return [attr for attr in attributes if isinstance(attr, (Rule, ))]

    @staticmethod
    def on_status_filter(status):
        """
        Build a rule map restricted to rules whose ``filters['status']``
        equals the given status.

        :param status: status name
        :return: defaultdict mapping service name -> list of ``rule.rule`` values
        """
        temp_rule_map = defaultdict(list)
        # .items() (not .iteritems()) matches the calls already used in match().
        for service_name, rule_list in Nlu_Framework.service_map.items():
            for rule in rule_list:
                if rule.filters.get('status') != status:
                    continue
                # NOTE(review): this stores rule.rule, while the unfiltered
                # path in match() uses the Rule object itself; both must
                # expose .match() -- confirm against nlu.rule.
                temp_rule_map[service_name].append(rule.rule)
        return temp_rule_map

    @staticmethod
    def match(query_string, re_filter=None, re_filter_func='on_status_filter'):
        """
        Run query_string against the registered services and collect the
        attributes of the rules that match the whole string.

        :param query_string: the query text
        :param re_filter: keyword arguments for the filter function; when empty
            or None every registered rule is tried
        :param re_filter_func: name of the Nlu_Framework filter method to call
        :return: list with one formatted result (see _format_result) per
            service that matched
        """
        match_result_list = []

        # Rebuild the candidate rule map on every call.
        if not re_filter:
            temp_rule_map = Nlu_Framework.service_map
        else:
            filter_func = getattr(Nlu_Framework, re_filter_func)
            temp_rule_map = filter_func(**re_filter)

        for k, v in temp_rule_map.items():
            # Start every service with an empty result.
            result_dict = {}
            for re_object in v:
                match_object = re_object.match(query_string)
                if not match_object:
                    continue
                groups = match_object.groupdict()
                # Accept only when some named group spans the whole query.
                if not any(m_value == query_string for m_value in groups.values()):
                    continue

                # Matched: a later matching rule of the same service replaces
                # the result of an earlier one.
                result_dict = defaultdict(dict)
                for key, math_value in groups.items():
                    # Strip the triple underscore added to keep group names
                    # unique; single underscores are left alone.
                    key = key.replace("___", "")
                    if '__' in key and math_value:
                        result_dict.update(get_attach_perperty(key))
                    elif math_value:
                        # For alternatives (a|b) only the branch that matched
                        # contributes a value.
                        result_dict.update({key: math_value})

                    # Merge the extra attributes registered for this value;
                    # groups that did not participate (None) have nothing to
                    # look up.
                    dict_attach_key = "{}_{}".format(k, key)
                    if math_value and dict_attach_key in Nlu_Framework.word_dict_map:
                        result_dict.update(Nlu_Framework.word_dict_map[dict_attach_key][math_value])
                result_dict['service'] = k
            if result_dict:
                result_dict = Nlu_Framework._format_result(result_dict)
                match_result_list.append(result_dict)
        return match_result_list

    @staticmethod
    def _format_result(result_dict):
        """
        Shape the output: 'service' and 'operation' stay at the top level,
        every other key goes under 'parameters'.

        :param result_dict: flat mapping produced by match()
        :return: defaultdict(dict) with the reshaped content
        """
        temp_dict = defaultdict(dict)
        for key, value in result_dict.items():
            if key in ('service', 'operation'):
                temp_dict[key] = value
            else:
                temp_dict['parameters'].update({key: value})
        return temp_dict
class Phone(object):
    """Semantic rules for the phone domain.

    Every attribute is a regex fragment assembled at class-definition time;
    ``rule_N`` wraps ``case_N`` in a ``Rule``.

    NOTE(review): ``e`` and ``attach_name`` are utils.utils helpers not visible
    here -- presumably "optional" and "named capture"; confirm.
    """
    # Service identifier for the phone domain.
    service = 'phone'

    pronoun = pronoun.join_all
    # The original never rebound stop_words, so case_1 handed the imported
    # dictionary object (not its join_all pattern) to e(). The other rule
    # classes in this package rebind it exactly like this.
    stop_words = stop_words.join_all
    can_words = modals.join_all
    adverb = adverb.join_all
    ask = honorific.join_all
    want = '(要|想要|想|需要)'

    # Intent verb; only one intent is supported: dial.
    dial = '(打|挂|拨)(一)?(个)?'

    # "phone".
    phone = '(电话)'

    # Ways of addressing the robot.
    robot = '(你|机器人|小忆)'

    # First-person words.
    me = '(我|我们|咱|咱们|俺)'

    # "again" / "more".
    again = '(再|又|多)'

    # Family relation, captured as 'relation'.
    relation = '(爸爸|妈妈|爷爷|奶奶)'
    relation = attach_name(relation, 'relation')

    # Measure word ("a" / "one").
    an = '(个|一个)'

    # "to" / "for".
    give = '(给)'

    # "I want to make a phone call".
    case_1 = e(robot) + e(me) + e(want) + e(again) + dial + e(an) + phone + e(stop_words)
    rule_1 = Rule(attach_perperty(case_1, {'scene': 'phone_call', 'operation': 'phone', 'rule': 1}))

    # "Call dad" (give + relation + dial + phone).
    case_2 = give + relation + dial + phone
    rule_2 = Rule(attach_perperty(case_2, {'operation': 'phone', 'rule': 2}))
# 照片特征词 45 | photo = '(照|照片|相|相片|像|像片|影|合影)' 46 | 47 | # 打开特征词 48 | open = '(打开|开启|启动)' 49 | 50 | # 相机特征词 51 | camera = '(相机|照相机|摄像头)' 52 | 53 | # 不好写规则的特征表达 54 | take_photo = photo + photo 55 | 56 | # 拍照语义解析 57 | photo_case1 = prefix_0_5 + take + photo + postfix_0_5 # 拍照 58 | rule_photo_case1 = Rule(attach_perperty(r(photo_case1, 1, 3), {'operation': 'take', 'rule': 1})) 59 | 60 | photo_case2 = prefix_0_5 + '(把|将)?' + photo + take + postfix_0_5 # 照 拍 61 | rule_photo_case2 = Rule(attach_perperty(r(photo_case2, 1, 3), {'operation': 'take', 'rule': 2})) 62 | 63 | photo_case3 = prefix_0_5 + '(把|将)?' + photo + take + any_w + quantifier + postfix_0_5 # 拍 照; 64 | rule_photo_case3 = Rule(attach_perperty(r(photo_case3, 1, 3), {'operation': 'take', 'rule': 3})) 65 | 66 | photo_case4 = prefix_0_5 + take + any_w + quantifier + photo + postfix_0_5 # 拍 张 照; 67 | rule_photo_case4 = Rule(attach_perperty(r(photo_case4, 1, 3), {'operation': 'take', 'rule': 4})) 68 | 69 | photo_case5 = prefix_0_5 + '(拍|照)' + '(我|我们)' + postfix_0_5 # 拍我 70 | rule_photo_case5 = Rule(attach_perperty(r(photo_case5, 1, 3), {'operation': 'take', 'rule': 5})) 71 | 72 | photo_case6 = prefix_0_5 + open + camera + postfix_0_5 # 打开 相机 73 | rule_photo_case6 = Rule(attach_perperty(r(photo_case6, 1, 3), {'operation': 'take', 'rule': 6})) 74 | 75 | photo_case7 = prefix_0_5 + '(把|将)?' + camera + open + postfix_0_5 # 相机 打开 76 | rule_photo_case7 = Rule(attach_perperty(r(photo_case7, 1, 3), {'operation': 'take', 'rule': 7})) 77 | -------------------------------------------------------------------------------- /query_analysis/nlu/profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
class Profile(object):
    """Regex rules for the ``profile`` domain (name, age, gender, relation).

    Every ``rule_*`` attribute is a ``Rule`` built from concatenated regex
    fragments.  The dict handed to ``attach_perperty`` lists key/value
    properties tagged onto a fragment (presumably reported as semantic slots
    when the rule matches -- confirm in utils.utils).  ``e(...)`` appears to
    make a fragment optional and ``o(...)`` to build an alternation.
    """

    # Domain identifier.
    service = 'profile'

    pronoun = pronoun.join_all
    stop_words = stop_words.join_all
    modals = modals.join_all

    # Who the statement is about: the user ("I") or the robot ("you").
    me = '(我|俺|咱)'
    me = attach_perperty(me, {'subject': 'user'})
    your = '(你)'
    your = attach_perperty(your, {'subject': 'robot'})
    person = o(me, your)

    # Filler fragment used between the meaningful parts of a sentence.
    ti_ch_0_3 = range_tag(3, start=0)

    what = '(什么|几|多少|啥)'
    what_name = '(什么|啥)'
    know = '(知道|清楚|明白|了解|知不知道|知道不知道|搞清楚|看|说)'
    name = '(名字|名称|名|称呼)'
    name = attach_perperty(name, {'attribute': 'name'})

    # NOTE(review): '年级' means "school grade"; '年纪' ("age") may have been
    # intended -- left unchanged so matching behavior is preserved.
    age = '(年级|年龄|岁)'
    age = attach_perperty(age, {'attribute': 'age'})

    man = '(男|男生|男人|帅哥|男同学)'
    man = attach_perperty(man, {'gender': 1})

    woman = '(女|女生|女人|美女|女同学)'
    woman = attach_perperty(woman, {'gender': 2})
    gender = o(man, woman)

    # Free-form relation description (only referenced by the disabled rule
    # at the bottom of the class).
    strange_relation = range_tag(4)
    strange_relation = attach_name(strange_relation, 'relationDesc')
    strange_relation = attach_perperty(strange_relation, {'relationid': 99, 'closeReletive': 'no'})

    # Known relations; ids 0-10 are tagged as close relatives, 11-20 are not.
    relationid_0 = '(小){0,1}主人'
    relationid_0 = attach_perperty(relationid_0, {'relationid': 0, 'closeReletive': 'yes'})

    relationid_1 = '(爸爸|老豆|老爸)'
    relationid_1 = attach_perperty(relationid_1, {'relationid': 1, 'closeReletive': 'yes'})

    relationid_2 = '(妈妈|老妈|妈)'
    relationid_2 = attach_perperty(relationid_2, {'relationid': 2, 'closeReletive': 'yes'})

    relationid_3 = '(爷爷|祖父)'
    relationid_3 = attach_perperty(relationid_3, {'relationid': 3, 'closeReletive': 'yes'})

    relationid_4 = '(奶奶|祖母)'
    relationid_4 = attach_perperty(relationid_4, {'relationid': 4, 'closeReletive': 'yes'})

    relationid_5 = '(姥姥|外婆|外祖母)'
    relationid_5 = attach_perperty(relationid_5, {'relationid': 5, 'closeReletive': 'yes'})

    relationid_6 = '(姥爷|外公|外祖父)'
    relationid_6 = attach_perperty(relationid_6, {'relationid': 6, 'closeReletive': 'yes'})

    relationid_7 = '(哥哥|哥)'
    relationid_7 = attach_perperty(relationid_7, {'relationid': 7, 'closeReletive': 'yes'})

    relationid_8 = '(弟弟|弟)'
    relationid_8 = attach_perperty(relationid_8, {'relationid': 8, 'closeReletive': 'yes'})

    relationid_9 = '(姐姐|姐)'
    relationid_9 = attach_perperty(relationid_9, {'relationid': 9, 'closeReletive': 'yes'})

    relationid_10 = '(妹妹|妹)'
    relationid_10 = attach_perperty(relationid_10, {'relationid': 10, 'closeReletive': 'yes'})

    relationid_11 = '(伯伯|叔叔)'
    relationid_11 = attach_perperty(relationid_11, {'relationid': 11, 'closeReletive': 'no'})

    relationid_12 = '(伯母|婶婶)'
    relationid_12 = attach_perperty(relationid_12, {'relationid': 12, 'closeReletive': 'no'})

    relationid_13 = '(姑姑)'
    relationid_13 = attach_perperty(relationid_13, {'relationid': 13, 'closeReletive': 'no'})

    relationid_14 = '(姑父|姑丈)'
    relationid_14 = attach_perperty(relationid_14, {'relationid': 14, 'closeReletive': 'no'})

    relationid_15 = '(舅舅|舅公)'
    relationid_15 = attach_perperty(relationid_15, {'relationid': 15, 'closeReletive': 'no'})

    relationid_16 = '(舅妈|舅母)'
    relationid_16 = attach_perperty(relationid_16, {'relationid': 16, 'closeReletive': 'no'})

    relationid_17 = '(姨姨|姨|姨妈)'
    relationid_17 = attach_perperty(relationid_17, {'relationid': 17, 'closeReletive': 'no'})

    relationid_18 = '(姨夫|姨丈)'
    relationid_18 = attach_perperty(relationid_18, {'relationid': 18, 'closeReletive': 'no'})

    relationid_19 = '(堂哥|堂弟|表哥|表弟)'
    relationid_19 = attach_perperty(relationid_19, {'relationid': 19, 'closeReletive': 'no'})

    relationid_20 = '(堂姐|堂妹|表姐|表妹)'
    relationid_20 = attach_perperty(relationid_20, {'relationid': 20, 'closeReletive': 'no'})

    # Each relation appears exactly once (relationid_11 used to be listed
    # twice, which only added a redundant alternative to the pattern).
    relation = o(relationid_0, relationid_1, relationid_2, relationid_3, relationid_4, relationid_5,
                 relationid_6, relationid_7, relationid_8, relationid_9, relationid_10, relationid_11,
                 relationid_12, relationid_13, relationid_14, relationid_15, relationid_16,
                 relationid_17, relationid_18, relationid_19, relationid_20)
    relation = attach_name(relation, 'relationDesc')

    master = '(小主人|他|你)'
    slave = '(你|机器人|小忆)'

    # Captured answer values.
    real_name = range_tag(3, 'name')
    real_age = range_not_tag(2, what, 'age')

    # "我叫什么名字" -- what is my name
    query_name1 = person + '(叫)' + what + name
    query_name1 = attach_perperty(query_name1, {'attribute': 'name', 'rule': 'queryName1'})
    rule_query_name1 = Rule(attach_perperty(query_name1, {'operation': 'get'}))

    # "我几岁" -- how old am I
    query_age1 = person + '(今年|现在)?' + what + '岁'
    # The rule label was a copy of 'queryName1'; it now identifies this rule.
    query_age1 = attach_perperty(query_age1, {'attribute': 'age', 'rule': 'queryAge1'})
    rule_query_age1 = Rule(attach_perperty(query_age1, {'operation': 'get'}))

    # "你知道我的名字吗" -- do you know my name
    query_name2 = ti_ch_0_3 + slave + ti_ch_0_3 + know + person + ti_ch_0_3 + name + e(stop_words)
    query_name2 = attach_perperty(query_name2, {'attribute': 'name', 'rule': 'queryName2'})
    rule_query_name2 = Rule(attach_perperty(query_name2, {'operation': 'get'}))

    # "你知道我叫什么吗" -- do you know what I am called
    query_name3 = ti_ch_0_3 + slave + ti_ch_0_3 + know + person + ti_ch_0_3 + '叫' + what_name + e(name) + e(stop_words)
    query_name3 = attach_perperty(query_name3, {'attribute': 'name', 'rule': 'queryName3'})
    rule_query_name3 = Rule(attach_perperty(query_name3, {'operation': 'get'}))

    # "你几岁" -- how old are you
    query_age2 = ti_ch_0_3 + slave + ti_ch_0_3 + what + age + e(stop_words)
    query_age2 = attach_perperty(query_age2, {'rule': 'query_age2'})
    rule_query_age2 = Rule(attach_perperty(query_age2, {'operation': 'get'}))

    # "你多大" -- how old are you
    query_age3 = ti_ch_0_3 + person + ti_ch_0_3 + '(多大)' + e('(年龄|年纪)') + e(stop_words)
    query_age3 = attach_perperty(query_age3, {'rule': 'query_age3', 'attribute': 'age'})
    rule_query_age3 = Rule(attach_perperty(query_age3, {'operation': 'get'}))

    # "你看我多大了" -- how old do you think I am
    query_age4 = ti_ch_0_3 + slave + know + person + '(多大)' + e('(年龄|年纪)') + e(stop_words)
    query_age4 = attach_perperty(query_age4, {'rule': 'query_age4', 'attribute': 'age'})
    rule_query_age4 = Rule(attach_perperty(query_age4, {'operation': 'get'}))

    # "你知道我是谁吗" / "我是谁" -- do you know who I am / who am I
    query_relation1 = ti_ch_0_3 + e(slave) + e(know) + person + ti_ch_0_3 + '(谁)' + e(stop_words)
    query_relation1 = attach_perperty(query_relation1, {'attribute': 'relation', 'rule': 'queryRelation1'})
    rule_query_relation1 = Rule(attach_perperty(query_relation1, {'operation': 'get'}))

    # "你知道我的性别" -- do you know my gender
    query_gender1 = ti_ch_0_3 + slave + ti_ch_0_3 + know + person + ti_ch_0_3 + '(性别|男女|是男是女|男的女的)' + e(stop_words)
    query_gender1 = attach_perperty(query_gender1, {'attribute': 'gender', 'rule': 'queryGender1'})
    rule_query_gender1 = Rule(attach_perperty(query_gender1, {'operation': 'get'}))

    # "我是男是女" -- am I male or female
    query_gender2 = ti_ch_0_3 + person + ti_ch_0_3 + '(性别|男女|是男是女)' + e(stop_words)
    query_gender2 = attach_perperty(query_gender2, {'attribute': 'gender', 'rule': 'queryGender2'})
    rule_query_gender2 = Rule(attach_perperty(query_gender2, {'operation': 'get'}))

    # "我叫XX" -- my name is XX
    answer_name = ti_ch_0_3 + person + ti_ch_0_3 + "(叫)" + real_name
    answer_name = attach_perperty(answer_name, {'rule': 'answerName'})
    rule_answer_name = Rule(attach_perperty(answer_name, {'operation': 'answer'}))

    # "我XX岁" -- I am XX years old
    answer_age = ti_ch_0_3 + person + e('(今年|现在)') + real_age + '岁' + e(stop_words)
    answer_age = attach_perperty(answer_age, {'rule': 'answerAge'})
    rule_answer_age = Rule(attach_perperty(answer_age, {'operation': 'answer'}))

    # "我是男/女的" -- I am male / female
    answer_gender = ti_ch_0_3 + person + '(是|就是)' + gender + e(stop_words)
    answer_gender = attach_perperty(answer_gender, {'rule': 'answerGender'})
    rule_answer_gender = Rule(attach_perperty(answer_gender, {'operation': 'answer'}))

    # "我是小主人的xx" -- I am the little master's xx
    answer_relation_1 = ti_ch_0_3 + person + '(是|就是)' + master + '(的)' + relation + e(stop_words)
    answer_relation_1 = attach_perperty(answer_relation_1, {'rule': 'answerRelation1'})
    rule_answer_relation_1 = Rule(attach_perperty(answer_relation_1, {'operation': 'answer'}))

    # "我是小主人/爸爸" -- I am the little master / dad
    answer_relation_2 = ti_ch_0_3 + person + '(是|就是)' + relation
    answer_relation_2 = attach_perperty(answer_relation_2, {'rule': 'answerRelation2'})
    rule_answer_relation_2 = Rule(attach_perperty(answer_relation_2, {'operation': 'answer'}))

    # Disabled rule: "我是小主人的三爷爷" (free-form relation description).
    # answer_relation_3 = person + '(就是|是)' + o('你|小忆|机器人|小主人') + ('(的)') + strange_relation
    # answer_relation_3 = attach_perperty(answer_relation_3, {'rule': 'answerRelation3'})
    # rule_strange_relation_3 = Rule(attach_perperty(answer_relation_3, {'operation': 'answer'}))
class Recognition(object):
    """Regex rules for the ``recognition`` domain (what is this / what color).

    Fragments are plain regex strings; ``e(...)`` appears to make a fragment
    optional (confirm in utils.utils).
    """

    # Domain identifier.
    service = 'recognition'

    stop_words = stop_words.join_all
    pronoun = pronoun.join_all
    modals = modals.join_all
    adverb = adverb.join_all

    look = '(看|猜|说|查看|瞅|瞧|观察|鉴别|望|瞟|问|告诉我)'
    location = '(这|那|这边|那边|这里|那里|这个)'
    what = '(什么|啥)'
    robot = '(你|机器人|小忆)'

    # Color words, tagged with attribute=color.
    color = '(颜色|彩色|色|色泽|红|黄|蓝|绿|黑|白|紫|桔|橙|青)'
    color = attach_perperty(color, {'attribute': 'color'})

    # Generic object words, tagged with attribute=name.
    thing = '(东西|物品|水果)'
    thing = attach_perperty(thing, {'attribute': 'name'})

    # "这是什么颜色" -- what color is this (the location word is optional here)
    color_case = '(请|那)?' + e(robot) + e(look) + e(location) + '是' + what + color + any_w
    rule_color_case = Rule(attach_perperty(color_case, {'operation': 'get', 'attribute': 'color'}))

    # "这是什么" -- what is this (the location word is required here)
    thing_case = '(请|那)?' + e(robot) + e(look) + location + '是' + what + e(thing) + any_w
    rule_thing_case = Rule(attach_perperty(thing_case, {'operation': 'get', 'attribute': 'name'}))
class Rule(object):
    """A compiled regex rule plus the filters it is associated with.

    ``re`` in this module is the alias the file gives to the ``regex``
    package (``import regex as re``).
    """

    # Captures the text between every "<" and ">" in a pattern, i.e. the
    # names of the named capture groups.
    duplicate_key_re = re.compile(r"<([^>]*)>")

    def __init__(self, rule_str, filter_list=None):
        """Compile ``rule_str`` after making its capture-group names unique.

        :param rule_str: rule text (a regular expression)
        :param filter_list: container of supported filter names; only
            membership (``in``) is used, so a dict, list or set all work.
            Defaults to a fresh empty dict for each instance.
        """
        self.rule_str = rule_str
        # A new dict per instance: the previous ``filter_list={}`` default
        # was one object shared by every Rule created without filters.
        self.filters = {} if filter_list is None else filter_list
        self.rule = re.compile(self._fix_duplicate(rule_str), max_mem=1024 * 1024 * 100)

    def is_match(self, filter_name):
        """Tell whether ``filter_name`` is one of this rule's filters.

        :param filter_name: filter name to look up
        :return: True when ``filter_name`` is contained in ``self.filters``
        """
        return filter_name in self.filters

    def match(self, query_string):
        """Match the compiled rule against the start of ``query_string``.

        :param query_string: text to analyse
        :return: the result of the compiled pattern's ``match`` call
        """
        return self.rule.match(query_string)

    def _fix_duplicate(self, re_string):
        """Rename repeated capture-group names so the pattern can compile.

        A regular expression may not use the same group name twice, so the
        k-th occurrence (1-based) of a repeated name gets ``k`` copies of
        ``___`` appended.  Names that occur once are left untouched.

        :param re_string: pattern text
        :return: pattern text in which every ``<name>`` is unique
        """
        duplicate_key_list = self.duplicate_key_re.findall(re_string)

        # Nothing to do when every name is already unique.
        if len(set(duplicate_key_list)) == len(duplicate_key_list):
            return re_string

        for key, value in Counter(duplicate_key_list).items():
            if value > 1:
                for k in range(value):
                    # Replace including the angle brackets; otherwise a name
                    # that is a prefix of another one (direction_up vs
                    # direction_up_step_toend) would be rewritten as well.
                    re_string = re_string.replace("<{}>".format(key), "<{}>".format(key + (k + 1) * '___'), 1)

        # A renamed key may now equal another existing key; recurse until
        # all names are unique.
        re_string = self._fix_duplicate(re_string)
        return re_string
class Sight(object):
    """Regex rules for the ``sight`` domain (asking to see a scenic sight).

    ``o`` / ``r`` / ``e`` come from utils.utils; from their use here they
    appear to build an alternation, a bounded repetition and an optional
    fragment respectively (confirm against utils.utils).
    """

    # Domain identifier.
    service = 'sight'

    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    directional = directional.join_all
    sight_name = sight_name.join_all

    # Filler word classes allowed before, after and inside a request.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)
    infix_0_2 = r(infix, 0, 2)

    # The sight name is captured under the name 'name'.
    sight_name = attach_name(sight_name, 'name')

    sight = r('(把|将)', 0, 1) + sight_name

    shape = '(样子|形状|外表|图片)'
    what = '(什么|哪种|怎么)'
    look = '(看看|观看|查看|看|瞧|瞅|瞄|观赏|欣赏)' + e(numeral) + e(quantifier)

    show = '(显示|出现|演示|表演)' + e(directional) + e(numeral) + e(quantifier)

    # Query form 1: "雪山" -- the bare sight name
    sight_case1 = sight
    rule_sight_case1 = Rule(attach_perperty(sight_case1, {'operation': 'query', 'rule': 1}))

    # Query form 2: "我想看一下雪山" -- I want to take a look at the snow mountain
    sight_case2 = prefix_0_5 + look + e(infix) + sight + postfix_0_3
    rule_sight_case2 = Rule(attach_perperty(sight_case2, {'operation': 'query', 'rule': 2}))

    # Query form 3: "你把雪山显示给我看看" / "雪山显示出来给我看看" / "雪山显示一下看看"
    # -- sight first, then an optional "show" verb, then a "look" verb
    sight_case3 = prefix_0_5 + sight + e(show) + infix_0_2 + look + postfix_0_3
    rule_sight_case3 = Rule(attach_perperty(sight_case3, {'operation': 'query', 'rule': 3}))

    # Query form 4: "显示雪山" / "显示出来雪山看看" -- "show" verb first
    sight_case4 = prefix_0_5 + show + e(infix) + sight + e(look) + postfix_0_3
    rule_sight_case4 = Rule(attach_perperty(sight_case4, {'operation': 'query', 'rule': 4}))

    # Query form 5: "雪山是什么样子的" -- what does the snow mountain look like
    sight_case5 = prefix_0_5 + sight + '(是)?' + what + shape + postfix_0_3
    rule_sight_case5 = Rule(attach_perperty(sight_case5, {'operation': 'query', 'rule': 5}))
class Aircleaner(object):
    """Regex rules for the ``smart_home_aircleaner`` domain.

    The shared fragments (``turn_on``, ``turn_off``, ``position``, ``desc``,
    ``prefix_0_3`` ...) and the helpers ``o`` / ``e`` / ``attach_perperty``
    come from ``smart_home_common`` via its star import.
    """

    # Domain identifier.
    service = 'smart_home_aircleaner'

    # Action words.
    turn = o(turn_on, turn_off)
    query = attach_perperty(o('查一下', '查询', '查查', '怎么样'), {'operation': 'query'})
    aircleaner = e(desc) + e('的') + o('空气净化器', '空净') + e('的')
    # NOTE(review): defined but not used by any rule below, although the
    # example sentence for the query rules talks about air quality -- confirm
    # whether cases 3/4 were meant to use this fragment.
    airquality = e(aircleaner) + o('空气质量', '空气')

    volume = attach_perperty('音量', {'parameter': 'volume'})
    channel = attach_perperty('频道', {'parameter': 'channel'})
    rack = attach_perperty('架', {'parameter': 'rack'})
    parameter = o(volume, channel, rack)

    # Air-cleaner control rules.
    # "关空气净化器" -- turn the air cleaner off
    aircleaner_case1 = prefix_0_3 + turn + e(position) + aircleaner + e(parameter) + postfix_0_3
    rule_aircleaner_case1 = Rule(attach_perperty(aircleaner_case1, {'rule': 1}))

    # "把空气净化器打开" -- turn the air cleaner on
    # e(position) takes the position fragment; it used to be the literal
    # string 'position', so the room was never recognised as a position.
    aircleaner_case2 = prefix_0_3 + e('把') + e(position) + aircleaner + e(parameter) + turn + postfix_0_3
    rule_aircleaner_case2 = Rule(attach_perperty(aircleaner_case2, {'rule': 2}))

    # "卧室的空气质量查一下" -- check the air quality in the bedroom (device first)
    aircleaner_case3 = prefix_0_3 + e('把') + e(position) + aircleaner + query + postfix_0_3
    rule_aircleaner_case3 = Rule(attach_perperty(aircleaner_case3, {'rule': 3}))

    # Same request with the query verb before the device.
    aircleaner_case4 = prefix_0_5 + e('把') + query + e(position) + aircleaner + postfix_0_3
    rule_aircleaner_case4 = Rule(attach_perperty(aircleaner_case4, {'rule': 4}))
class Curtain(object):
    """Regex rules for the ``smart_home_airconditioner`` domain.

    NOTE(review): the class is named ``Curtain`` although it serves the air
    conditioner domain; it is not renamed here because other modules may
    refer to it by this name.
    """

    # Domain identifier (air conditioner).
    service = 'smart_home_airconditioner'

    # Action words.
    turn = o(turn_on, turn_off, turn_up, turn_down)
    airconditioner = e(desc) + e('的') + o('空调机', '空调') + e('的')

    temperature = '(温度)'
    cool = attach_perperty('(制冷|冷风)', {'parameter': 'cool'})
    heat = attach_perperty('(制热|热风)', {'parameter': 'heat'})
    parameter = o(temperature, cool, heat) + e('调到')
    parameter = attach_perperty(parameter, {'operation': 'set'})

    # Rebinds the name ``degree`` inside this class: a captured value
    # followed by '度' (degrees).
    degree = range_tag(2, 'degree') + '(度)'

    # Air-conditioner control rules.
    airconditioner_case1 = prefix_0_3 + turn + e(position) + airconditioner + e(parameter) + postfix_0_3  # "关空调" -- turn the air conditioner off
    rule_conditioner_case_1 = Rule(attach_perperty(airconditioner_case1, {'rule': 1}))

    airconditioner_case2 = prefix_0_3 + e('把') + e(position) + airconditioner + e(parameter) + turn + postfix_0_3  # "把空调打开" -- turn the air conditioner on
    rule_conditioner_case_2 = Rule(attach_perperty(airconditioner_case2, {'rule': 2}))

    # NOTE(review): '(把)' is mandatory here, yet the example sentence
    # "空调冷风16度" (air conditioner, cool air, 16 degrees) contains no 把 --
    # confirm whether e('把') was intended.
    airconditioner_case3 = prefix_0_5 + '(把)' + e(position) + airconditioner + parameter + e(turn) + degree + postfix_0_3  # "空调冷风16度"
    rule_conditioner_case_3 = Rule(attach_perperty(airconditioner_case3, {'rule': 3}))
# Shared vocabulary for the smart-home rule modules, which pull these names
# in with ``from smart_home_common import *``.

# Replace each imported dictionary object by its ``join_all`` value.
pronoun = pronoun.join_all
modals = modals.join_all
prep = prep.join_all
degree = degree.join_all
honorific = honorific.join_all
interj = interj.join_all
prefix_unsual = prefix_unsual.join_all
auxiliary = auxiliary.join_all
quantifier = quantifier.join_all
numeral = numeral.join_all
adjective = adjective.join_all
adverb = adverb.join_all
stop_words = stop_words.join_all
want = '(要|想要|想|需要)'

# Filler word classes allowed before, after and inside a command.
prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
postfix = o(auxiliary, prep, pronoun)
infix = o(prep, pronoun, degree)

prefix_0_5 = r(prefix, 0, 5)
prefix_0_3 = r(prefix, 0, 3)
postfix_0_3 = r(postfix, 0, 3)

# Positions (rooms), each tagged with a 'position' property.
bedroom = attach_perperty('(卧室|卧房|睡房)', {'position': 'bedroom'})
main_bedroom = attach_perperty('(主卧)', {'position': 'main_bedroom'})
second_bedroom = attach_perperty('(次卧)', {'position': 'second_bedroom'})
living_room = attach_perperty('(客厅)', {'position': 'living_room'})
kitchen = attach_perperty('(厨房)', {'position': 'kitchen'})
dining_room = attach_perperty('(餐厅)', {'position': 'dining_room'})
bath_room = attach_perperty('(卫生间)', {'position': 'bath_room'})
position = o(bedroom, main_bedroom, second_bedroom, living_room, kitchen, dining_room, bath_room) + e('的')

# Action words, each tagged with an 'operation' property.
# NOTE(review): unlike its siblings, the turn_on pattern is not wrapped in
# parentheses -- confirm that attach_perperty groups it.
turn_on = attach_perperty('开|打开', {'operation': 'turn_on'})
turn_off = attach_perperty('(关上|关闭|关下|关一下|关掉|关)', {'operation': 'turn_off'})
turn_up = attach_perperty('(调高|升高|升起来)', {'operation': 'turn_up'})
turn_down = attach_perperty('(调低|降低|降下来)', {'operation': 'turn_down'})

# Free-text device description captured under the name 'desc'.
desc = range_tag(6, 'desc')
class Curtain(object):
    """Regex rules for the ``smart_home_curtain`` domain.

    The device fragment used to be a copy of the TV vocabulary
    ('电视机' / '电视'), so curtain commands never matched and TV commands
    were claimed by this service.  It now matches '窗帘' (curtain).
    Attribute names are kept so existing references keep working.
    """

    # Domain identifier.
    service = 'smart_home_curtain'

    # Action words.
    turn = o(turn_on, turn_off, turn_up, turn_down)
    curtain = e(desc) + e('的') + '(窗帘)' + e('的')
    # Backward-compatible alias for the attribute's previous name.
    tv = curtain

    # NOTE(review): these parameters were copied from the TV rules and are
    # kept unchanged; confirm which of them make sense for a curtain.
    volume = attach_perperty('音量', {'parameter': 'volume'})
    channel = attach_perperty('频道', {'parameter': 'channel'})
    rack = attach_perperty('架', {'parameter': 'rack'})
    parameter = o(volume, channel, rack)

    # Curtain control rules.
    # verb first, e.g. "关窗帘" -- close the curtain
    tv_case1 = prefix_0_3 + turn + e(position) + curtain + e(parameter) + postfix_0_3
    rule_tv_case_1 = Rule(attach_perperty(tv_case1, {'rule': 1}))

    # device first, e.g. "把窗帘打开" -- open the curtain
    tv_case2 = prefix_0_3 + e('把') + e(position) + curtain + e(parameter) + turn + postfix_0_3
    rule_tv_case_2 = Rule(attach_perperty(tv_case2, {'rule': 2}))
class Light(object):
    """Regex rules for the ``smart_home_light`` domain.

    Shared fragments (``turn_on``, ``turn_off``, ``position``, ``desc``,
    ``prefix_0_3``, ``postfix_0_3``) come from ``smart_home_common``.
    """

    # Domain identifier.
    service = 'smart_home_light'

    # Action words.
    turn = o(turn_on, turn_off)
    light = e(desc) + e('的') + o('灯', '灯光')

    # Light control rules.
    # "关灯" -- turn the light off
    light_case1 = prefix_0_3 + turn + e(position) + light + postfix_0_3
    rule_comic_case_1 = Rule(attach_perperty(light_case1, {'rule': 1}))

    # "把灯打开" -- turn the light on
    # e(position) takes the position fragment; it used to be the literal
    # string 'position', so the room was never recognised as a position.
    light_case2 = prefix_0_3 + e('把') + e(position) + light + turn + postfix_0_3
    rule_comic_case_2 = Rule(attach_perperty(light_case2, {'rule': 2}))
class TV(object):
    """Regex rules for the ``smart_home_tv`` domain.

    Shared fragments (``turn_on`` ... ``turn_down``, ``position``, ``desc``,
    ``prefix_0_3``, ``postfix_0_3``) come from ``smart_home_common``.
    """

    # Domain identifier.
    service = 'smart_home_tv'

    # Action words.
    turn = o(turn_on, turn_off, turn_up, turn_down)
    tv = e(desc) + e('的') + o('电视机', '电视') + e('的')

    volume = attach_perperty('音量', {'parameter': 'volume'})
    channel = attach_perperty('频道', {'parameter': 'channel'})
    rack = attach_perperty('架', {'parameter': 'rack'})
    parameter = o(volume, channel, rack)

    # TV control rules.
    # verb first, e.g. "关电视" -- turn the TV off
    tv_case1 = prefix_0_3 + turn + e(position) + tv + e(parameter) + postfix_0_3
    rule_tv_case_1 = Rule(attach_perperty(tv_case1, {'rule': 1}))

    # device first, e.g. "把电视打开" -- turn the TV on
    # e(position) takes the position fragment; it used to be the literal
    # string 'position', so the room was never recognised as a position.
    tv_case2 = prefix_0_3 + e('把') + e(position) + tv + e(parameter) + turn + postfix_0_3
    rule_tv_case_2 = Rule(attach_perperty(tv_case2, {'rule': 2}))
class StoreLocation(object):
    """Regex rules for the ``store_location`` domain.

    Handles "小忆我告诉你 X 放在 Y" (remember where X is kept) and
    "小忆我问你 X 放在哪里" (ask where X is kept).
    """

    # Domain identifier.
    service = 'store_location'

    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all
    stop_words = stop_words.join_all
    vehicle = vehicle_name.join_all

    # Original author's note: ending the pattern with a negated fragment
    # made the whole rule fail to match, hence a plain range tag here.
    set_location_1_5 = range_tag(5, name='location', start=1)

    # Non-greedy so the following (放|在|放在) keyword is not swallowed.
    # The group names were missing ('(?P(.)+?)' is not a valid named group);
    # they are restored from the variable names.
    # NOTE(review): confirm 'object' / 'location' are the slot names
    # consumers expect.
    object_1_5 = '(?P<object>(.)+?)'
    location_1_5 = '(?P<location>(.)+?)'

    set_prefix = '(小忆我告诉你)'
    query_prefix = '(小忆我问你)'

    put = '(放在|在)'

    keep = '((在)|(放)|(放在))'

    where = o('哪里', '什么地方', '什么位置')

    # Store a location: <set_prefix> <object> 放在 <location>
    set_case1 = set_prefix + object_1_5 + put + set_location_1_5
    rule_set_case1 = Rule(attach_perperty(set_case1, {'operation': 'set', 'rule': 1}))

    # Query a location: <query_prefix> <object> 放在 哪里
    query_case1 = query_prefix + object_1_5 + keep + where + e(stop_words)
    rule_query_case1 = Rule(attach_perperty(query_case1, {'operation': 'query', 'rule': 2}))
class Trick(object):
    """Regex rules for the ``trick`` (brain-teaser) domain."""

    # Domain identifier.
    service = 'trick'

    pronoun = pronoun.join_all
    can_words = modals.join_all
    adverb = adverb.join_all
    ask = honorific.join_all
    # Bind the joined pattern as the sibling rule classes do; without this,
    # e(stop_words) below received the dictionary object itself rather than
    # its pattern string.
    stop_words = stop_words.join_all
    want = '(要|想|想要|需要)'

    # Intent word: listen (the only "receive" intent supported).
    listen = '(听)(一)?(个)?'

    # Intent word: tell.
    tell = '(讲|来)(一)?(个)?'

    # Brain teaser (including a common misspelling).
    trick = '(脑筋急转弯|急转弯|脑经急转弯)'

    # The robot.
    robot = '(你|机器人|小忆)'

    # The speaker.
    me = '(我|我们|咱|咱们|俺)'

    # Again / another.
    again = '(再|又|多)'

    # Measure word: "a / one".
    an = '(个|一个)'

    # "for / to".
    give = '(给|为)'

    # "我要听个急转弯" -- I want to hear a brain teaser
    case_1 = e(robot) + e(me) + e(want) + e(again) + listen + e(an) + trick + e(stop_words)
    rule_1 = Rule(attach_perperty(case_1, {'scene': 'trick', 'operation': 'trick', 'rule': 1}))

    # "给我讲个急转弯" -- tell me a brain teaser
    case_2 = e(robot) + e(give) + e(me) + tell + e(an) + trick + e(stop_words)
    rule_2 = Rule(attach_perperty(case_2, {'scene': 'trick', 'operation': 'trick', 'rule': 2}))

    # "再来一个" -- one more; this rule is created with the filter
    # {'status': 'trick'}.
    case_3 = attach_perperty('(再来一个)', {'scene': 'trick', 'operation': 'trick', 'rule': 3})
    rule_3 = Rule(case_3, {'status': 'trick'})
class Vehicle(object):
    """Regex rules for the ``vehicle`` domain (show / describe / add a vehicle).

    ``e(...)`` appears to make a fragment optional (confirm in utils.utils).
    """

    # Domain identifier.
    service = 'vehicle'

    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all
    adverb = adverb.join_all
    stop_words = stop_words.join_all
    vehicle = vehicle_name.join_all

    # Filler fragment used between the meaningful parts of a sentence.
    ti_ch_0_3 = range_tag(3, start=0)

    show = '(样子|形状|外表|图片)'
    what = '(什么|哪种|怎么|啥)'
    append = '(加|增加|添加|加上|补充|再|再来|再加|再次|再一次|继续|接着)'
    append = attach_perperty(append, {'operation': 'append'})

    # The speaker.
    me = '(我|我们|咱|咱们|俺)'

    # Request words.
    ask = '(请|让|要|要求|麻烦)'

    # Again / another.
    again = '(再|又|多)'

    # The robot.
    robot = '(你|机器人|小忆)'

    want = '(要|想|想要|需要|能)'

    # "show me" verb plus an optional measure word, tagged operation=name.
    look = '(放|来|看|给)' + e('一') + e('个|辆')
    look = attach_perperty(look, {'operation': 'name'})

    # "消防车是什么样子的" -- what does a fire engine look like
    case1 = ti_ch_0_3 + vehicle + ti_ch_0_3 + what + show + e(stop_words)
    rule_case1 = Rule(attach_perperty(case1, {'operation': 'name', 'rule': 1}))

    # "我能请你给我看下自行车" -- could you please show me a bicycle
    case2 = e(me) + e(want) + e(again) + e('请') + e(robot) + e(look) + e(me) + e(adverb) + look + e('下') + vehicle + e(stop_words)
    rule_case2 = Rule(attach_perperty(case2, {'operation': 'query', 'rule': 2}))

    # "小忆我要自行车" -- Xiaoyi, I want a bicycle
    case3 = e(robot) + me + e(want) + e(look) + vehicle + e(stop_words)
    rule_case3 = Rule(attach_perperty(case3, {'operation': 'query', 'rule': 3}))

    # "消防车是干什么的" -- what is a fire engine for
    case4 = ti_ch_0_3 + vehicle + '(是)' + ti_ch_0_3 + what + ti_ch_0_3 + '(用)?' + ti_ch_0_3
    rule_case4 = Rule(attach_perperty(case4, {'operation': 'query', 'rule': 4}))

    # "自行车" -- the bare vehicle name
    case5 = vehicle + e('我要看') + e(stop_words)
    rule_case5 = Rule(attach_perperty(case5, {'operation': 'query', 'rule': 5}))

    # "小忆你有没有自行车" -- Xiaoyi, do you have a bicycle
    case6 = e(robot) + e('你') + '(有没有)' + vehicle + e('我') + e('要') + e('看') + e(stop_words)
    rule_case6 = Rule(attach_perperty(case6, {'operation': 'query', 'rule': 6}))

    # "再加一辆自行车" -- add one more bicycle
    case7 = ti_ch_0_3 + append + ti_ch_0_3 + quantifier + vehicle + ti_ch_0_3
    rule_case7 = Rule(attach_perperty(case7, {'operation': 'append', 'rule': 7}))
from dict.dict import pronoun, modals, prep, degree, honorific, interj, \
    auxiliary, quantifier, numeral, adjective
from nlu.rule import Rule

from utils.utils import o, r, e, attach_perperty


class Volume(object):
    """Regular-expression grammar for the volume-control domain.

    Every attribute below is a regex fragment assembled from the helpers
    imported from ``utils.utils``: ``o`` (alternation), ``r`` (bounded
    repetition), ``e`` (optional) and ``attach_perperty`` (wraps a fragment
    in a named group that encodes key/value properties).  The ``rule_*``
    attributes wrap finished sentence patterns in ``Rule`` objects.
    """

    # Identifies this class as the "volume" domain.
    service = 'volume'

    # Shared word classes taken from the project dictionaries.
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all

    # Filler words tolerated before, after and inside a command.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # --- Adjustment verbs -------------------------------------------------
    # Verbs that already imply a direction, optionally followed by a
    # numeral and a quantifier.
    change_up = '(增|加|升|扩)' + e(numeral) + e(quantifier)
    change_up = attach_perperty(change_up, {'direction': "up"})

    change_down = '(减|降|缩)' + e(numeral) + e(quantifier)
    change_down = attach_perperty(change_down, {'direction': "down"})

    # Neutral "adjust" verbs that carry no direction of their own.
    change = '(调|调整|调节|调动|调理|调弄|改变|转变|弄|搞|放|变|整)'

    # --- Direction words --------------------------------------------------
    up = '(大|高|强|重|响|响亮)' + e(numeral) + e(quantifier)
    up = attach_perperty(up, {'direction': "up"})

    down = '(小|低|弱|轻|低沉)' + e(numeral) + e(quantifier)
    down = attach_perperty(down, {'direction': "down"})

    # "To the maximum / minimum": tagged with step=toend in addition to the
    # direction.
    maximum = e('到|至') + '(最大|最高|最强|最重|最响|最亮)'
    maximum = attach_perperty(maximum, {'direction': "up", 'step': "toend"})

    minimum = e('到|至') + '(最小|最低|最弱|最轻)'
    minimum = attach_perperty(minimum, {'direction': "down", 'step': "toend"})

    direction = o(up, down, maximum, minimum)

    # --- Reversed directions ----------------------------------------------
    # Same surface words as above but tagged with the opposite direction;
    # used by the negated / complaining sentence forms (e.g. "don't be so
    # loud" means "turn it down").
    reverse_up = '(大|高|强|重|响|响亮)' + e(numeral) + e(quantifier)
    reverse_up = attach_perperty(reverse_up, {'direction': "down"})

    reverse_down = '(小|低|弱|轻|低沉)' + e(numeral) + e(quantifier)
    reverse_down = attach_perperty(reverse_down, {'direction': "up"})

    reverse_maximum = e('(到|至)') + '(最大|最高|最强|最重|最响|最亮)'
    reverse_maximum = attach_perperty(reverse_maximum, {'direction': "down", 'step': "toend"})

    reverse_minimum = e('(到|至)') + '(最小|最低|最弱|最轻)'
    reverse_minimum = attach_perperty(reverse_minimum, {'direction': "up", 'step': "toend"})

    reverse_direction = o(reverse_up, reverse_down, reverse_maximum, reverse_minimum)

    # --- "Change + direction" combinations --------------------------------
    # 1) directional verb, optional direction word
    change_case1 = o(change_up, change_down) + e(direction)
    # 2) optional neutral verb, optional degree word, direction word
    change_case2 = e(change) + e(degree) + direction
    # 3) "towards" + optional degree word + direction word + optional verb
    change_case3 = '(往|向|朝)' + e(degree) + direction + e(change)
    change_direction = o(change_case1, change_case2, change_case3)

    # --- The thing being adjusted (voice / sound / volume / speaker) ------
    volume = r('(把|将)', 0, 1) + e('你的') + '(声音|声儿|声|音量|音响|喇叭|喇叭声|说话力气)' + e(numeral) + e(quantifier)

    # Form 1: direction first, e.g. 大点声 ("a bit louder").
    volume_case1 = prefix_0_5 + change_direction + volume + postfix_0_3
    rule_volume_case1 = Rule(attach_perperty(volume_case1, {'operation': 'change', 'rule': '1'}))

    # Form 2: volume word first, e.g. 声音再调大点 ("turn the sound up a bit").
    volume_case2 = prefix_0_5 + volume + e(infix) + change_direction + postfix_0_3
    rule_volume_case2 = Rule(attach_perperty(volume_case2, {'operation': 'change', 'rule': '2'}))

    # Form 3: negation, e.g. 别那么大声 ("don't be so loud").
    do_not = '(不要|不想|不准|不想要|不|别)'
    volume_case3 = prefix_0_5 + do_not + e(degree) + reverse_direction + e(infix) + volume + postfix_0_3
    rule_case3 = Rule(attach_perperty(volume_case3, {'operation': 'change', 'rule': 3}))

    # Forms 4-9: rhetorical "why ..." complaints in every word order, e.g.
    # 那么大声干嘛 / 声音这么大干啥 / 干啥这么大声 ("why so loud?").
    why = '(干嘛|干吗|干啥|干什么|做什么|怎么|怎的|咋|为啥|为什么)'
    volume_case4 = prefix_0_5 + why + e(infix) + reverse_direction + e(infix) + volume + postfix_0_3
    rule_case4 = Rule(attach_perperty(volume_case4, {'operation': 'change', 'rule': 4}))

    volume_case5 = prefix_0_5 + why + e(infix) + volume + e(infix) + reverse_direction + postfix_0_3
    rule_case5 = Rule(attach_perperty(volume_case5, {'operation': 'change', 'rule': 5}))

    volume_case6 = prefix_0_5 + reverse_direction + e(infix) + volume + e(infix) + why + postfix_0_3
    rule_case6 = Rule(attach_perperty(volume_case6, {'operation': 'change', 'rule': 6}))

    volume_case7 = prefix_0_5 + volume + e(infix) + reverse_direction + e(infix) + why + postfix_0_3
    rule_case7 = Rule(attach_perperty(volume_case7, {'operation': 'change', 'rule': 7}))

    volume_case8 = prefix_0_5 + reverse_direction + e(infix) + why + e(infix) + volume + postfix_0_3
    rule_case8 = Rule(attach_perperty(volume_case8, {'operation': 'change', 'rule': 8}))

    volume_case9 = prefix_0_5 + volume + e(infix) + why + e(infix) + reverse_direction + postfix_0_3
    rule_case9 = Rule(attach_perperty(volume_case9, {'operation': 'change', 'rule': 9}))

    # Forms 10-11: "too ..." complaints, e.g. 声音太大了 ("the sound is too
    # loud"), 你说话力气好小 ("you speak so softly").
    too_much = '(太|过于|过|好)'
    volume_case10 = prefix_0_5 + volume + too_much + reverse_direction + postfix_0_3
    rule_case10 = Rule(attach_perperty(volume_case10, {'operation': 'change', 'rule': 10}))

    volume_case11 = prefix_0_5 + too_much + reverse_direction + volume + postfix_0_3
    rule_case11 = Rule(attach_perperty(volume_case11, {'operation': 'change', 'rule': 11}))

    # Form 12: indirect complaints, e.g. 听不清楚 ("can't hear clearly"),
    # 吵死了 ("so noisy"), 你没吃饭吗 ("haven't you eaten?").
    # "Cannot hear" asks for more volume ...
    can_not_hear = '(听不清楚|听不清|听不见|听不到|听不着|没吃饭)'
    can_not_hear = attach_perperty(can_not_hear, {'direction': "up"})

    # ... while "too loud" asks for less.  The direction must be attached to
    # too_loud itself: the previous code re-wrapped can_not_hear here, which
    # gave can_not_hear both directions and left too_loud without any.
    too_loud = '(太吵|吵死人|吵死|吵到我|吃多)'
    too_loud = attach_perperty(too_loud, {'direction': "down"})

    volume_case12 = prefix_0_5 + o(can_not_hear, too_loud) + postfix_0_3
    rule_case12 = Rule(attach_perperty(volume_case12, {'operation': 'change', 'rule': 12}))

    # Form 13: 嘘 ("shh"); the other characters are presumably
    # speech-recognition homophones of it -- TODO confirm.
    volume_case13 = '(嘘|需|旭|徐)'
    rule_case13 = Rule(attach_perperty(volume_case13, {'operation': 'change', 'rule': 13}))
from dict.dict import pronoun, modals, prep, degree, honorific, interj, \
    auxiliary, quantifier, numeral, adjective
from nlu.rule import Rule

from utils.utils import o, r, e, attach_perperty


# Regex grammar for queries about the robot itself: its QR code, software
# version, binding status and network status.  Each topic offers three
# sentence shapes (bare keyword, "show ... <keyword>", "<robot's> <keyword>")
# which are OR-ed together and wrapped in a Rule.
class XiaoYi(object):
    # Identifies this class as the "xiaoyi" domain.
    service = 'xiaoyi'

    # Shared word classes taken from the project dictionaries.
    pronoun = pronoun.join_all
    modals = modals.join_all
    prep = prep.join_all
    degree = degree.join_all
    honorific = honorific.join_all
    interj = interj.join_all
    prefix_unsual = '(今天|现在)'
    auxiliary = auxiliary.join_all
    quantifier = quantifier.join_all
    numeral = numeral.join_all
    adjective = adjective.join_all

    # Filler words tolerated around a query.
    prefix = o(pronoun, prep, modals, degree, honorific, interj, prefix_unsual)
    postfix = o(auxiliary, prep, pronoun)
    infix = o(prep, pronoun, degree)

    prefix_0_5 = r(prefix, 0, 5)
    postfix_0_3 = r(postfix, 0, 3)

    # "show / open / look up" verbs and the possessive "your / the robot's".
    show = '(显示|打开|亮出|查|查询)(下)?'
    your = '(你|机器|机器人|小忆)(的)?'

    # --- QR code ----------------------------------------------------------
    barcode = '(二维码|身份证)'
    barcode_case1 = barcode
    # "show (your) QR code"
    barcode_case2 = ''.join((prefix_0_5, show, e(your), barcode, postfix_0_3))
    # "the robot's QR code"
    barcode_case3 = ''.join((prefix_0_5, your, barcode, postfix_0_3))
    barcode_sentence = o(barcode_case1, barcode_case2, barcode_case3)
    rule_barcode_sentence = Rule(
        attach_perperty(
            barcode_sentence,
            {'attribute': 'barcode', 'operation': 'get', 'rule': 1},
        )
    )

    # --- Software version -------------------------------------------------
    version = '(软件)?(版本)(号|信息)?'
    version_case1 = version
    # "show (your) version number"
    version_case2 = ''.join((prefix_0_5, show, e(your), version, postfix_0_3))
    # "the robot's version number"
    version_case3 = ''.join((prefix_0_5, your, version, postfix_0_3))
    version_sentence = o(version_case1, version_case2, version_case3)
    rule_version_sentence = Rule(
        attach_perperty(
            version_sentence,
            {'attribute': 'version', 'operation': 'get', 'rule': 2},
        )
    )

    # --- Binding status ---------------------------------------------------
    bind = '(绑定状态)'
    bind_case1 = bind
    # "show (your) binding status"
    bind_case2 = ''.join((prefix_0_5, show, e(your), bind, postfix_0_3))
    # "the robot's binding status"
    bind_case3 = ''.join((prefix_0_5, your, bind, postfix_0_3))
    bind_sentence = o(bind_case1, bind_case2, bind_case3)
    rule_bind_sentence = Rule(
        attach_perperty(
            bind_sentence,
            {'attribute': 'bind', 'operation': 'get', 'rule': 3},
        )
    )

    # --- Network status ---------------------------------------------------
    wifi = '(网络状态)'
    # bare keyword: "network status"
    wifi_case1 = wifi
    # "show (your) network status"
    wifi_case2 = ''.join((prefix_0_5, show, e(your), wifi, postfix_0_3))
    # "the robot's network status"
    wifi_case3 = ''.join((prefix_0_5, your, wifi, postfix_0_3))
    wifi_sentence = o(wifi_case1, wifi_case2, wifi_case3)
    rule_wifi_sentence = Rule(
        attach_perperty(
            wifi_sentence,
            {'attribute': 'wifi', 'operation': 'get', 'rule': 4},
        )
    )


# NOTE(dump): the same dump lines also carried query_analysis/requirements:
#   tornado==4.1
#   redis==2.10.5
#   PyYAML==3.11
6 | 7 | 摘 要: 8 | 创 建 者:余菲 9 | 创建日期:16/12/17 10 | """ -------------------------------------------------------------------------------- /query_analysis/scene/dance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 6 | 7 | 摘 要:跳舞的场景语义 8 | 创 建 者:余菲 9 | 创建日期:16/12/24 10 | """ 11 | import re 12 | 13 | 14 | class Dance(object): 15 | """ 16 | 跳舞的场景语义 17 | """ 18 | RE_OK = re.compile('(?= 40: 56 | break 57 | print '$%s{arid_list%s%s} = (%s);' % (type_name, '%', 58 | ','.join(temp_list_id), 59 | item1) 60 | temp_list_id = [] 61 | temp_list_content = [] 62 | 63 | print "" 64 | print "" 65 | 66 | content_list = sorted(content_list, key=lambda x: x[4]) 67 | for item1, item2 in groupby(content_list, itemgetter(4)): 68 | if not item1: 69 | continue 70 | type_name = 'type_{}'.format(i) 71 | i += 1 72 | for subitem in item2: 73 | temp_list_id.append(str(subitem[2])) 74 | temp_list_content.append(subitem[1]) 75 | print '$%s{arid_list%s%s} = (%s);' % (type_name, '%', 76 | ','.join(temp_list_id), 77 | item1) 78 | temp_list_id = [] 79 | temp_list_content = [] 80 | -------------------------------------------------------------------------------- /query_analysis/usage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Copyright (c) 2016,小忆机器人 5 | All rights reserved. 
# test_rule.py -- minimal demonstration of registering a grammar class and
# matching one sentence against it.
from nlu.nlu_framework import Nlu_Framework
from nlu.rule import Rule
from utils.utils import range_tag, attach_perperty


class Test(object):
    # Identifies this class as the "test" domain.  Per the original author's
    # note this ``service`` field must exist: it is emitted when one of this
    # class's patterns matches.
    service = 'test'
    # Capture group for a short name (length 2 passed to range_tag); the
    # captured text is reported under the output field ``user_name``.
    name = range_tag(2, 'user_name')

    # Sentence pattern: 我的名字是小明 ("my name is Xiao Ming").
    name_case1 = '我的名字是' + name

    # Build the rule object, tagging matches with operation=query, rule=1.
    rule_case1 = Rule(attach_perperty(name_case1, {'operation': 'query', 'rule': 1}))


Nlu_Framework.register(Test)

match_dict_list = Nlu_Framework.match('我的名字是小明')

# Only index the first result when something actually matched; indexing an
# empty (or missing) result would otherwise abort the demo with an error.
if match_dict_list:
    for k, v in match_dict_list[0].items():
        # Single-argument call form prints identically under Python 2's
        # print statement and Python 3's print function.
        print('{} : {}'.format(k, v))
from collections import defaultdict

try:
    _TEXT_TYPE = unicode  # Python 2: the text type that needs encoding
except NameError:
    _TEXT_TYPE = str  # Python 3 has no separate ``unicode`` builtin


def attach_name(case, name):
    """
    Wrap a pattern in a named group carrying a single-valued attribute.

    :param case: regex source of the pattern, e.g. ``(bird|bug)``
    :param name: group name to attach, e.g. ``type`` or ``name``
    :return: ``(?P<name>(case))``
    """
    return "(?P<{}>({}))".format(name, case)


def attach_perperty(case, perperty):
    """
    Wrap a pattern in a named group whose name encodes key/value properties.

    Keys and values are joined with double underscores, so
    ``{'operation': 'query'}`` produces the group name ``operation__query``.
    Several pairs are concatenated in the dict's iteration order.

    Original author's note: when the same property string has to appear more
    than once, a triple underscore may be appended to the value (e.g.
    ``'query___'``) to keep group names distinct; this only works as a
    suffix.  The code consuming that suffix is not part of this module.

    :param case: regex source of the pattern
    :param perperty: dict of properties, e.g. ``{'operation': 'query'}``
    :return: ``(?P<key__value[__key__value...]>(case))``
    """
    perperty_string = '__'.join(['{}__{}'.format(k, v) for k, v in perperty.items()])
    return "(?P<{}>({}))".format(perperty_string, case)


def get_attach_perperty(attach_string):
    """
    Decode a group name built by ``attach_perperty`` back into a mapping.

    :param attach_string: e.g. ``'operation__query__rule__1'``
    :return: ``{}`` for empty/blank input, otherwise a ``defaultdict(dict)``
             mapping each key to its (string) value; pairs with an empty key
             or value are skipped
    :raises AssertionError: if the string does not split into an even number
             of ``__``-separated parts
    """
    if not attach_string or not attach_string.strip():
        return {}
    parts = attach_string.split('__')
    assert len(parts) % 2 == 0, 'attach_string is error: %s' % attach_string
    properties = defaultdict(dict)
    # Pair up even/odd positions.  The previous ``range(len(parts) / 2)``
    # relied on Python 2 integer division and raises TypeError on Python 3.
    for key, value in zip(parts[0::2], parts[1::2]):
        if key and value:
            properties[key] = value
    return properties


def e(expression):
    """
    "Either": mark an expression as optional.

    :param expression: regex source
    :return: ``(expression)?``
    """
    return '({})?'.format(expression)


def o(*args):
    """
    "Or": build an alternation of any number of expressions.

    :param args: regex sources to choose between
    :return: ``(a|b|...)``
    """
    template = '|'.join(args)
    return "({})".format(template)


def r(expression, min, max):
    """
    "Repeat": bound how many times an expression may occur.

    :param expression: regex source
    :param min: minimum number of repetitions
    :param max: maximum number of repetitions
    :return: ``((expression){min,max})``
    """
    return "((%s){%s,%s})" % (expression, min, max)


def range_tag(length, name=None, start=1):
    """
    Build a wildcard capture of bounded length.

    The upper bound is ``length * 3`` -- presumably because patterns run
    against UTF-8 byte strings where one CJK character takes three bytes
    (TODO confirm against the matcher).

    :param length: nominal maximum length in characters
    :param name: optional group name for the capture
    :param start: minimum number of occurrences
    :return: ``(?P<name>(.){start,length*3})`` or the unnamed equivalent
    """
    if name:
        return '(?P<%s>(.){%s,%s})' % (name, start, length*3)
    return '((.){%s,%s})' % (start, length*3)


def range_not_tag(length, tag, name=None, start=1):
    """
    Build a bounded wildcard capture that must not run into ``tag``.

    :param length: nominal maximum length (upper bound is ``length * 3``)
    :param tag: regex source that the captured text must not contain
    :param name: optional group name for the capture
    :param start: minimum number of occurrences
    :return: the named pattern when ``name`` is given, otherwise the same
             pattern without the name (previously this case fell off the end
             of the function and returned ``None``)
    """
    if name:
        return '(?P<%s>(((?!%s).)*){%s,%s})' % (name, tag, start, length*3)
    return '((((?!%s).)*){%s,%s})' % (tag, start, length*3)


def force_utf8(data, force_key=False):
    '''
    Convert text inside ``data`` to UTF-8 encoded byte strings.

    Lists and dicts are converted in place (dict keys are left untouched
    unless ``force_key`` is set) and the same object is returned.

    @data: value to convert
    @force_key: when true, delegate to ``force_utf8_new`` so that dict keys
                are converted as well (a new structure is returned)
    @return: the UTF-8 encoded data
    '''
    if force_key:
        return force_utf8_new(data)
    if isinstance(data, _TEXT_TYPE):
        return data.encode('utf-8')
    elif isinstance(data, list):
        for idx, i in enumerate(data):
            data[idx] = force_utf8(i)
    elif isinstance(data, dict):
        for i in data:
            data[i] = force_utf8(data[i])
    return data


def force_utf8_new(data):
    '''
    Convert text inside ``data`` to UTF-8, including dictionary keys.

    Unlike ``force_utf8`` this builds and returns new containers instead of
    mutating the input.  Tuples come back as lists (kept from the original
    implementation).

    @data: value to convert
    @return: the UTF-8 encoded data
    '''
    if isinstance(data, dict):
        # ``items()`` instead of the Python-2-only ``iteritems()``.
        return {force_utf8_new(key): force_utf8_new(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [force_utf8_new(element) for element in data]
    elif isinstance(data, tuple):
        return [force_utf8_new(element) for element in data]
    elif isinstance(data, _TEXT_TYPE):
        return data.encode('utf-8')
    else:
        return data