├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── chat_agent.py ├── main.py ├── search.py └── zh_util.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | #.idea/ 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # QAMaster Demo 2 | ## 构建 AI 资料收集助手 调用 3 | * Google Search 4 | * OepnAI ChatGPT 5 | * 百度 文心一言 6 | * 阿里 通义千问 7 | * Google Bard 8 | 9 | ## 涉及工具 10 | * Airtable 11 | * Serper 12 | 13 | ## 使用前 14 | * 运行前,请务必先自行完善 .env文件中的参数。包括但不限于各类 API key\Secret Key等。 15 | * 如果有疑问, 欢迎 email: 798800165@qq.com 16 | 17 | ## .env文件必要参数 list 18 | * URL_HOT= 19 | * URL_INVITED= 20 | * URL_RECOMMEND= 21 | 22 | * AIRTABLE_TOKEN= 23 | * AIRTABLE_DB_ID= 24 | * AIRTABLE_TABLE_ID= 25 | 26 | * GOOGLE_URL= 27 | * SERPER_API_KEY= 28 | * SERPER_HEADERS= X-API-KEY=, Content-Type= 29 | 30 | * OPENAI_API_KEY= 31 | 32 | * BD_ACCESS_TOKEN= 33 | * BD_SESSION_KEY= 34 | 35 | * ALI_API_KEY= 36 | 37 | * GOOGLE_BARD_TOKEN= 38 | * 1PSID= 39 | * 1PSIDTS= 40 | * 1PSIDCC= 41 | 42 | 43 | ## 视频讲解 44 | * B站 https://www.bilibili.com/video/BV1aj411y7Si/ 45 | * youtube https://youtu.be/HFqkdLqtB40 46 | 47 | ## 小小声明 48 | 49 | ### 本项目代码均为演示 Demo 和授课讲解用,因为不是生产环境使用且时间仓促原因,代码质量请各位包涵,如果实际应用,请务必按照代码规范完善,谢谢 50 | 51 | 52 | -------------------------------------------------------------------------------- /chat_agent.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import json 4 | from http import HTTPStatus 5 | 6 | import bardapi 7 | import requests 8 | from environs import Env 9 | 10 | import openai 11 | from dashscope import Generation 12 | from bardapi import Bard 13 | from bardapi.constants import SESSION_HEADERS 14 | 15 | env = Env() 16 | 17 | class ChatAgent(): 18 | 19 | def __init__(self,env, prompt): 20 | print(f'\n\n======= 调用 Chat Bot =======') 21 | self.prompt = prompt 22 | self.env = env 23 | 24 | def chat_gpt_agent(self): 25 | print(f'\n+++++++++++++++++++++++++++++++') 26 | print(f'++++++ 调用 OpenAI ChatGPT++++++') 27 | print(f'++++++++++++++++++++++++++++++++') 28 | print("提问内容 :"+ self.prompt) 29 | openai.api_key = os.getenv("OPENAI_API_KEY") 30 | 31 | chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", 32 | messages=[{"role": "user", "content": self.prompt}]) 33 | 34 | jsonObj = json.loads(chat_completion.__str__()) 35 | chat_gpt_answer = jsonObj['choices'][0]['message']['content'] 36 | 37 | print(chat_gpt_answer) 38 | #print(chat_completion.__str__()) 39 | 40 | return chat_gpt_answer 41 | 42 | def bd_wxyy_agent(self): 43 | print(f'\n+++++++++++++++++++++++++++++') 44 | print(f'++++++ 调用 百度 文心一言 ++++++') 45 | print(f'+++++++++++++++++++++++++++++') 46 | print("提问内容 :" + self.prompt) 47 | 48 | # url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant?access_token=" + os.getenv("BD_ACCESS_TOKEN") 49 | 50 | url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + os.getenv("BD_ACCESS_TOKEN") 51 | #print(url) 52 | 53 | payload = json.dumps({ 54 | "messages": [ 55 | { 56 | "role": "user", 57 | "content": self.prompt 58 | } 59 | ] 60 | }) 61 | headers = { 62 | 'Content-Type': 'application/json' 63 | } 64 | response = requests.request("POST", url, headers=headers, data=payload) 65 | 66 | # print(response.text) 67 | 68 | jsonObj = json.loads(response.text) 69 | bd_wxyy_answer = jsonObj['result'] 70 | 71 | print(bd_wxyy_answer) 72 | 73 | return bd_wxyy_answer 74 | 75 | 76 | def ali_tyqw_agent(self): 77 | print(f'\n+++++++++++++++++++++++++++++') 78 | print(f'++++++ 调用 阿里 通义千问 ++++++') 79 | print(f'+++++++++++++++++++++++++++++') 80 | print("提问内容 :" + self.prompt) 81 | 82 | messages = [{'role': 'system', 'content': '你是达摩院的知识助手机器人。'}, 83 | {'role': 'user', 'content': self.prompt}] 84 | gen = Generation() 85 | response = gen.call( 86 | Generation.Models.qwen_v1, 87 | api_key=os.getenv("ALI_API_KEY"), 88 | messages=messages, 89 | result_format='message', # set the result is message format. 90 | ) 91 | if response.status_code == HTTPStatus.OK: 92 | #print(response) 93 | jsonObj = json.loads(response.__str__()) 94 | ali_tyqw_answer = jsonObj['output']['choices'][0]['message']['content'] 95 | print(ali_tyqw_answer) 96 | return ali_tyqw_answer 97 | else: 98 | print('Request id: %s, Status code: %s, error code: %s, error message: %s' % ( 99 | response.request_id, response.status_code, 100 | response.code, response.message 101 | )) 102 | 103 | 104 | def google_bard_agent(self): 105 | print(f'\n+++++++++++++++++++++++++++++') 106 | print(f'++++++ 调用 Google Bard ++++++') 107 | print(f'+++++++++++++++++++++++++++++') 108 | print("提问内容 :" + self.prompt) 109 | 110 | bard_token =os.getenv("1PSID") 111 | #print(bard_token) 112 | 113 | session = requests.Session() 114 | session.headers = SESSION_HEADERS 115 | session.cookies.set("__Secure-1PSID", os.getenv("1PSID")) 116 | session.cookies.set("__Secure-1PSIDTS", os.getenv("1PSIDTS")) 117 | session.cookies.set("__Secure-1PSIDCC", os.getenv("1PSIDCC")) 118 | 119 | bard = Bard(token=bard_token, session=session) 120 | response = bard.get_answer(self.prompt).__str__().replace('\'',"\"") 121 | print(response) 122 | return response 123 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import requests 4 | import json 5 | from bs4 import BeautifulSoup 6 | from environs import Env 7 | from pyairtable import Api 8 | from bardapi import Bard 9 | from dashscope import Generation 10 | 11 | from zh_util import * 12 | from search import * 13 | from chat_agent import * 14 | 15 | env = Env() 16 | 17 | # Step1: 获取知乎的问题数据 或者 自定义问题 18 | 19 | # Step2.1: 调用 google 给出答案 20 | 21 | # Step2.2: 调用 openAI 直接给出回答 22 | 23 | # Step2.3: 调用 百度 文心一言 直接给出回答 24 | 25 | # Step2.5 调用 阿里 通义千问 直接给出回答 26 | 27 | # Step2.6: 调用 google bard 直接给出回答 28 | 29 | # Step3: 数据格式化后写入 airtable 30 | 31 | # Step4: 调用插件总结(未完成) 32 | 33 | 34 | 35 | # 获取元数据 36 | def get_question(request_url): 37 | HEADERS = env.dict('HEADERS') 38 | # print(type(HEADERS), HEADERS) 39 | 40 | response = requests.get(url=request_url, headers=HEADERS) 41 | html = response.text 42 | soup = BeautifulSoup(html, 'html.parser') 43 | # print(soup.get_text()) 44 | 45 | return soup.get_text() 46 | 47 | #获取知乎 邀请回答页 数据 48 | def add_inivited_to_airtable(result, table): 49 | jsonObj = json.loads(result) 50 | jsonData = jsonObj['data'] 51 | # print(jsonData) 52 | 53 | for i in range(20): 54 | # print() 55 | jsonQuestion = jsonData[i]['target_source'] 56 | print(str(i+1)+" : "+jsonQuestion['text']+" --- "+jsonQuestion['sub_text']) 57 | 58 | # print(jsonQuestion['sub_text']) 59 | visitCount = ZHUtil.getVisitCount(str(jsonQuestion['sub_text'])) 60 | 61 | #s = Search(env,jsonQuestion['text']) 62 | #google_result = s.doSearch() 63 | # print("所有的搜索结果 :"+google_result) 64 | 65 | #agent = ChatAgent(env, jsonQuestion['text']) 66 | 67 | # 调用 openai 68 | #openai_result = agent.chat_gpt_agent() 69 | # print("\n\nOpenAI ChatGPT:\n"+openai_result) 70 | 71 | # 调用 百度 文心一言 72 | #bd_wxyy_result = agent.bd_wxyy_agent() 73 | # print("\n\n百度 文心一言 :\n"+bd_wxyy_result) 74 | 75 | # 调用 阿里 通义千问 76 | #ali_tyqw_result = agent.ali_tyqw_agent() 77 | # print("\n\n 阿里 通义千问 :\n"+ali_tyqw_result) 78 | 79 | # 调用 google bard 80 | #bard_result = agent.google_bard_agent() 81 | # print("\n\n Google Bard :\n"+bard_result) 82 | 83 | record = table.create({"content": jsonQuestion['text'], "visit": visitCount}, False,True) 84 | print(record) 85 | 86 | # Step2: 数据格式化后写入 airtable 87 | # record = table.create({"content": jsonQuestion['text'], "visit": visitCount, 88 | # "google_response": google_result, 89 | # "openai_response": openai_result, 90 | # "wxyy_response": bd_wxyy_result, 91 | # "tyqw_response": ali_tyqw_result}, False, True) 92 | # 93 | # print("\n\n==============================================") 94 | # print("") 95 | # print("写入 AirTable: " + record.__str__()) 96 | # print("") 97 | # print("==============================================") 98 | 99 | #获取知乎 为你推荐页 数据 100 | def add_recommend_to_airtable(result,table): 101 | jsonObj = json.loads(result) 102 | jsonData = jsonObj['data'] 103 | print(jsonData) 104 | 105 | for i in range(20): 106 | print() 107 | jsonQuestion = jsonData[i]['question'] 108 | # print(str(i+1)+" : "+jsonQuestion['title']+" --- "+ str(jsonQuestion['visit_count'])) 109 | 110 | # Step2: 数据格式化后写入 airtable 111 | record = table.create({"content": jsonQuestion['title'], "visit": str(jsonQuestion['visit_count'])}, False, True) 112 | print(record) 113 | 114 | 115 | #自定义数据 收集数据 116 | def answer_custom_question(question,table): 117 | 118 | #调用 google 搜索 提取前五个答案 119 | s = Search(env, question) 120 | google_result = s.doSearch() 121 | #print("\n\n所有的搜索结果 :\n"+google_result) 122 | 123 | 124 | agent = ChatAgent(env, question) 125 | #调用 openai 126 | openai_result = agent.chat_gpt_agent() 127 | # print("\n\nOpenAI ChatGPT:\n"+openai_result) 128 | 129 | #调用 百度 文心一言 130 | bd_wxyy_result = agent.bd_wxyy_agent() 131 | # print("\n\n百度 文心一言 :\n"+bd_wxyy_result) 132 | 133 | #调用 阿里 通义千问 134 | ali_tyqw_result = agent.ali_tyqw_agent() 135 | # print("\n\n 阿里 通义千问 :\n"+ali_tyqw_result) 136 | 137 | #调用 google bard 138 | #bard_result = agent.google_bard_agent() 139 | # print("\n\n Google Bard :\n"+bard_result) 140 | 141 | #写入 AirTable 142 | record = table.create({"content": question, "visit": "100", 143 | "google_response": google_result, 144 | "openai_response": openai_result, 145 | "wxyy_response":bd_wxyy_result, 146 | "tyqw_response":ali_tyqw_result}, False, True) 147 | 148 | 149 | print("\n\n==============================================") 150 | print("") 151 | print("写入 AirTable: " + record.__str__()) 152 | print("") 153 | print("==============================================") 154 | 155 | 156 | 157 | def controller(): 158 | #获取系统参数 159 | env.read_env() 160 | 161 | #conenect airtable 获取存储数据的 table 162 | airtable_api = Api(env('AIRTABLE_TOKEN')) 163 | table = airtable_api.table(env.str('AIRTABLE_DB_ID'), env.str('AIRTABLE_TABLE_ID')) 164 | 165 | #问题实例 166 | #question = "韩国为什么被称为世界经济金丝雀" 167 | #question = "为什么回不到计划经济了?" 168 | question="描写晚霞的唐诗" 169 | 170 | #有些知乎 问题 在 各个LLM 都找不到答案 请优先使用自定义问题进行测试 171 | # Step1: 获取知乎的问题数据 172 | #invitedResult = get_question(env('URL_INVITED')) 173 | #print("元数据: "+invitedResult) 174 | 175 | # Step2: 数据格式化后写入 airtable 176 | #add_inivited_to_airtable(invitedResult,table) 177 | 178 | # Step1: 获取知乎的问题数据 179 | #recommendResult = get_question(env('URL_RECOMMEND')) 180 | #print(recommendResult) 181 | 182 | # Step2: 数据格式化后写入 airtable 183 | #add_recommend_to_airtable(recommendResult,table) 184 | 185 | 186 | 187 | #自定义问题 先后调用 LLM 和 Google search 188 | #测试请优先使用这个 189 | answer_custom_question(question,table) 190 | 191 | 192 | 193 | def print_message(message): 194 | print(f'***********************************************************') 195 | print(f'======= {message} =======') 196 | print(f'***********************************************************') 197 | print(f'') 198 | 199 | if __name__ == '__main__': 200 | print_message('调用 Google search & LLM 实现 AI 数据收集 Demo') 201 | controller() 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /search.py: -------------------------------------------------------------------------------- 1 | from environs import Env 2 | import requests 3 | import json 4 | 5 | env = Env() 6 | class Search(object): 7 | 8 | def __init__(self,env, content): 9 | print(f'======= 调用 google 搜索 =======') 10 | self.content = content 11 | self.env = env 12 | 13 | 14 | # 请求 google API 获取搜索结果 15 | def doSearch(self): 16 | #GOOGLE_URL = https: // google.serper.dev / search 17 | url = env.str('GOOGLE_URL') 18 | # print(url) 19 | print("搜索内容: " + self.content) 20 | 21 | payload = json.dumps({"q": self.content, "gl": "cn", "hl": "zh-cn"}) 22 | 23 | #Serper的 API Key 包含在 headers 中 24 | response = requests.request("POST", url, headers=env.dict('SERPER_HEADERS'), data=payload) 25 | # print(response.text) 26 | 27 | allAnswsers = self.getResult(response.text) 28 | 29 | return allAnswsers 30 | 31 | # 获取搜索结果 32 | def getResult(self,result): 33 | jsonObj = json.loads(result) 34 | jsonData = jsonObj['organic'] 35 | #(jsonData) 36 | 37 | # 获取前 5 个搜索结果 38 | allAnswsers = "" 39 | for i in range(5): 40 | answser = jsonData[i]['snippet'] + " --- "+ jsonData[i]['link'] 41 | print("搜索结果:" + answser) 42 | allAnswsers+=answser+"\n\n" 43 | 44 | return allAnswsers 45 | -------------------------------------------------------------------------------- /zh_util.py: -------------------------------------------------------------------------------- 1 | 2 | class ZHUtil: 3 | 4 | # 只保留 浏览数据 5 | @staticmethod 6 | def getVisitCount(visitCountRaw): 7 | # print(visitCountRaw) 8 | 9 | pos1 = visitCountRaw.index("浏览") 10 | visitCountStr = (str(visitCountRaw))[0:pos1] 11 | 12 | if ("万" in visitCountStr): 13 | pos2 = visitCountStr.index("万") 14 | realVisitCount = (str(visitCountStr))[0:pos2-1] 15 | # print(realVisitCount) 16 | # print(type(realVisitCount)) 17 | return str(int(float(realVisitCount)*10000)) 18 | 19 | return visitCountStr 20 | 21 | 22 | # 把含有 "万"字的 转换为 实际数字 23 | def caculateRealVisitCount(visitCount): 24 | if("万" in visitCount): 25 | pos = visitCount.index("万") 26 | visitCountStr = (str(visitCount))[0:pos] 27 | 28 | return int(visitCountStr)*10000 29 | 30 | --------------------------------------------------------------------------------