├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── config.json.template ├── docs └── images │ ├── csdn.jpg │ ├── red.jpg │ └── wechat_mp.jpg └── jina_sum.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | config.json 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Han Fangyuan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jina_sumary 2 | ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内容 3 | 4 | 支持总结公众号、小红书、csdn等分享卡片链接(有的卡片链接会触发验证,一般直链没有此问题) 5 | 6 | ![wechat_mp](./docs/images/wechat_mp.jpg) 7 | ![red](./docs/images/red.jpg) 8 | ![csdn](./docs/images/csdn.jpg) 9 | 10 | config.json 配置说明 11 | ```bash 12 | { 13 | "jina_reader_base": "https://r.jina.ai", # jina reader链接,默认为https://r.jina.ai 14 | "open_ai_api_base": "https://api.openai.com/v1", # chatgpt chat url 15 | "open_ai_api_key": "sk-xxx", # chatgpt api key 16 | "open_ai_model": "gpt-3.5-turbo", # chatgpt model 17 | "max_words": 8000, # 网页链接内容的最大字数,防止超过最大输入token,使用字符串长度简单计数 18 | "white_url_list": [], # url白名单, 列表为空时不做限制,黑名单优先级大于白名单,即当一个url既在白名单又在黑名单时,黑名单生效 19 | "black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"], # url黑名单,排除不支持总结的视频号等链接 20 | "prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" # 链接内容总结提示词 21 | } 22 | ``` 23 | 24 | 为了尽可能减少用户的配置与使用成本,本仓库只提供了简单链接总结功能,如需要配置追问、总结群聊名单等功能,可以使用 https://github.com/sofs2005/jina_sum 25 | 26 | ## Star History 27 | 28 | [![Star History Chart](https://api.star-history.com/svg?repos=hanfangyuan4396/jina_sum&type=Date)](https://star-history.com/#hanfangyuan4396/jina_sum&Date) 29 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .jina_sum import * 2 | -------------------------------------------------------------------------------- /config.json.template: -------------------------------------------------------------------------------- 1 | { 2 | "jina_reader_base": "https://r.jina.ai", 3 | "open_ai_api_base": "https://api.openai.com/v1", 4 | "open_ai_api_key": "sk-xxx", 5 | "open_ai_model": "gpt-3.5-turbo", 6 | "max_words": 8000, 7 | "white_url_list": [], 8 | "black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"], 9 | "prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" 10 | } 11 | -------------------------------------------------------------------------------- /docs/images/csdn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/csdn.jpg -------------------------------------------------------------------------------- /docs/images/red.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/red.jpg -------------------------------------------------------------------------------- /docs/images/wechat_mp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/wechat_mp.jpg -------------------------------------------------------------------------------- /jina_sum.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import json 3 | import os 4 | import html 5 | from urllib.parse import urlparse 6 | 7 | import requests 8 | 9 | import plugins 10 | from bridge.context import ContextType 11 | from bridge.reply import Reply, ReplyType 12 | from common.log import logger 13 | from plugins import * 14 | 15 | @plugins.register( 16 | name="JinaSum", 17 | desire_priority=10, 18 | hidden=False, 19 | desc="Sum url link content with jina reader and llm", 20 | version="0.0.1", 21 | author="hanfangyuan", 22 | ) 23 | class JinaSum(Plugin): 24 | 25 | jina_reader_base = "https://r.jina.ai" 26 | open_ai_api_base = "https://api.openai.com/v1" 27 | open_ai_model = "gpt-3.5-turbo" 28 | max_words = 8000 29 | prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n" 30 | white_url_list = [] 31 | black_url_list = [ 32 | "https://support.weixin.qq.com", # 视频号视频 33 | "https://channels-aladin.wxqcloud.qq.com", # 视频号音乐 34 | ] 35 | 36 | def __init__(self): 37 | super().__init__() 38 | try: 39 | self.config = super().load_config() 40 | if not self.config: 41 | self.config = self._load_config_template() 42 | self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base) 43 | self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base) 44 | self.open_ai_api_key = self.config.get("open_ai_api_key", "") 45 | self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model) 46 | self.max_words = self.config.get("max_words", self.max_words) 47 | self.prompt = self.config.get("prompt", self.prompt) 48 | self.white_url_list = self.config.get("white_url_list", self.white_url_list) 49 | self.black_url_list = self.config.get("black_url_list", self.black_url_list) 50 | logger.info(f"[JinaSum] inited, config={self.config}") 51 | self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context 52 | except Exception as e: 53 | logger.error(f"[JinaSum] 初始化异常:{e}") 54 | raise "[JinaSum] init failed, ignore " 55 | 56 | def on_handle_context(self, e_context: EventContext, retry_count: int = 0): 57 | try: 58 | context = e_context["context"] 59 | content = context.content 60 | if context.type != ContextType.SHARING and context.type != ContextType.TEXT: 61 | return 62 | if not self._check_url(content): 63 | logger.debug(f"[JinaSum] {content} is not a valid url, skip") 64 | return 65 | if retry_count == 0: 66 | logger.debug("[JinaSum] on_handle_context. content: %s" % content) 67 | reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...") 68 | channel = e_context["channel"] 69 | channel.send(reply, context) 70 | 71 | target_url = html.unescape(content) # 解决公众号卡片链接校验问题,参考 https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45 72 | jina_url = self._get_jina_url(target_url) 73 | headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"} 74 | response = requests.get(jina_url, headers=headers, timeout=60) 75 | response.raise_for_status() 76 | target_url_content = response.text 77 | 78 | openai_chat_url = self._get_openai_chat_url() 79 | openai_headers = self._get_openai_headers() 80 | openai_payload = self._get_openai_payload(target_url_content) 81 | logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_headers: {openai_headers}, openai_payload: {openai_payload}") 82 | response = requests.post(openai_chat_url, headers=openai_headers, json=openai_payload, timeout=60) 83 | response.raise_for_status() 84 | result = response.json()['choices'][0]['message']['content'] 85 | reply = Reply(ReplyType.TEXT, result) 86 | e_context["reply"] = reply 87 | e_context.action = EventAction.BREAK_PASS 88 | 89 | except Exception as e: 90 | if retry_count < 3: 91 | logger.warning(f"[JinaSum] {str(e)}, retry {retry_count + 1}") 92 | self.on_handle_context(e_context, retry_count + 1) 93 | return 94 | 95 | logger.exception(f"[JinaSum] {str(e)}") 96 | reply = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试") 97 | e_context["reply"] = reply 98 | e_context.action = EventAction.BREAK_PASS 99 | 100 | def get_help_text(self, verbose, **kwargs): 101 | return f'使用jina reader和ChatGPT总结网页链接内容' 102 | 103 | def _load_config_template(self): 104 | logger.debug("No Suno plugin config.json, use plugins/jina_sum/config.json.template") 105 | try: 106 | plugin_config_path = os.path.join(self.path, "config.json.template") 107 | if os.path.exists(plugin_config_path): 108 | with open(plugin_config_path, "r", encoding="utf-8") as f: 109 | plugin_conf = json.load(f) 110 | return plugin_conf 111 | except Exception as e: 112 | logger.exception(e) 113 | 114 | def _get_jina_url(self, target_url): 115 | return self.jina_reader_base + "/" + target_url 116 | 117 | def _get_openai_chat_url(self): 118 | return self.open_ai_api_base + "/chat/completions" 119 | 120 | def _get_openai_headers(self): 121 | return { 122 | 'Authorization': f"Bearer {self.open_ai_api_key}", 123 | 'Host': urlparse(self.open_ai_api_base).netloc 124 | } 125 | 126 | def _get_openai_payload(self, target_url_content): 127 | target_url_content = target_url_content[:self.max_words] # 通过字符串长度简单进行截断 128 | sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''" 129 | messages = [{"role": "user", "content": sum_prompt}] 130 | payload = { 131 | 'model': self.open_ai_model, 132 | 'messages': messages 133 | } 134 | return payload 135 | 136 | def _check_url(self, target_url: str): 137 | stripped_url = target_url.strip() 138 | # 简单校验是否是url 139 | if not stripped_url.startswith("http://") and not stripped_url.startswith("https://"): 140 | return False 141 | 142 | # 检查白名单 143 | if len(self.white_url_list): 144 | if not any(stripped_url.startswith(white_url) for white_url in self.white_url_list): 145 | return False 146 | 147 | # 排除黑名单,黑名单优先级>白名单 148 | for black_url in self.black_url_list: 149 | if stripped_url.startswith(black_url): 150 | return False 151 | 152 | return True 153 | --------------------------------------------------------------------------------