├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── config.json.template
├── docs
    └── images
    │   ├── csdn.jpg
    │   ├── red.jpg
    │   └── wechat_mp.jpg
└── jina_sum.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | config.json
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Han Fangyuan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # jina_sum
 2 | ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内容
 3 | 
 4 | 支持总结公众号、小红书、csdn等分享卡片链接(有的卡片链接会触发验证，一般直链没有此问题)
 5 | 
 6 | ![wechat_mp](./docs/images/wechat_mp.jpg)
 7 | ![red](./docs/images/red.jpg)
 8 | ![csdn](./docs/images/csdn.jpg)
 9 | 
10 | config.json 配置说明
11 | ```bash
12 | {
13 |   "jina_reader_base": "https://r.jina.ai",           # jina reader链接，默认为https://r.jina.ai
14 |   "open_ai_api_base": "https://api.openai.com/v1",   # chatgpt chat url
15 |   "open_ai_api_key":  "sk-xxx",                      # chatgpt api key
16 |   "open_ai_model": "gpt-3.5-turbo",                  # chatgpt model
17 |   "max_words": 8000,                                 # 网页链接内容的最大字数，防止超过最大输入token，使用字符串长度简单计数
18 |   "white_url_list": [],                              # url白名单, 列表为空时不做限制，黑名单优先级大于白名单，即当一个url既在白名单又在黑名单时，黑名单生效
19 |   "black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"],  # url黑名单，排除不支持总结的视频号等链接
20 |   "prompt": "我需要对下面的文本进行总结，总结输出包括以下三个部分：\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"                           # 链接内容总结提示词
21 | }
22 | ```
23 | 
24 | 为了尽可能减少用户的配置与使用成本，本仓库只提供了简单链接总结功能，如需要配置追问、总结群聊名单等功能，可以使用 https://github.com/sofs2005/jina_sum
25 | 
26 | ## Star History
27 | 
28 | [![Star History Chart](https://api.star-history.com/svg?repos=hanfangyuan4396/jina_sum&type=Date)](https://star-history.com/#hanfangyuan4396/jina_sum&Date)
29 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .jina_sum import *
2 | 


--------------------------------------------------------------------------------
/config.json.template:
--------------------------------------------------------------------------------
 1 | {
 2 |   "jina_reader_base": "https://r.jina.ai",
 3 |   "open_ai_api_base": "https://api.openai.com/v1",
 4 |   "open_ai_api_key":  "sk-xxx",
 5 |   "open_ai_model": "gpt-3.5-turbo",
 6 |   "max_words": 8000,
 7 |   "white_url_list": [],
 8 |   "black_url_list": ["https://support.weixin.qq.com", "https://channels-aladin.wxqcloud.qq.com"],
 9 |   "prompt": "我需要对下面的文本进行总结，总结输出包括以下三个部分：\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"
10 | }
11 | 


--------------------------------------------------------------------------------
/docs/images/csdn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/csdn.jpg


--------------------------------------------------------------------------------
/docs/images/red.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/red.jpg


--------------------------------------------------------------------------------
/docs/images/wechat_mp.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanfangyuan4396/jina_sum/838fd8126f0794bd42cfdcf7bf4e8f7889659094/docs/images/wechat_mp.jpg


--------------------------------------------------------------------------------
/jina_sum.py:
--------------------------------------------------------------------------------
  1 | # encoding:utf-8
  2 | import json
  3 | import os
  4 | import html
  5 | from urllib.parse import urlparse
  6 | 
  7 | import requests
  8 | 
  9 | import plugins
 10 | from bridge.context import ContextType
 11 | from bridge.reply import Reply, ReplyType
 12 | from common.log import logger
 13 | from plugins import *
 14 | 
 15 | @plugins.register(
 16 |     name="JinaSum",
 17 |     desire_priority=10,
 18 |     hidden=False,
 19 |     desc="Sum url link content with jina reader and llm",
 20 |     version="0.0.1",
 21 |     author="hanfangyuan",
 22 | )
 23 | class JinaSum(Plugin):
 24 | 
 25 |     jina_reader_base = "https://r.jina.ai"
 26 |     open_ai_api_base = "https://api.openai.com/v1"
 27 |     open_ai_model = "gpt-3.5-turbo"
 28 |     max_words = 8000
 29 |     prompt = "我需要对下面引号内文档进行总结，总结输出包括以下三个部分：\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"
 30 |     white_url_list = []
 31 |     black_url_list = [
 32 |         "https://support.weixin.qq.com", # 视频号视频
 33 |         "https://channels-aladin.wxqcloud.qq.com", # 视频号音乐
 34 |     ]
 35 | 
 36 |     def __init__(self):
 37 |         super().__init__()
 38 |         try:
 39 |             self.config = super().load_config()
 40 |             if not self.config:
 41 |                 self.config = self._load_config_template()
 42 |             self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
 43 |             self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
 44 |             self.open_ai_api_key = self.config.get("open_ai_api_key", "")
 45 |             self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
 46 |             self.max_words = self.config.get("max_words", self.max_words)
 47 |             self.prompt = self.config.get("prompt", self.prompt)
 48 |             self.white_url_list = self.config.get("white_url_list", self.white_url_list)
 49 |             self.black_url_list = self.config.get("black_url_list", self.black_url_list)
 50 |             logger.info(f"[JinaSum] inited, config={self.config}")
 51 |             self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
 52 |         except Exception as e:
 53 |             logger.error(f"[JinaSum] 初始化异常：{e}")
 54 |             raise "[JinaSum] init failed, ignore "
 55 | 
 56 |     def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
 57 |         try:
 58 |             context = e_context["context"]
 59 |             content = context.content
 60 |             if context.type != ContextType.SHARING and context.type != ContextType.TEXT:
 61 |                 return
 62 |             if not self._check_url(content):
 63 |                 logger.debug(f"[JinaSum] {content} is not a valid url, skip")
 64 |                 return
 65 |             if retry_count == 0:
 66 |                 logger.debug("[JinaSum] on_handle_context. content: %s" % content)
 67 |                 reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结，请稍候...")
 68 |                 channel = e_context["channel"]
 69 |                 channel.send(reply, context)
 70 | 
 71 |             target_url = html.unescape(content) # 解决公众号卡片链接校验问题，参考 https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45
 72 |             jina_url = self._get_jina_url(target_url)
 73 |             headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"}
 74 |             response = requests.get(jina_url, headers=headers, timeout=60)
 75 |             response.raise_for_status()
 76 |             target_url_content = response.text
 77 | 
 78 |             openai_chat_url = self._get_openai_chat_url()
 79 |             openai_headers = self._get_openai_headers()
 80 |             openai_payload = self._get_openai_payload(target_url_content)
 81 |             logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_headers: {openai_headers}, openai_payload: {openai_payload}")
 82 |             response = requests.post(openai_chat_url, headers=openai_headers, json=openai_payload, timeout=60)
 83 |             response.raise_for_status()
 84 |             result = response.json()['choices'][0]['message']['content']
 85 |             reply = Reply(ReplyType.TEXT, result)
 86 |             e_context["reply"] = reply
 87 |             e_context.action = EventAction.BREAK_PASS
 88 | 
 89 |         except Exception as e:
 90 |             if retry_count < 3:
 91 |                 logger.warning(f"[JinaSum] {str(e)}, retry {retry_count + 1}")
 92 |                 self.on_handle_context(e_context, retry_count + 1)
 93 |                 return
 94 | 
 95 |             logger.exception(f"[JinaSum] {str(e)}")
 96 |             reply = Reply(ReplyType.ERROR, "我暂时无法总结链接，请稍后再试")
 97 |             e_context["reply"] = reply
 98 |             e_context.action = EventAction.BREAK_PASS
 99 | 
100 |     def get_help_text(self, verbose, **kwargs):
101 |         return f'使用jina reader和ChatGPT总结网页链接内容'
102 | 
103 |     def _load_config_template(self):
104 |         logger.debug("No Suno plugin config.json, use plugins/jina_sum/config.json.template")
105 |         try:
106 |             plugin_config_path = os.path.join(self.path, "config.json.template")
107 |             if os.path.exists(plugin_config_path):
108 |                 with open(plugin_config_path, "r", encoding="utf-8") as f:
109 |                     plugin_conf = json.load(f)
110 |                     return plugin_conf
111 |         except Exception as e:
112 |             logger.exception(e)
113 | 
114 |     def _get_jina_url(self, target_url):
115 |         return self.jina_reader_base + "/" + target_url
116 | 
117 |     def _get_openai_chat_url(self):
118 |         return self.open_ai_api_base + "/chat/completions"
119 | 
120 |     def _get_openai_headers(self):
121 |         return {
122 |             'Authorization': f"Bearer {self.open_ai_api_key}",
123 |             'Host': urlparse(self.open_ai_api_base).netloc
124 |         }
125 | 
126 |     def _get_openai_payload(self, target_url_content):
127 |         target_url_content = target_url_content[:self.max_words] # 通过字符串长度简单进行截断
128 |         sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
129 |         messages = [{"role": "user", "content": sum_prompt}]
130 |         payload = {
131 |             'model': self.open_ai_model,
132 |             'messages': messages
133 |         }
134 |         return payload
135 | 
136 |     def _check_url(self, target_url: str):
137 |         stripped_url = target_url.strip()
138 |         # 简单校验是否是url
139 |         if not stripped_url.startswith("http://") and not stripped_url.startswith("https://"):
140 |             return False
141 | 
142 |         # 检查白名单
143 |         if len(self.white_url_list):
144 |             if not any(stripped_url.startswith(white_url) for white_url in self.white_url_list):
145 |                 return False
146 | 
147 |         # 排除黑名单，黑名单优先级>白名单
148 |         for black_url in self.black_url_list:
149 |             if stripped_url.startswith(black_url):
150 |                 return False
151 | 
152 |         return True
153 | 


--------------------------------------------------------------------------------