├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.yml
    │   └── feature_request.yml
├── .gitignore
├── LICENSE
├── README.md
├── _conf_schema.json
├── constant.py
├── fonts
    ├── LXGWWenKai-Regular.ttf
    └── OFL.txt
├── main.py
├── metadata.yaml
├── requirements.txt
├── stop_words.txt
├── utils.py
└── wordcloud_core
    ├── __init__.py
    ├── generator.py
    ├── history_manager.py
    └── scheduler.py


/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
  1 | name: 错误报告 (Bug Report)
  2 | description: 提交 CloudRank 插件的错误
  3 | title: "[BUG] <请在此处填写你遇到的问题>"
  4 | labels: ["bug"]
  5 | assignees:
  6 |   - GEMILUXVII
  7 | body:
  8 |   - type: markdown
  9 |     attributes:
 10 |       value: |
 11 |         请尽可能详细地描述问题，以便能更快地定位并修复它。
 12 | 
 13 |   - type: checkboxes
 14 |     attributes:
 15 |       label: 提交前确认
 16 |       description: 在提交此错误报告前，请确认以下内容：
 17 |       options:
 18 |         - label: 我已仔细阅读过项目的 README.md 文档，确认其中没有关于此错误的说明或解决方案，并且我也已检查过现有的 Issues，未发现重复报告。
 19 |           required: true
 20 | 
 21 |   - type: textarea
 22 |     id: bug-description
 23 |     attributes:
 24 |       label: 错误描述 (Bug Description)
 25 |       description: 请清晰简洁地描述遇到的错误
 26 |       placeholder: 例如：“当我尝试使用 /wordcloud 命令时，插件崩溃了，并且机器人没有发送任何词云图片”
 27 |     validations:
 28 |       required: true
 29 | 
 30 |   - type: textarea
 31 |     id: steps-to-reproduce
 32 |     attributes:
 33 |       label: 复现步骤 (Steps to Reproduce)
 34 |       description: 请详细说明如何复现这个错误
 35 |       placeholder: |
 36 |         1. 在群聊 X 中发送消息...
 37 |         2. 输入命令 `/wordcloud 3`...
 38 |         3. 观察到机器人没有任何回复/机器人回复了错误信息...
 39 |         4. （其他相关步骤）
 40 |     validations:
 41 |       required: true
 42 | 
 43 |   - type: textarea
 44 |     id: expected-behavior
 45 |     attributes:
 46 |       label: 期望行为 (Expected Behavior)
 47 |       description: 请描述在上述步骤之后，期望发生什么
 48 |       placeholder: 例如：“机器人应该发送一张包含最近3天聊天内容的词云图片”
 49 |     validations:
 50 |       required: true
 51 | 
 52 |   - type: textarea
 53 |     id: actual-behavior
 54 |     attributes:
 55 |       label: 实际行为 (Actual Behavior)
 56 |       description: 请描述实际发生了什么
 57 |       placeholder: 例如：“机器人没有任何回复，控制台输出了 XXX 错误”
 58 |     validations:
 59 |       required: true
 60 | 
 61 |   - type: textarea
 62 |     id: screenshots-logs
 63 |     attributes:
 64 |       label: 截图/日志 (Screenshots/Logs)
 65 |       description: |
 66 |         如果适用，请在此处添加截图或日志以帮助解释问题
 67 |         对于日志，请开启插件的 `debug_mode` (如果问题与运行时错误相关) 并复制相关的日志片段
 68 |         **重要提示：** 请确保在上传截图或日志前，已移除或遮盖所有个人身份信息 (PII) 或其他敏感数据！
 69 |       placeholder: |
 70 |         （在此处粘贴截图或日志）
 71 |         ```log
 72 |         [时间戳] [级别] 详细的错误日志...
 73 |         ```
 74 |     validations:
 75 |       required: false
 76 | 
 77 |   - type: input
 78 |     id: plugin-version
 79 |     attributes:
 80 |       label: 插件版本 (CloudRank Version)
 81 |       description: 正在使用的 CloudRank 插件版本是多少？ (例如 v1.3.6)
 82 |       placeholder: "例如：v1.3.6"
 83 |     validations:
 84 |       required: true
 85 | 
 86 |   - type: input
 87 |     id: astrbot-version
 88 |     attributes:
 89 |       label: AstrBot 版本 (AstrBot Version)
 90 |       description: 正在使用的 AstrBot 版本是多少？
 91 |       placeholder: "例如：v3.6.8"
 92 |     validations:
 93 |       required: true
 94 | 
 95 |   - type: input
 96 |     id: python-version
 97 |     attributes:
 98 |       label: Python 版本 (Python Version)
 99 |       description: 使用的 Python 版本是多少？
100 |       placeholder: "例如：3.11.11"
101 |     validations:
102 |       required: true
103 | 
104 |   - type: dropdown
105 |     id: os
106 |     attributes:
107 |       label: 操作系统 (Operating System)
108 |       description: 在哪个操作系统上运行 AstrBot 和插件？
109 |       options:
110 |         - Windows
111 |         - Linux (请在下方“其他信息”中注明发行版)
112 |         - macOS
113 |         - Docker (请在下方“其他信息”中注明基础镜像)
114 |         - 其他 (请在下方“其他信息”中注明)
115 |     validations:
116 |       required: true
117 | 
118 |   - type: textarea
119 |     id: relevant-config
120 |     attributes:
121 |       label: 相关配置 (Relevant Configuration)
122 |       description: |
123 |         请列出可能与此错误相关的 CloudRank 插件配置项及其值
124 |         例如：`font_path`, `custom_mask_path`, `enabled_group_list`, `auto_generate_cron` 等
125 |         **请不要泄露敏感信息，如 API 密钥或密码**
126 |       placeholder: |
127 |         enabled_group_list: "123456789"
128 |         font_path: "my_custom_font.ttf"
129 |         custom_mask_path: "mask.png"
130 |         # 其他可能相关的配置...
131 |     validations:
132 |       required: false
133 | 
134 |   - type: textarea
135 |     id: additional-context
136 |     attributes:
137 |       label: 其他信息 (Additional Context)
138 |       description: 在此处添加有关该问题的任何其他上下文或备注。例如，问题是间歇性出现还是稳定复现？是否尝试过其他排查步骤？
139 |       placeholder: "例如：这个问题只在特定群聊中出现，我尝试重启了 AstrBot 但问题依旧"
140 |     validations:
141 |       required: false
142 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: 功能请求 (Feature Request)
 2 | description: 为 CloudRank 插件提出新功能或改进建议
 3 | title: "[FEATURE] <简述功能建议>"
 4 | labels: ["enhancement"]
 5 | assignees:
 6 |   - GEMILUXVII
 7 | body:
 8 |   - type: markdown
 9 |     attributes:
10 |       value: |
11 |         感谢为 CloudRank 插件提出功能建议，请详细描述你的想法
12 | 
13 |   - type: checkboxes
14 |     id: readme-checked
15 |     attributes:
16 |       label: README 阅读确认
17 |       description: 提交此功能请求前，请确认：
18 |       options:
19 |         - label: 我已阅读项目的 README.md 文档，确认所提功能在现有版本中无法实现，或现有实现存在不足
20 |           required: true
21 | 
22 |   - type: textarea
23 |     id: problem-related
24 |     attributes:
25 |       label: 此功能请求是否与某个现有问题相关？ (选填)
26 |       description: |
27 |         清晰简洁地描述相关问题。如果只是一个新点子，而非解决特定痛点，可简单说明或跳过
28 |         例如：“目前排行榜的发送时间与词云绑定，不够灵活”
29 |         或者：“希望词云有更多预设形状”
30 |       placeholder: "例如：当前 [...] 方面存在不便"
31 | 
32 |   - type: textarea
33 |     id: solution-description
34 |     attributes:
35 |       label: 描述期望的解决方案或新功能 (必填)
36 |       description: 清晰简洁地描述希望实现的功能及其理想工作方式
37 |       placeholder: |
38 |         例如：“增加配置项，允许独立设置词云的字体字形”
39 |     validations:
40 |       required: true
41 | 
42 |   - type: textarea
43 |     id: alternatives-considered
44 |     attributes:
45 |       label: 是否考虑过其他替代方案？ (选填)
46 |       description: 清晰简洁地描述在提出此建议前，是否考虑过其他替代方案或类似功能
47 |       placeholder: "例如：曾尝试调整每日词云的 CRON 表达式，但这会同时影响词云生成时间，并非理想方案"
48 | 
49 |   - type: textarea
50 |     id: additional-context
51 |     attributes:
52 |       label: 补充信息 (选填)
53 |       description: |
54 |         在此添加关于此功能请求的其他上下文、使用场景、预期益处、可能的实现思路（若方便）或相关截图
55 |       placeholder: |
56 |         例如：“此功能有助于群管理员更灵活地管理社群，并及时激励活跃用户”
57 |         （可在此附上相关的草图或参考示例截图）
58 | 
59 |   - type: markdown
60 |     attributes:
61 |       value: |
62 |         感谢你的建议！
63 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | wordcloud_core/__pycache__/__init__.cpython-310.pyc
2 | wordcloud_core/__pycache__/generator.cpython-310.pyc
3 | .github/copilot-instructions.md
4 | __pycache__/constant.cpython-310.pyc
5 | __pycache__/main.cpython-310.pyc
6 | __pycache__/utils.cpython-310.pyc
7 | wordcloud_core/__pycache__/history_manager.cpython-310.pyc
8 | wordcloud_core/__pycache__/scheduler.cpython-310.pyc
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU AFFERO GENERAL PUBLIC LICENSE
  2 |                        Version 3, 19 November 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU Affero General Public License is a free, copyleft license for
 11 | software and other kinds of works, specifically designed to ensure
 12 | cooperation with the community in the case of network server software.
 13 | 
 14 |   The licenses for most software and other practical works are designed
 15 | to take away your freedom to share and change the works.  By contrast,
 16 | our General Public Licenses are intended to guarantee your freedom to
 17 | share and change all versions of a program--to make sure it remains free
 18 | software for all its users.
 19 | 
 20 |   When we speak of free software, we are referring to freedom, not
 21 | price.  Our General Public Licenses are designed to make sure that you
 22 | have the freedom to distribute copies of free software (and charge for
 23 | them if you wish), that you receive source code or can get it if you
 24 | want it, that you can change the software or use pieces of it in new
 25 | free programs, and that you know you can do these things.
 26 | 
 27 |   Developers that use our General Public Licenses protect your rights
 28 | with two steps: (1) assert copyright on the software, and (2) offer
 29 | you this License which gives you legal permission to copy, distribute
 30 | and/or modify the software.
 31 | 
 32 |   A secondary benefit of defending all users' freedom is that
 33 | improvements made in alternate versions of the program, if they
 34 | receive widespread use, become available for other developers to
 35 | incorporate.  Many developers of free software are heartened and
 36 | encouraged by the resulting cooperation.  However, in the case of
 37 | software used on network servers, this result may fail to come about.
 38 | The GNU General Public License permits making a modified version and
 39 | letting the public access it on a server without ever releasing its
 40 | source code to the public.
 41 | 
 42 |   The GNU Affero General Public License is designed specifically to
 43 | ensure that, in such cases, the modified source code becomes available
 44 | to the community.  It requires the operator of a network server to
 45 | provide the source code of the modified version running there to the
 46 | users of that server.  Therefore, public use of a modified version, on
 47 | a publicly accessible server, gives the public access to the source
 48 | code of the modified version.
 49 | 
 50 |   An older license, called the Affero General Public License and
 51 | published by Affero, was designed to accomplish similar goals.  This is
 52 | a different license, not a version of the Affero GPL, but Affero has
 53 | released a new version of the Affero GPL which permits relicensing under
 54 | this license.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                        TERMS AND CONDITIONS
 60 | 
 61 |   0. Definitions.
 62 | 
 63 |   "This License" refers to version 3 of the GNU Affero General Public License.
 64 | 
 65 |   "Copyright" also means copyright-like laws that apply to other kinds of
 66 | works, such as semiconductor masks.
 67 | 
 68 |   "The Program" refers to any copyrightable work licensed under this
 69 | License.  Each licensee is addressed as "you".  "Licensees" and
 70 | "recipients" may be individuals or organizations.
 71 | 
 72 |   To "modify" a work means to copy from or adapt all or part of the work
 73 | in a fashion requiring copyright permission, other than the making of an
 74 | exact copy.  The resulting work is called a "modified version" of the
 75 | earlier work or a work "based on" the earlier work.
 76 | 
 77 |   A "covered work" means either the unmodified Program or a work based
 78 | on the Program.
 79 | 
 80 |   To "propagate" a work means to do anything with it that, without
 81 | permission, would make you directly or secondarily liable for
 82 | infringement under applicable copyright law, except executing it on a
 83 | computer or modifying a private copy.  Propagation includes copying,
 84 | distribution (with or without modification), making available to the
 85 | public, and in some countries other activities as well.
 86 | 
 87 |   To "convey" a work means any kind of propagation that enables other
 88 | parties to make or receive copies.  Mere interaction with a user through
 89 | a computer network, with no transfer of a copy, is not conveying.
 90 | 
 91 |   An interactive user interface displays "Appropriate Legal Notices"
 92 | to the extent that it includes a convenient and prominently visible
 93 | feature that (1) displays an appropriate copyright notice, and (2)
 94 | tells the user that there is no warranty for the work (except to the
 95 | extent that warranties are provided), that licensees may convey the
 96 | work under this License, and how to view a copy of this License.  If
 97 | the interface presents a list of user commands or options, such as a
 98 | menu, a prominent item in the list meets this criterion.
 99 | 
100 |   1. Source Code.
101 | 
102 |   The "source code" for a work means the preferred form of the work
103 | for making modifications to it.  "Object code" means any non-source
104 | form of a work.
105 | 
106 |   A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 | 
111 |   The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form.  A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 | 
122 |   The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities.  However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work.  For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 | 
135 |   The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 | 
139 |   The Corresponding Source for a work in source code form is that
140 | same work.
141 | 
142 |   2. Basic Permissions.
143 | 
144 |   All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met.  This License explicitly affirms your unlimited
147 | permission to run the unmodified Program.  The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work.  This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 | 
152 |   You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force.  You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright.  Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 | 
163 |   Conveying under any other circumstances is permitted solely under
164 | the conditions stated below.  Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 | 
167 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 | 
169 |   No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 | 
175 |   When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 | 
183 |   4. Conveying Verbatim Copies.
184 | 
185 |   You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 | 
193 |   You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 | 
196 |   5. Conveying Modified Source Versions.
197 | 
198 |   You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 | 
202 |     a) The work must carry prominent notices stating that you modified
203 |     it, and giving a relevant date.
204 | 
205 |     b) The work must carry prominent notices stating that it is
206 |     released under this License and any conditions added under section
207 |     7.  This requirement modifies the requirement in section 4 to
208 |     "keep intact all notices".
209 | 
210 |     c) You must license the entire work, as a whole, under this
211 |     License to anyone who comes into possession of a copy.  This
212 |     License will therefore apply, along with any applicable section 7
213 |     additional terms, to the whole of the work, and all its parts,
214 |     regardless of how they are packaged.  This License gives no
215 |     permission to license the work in any other way, but it does not
216 |     invalidate such permission if you have separately received it.
217 | 
218 |     d) If the work has interactive user interfaces, each must display
219 |     Appropriate Legal Notices; however, if the Program has interactive
220 |     interfaces that do not display Appropriate Legal Notices, your
221 |     work need not make them do so.
222 | 
223 |   A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit.  Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 | 
233 |   6. Conveying Non-Source Forms.
234 | 
235 |   You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 | 
240 |     a) Convey the object code in, or embodied in, a physical product
241 |     (including a physical distribution medium), accompanied by the
242 |     Corresponding Source fixed on a durable physical medium
243 |     customarily used for software interchange.
244 | 
245 |     b) Convey the object code in, or embodied in, a physical product
246 |     (including a physical distribution medium), accompanied by a
247 |     written offer, valid for at least three years and valid for as
248 |     long as you offer spare parts or customer support for that product
249 |     model, to give anyone who possesses the object code either (1) a
250 |     copy of the Corresponding Source for all the software in the
251 |     product that is covered by this License, on a durable physical
252 |     medium customarily used for software interchange, for a price no
253 |     more than your reasonable cost of physically performing this
254 |     conveying of source, or (2) access to copy the
255 |     Corresponding Source from a network server at no charge.
256 | 
257 |     c) Convey individual copies of the object code with a copy of the
258 |     written offer to provide the Corresponding Source.  This
259 |     alternative is allowed only occasionally and noncommercially, and
260 |     only if you received the object code with such an offer, in accord
261 |     with subsection 6b.
262 | 
263 |     d) Convey the object code by offering access from a designated
264 |     place (gratis or for a charge), and offer equivalent access to the
265 |     Corresponding Source in the same way through the same place at no
266 |     further charge.  You need not require recipients to copy the
267 |     Corresponding Source along with the object code.  If the place to
268 |     copy the object code is a network server, the Corresponding Source
269 |     may be on a different server (operated by you or a third party)
270 |     that supports equivalent copying facilities, provided you maintain
271 |     clear directions next to the object code saying where to find the
272 |     Corresponding Source.  Regardless of what server hosts the
273 |     Corresponding Source, you remain obligated to ensure that it is
274 |     available for as long as needed to satisfy these requirements.
275 | 
276 |     e) Convey the object code using peer-to-peer transmission, provided
277 |     you inform other peers where the object code and Corresponding
278 |     Source of the work are being offered to the general public at no
279 |     charge under subsection 6d.
280 | 
281 |   A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 | 
285 |   A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling.  In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage.  For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product.  A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 | 
298 |   "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source.  The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 | 
306 |   If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information.  But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 | 
317 |   The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed.  Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 | 
325 |   Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 | 
331 |   7. Additional Terms.
332 | 
333 |   "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law.  If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 | 
342 |   When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it.  (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.)  You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 | 
349 |   Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 | 
353 |     a) Disclaiming warranty or limiting liability differently from the
354 |     terms of sections 15 and 16 of this License; or
355 | 
356 |     b) Requiring preservation of specified reasonable legal notices or
357 |     author attributions in that material or in the Appropriate Legal
358 |     Notices displayed by works containing it; or
359 | 
360 |     c) Prohibiting misrepresentation of the origin of that material, or
361 |     requiring that modified versions of such material be marked in
362 |     reasonable ways as different from the original version; or
363 | 
364 |     d) Limiting the use for publicity purposes of names of licensors or
365 |     authors of the material; or
366 | 
367 |     e) Declining to grant rights under trademark law for use of some
368 |     trade names, trademarks, or service marks; or
369 | 
370 |     f) Requiring indemnification of licensors and authors of that
371 |     material by anyone who conveys the material (or modified versions of
372 |     it) with contractual assumptions of liability to the recipient, for
373 |     any liability that these contractual assumptions directly impose on
374 |     those licensors and authors.
375 | 
376 |   All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10.  If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term.  If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 | 
386 |   If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 | 
391 |   Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 | 
395 |   8. Termination.
396 | 
397 |   You may not propagate or modify a covered work except as expressly
398 | provided under this License.  Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 | 
403 |   However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 | 
410 |   Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 | 
417 |   Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License.  If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 | 
423 |   9. Acceptance Not Required for Having Copies.
424 | 
425 |   You are not required to accept this License in order to receive or
426 | run a copy of the Program.  Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance.  However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work.  These actions infringe copyright if you do
431 | not accept this License.  Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 | 
434 |   10. Automatic Licensing of Downstream Recipients.
435 | 
436 |   Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License.  You are not responsible
439 | for enforcing compliance by third parties with this License.
440 | 
441 |   An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations.  If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 | 
451 |   You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License.  For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 | 
459 |   11. Patents.
460 | 
461 |   A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based.  The
463 | work thus licensed is called the contributor's "contributor version".
464 | 
465 |   A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version.  For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 | 
475 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 | 
480 |   In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement).  To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 | 
487 |   If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients.  "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 | 
501 |   If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 | 
509 |   A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License.  You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 | 
524 |   Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 | 
528 |   12. No Surrender of Others' Freedom.
529 | 
530 |   If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License.  If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all.  For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 | 
540 |   13. Remote Network Interaction; Use with the GNU General Public License.
541 | 
542 |   Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software.  This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 | 
553 |   Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work.  The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 | 
561 |   14. Revised Versions of this License.
562 | 
563 |   The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time.  Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 | 
568 |   Each version is given a distinguishing version number.  If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation.  If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 | 
577 |   If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 | 
582 |   Later license versions may give you additional or different
583 | permissions.  However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 | 
587 |   15. Disclaimer of Warranty.
588 | 
589 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 | 
598 |   16. Limitation of Liability.
599 | 
600 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 | 
610 |   17. Interpretation of Sections 15 and 16.
611 | 
612 |   If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 | 
619 |                      END OF TERMS AND CONDITIONS
620 | 
621 |             How to Apply These Terms to Your New Programs
622 | 
623 |   If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 | 
627 |   To do so, attach the following notices to the program.  It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 | 
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 | 
635 |     This program is free software: you can redistribute it and/or modify
636 |     it under the terms of the GNU Affero General Public License as published
637 |     by the Free Software Foundation, either version 3 of the License, or
638 |     (at your option) any later version.
639 | 
640 |     This program is distributed in the hope that it will be useful,
641 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
642 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
643 |     GNU Affero General Public License for more details.
644 | 
645 |     You should have received a copy of the GNU Affero General Public License
646 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
647 | 
648 | Also add information on how to contact you by electronic and paper mail.
649 | 
650 |   If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source.  For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code.  There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 | 
658 |   You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # <div align="center"> ☁️ CloudRank </div>
  2 | 
  3 | <div align="center"> <em> 智能词云分析 · 聊天热度排行 </em> </div>
  4 | 
  5 | <br>
  6 | 
  7 | <div align="center">  <a href="#更新日志"> <img src="https://img.shields.io/badge/version-v2.0.1-9644F4?style=for-the-badge" alt="Version"></a>
  8 |   <a href="https://github.com/GEMILUXVII/astrbot_plugin_cloudrank/blob/main/LICENSE"> <img src="https://img.shields.io/badge/license-AGPL--3.0-E53935?style=for-the-badge" alt="License"></a>
  9 |   <a href="https://www.python.org/downloads/"> <img src="https://img.shields.io/badge/Python-3.10+-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="Python Version"></a>
 10 |   <a href="https://github.com/AstrBotDevs/AstrBot"> <img src="https://img.shields.io/badge/AstrBot-Compatible-00BFA5?style=for-the-badge&logo=robot&logoColor=white" alt="AstrBot Compatible"></a>
 11 | </div>
 12 | 
 13 | <div align="center">
 14 |   <a href="https://github.com/botuniverse/onebot-11"> <img src="https://img.shields.io/badge/OneBotv11-AIOCQHTTP-FF9800?style=for-the-badge&logo=qq&logoColor=white" alt="OneBot v11 Support"></a>
 15 |   <a href="https://github.com/WeChatPadPro/WeChatPadPro"> <img src="https://img.shields.io/badge/WeChat-PadPro-07C160?style=for-the-badge&logo=wechat&logoColor=white" alt="WeChatPadPro Support"></a>
 16 |   <a href="https://github.com/GEMILUXVII/astrbot_plugin_cloudrank/commits/main"> <img src="https://img.shields.io/badge/updated-2025--09--13-0097A7?style=for-the-badge&logo=calendar&logoColor=white" alt="Last Updated"></a>
 17 | </div>
 18 | 
 19 | ## ◆ 介绍
 20 | 
 21 | CloudRank 插件是一款用于 AstrBot 的插件，能够将群聊或私聊中的文本消息进行分析，并生成美观的词云图像。通过词云，用户可以直观地了解一段时间内聊天内容的关键词和热点话题。插件同时提供用户活跃度排名功能，展示群内最活跃的成员。插件支持自动定时生成和手动触发生成，并提供了丰富的配置选项，让您可以定制个性化的词云和排名显示。
 22 | 
 23 | 现已完全适配 AstrBot 版本 v4.0.0 及以上，经本地测试验证，各项功能运行正常。插件使用现代异步 SQLAlchemy ORM 进行数据库操作，确保了数据访问的高效性和可靠性。所有数据库操作都通过 AstrBot 提供的中心化数据库管理系统进行，无需额外的数据库配置。
 24 | 
 25 | ## ◆ 功能特性
 26 | 
 27 | - **定时自动生成**：支持 Cron 表达式配置，定时为指定群聊或所有启用的会话生成词云
 28 | - **每日词云**：可在每日固定时间生成当天的聊天词云并推送到指定群聊，可自定义标题
 29 | - **手动触发生成**：用户可以通过命令手动生成指定天数内的聊天词云
 30 | - **多种视觉定制**：
 31 |   - **背景颜色**：自定义词云图片的背景色
 32 |   - **配色方案**：选择不同的预设配色方案，改变词语的颜色分布
 33 |   - **字体**：支持指定自定义字体文件，解决特殊字符显示问题或实现特定视觉风格
 34 |   - **形状**：支持预设形状（如圆形、矩形、菱形、三角形），更重要的是支持通过 **自定义蒙版图片 (`custom_mask_path`)** 来定义任意词云轮廓
 35 | - **灵活的配置管理**：
 36 |   - **群聊启用/禁用**：可以指定哪些群聊启用词云功能
 37 |   - **词语过滤**：设置最小词长度、最大词数量
 38 |   - **停用词**：支持自定义停用词列表，过滤常见但无意义的词语
 39 |   - **机器人消息统计**：可配置是否将机器人自身发送的消息计入词云统计 (`include_bot_messages`)
 40 | - **用户活跃度排行**：
 41 |   - 词云生成后自动显示群内活跃用户排行榜
 42 |   - 可自定义排行显示人数和奖牌样式
 43 |   - 显示用户名称和发言贡献度
 44 | - **消息历史记录**：插件会自动记录消息用于分析，用户无需额外操作
 45 | - **易于使用**：提供简洁的命令进行交互
 46 | - **调试模式**：可选的详细日志输出，方便排查问题
 47 | 
 48 | ## ◆ 系统要求
 49 | 
 50 | - **AstrBot 版本**：v4.0.0 及以上
 51 |   - 使用新版异步 ORM 数据库系统
 52 |   - 无需额外的数据库配置
 53 | - **Python 版本**：3.10+
 54 |   - 支持异步特性
 55 |   - 兼容新版 SQLAlchemy
 56 | 
 57 | ## ◆ 平台支持
 58 | 
 59 | CloudRank 插件基于 AstrBot 平台开发：
 60 | 
 61 | - **QQ**：支持 QQ 群聊的词云生成
 62 | - **微信**：支持基于 WeChatPadPro 微信群聊的词云生成
 63 | 
 64 | ## ◆ 安装方法
 65 | 
 66 | 1. **下载插件**:
 67 |    - 通过 `git clone https://github.com/GEMILUXVII/astrbot_plugin_cloudrank.git` 克隆仓库到本地
 68 | 2. **放置插件文件**:
 69 |    - 如果是以压缩包形式下载的插件，请先解压
 70 |    - 将整个插件文件夹复制到 AstrBot 的插件目录 `AstrBot/data/plugins/`，并将文件夹命名为 `cloudrank`
 71 |    - 最终路径应为 `AstrBot/data/plugins/cloudrank/`
 72 | 3. **安装依赖**:
 73 |    - 打开终端或命令行，进入插件目录: `cd AstrBot/data/plugins/cloudrank/`
 74 |    - 安装所需的 Python 包: `pip install -r requirements.txt`
 75 | 4. **重启 AstrBot**:
 76 |    - 完全重启 AstrBot 以加载新插件
 77 | 5. **配置插件**:
 78 |    - 在 AstrBot 的插件管理界面找到 "CloudRank" 插件，进行相关配置
 79 | 
 80 | ## ◆ 配置说明
 81 | 
 82 | 插件的配置通过 `_conf_schema.json` 文件定义，您可以在 AstrBot 后台的插件配置页面进行修改。以下是主要的配置项及其说明：
 83 | 
 84 | <table width="100%">
 85 |   <tr>
 86 |     <th width="15%"> 配置项 </th>
 87 |     <th width="8%"> 类型 </th>
 88 |     <th width="25%"> 描述 </th>
 89 |     <th width="12%"> 默认值 </th>
 90 |     <th width="40%"> 效果说明 </th>
 91 |   </tr>
 92 |   <tr>
 93 |     <td> <code> auto_generate_enabled </code> </td>
 94 |     <td> <code> bool </code> </td>
 95 |     <td> 是否启用自动生成词云功能 </td>
 96 |     <td> <code> true </code> </td>
 97 |     <td> <code> true </code> 时，插件会根据 <code> auto_generate_cron </code> 的设置定时生成词云 </td>
 98 |   </tr>
 99 |   <tr>
100 |     <td> <code> auto_generate_cron </code> </td>
101 |     <td> <code> string </code> </td>
102 |     <td> 自动生成词云的 CRON 表达式 </td>
103 |     <td> <code> 0 20 * * *</code> </td>
104 |     <td> 标准 CRON 格式 (<code> 分 时 日 月 周 </code>)。例如，默认值表示每天晚上 20:00 执行 </td>
105 |   </tr>
106 |   <tr>
107 |     <td> <code> timezone </code> </td>
108 |     <td> <code> string </code> </td>
109 |     <td> 自定义插件使用的时区 </td>
110 |     <td> <code> Asia/Shanghai </code> </td>
111 |     <td> 有效的 IANA 时区名称，例如 <code> Asia/Shanghai </code>, <code> Europe/London </code>, <code> America/New_York </code>, 或者 <code> UTC </code> </td>
112 |   </tr>
113 |   <tr>
114 |     <td> <code> daily_generate_enabled </code> </td>
115 |     <td> <code> bool </code> </td>
116 |     <td> 是否启用每日词云生成功能 </td>
117 |     <td> <code> true </code> </td>
118 |     <td> <code> true </code> 时，插件会根据 <code> daily_generate_time </code> 的设置每日生成词云 </td>
119 |   </tr>
120 |   <tr>
121 |     <td> <code> daily_generate_time </code> </td>
122 |     <td> <code> string </code> </td>
123 |     <td> 每日词云的生成时间 </td>
124 |     <td> <code> 23:30 </code> </td>
125 |     <td> 格式为 <code> HH:MM </code>。例如，<code> 23:30 </code> 表示每天晚上 11 点 30 分 </td>
126 |   </tr>
127 |   <tr>
128 |     <td> <code> daily_summary_title </code> </td>
129 |     <td> <code> string </code> </td>
130 |     <td> 每日词云图片的标题模板 </td>
131 |     <td> <code> "{date} {group_name} 今日词云" </code> </td>
132 |     <td> 支持占位符: <code>{date}</code> (当前日期), <code>{group_name}</code> (群聊名称)</td>
133 |   </tr>
134 |   <tr>
135 |     <td> <code> enabled_group_list </code> </td>
136 |     <td> <code> string </code> </td>
137 |     <td> 启用词云功能的群聊列表 </td>
138 |     <td> <code> "" </code> (空字符串)</td>
139 |     <td> 以英文逗号分隔的群号列表，例如 <code> 123456789,987654321 </code>。仅在此处填写的群号才会启用词云功能。如果留空，则默认所有群聊都不启用词云功能 </td>
140 |   </tr>
141 |   <tr>
142 |     <td> <code> history_days </code> </td>
143 |     <td> <code> int </code> </td>
144 |     <td> 手动生成词云时，默认统计的历史消息天数 </td>
145 |     <td> <code> 7 </code> </td>
146 |     <td> 当用户使用 <code>/wordcloud </code> 命令且未指定天数时，将使用此值 </td>
147 |   </tr>
148 |   <tr>
149 |     <td> <code> max_word_count </code> </td>
150 |     <td> <code> int </code> </td>
151 |     <td> 词云图片中显示的最大词语数量 </td>
152 |     <td> <code> 100 </code> </td>
153 |     <td> 控制词云的密集程度和信息量。建议值在 50 到 200 之间 </td>
154 |   </tr>
155 |   <tr>
156 |     <td> <code> min_word_length </code> </td>
157 |     <td> <code> int </code> </td>
158 |     <td> 参与词频统计的最小词语长度 </td>
159 |     <td> <code> 2 </code> </td>
160 |     <td> 小于此长度的词语（通常是单个字或无意义的短词）将被忽略 </td>
161 |   </tr>
162 |   <tr>
163 |     <td> <code> min_word_frequency </code> </td>
164 |     <td> <code> int </code> </td>
165 |     <td> 最小词频 </td>
166 |     <td> <code> 1 </code> </td>
167 |     <td> 出现次数低于此值的词将被过滤，以优化词云视觉效果，设为 1 则不过滤 </td>
168 |   </tr>
169 |   <tr>
170 |     <td> <code> min_font_size </code> </td>
171 |     <td> <code> int </code> </td>
172 |     <td> 词云中最小字体大小 </td>
173 |     <td> <code> 8 </code> </td>
174 |     <td> 控制低频词汇的最小显示字体大小，与 max_font_size 配合调整词云的字体大小对比度 </td>
175 |   </tr>
176 |   <tr>
177 |     <td> <code> max_font_size </code> </td>
178 |     <td> <code> int </code> </td>
179 |     <td> 词云中最大字体大小 </td>
180 |     <td> <code> 170 </code> </td>
181 |     <td> 控制高频词汇的最大显示字体大小，与 min_font_size 配合调整词云的字体大小对比度，使高频词更加突出 </td>
182 |   </tr>
183 |   <tr>
184 |     <td> <code> background_color </code> </td>
185 |     <td> <code> string </code> </td>
186 |     <td> 词云图片的背景颜色 </td>
187 |     <td> <code> white </code> </td>
188 |     <td> 可以是颜色名称 (如 <code> white </code>, <code> black </code>, <code> lightyellow </code>) 或十六进制颜色代码 (如 <code>#FFFFFF </code>)</td>
189 |   </tr>
190 |   <tr>
191 |     <td> <code> colormap </code> </td>
192 |     <td> <code> string </code> </td>
193 |     <td> 词云的配色方案，决定词语的颜色 </td>
194 |     <td> <code> viridis </code> </td>
195 |     <td> 不同的 Colormap 会给词云带来完全不同的视觉风格。可选值包括: <code> viridis </code>, <code> plasma </code>, <code> inferno </code>, <code> rainbow </code>, <code> jet </code> 等 </td>
196 |   </tr>
197 |   <tr>
198 |     <td> <code> font_path </code> </td>
199 |     <td> <code> string </code> </td>
200 |     <td> 自定义字体文件的路径 </td>
201 |     <td> <code> "" </code> (空字符串)</td>
202 |     <td> 如果留空，插件会尝试使用内置的默认字体 (通常是霞鹜文楷) 或系统字体。可指定 <code>.ttf </code> 或 <code>.otf </code> 字体文件 </td>
203 |   </tr>
204 |   <tr>
205 |     <td> <code> stop_words_file </code> </td>
206 |     <td> <code> string </code> </td>
207 |     <td> 停用词文件的路径 </td>
208 |     <td> <code> stop_words.txt </code> </td>
209 |     <td> 指定一个文本文件，每行包含一个要忽略的词语。可以填写相对路径（相对于插件数据目录下的 <code> resources/</code> 子目录）或绝对路径 </td>
210 |   </tr>
211 |   <tr>
212 |     <td> <code> include_bot_messages </code> </td>
213 |     <td> <code> bool </code> </td>
214 |     <td> 是否将机器人自身的消息计入词云统计 </td>
215 |     <td> <code> false </code> </td>
216 |     <td> <code> true </code> 时，机器人自己发送的消息也会被用于生成词云。默认为关闭 </td>
217 |   </tr>
218 |   <tr>
219 |     <td> <code> shape </code> </td>
220 |     <td> <code> string </code> </td>
221 |     <td> 词云的预设形状 </td>
222 |     <td> <code> rectangle </code> </td>
223 |     <td> 支持 <code> rectangle </code> (矩形), <code> circle </code> (圆形), <code> diamond </code> (菱形), <code> triangle_up </code> (上三角)。如果设置了下方的 "自定义蒙版图片路径"，则此选项无效 </td>
224 |   </tr>
225 |   <tr>
226 |     <td> <code> custom_mask_path </code> </td>
227 |     <td> <code> string </code> </td>
228 |     <td> 自定义蒙版图片路径 </td>
229 |     <td> <code>&quot;&quot; </code> (空字符串)</td>
230 |     <td> 提供一个图片文件的路径作为词云的形状蒙版：图片中白色区域将被忽略，非白色区域将用于绘制词语。如果设置了此路径，则预设的 '形状' 选项将无效。支持相对路径（相对于插件数据目录下的 <code> resources/images/</code> 子目录）或绝对路径 </td>
231 |   </tr>
232 |   <tr>
233 |     <td> <code> show_user_ranking </code> </td>
234 |     <td> <code> bool </code> </td>
235 |     <td> 是否在每日词云中显示用户活跃度排行 </td>
236 |     <td> <code> true </code> </td>
237 |     <td> <code> true </code> 时，词云生成后会同时显示当天发言最活跃的用户排行榜，包含发言人数统计和贡献度排名 </td>
238 |   </tr>
239 |   <tr>
240 |     <td> <code> ranking_user_count </code> </td>
241 |     <td> <code> int </code> </td>
242 |     <td> 用户排行榜显示的人数 </td>
243 |     <td> <code> 5 </code> </td>
244 |     <td> 设置排行榜显示前多少名活跃用户，建议设置 5-10 之间的值，过多可能导致排行榜信息过长 </td>
245 |   </tr>
246 |   <tr>
247 |     <td> <code> ranking_medals </code> </td>
248 |     <td> <code> string </code> </td>
249 |     <td> 排行榜奖牌表情 </td>
250 |     <td> <code> 🥇, 🥈, 🥉, 🏅, 🏅 </code> </td>
251 |     <td> 用逗号分隔的表情符号，前三名会使用前三个表情，其余位置使用后续表情 </td>
252 |   </tr>
253 |   <tr>
254 |     <td> <code> debug_mode </code> </td>
255 |     <td> <code> bool </code> </td>
256 |     <td> 是否启用详细调试日志 </td>
257 |     <td> <code> false </code> </td>
258 |     <td> <code> true </code> 时，插件会在控制台输出更详细的运行信息，主要用于开发者排查问题 </td>
259 |   </tr>
260 | </table>
261 | 
262 | ## ◆ 使用命令
263 | 
264 | 以下是与词云插件交互的主要命令:
265 | 
266 | <table width="100%">
267 |   <tr>
268 |     <th width="30%"> 命令 </th>
269 |     <th width="40%"> 描述 </th>
270 |     <th width="30%"> 示例 </th>
271 |   </tr>
272 |   <tr>
273 |     <td> <code>/wordcloud [天数] </code> </td>
274 |     <td> 生成当前会话 (群聊或私聊) 的词云，可选择指定统计过去多少天的消息 </td>
275 |     <td> <code>/wordcloud </code> (使用默认天数) <br> <code>/wordcloud 3 </code> (最近 3 天)</td>
276 |   </tr>
277 |   <tr>
278 |     <td> <code>/wc help </code> </td>
279 |     <td> 显示本插件的帮助信息，包括命令列表 </td>
280 |     <td> <code>/wc help </code> </td>
281 |   </tr>
282 |   <tr>
283 |     <td> <code>/wc test </code> </td>
284 |     <td> 生成测试词云，无需历史数据 </td>
285 |     <td> <code>/wc test </code> </td>
286 |   </tr>
287 |   <tr>
288 |     <td> <code>/wc today </code> </td>
289 |     <td> 手动触发生成当前会话今天的词云 </td>
290 |     <td> <code>/wc today </code> </td>
291 |   </tr>
292 |   <tr>
293 |     <td> <code>/wc enable [群号] </code> </td>
294 |     <td> 在指定群聊启用词云功能，如果未提供群号，则在当前群聊启用 (管理员权限)</td>
295 |     <td> <code>/wc enable 123456789 </code> </td>
296 |   </tr>
297 |   <tr>
298 |     <td> <code>/wc disable [群号] </code> </td>
299 |     <td> 在指定群聊禁用词云功能，如果未提供群号，则在当前群聊禁用 (管理员权限)</td>
300 |     <td> <code>/wc disable 123456789 </code> </td>
301 |   </tr>
302 |   <tr>
303 |     <td> <code>/wc force_daily </code> </td>
304 |     <td> 强制为所有配置了每日词云的会话立即生成一次每日词云 (管理员权限)</td>
305 |     <td> <code>/wc force_daily </code> </td>
306 |   </tr>
307 | </table>
308 | 
309 | ## ◆ 自然语言关键词
310 | 
311 | 除了上述命令外，您还可以使用以下自然语言关键词触发相应功能：
312 | 
313 | <table width="100%">
314 |   <tr>
315 |     <th width="25%"> 关键词 </th>
316 |     <th width="40%"> 功能描述 </th>
317 |     <th width="35%"> 等效命令 </th>
318 |   </tr>
319 |   <tr>
320 |     <td> 今日词云<br>获取今日词云<br>查看今日词云<br>生成今日词云 </td>
321 |     <td> 生成当前会话今天的词云图 </td>
322 |     <td> <code>/wc today </code> </td>
323 |   </tr>
324 |   <tr>
325 |     <td> 生成词云<br>查看词云<br>最近词云<br>历史词云 </td>
326 |     <td> 生成最近 7 天（或配置的默认天数）的词云图 </td>
327 |     <td> <code>/wordcloud </code> </td>
328 |   </tr>
329 |   <tr>
330 |     <td> 词云帮助<br>词云功能<br>词云说明<br>词云指令 </td>
331 |     <td> 显示词云插件的帮助信息 </td>
332 |     <td> <code>/wc help </code> </td>
333 |   </tr>
334 | </table>
335 | 
336 | > [!TIP]
337 | >
338 | > 使用自然语言关键词可以更方便地触发功能，无需记忆复杂的命令格式
339 | 
340 | ### 自定义关键词
341 | 
342 | 如果您想添加或修改触发关键词，可以编辑 `constant.py` 文件中的 `NATURAL_KEYWORDS` 字典：
343 | 
344 | ```python
345 | # 自然语言关键词 - 用于触发命令的关键词
346 | # 格式: {"command": ["关键词1", "关键词2", ...]}
347 | NATURAL_KEYWORDS = {
348 |     "today": ["今日词云", "获取今日词云", "查看今日词云", "生成今日词云"],
349 |     "wordcloud": ["生成词云", "查看词云", "最近词云", "历史词云"],
350 |     "help": ["词云帮助", "词云功能", "词云说明", "词云指令"],
351 | }
352 | ```
353 | 
354 | 您可以根据需要添加新的命令和关键词，或者为现有命令添加更多关键词。修改后重启机器人即可生效
355 | 
356 | ## ◆ 词云样例
357 | 
358 | ![Image](https://i.imgur.com/GdOOd7y.png)
359 | 
360 | > [!NOTE]
361 | >
362 | > <small> <i> 上图词云样例采用以下主要配置生成：`max_word_count`: 50, `min_word_length`: 2, `min_word_frequency`: 2, `min_font_size`: 8, `max_font_size`: 170, `background_color`: pink, `colormap`: magma, `font_path`: (使用内置霞鹜文楷), `shape`: circle.</i> </small>
363 | 
364 | ## ◆ 项目结构 (简化)
365 | 
366 | ```
367 | cloudrank/
368 | ├── wordcloud_core/           # 核心词云生成与管理逻辑
369 | │   ├── generator.py          # 词云图像生成器
370 | │   ├── history_manager.py    # 聊天历史记录管理
371 | │   ├── scheduler.py          # 定时任务调度器
372 | │   └── __init__.py           # 包初始化文件
373 | ├── fonts/                    # 字体文件目录
374 | ├── _conf_schema.json         # 插件配置文件结构定义
375 | ├── main.py                   # 插件主逻辑 (Star 类定义)
376 | ├── constant.py               # 插件内部常量和自然语言关键词配置
377 | ├── utils.py                  # 工具函数
378 | ├── stop_words.txt            # 默认停用词列表
379 | ├── requirements.txt          # Python 依赖包列表
380 | ├── metadata.yaml             # 插件元数据 (供 AstrBot 识别)
381 | ├── LICENSE                   # 开源许可证
382 | └── README.md                 # 本说明文档
383 | ```
384 | 
385 | 数据目录结构 (通过 StarTools.get_data_dir 动态创建):
386 | 
387 | ```
388 | AstrBot/data/plugin_data/cloudrank/
389 | ├── resources/                # 资源文件目录
390 | │   ├── fonts/                # 字体文件目录（存放LXGWWenKai-Regular.ttf等字体）
391 | │   ├── images/               # 自定义蒙版图片存放目录 (例如 my_mask.png)
392 | │   └── stop_words.txt        # 自定义停用词列表
393 | ├── images/                   # 生成的词云图片缓存目录 (这是插件输出图片的目录)
394 | └── debug/                    # 调试信息目录（仅在排查问题时使用）
395 | ```
396 | 
397 | ## ◆ 高级说明与定制
398 | 
399 | ### 自定义停用词
400 | 
401 | 编辑位于数据目录的 `resources/stop_words.txt` 文件，每行添加一个不想出现在词云中的词。
402 | 
403 | ### 自定义字体
404 | 
405 | 将字体文件 (如 `.ttf`, `.otf`) 放入数据目录 `resources/fonts/` 下，然后在插件配置中将 `font_path` 设置为该字体文件的名称 (例如 `my_font.ttf`)。如果字体在系统其他位置，可以设置绝对路径。
406 | 
407 | ### 自定义词云形状 (使用蒙版图片)
408 | 
409 | 1. **准备蒙版图片**
410 | 
411 |    - 创建一个图像文件 (推荐使用 `.png` 格式，背景透明更佳，但 `.jpg` 等常见格式也可以)
412 |    - 在图片中，**您希望词语出现的区域应该是深色（如黑色）**，而 **希望留空的背景区域应该是浅色（如白色）**
413 |    - 词云生成器会将图片中非白色的部分（例如深色区域）作为词语填充的有效区域，纯白色部分则会被忽略
414 |    - 图片尺寸会影响最终词云的分辨率和细节，但插件会尝试适应。一个几百像素到一千像素宽高的图片通常效果不错
415 | 
416 | 2. **放置蒙版图片**
417 | 
418 |    - 将您的蒙版图片文件（例如 `my_mask.png`）放置到插件的数据目录下的 `resources/images/` 子目录中
419 |    - 完整路径通常是 `AstrBot/data/plugin_data/cloudrank/resources/images/`
420 |    - 如果该 `images` 子目录不存在，插件在启动时会自动创建它
421 | 
422 | 3. **配置插件**
423 | 
424 |    - 在 AstrBot 的插件管理界面，找到 "CloudRank" 插件的配置
425 |    - 在 **"自定义蒙版图片路径 (`custom_mask_path`)"** 配置项中，填入您放置的图片文件名，例如 `my_mask.png`
426 |    - **注意**: 如果您在这里配置了有效的图片路径，那么预设的 "词云的预设形状 (`shape`)" 配置项将会被忽略
427 | 
428 | 4. **重新加载/测试**
429 |    - 保存配置后，建议重新加载插件或重启 AstrBot (如果插件管理界面支持热重载，则可能无需重启)
430 |    - 然后尝试生成一个词云 (例如使用 `/wc test` 命令) 来查看自定义形状的效果
431 | 
432 | ### 自定义时区
433 | 
434 | 插件允许您配置运行时使用的时区，这对于确保定时任务（如每日词云生成、CRON 表达式定义的任务）按照您期望的本地时间执行至关重要。
435 | 
436 | - **配置方法**: 在 AstrBot 的插件管理界面，找到 "CloudRank" 插件的配置中的 **"自定义插件使用的时区 (IANA 时区名称) (`timezone`)"** 选项
437 | - **有效值**: 您需要输入一个有效的 IANA 时区名称，例如：
438 |   - `Asia/Shanghai` (默认值)
439 |   - `Europe/London`
440 |   - `America/New_York`
441 |   - `UTC`
442 | - **参考资源**: 您可以参考 [维基百科的时区列表](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) 或通过 Python 的 `pytz.all_timezones` (如果您熟悉 Python 环境) 来查找合适的时区名称
443 | - **影响范围**: 此设置会影响所有与时间相关的调度，包括每日词云的生成时间和 CRON 任务的触发时间
444 | 
445 | ## ◆ 注意事项
446 | 
447 | - **首次使用**: 首次生成词云或插件加载时，可能需要一些时间来初始化分词库 (如 `jieba`) 和其他资源
448 | - **中文字体**: 为确保中文在词云中正确显示，建议在配置中明确指定一个包含中文字符的字体路径 (`font_path`)。插件会尝试使用内置的霞鹜文楷字体，如果加载失败或需要特定字体，则此配置项非常重要
449 | - **资源存储**: 插件会在 AstrBot 的数据目录 (通常是 `AstrBot/data/plugin_data/cloudrank/` 或由 `StarTools.get_data_dir(PLUGIN_NAME)` 返回的路径) 下存储字体、停用词和生成的图片缓存。此目录包含三个主要子目录：`resources/`（存放字体和停用词）、`images/`（存放生成的词云图片）和 `debug/`（存放调试信息）。请确保 AstrBot 运行的用户对此目录有读写权限，并有足够的存储空间
450 | - **消息数据存储与 session_id 标准化**:
451 |   - 本插件的消息历史记录存储在 **AstrBot 核心的中央 SQLite 数据库** 中 (通常是 `AstrBot/data/data_v3.db` 或类似路径)，具体表名为 `wordcloud_message_history`。插件本身不在其独立的插件数据目录下创建数据库文件
452 |   - 插件会对群聊的 `session_id` 进行标准化处理，这意味着更新插件后，新记录的群聊消息将使用标准化后的 ID。旧的群聊消息如果之前是按其他 `session_id` 格式存储的，可能不会被包含在更新后的群聊词云查询中，除非进行数据迁移。查看或备份消息数据需要访问 AstrBot 的主数据库
453 | - **消息内容与统计范围**:
454 |   - 本插件设计的初衷是基于 **文本内容** 生成词云。因此，在记录消息时，只有那些实际包含文本的消息才会被存储到 `wordcloud_message_history` 数据库表中。纯图片、文件、系统提示、语音消息或大部分表情符号（如果它们没有附带文本描述）等非文本内容将 **不会** 被记录，也不会计入词云生成的消息总数中
455 |   - 因此，插件报告的 "共统计了 X 条消息" 或 "共产生 X 条发言" 是指在指定时间段内，**被插件记录下来的、包含文本内容的消息数量**，这个数量可能少于您在该聊天中看到的总事件数
456 | - **性能考虑**: 记录和分析大量聊天数据可能会消耗一定的系统资源，对于非常活跃的机器人或服务器资源有限的情况，请适当调整历史记录天数和词云生成频率
457 | - **依赖冲突**: 确保 `requirements.txt` 中列出的依赖版本与您的 Python 环境和其他 AstrBot 插件兼容
458 | 
459 | ## ◆ 问题排查 (FAQ)
460 | 
461 | - **词云不显示中文/中文显示为方框**:
462 |   - **原因**: 未找到合适的中文字体或配置的字体不包含所需字符
463 |   - **解决**: 在插件配置中设置 `font_path` 为一个有效的中文字体文件路径，可以将字体文件放入 `resources/fonts/` 目录并指定文件名，或使用系统字体的绝对路径
464 | - **命令没有反应**:
465 |   - **原因**: 插件未正确加载、被禁用、命令输入错误或权限不足
466 |   - **解决**: 检查 AstrBot 后台插件是否已启用，查看 AstrBot 日志有无报错，确认命令格式正确，以及执行需要权限的命令时是否拥有相应权限
467 | - **自动生成词云未按时执行**:
468 |   - **原因**: CRON 表达式配置错误、AstrBot 或插件在此期间未运行、或任务调度器出现问题
469 |   - **解决**: 检查 `auto_generate_cron` 和 `daily_generate_time` 的配置格式是否正确，确保 AstrBot 持续运行，查看日志中与 `TaskScheduler` 或词云生成相关的错误
470 | - **如何添加更多停用词**:
471 |   - **解决**: 找到插件的数据目录下的 `resources/stop_words.txt` 文件，直接编辑该文件，每行添加一个词
472 | - **词云颜色不喜欢**:
473 |   - **解决**: 修改配置项 `background_color` 设置背景色，修改 `colormap` 选择不同的词语配色方案
474 | - **自然语言关键词没有触发**:
475 |   - **原因**: 关键词未正确配置、关键词大小写或空格不匹配、或消息被识别为命令
476 |   - **解决**: 确保消息格式完全匹配 `constant.py` 中定义的关键词，包括空格和标点符号，确保消息不以 `/` 开头，否则会被视为命令而非普通消息
477 | 
478 | ## ◆ 更新日志
479 | 
480 | #### **v2.0.1** (2025-09-13)
481 | 
482 | **问题修复**:
483 | 
484 | - 修复每日定时词云发送失败的问题
485 |   - 解决平台 ID 映射不正确导致的发送失败问题
486 |   - 修正 session_id 格式转换逻辑，确保与 AstrBot 统一消息来源格式匹配
487 |   - 更新定时任务中的消息发送机制，使用正确的 MessageEventResult API
488 | 
489 | #### **v2.0.0** (2025-09-12)
490 | 
491 | **重大更新**:
492 | 
493 | - 完全适配 AstrBot v4.0.0 数据库系统
494 |   - 迁移到现代异步 SQLAlchemy ORM
495 |   - 移除所有直接 SQL 操作
496 |   - 使用 AstrBot 提供的中央数据库服务
497 |   - 提升数据操作的性能和可靠性
498 | 
499 | **优化改进**:
500 | 
501 | - 增强了指令过滤机制
502 |   - 优化词云生成时的消息过滤
503 |   - 更准确地排除指令相关文字
504 |   - 改进自然语言命令的处理逻辑
505 | 
506 | **系统要求**:
507 | 
508 | - 需要 AstrBot v4.0.0 或更高版本
509 | - Python 3.10+ 运行环境
510 | 
511 | #### **v1.3.9** (2025-07-06)
512 | 
513 | **效果改进**:
514 | 
515 | - 增强了消息清洗逻辑，能更精确地过滤指令、@消息、昵称等无意义内容，提高词云质量
516 | 
517 | #### **v1.3.8-rev1** (2025-05-30)
518 | 
519 | **效果改进**:
520 | 
521 | - 移除了词云生成时对最大字体大小 (`max_font_size`) 的硬编码上限(原为 120)及 `relative_scaling` 参数的固定设置，允许用户通过配置更自由地控制字体大小
522 | 
523 | **修复**:
524 | 
525 | - 修正 `min_word_frequency` 配置项的默认值为 `1`
526 | - 统一了 `_conf_schema.json`, `main.py` 和 `README.md` 中关于 `min_word_frequency` 的默认值描述
527 | - 调整了 `README.md` 中配置项表格的顺序，使其与 `_conf_schema.json` 一致
528 | 
529 | #### **v1.3.8** (2025-05-30)
530 | 
531 | **新增功能**:
532 | 
533 | - 新增 `min_word_frequency` 配置项，允许用户设置词云生成时词语的最小出现频率
534 | - 出现次数低于此配置值的词语将被过滤，有助于生成更清晰、更聚焦高频词汇的词云
535 | - 默认值为 `2`，即词语至少出现 2 次才会被统计，设置为 `1` 则不进行词频过滤
536 | 
537 | **配置更新：**
538 | 
539 | - `min_word_frequency`: 控制词云中词语的最小出现次数（默认值：2）
540 | 
541 | #### **v1.3.7**（2025-05-29）
542 | 
543 | **平台支持扩展：**
544 | 
545 | - 新增 WeChatPadPro 平台词云生成支持
546 | 
547 | **贡献者：**
548 | 
549 | - 感谢 [@xu-wish](https://github.com/xu-wish) 通过 [PR #9](https://github.com/GEMILUXVII/astrbot_plugin_cloudrank/pull/9) 贡献 WeChatPadPro 平台支持
550 | 
551 | #### **v1.3.6**（2025-05-28）
552 | 
553 | **停用词系统更新：**
554 | 
555 | - 大幅增强停用词过滤系统，从原有的 4 个示例停用词扩展到 700+个综合停用词
556 | - 新增中文常用停用词：的、了、在、和、是等基础词汇
557 | - 新增中文语气词和感叹词：阿、啊、哈哈、呵呵等表情化词汇
558 | - 新增中文代词和指示词：俺们、这个、那个、某些等指代词汇
559 | - 新增中文连词和介词：按照、从而、对于、关于等连接词汇
560 | - 新增英文常用停用词：a、the、and、but 等英文基础词汇
561 | - 新增网络用语和表情符号文字：emmm、哈哈哈、呵呵等网络表达
562 | - 新增常见无意义词汇：东西、事情、情况、方面等模糊词汇
563 | - 新增标点符号和特殊字符过滤支持
564 | 
565 | **改进效果：**
566 | 
567 | - 显著提升词云质量，过滤掉无意义的高频词汇
568 | - 让关键词汇更加突出，提高词云的可读性和价值
569 | - 支持中英文混合文本的高质量词云生成
570 | 
571 | #### **v1.3.5**（2025-05-28）
572 | 
573 | **新功能：**
574 | 
575 | - 新增 `min_font_size` 和 `max_font_size` 配置项，允许自定义词云字体大小范围
576 | - 改进字体大小对比度，从默认的 10-120 调整为 8-170，使高频词汇更加突出
577 | - 增强词云视觉效果，提供更好的高低频词汇对比显示
578 | 
579 | **配置更新：**
580 | 
581 | - `min_font_size`: 控制低频词汇的最小字体大小（默认值：8）
582 | - `max_font_size`: 控制高频词汇的最大字体大小（默认值：170）
583 | - 这些配置项允许用户根据需要调整词云的视觉对比度
584 | 
585 | #### **v1.3.4**（2025-05-27）
586 | 
587 | **重要修复：**
588 | 
589 | - 修复词云生成时包含群成员@提及 ID 的问题
590 | - 在 `segment_text` 函数中添加正则表达式过滤，自动移除@用户提及内容
591 | - 确保词云统计结果更加准确和美观，不再出现如 "@6emasvii" 等用户 ID
592 | 
593 | #### **v1.3.3**（2025-05-23）
594 | 
595 | **新功能与改进：**
596 | 
597 | - 新增 `timezone` 配置项，允许用户为插件任务自定义时区
598 | - 新增 `custom_mask_path` 配置项，允许用户指定自定义图片作为词云形状蒙版
599 | - 新增 `include_bot_messages` 配置项，允许用户选择是否将机器人自身发送的消息计入词云统计
600 | 
601 | #### **v1.3.2**（2025-05-12）
602 | 
603 | **优化与修复：**
604 | 
605 | - 确保每日词云和排行榜统计准确反映当天数据
606 | - 修复 `/wc force_daily` 指令 `no attribute 'data_dir'` 的问题
607 | - 修复每日词云可能无法正常生成的问题
608 | - 解决排行榜 SQL 查询和消息构建中的问题
609 | - 统一排行榜输出样式，修复会话 ID 格式错误
610 | - 新增用户统计方法，提升灵活性
611 | 
612 | #### **v1.3.1**（2025-05-11）
613 | 
614 | **日志与线程改进：**
615 | 
616 | - 标准化日志输出，便于问题排查
617 | - 解决线程重载警告，提升稳定性
618 | 
619 | #### **v1.3.0**（2025-05-10）
620 | 
621 | **性能与安全提升：**
622 | 
623 | - 修复定时任务重复问题，优化资源管理
624 | - 增强线程安全性，改进词云生成过程
625 | - 完善日志记录，优化性能
626 | 
627 | #### **v1.2.1**（2025-05-09）
628 | 
629 | **关键词与文档更新：**
630 | 
631 | - 添加自然语言关键词处理，提高命令稳定性
632 | - 完善文档，添加更多使用说明
633 | 
634 | #### **v1.2.0**（2025-05-08）
635 | 
636 | **配置逻辑调整：**
637 | 
638 | - 修改群聊启用逻辑，更新配置文件提示
639 | 
640 | #### **v1.1.2**（2025-05-08）
641 | 
642 | **线程安全修复：**
643 | 
644 | - 解决非主线程生成词云时的 `RuntimeError`
645 | 
646 | #### **v1.1.1**（2025-05-08）
647 | 
648 | **会话与日志优化：**
649 | 
650 | - 修复会话 ID 处理逻辑，优化日志输出
651 | 
652 | #### **v1.1.0**（2025-05-08）
653 | 
654 | **功能扩展：**
655 | 
656 | - 插件更名为 "CloudRank"，新增用户活跃度排行榜功能
657 | 
658 | #### **v1.0.0**（2025-05-08）
659 | 
660 | **初始发布：**
661 | 
662 | - 发布基础词云生成功能，支持多种视觉定制和配置管理
663 | 
664 | ## ◆ 许可证
665 | 
666 | 本插件采用 [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.gnu.org/licenses/agpl-3.0.html) 许可证
667 | 
668 | ## ◆ 致谢
669 | 
670 | 本项目基于或参考了以下开源项目:
671 | 
672 | - [AstrBot](https://github.com/AstrBotDevs/AstrBot) - 提供强大的聊天机器人平台支持
673 | - [LXGW WenKai](https://github.com/lxgw/LxgwWenKai) - 霞鹜文楷字体项目，提供了美观的开源中文字体
674 | 


--------------------------------------------------------------------------------
/_conf_schema.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "auto_generate_enabled": {
  3 |         "type": "bool",
  4 |         "description": "是否启用自动生成词云",
  5 |         "hint": "定时执行，根据cron表达式配置的时间生成词云",
  6 |         "default": true
  7 |     },
  8 |     "auto_generate_cron": {
  9 |         "type": "string",
 10 |         "description": "自动生成词云的cron表达式",
 11 |         "hint": "使用标准cron格式（分 时 日 月 周），默认每天晚上8点执行",
 12 |         "default": "0 20 * * *"
 13 |     },
 14 |     "timezone": {
 15 |         "type": "string",
 16 |         "description": "自定义插件使用的时区 (IANA时区名称)",
 17 |         "hint": "请输入有效的IANA时区名称，例如：Asia/Shanghai, Europe/London, America/New_York, UTC，您可以参考维基百科的列表 (https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) 或通过 Python 的 pytz.all_timezones 获取，这将用于确定每日生成词云和cron表达式的本地时间",
 18 |         "default": "Asia/Shanghai"
 19 |     },
 20 |     "daily_generate_enabled": {
 21 |         "type": "bool",
 22 |         "description": "是否启用每日词云生成",
 23 |         "hint": "每天固定时间生成当天的聊天词云",
 24 |         "default": true
 25 |     },
 26 |     "daily_generate_time": {
 27 |         "type": "string",
 28 |         "description": "每日词云生成时间",
 29 |         "hint": "格式为 HH:MM，如：23:30 表示每天晚上11点30分",
 30 |         "default": "23:30"
 31 |     },
 32 |     "daily_summary_title": {
 33 |         "type": "string",
 34 |         "description": "每日词云标题模板",
 35 |         "hint": "支持变量：{date}=日期，{group_name}=群名称",
 36 |         "default": "{date} {group_name} 今日词云"
 37 |     },
 38 |     "enabled_group_list": {
 39 |         "type": "string",
 40 |         "description": "启用今日词云功能的群聊列表",
 41 |         "hint": "以逗号分隔的群号列表，如：123456789,987654321，仅在此处填写的群号才会启用词云功能，留空则默认所有群都不启用",
 42 |         "default": ""
 43 |     },
 44 |     "history_days": {
 45 |         "type": "int",
 46 |         "description": "统计历史消息的天数",
 47 |         "hint": "手动生成词云时默认统计的天数",
 48 |         "default": 7
 49 |     },
 50 |     "max_word_count": {
 51 |         "type": "int",
 52 |         "description": "词云中最大显示词数",
 53 |         "hint": "建议在50-200之间",
 54 |         "default": 100
 55 |     },
 56 |     "min_word_length": {
 57 |         "type": "int",
 58 |         "description": "最小词长度",
 59 |         "hint": "小于此长度的词会被忽略，建议为2",
 60 |         "default": 2
 61 |     },
 62 |     "min_word_frequency": {
 63 |         "type": "int",
 64 |         "description": "最小词频",
 65 |         "hint": "出现次数低于此值的词将被过滤，以优化词云视觉效果，设为1则不过滤",
 66 |         "default": 1
 67 |     },
 68 |     "background_color": {
 69 |         "type": "string",
 70 |         "description": "词云背景颜色",
 71 |         "hint": "可使用颜色名称或十六进制值，如：white、black、#FFFFFF、#000000",
 72 |         "default": "white"
 73 |     },
 74 |     "colormap": {
 75 |         "type": "string",
 76 |         "description": "词云配色方案",
 77 |         "hint": "影响词云中词语的颜色",
 78 |         "default": "viridis",
 79 |         "options": ["viridis", "plasma", "inferno", "magma", "cividis", "rainbow", "jet", "turbo", "cool", "hot"]
 80 |     },
 81 |     "font_path": {
 82 |         "type": "string",
 83 |         "description": "字体路径",
 84 |         "hint": "可使用相对路径或绝对路径，留空使用默认字体",
 85 |         "default": ""
 86 |     },
 87 |     "stop_words_file": {
 88 |         "type": "string",
 89 |         "description": "停用词文件路径",
 90 |         "hint": "可使用相对路径或绝对路径，留空使用默认停用词",
 91 |         "default": "stop_words.txt"
 92 |     },
 93 |     "include_bot_messages": {
 94 |         "type": "bool",
 95 |         "description": "是否将机器人自身的消息计入词云统计",
 96 |         "hint": "开启后，机器人自己发送的消息也会被用于生成词云，默认为关闭",
 97 |         "default": false
 98 |     },
 99 |     "shape": {
100 |         "type": "string",
101 |         "description": "词云形状",
102 |         "hint": "决定词云的整体形状，如果设置了自定义蒙版路径，则此选项无效",
103 |         "default": "rectangle",
104 |         "options": ["rectangle", "circle", "diamond", "triangle_up"]
105 |     },
106 |     "custom_mask_path": {
107 |         "type": "string",
108 |         "description": "自定义蒙版图片路径",
109 |         "hint": "提供一个图片文件的路径作为词云的形状蒙版，图片中白色区域将被忽略，非白色区域将用于绘制词语，如果设置了此路径，则预设的'形状'选项将无效，支持相对路径（相对于插件数据目录下的resources/images/子目录）或绝对路径",
110 |         "default": ""
111 |     },
112 |     "min_font_size": {
113 |         "type": "int",
114 |         "description": "词云最小字体大小",
115 |         "hint": "低频词语的最小字体大小，建议设置为8-15之间",
116 |         "default": 8
117 |     },
118 |     "max_font_size": {
119 |         "type": "int",
120 |         "description": "词云最大字体大小",
121 |         "hint": "高频词语的最大字体大小，建议设置为150-300之间",
122 |         "default": 170
123 |     },
124 |     "show_user_ranking": {
125 |         "type": "bool",
126 |         "description": "是否在每日词云中显示用户活跃度排行",
127 |         "hint": "开启后，每日词云生成时会同时显示当天发言最活跃的用户排行榜",
128 |         "default": true
129 |     },
130 |     "ranking_user_count": {
131 |         "type": "int",
132 |         "description": "用户排行榜显示的人数",
133 |         "hint": "设置排行榜显示的用户数量，建议5-10之间",
134 |         "default": 5
135 |     },
136 |     "ranking_medals": {
137 |         "type": "string",
138 |         "description": "排行榜奖牌表情",
139 |         "hint": "用逗号分隔的表情符号，例如：🥇,🥈,🥉,🏅,🏅 前三名会使用前三个表情",
140 |         "default": "🥇,🥈,🥉,🏅,🏅"
141 |     },
142 |     "debug_mode": {
143 |         "type": "bool",
144 |         "description": "启用详细调试日志",
145 |         "hint": "开启后会在控制台输出非常详细的调度器和任务执行日志，用于问题排查，请仅在需要时开启",
146 |         "default": false,
147 |         "obvious_hint": false
148 |     }
149 | }


--------------------------------------------------------------------------------
/constant.py:
--------------------------------------------------------------------------------
  1 | """
  2 | CloudRank插件常量定义
  3 | """
  4 | 
  5 | import os
  6 | from pathlib import Path
  7 | 
  8 | # 插件信息
  9 | PLUGIN_NAME = "cloudrank"
 10 | PLUGIN_AUTHOR = "GEMILUXVII"
 11 | PLUGIN_DESC = "词云与排名插件 (CloudRank) 是一个文本可视化工具，能将聊天记录关键词以词云形式展现，并显示用户活跃度排行榜，支持定时或手动生成"
 12 | PLUGIN_VERSION = "2.0.1"
 13 | PLUGIN_REPO = "https://github.com/GEMILUXVII/astrbot_plugin_cloudrank"
 14 | 
 15 | # 路径常量
 16 | PLUGIN_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
 17 | 
 18 | # DATA_DIR通过StarTools.get_data_dir动态获取
 19 | # 这里只是定义一个占位变量，真正的目录会在初始化时设置
 20 | # 正确的数据目录应该是：data/plugin_data/cloudrank
 21 | DATA_DIR = None  # 由主模块初始化
 22 | 
 23 | # 词云生成常量
 24 | DEFAULT_WIDTH = 800
 25 | DEFAULT_HEIGHT = 400
 26 | DEFAULT_MAX_WORDS = 200
 27 | DEFAULT_BACKGROUND_COLOR = "white"
 28 | DEFAULT_COLORMAP = "viridis"
 29 | DEFAULT_MIN_WORD_LENGTH = 2
 30 | 
 31 | # 命令常量
 32 | CMD_GENERATE = "wordcloud"
 33 | CMD_GROUP = "wc"
 34 | CMD_CONFIG = "config"
 35 | CMD_HELP = "help"
 36 | 
 37 | # 自然语言关键词 - 用于触发命令的关键词
 38 | # 格式: {"command": ["关键词1", "关键词2", ...]}
 39 | NATURAL_KEYWORDS = {
 40 |     "today": ["今日词云", "获取今日词云", "查看今日词云", "生成今日词云"],
 41 |     "wordcloud": ["生成词云", "查看词云", "最近词云", "历史词云"],
 42 |     "help": ["词云帮助", "词云功能", "词云说明", "词云指令"],
 43 | }
 44 | 
 45 | # 默认停用词列表
 46 | DEFAULT_STOPWORDS = [
 47 |     "的",
 48 |     "了",
 49 |     "在",
 50 |     "是",
 51 |     "我",
 52 |     "有",
 53 |     "和",
 54 |     "就",
 55 |     "不",
 56 |     "人",
 57 |     "都",
 58 |     "一",
 59 |     "一个",
 60 |     "上",
 61 |     "也",
 62 |     "很",
 63 |     "到",
 64 |     "说",
 65 |     "要",
 66 |     "去",
 67 |     "你",
 68 |     "会",
 69 |     "着",
 70 |     "没有",
 71 |     "看",
 72 |     "好",
 73 |     "自己",
 74 |     "这",
 75 |     "the",
 76 |     "and",
 77 |     "to",
 78 |     "of",
 79 |     "a",
 80 |     "is",
 81 |     "in",
 82 |     "it",
 83 |     "that",
 84 |     "for",
 85 |     "on",
 86 |     "with",
 87 |     "as",
 88 |     "be",
 89 |     "at",
 90 |     "this",
 91 |     "have",
 92 |     "from",
 93 |     "by",
 94 |     "was",
 95 |     "are",
 96 |     "or",
 97 |     "an",
 98 |     "I",
 99 |     "but",
100 |     "not",
101 |     "you",
102 |     "he",
103 |     "they",
104 |     "she",
105 |     "we",
106 | ]
107 | 


--------------------------------------------------------------------------------
/fonts/LXGWWenKai-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GEMILUXVII/astrbot_plugin_cloudrank/34388ef1a7b241c26e6eff121af6750a4113c12b/fonts/LXGWWenKai-Regular.ttf


--------------------------------------------------------------------------------
/fonts/OFL.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2021-2025 LXGW (https://github.com/lxgw/LxgwWenKai)
 2 | Copyright 2020 The Klee Project Authors (https://github.com/fontworks-fonts/Klee)
 3 | 
 4 | This Font Software is licensed under the SIL Open Font License, Version 1.1.
 5 | This license is copied below, and is also available with a FAQ at:
 6 | https://openfontlicense.org
 7 | 
 8 | 
 9 | -----------------------------------------------------------
10 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
11 | -----------------------------------------------------------
12 | 
13 | PREAMBLE
14 | The goals of the Open Font License (OFL) are to stimulate worldwide
15 | development of collaborative font projects, to support the font creation
16 | efforts of academic and linguistic communities, and to provide a free and
17 | open framework in which fonts may be shared and improved in partnership
18 | with others.
19 | 
20 | The OFL allows the licensed fonts to be used, studied, modified and
21 | redistributed freely as long as they are not sold by themselves. The
22 | fonts, including any derivative works, can be bundled, embedded, 
23 | redistributed and/or sold with any software provided that any reserved
24 | names are not used by derivative works. The fonts and derivatives,
25 | however, cannot be released under any other type of license. The
26 | requirement for fonts to remain under this license does not apply
27 | to any document created using the fonts or their derivatives.
28 | 
29 | DEFINITIONS
30 | "Font Software" refers to the set of files released by the Copyright
31 | Holder(s) under this license and clearly marked as such. This may
32 | include source files, build scripts and documentation.
33 | 
34 | "Reserved Font Name" refers to any names specified as such after the
35 | copyright statement(s).
36 | 
37 | "Original Version" refers to the collection of Font Software components as
38 | distributed by the Copyright Holder(s).
39 | 
40 | "Modified Version" refers to any derivative made by adding to, deleting,
41 | or substituting -- in part or in whole -- any of the components of the
42 | Original Version, by changing formats or by porting the Font Software to a
43 | new environment.
44 | 
45 | "Author" refers to any designer, engineer, programmer, technical
46 | writer or other person who contributed to the Font Software.
47 | 
48 | PERMISSION & CONDITIONS
49 | Permission is hereby granted, free of charge, to any person obtaining
50 | a copy of the Font Software, to use, study, copy, merge, embed, modify,
51 | redistribute, and sell modified and unmodified copies of the Font
52 | Software, subject to the following conditions:
53 | 
54 | 1) Neither the Font Software nor any of its individual components,
55 | in Original or Modified Versions, may be sold by itself.
56 | 
57 | 2) Original or Modified Versions of the Font Software may be bundled,
58 | redistributed and/or sold with any software, provided that each copy
59 | contains the above copyright notice and this license. These can be
60 | included either as stand-alone text files, human-readable headers or
61 | in the appropriate machine-readable metadata fields within text or
62 | binary files as long as those fields can be easily viewed by the user.
63 | 
64 | 3) No Modified Version of the Font Software may use the Reserved Font
65 | Name(s) unless explicit written permission is granted by the corresponding
66 | Copyright Holder. This restriction only applies to the primary font name as
67 | presented to the users.
68 | 
69 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
70 | Software shall not be used to promote, endorse or advertise any
71 | Modified Version, except to acknowledge the contribution(s) of the
72 | Copyright Holder(s) and the Author(s) or with their explicit written
73 | permission.
74 | 
75 | 5) The Font Software, modified or unmodified, in part or in whole,
76 | must be distributed entirely under this license, and must not be
77 | distributed under any other license. The requirement for fonts to
78 | remain under this license does not apply to any document created
79 | using the Font Software.
80 | 
81 | TERMINATION
82 | This license becomes null and void if any of the above conditions are
83 | not met.
84 | 
85 | DISCLAIMER
86 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
87 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
88 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
89 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
90 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
91 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
92 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
93 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
94 | OTHER DEALINGS IN THE FONT SOFTWARE.
95 | 


--------------------------------------------------------------------------------
/metadata.yaml:
--------------------------------------------------------------------------------
1 | name: cloudrank
2 | desc: 词云与排名插件(CloudRank)是一个文本可视化工具，能将聊天记录关键词以词云形式展现，并显示用户活跃度排行榜，支持定时或手动生成
3 | version: v2.0.1
4 | author: GEMILUXVII
5 | repo: https://github.com/GEMILUXVII/astrbot_plugin_cloudrank
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | wordcloud>=1.9.4
2 | matplotlib==3.10.0
3 | jieba==0.42.1
4 | croniter==1.3.8
5 | numpy>=1.23.2
6 | pillow>=9.4.0
7 | pytz 


--------------------------------------------------------------------------------
/stop_words.txt:
--------------------------------------------------------------------------------
  1 | # 中文常用停用词
  2 | 的
  3 | 了
  4 | 在
  5 | 和
  6 | 是
  7 | 就
  8 | 也
  9 | 还
 10 | 有
 11 | 我
 12 | 你
 13 | 他
 14 | 她
 15 | 它
 16 | 我们
 17 | 你们
 18 | 他们
 19 | 她们
 20 | 它们
 21 | 这个
 22 | 那个
 23 | 一些
 24 | 什么
 25 | 怎么
 26 | 为什么
 27 | 因为
 28 | 所以
 29 | 但是
 30 | 然而
 31 | 而且
 32 | 并且
 33 | 或者
 34 | 以及
 35 | 于是
 36 | 一种
 37 | 一个
 38 | 一样
 39 | 一下
 40 | 一般
 41 | 依然
 42 | 必须
 43 | 比如
 44 | 得到
 45 | 的确
 46 | 否则
 47 | 另外
 48 | 目前
 49 | 那么
 50 | 那些
 51 | 如果
 52 | 如何
 53 | 日前
 54 | 时候
 55 | 虽然
 56 | 通过
 57 | 同时
 58 | 往往
 59 | 为何
 60 | 为了
 61 | 问题
 62 | 也许
 63 | 一定
 64 | 以后
 65 | 因此
 66 | 由于
 67 | 与
 68 | 则
 69 | 这样
 70 | 这里
 71 | 这种
 72 | 作为
 73 | 
 74 | # 中文语气词和感叹词
 75 | 阿
 76 | 啊
 77 | 哎
 78 | 哎呀
 79 | 哎哟
 80 | 唉
 81 | 吧
 82 | 呃
 83 | 嗯
 84 | 哈
 85 | 哈哈
 86 | 呵
 87 | 嘿
 88 | 哼
 89 | 哼唷
 90 | 呼哧
 91 | 哗
 92 | 哦
 93 | 喔唷
 94 | 啪达
 95 | 呸
 96 | 啥
 97 | 哇
 98 | 喂
 99 | 嗡嗡
100 | 唔
101 | 唔呼
102 | 咦
103 | 哉
104 | 吱
105 | 着呢
106 | 
107 | # 中文代词和指示词
108 | 俺
109 | 俺们
110 | 本
111 | 此
112 | 此间
113 | 此外
114 | 该
115 | 个
116 | 各
117 | 各个
118 | 各位
119 | 己
120 | 某
121 | 某个
122 | 某些
123 | 哪
124 | 那
125 | 那边
126 | 那儿
127 | 哪个
128 | 那会儿
129 | 那里
130 | 那么些
131 | 那么样
132 | 那时
133 | 那些
134 | 那样
135 | 旁人
136 | 人家
137 | 谁
138 | 谁知
139 | 他人
140 | 它们
141 | 她们
142 | 咱
143 | 咱们
144 | 者
145 | 这
146 | 这边
147 | 这儿
148 | 这会儿
149 | 这就是说
150 | 这么
151 | 这么点儿
152 | 这么些
153 | 这么样
154 | 这时
155 | 这些
156 | 诸位
157 | 自
158 | 自从
159 | 自各儿
160 | 自个儿
161 | 自己
162 | 自家
163 | 自身
164 | 
165 | # 中文连词和介词
166 | 按
167 | 按照
168 | 被
169 | 比起
170 | 比如说
171 | 并
172 | 不比
173 | 不成
174 | 不单
175 | 不但
176 | 不独
177 | 不管
178 | 不光
179 | 不过
180 | 不仅
181 | 不拘
182 | 不论
183 | 不怕
184 | 不然
185 | 不如
186 | 不特
187 | 不惟
188 | 不问
189 | 不只
190 | 朝
191 | 朝着
192 | 趁
193 | 趁着
194 | 乘
195 | 冲
196 | 除
197 | 除此之外
198 | 除非
199 | 除了
200 | 从
201 | 从而
202 | 打
203 | 待
204 | 当
205 | 当着
206 | 到
207 | 得
208 | 等
209 | 等等
210 | 地
211 | 第
212 | 对
213 | 对于
214 | 多
215 | 而
216 | 而外
217 | 而言
218 | 而已
219 | 尔后
220 | 反过来
221 | 反过来说
222 | 反之
223 | 非但
224 | 非徒
225 | 根据
226 | 跟
227 | 故
228 | 故此
229 | 固然
230 | 关于
231 | 管
232 | 归
233 | 果然
234 | 果真
235 | 过
236 | 何
237 | 何处
238 | 何况
239 | 何时
240 | 乎
241 | 还是
242 | 换句话说
243 | 换言之
244 | 或
245 | 或是
246 | 既
247 | 既然
248 | 及
249 | 及其
250 | 及至
251 | 即
252 | 即便
253 | 即或
254 | 即令
255 | 即若
256 | 即使
257 | 几
258 | 几时
259 | 加之
260 | 假如
261 | 假若
262 | 假使
263 | 鉴于
264 | 将
265 | 较
266 | 较之
267 | 叫
268 | 接着
269 | 结果
270 | 借
271 | 紧接着
272 | 进而
273 | 尽
274 | 尽管
275 | 尽管如此
276 | 据
277 | 据此
278 | 据实而言
279 | 据悉
280 | 据我所知
281 | 据说
282 | 举凡
283 | 可
284 | 可见
285 | 可是
286 | 可以
287 | 况且
288 | 来
289 | 来着
290 | 离
291 | 例如
292 | 连
293 | 两样
294 | 临
295 | 另
296 | 另一方面
297 | 论
298 | 每
299 | 每当
300 | 们
301 | 莫若
302 | 乃
303 | 乃至
304 | 能
305 | 您
306 | 宁
307 | 宁可
308 | 宁肯
309 | 宁愿
310 | 凭
311 | 凭借
312 | 其
313 | 其次
314 | 其二
315 | 其他
316 | 其它
317 | 其一
318 | 其余
319 | 其中
320 | 起
321 | 起见
322 | 岂但
323 | 岂止
324 | 恰恰相反
325 | 前后
326 | 前者
327 | 且
328 | 然后
329 | 然则
330 | 让
331 | 任
332 | 任何
333 | 任凭
334 | 如
335 | 如此
336 | 如其
337 | 如若
338 | 如上所述
339 | 若
340 | 若非
341 | 若是
342 | 尚且
343 | 设若
344 | 设使
345 | 甚而
346 | 甚么
347 | 甚至
348 | 省得
349 | 什么样
350 | 是的
351 | 首先
352 | 顺
353 | 顺着
354 | 俟
355 | 虽说
356 | 虽则
357 | 随
358 | 随着
359 | 所
360 | 腾
361 | 替
362 | 同
363 | 同样
364 | 万一
365 | 往
366 | 望
367 | 为
368 | 为着
369 | 以便
370 | 以免
371 | 以前
372 | 以至
373 | 以至于
374 | 以致
375 | 抑或
376 | 矣
377 | 用
378 | 由
379 | 由此可见
380 | 有的
381 | 有些
382 | 有关
383 | 与此同时
384 | 与否
385 | 与其
386 | 越是
387 | 云云
388 | 再
389 | 再其次
390 | 再则
391 | 再说
392 | 在下
393 | 在于
394 | 怎
395 | 怎么办
396 | 怎么样
397 | 怎样
398 | 咋
399 | 照
400 | 照着
401 | 之
402 | 之类
403 | 之所以
404 | 之一
405 | 之前
406 | 之后
407 | 之中
408 | 止
409 | 只
410 | 只不过
411 | 只限
412 | 只要
413 | 只有
414 | 至
415 | 至于
416 | 着
417 | 自各儿
418 | 综上所述
419 | 总而言之
420 | 总之
421 | 总的说来
422 | 纵
423 | 纵令
424 | 纵然
425 | 纵使
426 | 遵照
427 | 遵循
428 | 依照
429 | 按照
430 | 
431 | # 中文常用短语
432 | 也就是说
433 | 换句话说
434 | 总的来说
435 | 一般而言
436 | 实际上
437 | 事实上
438 | 例如说
439 | 等等等等
440 | 另外的话
441 | 与此同时
442 | 
443 | # 网络常用词汇
444 | 哈哈哈
445 | 呵呵
446 | 嗯嗯
447 | 额
448 | 呃呃
449 | emmm
450 | 诶
451 | 咦咦
452 | 哇哈哈
453 | 啦啦啦
454 | 噢噢
455 | 嗷嗷
456 | 咯咯
457 | 嘻嘻
458 | 吼吼
459 | 哼哼
460 | 
461 | # 数字和符号相关
462 | 一
463 | 二
464 | 三
465 | 四
466 | 五
467 | 六
468 | 七
469 | 八
470 | 九
471 | 十
472 | 零
473 | 百
474 | 千
475 | 万
476 | 亿
477 | 
478 | # English Stop Words
479 | a
480 | able
481 | about
482 | above
483 | across
484 | after
485 | all
486 | almost
487 | also
488 | am
489 | among
490 | an
491 | and
492 | any
493 | are
494 | as
495 | at
496 | be
497 | because
498 | been
499 | but
500 | by
501 | can
502 | cannot
503 | could
504 | dear
505 | did
506 | do
507 | does
508 | either
509 | else
510 | ever
511 | every
512 | for
513 | from
514 | get
515 | got
516 | had
517 | has
518 | have
519 | he
520 | her
521 | hers
522 | him
523 | his
524 | how
525 | however
526 | i
527 | if
528 | in
529 | into
530 | is
531 | it
532 | its
533 | just
534 | least
535 | let
536 | like
537 | likely
538 | may
539 | me
540 | might
541 | most
542 | must
543 | my
544 | neither
545 | no
546 | nor
547 | not
548 | of
549 | off
550 | often
551 | on
552 | only
553 | or
554 | other
555 | our
556 | own
557 | rather
558 | said
559 | say
560 | says
561 | she
562 | should
563 | since
564 | so
565 | some
566 | than
567 | that
568 | the
569 | their
570 | them
571 | then
572 | there
573 | these
574 | they
575 | this
576 | tis
577 | to
578 | too
579 | twas
580 | us
581 | wants
582 | was
583 | we
584 | were
585 | what
586 | when
587 | where
588 | which
589 | while
590 | who
591 | whom
592 | why
593 | will
594 | with
595 | would
596 | yet
597 | you
598 | your
599 | 
600 | # 常用标点和符号 (如果分词器会产生)
601 | ，
602 | 。
603 | ！
604 | ？
605 | ；
606 | ：
607 | "
608 | "
609 | '
610 | '
611 | （
612 | ）
613 | 【
614 | 】
615 | 《
616 | 》
617 | 、
618 | …
619 | —
620 | ～
621 | ·
622 | @
623 | #
624 | %
625 | &
626 | *
627 | +
628 | -
629 | =
630 | |
631 | \
632 | /
633 | <
634 | >
635 | ^
636 | _
637 | `
638 | {
639 | }
640 | [
641 | ]
642 | 
643 | # 网络用语和表情符号文字
644 | 哈
645 | 呵
646 | 哦
647 | 啊
648 | 嗯
649 | 额
650 | 诶
651 | 咦
652 | 哇
653 | 噢
654 | 嗷
655 | 咯
656 | 嘻
657 | 吼
658 | 哼
659 | 嘿
660 | 喔
661 | 唔
662 | 嘘
663 | 嗬
664 | 咳
665 | 啧
666 | 唷
667 | 咿
668 | 呀
669 | 吖
670 | 唉
671 | 嗨
672 | 嗯哼
673 | 啊哈
674 | 呃呃
675 | 呵呵
676 | 嗯嗯
677 | 咦咦
678 | 哇哈
679 | 噢噢
680 | 嗷嗷
681 | 咯咯
682 | 嘻嘻
683 | 吼吼
684 | 哼哼
685 | 
686 | # 常见无意义词汇
687 | 东西
688 | 事情
689 | 情况
690 | 方面
691 | 地方
692 | 时间
693 | 时候
694 | 样子
695 | 这样
696 | 那样
697 | 怎样
698 | 这种
699 | 那种
700 | 哪种
701 | 如此
702 | 这么
703 | 那么
704 | 多么
705 | 怎么
706 | 为什么
707 | 什么时候
708 | 什么地方
709 | 什么事情
710 | 什么东西
711 | 什么样子
712 | 这个样子
713 | 那个样子
714 | 什么样的
715 | 这样的
716 | 那样的
717 | 怎样的
718 | 这种的
719 | 那种的
720 | 如此的
721 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 词云插件工具函数
  3 | """
  4 | 
  5 | import os
  6 | import time
  7 | import datetime
  8 | from pathlib import Path
  9 | from typing import List, Optional, Tuple, Set
 10 | 
 11 | import jieba
 12 | from astrbot.api import logger
 13 | from astrbot.api.star import StarTools
 14 | 
 15 | from .constant import DATA_DIR, DEFAULT_STOPWORDS
 16 | 
 17 | 
 18 | def ensure_directory(path: Path) -> None:
 19 |     """确保目录存在"""
 20 |     if not path.exists():
 21 |         path.mkdir(parents=True, exist_ok=True)
 22 |         logger.info(f"创建目录: {path}")
 23 | 
 24 | 
 25 | def get_current_timestamp() -> int:
 26 |     """获取当前时间戳"""
 27 |     return int(time.time())
 28 | 
 29 | 
 30 | def format_timestamp(timestamp: int) -> str:
 31 |     """格式化时间戳为可读字符串"""
 32 |     return datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S")
 33 | 
 34 | 
 35 | def format_date(timestamp: Optional[int] = None) -> str:
 36 |     """格式化时间戳为日期字符串"""
 37 |     if timestamp is None:
 38 |         timestamp = get_current_timestamp()
 39 |     return datetime.datetime.fromtimestamp(timestamp).strftime("%Y年%m月%d日")
 40 | 
 41 | 
 42 | def parse_time_str(time_str: str) -> Tuple[int, int]:
 43 |     """
 44 |     解析时间字符串为小时和分钟
 45 |     格式为 HH:MM，例如 23:30
 46 | 
 47 |     Args:
 48 |         time_str: 时间字符串，如 "23:30"
 49 | 
 50 |     Returns:
 51 |         小时和分钟的元组，如 (23, 30)
 52 | 
 53 |     Raises:
 54 |         ValueError: 如果时间格式无效
 55 |     """
 56 |     try:
 57 |         hour, minute = time_str.split(":")
 58 |         hour = int(hour.strip())
 59 |         minute = int(minute.strip())
 60 | 
 61 |         if not (0 <= hour < 24 and 0 <= minute < 60):
 62 |             raise ValueError(f"无效的时间值: {hour}:{minute}")
 63 | 
 64 |         return hour, minute
 65 |     except Exception as e:
 66 |         logger.error(f"解析时间字符串失败 '{time_str}': {e}")
 67 |         # 默认返回晚上11:30
 68 |         return 23, 30
 69 | 
 70 | 
 71 | def time_str_to_cron(time_str: str) -> str:
 72 |     """
 73 |     将时间字符串转换为cron表达式
 74 | 
 75 |     Args:
 76 |         time_str: 格式为HH:MM的时间字符串，如 "23:30"
 77 | 
 78 |     Returns:
 79 |         cron表达式，如 "30 23 * * *"
 80 |     """
 81 |     try:
 82 |         # 去除可能的空白字符
 83 |         time_str = time_str.strip()
 84 | 
 85 |         # 验证时间格式
 86 |         if ":" not in time_str:
 87 |             logger.error(f"时间格式错误 '{time_str}': 缺少冒号分隔符")
 88 |             return "0 0 * * *"  # 默认午夜执行
 89 | 
 90 |         # 分割小时和分钟
 91 |         try:
 92 |             hour_str, minute_str = time_str.split(":")
 93 |             hour = int(hour_str.strip())
 94 |             minute = int(minute_str.strip())
 95 | 
 96 |             # 验证小时和分钟范围
 97 |             if not (0 <= hour < 24):
 98 |                 logger.error(f"小时值超出范围: {hour}")
 99 |                 hour = 0  # 修正为有效值
100 | 
101 |             if not (0 <= minute < 60):
102 |                 logger.error(f"分钟值超出范围: {minute}")
103 |                 minute = 0  # 修正为有效值
104 | 
105 |         except ValueError:
106 |             logger.error(f"无法解析时间字符串: '{time_str}'")
107 |             return "0 0 * * *"  # 默认午夜执行
108 | 
109 |         # 检查时区问题 - 中国是UTC+8，如果系统可能在内部使用UTC时间
110 |         import time
111 | 
112 |         timezone_offset = -time.timezone // 3600  # 获取本地时区偏移（小时）
113 |         logger.info(
114 |             f"系统时区信息: UTC{'+' if timezone_offset >= 0 else ''}{timezone_offset}"
115 |         )
116 | 
117 |         # 如果是UTC时区而不是本地时区，需调整
118 |         if timezone_offset != 0:
119 |             logger.info(
120 |                 f"检测到时区差异，将调整时间从本地时间 {hour:02d}:{minute:02d} 到cron时间"
121 |             )
122 | 
123 |         # 构建cron表达式 - 标准cron格式为：分 时 日 月 周
124 |         # 我们直接使用本地时间，不进行时区转换，让croniter基于本地时间处理
125 |         cron_expression = f"{minute} {hour} * * *"
126 |         logger.info(f"时间字符串 '{time_str}' 已转换为cron表达式: '{cron_expression}'")
127 | 
128 |         # 验证cron表达式格式
129 |         try:
130 |             from croniter import croniter
131 | 
132 |             if not croniter.is_valid(cron_expression):
133 |                 logger.error(f"生成的cron表达式无效: '{cron_expression}'")
134 |                 return "0 0 * * *"  # 默认午夜执行
135 | 
136 |             # 附加检查：计算下一个执行时间，确保表达式可以正确工作
137 |             import datetime
138 | 
139 |             base = datetime.datetime.now()
140 |             cron = croniter(cron_expression, base)
141 |             next_run = cron.get_next(datetime.datetime)
142 | 
143 |             # 输出下次执行的本地时间，方便验证
144 |             local_next_run = next_run
145 |             logger.info(
146 |                 f"使用cron表达式 '{cron_expression}' 计算的下次执行时间: {local_next_run.strftime('%Y-%m-%d %H:%M:%S')} (本地时间)"
147 |             )
148 | 
149 |         except Exception as croniter_error:
150 |             logger.error(f"cron表达式验证失败: {croniter_error}")
151 |             return "0 0 * * *"  # 默认午夜执行
152 | 
153 |         return cron_expression
154 |     except Exception as e:
155 |         logger.error(f"转换时间字符串到cron表达式失败 '{time_str}': {e}")
156 |         import traceback
157 | 
158 |         logger.error(f"转换错误详情: {traceback.format_exc()}")
159 |         return "0 0 * * *"  # 默认午夜执行
160 | 
161 | 
def parse_group_list(group_list_str: str) -> Set[str]:
    """
    Parse a comma-separated string of group IDs into a set.

    Args:
        group_list_str: Group IDs separated by ASCII commas, e.g.
            "123456789,987654321". Whitespace around each ID is ignored.

    Returns:
        The set of non-blank group IDs. An empty set is returned when the
        input is falsy or contains only whitespace.
    """
    if not (group_list_str and group_list_str.strip()):
        return set()

    # Trim every comma-separated piece and keep only the non-blank ones.
    trimmed_pieces = (piece.strip() for piece in group_list_str.split(","))
    return {piece for piece in trimmed_pieces if piece}
183 | 
184 | 
def is_group_enabled(group_id: str, enabled_groups: Set[str]) -> bool:
    """
    Check whether the word-cloud feature is enabled for a group.

    Args:
        group_id: Group ID. A non-string value is converted with ``str()``
            before the lookup.
        enabled_groups: Collection of group IDs the feature is enabled for.
            An empty (or otherwise falsy) collection enables NO group.

    Returns:
        True only when ``enabled_groups`` is non-empty and contains the
        stringified ``group_id``; False otherwise, including when the ID
        cannot be converted to a string.
    """
    # Normalise the ID so that e.g. an int 123 matches the string "123".
    if not isinstance(group_id, str):
        try:
            group_id = str(group_id)
        except Exception:
            # Only ordinary errors are swallowed here; KeyboardInterrupt and
            # SystemExit must keep propagating, hence no bare ``except``.
            logger.warning(f"群ID类型错误: {type(group_id)}，无法判断群聊是否启用")
            return False

    # An empty enable-list means no group was explicitly enabled.
    if not enabled_groups:
        logger.debug(f"启用群列表为空，群 {group_id} 未在指定启用列表中，默认不启用。")
        return False

    # Otherwise the group is enabled exactly when it is listed.
    result = group_id in enabled_groups
    logger.debug(f"群 {group_id} {'在' if result else '不在'}启用列表中")
    return result
214 | 
215 | 
def get_day_start_end_timestamps() -> Tuple[int, int]:
    """
    Return the Unix timestamps delimiting the current local day.

    Returns:
        A ``(start, end)`` tuple of integer timestamps, where ``start`` is
        today at 00:00:00 and ``end`` is today at 23:59:59, both taken as
        naive local times.
    """
    today = datetime.datetime.now().date()

    # Build both boundaries from the same calendar date.
    first_second = datetime.datetime.combine(today, datetime.time(0, 0, 0))
    last_second = datetime.datetime.combine(today, datetime.time(23, 59, 59))

    return int(first_second.timestamp()), int(last_second.timestamp())
228 | 
229 | 
def _resolve_image_data_dir() -> Path:
    """
    Return the base directory under which generated images are stored.

    Resolution order:
        1. The module-level ``DATA_DIR`` when it is not None.
        2. ``StarTools.get_data_dir(PLUGIN_NAME)``.
        3. A ``temp_data`` directory next to this file (created if missing),
           used when step 2 raises for any reason.
    """
    if DATA_DIR is not None:
        return DATA_DIR

    try:
        from .constant import PLUGIN_NAME

        data_dir = StarTools.get_data_dir(PLUGIN_NAME)
        logger.info(f"通过StarTools获取数据目录: {data_dir}")
    except Exception:
        # Last resort: keep images in a temp directory beside the plugin code.
        from pathlib import Path

        data_dir = Path(__file__).parent / "temp_data"
        data_dir.mkdir(exist_ok=True)
        logger.warning(
            f"DATA_DIR未初始化且无法通过StarTools获取，使用临时目录存储图片: {data_dir}"
        )
    return data_dir


def _ensure_session_image_dir(session_id: str, subdir: str) -> Path:
    """
    Return (creating it if needed) ``<data dir>/<subdir>/<safe session id>``.

    "/" and ":" in the session ID are replaced with "_" so the ID can be used
    as a single directory name; each session gets its own directory so images
    from different sessions are not mixed up.
    """
    safe_session_id = session_id.replace("/", "_").replace(":", "_")

    images_dir = _resolve_image_data_dir() / subdir
    ensure_directory(images_dir)

    session_dir = images_dir / safe_session_id
    ensure_directory(session_dir)
    return session_dir


def get_image_path(session_id: str, timestamp: Optional[int] = None) -> Path:
    """
    Return the storage path for a word-cloud image of a session.

    Args:
        session_id: Session ID; used (sanitised) as the directory name.
        timestamp: Timestamp embedded in the file name via
            ``format_timestamp``. Defaults to ``get_current_timestamp()``.

    Returns:
        ``<data dir>/images/<session>/wordcloud_<formatted timestamp>.png``.
        The containing directories are created as a side effect.
    """
    if timestamp is None:
        timestamp = get_current_timestamp()

    session_dir = _ensure_session_image_dir(session_id, "images")
    return session_dir / f"wordcloud_{format_timestamp(timestamp)}.png"


def get_daily_image_path(session_id: str, date: Optional[datetime.date] = None) -> Path:
    """
    Return the storage path for the daily word-cloud image of a session.

    Args:
        session_id: Session ID; used (sanitised) as the directory name.
        date: Date embedded in the file name. Defaults to today.

    Returns:
        ``<data dir>/daily_images/<session>/daily_wordcloud_<YYYY-MM-DD>.png``.
        The containing directories are created as a side effect.
    """
    if date is None:
        date = datetime.date.today()

    session_dir = _ensure_session_image_dir(session_id, "daily_images")

    # The date alone names the file, so there is one image per session per day.
    date_str = date.strftime("%Y-%m-%d")
    return session_dir / f"daily_wordcloud_{date_str}.png"
320 | 
321 | 
def segment_text(
    text: str, min_length: int = 2, stop_words: Optional[List[str]] = None
) -> List[str]:
    """
    Segment text into words with jieba and filter out noise.

    Args:
        text: Text to segment.
        min_length: Minimum length (in characters) of a kept word.
        stop_words: Words to discard. Defaults to ``DEFAULT_STOPWORDS``.

    Returns:
        The kept words in their original order, each stripped of surrounding
        whitespace. An empty list is returned when the text looks like a bot
        command (starts with "#", "/", "wc" or "词云", or contains "生成词云"
        or "/wordcloud").
    """
    if stop_words is None:
        stop_words = DEFAULT_STOPWORDS

    import re

    # Skip commands and command-related keywords entirely.
    text_lower = text.lower()
    if (
        text_lower.startswith(("#", "/", "wc", "词云"))
        or "生成词云" in text_lower
        or "/wordcloud" in text_lower
    ):
        return []

    # Drop @mentions (@username, @用户名, @123456, ...), then collapse whitespace.
    text = re.sub(r"@[^\s]+", "", text)
    text = re.sub(r"\s+", " ", text).strip()

    # Hash-based lookup: one pass to build instead of a list scan per token.
    if isinstance(stop_words, (set, frozenset)):
        stop_set = stop_words
    else:
        stop_set = set(stop_words)

    filtered_words = []
    for raw_word in jieba.lcut(text):
        # Every check below, and the emitted value, uses the stripped token so
        # that whitespace padding cannot defeat the stop-word or digit filters.
        word = raw_word.strip()
        if (
            len(word) >= min_length
            and word not in stop_set
            and not word.isdigit()  # purely numeric
            and not all(c.isascii() and not c.isalpha() for c in word)  # ASCII symbols/digits only
            and not word.startswith("@")  # extra guard against leftover mentions
        ):
            filtered_words.append(word)

    return filtered_words
373 | 
374 | 
def load_stop_words(file_path: Optional[str] = None) -> List[str]:
    """
    Load stop words from a file, on top of the default stop words.

    Args:
        file_path: Path of a UTF-8 text file with one stop word per line.
            A leading byte-order mark is tolerated.

    Returns:
        A new list starting with ``DEFAULT_STOPWORDS`` followed by the words
        from the file that are not already present, in file order. If the
        path is empty or does not exist, only the defaults are returned. If
        reading fails, the error is logged and whatever was collected so far
        is returned.
    """
    stop_words = DEFAULT_STOPWORDS.copy()
    if not (file_path and os.path.exists(file_path)):
        return stop_words

    # Set mirror of the list: O(1) duplicate checks while keeping list order.
    seen = set(stop_words)
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged and drops a BOM if present,
        # so the first word is not silently prefixed with "\ufeff".
        with open(file_path, "r", encoding="utf-8-sig") as f:
            for line in f:
                word = line.strip()
                if word and word not in seen:
                    seen.add(word)
                    stop_words.append(word)
    except Exception as e:
        logger.error(f"加载停用词文件失败: {e}")

    return stop_words
397 | 
398 | 
def extract_group_id_from_session(session_id: str) -> Optional[str]:
    """
    Extract a group ID from a session ID, trying several known formats.

    Args:
        session_id: Session ID. Non-string values are converted with
            ``str()``. Recognised formats, tried in this order:
                - "wechatpadpro_group_123456789@chatroom"
                  (returned including the "@chatroom" suffix)
                - "123456789" (digits only)
                - "aiocqhttp_group_123456789"
                - "aiocqhttp:GroupMessage:123456789"
                - "aiocqhttp:GroupMessage:0_123456789"
                - "qqofficial:group:123456789"
                - any ":"-separated ID with a part ending in 5+ digits
                - anything else containing a run of 5+ digits (the longest
                  run wins)

    Returns:
        The extracted group ID, or None when the session ID is empty, no
        candidate is found, or an unexpected error occurs. Because of the two
        fallback heuristics, a value may also be returned for IDs that are
        not group sessions.
    """
    try:
        if not session_id:
            logger.warning("会话ID为空，无法提取群号")
            return None

        import re

        # Normalise up front so every check below (regex, isdigit, split)
        # also works when the caller passes a non-string ID such as an int.
        session_id = str(session_id)

        # IDs carrying "@chatroom" (e.g. wechatpadpro_group_123456789@chatroom).
        match = re.match(r".+?_group_(\d+@chatroom)", session_id)
        if match:
            return match.group(1)

        # Session ID that is nothing but digits.
        if session_id.isdigit():
            logger.debug(f"会话ID是纯数字，直接作为群号: {session_id}")
            return session_id

        # "platform_group_groupid" format (e.g. "aiocqhttp_group_142443871").
        if "_group_" in session_id:
            parts = session_id.split("_group_")
            if len(parts) == 2 and parts[1].isdigit():
                logger.debug(
                    f"从下划线分隔的会话ID '{session_id}' 提取到群号: {parts[1]}"
                )
                return parts[1]

        # ":"-separated session IDs.
        if ":" in session_id:
            parts = session_id.split(":")

            # 1. Standard three-part format: [platform]:[type]:[group id].
            if len(parts) >= 3:
                middle_part = parts[1].lower()
                group_keywords = ("group", "群", "multi", "channel")
                if any(keyword in middle_part for keyword in group_keywords):
                    # Drop a possible prefix such as "0_" in "0_123456789";
                    # without "_" the split yields the part unchanged.
                    group_id = parts[2].split("_")[-1]
                    if group_id.isdigit():
                        logger.debug(
                            f"从三段式会话ID '{session_id}' 提取到群号: {group_id}"
                        )
                        return group_id

            # 2. Scan the parts from last to first for something ID-like.
            for i in range(len(parts) - 1, -1, -1):
                potential_id = parts[i].split("_")[-1]
                # Heuristic: group IDs are assumed to have at least 5 digits.
                if potential_id.isdigit() and len(potential_id) >= 5:
                    logger.debug(
                        f"从会话ID '{session_id}' 的第{i + 1}部分提取到可能的群号: {potential_id}"
                    )
                    return potential_id

        # Last resort: the longest run of 5 or more digits anywhere in the ID.
        matches = re.findall(r"\d{5,}", session_id)
        if matches:
            longest_match = max(matches, key=len)
            logger.debug(
                f"使用正则表达式从会话ID '{session_id}' 提取到可能的群号: {longest_match}"
            )
            return longest_match

        logger.warning(f"无法从会话ID '{session_id}' 提取群号")
        return None
    except Exception as e:
        logger.error(f"提取群号时出错: {e}")
        import traceback

        logger.error(f"提取群号错误详情: {traceback.format_exc()}")
        return None
508 | 


--------------------------------------------------------------------------------
/wordcloud_core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | 词云生成器核心模块
3 | """
4 | 


--------------------------------------------------------------------------------
/wordcloud_core/generator.py:
--------------------------------------------------------------------------------
   1 | """
   2 | 词云生成核心模块
   3 | """
   4 | 
   5 | import os
   6 | import time
   7 | import datetime
   8 | import shutil
   9 | import traceback
  10 | import threading
  11 | from typing import Dict, List, Optional, Tuple
  12 | from collections import Counter
  13 | from pathlib import Path
  14 | 
  15 | import numpy as np
  16 | from wordcloud import WordCloud
  17 | import matplotlib.pyplot as plt
  18 | import matplotlib
  19 | 
  20 | matplotlib.use("Agg")  # 使用非交互式后端
  21 | from matplotlib.font_manager import FontProperties
  22 | from PIL import Image, ImageDraw, ImageFont, ImageStat
  23 | from astrbot.api import logger
  24 | from astrbot.api.star import StarTools
  25 | 
  26 | from ..utils import segment_text, load_stop_words, get_image_path
  27 | from ..constant import (
  28 |     DEFAULT_WIDTH,
  29 |     DEFAULT_HEIGHT,
  30 |     DEFAULT_MAX_WORDS,
  31 |     DEFAULT_BACKGROUND_COLOR,
  32 |     DEFAULT_COLORMAP,
  33 |     DEFAULT_MIN_WORD_LENGTH,
  34 |     PLUGIN_DIR,
  35 |     DATA_DIR,
  36 |     PLUGIN_NAME,
  37 | )
  38 | 
  39 | # 全局锁，用于防止多个线程同时生成相同的词云
  40 | _WORDCLOUD_LOCKS = {}
  41 | _GLOBAL_LOCK = threading.Lock()
  42 | 
  43 | 
  44 | # 确保当前词云生成请求唯一性的方法
  45 | def _get_lock_for_key(key: str) -> threading.Lock:
  46 |     """
  47 |     获取指定键的锁对象，如果不存在则创建
  48 |     """
  49 |     with _GLOBAL_LOCK:
  50 |         if key not in _WORDCLOUD_LOCKS:
  51 |             _WORDCLOUD_LOCKS[key] = threading.Lock()
  52 |         return _WORDCLOUD_LOCKS[key]
  53 | 
  54 | 
  55 | class WordCloudGenerator:
  56 |     """词云生成器类"""
  57 | 
  58 |     def __init__(
  59 |         self,
  60 |         width: int = DEFAULT_WIDTH,
  61 |         height: int = DEFAULT_HEIGHT,
  62 |         max_words: int = DEFAULT_MAX_WORDS,
  63 |         background_color: str = DEFAULT_BACKGROUND_COLOR,
  64 |         colormap: str = DEFAULT_COLORMAP,
  65 |         font_path: Optional[str] = None,
  66 |         min_word_length: int = DEFAULT_MIN_WORD_LENGTH,
  67 |         stop_words_file: Optional[str] = None,
  68 |         shape: str = "rectangle",  # 修改默认形状为矩形
  69 |         custom_mask_path: Optional[str] = None,  # 添加自定义蒙版路径参数
  70 |         min_font_size: int = 8,  # 添加最小字体大小参数
  71 |         max_font_size: int = 200,  # 添加最大字体大小参数
  72 |         min_word_frequency: int = 1,  # 新增：最小词频参数
  73 |     ):
  74 |         """
  75 |         初始化词云生成器
  76 | 
  77 |         Args:
  78 |             width: 词云图片宽度
  79 |             height: 词云图片高度
  80 |             max_words: 最大词数量
  81 |             background_color: 背景颜色
  82 |             colormap: 颜色映射
  83 |             font_path: 字体路径
  84 |             min_word_length: 最小词长度
  85 |             stop_words_file: 停用词文件路径
  86 |             shape: 词云形状，支持"circle"和"rectangle"
  87 |             custom_mask_path: 自定义蒙版图片路径
  88 |             min_font_size: 最小字体大小，用于低频词
  89 |             max_font_size: 最大字体大小，用于高频词
  90 |         """
  91 |         self.width = width
  92 |         self.height = height
  93 |         self.max_words = max_words
  94 |         self.background_color = background_color
  95 |         self.colormap = colormap
  96 |         self.shape = shape
  97 |         self.custom_mask_path = custom_mask_path  # 保存自定义蒙版路径
  98 |         self.min_font_size = min_font_size  # 保存最小字体大小
  99 |         self.max_font_size = max_font_size  # 保存最大字体大小
 100 |         self.min_word_frequency = min_word_frequency  # 新增：保存最小词频
 101 | 
 102 |         # 获取数据目录，优先使用StarTools确保可用
 103 |         data_dir = None
 104 |         try:
 105 |             # 先尝试通过StarTools获取数据目录，这是最可靠的方式
 106 |             data_dir = StarTools.get_data_dir(PLUGIN_NAME)
 107 |             logger.info(f"通过StarTools获取数据目录: {data_dir}")
 108 |         except Exception as e:
 109 |             logger.warning(f"通过StarTools获取数据目录失败: {e}")
 110 |             # 尝试使用全局DATA_DIR
 111 |             if DATA_DIR is not None:
 112 |                 data_dir = DATA_DIR
 113 |                 logger.info(f"使用全局定义的DATA_DIR: {data_dir}")
 114 |             else:
 115 |                 # 无法获取数据目录，使用临时目录作为备用
 116 |                 temp_data_dir = PLUGIN_DIR / "temp_data"
 117 |                 temp_data_dir.mkdir(exist_ok=True)
 118 |                 data_dir = temp_data_dir
 119 |                 logger.warning(f"无法获取标准数据目录，使用临时目录: {temp_data_dir}")
 120 | 
 121 |         # 确保资源目录存在
 122 |         resources_dir = data_dir / "resources"
 123 |         resources_dir.mkdir(exist_ok=True)
 124 |         fonts_dir = resources_dir / "fonts"
 125 |         fonts_dir.mkdir(exist_ok=True)
 126 | 
 127 |         # 设置默认字体路径，从插件目录复制到数据目录
 128 |         plugin_font_path = PLUGIN_DIR / "fonts" / "LXGWWenKai-Regular.ttf"
 129 |         data_font_path = fonts_dir / "LXGWWenKai-Regular.ttf"
 130 | 
 131 |         # 如果数据目录中没有字体，尝试从插件目录复制
 132 |         if not data_font_path.exists() and plugin_font_path.exists():
 133 |             try:
 134 |                 shutil.copy(plugin_font_path, data_font_path)
 135 |                 logger.info(f"已将字体文件复制到数据目录: {data_font_path}")
 136 |             except Exception as e:
 137 |                 logger.warning(f"复制字体文件失败: {e}")
 138 | 
 139 |         # 处理字体路径
 140 |         if font_path and os.path.exists(font_path):
 141 |             # 如果是相对路径，可能需要相对于插件目录解析
 142 |             if not os.path.isabs(font_path):
 143 |                 abs_font_path = PLUGIN_DIR / font_path
 144 |                 if os.path.exists(abs_font_path):
 145 |                     self.font_path = str(abs_font_path)
 146 |                     logger.info(f"使用插件目录中的字体: {self.font_path}")
 147 |                 else:
 148 |                     # 尝试相对于数据目录
 149 |                     data_relative_font_path = (
 150 |                         data_dir / "resources" / "fonts" / os.path.basename(font_path)
 151 |                     )
 152 |                     if os.path.exists(data_relative_font_path):
 153 |                         self.font_path = str(data_relative_font_path)
 154 |                         logger.info(f"使用数据目录中的字体: {self.font_path}")
 155 |                     else:
 156 |                         self.font_path = (
 157 |                             font_path  # 使用原始路径，可能是相对于当前工作目录
 158 |                         )
 159 |             else:
 160 |                 self.font_path = font_path  # 使用绝对路径
 161 |         elif data_font_path.exists():
 162 |             self.font_path = str(data_font_path)
 163 |             logger.info(f"使用数据目录中的字体: {self.font_path}")
 164 |         elif plugin_font_path.exists():
 165 |             self.font_path = str(plugin_font_path)
 166 |             logger.info(f"使用插件目录中的字体: {self.font_path}")
 167 |         else:
 168 |             self.font_path = None
 169 |             logger.warning("未找到有效字体文件，将使用系统默认字体")
 170 | 
 171 |         # 处理停用词文件
 172 |         if stop_words_file:
 173 |             # 处理相对路径
 174 |             if not os.path.isabs(stop_words_file):
 175 |                 # 尝试相对于插件目录解析
 176 |                 plugin_stopwords_path = PLUGIN_DIR / stop_words_file
 177 |                 data_stopwords_path = (
 178 |                     data_dir / "resources" / os.path.basename(stop_words_file)
 179 |                 )
 180 | 
 181 |                 # 如果插件目录有文件但数据目录没有，复制过去
 182 |                 if plugin_stopwords_path.exists() and not data_stopwords_path.exists():
 183 |                     try:
 184 |                         shutil.copy(plugin_stopwords_path, data_stopwords_path)
 185 |                         logger.info(
 186 |                             f"已将停用词文件复制到数据目录: {data_stopwords_path}"
 187 |                         )
 188 |                         # 使用数据目录中的文件
 189 |                         stop_words_file = str(data_stopwords_path)
 190 |                     except Exception as e:
 191 |                         logger.warning(f"复制停用词文件失败: {e}")
 192 |                         # 如果复制失败，使用插件目录中的文件
 193 |                         if plugin_stopwords_path.exists():
 194 |                             stop_words_file = str(plugin_stopwords_path)
 195 |                 elif data_stopwords_path.exists():
 196 |                     # 使用数据目录中的文件
 197 |                     stop_words_file = str(data_stopwords_path)
 198 |                 elif plugin_stopwords_path.exists():
 199 |                     # 使用插件目录中的文件
 200 |                     stop_words_file = str(plugin_stopwords_path)
 201 | 
 202 |         self.min_word_length = min_word_length
 203 |         self.stop_words = load_stop_words(stop_words_file)
 204 | 
 205 |         # 保存临时使用的data_dir
 206 |         self._temp_data_dir = data_dir
 207 | 
 208 |         # 初始化词云生成器
 209 |         self._init_wordcloud()
 210 | 
 211 |     def _create_circle_mask(self):
 212 |         """
 213 |         创建圆形蒙版
 214 | 
 215 |         在WordCloud中，蒙版的工作方式与直觉相反：
 216 |         - 值为0的区域允许绘制文字
 217 |         - 值为非0（如255）的区域不允许绘制文字
 218 | 
 219 |         为生成在圆形内部的词云，我们需要：
 220 |         1. 创建一个全为255的数组（默认不允许绘制）
 221 |         2. 将圆形内部区域设置为0（允许绘制）
 222 |         3. 确保圆形外部区域保持为255（不允许绘制）
 223 |         """
 224 |         # 创建一个正方形画布，边长取width和height的最大值确保圆形不会被压缩
 225 |         size = max(self.width, self.height)
 226 | 
 227 |         # 创建一个全255数组作为基础蒙版（默认不允许绘制）
 228 |         mask = np.ones((size, size), dtype=np.uint8) * 255
 229 | 
 230 |         # 计算圆心和半径
 231 |         center = size // 2
 232 |         radius = int(center * 0.9)  # 使用较小的半径避免太靠近边缘
 233 | 
 234 |         # 创建一个网格坐标系用于计算每个点到圆心的距离
 235 |         y, x = np.ogrid[:size, :size]
 236 | 
 237 |         # 计算每个点到圆心的距离的平方
 238 |         dist_from_center = (x - center) ** 2 + (y - center) ** 2
 239 | 
 240 |         # 圆内区域的布尔掩码（True表示在圆内）
 241 |         circle = dist_from_center <= radius**2
 242 | 
 243 |         # 将圆内区域设为0（允许绘制文字），其余区域保持为255（不绘制文字）
 244 |         mask[circle] = 0
 245 | 
 246 |         # 验证蒙版：记录圆内（值为0）像素的数量和总像素数
 247 |         circle_pixels = np.sum(mask == 0)
 248 |         total_pixels = size * size
 249 |         circle_ratio = circle_pixels / total_pixels
 250 | 
 251 |         logger.info(f"生成圆形蒙版: 大小={size}x{size}, 半径={radius}")
 252 |         logger.info(
 253 |             f"圆内像素数量: {circle_pixels}, 总像素数: {total_pixels}, 比例: {circle_ratio:.2f}"
 254 |         )
 255 | 
 256 |         return mask
 257 | 
 258 |     def _create_diamond_mask(self, width: int, height: int):
 259 |         """创建菱形蒙版 (白色背景，黑色形状 - 词云绘制区域)
 260 |         词云库通常期望蒙版中值为0的区域绘制文字，非0区域不绘制。
 261 |         所以我们画黑色菱形在白色背景上，然后转换时黑色变0，白色变255.
 262 |         """
 263 |         img = Image.new("L", (width, height), 255)  # 白色背景 (不绘制区域)
 264 |         draw = ImageDraw.Draw(img)
 265 |         # 定义菱形的四个顶点
 266 |         # (width/2, 0), (width, height/2), (width/2, height), (0, height/2)
 267 |         points = [
 268 |             (width // 2, 0),
 269 |             (width, height // 2),
 270 |             (width // 2, height - 1),  # height-1 to avoid going out of bounds
 271 |             (0, height // 2),
 272 |         ]
 273 |         draw.polygon(points, fill=0)  # 黑色菱形 (绘制区域)
 274 |         mask = np.array(img)
 275 |         logger.info(f"生成菱形蒙版: 大小={width}x{height}")
 276 |         return mask
 277 | 
 278 |     def _create_triangle_mask(self, width: int, height: int):
 279 |         """创建上三角形蒙版 (白色背景，黑色形状 - 词云绘制区域)"""
 280 |         img = Image.new("L", (width, height), 255)  # 白色背景
 281 |         draw = ImageDraw.Draw(img)
 282 |         # 定义上三角形的三个顶点
 283 |         # (width/2, 0), (width, height), (0, height)
 284 |         points = [
 285 |             (width // 2, 0),
 286 |             (width - 1, height - 1),  # width-1, height-1 to avoid going out of bounds
 287 |             (0, height - 1),
 288 |         ]
 289 |         draw.polygon(points, fill=0)  # 黑色三角形
 290 |         mask = np.array(img)
 291 |         logger.info(f"生成上三角形蒙版: 大小={width}x{height}")
 292 |         return mask
 293 | 
 294 |     def _create_cloud_mask(self, width: int, height: int):
 295 |         """创建底部平坦、顶部具有3-4个起伏圆弧的云朵形状蒙版"""
 296 |         img = Image.new("L", (width, height), 255)  # 白色背景 (不绘制区域)
 297 |         draw = ImageDraw.Draw(img)
 298 | 
 299 |         y_bottom_line_factor = 0.7  # 平底从高度的70%处开始
 300 | 
 301 |         # 定义构成顶部起伏的椭圆参数 (cx, cy, rx, ry)
 302 |         # cx: 中心点X轴比例, cy: 中心点Y轴比例
 303 |         # rx: X轴半径比例, ry: Y轴半径比例
 304 | 
 305 |         ellipses_params = [
 306 |             # 尝试构成3个主要、较宽的顶部凸起，以及一个更小的顶部点缀
 307 |             # 主要凸起1 (中间，最高)
 308 |             (0.50, 0.35, 0.25, 0.22),  # Y中心0.35, Y半径0.22 -> 顶部在0.13, 底部在0.57
 309 |             # 主要凸起2 (左侧)
 310 |             (
 311 |                 0.25,
 312 |                 0.45,
 313 |                 0.28,
 314 |                 0.20,
 315 |             ),  # Y中心0.45, Y半径0.20 -> 顶部在0.25, 底部在0.65. X左边缘0.25-0.28 = -0.03 (会裁剪到0)
 316 |             # 主要凸起3 (右侧)
 317 |             (
 318 |                 0.75,
 319 |                 0.45,
 320 |                 0.28,
 321 |                 0.20,
 322 |             ),  # Y中心0.45, Y半径0.20 -> 顶部在0.25, 底部在0.65. X右边缘0.75+0.28 = 1.03 (会裁剪到1)
 323 |             # 额外的顶部小凸起，增加起伏感 (可选，如果上面3个效果够好，可以移除或调整)
 324 |             (0.50, 0.20, 0.12, 0.10),  # 更小的，在中央凸起之上
 325 |         ]
 326 | 
 327 |         min_x_coord = width
 328 |         max_x_coord = 0
 329 | 
 330 |         for cx_f, cy_f, rx_f, ry_f in ellipses_params:
 331 |             center_x = int(width * cx_f)
 332 |             center_y = int(height * cy_f)
 333 |             radius_x = max(1, int(width * rx_f))
 334 |             radius_y = max(1, int(height * ry_f))
 335 | 
 336 |             bbox = (
 337 |                 center_x - radius_x,
 338 |                 center_y - radius_y,
 339 |                 center_x + radius_x,
 340 |                 center_y + radius_y,
 341 |             )
 342 |             draw.ellipse(bbox, fill=0)  # 值为0的区域是词云绘制区
 343 | 
 344 |             min_x_coord = min(min_x_coord, center_x - radius_x)
 345 |             max_x_coord = max(max_x_coord, center_x + radius_x)
 346 | 
 347 |         # 确保 min_x 和 max_x 在图像范围内
 348 |         min_x_coord = max(0, min_x_coord)
 349 |         max_x_coord = min(width - 1, max_x_coord)
 350 | 
 351 |         # 绘制平坦的底部矩形
 352 |         if min_x_coord < max_x_coord:  # 只有当云朵有宽度时才画底部
 353 |             flat_bottom_y_start = int(height * y_bottom_line_factor)
 354 | 
 355 |             fill_rect_bbox = (
 356 |                 min_x_coord,
 357 |                 flat_bottom_y_start,
 358 |                 max_x_coord,
 359 |                 height - 1,  # 延伸到图像底部
 360 |             )
 361 |             draw.rectangle(fill_rect_bbox, fill=0)
 362 | 
 363 |         mask = np.array(img)
 364 |         logger.info(
 365 |             f"通过程序化绘制生成顶部起伏、底部平坦的云朵蒙版: 大小={width}x{height}"
 366 |         )
 367 |         return mask
 368 | 
 369 |     def _init_wordcloud(self) -> None:
 370 |         """初始化词云生成器"""
 371 |         # 如果形状设置为圆形，创建圆形蒙版
 372 |         mask = None
 373 |         processed_custom_mask = False  # 标记是否成功处理了自定义蒙版
 374 | 
 375 |         # 优先处理自定义蒙版
 376 |         if self.custom_mask_path:
 377 |             mask_image_path = None
 378 |             # 检查是绝对路径还是相对路径
 379 |             if os.path.isabs(self.custom_mask_path):
 380 |                 mask_image_path = Path(self.custom_mask_path)
 381 |             else:
 382 |                 # 相对路径，相对于插件数据目录下的 resources/images/
 383 |                 if self._temp_data_dir:  # _temp_data_dir 在 __init__ 中设置
 384 |                     mask_image_path = (
 385 |                         self._temp_data_dir
 386 |                         / "resources"
 387 |                         / "images"
 388 |                         / self.custom_mask_path
 389 |                     )
 390 |                 else:  # Fallback if _temp_data_dir is somehow not set
 391 |                     mask_image_path = (
 392 |                         PLUGIN_DIR / "resources" / "images" / self.custom_mask_path
 393 |                     )
 394 | 
 395 |             if (
 396 |                 mask_image_path
 397 |                 and mask_image_path.exists()
 398 |                 and mask_image_path.is_file()
 399 |             ):
 400 |                 try:
 401 |                     logger.info(f"加载自定义蒙版图片: {mask_image_path}")
 402 |                     custom_mask_image = Image.open(mask_image_path)
 403 |                     mask = np.array(custom_mask_image)
 404 |                     # 确保蒙版是2D的 (灰度图或alpha通道)
 405 |                     if mask.ndim == 3:
 406 |                         # 如果是RGB(A)，尝试取一个通道，比如红色，或者转换为灰度
 407 |                         # WordCloud 通常期望蒙版是单通道的，非零表示区域，零表示空白
 408 |                         # 但更常见的做法是白色(255)为忽略区域，黑色(0)或深色为绘制区域
 409 |                         # 如果是RGBA，第四个通道是alpha，也可以用。这里我们简单转灰度
 410 |                         # Image.open().convert('L') 之后再 np.array() 是更标准做法
 411 |                         # 为了安全，重新用 convert('L') 加载
 412 |                         custom_mask_image_gray = Image.open(mask_image_path).convert(
 413 |                             "L"
 414 |                         )
 415 |                         mask = np.array(custom_mask_image_gray)
 416 |                         logger.info("自定义蒙版已转换为灰度图.")
 417 | 
 418 |                     # 检查蒙版的值范围，wordcloud期望非绘制区域为255
 419 |                     # 如果蒙版主要是深色背景，浅色图案，可能需要反转
 420 |                     # 例如，如果用户提供的是黑底白云的图片，需要转换
 421 |                     # 这里我们假设用户提供的图片是白底黑图案 (黑色区域为词云形状)
 422 |                     # wordcloud库会将蒙版中值为0或接近0的区域视为绘制区域，255为忽略区域
 423 |                     # 所以，如果我们的图片是黑形状白背景，Pillow读入后黑是0，白是255，正好符合预期
 424 |                     logger.info(
 425 |                         f"自定义蒙版加载成功，形状: {mask.shape}, 类型: {mask.dtype}"
 426 |                     )
 427 |                     processed_custom_mask = True
 428 |                 except Exception as e:
 429 |                     logger.error(
 430 |                         f"加载或处理自定义蒙版图片失败: {mask_image_path}, 错误: {e}"
 431 |                     )
 432 |                     mask = None  # 加载失败，不使用蒙版
 433 |             else:
 434 |                 logger.warning(
 435 |                     f"自定义蒙版图片路径无效或文件不存在: {self.custom_mask_path} (解析后路径: {mask_image_path})"
 436 |                 )
 437 | 
 438 |         # 如果没有成功处理自定义蒙版，再根据 shape 参数创建预设蒙版
 439 |         if not processed_custom_mask:
 440 |             if self.shape == "circle":
 441 |                 mask = self._create_circle_mask()
 442 |             elif self.shape == "diamond":
 443 |                 mask = self._create_diamond_mask(self.width, self.height)
 444 |             elif self.shape == "triangle_up":
 445 |                 mask = self._create_triangle_mask(self.width, self.height)
 446 |             elif self.shape == "cloud":
 447 |                 mask = self._create_cloud_mask(self.width, self.height)
 448 |             # 对于 "rectangle" 或其他未指定蒙版的形状，mask 保持为 None，词云将默认为矩形        # 词云参数
 449 |         wordcloud_params = {
 450 |             "width": self.width,
 451 |             "height": self.height,
 452 |             "max_words": self.max_words,
 453 |             "background_color": self.background_color,
 454 |             "colormap": self.colormap,
 455 |             "min_font_size": self.min_font_size,
 456 |             "max_font_size": self.max_font_size,
 457 |             "random_state": 42,
 458 |             "collocations": False,  # 避免重复显示词组
 459 |             "normalize_plurals": False,
 460 |             "mask": mask,  # 设置蒙版
 461 |             "prefer_horizontal": 0.9,  # 调整为90%水平显示，增加布局多样性
 462 |             "repeat": False,  # 不重复使用词以填满空间，避免文字出现在不应该出现的地方
 463 |             "mode": "RGB",  # 使用RGB模式，避免与轮廓绘制时的通道不匹配问题
 464 |         }
 465 | 
 466 |         # 添加轮廓效果，增强形状
 467 |         if self.shape == "circle":
 468 |             # 由于通道不匹配问题，暂时禁用轮廓效果
 469 |             # wordcloud_params['contour_width'] = 1
 470 |             # wordcloud_params['contour_color'] = self.background_color
 471 |             pass
 472 | 
 473 |         # 如果提供了字体路径，则使用它
 474 |         if self.font_path and os.path.exists(self.font_path):
 475 |             wordcloud_params["font_path"] = self.font_path
 476 | 
 477 |         self.wordcloud = WordCloud(**wordcloud_params)
 478 | 
 479 |     def process_text(self, text: str) -> List[str]:
 480 |         """
 481 |         处理文本，进行分词和过滤
 482 | 
 483 |         Args:
 484 |             text: 输入文本
 485 | 
 486 |         Returns:
 487 |             处理后的词语列表
 488 |         """
 489 |         return segment_text(text, self.min_word_length, self.stop_words)
 490 | 
 491 |     def process_texts(self, texts: List[str]) -> Dict[str, int]:
 492 |         """
 493 |         处理多条文本，统计词频
 494 | 
 495 |         Args:
 496 |             texts: 文本列表
 497 | 
 498 |         Returns:
 499 |             词频统计字典
 500 |         """
 501 |         # 合并所有文本并分词
 502 |         all_words = []
 503 |         for text in texts:
 504 |             words = self.process_text(text)
 505 |             all_words.extend(words)
 506 | 
 507 |         # 统计词频
 508 |         word_counts = Counter(all_words)
 509 |         return dict(word_counts)
 510 | 
 511 |     def _filter_by_frequency(self, word_counts: Dict[str, int]) -> Dict[str, int]:
 512 |         """
 513 |         根据最小词频过滤词汇。
 514 | 
 515 |         Args:
 516 |             word_counts: 原始词频统计。
 517 | 
 518 |         Returns:
 519 |             过滤后的词频统计。
 520 |         """
 521 |         if self.min_word_frequency <= 1:
 522 |             return word_counts  # 如果最小词频设置为1或更小，则不进行过滤
 523 | 
 524 |         filtered_counts = {
 525 |             word: count
 526 |             for word, count in word_counts.items()
 527 |             if count >= self.min_word_frequency
 528 |         }
 529 |         logger.info(
 530 |             f"词频过滤 (min_freq={self.min_word_frequency}): 原始词汇 {len(word_counts)}个 -> 过滤后 {len(filtered_counts)}个"
 531 |         )
 532 |         return filtered_counts
 533 | 
 534 |     def _add_timestamp_to_image(
 535 |         self, img: Image.Image, timestamp: Optional[int] = None
 536 |     ) -> Image.Image:
 537 |         """
 538 |         向图片添加时间戳水印
 539 | 
 540 |         Args:
 541 |             img: 原始图片
 542 |             timestamp: 时间戳，默认为当前时间
 543 | 
 544 |         Returns:
 545 |             添加水印后的图片
 546 |         """
 547 |         if timestamp is None:
 548 |             timestamp = int(time.time())
 549 | 
 550 |         # 格式化时间戳
 551 |         time_str = f"生成时间: {datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')}"
 552 | 
 553 |         # 创建可绘制对象
 554 |         draw = ImageDraw.Draw(img, "RGBA")
 555 | 
 556 |         # 字体大小
 557 |         font_size = 14
 558 | 
 559 |         try:
 560 |             # 尝试加载自定义字体
 561 |             font = None
 562 |             try:
 563 |                 if self.font_path and os.path.exists(self.font_path):
 564 |                     # 尝试加载指定的字体
 565 |                     try:
 566 |                         font = ImageFont.truetype(self.font_path, font_size)
 567 |                     except:
 568 |                         # 如果加载失败，尝试使用默认字体
 569 |                         font = ImageFont.load_default()
 570 |                         logger.warning(f"加载指定字体失败: {self.font_path}")
 571 |                 else:
 572 |                     # 尝试从系统中查找可用的中文字体
 573 |                     system_fonts = [
 574 |                         # Windows中文字体
 575 |                         "C:/Windows/Fonts/simhei.ttf",  # 黑体
 576 |                         "C:/Windows/Fonts/simsun.ttc",  # 宋体
 577 |                         "C:/Windows/Fonts/simkai.ttf",  # 楷体
 578 |                         "C:/Windows/Fonts/msyh.ttc",  # 微软雅黑
 579 |                         # Linux中文字体
 580 |                         "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
 581 |                         "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
 582 |                         # macOS中文字体
 583 |                         "/System/Library/Fonts/PingFang.ttc",
 584 |                     ]
 585 | 
 586 |                     for font_path in system_fonts:
 587 |                         try:
 588 |                             if os.path.exists(font_path):
 589 |                                 font = ImageFont.truetype(font_path, font_size)
 590 |                                 logger.debug(f"成功加载系统字体: {font_path}")
 591 |                                 break
 592 |                         except:
 593 |                             continue
 594 |             except:
 595 |                 font = ImageFont.load_default()
 596 |         except Exception as e:
 597 |             logger.warning(f"加载字体失败: {e}，将使用默认字体")
 598 |             font = ImageFont.load_default()
 599 | 
 600 |         # 添加水印位置偏移量，确保文字放置位置合适
 601 |         margin = 10
 602 | 
 603 |         # 获取文本大小，用于定位
 604 |         try:
 605 |             if hasattr(font, "getbbox"):
 606 |                 text_width, text_height = font.getbbox(time_str)[2:]
 607 |             else:
 608 |                 text_width, text_height = font.getsize(time_str)
 609 |         except:
 610 |             # 如果无法获取文本大小，使用估计值
 611 |             text_width, text_height = len(time_str) * font_size // 2, font_size
 612 | 
 613 |         # 计算文字位置 - 左下角
 614 |         position = (margin, img.height - text_height - margin)
 615 | 
 616 |         # 检查背景颜色并确定文字颜色
 617 |         try:
 618 |             # 获取左下角区域的主要颜色
 619 |             sample_box = (0, img.height - text_height * 2, text_width * 2, img.height)
 620 |             sample_img = img.crop(sample_box)
 621 | 
 622 |             # 检查图像模式，如果有Alpha通道，转换为RGB
 623 |             if sample_img.mode == "RGBA":
 624 |                 # 创建白色背景
 625 |                 bg = Image.new("RGB", sample_img.size, (255, 255, 255))
 626 |                 # 复合Alpha通道
 627 |                 sample_img = Image.alpha_composite(
 628 |                     bg.convert("RGBA"), sample_img
 629 |                 ).convert("RGB")
 630 | 
 631 |             # 计算平均亮度
 632 |             avg_rgb = ImageStat.Stat(sample_img).mean
 633 |             brightness = sum(avg_rgb) / len(avg_rgb)
 634 |             is_dark_bg = brightness < 128
 635 | 
 636 |             # 根据背景选择文字颜色
 637 |             text_color = (255, 255, 255) if is_dark_bg else (0, 0, 0)
 638 |             bg_color = (0, 0, 0, 180) if is_dark_bg else (255, 255, 255, 180)
 639 | 
 640 |             logger.debug(
 641 |                 f"检测到{'深色' if is_dark_bg else '浅色'}背景，亮度值: {brightness:.1f}"
 642 |             )
 643 | 
 644 |         except Exception as e:
 645 |             # 出错时使用默认设置
 646 |             logger.warning(f"检测背景颜色失败: {e}，使用默认颜色")
 647 |             # 默认假设是深色背景
 648 |             text_color = (255, 255, 255)  # 白色文字
 649 |             bg_color = (0, 0, 0, 180)  # 半透明黑色背景
 650 | 
 651 |         # 使用半透明背景增加可读性
 652 |         bg_padding = 4
 653 |         bg_box = [
 654 |             position[0] - bg_padding,
 655 |             position[1] - bg_padding,
 656 |             position[0] + text_width + bg_padding,
 657 |             position[1] + text_height + bg_padding,
 658 |         ]
 659 | 
 660 |         # 绘制半透明背景
 661 |         draw.rectangle(bg_box, fill=bg_color)
 662 | 
 663 |         # 绘制文字
 664 |         draw.text(position, time_str, fill=text_color, font=font)
 665 | 
 666 |         return img
 667 | 
 668 |     def generate_wordcloud(
 669 |         self,
 670 |         word_counts: Dict[str, int],
 671 |         session_id: str,
 672 |         timestamp: Optional[int] = None,
 673 |         title: Optional[str] = None,
 674 |     ) -> Tuple[str, Path]:
 675 |         """
 676 |         生成词云图片
 677 | 
 678 |         Args:
 679 |             word_counts: 词频统计
 680 |             session_id: 会话ID
 681 |             timestamp: 时间戳，为None则使用当前时间
 682 |             title: 词云标题
 683 | 
 684 |         Returns:
 685 |             生成的图片路径(字符串), 路径对象
 686 |         """
 687 |         if timestamp is None:
 688 |             timestamp = int(time.time())
 689 | 
 690 |         if not word_counts:
 691 |             raise ValueError("无有效词频数据，无法生成词云")
 692 | 
 693 |         # 在生成词云前，根据配置的最小词频过滤词汇
 694 |         filtered_word_counts = self._filter_by_frequency(word_counts)
 695 | 
 696 |         if not filtered_word_counts:
 697 |             # 如果过滤后没有词了，可以抛出错误或者生成一个提示性的空图片
 698 |             # 这里我们选择抛出错误，因为通常这意味着数据不足或过滤条件太严格
 699 |             logger.warning("根据最小词频过滤后，没有足够的词汇来生成词云。")
 700 |             raise ValueError("过滤后无有效词频数据，无法生成词云")
 701 | 
 702 |         # 获取图片存储路径
 703 |         image_path = get_image_path(session_id, timestamp)
 704 | 
 705 |         # 创建锁的键名
 706 |         lock_key = f"wordcloud_{session_id}_{timestamp}"
 707 | 
 708 |         # 获取锁对象
 709 |         lock = _get_lock_for_key(lock_key)
 710 | 
 711 |         # 尝试获取锁
 712 |         if not lock.acquire(blocking=False):
 713 |             logger.warning(
 714 |                 f"已有其他线程正在生成相同的词云 {session_id}_{timestamp}，跳过本次生成"
 715 |             )
 716 | 
 717 |             # 如果文件已存在，直接返回路径
 718 |             if image_path.exists():
 719 |                 logger.info(f"使用已存在的词云图片: {image_path}")
 720 |                 return str(image_path), image_path
 721 | 
 722 |             # 等待一段时间看是否生成了
 723 |             try:
 724 |                 wait_start = time.time()
 725 |                 while time.time() - wait_start < 5.0:  # 最多等待5秒
 726 |                     time.sleep(0.5)
 727 |                     if image_path.exists():
 728 |                         logger.info(f"等待后找到了词云图片: {image_path}")
 729 |                         return str(image_path), image_path
 730 | 
 731 |                 # 如果等待超时仍未生成，则抛出异常
 732 |                 raise ValueError("等待词云生成超时，请稍后再试")
 733 |             except Exception as e:
 734 |                 logger.error(f"等待词云生成时出错: {e}")
 735 |                 raise ValueError("词云生成被其他任务占用，请稍后再试")
 736 | 
 737 |         try:
 738 |             # 生成词云
 739 |             self.wordcloud.generate_from_frequencies(
 740 |                 filtered_word_counts
 741 |             )  # 使用过滤后的词频
 742 | 
 743 |             # 确保目录存在
 744 |             image_path.parent.mkdir(parents=True, exist_ok=True)
 745 | 
 746 |             # 先保存词云图像到临时文件，避免直接操作wordcloud对象导致维度不匹配
 747 |             temp_path = image_path.parent / f"temp_{image_path.name}"
 748 |             self.wordcloud.to_file(str(temp_path))
 749 | 
 750 |             # 读取保存的图像
 751 |             wordcloud_img = np.array(Image.open(temp_path))
 752 | 
 753 |             # 使用matplotlib创建带标题的完整图像
 754 |             fig_width, fig_height = 10, 6.5
 755 |             dpi = 150
 756 | 
 757 |             # 创建带有背景色的图表
 758 |             fig = plt.figure(
 759 |                 figsize=(fig_width, fig_height),
 760 |                 facecolor=self.background_color,
 761 |                 dpi=dpi,
 762 |             )
 763 |             plt.rcParams.update({"figure.autolayout": True})
 764 |             ax = plt.axes()
 765 |             ax.set_facecolor(self.background_color)
 766 |             ax.set_position([0, 0, 1, 0.9])  # 为标题留出少量空间
 767 | 
 768 |             # 去除边框和刻度
 769 |             plt.axis("off")
 770 |             plt.box(False)
 771 |             plt.tight_layout(pad=0.1)  # 减少内边距
 772 | 
 773 |             # 绘制词云图像
 774 |             plt.imshow(wordcloud_img, interpolation="bilinear")
 775 | 
 776 |             # 设置标题，使用对比色
 777 |             if title:
 778 |                 # 选择与背景相反的颜色
 779 |                 title_color = (
 780 |                     "white" if self._is_dark_color(self.background_color) else "black"
 781 |                 )
 782 | 
 783 |                 logger.info(
 784 |                     f"设置词云标题: {title}, 背景色: {self.background_color}, 标题颜色: {title_color}"
 785 |                 )
 786 | 
 787 |                 # 设置中文标题字体
 788 |                 if self.font_path and os.path.exists(self.font_path):
 789 |                     try:
 790 |                         font_prop = FontProperties(fname=self.font_path)
 791 |                         plt.title(
 792 |                             title,
 793 |                             fontproperties=font_prop,
 794 |                             fontsize=16,
 795 |                             pad=10,
 796 |                             color=title_color,
 797 |                         )
 798 |                     except Exception as e:
 799 |                         logger.warning(f"使用自定义字体设置标题失败: {e}")
 800 |                         plt.title(title, fontsize=16, pad=10, color=title_color)
 801 |                 else:
 802 |                     plt.title(title, fontsize=16, pad=10, color=title_color)
 803 | 
 804 |                 # 如果是深色背景，添加文字边框增强可读性
 805 |                 if self._is_dark_color(self.background_color):
 806 |                     try:
 807 |                         # 将当前标题获取出来
 808 |                         title_obj = ax.get_title()
 809 |                         # 清除原标题
 810 |                         ax.set_title("")
 811 |                         # 重新设置带边框的标题
 812 |                         plt.title(
 813 |                             title,
 814 |                             fontproperties=font_prop
 815 |                             if "font_prop" in locals()
 816 |                             else None,
 817 |                             fontsize=16,
 818 |                             pad=10,
 819 |                             color=title_color,
 820 |                             bbox=dict(
 821 |                                 facecolor=self.background_color,
 822 |                                 alpha=0.8,
 823 |                                 edgecolor="white",
 824 |                                 boxstyle="round,pad=0.5",
 825 |                             ),
 826 |                         )
 827 |                     except Exception as title_ex:
 828 |                         logger.warning(f"设置标题边框失败: {title_ex}")
 829 |                         # 恢复原标题
 830 |                         if "title_obj" in locals():
 831 |                             ax.set_title(title_obj)
 832 | 
 833 |             # 保存图片
 834 |             plt.savefig(
 835 |                 image_path,
 836 |                 bbox_inches="tight",
 837 |                 pad_inches=0.2,  # 减少边距
 838 |                 dpi=dpi,
 839 |                 facecolor=self.background_color,
 840 |             )
 841 |             plt.close(fig)
 842 | 
 843 |             # 删除临时文件
 844 |             try:
 845 |                 if temp_path.exists():
 846 |                     os.remove(temp_path)
 847 |             except Exception as e:
 848 |                 logger.warning(f"删除临时文件失败: {e}")
 849 | 
 850 |             # 添加时间戳水印
 851 |             img = Image.open(image_path)
 852 |             final_image = self._add_timestamp_to_image(img, timestamp)
 853 |             final_image.save(image_path)
 854 | 
 855 |             # 输出图片信息
 856 |             logger.info(f"词云图片已保存至: {image_path}")
 857 | 
 858 |             return str(image_path), image_path
 859 |         except Exception as e:
 860 |             logger.error(f"生成词云时出错: {e}")
 861 |             logger.error(traceback.format_exc())
 862 |             raise
 863 |         finally:
 864 |             # 释放锁
 865 |             lock.release()
 866 | 
 867 |     def _is_dark_color(self, color_str: str) -> bool:
 868 |         """
 869 |         判断颜色是否为深色
 870 | 
 871 |         Args:
 872 |             color_str: 颜色字符串，可以是颜色名称或十六进制值
 873 | 
 874 |         Returns:
 875 |             是否为深色
 876 |         """
 877 |         # 处理常见颜色名称
 878 |         dark_color_names = [
 879 |             "black",
 880 |             "darkblue",
 881 |             "darkgreen",
 882 |             "darkcyan",
 883 |             "darkred",
 884 |             "darkmagenta",
 885 |             "darkgray",
 886 |             "darkgrey",
 887 |             "navy",
 888 |             "green",
 889 |             "teal",
 890 |             "maroon",
 891 |             "purple",
 892 |             "indigo",
 893 |             "midnightblue",
 894 |             "darkslategray",
 895 |             "darkslategrey",
 896 |             "dimgray",
 897 |             "dimgrey",
 898 |         ]
 899 | 
 900 |         light_color_names = [
 901 |             "white",
 902 |             "lightgray",
 903 |             "lightgrey",
 904 |             "whitesmoke",
 905 |             "snow",
 906 |             "ivory",
 907 |             "floralwhite",
 908 |             "linen",
 909 |             "cornsilk",
 910 |             "seashell",
 911 |             "lavenderblush",
 912 |             "papayawhip",
 913 |             "blanchedalmond",
 914 |         ]
 915 | 
 916 |         # 首先检查确定的颜色名称
 917 |         color_lower = color_str.lower()
 918 |         if color_lower in dark_color_names:
 919 |             logger.debug(f"颜色 {color_str} 在已知深色列表中")
 920 |             return True
 921 |         if color_lower in light_color_names:
 922 |             logger.debug(f"颜色 {color_str} 在已知浅色列表中")
 923 |             return False
 924 | 
 925 |         # 处理十六进制颜色值
 926 |         if color_str.startswith("#"):
 927 |             try:
 928 |                 # 去掉#号并解析RGB值
 929 |                 r, g, b = (
 930 |                     int(color_str[1:3], 16),
 931 |                     int(color_str[3:5], 16),
 932 |                     int(color_str[5:7], 16),
 933 |                 )
 934 |                 # 计算亮度 (使用更精确的亮度计算公式)
 935 |                 # 这个公式来自W3C标准：https://www.w3.org/TR/WCAG20-TECHS/G17.html
 936 |                 brightness = (r * 299 + g * 587 + b * 114) / 1000
 937 |                 is_dark = brightness < 128
 938 |                 logger.debug(
 939 |                     f"颜色 {color_str} 亮度值: {brightness:.1f}, 判定为{'深色' if is_dark else '浅色'}"
 940 |                 )
 941 |                 return is_dark
 942 |             except Exception as e:
 943 |                 logger.warning(f"解析十六进制颜色失败: {color_str}, {e}")
 944 |                 return False  # 解析失败，默认为浅色
 945 | 
 946 |         # 尝试使用matplotlib的颜色名称
 947 |         try:
 948 |             from matplotlib.colors import to_rgb
 949 | 
 950 |             rgb = to_rgb(color_str)
 951 |             r, g, b = int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)
 952 |             brightness = (r * 299 + g * 587 + b * 114) / 1000
 953 |             is_dark = brightness < 128
 954 |             logger.debug(
 955 |                 f"颜色名称 {color_str} 转换为RGB: {r},{g},{b}, 亮度值: {brightness:.1f}, 判定为{'深色' if is_dark else '浅色'}"
 956 |             )
 957 |             return is_dark
 958 |         except Exception as e:
 959 |             logger.warning(f"解析颜色名称失败: {color_str}, {e}")
 960 |             return False  # 解析失败，默认为浅色
 961 | 
 962 |     def _filter_word_frequencies(self, word_counts: Dict[str, int]) -> Dict[str, int]:
 963 |         """
 964 |         过滤词频，移除频率过低的词汇，让词云更加利落
 965 | 
 966 |         Args:
 967 |             word_counts: 原始词频统计
 968 | 
 969 |         Returns:
 970 |             过滤后的词频统计
 971 |         """
 972 |         if not word_counts:
 973 |             return word_counts
 974 | 
 975 |         # 计算词频统计信息
 976 |         frequencies = list(word_counts.values())
 977 |         total_words = len(frequencies)
 978 |         max_freq = max(frequencies)
 979 | 
 980 |         # 动态计算最小频率阈值
 981 |         # 如果词汇总数很多，设置更严格的过滤条件
 982 |         if total_words > self.max_words * 3:
 983 |             # 词汇过多时，使用更严格的过滤
 984 |             min_freq_threshold = max(2, max_freq * 0.02)  # 至少2次，或最高频的2%
 985 |         elif total_words > self.max_words * 2:
 986 |             # 词汇较多时，适中过滤
 987 |             min_freq_threshold = max(1, max_freq * 0.01)  # 至少1次，或最高频的1%
 988 |         else:
 989 |             # 词汇不多时，轻度过滤
 990 |             min_freq_threshold = 1
 991 | 
 992 |         # 过滤低频词
 993 |         filtered_counts = {
 994 |             word: count
 995 |             for word, count in word_counts.items()
 996 |             if count >= min_freq_threshold
 997 |         }
 998 | 
 999 |         # 如果过滤后词汇仍然过多，取频率最高的词汇
1000 |         if len(filtered_counts) > self.max_words:
1001 |             # 按频率排序，取前max_words个
1002 |             sorted_words = sorted(
1003 |                 filtered_counts.items(), key=lambda x: x[1], reverse=True
1004 |             )
1005 |             filtered_counts = dict(sorted_words[: self.max_words])
1006 | 
1007 |         logger.info(
1008 |             f"词频过滤: 原始词汇{total_words}个 -> 过滤后{len(filtered_counts)}个，最小频率阈值: {min_freq_threshold}"
1009 |         )
1010 | 
1011 |         return filtered_counts
1012 | 


--------------------------------------------------------------------------------
/wordcloud_core/history_manager.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 聊天历史记录管理器
  3 | """
  4 | import re
  5 | import asyncio
  6 | from typing import List, Dict, Any, Optional, Tuple
  7 | import traceback
  8 | 
  9 | from sqlalchemy import Column, Integer, String, Boolean, Index, select, func
 10 | from sqlalchemy.orm import DeclarativeBase
 11 | from sqlalchemy.ext.asyncio import AsyncSession
 12 | 
 13 | from astrbot.api import logger
 14 | from astrbot.api.event import AstrMessageEvent
 15 | from astrbot.api.star import Context
 16 | 
 17 | from ..utils import get_current_timestamp, get_day_start_end_timestamps
 18 | 
 19 | 
 20 | class Base(DeclarativeBase):
 21 |     pass
 22 | 
 23 | 
class MessageHistory(Base):
    """ORM model for one stored chat message (table ``wordcloud_message_history``)."""
    __tablename__ = 'wordcloud_message_history'

    # Auto-incrementing surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Conversation identifier the message belongs to.
    session_id = Column(String, nullable=False)
    # Identifier of the message sender.
    sender_id = Column(String, nullable=False)
    # Display name of the sender; the only nullable column.
    sender_name = Column(String)
    # Message text.
    message = Column(String, nullable=False)
    # Integer timestamp of the message.
    # NOTE(review): presumably Unix seconds — confirm against get_current_timestamp.
    timestamp = Column(Integer, nullable=False)
    # Whether the message came from a group chat.
    is_group = Column(Boolean, nullable=False)

    # Indexes: by session, by timestamp, and a composite (session, timestamp).
    __table_args__ = (
        Index('idx_wordcloud_session_id', 'session_id'),
        Index('idx_wordcloud_timestamp', 'timestamp'),
        Index('idx_wordcloud_session_timestamp', 'session_id', 'timestamp'),
    )
 42 | 
 43 | 
 44 | class HistoryManager:
 45 |     """聊天历史记录管理器类"""
 46 | 
 47 |     def __init__(self, context: Context):
 48 |         """
 49 |         初始化历史记录管理器
 50 | 
 51 |         Args:
 52 |             context: AstrBot上下文
 53 |         """
 54 |         self.context = context
 55 |         self.db = self.context.get_db()
 56 |         
 57 |         # 初始化数据库
 58 |         asyncio.create_task(self._ensure_table())
 59 | 
 60 |     async def _ensure_table(self) -> None:
 61 |         """确保数据库中有消息历史表"""
 62 |         try:
 63 |             # 使用异步session创建表
 64 |             async with self.db.get_db() as session:
 65 |                 conn = await session.connection()
 66 |                 await conn.run_sync(Base.metadata.create_all)
 67 |             logger.info("WordCloud历史消息表和索引创建成功或已存在")
 68 |         except Exception as e:
 69 |             logger.error(f"创建WordCloud历史消息表失败: {e}")
 70 |             logger.error(traceback.format_exc())
 71 | 
 72 | 
 73 | 
 74 |     async def save_message(self, event: AstrMessageEvent) -> bool:
 75 |         """
 76 |         保存消息到历史记录
 77 | 
 78 |         Args:
 79 |             event: 消息事件
 80 | 
 81 |         Returns:
 82 |             是否保存成功
 83 |         """
 84 |         try:
 85 |             # 获取基本信息
 86 |             sender_id = event.get_sender_id()
 87 |             sender_name = event.get_sender_name()
 88 |             message = event.message_str if hasattr(event, "message_str") else None
 89 |             timestamp = get_current_timestamp()
 90 | 
 91 |             group_id_val = event.get_group_id()
 92 |             is_group = bool(group_id_val)
 93 | 
 94 |             # 构建会话ID
 95 |             session_id_to_save: str
 96 |             if group_id_val:  # 群聊消息
 97 |                 platform_name = event.get_platform_name() or "unknown_platform"
 98 |                 session_id_to_save = f"{platform_name}_group_{group_id_val}"
 99 |             else:  # 私聊消息
100 |                 session_id_to_save = event.unified_msg_origin
101 | 
102 |             # 处理空消息
103 |             if message is None:
104 |                 try:
105 |                     # 尝试从消息链中提取文本
106 |                     if hasattr(event, "get_messages") and callable(getattr(event, "get_messages")):
107 |                         messages = event.get_messages()
108 |                         text_parts = []
109 |                         for msg in messages:
110 |                             if hasattr(msg, "text") and msg.text:
111 |                                 text_parts.append(msg.text)
112 |                         if text_parts:
113 |                             message = " ".join(text_parts)
114 | 
115 |                     # 尝试从message_obj获取内容
116 |                     if not message and hasattr(event, "message_obj"):
117 |                         if hasattr(event.message_obj, "raw_message"):
118 |                             message = event.message_obj.raw_message
119 |                         elif hasattr(event.message_obj, "message"):
120 |                             message = str(event.message_obj.message)
121 |                 except Exception as e:
122 |                     logger.debug(f"尝试提取消息内容失败: {e}")
123 | 
124 |                 if not message:
125 |                     logger.debug(f"跳过None消息: 会话ID={session_id_to_save}, 发送者={sender_name}")
126 |                     return False
127 | 
128 |             # 确保message是字符串并清理内容
129 |             try:
130 |                 message = str(message)
131 |             except:
132 |                 logger.debug(f"消息内容无法转换为字符串: {type(message)}")
133 |                 return False
134 | 
135 |             cleaned_message = await self._clean_message(message, sender_name)
136 |             if not cleaned_message:
137 |                 logger.debug(f"跳过空消息: 会话ID={session_id_to_save}, 发送者={sender_name}")
138 |                 return True
139 | 
140 |             # 创建新的消息记录
141 |             new_message = MessageHistory(
142 |                 session_id=session_id_to_save,
143 |                 sender_id=sender_id,
144 |                 sender_name=sender_name,
145 |                 message=cleaned_message,
146 |                 timestamp=timestamp,
147 |                 is_group=is_group
148 |             )
149 | 
150 |             # 保存到数据库
151 |             async with self.db.get_db() as session:
152 |                 session.add(new_message)
153 |                 await session.commit()
154 | 
155 |             logger.debug(f"消息保存成功 - 会话ID: {session_id_to_save}, 时间戳: {timestamp}")
156 |             return True
157 | 
158 |         except Exception as e:
159 |             logger.error(f"保存消息到历史记录失败: {e}")
160 |             return False
161 | 
162 |     async def get_history_messages(
163 |         self, session_id: str, days: int = 7, limit: int = 1000
164 |     ) -> List[Dict[str, Any]]:
165 |         """
166 |         获取指定会话的历史消息
167 | 
168 |         Args:
169 |             session_id: 会话ID
170 |             days: 获取最近几天的消息
171 |             limit: 最大消息数量
172 | 
173 |         Returns:
174 |             历史消息列表
175 |         """
176 |         try:
177 |             # 计算起始时间戳
178 |             current_time = get_current_timestamp()
179 |             start_time = current_time - (days * 24 * 60 * 60)
180 | 
181 |             # 创建查询
182 |             query = (
183 |                 select(MessageHistory)
184 |                 .where(MessageHistory.session_id == session_id)
185 |                 .where(MessageHistory.timestamp >= start_time)
186 |                 .order_by(MessageHistory.timestamp.desc())
187 |                 .limit(limit)
188 |             )
189 | 
190 |             async with self.db.get_db() as session:
191 |                 result = await session.execute(query)
192 |                 messages = result.scalars().all()
193 | 
194 |                 # 转换为字典列表
195 |                 message_list = [
196 |                     {
197 |                         "session_id": msg.session_id,
198 |                         "sender_id": msg.sender_id,
199 |                         "sender_name": msg.sender_name,
200 |                         "message": msg.message,
201 |                         "timestamp": msg.timestamp,
202 |                         "is_group": msg.is_group,
203 |                     }
204 |                     for msg in messages
205 |                 ]
206 | 
207 |             logger.debug(
208 |                 f"获取到{len(message_list)}条历史消息(会话ID: {session_id}, 天数: {days})"
209 |             )
210 |             return message_list
211 | 
212 |         except Exception as e:
213 |             logger.error(f"获取历史消息失败: {e}")
214 |             return []
215 | 
216 |     async def get_active_sessions(self, days: int = 7) -> List[str]:
217 |         """
218 |         获取有活动的会话ID列表
219 | 
220 |         Args:
221 |             days: 最近几天有活动的会话
222 | 
223 |         Returns:
224 |             会话ID列表
225 |         """
226 |         try:
227 |             # 计算起始时间戳
228 |             current_time = get_current_timestamp()
229 |             start_time = current_time - (days * 24 * 60 * 60)
230 | 
231 |             # 创建查询
232 |             query = (
233 |                 select(func.distinct(MessageHistory.session_id))
234 |                 .where(MessageHistory.timestamp >= start_time)
235 |             )
236 | 
237 |             async with self.db.get_db() as session:
238 |                 result = await session.execute(query)
239 |                 sessions = result.scalars().all()
240 | 
241 |             logger.info(f"获取到{len(sessions)}个活跃会话(天数: {days})")
242 |             return sessions
243 | 
244 |         except Exception as e:
245 |             logger.error(f"获取活跃会话失败: {e}")
246 |             return []
247 | 
248 |     async def get_message_texts(
249 |         self, session_id: str, days: int = 7, limit: int = 1000
250 |     ) -> List[str]:
251 |         """
252 |         获取指定会话的消息文本列表
253 | 
254 |         Args:
255 |             session_id: 会话ID
256 |             days: 获取最近几天的消息
257 |             limit: 最大消息数量
258 | 
259 |         Returns:
260 |             消息文本列表，按时间顺序返回（旧的在前，新的在后）
261 |         """
262 |         try:
263 |             # 计算起始时间戳
264 |             current_time = get_current_timestamp()
265 |             start_time = current_time - (days * 24 * 60 * 60)
266 | 
267 |             # 创建查询 - 使用正序，使旧的消息在前
268 |             query = (
269 |                 select(MessageHistory.message)
270 |                 .where(MessageHistory.session_id == session_id)
271 |                 .where(MessageHistory.timestamp >= start_time)
272 |                 .order_by(MessageHistory.timestamp.asc())
273 |                 .limit(limit)
274 |             )
275 | 
276 |             async with self.db.get_db() as session:
277 |                 result = await session.execute(query)
278 |                 messages = result.scalars().all()
279 | 
280 |             # 过滤掉空消息
281 |             messages = [msg for msg in messages if msg and msg.strip()]
282 | 
283 |             logger.debug(
284 |                 f"获取到{len(messages)}条历史消息(会话ID: {session_id}, 天数: {days})"
285 |             )
286 |             total_chars = sum(len(msg) for msg in messages)
287 |             logger.debug(f"消息文本总长度: {total_chars} 字符")
288 | 
289 |             return messages
290 | 
291 |         except Exception as e:
292 |             logger.error(f"获取消息文本失败: {e}")
293 |             return []
294 | 
295 |     async def get_todays_message_texts(self, session_id: str, limit: int = 1000) -> List[str]:
296 |         """
297 |         获取今天的消息文本列表
298 | 
299 |         Args:
300 |             session_id: 会话ID
301 |             limit: 最大消息数量限制
302 | 
303 |         Returns:
304 |             今天的消息文本列表
305 |         """
306 |         try:
307 |             # 获取今天的开始和结束时间戳
308 |             start_timestamp, end_timestamp = get_day_start_end_timestamps()
309 |             logger.info(
310 |                 f"获取今日消息 - 会话ID: {session_id}, 时间范围: {start_timestamp} 到 {end_timestamp}"
311 |             )
312 | 
313 |             # 创建查询
314 |             query = (
315 |                 select(MessageHistory.message)
316 |                 .where(MessageHistory.session_id == session_id)
317 |                 .where(MessageHistory.timestamp >= start_timestamp)
318 |                 .where(MessageHistory.timestamp <= end_timestamp)
319 |                 .order_by(MessageHistory.timestamp.asc())
320 |                 .limit(limit)
321 |             )
322 | 
323 |             async with self.db.get_db() as session:
324 |                 result = await session.execute(query)
325 |                 messages = result.scalars().all()
326 | 
327 |             # 过滤掉空消息
328 |             messages = [msg for msg in messages if msg and isinstance(msg, str) and msg.strip()]
329 | 
330 |             logger.info(
331 |                 f"今日消息获取成功 - 会话ID: {session_id}, 消息数量: {len(messages)}"
332 |             )
333 |             return messages
334 | 
335 |         except Exception as e:
336 |             logger.error(f"获取今日消息文本失败: {e}")
337 |             return []
338 | 
339 |     async def get_active_group_sessions(self, days: int = 1) -> List[str]:
340 |         """
341 |         获取有活动的群聊会话ID列表
342 | 
343 |         Args:
344 |             days: 最近几天有活动的群聊
345 | 
346 |         Returns:
347 |             群聊会话ID列表
348 |         """
349 |         try:
350 |             # 计算起始时间戳
351 |             current_time = get_current_timestamp()
352 |             start_time = current_time - (days * 24 * 60 * 60)
353 | 
354 |             # 构建查询，只获取群聊会话
355 |             query = (
356 |                 select(func.distinct(MessageHistory.session_id))
357 |                 .where(
358 |                     MessageHistory.timestamp >= start_time,
359 |                     MessageHistory.is_group == True
360 |                 )
361 |             )
362 | 
363 |             try:
364 |                 async with self.db.get_db() as session:
365 |                     result = await session.execute(query)
366 |                     sessions = result.scalars().all()
367 | 
368 |                 logger.info(f"获取到{len(sessions)}个活跃群聊会话(天数: {days})")
369 |                 return sessions
370 |             except Exception as db_error:
371 |                 logger.error(f"获取活跃群聊会话数据库操作失败: {db_error}")
372 |                 return []
373 | 
374 |         except Exception as e:
375 |             logger.error(f"获取活跃群聊会话失败: {e}")
376 |             return []
377 | 
378 |     async def get_message_count_today(self, session_id: str) -> int:
379 |         """
380 |         获取今天的消息数量
381 | 
382 |         Args:
383 |             session_id: 会话ID
384 | 
385 |         Returns:
386 |             消息数量
387 |         """
388 |         try:
389 |             # 获取今天的开始和结束时间戳
390 |             start_timestamp, end_timestamp = get_day_start_end_timestamps()
391 | 
392 |             # 构建查询
393 |             query = (
394 |                 select(func.count().label('count'))
395 |                 .select_from(MessageHistory)
396 |                 .where(
397 |                     MessageHistory.session_id == session_id,
398 |                     MessageHistory.timestamp >= start_timestamp,
399 |                     MessageHistory.timestamp <= end_timestamp
400 |                 )
401 |             )
402 | 
403 |             # 执行查询
404 |             async with self.db.get_db() as session:
405 |                 result = await session.execute(query)
406 |                 count = result.scalar()
407 | 
408 |             return count or 0
409 |         except Exception as e:
410 |             logger.error(f"获取今天的消息数量失败: {e}")
411 |             return 0
412 | 
413 |     async def get_message_count_for_days(self, session_id: str, days: int) -> int:
414 |         """
415 |         获取指定会话在过去N天内的总消息数量。
416 | 
417 |         Args:
418 |             session_id: 会话ID
419 |             days: 获取最近几天的消息
420 | 
421 |         Returns:
422 |             指定天数内的消息总数量
423 |         """
424 |         try:
425 |             # 计算起始时间戳
426 |             current_time = get_current_timestamp()
427 |             start_time = current_time - (days * 24 * 60 * 60)
428 | 
429 |             # 创建查询
430 |             query = (
431 |                 select(func.count())
432 |                 .where(MessageHistory.session_id == session_id)
433 |                 .where(MessageHistory.timestamp >= start_time)
434 |             )
435 | 
436 |             async with self.db.get_db() as session:
437 |                 result = await session.execute(query)
438 |                 count = result.scalar()
439 |                 if count:
440 |                     logger.debug(
441 |                         f"获取到 {days} 天内消息总数: {count} (会话ID: {session_id})"
442 |                     )
443 |                     return count
444 |                 return 0
445 | 
446 |         except Exception as e:
447 |             logger.error(f"获取 {days} 天内消息总数失败: {e}, session_id={session_id}")
448 |             return 0
449 | 
450 |     async def get_active_users(
451 |         self, session_id: str, days: int = 1, limit: int = 10
452 |     ) -> List[Tuple[str, str, int]]:
453 |         """
454 |         获取指定会话中最活跃的用户（按发言数量排序）
455 | 
456 |         Args:
457 |             session_id: 会话ID
458 |             days: 统计最近几天的数据，默认为1天（今天）
459 |             limit: 返回的用户数量限制
460 | 
461 |         Returns:
462 |             用户活跃度排名列表，格式为 [(user_id, user_name, message_count), ...]
463 |         """
464 |         try:
465 |             # 计算时间范围
466 |             if days == 1:
467 |                 # 使用当天时间范围
468 |                 start_timestamp, end_timestamp = get_day_start_end_timestamps()
469 |             else:
470 |                 # 计算过去days天的时间范围
471 |                 current_time = get_current_timestamp()
472 |                 start_timestamp = current_time - (days * 24 * 60 * 60)
473 |                 end_timestamp = current_time
474 | 
475 |             # 创建查询
476 |             query = (
477 |                 select(
478 |                     MessageHistory.sender_id,
479 |                     MessageHistory.sender_name,
480 |                     func.count().label('message_count')
481 |                 )
482 |                 .where(MessageHistory.session_id == session_id)
483 |                 .where(MessageHistory.timestamp >= start_timestamp)
484 |                 .where(MessageHistory.timestamp <= end_timestamp)
485 |                 .group_by(MessageHistory.sender_id, MessageHistory.sender_name)
486 |                 .order_by(func.count().desc())
487 |                 .limit(limit)
488 |             )
489 | 
490 |             async with self.db.get_db() as session:
491 |                 result = await session.execute(query)
492 |                 rows = result.all()
493 | 
494 |                 # 转换为所需格式
495 |                 user_list = [
496 |                     (
497 |                         row.sender_id,
498 |                         row.sender_name or row.sender_id,  # 如果没有名称，使用ID
499 |                         row.message_count
500 |                     )
501 |                     for row in rows
502 |                 ]
503 | 
504 |                 return user_list
505 |         except Exception as e:
506 |             logger.error(f"获取活跃用户失败: {e}, session_id={session_id}, days={days}")
507 |             return []
508 | 
509 |     async def get_total_users_today(self, session_id: str) -> int:
510 |         """
511 |         获取今天在指定会话中发言的总用户数
512 | 
513 |         Args:
514 |             session_id: 会话ID
515 | 
516 |         Returns:
517 |             用户数量
518 |         """
519 |         try:
520 |             # 获取今天的开始和结束时间戳
521 |             start_timestamp, end_timestamp = get_day_start_end_timestamps()
522 | 
523 |             # 创建查询
524 |             query = (
525 |                 select(func.count(func.distinct(MessageHistory.sender_id)))
526 |                 .where(MessageHistory.session_id == session_id)
527 |                 .where(MessageHistory.timestamp >= start_timestamp)
528 |                 .where(MessageHistory.timestamp <= end_timestamp)
529 |             )
530 | 
531 |             async with self.db.get_db() as session:
532 |                 result = await session.execute(query)
533 |                 count = result.scalar()
534 |                 return count or 0
535 | 
536 |         except Exception as e:
537 |             logger.error(f"获取今天的用户数量失败: {e}")
538 |             return 0
539 | 
540 |     async def get_total_users_for_date_range(
541 |         self, session_id: str, start_timestamp: int, end_timestamp: int
542 |     ) -> int:
543 |         """
544 |         获取指定会话在指定时间戳范围内的总独立用户数。
545 | 
546 |         Args:
547 |             session_id: 会话ID
548 |             start_timestamp: 开始时间戳
549 |             end_timestamp: 结束时间戳
550 | 
551 |         Returns:
552 |             独立用户总数
553 |         """
554 |         try:
555 |             # 创建查询
556 |             query = (
557 |                 select(func.count(func.distinct(MessageHistory.sender_id)))
558 |                 .where(MessageHistory.session_id == session_id)
559 |                 .where(MessageHistory.timestamp >= start_timestamp)
560 |                 .where(MessageHistory.timestamp <= end_timestamp)
561 |             )
562 | 
563 |             async with self.db.get_db() as session:
564 |                 result = await session.execute(query)
565 |                 count = result.scalar()
566 |                 if count:
567 |                     logger.debug(
568 |                         f"会话 {session_id} 在 {start_timestamp}-{end_timestamp} 范围内总用户数: {count}"
569 |                     )
570 |                     return count
571 |                 return 0
572 | 
573 |         except Exception as e:
574 |             logger.error(f"获取指定日期范围总用户数失败 (会话 {session_id}): {e}")
575 |             return 0
576 | 
577 |     async def get_active_users_for_date_range(
578 |         self, session_id: str, start_timestamp: int, end_timestamp: int, limit: int = 10
579 |     ) -> List[Tuple[str, str, int]]:
580 |         """
581 |         获取指定会话在指定时间戳范围内的活跃用户列表（按消息数量排序）。
582 | 
583 |         Args:
584 |             session_id: 会话ID
585 |             start_timestamp: 开始时间戳
586 |             end_timestamp: 结束时间戳
587 |             limit: 返回的用户数量上限
588 | 
589 |         Returns:
590 |             活跃用户列表，每个元素为 (sender_id, sender_name, message_count)
591 |         """
592 |         try:
593 |             # 创建查询
594 |             query = (
595 |                 select(
596 |                     MessageHistory.sender_id,
597 |                     MessageHistory.sender_name,
598 |                     func.count().label('message_count')
599 |                 )
600 |                 .where(MessageHistory.session_id == session_id)
601 |                 .where(MessageHistory.timestamp >= start_timestamp)
602 |                 .where(MessageHistory.timestamp <= end_timestamp)
603 |                 .group_by(MessageHistory.sender_id, MessageHistory.sender_name)
604 |                 .order_by(func.count().desc())
605 |                 .limit(limit)
606 |             )
607 | 
608 |             async with self.db.get_db() as session:
609 |                 result = await session.execute(query)
610 |                 rows = result.all()
611 | 
612 |                 # 转换为所需格式
613 |                 active_users = [
614 |                     (
615 |                         row.sender_id,
616 |                         row.sender_name or row.sender_id,  # 如果没有名称，使用ID
617 |                         row.message_count
618 |                     )
619 |                     for row in rows
620 |                 ]
621 | 
622 |                 logger.debug(
623 |                     f"会话 {session_id} 在 {start_timestamp}-{end_timestamp} 范围内获取到 {len(active_users)} 个活跃用户 (上限 {limit})"
624 |                 )
625 |                 return active_users
626 | 
627 |         except Exception as e:
628 |             logger.error(f"获取指定日期范围活跃用户失败 (会话 {session_id}): {e}")
629 |             return []
630 | 
631 |     async def extract_group_id_from_session(self, session_id: str) -> Optional[str]:
632 |         """
633 |         从会话ID提取群号
634 | 
635 |         Args:
636 |             session_id: 会话ID
637 | 
638 |         Returns:
639 |             群号，如果不是群聊则返回None
640 |         """
641 |         try:
642 |             # 会话ID格式通常为 "platform:GroupMessage:group_id"
643 |             parts = session_id.split(":")
644 |             if len(parts) >= 3 and "GroupMessage" in parts[1]:
645 |                 return parts[2]
646 |             return None
647 |         except Exception as e:
648 |             logger.error(f"从会话ID提取群号失败: {e}")
649 |             return None
650 | 
651 |     async def get_messages_by_timestamp_range(
652 |         self,
653 |         session_id: str,
654 |         start_timestamp: int,
655 |         end_timestamp: int,
656 |         limit: int = 1000,
657 |     ) -> List[str]:
658 |         """
659 |         获取指定时间戳范围内的消息文本列表
660 | 
661 |         Args:
662 |             session_id: 会话ID
663 |             start_timestamp: 开始时间戳
664 |             end_timestamp: 结束时间戳
665 |             limit: 最大消息数量限制
666 | 
667 |         Returns:
668 |             指定时间范围内的消息文本列表
669 |         """
670 |         try:
671 |             logger.info(
672 |                 f"获取指定时间范围消息 - 会话ID: {session_id}, 时间范围: {start_timestamp} 到 {end_timestamp}"
673 |             )
674 | 
675 |             # 创建查询
676 |             query = (
677 |                 select(MessageHistory.message)
678 |                 .where(MessageHistory.session_id == session_id)
679 |                 .where(MessageHistory.timestamp >= start_timestamp)
680 |                 .where(MessageHistory.timestamp <= end_timestamp)
681 |                 .order_by(MessageHistory.timestamp.asc())
682 |                 .limit(limit)
683 |             )
684 | 
685 |             async with self.db.get_db() as session:
686 |                 result = await session.execute(query)
687 |                 messages = result.scalars().all()
688 | 
689 |             # 过滤掉空消息
690 |             messages = [msg for msg in messages if msg and isinstance(msg, str) and msg.strip()]
691 | 
692 |             logger.info(
693 |                 f"指定时间范围消息获取成功 - 会话ID: {session_id}, 消息数量: {len(messages)}"
694 |             )
695 |             return messages
696 | 
697 |         except Exception as e:
698 |             logger.error(f"获取指定时间范围消息文本失败: {e}")
699 |             return []
700 | 
701 |     async def _clean_message(self, message: str, sender_name: Optional[str] = None) -> str:
702 |         """
703 |         清理消息内容，移除不需要计入词云的元素
704 | 
705 |         Args:
706 |             message: 原始消息
707 |             sender_name: 发送者昵称，用于移除群聊中的@某人
708 | 
709 |         Returns:
710 |             清理后的消息
711 |         """
712 |         # 移除指令和相关关键词
713 |         message_lower = message.strip().lower()
714 |         if (message_lower.startswith(('#', '/')) or 
715 |             message_lower.startswith('wc') or 
716 |             message_lower.startswith('词云') or
717 |             '生成词云' in message_lower or
718 |             '/wordcloud' in message_lower):
719 |             return ""
720 |         
721 |         # 移除@某人的内容，包括可能的空格和换行
722 |         # 匹配 @昵称(QQ号) 或 @昵称
723 |         message = re.sub(r"@\s*\S+\s*\(\d+\)|@\s*\S+", "", message)
724 | 
725 |         # 移除URL
726 |         message = re.sub(r"https?://[\w./?=&-]+", "", message)
727 | 
728 |         # 移除其他可能不需要的内容，例如CQ码
729 |         message = re.sub(r"\[CQ:[^\]]+\]", "", message)
730 | 
731 |         # 移除各种标点符号和特殊字符，只保留文本和基本空格
732 |         message = re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9]+", " ", message).strip()
733 | 
734 |         return message
735 | 
736 |     async def close(self):
737 |         """
738 |         关闭历史管理器，释放资源
739 |         """
740 |         logger.info("关闭历史管理器...")
741 |         try:
742 |             # 清理数据和缓存
743 |             if hasattr(self, "word_data"):
744 |                 self.word_data = {}
745 |             if hasattr(self, "cached_word_counts"):
746 |                 self.cached_word_counts = {}
747 |             logger.info("历史数据缓存已清理")
748 |             logger.info("历史管理器已成功关闭")
749 |         except Exception as e:
750 |             logger.error(f"关闭历史管理器时出错: {e}")
751 |             logger.error(traceback.format_exc())
752 | 


--------------------------------------------------------------------------------
/wordcloud_core/scheduler.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 词云插件的定时任务调度器
  3 | """
  4 | 
  5 | import asyncio
  6 | import threading
  7 | import time
  8 | import os
  9 | import datetime
 10 | from typing import Dict, Any, Optional
 11 | import traceback
 12 | import pytz
 13 | 
 14 | from croniter import croniter
 15 | import astrbot.api.message_components as Comp
 16 | from astrbot.api.event import MessageChain
 17 | from astrbot.api import logger
 18 | 
 19 | 
 20 | # 使用全局变量跟踪调度器实例
 21 | _SCHEDULER_INSTANCES = {}
 22 | _SCHEDULER_LOCK = threading.Lock()
 23 | 
 24 | 
 25 | class TaskScheduler:
 26 |     """
 27 |     定时任务调度器类，用于管理定时任务
 28 |     """
 29 | 
 30 |     def __init__(
 31 |         self,
 32 |         context,
 33 |         main_loop: asyncio.AbstractEventLoop,
 34 |         debug_mode: bool = False,
 35 |         timezone: pytz.BaseTzInfo = pytz.utc,
 36 |     ):
 37 |         """
 38 |         初始化定时任务调度器
 39 | 
 40 |         Args:
 41 |             context: AstrBot上下文
 42 |             main_loop: 主事件循环的引用
 43 |             debug_mode: 是否启用调试模式
 44 |             timezone: 时区对象
 45 |         """
 46 |         # 检查是否有同一个上下文的调度器实例
 47 |         global _SCHEDULER_INSTANCES
 48 | 
 49 |         with _SCHEDULER_LOCK:
 50 |             # 使用上下文的ID作为标识符
 51 |             context_id = id(context)
 52 | 
 53 |             if context_id in _SCHEDULER_INSTANCES:
 54 |                 existing_scheduler = _SCHEDULER_INSTANCES[context_id]
 55 |                 if existing_scheduler.running:
 56 |                     logger.warning(
 57 |                         f"已存在运行中的调度器实例(ID: {context_id})，正在复用该实例。"
 58 |                     )
 59 |                     # 复制现有实例的属性
 60 |                     self.context = existing_scheduler.context
 61 |                     self.tasks = existing_scheduler.tasks
 62 |                     self.running = existing_scheduler.running
 63 |                     self.thread = existing_scheduler.thread
 64 |                     self.main_loop = existing_scheduler.main_loop
 65 |                     self.debug_mode = existing_scheduler.debug_mode
 66 |                     self.timezone = getattr(existing_scheduler, "timezone", pytz.utc)
 67 |                     self._event_loop = getattr(existing_scheduler, "_event_loop", None)
 68 |                     self._poller_task = getattr(
 69 |                         existing_scheduler, "_poller_task", None
 70 |                     )
 71 |                     return
 72 |                 else:
 73 |                     # 如果实例存在但没有运行，我们应该清理它
 74 |                     logger.info(f"发现未运行的调度器实例(ID: {context_id})，将替换它。")
 75 | 
 76 |             # 如果没有找到实例或实例没有运行，创建一个新实例
 77 |             self.context = context
 78 |             self.tasks: Dict[str, Dict[str, Any]] = {}
 79 |             self.running = False
 80 |             self.thread = None
 81 |             self.main_loop = main_loop
 82 |             self.debug_mode = debug_mode
 83 |             self.timezone = timezone
 84 |             self._event_loop: Optional[asyncio.AbstractEventLoop] = None
 85 |             self._poller_task: Optional[asyncio.Task] = None
 86 | 
 87 |             # 将新实例添加到全局字典
 88 |             _SCHEDULER_INSTANCES[context_id] = self
 89 | 
 90 |             logger.info(
 91 |                 f"TaskScheduler initialized with main loop ID: {id(self.main_loop)}, Debug Mode: {self.debug_mode}, Timezone: {self.timezone}"
 92 |             )
 93 | 
 94 |     def add_task(self, cron_expression: str, callback, task_id: str) -> bool:
 95 |         """
 96 |         添加定时任务
 97 | 
 98 |         Args:
 99 |             cron_expression: cron表达式，如 "30 20 * * *"（分 时 日 月 周）
100 |             callback: 回调函数，必须是可等待的
101 |             task_id: 任务ID，用于标识任务
102 | 
103 |         Returns:
104 |             是否成功添加任务
105 |         """
106 |         try:
107 |             # 检查任务是否已存在
108 |             if task_id in self.tasks:
109 |                 logger.warning(f"任务ID {task_id} 已存在，将被覆盖")
110 | 
111 |             # 验证cron表达式
112 |             if not croniter.is_valid(cron_expression):
113 |                 logger.error(f"无效的cron表达式: {cron_expression}")
114 |                 return False
115 | 
116 |             # 获取当前时间，使用配置的时区
117 |             current_time_dt = datetime.datetime.now(self.timezone)
118 |             logger.info(
119 |                 f"当前配置时区 ({self.timezone}) 时间: {current_time_dt.strftime('%Y-%m-%d %H:%M:%S %Z%z')}"
120 |             )
121 | 
122 |             try:
123 |                 # 创建croniter对象时，如果datetime对象有时区信息，croniter会使用它
124 |                 cron = croniter(cron_expression, current_time_dt)
125 | 
126 |                 # 获取下一次执行时间 (datetime对象，带有时区)
127 |                 next_run_datetime = cron.get_next(datetime.datetime)
128 |                 next_run_timestamp = next_run_datetime.timestamp()  # 转为时间戳 (UTC)
129 | 
130 |                 # 输出详细的时间信息以便调试
131 |                 next_run_str_local = next_run_datetime.astimezone(
132 |                     self.timezone
133 |                 ).strftime("%Y-%m-%d %H:%M:%S %Z%z")
134 |                 logger.info(
135 |                     f"任务 {task_id} 下次执行时间: {next_run_str_local} (时区: {self.timezone})"
136 |                 )
137 | 
138 |                 # 添加任务
139 |                 self.tasks[task_id] = {
140 |                     "cron_expression": cron_expression,
141 |                     "callback": callback,
142 |                     "next_run": next_run_timestamp,  # Store as UTC timestamp
143 |                     "cron_ref_dt": current_time_dt,  # Store reference datetime used for croniter
144 |                     "running": False,
145 |                 }
146 | 
147 |                 logger.info(
148 |                     f"成功添加定时任务: {task_id}, 下次执行时间: {next_run_str_local}"
149 |                 )
150 |                 return True
151 | 
152 |             except Exception as e:
153 |                 logger.error(f"创建cron对象或计算下次运行时间失败: {e}")
154 |                 logger.error(f"错误详情: {traceback.format_exc()}")
155 |                 return False
156 | 
157 |         except Exception as e:
158 |             logger.error(f"添加定时任务失败: {e}")
159 |             return False
160 | 
161 |     def remove_task(self, task_id: str) -> bool:
162 |         """
163 |         移除定时任务
164 | 
165 |         Args:
166 |             task_id: 任务ID
167 | 
168 |         Returns:
169 |             是否成功移除任务
170 |         """
171 |         if task_id in self.tasks:
172 |             del self.tasks[task_id]
173 |             logger.info(f"成功移除定时任务: {task_id}")
174 |             return True
175 |         else:
176 |             logger.warning(f"任务ID不存在: {task_id}")
177 |         return False
178 | 
179 |     def start(self) -> None:
180 |         """启动调度器"""
181 |         if self.running:
182 |             logger.warning("调度器已经在运行")
183 |             return
184 | 
185 |         self.running = True
186 | 
187 |         # 确保没有旧的线程在运行
188 |         if self.thread and self.thread.is_alive():
189 |             logger.warning("调度器已有线程正在运行，尝试停止它")
190 |             # 尝试优雅地停止旧线程
191 |             try:
192 |                 old_running_state = self.running
193 |                 self.running = False
194 |                 self.thread.join(timeout=2.0)
195 |                 self.running = old_running_state
196 |             except Exception as e:
197 |                 logger.error(f"停止旧线程时出错: {e}")
198 | 
199 |         # 创建新线程
200 |         self.thread = threading.Thread(
201 |             target=self._run_scheduler, name=f"TaskScheduler-{id(self)}"
202 |         )
203 |         self.thread.daemon = True
204 |         self.thread.start()
205 |         logger.info("调度器已启动")
206 | 
207 |     def stop(self) -> None:
208 |         """停止调度器"""
209 |         if not self.running:
210 |             logger.warning("调度器未运行")
211 |             return
212 | 
213 |         logger.info("正在停止调度器...")
214 |         self.running = False  # Signal the async_poller to stop
215 | 
216 |         # Stop the asyncio event loop in the scheduler's thread
217 |         if self._event_loop and self._event_loop.is_running():
218 |             logger.info(
219 |                 "SCHED: Calling loop.stop() via call_soon_threadsafe to stop run_forever."
220 |             )
221 |             self._event_loop.call_soon_threadsafe(self._event_loop.stop)
222 | 
223 |         if self.thread and self.thread.is_alive():
224 |             try:
225 |                 # Wait for the scheduler thread to finish
226 |                 logger.info("SCHED: Waiting for scheduler thread to join...")
227 |                 self.thread.join(timeout=10.0)  # Increased timeout
228 |                 if self.thread.is_alive():
229 |                     logger.warning("SCHED: Scheduler thread did not join in time.")
230 |                 else:
231 |                     logger.info("SCHED: Scheduler thread joined successfully.")
232 |             except Exception as e:
233 |                 logger.error(f"SCHED: Error stopping scheduler thread: {e}")
234 | 
235 |         # Event loop cleanup is now primarily handled in _run_scheduler's finally block
236 |         # self._event_loop = None # Nullify after thread has joined and loop is closed by _run_scheduler
237 | 
238 |         logger.info("调度器已停止")
239 | 
240 |         # 从实例字典中移除自己
241 |         with _SCHEDULER_LOCK:
242 |             for context_id, scheduler in list(_SCHEDULER_INSTANCES.items()):
243 |                 if scheduler is self:
244 |                     del _SCHEDULER_INSTANCES[context_id]
245 |                     break
246 | 
247 |     async def _async_poller(self, loop: asyncio.AbstractEventLoop):
248 |         """Asynchronous task poller running inside the scheduler's event loop."""
249 |         logger.info("SCHED ASYNC_POLLER: Async poller task started.")
250 |         last_heartbeat = time.time()
251 |         heartbeat_interval = 600  # Original: 600 seconds (10 minutes)
252 |         task_check_interval = 1.0  # Check tasks every second
253 | 
254 |         try:
255 |             while self.running:
256 |                 current_time = time.time()  # This is a UTC timestamp
257 | 
258 |                 if (
259 |                     self.debug_mode
260 |                     and current_time - last_heartbeat > heartbeat_interval
261 |                 ):
262 |                     logger.debug(
263 |                         f"SCHED ASYNC_POLLER: Heartbeat. Current UTC time: {datetime.datetime.utcfromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S UTC')}"
264 |                     )
265 |                     last_heartbeat = current_time
266 | 
267 |                 for task_id, task_info in list(
268 |                     self.tasks.items()
269 |                 ):  # Use list() for safe iteration if modifying
270 |                     if task_info.get("running", False):
271 |                         continue
272 | 
273 |                     if current_time >= task_info["next_run"]:
274 |                         if self.debug_mode:
275 |                             logger.debug(
276 |                                 f"SCHED ASYNC_POLLER: Executing task {task_id}"
277 |                             )
278 | 
279 |                         # Schedule the task execution in the main event loop
280 |                         asyncio.run_coroutine_threadsafe(
281 |                             self._execute_task(task_id, task_info), self.main_loop
282 |                         )
283 | 
284 |                         # Update next run time for this task
285 |                         try:
286 |                             # Re-initialize croniter with the reference datetime object that includes timezone
287 |                             # This ensures that DST transitions are handled correctly by croniter.
288 |                             # If task_info["cron_ref_dt"] is naive, convert it to aware using self.timezone
289 |                             ref_dt = task_info["cron_ref_dt"]
290 |                             if (
291 |                                 ref_dt.tzinfo is None
292 |                             ):  # Should not happen if add_task is correct
293 |                                 ref_dt = self.timezone.localize(ref_dt)
294 | 
295 |                             # It's better to advance from the *scheduled* `next_run_datetime` rather than `now`
296 |                             # to avoid drift if the poller is slightly delayed.
297 |                             # Convert the stored `next_run` (UTC timestamp) back to a datetime object with our timezone.
298 |                             last_scheduled_run_dt = datetime.datetime.fromtimestamp(
299 |                                 task_info["next_run"], self.timezone
300 |                             )
301 | 
302 |                             # Ensure croniter uses the correct timezone context by providing an aware datetime object
303 |                             cron = croniter(
304 |                                 task_info["cron_expression"], last_scheduled_run_dt
305 |                             )
306 |                             next_run_datetime_aware = cron.get_next(datetime.datetime)
307 |                             task_info["next_run"] = (
308 |                                 next_run_datetime_aware.timestamp()
309 |                             )  # Store as UTC timestamp
310 |                             task_info["cron_ref_dt"] = (
311 |                                 next_run_datetime_aware  # Update reference dt
312 |                             )
313 | 
314 |                             if self.debug_mode:
315 |                                 next_run_str_local = next_run_datetime_aware.astimezone(
316 |                                     self.timezone
317 |                                 ).strftime("%Y-%m-%d %H:%M:%S %Z%z")
318 |                                 logger.debug(
319 |                                     f"SCHED ASYNC_POLLER: Task {task_id} rescheduled. Next run: {next_run_str_local}"
320 |                                 )
321 |                         except Exception as e:
322 |                             logger.error(
323 |                                 f"SCHED ASYNC_POLLER: Error rescheduling task {task_id}: {e} - Task will be removed."
324 |                             )
325 |                             logger.error(f"Details: {traceback.format_exc()}")
326 |                             self.tasks.pop(task_id, None)  # Remove problematic task
327 | 
328 |                 await asyncio.sleep(task_check_interval)
329 |         except asyncio.CancelledError:
330 |             logger.info("SCHED ASYNC_POLLER: Async poller task cancelled.")
331 |         except Exception as e:
332 |             logger.error(f"SCHED ASYNC_POLLER: Error in async poller: {e}")
333 |             logger.error(f"Details: {traceback.format_exc()}")
334 |         finally:
335 |             logger.info("SCHED ASYNC_POLLER: Async poller task stopped.")
336 | 
337 |     def _run_scheduler(self) -> None:
338 |         """Runs the scheduler in a dedicated thread with its own asyncio event loop."""
339 |         logger.info("调度器线程已启动")
340 |         loop: Optional[asyncio.AbstractEventLoop] = None
341 | 
342 |         try:
343 |             loop = asyncio.new_event_loop()
344 |             asyncio.set_event_loop(loop)
345 |             self._event_loop = loop
346 |             logger.info("为调度器线程创建了新的事件循环")
347 | 
348 |             self._poller_task = loop.create_task(self._async_poller(loop))
349 | 
350 |             logger.info("SCHED: Starting event loop with run_forever().")
351 |             loop.run_forever()  # This blocks until loop.stop() is called
352 |             logger.info("SCHED: Event loop run_forever() has exited.")
353 | 
354 |         except asyncio.CancelledError:
355 |             logger.info(
356 |                 "SCHED: _run_scheduler's run_forever() was cancelled (likely during stop)."
357 |             )
358 |         except Exception as e_outer:
359 |             logger.error(f"SCHED: _run_scheduler outer error: {e_outer}")
360 |             logger.error(
361 |                 f"SCHED: _run_scheduler outer traceback: {traceback.format_exc()}"
362 |             )
363 |         finally:
364 |             logger.info("SCHED: _run_scheduler finally block entered.")
365 | 
366 |             if self._poller_task and not self._poller_task.done():
367 |                 logger.info("SCHED: Cancelling poller task in finally.")
368 |                 self._poller_task.cancel()
369 |                 if (
370 |                     loop and not loop.is_closed() and not loop.is_running()
371 |                 ):  # if run_forever exited
372 |                     # Need to run the loop briefly to process the cancellation
373 |                     try:
374 |                         logger.info(
375 |                             "SCHED: Running loop briefly to process poller cancellation."
376 |                         )
377 |                         loop.run_until_complete(self._poller_task)
378 |                     except asyncio.CancelledError:
379 |                         logger.info(
380 |                             "SCHED: Poller task successfully cancelled in finally."
381 |                         )
382 |                     except Exception as e_poll_cancel_wait:
383 |                         logger.error(
384 |                             f"SCHED: Exception waiting for poller task cancellation in finally: {e_poll_cancel_wait}"
385 |                         )
386 | 
387 |             if loop and not loop.is_closed():
388 |                 logger.info(
389 |                     "SCHED: Shutting down remaining tasks in event loop (finally)."
390 |                 )
391 | 
392 |                 # Ensure loop is stopped if it was running (e.g. if run_forever exited due to error)
393 |                 if loop.is_running():
394 |                     logger.info(
395 |                         "SCHED: Loop was still running in finally, stopping it."
396 |                     )
397 |                     loop.stop()
398 | 
399 |                 # Gather all remaining tasks
400 |                 pending_tasks = [
401 |                     t
402 |                     for t in asyncio.all_tasks(loop)
403 |                     if t is not self._poller_task and not t.done()
404 |                 ]
405 |                 if pending_tasks:
406 |                     logger.info(
407 |                         f"SCHED: {len(pending_tasks)} other pending tasks to cancel/gather."
408 |                     )
409 |                     for t in pending_tasks:
410 |                         t.cancel()
411 |                     try:
412 |                         # Run loop to process cancellations and gather results
413 |                         loop.run_until_complete(
414 |                             asyncio.gather(*pending_tasks, return_exceptions=True)
415 |                         )
416 |                         logger.info("SCHED: Gathered other pending tasks in finally.")
417 |                     except Exception as e_gather_final:
418 |                         logger.error(
419 |                             f"SCHED: Error during final gather in finally: {e_gather_final}"
420 |                         )
421 | 
422 |                 if hasattr(loop, "shutdown_asyncgens") and callable(
423 |                     loop.shutdown_asyncgens
424 |                 ):
425 |                     try:
426 |                         logger.info("SCHED: Shutting down asyncgens in finally.")
427 |                         loop.run_until_complete(loop.shutdown_asyncgens())
428 |                     except RuntimeError as e_gens_runtime:
429 |                         logger.warning(
430 |                             f"SCHED: Runtime error shutting down asyncgens in finally (may be ok if loop closed): {e_gens_runtime}"
431 |                         )
432 |                     except Exception as e_gens:
433 |                         logger.error(
434 |                             f"SCHED: Error shutting down asyncgens in finally: {e_gens}"
435 |                         )
436 | 
437 |                 if not loop.is_closed():
438 |                     logger.info("SCHED: Closing event loop in finally.")
439 |                     loop.close()
440 |                 else:
441 |                     logger.info("SCHED: Event loop was already closed in finally.")
442 | 
443 |             self._event_loop = None  # Clear the loop reference
444 |             self._poller_task = None  # Clear task reference
445 |             logger.info("调度器线程已退出 (end of _run_scheduler)")
446 | 
447 |     async def _execute_task(self, task_id: str, task: Dict[str, Any]) -> None:
448 |         """
449 |         执行定时任务
450 | 
451 |         Args:
452 |             task_id: 任务ID
453 |             task: 任务信息
454 |         """
455 |         current_loop_id = None
456 |         try:
457 |             current_loop_id = id(asyncio.get_running_loop())
458 |         except RuntimeError:
459 |             if self.debug_mode:
460 |                 logger.debug(
461 |                     f"SCHED: [{task_id}] _execute_task: Cannot get current running loop."
462 |                 )
463 | 
464 |         if self.debug_mode:
465 |             logger.debug(
466 |                 f"SCHED: [{task_id}] _execute_task ENTERED. Will run in loop ID: {current_loop_id if current_loop_id else 'Unknown'}"
467 |             )
468 |         try:
469 |             # Keep essential start log at INFO level
470 |             start_time_str = time.strftime(
471 |                 "%Y-%m-%d %H:%M:%S", time.localtime(time.time())
472 |             )
473 |             logger.info(f"[{task_id}] 开始执行定时任务，开始时间: {start_time_str}")
474 |             execution_start = time.time()
475 | 
476 |             callback = task.get("callback")
477 |             if not callback or not callable(callback):
478 |                 logger.error(f"[{task_id}] 任务回调函数无效或不可调用")  # Keep as error
479 |                 if self.debug_mode:
480 |                     logger.debug(
481 |                         f"SCHED: [{task_id}] Callback is invalid or not callable."
482 |                     )
483 |                 return
484 | 
485 |             if self.debug_mode:
486 |                 logger.debug(
487 |                     f"SCHED: [{task_id}] Callback obtained: {callback.__name__ if hasattr(callback, '__name__') else str(callback)}"
488 |                 )
489 | 
490 |             try:
491 |                 import inspect
492 | 
493 |                 if inspect.iscoroutinefunction(callback):
494 |                     if self.debug_mode:
495 |                         logger.debug(
496 |                             f"SCHED: [{task_id}] Callback is a coroutine function. Preparing to call it to get coroutine object."
497 |                         )
498 |                     coro = None
499 |                     try:
500 |                         coro = callback()
501 |                         if self.debug_mode:
502 |                             logger.debug(
503 |                                 f"SCHED: [{task_id}] Successfully CALLED callback function, got coroutine object: {type(coro)}"
504 |                             )
505 |                     except Exception as coro_creation_e:
506 |                         logger.error(
507 |                             f"[{task_id}] 调用回调函数创建协程对象时出错: {coro_creation_e}"
508 |                         )  # Keep as error
509 |                         import traceback
510 | 
511 |                         logger.error(
512 |                             f"[{task_id}] 协程创建错误详情: {traceback.format_exc()}"
513 |                         )  # Keep as error
514 |                         if self.debug_mode:
515 |                             logger.debug(
516 |                                 f"SCHED: [{task_id}] EXCEPTION during calling callback() to get coroutine object: {coro_creation_e}"
517 |                             )
518 |                         raise
519 | 
520 |                     if coro is not None:
521 |                         if self.debug_mode:
522 |                             logger.debug(
523 |                                 f"SCHED: [{task_id}] Preparing to AWAIT the coroutine object."
524 |                             )
525 |                         try:
526 |                             # 使用超时来防止协程长时间运行
527 |                             # import asyncio # Already imported at top
528 |                             # 设置一个合理的超时时间，这里使用30分钟
529 |                             timeout = 30 * 60  # 30分钟
530 |                             try:
531 |                                 await asyncio.wait_for(coro, timeout=timeout)
532 |                                 if self.debug_mode:
533 |                                     logger.debug(
534 |                                         f"SCHED: [{task_id}] Successfully AWAITED the coroutine."
535 |                                     )
536 |                                 logger.info(f"[{task_id}] 成功执行协程回调函数")
537 |                             except asyncio.TimeoutError:
538 |                                 logger.error(
539 |                                     f"[{task_id}] 协程执行超时（超过{timeout}秒）"
540 |                                 )
541 |                         except Exception as await_error:
542 |                             logger.error(
543 |                                 f"[{task_id}] 等待协程执行时出错: {await_error}"
544 |                             )
545 |                             import traceback
546 | 
547 |                             logger.error(
548 |                                 f"[{task_id}] 协程执行错误详情: {traceback.format_exc()}"
549 |                             )
550 |                     else:
551 |                         logger.error(f"[{task_id}] 协程对象为None，无法执行")
552 |                 else:
553 |                     # 如果不是协程函数，直接调用
554 |                     if self.debug_mode:
555 |                         logger.debug(
556 |                             f"SCHED: [{task_id}] Callback is NOT a coroutine function. Will call directly."
557 |                         )
558 |                     result = callback()
559 |                     if self.debug_mode:
560 |                         logger.debug(
561 |                             f"SCHED: [{task_id}] Successfully called regular function. Result: {result}"
562 |                         )
563 |                     logger.info(f"[{task_id}] 成功执行普通回调函数")
564 |             except Exception as call_error:
565 |                 logger.error(f"[{task_id}] 执行回调函数时出错: {call_error}")
566 |                 import traceback
567 | 
568 |                 logger.error(f"[{task_id}] 执行错误详情: {traceback.format_exc()}")
569 |                 if self.debug_mode:
570 |                     logger.debug(
571 |                         f"SCHED: [{task_id}] EXCEPTION during execution: {call_error}"
572 |                     )
573 | 
574 |             # 计算执行时间
575 |             execution_time = time.time() - execution_start
576 |             logger.info(f"[{task_id}] 任务执行完成，耗时: {execution_time:.2f}秒")
577 |             if self.debug_mode:
578 |                 logger.debug(
579 |                     f"SCHED: [{task_id}] Task execution completed in {execution_time:.2f} seconds"
580 |                 )
581 |         except Exception as e:
582 |             logger.error(f"[{task_id}] 执行任务过程中出错: {e}")
583 |             import traceback
584 | 
585 |             logger.error(f"[{task_id}] 任务执行错误详情: {traceback.format_exc()}")
586 |             if self.debug_mode:
587 |                 logger.debug(f"SCHED: [{task_id}] EXCEPTION in _execute_task: {e}")
588 |         finally:
589 |             # 无论成功失败，都重置任务状态
590 |             try:
591 |                 if task_id in self.tasks:
592 |                     self.tasks[task_id]["running"] = False
593 |                     if self.debug_mode:
594 |                         logger.debug(
595 |                             f"SCHED: [{task_id}] Reset task running state to False"
596 |                         )
597 |             except Exception as reset_error:
598 |                 logger.error(f"[{task_id}] 重置任务状态时出错: {reset_error}")
599 |                 if self.debug_mode:
600 |                     logger.debug(
601 |                         f"SCHED: [{task_id}] EXCEPTION when resetting task state: {reset_error}"
602 |                     )
603 | 
604 |             if self.debug_mode:
605 |                 logger.debug(f"SCHED: [{task_id}] _execute_task EXITED")
606 | 
607 |     async def send_to_session(
608 |         self, session_id: str, message_text: str, image_path: Optional[str] = None
609 |     ) -> bool:
610 |         """
611 |         向指定会话发送消息
612 | 
613 |         Args:
614 |             session_id: 会话ID
615 |             message_text: 消息文本
616 |             image_path: 可选的图片路径
617 | 
618 |         Returns:
619 |             是否成功发送消息
620 |         """
621 |         try:
622 |             logger.info(f"准备发送消息到会话: {session_id}")
623 | 
624 |             # 尝试多种会话ID格式
625 |             attempted_session_ids = []
626 |             success = False
627 | 
628 |             # 检查图片路径是否存在
629 |             if image_path and not os.path.exists(image_path):
630 |                 logger.error(f"图片路径不存在: {image_path}")
631 |                 # 尝试查找可能存在的图片文件
632 |                 if os.path.dirname(image_path):
633 |                     dir_path = os.path.dirname(image_path)
634 |                     if os.path.exists(dir_path):
635 |                         files = os.listdir(dir_path)
636 |                         logger.info(f"目录 {dir_path} 中存在的文件: {files}")
637 | 
638 |                         # 尝试找到类似名称的图片文件
639 |                         basename = os.path.basename(image_path)
640 |                         for file in files:
641 |                             if file.startswith(basename.split(".")[0]):
642 |                                 logger.info(
643 |                                     f"找到可能的替代图片: {os.path.join(dir_path, file)}"
644 |                                 )
645 |                                 image_path = os.path.join(dir_path, file)
646 |                                 break
647 | 
648 |             # 创建消息链
649 |             message_components = [Comp.Plain(message_text)]
650 | 
651 |             # 如果提供了图片路径，添加图片组件
652 |             if image_path and os.path.exists(image_path):
653 |                 try:
654 |                     logger.info(f"添加图片到消息: {image_path}")
655 |                     message_components.append(Comp.Image.fromFileSystem(image_path))
656 |                 except Exception as img_error:
657 |                     logger.error(f"添加图片到消息链失败: {img_error}")
658 |                     logger.error(f"添加图片错误详情: {traceback.format_exc()}")
659 |                     # 继续发送纯文本消息
660 | 
661 |             # 创建消息链
662 |             message_chain = MessageChain(message_components)
663 | 
664 |             # 首先尝试使用原始会话ID
665 |             logger.info(f"尝试使用原始会话ID发送: {session_id}")
666 |             attempted_session_ids.append(session_id)
667 |             success = await self.context.send_message(session_id, message_chain)
668 | 
669 |             # 如果失败，尝试使用其他会话ID格式
670 |             if not success:
671 |                 # 检查是否是群号，如果是，尝试构建完整会话ID
672 |                 if session_id.isdigit() or (":" not in session_id):
673 |                     # 从session_id提取可能的群号
674 |                     group_id = session_id
675 |                     if ":" in session_id:
676 |                         # 可能是部分会话ID，尝试提取最后部分作为群号
677 |                         parts = session_id.split(":")
678 |                         group_id = parts[-1]
679 | 
680 |                     # 尝试QQ常见会话ID格式
681 |                     for platform in ["aiocqhttp", "qqofficial"]:
682 |                         for msg_type in ["GroupMessage", "group"]:
683 |                             fixed_id = f"{platform}:{msg_type}:{group_id}"
684 |                             if fixed_id not in attempted_session_ids:
685 |                                 logger.info(f"尝试使用构造会话ID发送: {fixed_id}")
686 |                                 attempted_session_ids.append(fixed_id)
687 |                                 success = await self.context.send_message(
688 |                                     fixed_id, message_chain
689 |                                 )
690 |                                 if success:
691 |                                     logger.info(f"使用会话ID {fixed_id} 发送成功")
692 |                                     break
693 |                         if success:
694 |                             break
695 | 
696 |                 # 如果仍未成功，尝试直接获取平台实例并发送
697 |                 if not success and group_id.isdigit():
698 |                     try:
699 |                         # 尝试使用aiocqhttp平台直接发送
700 |                         platform = self.context.get_platform("aiocqhttp")
701 |                         if platform and hasattr(platform, "send_group_msg"):
702 |                             logger.info(
703 |                                 f"尝试使用aiocqhttp平台直接发送到群: {group_id}"
704 |                             )
705 |                             try:
706 |                                 await platform.send_group_msg(
707 |                                     group_id=group_id, message=message_chain
708 |                                 )
709 |                                 logger.info("使用aiocqhttp平台发送成功")
710 |                                 success = True
711 |                             except Exception as e:
712 |                                 logger.error(f"使用aiocqhttp平台发送失败: {e}")
713 | 
714 |                         # 尝试使用qqofficial平台
715 |                         if not success:
716 |                             platform = self.context.get_platform("qqofficial")
717 |                             if platform and hasattr(platform, "send_group_msg"):
718 |                                 logger.info(
719 |                                     f"尝试使用qqofficial平台直接发送到群: {group_id}"
720 |                                 )
721 |                                 try:
722 |                                     await platform.send_group_msg(
723 |                                         group_id=group_id, message=message_chain
724 |                                     )
725 |                                     logger.info("使用qqofficial平台发送成功")
726 |                                     success = True
727 |                                 except Exception as e:
728 |                                     logger.error(f"使用qqofficial平台发送失败: {e}")
729 |                     except Exception as platform_error:
730 |                         logger.error(f"尝试直接使用平台发送失败: {platform_error}")
731 | 
732 |             if success:
733 |                 logger.info(f"成功发送消息到会话: {session_id}")
734 |             else:
735 |                 logger.warning(f"所有尝试都失败，无法发送消息到会话: {session_id}")
736 |                 logger.warning(f"尝试过的会话ID: {attempted_session_ids}")
737 | 
738 |             return success
739 |         except Exception as e:
740 |             logger.error(f"发送消息到会话失败: {session_id}, 错误: {e}")
741 |             logger.error(f"发送消息错误详情: {traceback.format_exc()}")
742 |             return False
743 | 


--------------------------------------------------------------------------------