├── .github └── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── .gitignore ├── LICENSE ├── README.md ├── _conf_schema.json ├── constant.py ├── fonts ├── LXGWWenKai-Regular.ttf └── OFL.txt ├── main.py ├── metadata.yaml ├── requirements.txt ├── stop_words.txt ├── utils.py └── wordcloud_core ├── __init__.py ├── generator.py ├── history_manager.py └── scheduler.py /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: 错误报告 (Bug Report) 2 | description: 提交 CloudRank 插件的错误 3 | title: "[BUG] <请在此处填写你遇到的问题>" 4 | labels: ["bug"] 5 | assignees: 6 | - GEMILUXVII 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | 请尽可能详细地描述问题,以便能更快地定位并修复它。 12 | 13 | - type: checkboxes 14 | attributes: 15 | label: 提交前确认 16 | description: 在提交此错误报告前,请确认以下内容: 17 | options: 18 | - label: 我已仔细阅读过项目的 README.md 文档,确认其中没有关于此错误的说明或解决方案,并且我也已检查过现有的 Issues,未发现重复报告。 19 | required: true 20 | 21 | - type: textarea 22 | id: bug-description 23 | attributes: 24 | label: 错误描述 (Bug Description) 25 | description: 请清晰简洁地描述遇到的错误 26 | placeholder: 例如:“当我尝试使用 /wordcloud 命令时,插件崩溃了,并且机器人没有发送任何词云图片” 27 | validations: 28 | required: true 29 | 30 | - type: textarea 31 | id: steps-to-reproduce 32 | attributes: 33 | label: 复现步骤 (Steps to Reproduce) 34 | description: 请详细说明如何复现这个错误 35 | placeholder: | 36 | 1. 在群聊 X 中发送消息... 37 | 2. 输入命令 `/wordcloud 3`... 38 | 3. 观察到机器人没有任何回复/机器人回复了错误信息... 39 | 4. 
(其他相关步骤) 40 | validations: 41 | required: true 42 | 43 | - type: textarea 44 | id: expected-behavior 45 | attributes: 46 | label: 期望行为 (Expected Behavior) 47 | description: 请描述在上述步骤之后,期望发生什么 48 | placeholder: 例如:“机器人应该发送一张包含最近3天聊天内容的词云图片” 49 | validations: 50 | required: true 51 | 52 | - type: textarea 53 | id: actual-behavior 54 | attributes: 55 | label: 实际行为 (Actual Behavior) 56 | description: 请描述实际发生了什么 57 | placeholder: 例如:“机器人没有任何回复,控制台输出了 XXX 错误” 58 | validations: 59 | required: true 60 | 61 | - type: textarea 62 | id: screenshots-logs 63 | attributes: 64 | label: 截图/日志 (Screenshots/Logs) 65 | description: | 66 | 如果适用,请在此处添加截图或日志以帮助解释问题 67 | 对于日志,请开启插件的 `debug_mode` (如果问题与运行时错误相关) 并复制相关的日志片段 68 | **重要提示:** 请确保在上传截图或日志前,已移除或遮盖所有个人身份信息 (PII) 或其他敏感数据! 69 | placeholder: | 70 | (在此处粘贴截图或日志) 71 | ```log 72 | [时间戳] [级别] 详细的错误日志... 73 | ``` 74 | validations: 75 | required: false 76 | 77 | - type: input 78 | id: plugin-version 79 | attributes: 80 | label: 插件版本 (CloudRank Version) 81 | description: 正在使用的 CloudRank 插件版本是多少? (例如 v1.3.6) 82 | placeholder: "例如:v1.3.6" 83 | validations: 84 | required: true 85 | 86 | - type: input 87 | id: astrbot-version 88 | attributes: 89 | label: AstrBot 版本 (AstrBot Version) 90 | description: 正在使用的 AstrBot 版本是多少? 91 | placeholder: "例如:v3.6.8" 92 | validations: 93 | required: true 94 | 95 | - type: input 96 | id: python-version 97 | attributes: 98 | label: Python 版本 (Python Version) 99 | description: 使用的 Python 版本是多少? 100 | placeholder: "例如:3.11.11" 101 | validations: 102 | required: true 103 | 104 | - type: dropdown 105 | id: os 106 | attributes: 107 | label: 操作系统 (Operating System) 108 | description: 在哪个操作系统上运行 AstrBot 和插件? 
109 | options: 110 | - Windows 111 | - Linux (请在下方“其他信息”中注明发行版) 112 | - macOS 113 | - Docker (请在下方“其他信息”中注明基础镜像) 114 | - 其他 (请在下方“其他信息”中注明) 115 | validations: 116 | required: true 117 | 118 | - type: textarea 119 | id: relevant-config 120 | attributes: 121 | label: 相关配置 (Relevant Configuration) 122 | description: | 123 | 请列出可能与此错误相关的 CloudRank 插件配置项及其值 124 | 例如:`font_path`, `custom_mask_path`, `enabled_group_list`, `auto_generate_cron` 等 125 | **请不要泄露敏感信息,如 API 密钥或密码** 126 | placeholder: | 127 | enabled_group_list: "123456789" 128 | font_path: "my_custom_font.ttf" 129 | custom_mask_path: "mask.png" 130 | # 其他可能相关的配置... 131 | validations: 132 | required: false 133 | 134 | - type: textarea 135 | id: additional-context 136 | attributes: 137 | label: 其他信息 (Additional Context) 138 | description: 在此处添加有关该问题的任何其他上下文或备注。例如,问题是间歇性出现还是稳定复现?是否尝试过其他排查步骤? 139 | placeholder: "例如:这个问题只在特定群聊中出现。我尝试重启了 AstrBot 但问题依旧" 140 | validations: 141 | required: false 142 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: 功能请求 (Feature Request) 2 | description: 为 CloudRank 插件提出新功能或改进建议 3 | title: "[FEATURE] <简述功能建议>" 4 | labels: ["enhancement"] 5 | assignees: 6 | - GEMILUXVII 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | 感谢为 CloudRank 插件提出功能建议,请详细描述你的想法 12 | 13 | - type: checkboxes 14 | id: readme-checked 15 | attributes: 16 | label: README 阅读确认 17 | description: 提交此功能请求前,请确认: 18 | options: 19 | - label: 我已阅读项目的 README.md 文档,确认所提功能在现有版本中无法实现,或现有实现存在不足 20 | required: true 21 | 22 | - type: textarea 23 | id: problem-related 24 | attributes: 25 | label: 此功能请求是否与某个现有问题相关? (选填) 26 | description: | 27 | 清晰简洁地描述相关问题。如果只是一个新点子,而非解决特定痛点,可简单说明或跳过 28 | 例如:“目前排行榜的发送时间与词云绑定,不够灵活” 29 | 或者:“希望词云有更多预设形状” 30 | placeholder: "例如:当前 [...] 
方面存在不便" 31 | 32 | - type: textarea 33 | id: solution-description 34 | attributes: 35 | label: 描述期望的解决方案或新功能 (必填) 36 | description: 清晰简洁地描述希望实现的功能及其理想工作方式 37 | placeholder: | 38 | 例如:“增加配置项,允许独立设置词云的字体字形” 39 | validations: 40 | required: true 41 | 42 | - type: textarea 43 | id: alternatives-considered 44 | attributes: 45 | label: 是否考虑过其他替代方案? (选填) 46 | description: 清晰简洁地描述在提出此建议前,是否考虑过其他替代方案或类似功能 47 | placeholder: "例如:曾尝试调整每日词云的 CRON 表达式,但这会同时影响词云生成时间,并非理想方案" 48 | 49 | - type: textarea 50 | id: additional-context 51 | attributes: 52 | label: 补充信息 (选填) 53 | description: | 54 | 在此添加关于此功能请求的其他上下文、使用场景、预期益处、可能的实现思路(若方便)或相关截图 55 | placeholder: | 56 | 例如:“此功能有助于群管理员更灵活地管理社群,并及时激励活跃用户” 57 | (可在此附上相关的草图或参考示例截图) 58 | 59 | - type: markdown 60 | attributes: 61 | value: | 62 | 感谢你的建议! 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | wordcloud_core/__pycache__/__init__.cpython-310.pyc 2 | wordcloud_core/__pycache__/generator.cpython-310.pyc 3 | .github/copilot-instructions.md 4 | __pycache__/constant.cpython-310.pyc 5 | __pycache__/main.cpython-310.pyc 6 | __pycache__/utils.cpython-310.pyc 7 | wordcloud_core/__pycache__/history_manager.cpython-310.pyc 8 | wordcloud_core/__pycache__/scheduler.cpython-310.pyc 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 
41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 
79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 
110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 
143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 
174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. 
This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #
☁️ CloudRank
2 | 3 |
智能词云分析 · 聊天热度排行
4 | 5 |
6 | 7 |
Version 8 | License 9 | Python Version 10 | AstrBot Compatible 11 |
12 | 13 |
14 | OneBot v11 Support 15 | WeChatPadPro Support 16 | Last Updated 17 |
18 | 19 | ## ◆ 介绍 20 | 21 | CloudRank 插件是一款用于 AstrBot 的插件,能够将群聊或私聊中的文本消息进行分析,并生成美观的词云图像。通过词云,用户可以直观地了解一段时间内聊天内容的关键词和热点话题。插件同时提供用户活跃度排名功能,展示群内最活跃的成员。插件支持自动定时生成和手动触发生成,并提供了丰富的配置选项,让您可以定制个性化的词云和排名显示。 22 | 23 | 现已完全适配 AstrBot 版本 v4.0.0 及以上,经本地测试验证,各项功能运行正常。插件使用现代异步 SQLAlchemy ORM 进行数据库操作,确保了数据访问的高效性和可靠性。所有数据库操作都通过 AstrBot 提供的中心化数据库管理系统进行,无需额外的数据库配置。 24 | 25 | ## ◆ 功能特性 26 | 27 | - **定时自动生成**:支持 Cron 表达式配置,定时为指定群聊或所有启用的会话生成词云 28 | - **每日词云**:可在每日固定时间生成当天的聊天词云并推送到指定群聊,可自定义标题 29 | - **手动触发生成**:用户可以通过命令手动生成指定天数内的聊天词云 30 | - **多种视觉定制**: 31 | - **背景颜色**:自定义词云图片的背景色 32 | - **配色方案**:选择不同的预设配色方案,改变词语的颜色分布 33 | - **字体**:支持指定自定义字体文件,解决特殊字符显示问题或实现特定视觉风格 34 | - **形状**:支持预设形状(如圆形、矩形、菱形、三角形),更重要的是支持通过 **自定义蒙版图片 (`custom_mask_path`)** 来定义任意词云轮廓 35 | - **灵活的配置管理**: 36 | - **群聊启用/禁用**:可以指定哪些群聊启用词云功能 37 | - **词语过滤**:设置最小词长度、最大词数量 38 | - **停用词**:支持自定义停用词列表,过滤常见但无意义的词语 39 | - **机器人消息统计**:可配置是否将机器人自身发送的消息计入词云统计 (`include_bot_messages`) 40 | - **用户活跃度排行**: 41 | - 词云生成后自动显示群内活跃用户排行榜 42 | - 可自定义排行显示人数和奖牌样式 43 | - 显示用户名称和发言贡献度 44 | - **消息历史记录**:插件会自动记录消息用于分析,用户无需额外操作 45 | - **易于使用**:提供简洁的命令进行交互 46 | - **调试模式**:可选的详细日志输出,方便排查问题 47 | 48 | ## ◆ 系统要求 49 | 50 | - **AstrBot 版本**:v4.0.0 及以上 51 | - 使用新版异步 ORM 数据库系统 52 | - 无需额外的数据库配置 53 | - **Python 版本**:3.10+ 54 | - 支持异步特性 55 | - 兼容新版 SQLAlchemy 56 | 57 | ## ◆ 平台支持 58 | 59 | CloudRank 插件基于 AstrBot 平台开发: 60 | 61 | - **QQ**:支持 QQ 群聊的词云生成 62 | - **微信**:支持基于 WeChatPadPro 微信群聊的词云生成 63 | 64 | ## ◆ 安装方法 65 | 66 | 1. **下载插件**: 67 | - 通过 `git clone https://github.com/GEMILUXVII/astrbot_plugin_cloudrank.git` 克隆仓库到本地 68 | 2. **放置插件文件**: 69 | - 解压下载的压缩包 70 | - 将整个插件文件夹 ( `CloudRank`) 复制到 AstrBot 的插件目录: `AstrBot/data/plugins/` 71 | - 最终路径应为 `AstrBot/data/plugins/cloudrank/` 72 | 3. **安装依赖**: 73 | - 打开终端或命令行,进入插件目录: `cd AstrBot/data/plugins/cloudrank/` 74 | - 安装所需的 Python 包: `pip install -r requirements.txt` 75 | 4. **重启 AstrBot**: 76 | - 完全重启 AstrBot 以加载新插件 77 | 5. 
**配置插件**: 78 | - 在 AstrBot 的插件管理界面找到 "CloudRank" 插件,进行相关配置 79 | 80 | ## ◆ 配置说明 81 | 82 | 插件的配置通过 `_conf_schema.json` 文件定义,您可以在 AstrBot 后台的插件配置页面进行修改。以下是主要的配置项及其说明: 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 |
配置项 类型 描述 默认值 效果说明
auto_generate_enabled bool 是否启用自动生成词云功能 true true 时,插件会根据 auto_generate_cron 的设置定时生成词云
auto_generate_cron string 自动生成词云的 CRON 表达式 0 20 * * * 标准 CRON 格式 ( 分 时 日 月 周 )。例如,默认值表示每天晚上 20:00 执行
timezone string 自定义插件使用的时区 Asia/Shanghai 有效的 IANA 时区名称,例如 `Asia/Shanghai`, `Europe/London`, `America/New_York`, 或者 `UTC`
daily_generate_enabled bool 是否启用每日词云生成功能 true true 时,插件会根据 daily_generate_time 的设置每日生成词云
daily_generate_time string 每日词云的生成时间 23:30 格式为 HH:MM 。例如, 23:30 表示每天晚上 11 点 30 分
daily_summary_title string 每日词云图片的标题模板 "{date} {group_name} 今日词云" 支持占位符: {date} (当前日期), {group_name} (群聊名称)
enabled_group_list string 启用词云功能的群聊列表 "" (空字符串) 以英文逗号分隔的群号列表,例如 123456789,987654321 。仅在此处填写的群号才会启用词云功能。如果留空,则默认所有群聊都不启用词云功能
history_days int 手动生成词云时,默认统计的历史消息天数 7 当用户使用 /wordcloud 命令且未指定天数时,将使用此值
max_word_count int 词云图片中显示的最大词语数量 100 控制词云的密集程度和信息量。建议值在 50 到 200 之间
min_word_length int 参与词频统计的最小词语长度 2 小于此长度的词语(通常是单个字或无意义的短词)将被忽略
min_word_frequency int 最小词频 1 出现次数低于此值的词将被过滤,以优化词云视觉效果,设为 1 则不过滤
min_font_size int 词云中最小字体大小 8 控制低频词汇的最小显示字体大小,与 max_font_size 配合调整词云的字体大小对比度
max_font_size int 词云中最大字体大小 170 控制高频词汇的最大显示字体大小,与 min_font_size 配合调整词云的字体大小对比度,使高频词更加突出
background_color string 词云图片的背景颜色 white 可以是颜色名称 (如 white , black , lightyellow ) 或十六进制颜色代码 (如 #FFFFFF )
colormap string 词云的配色方案,决定词语的颜色 viridis 不同的 Colormap 会给词云带来完全不同的视觉风格。可选值包括: viridis , plasma , inferno , rainbow , jet
font_path string 自定义字体文件的路径 "" (空字符串) 如果留空,插件会尝试使用内置的默认字体 (通常是霞鹜文楷) 或系统字体。可指定 .ttf .otf 字体文件
stop_words_file string 停用词文件的路径 stop_words.txt 指定一个文本文件,每行包含一个要忽略的词语。路径相对于插件 resources/ 目录或绝对路径
include_bot_messages bool 是否将机器人自身的消息计入词云统计 false true 时,机器人自己发送的消息也会被用于生成词云。默认为关闭
shape string 词云的预设形状 rectangle 支持 rectangle (矩形), circle (圆形), diamond (菱形), triangle_up (上三角)。如果设置了下方的 "自定义蒙版图片路径",则此选项无效
custom_mask_path string 自定义蒙版图片路径 "" (空字符串) 提供一个图片文件的路径作为词云的形状蒙版:图片中白色区域将被忽略,非白色区域将用于绘制词语。如果设置了此路径,则预设的 '形状' 选项将无效。支持相对路径(相对于插件数据目录下的 resources/images/ 子目录)或绝对路径
show_user_ranking bool 是否在每日词云中显示用户活跃度排行 true true 时,词云生成后会同时显示当天发言最活跃的用户排行榜,包含发言人数统计和贡献度排名
ranking_user_count int 用户排行榜显示的人数 5 设置排行榜显示前多少名活跃用户,建议设置 5-10 之间的值,过多可能导致排行榜信息过长
ranking_medals string 排行榜奖牌表情 🥇, 🥈, 🥉, 🏅, 🏅 用逗号分隔的表情符号,前三名会使用前三个表情,其余位置使用后续表情
debug_mode bool 是否启用详细调试日志 false true 时,插件会在控制台输出更详细的运行信息,主要用于开发者排查问题
261 | 262 | ## ◆ 使用命令 263 | 264 | 以下是与词云插件交互的主要命令: 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 |
命令 描述 示例
/wordcloud [天数] 生成当前会话 (群聊或私聊) 的词云,可选择指定统计过去多少天的消息 /wordcloud (使用默认天数)
/wordcloud 3 (最近 3 天)
/wc help 显示本插件的帮助信息,包括命令列表 /wc help
/wc test 生成测试词云,无需历史数据 /wc test
/wc today 手动触发生成当前会话今天的词云 /wc today
/wc enable [群号] 在指定群聊启用词云功能,如果未提供群号,则在当前群聊启用 (管理员权限) /wc enable 123456789
/wc disable [群号] 在指定群聊禁用词云功能,如果未提供群号,则在当前群聊禁用 (管理员权限) /wc disable 123456789
/wc force_daily 强制为所有配置了每日词云的会话立即生成一次每日词云(管理员权限) /wc force_daily
308 | 309 | ## ◆ 自然语言关键词 310 | 311 | 除了上述命令外,您还可以使用以下自然语言关键词触发相应功能: 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 |
关键词 功能描述 等效命令
今日词云
获取今日词云
查看今日词云
生成今日词云
生成当前会话今天的词云图 /wc today
生成词云
查看词云
最近词云
历史词云
生成最近 7 天(或配置的默认天数)的词云图 /wordcloud
词云帮助
词云功能
词云说明
词云指令
显示词云插件的帮助信息 /wc help
335 | 336 | > [!TIP] 337 | > 338 | > 使用自然语言关键词可以更方便地触发功能,无需记忆复杂的命令格式 339 | 340 | ### 自定义关键词 341 | 342 | 如果您想添加或修改触发关键词,可以编辑 `constant.py` 文件中的 `NATURAL_KEYWORDS` 字典: 343 | 344 | ```python 345 | # 自然语言关键词 - 用于触发命令的关键词 346 | # 格式: {"command": ["关键词1", "关键词2", ...]} 347 | NATURAL_KEYWORDS = { 348 | "today": ["今日词云", "获取今日词云", "查看今日词云", "生成今日词云"], 349 | "wordcloud": ["生成词云", "查看词云", "最近词云", "历史词云"], 350 | "help": ["词云帮助", "词云功能", "词云说明", "词云指令"], 351 | } 352 | ``` 353 | 354 | 您可以根据需要添加新的命令和关键词,或者为现有命令添加更多关键词。修改后重启机器人即可生效 355 | 356 | ## ◆ 词云样例 357 | 358 | ![Image](https://i.imgur.com/GdOOd7y.png) 359 | 360 | > [!NOTE] 361 | > 362 | > 上图词云样例采用以下主要配置生成:`max_word_count`: 50, `min_word_length`: 2, `min_word_frequency`: 2, `min_font_size`: 8, `max_font_size`: 170, `background_color`: pink, `colormap`: magma, `font_path`: (使用内置霞鹜文楷), `shape`: circle. 363 | 364 | ## ◆ 项目结构 (简化) 365 | 366 | ``` 367 | cloudrank/ 368 | ├── wordcloud_core/ # 核心词云生成与管理逻辑 369 | │ ├── generator.py # 词云图像生成器 370 | │ ├── history_manager.py # 聊天历史记录管理 371 | │ ├── scheduler.py # 定时任务调度器 372 | │ └── __init__.py # 包初始化文件 373 | ├── fonts/ # 字体文件目录 374 | ├── _conf_schema.json # 插件配置文件结构定义 375 | ├── main.py # 插件主逻辑 (Star 类定义) 376 | ├── constant.py # 插件内部常量和自然语言关键词配置 377 | ├── utils.py # 工具函数 378 | ├── stop_words.txt # 默认停用词列表 379 | ├── requirements.txt # Python 依赖包列表 380 | ├── metadata.yaml # 插件元数据 (供 AstrBot 识别) 381 | ├── LICENSE # 开源许可证 382 | └── README.md # 本说明文档 383 | ``` 384 | 385 | 数据目录结构 (通过 StarTools.get_data_dir 动态创建): 386 | 387 | ``` 388 | AstrBot/data/plugin_data/cloudrank/ 389 | ├── resources/ # 资源文件目录 390 | │ ├── fonts/ # 字体文件目录(存放LXGWWenKai-Regular.ttf等字体) 391 | │ ├── images/ # 自定义蒙版图片存放目录 (例如 my_mask.png) 392 | │ └── stop_words.txt # 自定义停用词列表 393 | ├── images/ # 生成的词云图片缓存目录 (这是插件输出图片的目录) 394 | └── debug/ # 调试信息目录(仅在排查问题时使用) 395 | ``` 396 | 397 | ## ◆ 高级说明与定制 398 | 399 | ### 自定义停用词 400 | 401 | 编辑位于数据目录的 `resources/stop_words.txt` 文件,每行添加一个不想出现在词云中的词。 402 | 403 | ### 自定义字体 404 | 405 | 将字体文件 (如 
`.ttf`, `.otf`) 放入数据目录 `resources/fonts/` 下,然后在插件配置中将 `font_path` 设置为该字体文件的名称 (例如 `my_font.ttf`)。如果字体在系统其他位置,可以设置绝对路径。 406 | 407 | ### 自定义词云形状 (使用蒙版图片) 408 | 409 | 1. **准备蒙版图片** 410 | 411 | - 创建一个图像文件 (推荐使用 `.png` 格式,背景透明更佳,但 `.jpg` 等常见格式也可以) 412 | - 在图片中,**您希望词语出现的区域应该是深色(如黑色)**,而 **希望留空的背景区域应该是浅色(如白色)** 413 | - 词云生成器会将图片中接近纯黑色的部分作为词语填充的有效区域,纯白色部分则会忽略 414 | - 图片尺寸会影响最终词云的分辨率和细节,但插件会尝试适应。一个几百像素到一千像素宽高的图片通常效果不错 415 | 416 | 2. **放置蒙版图片** 417 | 418 | - 将您的蒙版图片文件(例如 `my_mask.png`)放置到插件的数据目录下的 `resources/images/` 子目录中 419 | - 完整路径通常是 `AstrBot/data/plugin_data/cloudrank/resources/images/` 420 | - 如果该 `images` 子目录不存在,插件在启动时会自动创建它 421 | 422 | 3. **配置插件** 423 | 424 | - 在 AstrBot 的插件管理界面,找到 "CloudRank" 插件的配置 425 | - 在 **"自定义蒙版图片路径 (`custom_mask_path`)"** 配置项中,填入您放置的图片文件名,例如 `my_mask.png` 426 | - **注意**: 如果您在这里配置了有效的图片路径,那么预设的 "词云的预设形状 (`shape`)" 配置项将会被忽略 427 | 428 | 4. **重新加载/测试** 429 | - 保存配置后,建议重新加载插件或重启 AstrBot (如果插件管理界面支持热重载,则可能无需重启) 430 | - 然后尝试生成一个词云 (例如使用 `/wc test` 命令) 来查看自定义形状的效果 431 | 432 | ### 自定义时区 433 | 434 | 插件允许您配置运行时使用的时区,这对于确保定时任务(如每日词云生成、CRON 表达式定义的任务)按照您期望的本地时间执行至关重要。 435 | 436 | - **配置方法**: 在 AstrBot 的插件管理界面,找到 "CloudRank" 插件的配置中的 **"自定义插件使用的时区 (IANA 时区名称) (`timezone`)"** 选项 437 | - **有效值**: 您需要输入一个有效的 IANA 时区名称,例如: 438 | - `Asia/Shanghai` (默认值) 439 | - `Europe/London` 440 | - `America/New_York` 441 | - `UTC` 442 | - **参考资源**: 您可以参考 [维基百科的时区列表](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) 或通过 Python 的 `pytz.all_timezones` (如果您熟悉 Python 环境) 来查找合适的时区名称 443 | - **影响范围**: 此设置会影响所有与时间相关的调度,包括每日词云的生成时间和 CRON 任务的触发时间 444 | 445 | ## ◆ 注意事项 446 | 447 | - **首次使用**: 首次生成词云或插件加载时,可能需要一些时间来初始化分词库 (如 `jieba`) 和其他资源 448 | - **中文字体**: 为确保中文在词云中正确显示,建议在配置中明确指定一个包含中文字符的字体路径 (`font_path`)。插件会尝试使用内置的霞鹜文楷字体,如果加载失败或需要特定字体,则此配置项非常重要 449 | - **资源存储**: 插件会在 AstrBot 的数据目录 (通常是 `AstrBot/data/plugin_data/cloudrank/` 或由 `StarTools.get_data_dir(PLUGIN_NAME)` 返回的路径) 下存储字体、停用词和生成的图片缓存。此目录包含三个主要子目录:`resources/`(存放字体和停用词)、`images/`(存放生成的词云图片)和 `debug/`(存放调试信息)。请确保 AstrBot 
运行的用户对此目录有读写权限,并有足够的存储空间 450 | - **消息数据存储与 session_id 标准化**: 451 | - 本插件的消息历史记录存储在 **AstrBot 核心的中央 SQLite 数据库** 中 (通常是 `AstrBot/data/data_v3.db` 或类似路径),具体表名为 `wordcloud_message_history`。插件本身不在其独立的插件数据目录下创建数据库文件 452 | - 插件会将群聊消息的 `session_id` 统一为标准格式;这意味着更新插件后,新记录的群聊消息将使用标准化后的 ID。旧的群聊消息如果之前是按其他 `session_id` 格式存储的,可能不会被包含在更新后的群聊词云查询中,除非进行数据迁移。查看或备份消息数据需要访问 AstrBot 的主数据库 453 | - **消息内容与统计范围**: 454 | - 本插件设计的初衷是基于 **文本内容** 生成词云。因此,在记录消息时,只有那些实际包含文本的消息才会被存储到 `wordcloud_message_history` 数据库表中。纯图片、文件、系统提示、语音消息或大部分表情符号(如果它们没有附带文本描述)等非文本内容将 **不会** 被记录,也不会计入词云生成的消息总数中 455 | - 因此,插件报告的 "共统计了 X 条消息" 或 "共产生 X 条发言" 是指在指定时间段内,**被插件记录下来的、包含文本内容的消息数量**,这个数量可能少于您在该聊天中看到的总事件数 456 | - **性能考虑**: 记录和分析大量聊天数据可能会消耗一定的系统资源,对于非常活跃的机器人或服务器资源有限的情况,请适当调整历史记录天数和词云生成频率 457 | - **依赖冲突**: 确保 `requirements.txt` 中列出的依赖版本与您的 Python 环境和其他 AstrBot 插件兼容 458 | 459 | ## ◆ 问题排查 (FAQ) 460 | 461 | - **词云不显示中文/中文显示为方框**: 462 | - **原因**: 未找到合适的中文字体或配置的字体不包含所需字符 463 | - **解决**: 在插件配置中设置 `font_path` 为一个有效的中文字体文件路径,可以将字体文件放入 `resources/fonts/` 目录并指定文件名,或使用系统字体的绝对路径 464 | - **命令没有反应**: 465 | - **原因**: 插件未正确加载、被禁用、命令输入错误或权限不足 466 | - **解决**: 检查 AstrBot 后台插件是否已启用,查看 AstrBot 日志有无报错,确认命令格式正确,以及执行需要权限的命令时是否拥有相应权限 467 | - **自动生成词云未按时执行**: 468 | - **原因**: CRON 表达式配置错误、AstrBot 或插件在此期间未运行、或任务调度器出现问题 469 | - **解决**: 检查 `auto_generate_cron` 和 `daily_generate_time` 的配置格式是否正确,确保 AstrBot 持续运行,查看日志中与 `TaskScheduler` 或词云生成相关的错误 470 | - **如何添加更多停用词**: 471 | - **解决**: 找到插件的数据目录下的 `resources/stop_words.txt` 文件,直接编辑该文件,每行添加一个词 472 | - **词云颜色不喜欢**: 473 | - **解决**: 修改配置项 `background_color` 设置背景色,修改 `colormap` 选择不同的词语配色方案 474 | - **自然语言关键词没有触发**: 475 | - **原因**: 关键词未正确配置、关键词大小写或空格不匹配、或消息被识别为命令 476 | - **解决**: 确保消息格式完全匹配 `constant.py` 中定义的关键词,包括空格和标点符号,确保消息不以 `/` 开头,否则会被视为命令而非普通消息 477 | 478 | ## ◆ 更新日志 479 | 480 | #### **v2.0.1** (2025-09-13) 481 | 482 | **问题修复**: 483 | 484 | - 修复每日定时词云发送失败的问题 485 | - 解决平台 ID 映射不正确导致的发送失败问题 486 | - 修正 session_id 格式转换逻辑,确保与 AstrBot 统一消息来源格式匹配 487 | - 更新定时任务中的消息发送机制,使用正确的 MessageEventResult API 488 | 489 | #### **v2.0.0** (2025-09-12)
490 | 491 | **重大更新**: 492 | 493 | - 完全适配 AstrBot v4.0.0 数据库系统 494 | - 迁移到现代异步 SQLAlchemy ORM 495 | - 移除所有直接 SQL 操作 496 | - 使用 AstrBot 提供的中央数据库服务 497 | - 提升数据操作的性能和可靠性 498 | 499 | **优化改进**: 500 | 501 | - 增强了指令过滤机制 502 | - 优化词云生成时的消息过滤 503 | - 更准确地排除指令相关文字 504 | - 改进自然语言命令的处理逻辑 505 | 506 | **系统要求**: 507 | 508 | - 需要 AstrBot v4.0.0 或更高版本 509 | - Python 3.10+ 运行环境 510 | 511 | #### **v1.3.9** (2025-07-06) 512 | 513 | **效果改进**: 514 | 515 | - 增强了消息清洗逻辑,能更精确地过滤指令、@消息、昵称等无意义内容,提高词云质量 516 | 517 | #### **v1.3.8-rev1** (2025-05-30) 518 | 519 | **效果改进**: 520 | 521 | - 移除了词云生成时对最大字体大小 (`max_font_size`) 的硬编码上限(原为 120)及 `relative_scaling` 参数的固定设置,允许用户通过配置更自由地控制字体大小 522 | 523 | **修复**: 524 | 525 | - 修正 `min_word_frequency` 配置项的默认值为 `1` 526 | - 统一了 `_conf_schema.json`, `main.py` 和 `README.md` 中关于 `min_word_frequency` 的默认值描述 527 | - 调整了 `README.md` 中配置项表格的顺序,使其与 `_conf_schema.json` 一致 528 | 529 | #### **v1.3.8** (2025-05-30) 530 | 531 | **新增功能**: 532 | 533 | - 新增 `min_word_frequency` 配置项,允许用户设置词云生成时词语的最小出现频率 534 | - 出现次数低于此配置值的词语将被过滤,有助于生成更清晰、更聚焦高频词汇的词云 535 | - 默认值为 `2`,即词语至少出现 2 次才会被统计,设置为 `1` 则不进行词频过滤 536 | 537 | **配置更新:** 538 | 539 | - `min_word_frequency`: 控制词云中词语的最小出现次数(默认值:2) 540 | 541 | #### **v1.3.7**(2025-05-29) 542 | 543 | **平台支持扩展:** 544 | 545 | - 新增 WeChatPadPro 平台词云生成支持 546 | 547 | **贡献者:** 548 | 549 | - 感谢 [@xu-wish](https://github.com/xu-wish) 通过 [PR #9](https://github.com/GEMILUXVII/astrbot_plugin_cloudrank/pull/9) 贡献 WeChatPadPro 平台支持 550 | 551 | #### **v1.3.6**(2025-05-28) 552 | 553 | **停用词系统更新:** 554 | 555 | - 大幅增强停用词过滤系统,从原有的 4 个示例停用词扩展到 700+个综合停用词 556 | - 新增中文常用停用词:的、了、在、和、是等基础词汇 557 | - 新增中文语气词和感叹词:阿、啊、哈哈、呵呵等表情化词汇 558 | - 新增中文代词和指示词:俺们、这个、那个、某些等指代词汇 559 | - 新增中文连词和介词:按照、从而、对于、关于等连接词汇 560 | - 新增英文常用停用词:a、the、and、but 等英文基础词汇 561 | - 新增网络用语和表情符号文字:emmm、哈哈哈、呵呵等网络表达 562 | - 新增常见无意义词汇:东西、事情、情况、方面等模糊词汇 563 | - 新增标点符号和特殊字符过滤支持 564 | 565 | **改进效果:** 566 | 567 | - 显著提升词云质量,过滤掉无意义的高频词汇 568 | - 让关键词汇更加突出,提高词云的可读性和价值 569 | - 支持中英文混合文本的高质量词云生成 570 | 571 | #### 
**v1.3.5**(2025-05-28) 572 | 573 | **新功能:** 574 | 575 | - 新增 `min_font_size` 和 `max_font_size` 配置项,允许自定义词云字体大小范围 576 | - 改进字体大小对比度,从默认的 10-120 调整为 8-170,使高频词汇更加突出 577 | - 增强词云视觉效果,提供更好的高低频词汇对比显示 578 | 579 | **配置更新:** 580 | 581 | - `min_font_size`: 控制低频词汇的最小字体大小(默认值:8) 582 | - `max_font_size`: 控制高频词汇的最大字体大小(默认值:170) 583 | - 这些配置项允许用户根据需要调整词云的视觉对比度 584 | 585 | #### **v1.3.4**(2025-05-27) 586 | 587 | **重要修复:** 588 | 589 | - 修复词云生成时包含群成员@提及 ID 的问题 590 | - 在 `segment_text` 函数中添加正则表达式过滤,自动移除@用户提及内容 591 | - 确保词云统计结果更加准确和美观,不再出现如 "@6emasvii" 等用户 ID 592 | 593 | #### **v1.3.3**(2025-05-23) 594 | 595 | **新功能与改进:** 596 | 597 | - 新增 `timezone` 配置项,允许用户为插件任务自定义时区 598 | - 新增 `custom_mask_path` 配置项,允许用户指定自定义图片作为词云形状蒙版 599 | - 新增 `include_bot_messages` 配置项,允许用户选择是否将机器人自身发送的消息计入词云统计 600 | 601 | #### **v1.3.2**(2025-05-12) 602 | 603 | **优化与修复:** 604 | 605 | - 确保每日词云和排行榜统计准确反映当天数据 606 | - 修复 `/wc force_daily` 指令 `no attribute 'data_dir'` 的问题 607 | - 修复每日词云可能无法正常生成的问题 608 | - 解决排行榜 SQL 查询和消息构建中的问题 609 | - 统一排行榜输出样式,修复会话 ID 格式错误 610 | - 新增用户统计方法,提升灵活性 611 | 612 | #### **v1.3.1**(2025-05-11) 613 | 614 | **日志与线程改进:** 615 | 616 | - 标准化日志输出,便于问题排查 617 | - 解决线程重载警告,提升稳定性 618 | 619 | #### **v1.3.0**(2025-05-10) 620 | 621 | **性能与安全提升:** 622 | 623 | - 修复定时任务重复问题,优化资源管理 624 | - 增强线程安全性,改进词云生成过程 625 | - 完善日志记录,优化性能 626 | 627 | #### v1.2.1(2025-05-09) 628 | 629 | **关键词与文档更新:** 630 | 631 | - 添加自然语言关键词处理,提高命令稳定性 632 | - 完善文档,添加更多使用说明 633 | 634 | #### **v1.2.0**(2025-05-08) 635 | 636 | **配置逻辑调整:** 637 | 638 | - 修改群聊启用逻辑,更新配置文件提示 639 | 640 | #### **v1.1.2**(2025-05-08) 641 | 642 | **线程安全修复:** 643 | 644 | - 解决非主线程生成词云时的 `RuntimeError` 645 | 646 | #### **v1.1.1**(2025-05-08) 647 | 648 | **会话与日志优化:** 649 | 650 | - 修复会话 ID 处理逻辑,优化日志输出 651 | 652 | #### **v1.1.0**(2025-05-08) 653 | 654 | **功能扩展:** 655 | 656 | - 插件更名为 "CloudRank",新增用户活跃度排行榜功能 657 | 658 | #### **v1.0.0**(2025-05-08) 659 | 660 | **初始发布:** 661 | 662 | - 发布基础词云生成功能,支持多种视觉定制和配置管理 663 | 664 | ## ◆ 许可证 665 | 666 | 本插件采用 [GNU Affero General Public 
License v3.0 (AGPL-3.0)](https://www.gnu.org/licenses/agpl-3.0.html) 许可证 667 | 668 | ## ◆ 致谢 669 | 670 | 本项目基于或参考了以下开源项目: 671 | 672 | - [AstrBot](https://github.com/AstrBotDevs/AstrBot) - 提供强大的聊天机器人平台支持 673 | - [LXGW WenKai](https://github.com/lxgw/LxgwWenKai) - 霞鹜文楷字体项目,提供了美观的开源中文字体 674 | -------------------------------------------------------------------------------- /_conf_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_generate_enabled": { 3 | "type": "bool", 4 | "description": "是否启用自动生成词云", 5 | "hint": "定时执行,根据cron表达式配置的时间生成词云", 6 | "default": true 7 | }, 8 | "auto_generate_cron": { 9 | "type": "string", 10 | "description": "自动生成词云的cron表达式", 11 | "hint": "使用标准cron格式(分 时 日 月 周),默认每天晚上8点执行", 12 | "default": "0 20 * * *" 13 | }, 14 | "timezone": { 15 | "type": "string", 16 | "description": "自定义插件使用的时区 (IANA时区名称)", 17 | "hint": "请输入有效的IANA时区名称,例如:Asia/Shanghai, Europe/London, America/New_York, UTC。您可以参考维基百科的列表 (https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) 或通过 Python 的 pytz.all_timezones 获取。这将用于确定每日生成词云和cron表达式的本地时间", 18 | "default": "Asia/Shanghai" 19 | }, 20 | "daily_generate_enabled": { 21 | "type": "bool", 22 | "description": "是否启用每日词云生成", 23 | "hint": "每天固定时间生成当天的聊天词云", 24 | "default": true 25 | }, 26 | "daily_generate_time": { 27 | "type": "string", 28 | "description": "每日词云生成时间", 29 | "hint": "格式为 HH:MM,如:23:30 表示每天晚上11点30分", 30 | "default": "23:30" 31 | }, 32 | "daily_summary_title": { 33 | "type": "string", 34 | "description": "每日词云标题模板", 35 | "hint": "支持变量:{date}=日期,{group_name}=群名称", 36 | "default": "{date} {group_name} 今日词云" 37 | }, 38 | "enabled_group_list": { 39 | "type": "string", 40 | "description": "启用今日词云功能的群聊列表", 41 | "hint": "以逗号分隔的群号列表,如:123456789,987654321。仅在此处填写的群号才会启用词云功能,留空则默认所有群都不启用", 42 | "default": "" 43 | }, 44 | "history_days": { 45 | "type": "int", 46 | "description": "统计历史消息的天数", 47 | "hint": "手动生成词云时默认统计的天数", 48 | "default": 7 49 | }, 50 |
"max_word_count": { 51 | "type": "int", 52 | "description": "词云中最大显示词数", 53 | "hint": "建议在50-200之间", 54 | "default": 100 55 | }, 56 | "min_word_length": { 57 | "type": "int", 58 | "description": "最小词长度", 59 | "hint": "小于此长度的词会被忽略,建议为2", 60 | "default": 2 61 | }, 62 | "min_word_frequency": { 63 | "type": "int", 64 | "description": "最小词频", 65 | "hint": "出现次数低于此值的词将被过滤,以优化词云视觉效果,设为1则不过滤", 66 | "default": 1 67 | }, 68 | "background_color": { 69 | "type": "string", 70 | "description": "词云背景颜色", 71 | "hint": "可使用颜色名称或十六进制值,如:white、black、#FFFFFF、#000000", 72 | "default": "white" 73 | }, 74 | "colormap": { 75 | "type": "string", 76 | "description": "词云配色方案", 77 | "hint": "影响词云中词语的颜色", 78 | "default": "viridis", 79 | "options": ["viridis", "plasma", "inferno", "magma", "cividis", "rainbow", "jet", "turbo", "cool", "hot"] 80 | }, 81 | "font_path": { 82 | "type": "string", 83 | "description": "字体路径", 84 | "hint": "可使用相对路径或绝对路径,留空使用默认字体", 85 | "default": "" 86 | }, 87 | "stop_words_file": { 88 | "type": "string", 89 | "description": "停用词文件路径", 90 | "hint": "可使用相对路径或绝对路径,留空使用默认停用词", 91 | "default": "stop_words.txt" 92 | }, 93 | "include_bot_messages": { 94 | "type": "bool", 95 | "description": "是否将机器人自身的消息计入词云统计", 96 | "hint": "开启后,机器人自己发送的消息也会被用于生成词云,默认为关闭", 97 | "default": false 98 | }, 99 | "shape": { 100 | "type": "string", 101 | "description": "词云形状", 102 | "hint": "决定词云的整体形状,如果设置了自定义蒙版路径,则此选项无效", 103 | "default": "rectangle", 104 | "options": ["rectangle", "circle", "diamond", "triangle_up"] 105 | }, 106 | "custom_mask_path": { 107 | "type": "string", 108 | "description": "自定义蒙版图片路径", 109 | "hint": "提供一个图片文件的路径作为词云的形状蒙版,图片中白色区域将被忽略,非白色区域将用于绘制词语,如果设置了此路径,则预设的'形状'选项将无效,支持相对路径(相对于插件数据目录下的resources/images/子目录)或绝对路径", 110 | "default": "" 111 | }, 112 | "min_font_size": { 113 | "type": "int", 114 | "description": "词云最小字体大小", 115 | "hint": "低频词语的最小字体大小,建议设置为8-15之间", 116 | "default": 8 117 | }, 118 | "max_font_size": { 119 | "type": "int", 120 | "description": "词云最大字体大小",
121 | "hint": "高频词语的最大字体大小,建议设置为150-300之间", 122 | "default": 170 123 | }, 124 | "show_user_ranking": { 125 | "type": "bool", 126 | "description": "是否在每日词云中显示用户活跃度排行", 127 | "hint": "开启后,每日词云生成时会同时显示当天发言最活跃的用户排行榜", 128 | "default": true 129 | }, 130 | "ranking_user_count": { 131 | "type": "int", 132 | "description": "用户排行榜显示的人数", 133 | "hint": "设置排行榜显示的用户数量,建议5-10之间", 134 | "default": 5 135 | }, 136 | "ranking_medals": { 137 | "type": "string", 138 | "description": "排行榜奖牌表情", 139 | "hint": "用逗号分隔的表情符号,例如:🥇,🥈,🥉,🏅,🏅 前三名会使用前三个表情", 140 | "default": "🥇,🥈,🥉,🏅,🏅" 141 | }, 142 | "debug_mode": { 143 | "type": "bool", 144 | "description": "启用详细调试日志", 145 | "hint": "开启后会在控制台输出非常详细的调度器和任务执行日志,用于问题排查,请仅在需要时开启,", 146 | "default": false, 147 | "obvious_hint": false 148 | } 149 | } -------------------------------------------------------------------------------- /constant.py: -------------------------------------------------------------------------------- 1 | """ 2 | CloudRank插件常量定义 3 | """ 4 | 5 | import os 6 | from pathlib import Path 7 | 8 | # 插件信息 9 | PLUGIN_NAME = "cloudrank" 10 | PLUGIN_AUTHOR = "GEMILUXVII" 11 | PLUGIN_DESC = "词云与排名插件 (CloudRank) 是一个文本可视化工具,能将聊天记录关键词以词云形式展现,并显示用户活跃度排行榜,支持定时或手动生成" 12 | PLUGIN_VERSION = "2.0.1" 13 | PLUGIN_REPO = "https://github.com/GEMILUXVII/astrbot_plugin_cloudrank" 14 | 15 | # 路径常量 16 | PLUGIN_DIR = Path(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | # DATA_DIR通过StarTools.get_data_dir动态获取 19 | # 这里只是定义一个占位变量,真正的目录会在初始化时设置 20 | # 正确的数据目录应该是:data/plugin_data/cloudrank 21 | DATA_DIR = None # 由主模块初始化 22 | 23 | # 词云生成常量 24 | DEFAULT_WIDTH = 800 25 | DEFAULT_HEIGHT = 400 26 | DEFAULT_MAX_WORDS = 200 27 | DEFAULT_BACKGROUND_COLOR = "white" 28 | DEFAULT_COLORMAP = "viridis" 29 | DEFAULT_MIN_WORD_LENGTH = 2 30 | 31 | # 命令常量 32 | CMD_GENERATE = "wordcloud" 33 | CMD_GROUP = "wc" 34 | CMD_CONFIG = "config" 35 | CMD_HELP = "help" 36 | 37 | # 自然语言关键词 - 用于触发命令的关键词 38 | # 格式: {"command": ["关键词1", "关键词2", ...]} 39 | NATURAL_KEYWORDS = { 40 | "today": 
["今日词云", "获取今日词云", "查看今日词云", "生成今日词云"], 41 | "wordcloud": ["生成词云", "查看词云", "最近词云", "历史词云"], 42 | "help": ["词云帮助", "词云功能", "词云说明", "词云指令"], 43 | } 44 | 45 | # 默认停用词列表 46 | DEFAULT_STOPWORDS = [ 47 | "的", 48 | "了", 49 | "在", 50 | "是", 51 | "我", 52 | "有", 53 | "和", 54 | "就", 55 | "不", 56 | "人", 57 | "都", 58 | "一", 59 | "一个", 60 | "上", 61 | "也", 62 | "很", 63 | "到", 64 | "说", 65 | "要", 66 | "去", 67 | "你", 68 | "会", 69 | "着", 70 | "没有", 71 | "看", 72 | "好", 73 | "自己", 74 | "这", 75 | "the", 76 | "and", 77 | "to", 78 | "of", 79 | "a", 80 | "is", 81 | "in", 82 | "it", 83 | "that", 84 | "for", 85 | "on", 86 | "with", 87 | "as", 88 | "be", 89 | "at", 90 | "this", 91 | "have", 92 | "from", 93 | "by", 94 | "was", 95 | "are", 96 | "or", 97 | "an", 98 | "I", 99 | "but", 100 | "not", 101 | "you", 102 | "he", 103 | "they", 104 | "she", 105 | "we", 106 | ] 107 | -------------------------------------------------------------------------------- /fonts/LXGWWenKai-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GEMILUXVII/astrbot_plugin_cloudrank/34388ef1a7b241c26e6eff121af6750a4113c12b/fonts/LXGWWenKai-Regular.ttf -------------------------------------------------------------------------------- /fonts/OFL.txt: -------------------------------------------------------------------------------- 1 | Copyright 2021-2025 LXGW (https://github.com/lxgw/LxgwWenKai) 2 | Copyright 2020 The Klee Project Authors (https://github.com/fontworks-fonts/Klee) 3 | 4 | This Font Software is licensed under the SIL Open Font License, Version 1.1. 
5 | This license is copied below, and is also available with a FAQ at: 6 | https://openfontlicense.org 7 | 8 | 9 | ----------------------------------------------------------- 10 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 11 | ----------------------------------------------------------- 12 | 13 | PREAMBLE 14 | The goals of the Open Font License (OFL) are to stimulate worldwide 15 | development of collaborative font projects, to support the font creation 16 | efforts of academic and linguistic communities, and to provide a free and 17 | open framework in which fonts may be shared and improved in partnership 18 | with others. 19 | 20 | The OFL allows the licensed fonts to be used, studied, modified and 21 | redistributed freely as long as they are not sold by themselves. The 22 | fonts, including any derivative works, can be bundled, embedded, 23 | redistributed and/or sold with any software provided that any reserved 24 | names are not used by derivative works. The fonts and derivatives, 25 | however, cannot be released under any other type of license. The 26 | requirement for fonts to remain under this license does not apply 27 | to any document created using the fonts or their derivatives. 28 | 29 | DEFINITIONS 30 | "Font Software" refers to the set of files released by the Copyright 31 | Holder(s) under this license and clearly marked as such. This may 32 | include source files, build scripts and documentation. 33 | 34 | "Reserved Font Name" refers to any names specified as such after the 35 | copyright statement(s). 36 | 37 | "Original Version" refers to the collection of Font Software components as 38 | distributed by the Copyright Holder(s). 39 | 40 | "Modified Version" refers to any derivative made by adding to, deleting, 41 | or substituting -- in part or in whole -- any of the components of the 42 | Original Version, by changing formats or by porting the Font Software to a 43 | new environment. 
44 | 45 | "Author" refers to any designer, engineer, programmer, technical 46 | writer or other person who contributed to the Font Software. 47 | 48 | PERMISSION & CONDITIONS 49 | Permission is hereby granted, free of charge, to any person obtaining 50 | a copy of the Font Software, to use, study, copy, merge, embed, modify, 51 | redistribute, and sell modified and unmodified copies of the Font 52 | Software, subject to the following conditions: 53 | 54 | 1) Neither the Font Software nor any of its individual components, 55 | in Original or Modified Versions, may be sold by itself. 56 | 57 | 2) Original or Modified Versions of the Font Software may be bundled, 58 | redistributed and/or sold with any software, provided that each copy 59 | contains the above copyright notice and this license. These can be 60 | included either as stand-alone text files, human-readable headers or 61 | in the appropriate machine-readable metadata fields within text or 62 | binary files as long as those fields can be easily viewed by the user. 63 | 64 | 3) No Modified Version of the Font Software may use the Reserved Font 65 | Name(s) unless explicit written permission is granted by the corresponding 66 | Copyright Holder. This restriction only applies to the primary font name as 67 | presented to the users. 68 | 69 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font 70 | Software shall not be used to promote, endorse or advertise any 71 | Modified Version, except to acknowledge the contribution(s) of the 72 | Copyright Holder(s) and the Author(s) or with their explicit written 73 | permission. 74 | 75 | 5) The Font Software, modified or unmodified, in part or in whole, 76 | must be distributed entirely under this license, and must not be 77 | distributed under any other license. The requirement for fonts to 78 | remain under this license does not apply to any document created 79 | using the Font Software. 
80 | 81 | TERMINATION 82 | This license becomes null and void if any of the above conditions are 83 | not met. 84 | 85 | DISCLAIMER 86 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 87 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF 88 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 89 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE 90 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 91 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 92 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 93 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM 94 | OTHER DEALINGS IN THE FONT SOFTWARE. 95 | -------------------------------------------------------------------------------- /metadata.yaml: -------------------------------------------------------------------------------- 1 | name: cloudrank 2 | desc: 词云与排名插件(CloudRank)是一个文本可视化工具,能将聊天记录关键词以词云形式展现,并显示用户活跃度排行榜,支持定时或手动生成 3 | version: v2.0.1 4 | author: GEMILUXVII 5 | repo: https://github.com/GEMILUXVII/astrbot_plugin_cloudrank 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wordcloud>=1.9.4 2 | matplotlib==3.10.0 3 | jieba==0.42.1 4 | croniter==1.3.8 5 | numpy>=1.23.2 6 | pillow>=9.4.0 7 | pytz -------------------------------------------------------------------------------- /stop_words.txt: -------------------------------------------------------------------------------- 1 | # 中文常用停用词 2 | 的 3 | 了 4 | 在 5 | 和 6 | 是 7 | 就 8 | 也 9 | 还 10 | 有 11 | 我 12 | 你 13 | 他 14 | 她 15 | 它 16 | 我们 17 | 你们 18 | 他们 19 | 她们 20 | 它们 21 | 这个 22 | 那个 23 | 一些 24 | 什么 25 | 怎么 26 | 为什么 27 | 因为 28 | 所以 29 | 但是 30 | 然而 31 | 而且 32 | 并且 33 | 或者 34 | 以及 35 | 于是 36 | 一种 37 | 一个 38 | 一样 39 | 一下 40 | 一般 41 | 依然 42 | 必须 43 | 比如 44 | 得到 45 | 的确 46 | 
否则 47 | 另外 48 | 目前 49 | 那么 50 | 那些 51 | 如果 52 | 如何 53 | 日前 54 | 时候 55 | 虽然 56 | 通过 57 | 同时 58 | 往往 59 | 为何 60 | 为了 61 | 问题 62 | 也许 63 | 一定 64 | 以后 65 | 因此 66 | 由于 67 | 与 68 | 则 69 | 这样 70 | 这里 71 | 这种 72 | 作为 73 | 74 | # 中文语气词和感叹词 75 | 阿 76 | 啊 77 | 哎 78 | 哎呀 79 | 哎哟 80 | 唉 81 | 吧 82 | 呃 83 | 嗯 84 | 哈 85 | 哈哈 86 | 呵 87 | 嘿 88 | 哼 89 | 哼唷 90 | 呼哧 91 | 哗 92 | 哦 93 | 喔唷 94 | 啪达 95 | 呸 96 | 啥 97 | 哇 98 | 喂 99 | 嗡嗡 100 | 唔 101 | 唔呼 102 | 咦 103 | 哉 104 | 吱 105 | 着呢 106 | 107 | # 中文代词和指示词 108 | 俺 109 | 俺们 110 | 本 111 | 此 112 | 此间 113 | 此外 114 | 该 115 | 个 116 | 各 117 | 各个 118 | 各位 119 | 己 120 | 某 121 | 某个 122 | 某些 123 | 哪 124 | 那 125 | 那边 126 | 那儿 127 | 哪个 128 | 那会儿 129 | 那里 130 | 那么些 131 | 那么样 132 | 那时 133 | 那些 134 | 那样 135 | 旁人 136 | 人家 137 | 谁 138 | 谁知 139 | 他人 140 | 它们 141 | 她们 142 | 咱 143 | 咱们 144 | 者 145 | 这 146 | 这边 147 | 这儿 148 | 这会儿 149 | 这就是说 150 | 这么 151 | 这么点儿 152 | 这么些 153 | 这么样 154 | 这时 155 | 这些 156 | 诸位 157 | 自 158 | 自从 159 | 自各儿 160 | 自个儿 161 | 自己 162 | 自家 163 | 自身 164 | 165 | # 中文连词和介词 166 | 按 167 | 按照 168 | 被 169 | 比起 170 | 比如说 171 | 并 172 | 不比 173 | 不成 174 | 不单 175 | 不但 176 | 不独 177 | 不管 178 | 不光 179 | 不过 180 | 不仅 181 | 不拘 182 | 不论 183 | 不怕 184 | 不然 185 | 不如 186 | 不特 187 | 不惟 188 | 不问 189 | 不只 190 | 朝 191 | 朝着 192 | 趁 193 | 趁着 194 | 乘 195 | 冲 196 | 除 197 | 除此之外 198 | 除非 199 | 除了 200 | 从 201 | 从而 202 | 打 203 | 待 204 | 当 205 | 当着 206 | 到 207 | 得 208 | 等 209 | 等等 210 | 地 211 | 第 212 | 对 213 | 对于 214 | 多 215 | 而 216 | 而外 217 | 而言 218 | 而已 219 | 尔后 220 | 反过来 221 | 反过来说 222 | 反之 223 | 非但 224 | 非徒 225 | 根据 226 | 跟 227 | 故 228 | 故此 229 | 固然 230 | 关于 231 | 管 232 | 归 233 | 果然 234 | 果真 235 | 过 236 | 何 237 | 何处 238 | 何况 239 | 何时 240 | 乎 241 | 还是 242 | 换句话说 243 | 换言之 244 | 或 245 | 或是 246 | 既 247 | 既然 248 | 及 249 | 及其 250 | 及至 251 | 即 252 | 即便 253 | 即或 254 | 即令 255 | 即若 256 | 即使 257 | 几 258 | 几时 259 | 加之 260 | 假如 261 | 假若 262 | 假使 263 | 鉴于 264 | 将 265 | 较 266 | 较之 267 | 叫 268 | 接着 269 | 结果 270 | 借 271 | 紧接着 272 | 进而 273 | 尽 274 | 尽管 275 | 尽管如此 276 | 据 277 | 据此 278 | 
据实而言 279 | 据悉 280 | 据我所知 281 | 据说 282 | 举凡 283 | 可 284 | 可见 285 | 可是 286 | 可以 287 | 况且 288 | 来 289 | 来着 290 | 离 291 | 例如 292 | 连 293 | 两样 294 | 临 295 | 另 296 | 另一方面 297 | 论 298 | 每 299 | 每当 300 | 们 301 | 莫若 302 | 乃 303 | 乃至 304 | 能 305 | 您 306 | 宁 307 | 宁可 308 | 宁肯 309 | 宁愿 310 | 凭 311 | 凭借 312 | 其 313 | 其次 314 | 其二 315 | 其他 316 | 其它 317 | 其一 318 | 其余 319 | 其中 320 | 起 321 | 起见 322 | 岂但 323 | 岂止 324 | 恰恰相反 325 | 前后 326 | 前者 327 | 且 328 | 然后 329 | 然则 330 | 让 331 | 任 332 | 任何 333 | 任凭 334 | 如 335 | 如此 336 | 如其 337 | 如若 338 | 如上所述 339 | 若 340 | 若非 341 | 若是 342 | 尚且 343 | 设若 344 | 设使 345 | 甚而 346 | 甚么 347 | 甚至 348 | 省得 349 | 什么样 350 | 是的 351 | 首先 352 | 顺 353 | 顺着 354 | 俟 355 | 虽说 356 | 虽则 357 | 随 358 | 随着 359 | 所 360 | 腾 361 | 替 362 | 同 363 | 同样 364 | 万一 365 | 往 366 | 望 367 | 为 368 | 为着 369 | 以便 370 | 以免 371 | 以前 372 | 以至 373 | 以至于 374 | 以致 375 | 抑或 376 | 矣 377 | 用 378 | 由 379 | 由此可见 380 | 有的 381 | 有些 382 | 有关 383 | 与此同时 384 | 与否 385 | 与其 386 | 越是 387 | 云云 388 | 再 389 | 再其次 390 | 再则 391 | 再说 392 | 在下 393 | 在于 394 | 怎 395 | 怎么办 396 | 怎么样 397 | 怎样 398 | 咋 399 | 照 400 | 照着 401 | 之 402 | 之类 403 | 之所以 404 | 之一 405 | 之前 406 | 之后 407 | 之中 408 | 止 409 | 只 410 | 只不过 411 | 只限 412 | 只要 413 | 只有 414 | 至 415 | 至于 416 | 着 417 | 自各儿 418 | 综上所述 419 | 总而言之 420 | 总之 421 | 总的说来 422 | 纵 423 | 纵令 424 | 纵然 425 | 纵使 426 | 遵照 427 | 遵循 428 | 依照 429 | 按照 430 | 431 | # 中文常用短语 432 | 也就是说 433 | 换句话说 434 | 总的来说 435 | 一般而言 436 | 实际上 437 | 事实上 438 | 例如说 439 | 等等等等 440 | 另外的话 441 | 与此同时 442 | 443 | # 网络常用词汇 444 | 哈哈哈 445 | 呵呵 446 | 嗯嗯 447 | 额 448 | 呃呃 449 | emmm 450 | 诶 451 | 咦咦 452 | 哇哈哈 453 | 啦啦啦 454 | 噢噢 455 | 嗷嗷 456 | 咯咯 457 | 嘻嘻 458 | 吼吼 459 | 哼哼 460 | 461 | # 数字和符号相关 462 | 一 463 | 二 464 | 三 465 | 四 466 | 五 467 | 六 468 | 七 469 | 八 470 | 九 471 | 十 472 | 零 473 | 百 474 | 千 475 | 万 476 | 亿 477 | 478 | # English Stop Words 479 | a 480 | able 481 | about 482 | above 483 | across 484 | after 485 | all 486 | almost 487 | also 488 | am 489 | among 490 | an 491 | and 492 | any 493 | are 494 | as 495 | at 
496 | be 497 | because 498 | been 499 | but 500 | by 501 | can 502 | cannot 503 | could 504 | dear 505 | did 506 | do 507 | does 508 | either 509 | else 510 | ever 511 | every 512 | for 513 | from 514 | get 515 | got 516 | had 517 | has 518 | have 519 | he 520 | her 521 | hers 522 | him 523 | his 524 | how 525 | however 526 | i 527 | if 528 | in 529 | into 530 | is 531 | it 532 | its 533 | just 534 | least 535 | let 536 | like 537 | likely 538 | may 539 | me 540 | might 541 | most 542 | must 543 | my 544 | neither 545 | no 546 | nor 547 | not 548 | of 549 | off 550 | often 551 | on 552 | only 553 | or 554 | other 555 | our 556 | own 557 | rather 558 | said 559 | say 560 | says 561 | she 562 | should 563 | since 564 | so 565 | some 566 | than 567 | that 568 | the 569 | their 570 | them 571 | then 572 | there 573 | these 574 | they 575 | this 576 | tis 577 | to 578 | too 579 | twas 580 | us 581 | wants 582 | was 583 | we 584 | were 585 | what 586 | when 587 | where 588 | which 589 | while 590 | who 591 | whom 592 | why 593 | will 594 | with 595 | would 596 | yet 597 | you 598 | your 599 | 600 | # 常用标点和符号 (如果分词器会产生) 601 | , 602 | 。 603 | ! 604 | ? 
605 | ; 606 | : 607 | " 608 | " 609 | ' 610 | ' 611 | ( 612 | ) 613 | 【 614 | 】 615 | 《 616 | 》 617 | 、 618 | … 619 | — 620 | ~ 621 | · 622 | @ 623 | # 624 | % 625 | & 626 | * 627 | + 628 | - 629 | = 630 | | 631 | \ 632 | / 633 | < 634 | > 635 | ^ 636 | _ 637 | ` 638 | { 639 | } 640 | [ 641 | ] 642 | 643 | # 网络用语和表情符号文字 644 | 哈 645 | 呵 646 | 哦 647 | 啊 648 | 嗯 649 | 额 650 | 诶 651 | 咦 652 | 哇 653 | 噢 654 | 嗷 655 | 咯 656 | 嘻 657 | 吼 658 | 哼 659 | 嘿 660 | 喔 661 | 唔 662 | 嘘 663 | 嗬 664 | 咳 665 | 啧 666 | 唷 667 | 咿 668 | 呀 669 | 吖 670 | 唉 671 | 嗨 672 | 嗯哼 673 | 啊哈 674 | 呃呃 675 | 呵呵 676 | 嗯嗯 677 | 咦咦 678 | 哇哈 679 | 噢噢 680 | 嗷嗷 681 | 咯咯 682 | 嘻嘻 683 | 吼吼 684 | 哼哼 685 | 686 | # 常见无意义词汇 687 | 东西 688 | 事情 689 | 情况 690 | 方面 691 | 地方 692 | 时间 693 | 时候 694 | 样子 695 | 这样 696 | 那样 697 | 怎样 698 | 这种 699 | 那种 700 | 哪种 701 | 如此 702 | 这么 703 | 那么 704 | 多么 705 | 怎么 706 | 为什么 707 | 什么时候 708 | 什么地方 709 | 什么事情 710 | 什么东西 711 | 什么样子 712 | 这个样子 713 | 那个样子 714 | 什么样的 715 | 这样的 716 | 那样的 717 | 怎样的 718 | 这种的 719 | 那种的 720 | 如此的 721 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | 词云插件工具函数 3 | """ 4 | 5 | import os 6 | import time 7 | import datetime 8 | from pathlib import Path 9 | from typing import List, Optional, Tuple, Set 10 | 11 | import jieba 12 | from astrbot.api import logger 13 | from astrbot.api.star import StarTools 14 | 15 | from .constant import DATA_DIR, DEFAULT_STOPWORDS 16 | 17 | 18 | def ensure_directory(path: Path) -> None: 19 | """确保目录存在""" 20 | if not path.exists(): 21 | path.mkdir(parents=True, exist_ok=True) 22 | logger.info(f"创建目录: {path}") 23 | 24 | 25 | def get_current_timestamp() -> int: 26 | """获取当前时间戳""" 27 | return int(time.time()) 28 | 29 | 30 | def format_timestamp(timestamp: int) -> str: 31 | """格式化时间戳为可读字符串""" 32 | return datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") 33 | 34 | 35 | def 
format_date(timestamp: Optional[int] = None) -> str: 36 | """格式化时间戳为日期字符串""" 37 | if timestamp is None: 38 | timestamp = get_current_timestamp() 39 | return datetime.datetime.fromtimestamp(timestamp).strftime("%Y年%m月%d日") 40 | 41 | 42 | def parse_time_str(time_str: str) -> Tuple[int, int]: 43 | """ 44 | 解析时间字符串为小时和分钟 45 | 格式为 HH:MM,例如 23:30 46 | 47 | Args: 48 | time_str: 时间字符串,如 "23:30" 49 | 50 | Returns: 51 | 小时和分钟的元组,如 (23, 30) 52 | 53 | Raises: 54 | ValueError: 如果时间格式无效 55 | """ 56 | try: 57 | hour, minute = time_str.split(":") 58 | hour = int(hour.strip()) 59 | minute = int(minute.strip()) 60 | 61 | if not (0 <= hour < 24 and 0 <= minute < 60): 62 | raise ValueError(f"无效的时间值: {hour}:{minute}") 63 | 64 | return hour, minute 65 | except Exception as e: 66 | logger.error(f"解析时间字符串失败 '{time_str}': {e}") 67 | # 默认返回晚上11:30 68 | return 23, 30 69 | 70 | 71 | def time_str_to_cron(time_str: str) -> str: 72 | """ 73 | 将时间字符串转换为cron表达式 74 | 75 | Args: 76 | time_str: 格式为HH:MM的时间字符串,如 "23:30" 77 | 78 | Returns: 79 | cron表达式,如 "30 23 * * *" 80 | """ 81 | try: 82 | # 去除可能的空白字符 83 | time_str = time_str.strip() 84 | 85 | # 验证时间格式 86 | if ":" not in time_str: 87 | logger.error(f"时间格式错误 '{time_str}': 缺少冒号分隔符") 88 | return "0 0 * * *" # 默认午夜执行 89 | 90 | # 分割小时和分钟 91 | try: 92 | hour_str, minute_str = time_str.split(":") 93 | hour = int(hour_str.strip()) 94 | minute = int(minute_str.strip()) 95 | 96 | # 验证小时和分钟范围 97 | if not (0 <= hour < 24): 98 | logger.error(f"小时值超出范围: {hour}") 99 | hour = 0 # 修正为有效值 100 | 101 | if not (0 <= minute < 60): 102 | logger.error(f"分钟值超出范围: {minute}") 103 | minute = 0 # 修正为有效值 104 | 105 | except ValueError: 106 | logger.error(f"无法解析时间字符串: '{time_str}'") 107 | return "0 0 * * *" # 默认午夜执行 108 | 109 | # 检查时区问题 - 中国是UTC+8,如果系统可能在内部使用UTC时间 110 | import time 111 | 112 | timezone_offset = -time.timezone // 3600 # 获取本地时区偏移(小时) 113 | logger.info( 114 | f"系统时区信息: UTC{'+' if timezone_offset >= 0 else ''}{timezone_offset}" 115 | ) 116 | 117 | # 如果是UTC时区而不是本地时区,需调整 
118 | if timezone_offset != 0: 119 | logger.info( 120 | f"检测到时区差异,将调整时间从本地时间 {hour:02d}:{minute:02d} 到cron时间" 121 | ) 122 | 123 | # 构建cron表达式 - 标准cron格式为:分 时 日 月 周 124 | # 我们直接使用本地时间,不进行时区转换,让croniter基于本地时间处理 125 | cron_expression = f"{minute} {hour} * * *" 126 | logger.info(f"时间字符串 '{time_str}' 已转换为cron表达式: '{cron_expression}'") 127 | 128 | # 验证cron表达式格式 129 | try: 130 | from croniter import croniter 131 | 132 | if not croniter.is_valid(cron_expression): 133 | logger.error(f"生成的cron表达式无效: '{cron_expression}'") 134 | return "0 0 * * *" # 默认午夜执行 135 | 136 | # 附加检查:计算下一个执行时间,确保表达式可以正确工作 137 | import datetime 138 | 139 | base = datetime.datetime.now() 140 | cron = croniter(cron_expression, base) 141 | next_run = cron.get_next(datetime.datetime) 142 | 143 | # 输出下次执行的本地时间,方便验证 144 | local_next_run = next_run 145 | logger.info( 146 | f"使用cron表达式 '{cron_expression}' 计算的下次执行时间: {local_next_run.strftime('%Y-%m-%d %H:%M:%S')} (本地时间)" 147 | ) 148 | 149 | except Exception as croniter_error: 150 | logger.error(f"cron表达式验证失败: {croniter_error}") 151 | return "0 0 * * *" # 默认午夜执行 152 | 153 | return cron_expression 154 | except Exception as e: 155 | logger.error(f"转换时间字符串到cron表达式失败 '{time_str}': {e}") 156 | import traceback 157 | 158 | logger.error(f"转换错误详情: {traceback.format_exc()}") 159 | return "0 0 * * *" # 默认午夜执行 160 | 161 | 162 | def parse_group_list(group_list_str: str) -> Set[str]: 163 | """ 164 | 解析群列表字符串为群号集合 165 | 166 | Args: 167 | group_list_str: 以逗号分隔的群号字符串,如 "123456789,987654321" 168 | 169 | Returns: 170 | 群号的集合 171 | """ 172 | if not group_list_str or not group_list_str.strip(): 173 | return set() 174 | 175 | # 分割并去除空白 176 | groups = set() 177 | for group_id in group_list_str.split(","): 178 | group_id = group_id.strip() 179 | if group_id: 180 | groups.add(group_id) 181 | 182 | return groups 183 | 184 | 185 | def is_group_enabled(group_id: str, enabled_groups: Set[str]) -> bool: 186 | """ 187 | 检查群是否启用词云功能 188 | 189 | Args: 190 | group_id: 群ID 191 | enabled_groups: 
启用词云的群集合,空集合表示全部启用 192 | 193 | Returns: 194 | 群是否启用词云功能 195 | """ 196 | # 输入类型验证,确保group_id是字符串 197 | if not isinstance(group_id, str): 198 | try: 199 | group_id = str(group_id) 200 | except: 201 | # 如果转换失败,默认不启用 202 | logger.warning(f"群ID类型错误: {type(group_id)},无法判断群聊是否启用") 203 | return False 204 | 205 | # 如果启用列表为空,表示没有群被特别指定启用,因此默认不启用此群 206 | if not enabled_groups: 207 | logger.debug(f"启用群列表为空,群 {group_id} 未在指定启用列表中,默认不启用。") 208 | return False 209 | 210 | # 否则,检查是否在启用列表中 211 | result = group_id in enabled_groups 212 | logger.debug(f"群 {group_id} {'在' if result else '不在'}启用列表中") 213 | return result 214 | 215 | 216 | def get_day_start_end_timestamps() -> Tuple[int, int]: 217 | """ 218 | 获取今天的开始和结束时间戳 219 | 220 | Returns: 221 | (开始时间戳, 结束时间戳)的元组 222 | """ 223 | now = datetime.datetime.now() 224 | start_of_day = datetime.datetime(now.year, now.month, now.day, 0, 0, 0) 225 | end_of_day = datetime.datetime(now.year, now.month, now.day, 23, 59, 59) 226 | 227 | return int(start_of_day.timestamp()), int(end_of_day.timestamp()) 228 | 229 | 230 | def get_image_path(session_id: str, timestamp: Optional[int] = None) -> Path: 231 | """获取词云图片存储路径""" 232 | if timestamp is None: 233 | timestamp = get_current_timestamp() 234 | 235 | # 使用会话ID作为目录名,避免不同会话的图片混淆 236 | safe_session_id = session_id.replace("/", "_").replace(":", "_") 237 | 238 | # 确保DATA_DIR已经初始化 239 | if DATA_DIR is None: 240 | # 尝试通过StarTools获取数据目录 241 | try: 242 | from .constant import PLUGIN_NAME 243 | 244 | data_dir = StarTools.get_data_dir(PLUGIN_NAME) 245 | logger.info(f"通过StarTools获取数据目录: {data_dir}") 246 | except Exception: 247 | # 使用临时目录作为备用 248 | from pathlib import Path 249 | 250 | data_dir = Path(__file__).parent / "temp_data" 251 | data_dir.mkdir(exist_ok=True) 252 | logger.warning( 253 | f"DATA_DIR未初始化且无法通过StarTools获取,使用临时目录存储图片: {data_dir}" 254 | ) 255 | else: 256 | data_dir = DATA_DIR 257 | 258 | # 在数据目录下创建images子目录 259 | images_dir = data_dir / "images" 260 | ensure_directory(images_dir) 261 | 262 | # 
在images目录下为每个会话创建子目录 263 | session_dir = images_dir / safe_session_id 264 | ensure_directory(session_dir) 265 | 266 | # 生成图片路径 267 | image_path = session_dir / f"wordcloud_{format_timestamp(timestamp)}.png" 268 | return image_path 269 | 270 | 271 | def get_daily_image_path(session_id: str, date: Optional[datetime.date] = None) -> Path: 272 | """ 273 | 获取每日词云图片存储路径 274 | 275 | Args: 276 | session_id: 会话ID 277 | date: 日期,默认为今天 278 | 279 | Returns: 280 | 图片路径 281 | """ 282 | if date is None: 283 | date = datetime.date.today() 284 | 285 | # 使用会话ID作为目录名,避免不同会话的图片混淆 286 | safe_session_id = session_id.replace("/", "_").replace(":", "_") 287 | 288 | # 确保DATA_DIR已经初始化 289 | if DATA_DIR is None: 290 | # 尝试通过StarTools获取数据目录 291 | try: 292 | from .constant import PLUGIN_NAME 293 | 294 | data_dir = StarTools.get_data_dir(PLUGIN_NAME) 295 | logger.info(f"通过StarTools获取数据目录: {data_dir}") 296 | except Exception: 297 | # 使用临时目录作为备用 298 | from pathlib import Path 299 | 300 | data_dir = Path(__file__).parent / "temp_data" 301 | data_dir.mkdir(exist_ok=True) 302 | logger.warning( 303 | f"DATA_DIR未初始化且无法通过StarTools获取,使用临时目录存储图片: {data_dir}" 304 | ) 305 | else: 306 | data_dir = DATA_DIR 307 | 308 | # 在数据目录下创建daily_images子目录 309 | images_dir = data_dir / "daily_images" 310 | ensure_directory(images_dir) 311 | 312 | # 在daily_images目录下为每个会话创建子目录 313 | session_dir = images_dir / safe_session_id 314 | ensure_directory(session_dir) 315 | 316 | # 生成图片路径,使用日期作为文件名 317 | date_str = date.strftime("%Y-%m-%d") 318 | image_path = session_dir / f"daily_wordcloud_{date_str}.png" 319 | return image_path 320 | 321 | 322 | def segment_text( 323 | text: str, min_length: int = 2, stop_words: Optional[List[str]] = None 324 | ) -> List[str]: 325 | """ 326 | 使用jieba进行中文分词 327 | 328 | Args: 329 | text: 需要分词的文本 330 | min_length: 最小词长度 331 | stop_words: 停用词列表 332 | 333 | Returns: 334 | 分词后的词语列表 335 | """ 336 | if stop_words is None: 337 | stop_words = DEFAULT_STOPWORDS 338 | 339 | # 预处理文本:移除@用户提及和指令相关文字 340 | 
def extract_group_id_from_session(session_id: str) -> Optional[str]:
    """
    Extract the group number from a session ID.

    The formats below are tried in order and the first match wins:

    1. ``<platform>_group_<digits>@chatroom`` -> ``"<digits>@chatroom"``
    2. a purely numeric ID -> returned as is
    3. ``<platform>_group_<digits>`` -> ``<digits>``
    4. colon separated IDs such as ``aiocqhttp:GroupMessage:123456789`` or
       ``aiocqhttp:GroupMessage:0_123456789``; failing that, the last
       colon-separated part that is a run of at least 5 digits
    5. the longest run of at least 5 digits anywhere in the ID

    Args:
        session_id: Session identifier. Non-string values (for example an
            ``int`` group number) are converted with ``str()`` before parsing.

    Returns:
        The group number as a string, or ``None`` when the ID is empty, no
        group number can be found, or an unexpected error occurs (the error
        is logged, never raised).
    """
    # Imported locally: the module-level imports are not visible from here.
    import re

    try:
        if not session_id:
            logger.warning("会话ID为空,无法提取群号")
            return None

        # Normalise up front so every branch below can rely on str methods.
        # Previously a non-str ID blew up inside re.match and yielded None.
        if not isinstance(session_id, str):
            session_id = str(session_id)

        # WeChat style IDs keep their "@chatroom" suffix as part of the group ID.
        chatroom_match = re.match(r".+?_group_(\d+@chatroom)", session_id)
        if chatroom_match:
            return chatroom_match.group(1)

        # Bare group number.
        if session_id.isdigit():
            logger.debug(f"会话ID是纯数字,直接作为群号: {session_id}")
            return session_id

        # "platform_group_groupid", e.g. "aiocqhttp_group_142443871".
        if "_group_" in session_id:
            parts = session_id.split("_group_")
            if len(parts) == 2 and parts[1].isdigit():
                logger.debug(
                    f"从下划线分隔的会话ID '{session_id}' 提取到群号: {parts[1]}"
                )
                return parts[1]

        if ":" in session_id:
            parts = session_id.split(":")

            # 1. Three-part form "[platform]:[type]:[group]": trust the third
            #    part only when the type part names a group-like chat.
            if len(parts) >= 3:
                middle_part = parts[1].lower()
                if any(
                    keyword in middle_part
                    for keyword in ("group", "群", "multi", "channel")
                ):
                    # Drop an optional prefix such as the "0_" in "0_123456789".
                    group_id = parts[2].rsplit("_", 1)[-1]
                    if group_id.isdigit():
                        logger.debug(
                            f"从三段式会话ID '{session_id}' 提取到群号: {group_id}"
                        )
                        return group_id

            # 2. Otherwise scan the parts from last to first for something
            #    that looks like a group number (at least 5 digits).
            for position in range(len(parts), 0, -1):
                potential_id = parts[position - 1].rsplit("_", 1)[-1]
                if potential_id.isdigit() and len(potential_id) >= 5:
                    logger.debug(
                        f"从会话ID '{session_id}' 的第{position}部分提取到可能的群号: {potential_id}"
                    )
                    return potential_id

        # Last resort: the longest run of 5 or more digits anywhere in the ID.
        matches = re.findall(r"\d{5,}", session_id)
        if matches:
            longest_match = max(matches, key=len)
            logger.debug(
                f"使用正则表达式从会话ID '{session_id}' 提取到可能的群号: {longest_match}"
            )
            return longest_match

        logger.warning(f"无法从会话ID '{session_id}' 提取群号")
        return None
    except Exception as e:
        # Best effort by design: a malformed ID must never break the caller.
        logger.error(f"提取群号时出错: {e}")
        import traceback

        logger.error(f"提取群号错误详情: {traceback.format_exc()}")
        return None
def _get_lock_for_key(key: str) -> threading.Lock:
    """
    Return the lock registered for ``key``, creating it on first use.

    The lookup and the insertion both happen while ``_GLOBAL_LOCK`` is held,
    so two threads asking for the same key receive the same lock object.
    This function only ever adds entries to ``_WORDCLOUD_LOCKS``.
    """
    with _GLOBAL_LOCK:
        existing = _WORDCLOUD_LOCKS.get(key)
        if existing is None:
            existing = threading.Lock()
            _WORDCLOUD_LOCKS[key] = existing
        return existing
shape 97 | self.custom_mask_path = custom_mask_path # 保存自定义蒙版路径 98 | self.min_font_size = min_font_size # 保存最小字体大小 99 | self.max_font_size = max_font_size # 保存最大字体大小 100 | self.min_word_frequency = min_word_frequency # 新增:保存最小词频 101 | 102 | # 获取数据目录,优先使用StarTools确保可用 103 | data_dir = None 104 | try: 105 | # 先尝试通过StarTools获取数据目录,这是最可靠的方式 106 | data_dir = StarTools.get_data_dir(PLUGIN_NAME) 107 | logger.info(f"通过StarTools获取数据目录: {data_dir}") 108 | except Exception as e: 109 | logger.warning(f"通过StarTools获取数据目录失败: {e}") 110 | # 尝试使用全局DATA_DIR 111 | if DATA_DIR is not None: 112 | data_dir = DATA_DIR 113 | logger.info(f"使用全局定义的DATA_DIR: {data_dir}") 114 | else: 115 | # 无法获取数据目录,使用临时目录作为备用 116 | temp_data_dir = PLUGIN_DIR / "temp_data" 117 | temp_data_dir.mkdir(exist_ok=True) 118 | data_dir = temp_data_dir 119 | logger.warning(f"无法获取标准数据目录,使用临时目录: {temp_data_dir}") 120 | 121 | # 确保资源目录存在 122 | resources_dir = data_dir / "resources" 123 | resources_dir.mkdir(exist_ok=True) 124 | fonts_dir = resources_dir / "fonts" 125 | fonts_dir.mkdir(exist_ok=True) 126 | 127 | # 设置默认字体路径,从插件目录复制到数据目录 128 | plugin_font_path = PLUGIN_DIR / "fonts" / "LXGWWenKai-Regular.ttf" 129 | data_font_path = fonts_dir / "LXGWWenKai-Regular.ttf" 130 | 131 | # 如果数据目录中没有字体,尝试从插件目录复制 132 | if not data_font_path.exists() and plugin_font_path.exists(): 133 | try: 134 | shutil.copy(plugin_font_path, data_font_path) 135 | logger.info(f"已将字体文件复制到数据目录: {data_font_path}") 136 | except Exception as e: 137 | logger.warning(f"复制字体文件失败: {e}") 138 | 139 | # 处理字体路径 140 | if font_path and os.path.exists(font_path): 141 | # 如果是相对路径,可能需要相对于插件目录解析 142 | if not os.path.isabs(font_path): 143 | abs_font_path = PLUGIN_DIR / font_path 144 | if os.path.exists(abs_font_path): 145 | self.font_path = str(abs_font_path) 146 | logger.info(f"使用插件目录中的字体: {self.font_path}") 147 | else: 148 | # 尝试相对于数据目录 149 | data_relative_font_path = ( 150 | data_dir / "resources" / "fonts" / os.path.basename(font_path) 151 | ) 152 | if 
os.path.exists(data_relative_font_path): 153 | self.font_path = str(data_relative_font_path) 154 | logger.info(f"使用数据目录中的字体: {self.font_path}") 155 | else: 156 | self.font_path = ( 157 | font_path # 使用原始路径,可能是相对于当前工作目录 158 | ) 159 | else: 160 | self.font_path = font_path # 使用绝对路径 161 | elif data_font_path.exists(): 162 | self.font_path = str(data_font_path) 163 | logger.info(f"使用数据目录中的字体: {self.font_path}") 164 | elif plugin_font_path.exists(): 165 | self.font_path = str(plugin_font_path) 166 | logger.info(f"使用插件目录中的字体: {self.font_path}") 167 | else: 168 | self.font_path = None 169 | logger.warning("未找到有效字体文件,将使用系统默认字体") 170 | 171 | # 处理停用词文件 172 | if stop_words_file: 173 | # 处理相对路径 174 | if not os.path.isabs(stop_words_file): 175 | # 尝试相对于插件目录解析 176 | plugin_stopwords_path = PLUGIN_DIR / stop_words_file 177 | data_stopwords_path = ( 178 | data_dir / "resources" / os.path.basename(stop_words_file) 179 | ) 180 | 181 | # 如果插件目录有文件但数据目录没有,复制过去 182 | if plugin_stopwords_path.exists() and not data_stopwords_path.exists(): 183 | try: 184 | shutil.copy(plugin_stopwords_path, data_stopwords_path) 185 | logger.info( 186 | f"已将停用词文件复制到数据目录: {data_stopwords_path}" 187 | ) 188 | # 使用数据目录中的文件 189 | stop_words_file = str(data_stopwords_path) 190 | except Exception as e: 191 | logger.warning(f"复制停用词文件失败: {e}") 192 | # 如果复制失败,使用插件目录中的文件 193 | if plugin_stopwords_path.exists(): 194 | stop_words_file = str(plugin_stopwords_path) 195 | elif data_stopwords_path.exists(): 196 | # 使用数据目录中的文件 197 | stop_words_file = str(data_stopwords_path) 198 | elif plugin_stopwords_path.exists(): 199 | # 使用插件目录中的文件 200 | stop_words_file = str(plugin_stopwords_path) 201 | 202 | self.min_word_length = min_word_length 203 | self.stop_words = load_stop_words(stop_words_file) 204 | 205 | # 保存临时使用的data_dir 206 | self._temp_data_dir = data_dir 207 | 208 | # 初始化词云生成器 209 | self._init_wordcloud() 210 | 211 | def _create_circle_mask(self): 212 | """ 213 | 创建圆形蒙版 214 | 215 | 在WordCloud中,蒙版的工作方式与直觉相反: 216 | - 
值为0的区域允许绘制文字 217 | - 值为非0(如255)的区域不允许绘制文字 218 | 219 | 为生成在圆形内部的词云,我们需要: 220 | 1. 创建一个全为255的数组(默认不允许绘制) 221 | 2. 将圆形内部区域设置为0(允许绘制) 222 | 3. 确保圆形外部区域保持为255(不允许绘制) 223 | """ 224 | # 创建一个正方形画布,边长取width和height的最大值确保圆形不会被压缩 225 | size = max(self.width, self.height) 226 | 227 | # 创建一个全255数组作为基础蒙版(默认不允许绘制) 228 | mask = np.ones((size, size), dtype=np.uint8) * 255 229 | 230 | # 计算圆心和半径 231 | center = size // 2 232 | radius = int(center * 0.9) # 使用较小的半径避免太靠近边缘 233 | 234 | # 创建一个网格坐标系用于计算每个点到圆心的距离 235 | y, x = np.ogrid[:size, :size] 236 | 237 | # 计算每个点到圆心的距离的平方 238 | dist_from_center = (x - center) ** 2 + (y - center) ** 2 239 | 240 | # 圆内区域的布尔掩码(True表示在圆内) 241 | circle = dist_from_center <= radius**2 242 | 243 | # 将圆内区域设为0(允许绘制文字),其余区域保持为255(不绘制文字) 244 | mask[circle] = 0 245 | 246 | # 验证蒙版:记录圆内(值为0)像素的数量和总像素数 247 | circle_pixels = np.sum(mask == 0) 248 | total_pixels = size * size 249 | circle_ratio = circle_pixels / total_pixels 250 | 251 | logger.info(f"生成圆形蒙版: 大小={size}x{size}, 半径={radius}") 252 | logger.info( 253 | f"圆内像素数量: {circle_pixels}, 总像素数: {total_pixels}, 比例: {circle_ratio:.2f}" 254 | ) 255 | 256 | return mask 257 | 258 | def _create_diamond_mask(self, width: int, height: int): 259 | """创建菱形蒙版 (白色背景,黑色形状 - 词云绘制区域) 260 | 词云库通常期望蒙版中值为0的区域绘制文字,非0区域不绘制。 261 | 所以我们画黑色菱形在白色背景上,然后转换时黑色变0,白色变255. 
def _create_triangle_mask(self, width: int, height: int):
    """
    Build an upward-pointing triangle mask of ``width`` x ``height`` pixels.

    The canvas starts white (255, not drawable) and the triangle is filled
    with black (0, drawable), matching the other preset masks of this class.

    Returns:
        The mask as a 2-D array produced from an 8-bit grayscale image.
    """
    canvas = Image.new("L", (width, height), 255)

    # Apex in the middle of the top edge, base along the bottom edge. The
    # base corners use width-1 / height-1 so they stay inside the image.
    apex = (width // 2, 0)
    base_right = (width - 1, height - 1)
    base_left = (0, height - 1)
    ImageDraw.Draw(canvas).polygon([apex, base_right, base_left], fill=0)

    triangle_mask = np.array(canvas)
    logger.info(f"生成上三角形蒙版: 大小={width}x{height}")
    return triangle_mask
def _init_wordcloud(self) -> None:
    """
    Create ``self.wordcloud`` from the generator's current settings.

    Mask selection, in priority order:

    1. ``self.custom_mask_path`` if it resolves to a readable image file.
       Absolute paths are used as given; relative paths are resolved under
       ``<data dir>/resources/images/`` (or under ``PLUGIN_DIR`` when no data
       directory was recorded). Multi-channel images are reduced to
       grayscale. The image is expected to show a dark shape on a white
       background: white (255) pixels are left empty.
    2. A preset mask chosen by ``self.shape`` ("circle", "diamond",
       "triangle_up", "cloud").
    3. No mask at all (plain rectangle) for any other shape value.

    Failures while loading a custom mask are logged and fall through to the
    preset shape; they never raise.
    """
    mask = None
    processed_custom_mask = False  # True once a custom mask has been loaded

    if self.custom_mask_path:
        if os.path.isabs(self.custom_mask_path):
            mask_image_path = Path(self.custom_mask_path)
        elif self._temp_data_dir:  # set in __init__
            mask_image_path = (
                self._temp_data_dir / "resources" / "images" / self.custom_mask_path
            )
        else:  # fallback in case no data directory was recorded
            mask_image_path = (
                PLUGIN_DIR / "resources" / "images" / self.custom_mask_path
            )

        if mask_image_path.is_file():
            try:
                logger.info(f"加载自定义蒙版图片: {mask_image_path}")
                # Open the file once and close it deterministically. The
                # previous code opened it twice and never closed either
                # handle, which keeps the file locked on some platforms.
                with Image.open(mask_image_path) as custom_mask_image:
                    mask = np.array(custom_mask_image)
                    # WordCloud needs a single-channel mask here; collapse
                    # RGB(A) and other multi-channel images to grayscale.
                    if mask.ndim == 3:
                        mask = np.array(custom_mask_image.convert("L"))
                        logger.info("自定义蒙版已转换为灰度图.")

                logger.info(
                    f"自定义蒙版加载成功,形状: {mask.shape}, 类型: {mask.dtype}"
                )
                processed_custom_mask = True
            except Exception as e:
                logger.error(
                    f"加载或处理自定义蒙版图片失败: {mask_image_path}, 错误: {e}"
                )
                mask = None  # loading failed: fall back to the preset shape
        else:
            logger.warning(
                f"自定义蒙版图片路径无效或文件不存在: {self.custom_mask_path} (解析后路径: {mask_image_path})"
            )

    # No usable custom mask: build the preset mask for the configured shape.
    if not processed_custom_mask:
        if self.shape == "circle":
            mask = self._create_circle_mask()
        elif self.shape == "diamond":
            mask = self._create_diamond_mask(self.width, self.height)
        elif self.shape == "triangle_up":
            mask = self._create_triangle_mask(self.width, self.height)
        elif self.shape == "cloud":
            mask = self._create_cloud_mask(self.width, self.height)
        # "rectangle" and unknown shapes keep mask=None (plain rectangle).

    wordcloud_params = {
        "width": self.width,
        "height": self.height,
        "max_words": self.max_words,
        "background_color": self.background_color,
        "colormap": self.colormap,
        "min_font_size": self.min_font_size,
        "max_font_size": self.max_font_size,
        "random_state": 42,  # fixed seed
        "collocations": False,  # avoid showing word pairs twice
        "normalize_plurals": False,
        "mask": mask,
        "prefer_horizontal": 0.9,  # 90% horizontal words for some variety
        "repeat": False,  # never repeat words just to fill the space
        "mode": "RGB",  # avoids channel mismatches seen with contour drawing
    }
    # NOTE: contour_width / contour_color are intentionally not set; they were
    # disabled because of a channel-mismatch problem with the circle mask.

    # Only pass a font when the file really exists.
    if self.font_path and os.path.exists(self.font_path):
        wordcloud_params["font_path"] = self.font_path

    self.wordcloud = WordCloud(**wordcloud_params)
def _add_timestamp_to_image(
    self, img: Image.Image, timestamp: Optional[int] = None
) -> Image.Image:
    """
    Draw a "generated at" watermark in the bottom-left corner of ``img``.

    The text colour and its translucent backing box are chosen from the
    average brightness of the image region under the watermark, so the text
    stays readable on both dark and light backgrounds.

    Args:
        img: Image to draw on. It is modified in place.
        timestamp: Unix timestamp to print. Defaults to the current time.

    Returns:
        The same ``img`` object, with the watermark drawn on it.
    """
    if timestamp is None:
        timestamp = int(time.time())

    time_str = f"生成时间: {datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')}"

    # "RGBA" drawing mode lets the translucent backing box blend into an RGB
    # image instead of overwriting it.
    draw = ImageDraw.Draw(img, "RGBA")

    font_size = 14

    # --- Pick a font: configured font, else a system CJK font, else default.
    font = None
    if self.font_path and os.path.exists(self.font_path):
        try:
            font = ImageFont.truetype(self.font_path, font_size)
        except Exception:
            logger.warning(f"加载指定字体失败: {self.font_path}")
    else:
        system_fonts = [
            # Windows
            "C:/Windows/Fonts/simhei.ttf",  # SimHei
            "C:/Windows/Fonts/simsun.ttc",  # SimSun
            "C:/Windows/Fonts/simkai.ttf",  # KaiTi
            "C:/Windows/Fonts/msyh.ttc",  # Microsoft YaHei
            # Linux
            "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
            "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
            # macOS
            "/System/Library/Fonts/PingFang.ttc",
        ]
        for candidate in system_fonts:
            if not os.path.exists(candidate):
                continue
            try:
                font = ImageFont.truetype(candidate, font_size)
            except Exception:
                continue  # unreadable font file: try the next candidate
            logger.debug(f"成功加载系统字体: {candidate}")
            break

    if font is None:
        # Make the fallback explicit instead of passing None around and
        # relying on swallowed AttributeErrors further down.
        font = ImageFont.load_default()

    margin = 10

    # --- Measure the text so it can be positioned and backed by a box.
    try:
        if hasattr(font, "getbbox"):
            text_width, text_height = font.getbbox(time_str)[2:]
        else:  # older Pillow releases
            text_width, text_height = font.getsize(time_str)
    except Exception:
        # Some fonts cannot measure this text; use a rough estimate.
        text_width, text_height = len(time_str) * font_size // 2, font_size

    position = (margin, img.height - text_height - margin)

    # --- Choose colours from the brightness of the bottom-left region.
    try:
        # Clamp the sample box to the image: cropping outside the image pads
        # with black pixels, which would bias the result towards "dark".
        sample_box = (
            0,
            max(0, int(img.height - text_height * 2)),
            min(img.width, int(text_width * 2)),
            img.height,
        )
        sample_img = img.crop(sample_box)

        if sample_img.mode == "RGBA":
            # Flatten transparency onto white before measuring brightness.
            bg = Image.new("RGB", sample_img.size, (255, 255, 255))
            sample_img = Image.alpha_composite(
                bg.convert("RGBA"), sample_img
            ).convert("RGB")

        avg_rgb = ImageStat.Stat(sample_img).mean
        brightness = sum(avg_rgb) / len(avg_rgb)
        is_dark_bg = brightness < 128

        text_color = (255, 255, 255) if is_dark_bg else (0, 0, 0)
        bg_color = (0, 0, 0, 180) if is_dark_bg else (255, 255, 255, 180)

        logger.debug(
            f"检测到{'深色' if is_dark_bg else '浅色'}背景,亮度值: {brightness:.1f}"
        )
    except Exception as e:
        logger.warning(f"检测背景颜色失败: {e},使用默认颜色")
        # Assume a dark background: white text on translucent black.
        text_color = (255, 255, 255)
        bg_color = (0, 0, 0, 180)

    # --- Translucent backing box, then the text on top of it.
    bg_padding = 4
    bg_box = [
        position[0] - bg_padding,
        position[1] - bg_padding,
        position[0] + text_width + bg_padding,
        position[1] + text_height + bg_padding,
    ]
    draw.rectangle(bg_box, fill=bg_color)
    draw.text(position, time_str, fill=text_color, font=font)

    return img
Optional[int] = None, 673 | title: Optional[str] = None, 674 | ) -> Tuple[str, Path]: 675 | """ 676 | 生成词云图片 677 | 678 | Args: 679 | word_counts: 词频统计 680 | session_id: 会话ID 681 | timestamp: 时间戳,为None则使用当前时间 682 | title: 词云标题 683 | 684 | Returns: 685 | 生成的图片路径(字符串), 路径对象 686 | """ 687 | if timestamp is None: 688 | timestamp = int(time.time()) 689 | 690 | if not word_counts: 691 | raise ValueError("无有效词频数据,无法生成词云") 692 | 693 | # 在生成词云前,根据配置的最小词频过滤词汇 694 | filtered_word_counts = self._filter_by_frequency(word_counts) 695 | 696 | if not filtered_word_counts: 697 | # 如果过滤后没有词了,可以抛出错误或者生成一个提示性的空图片 698 | # 这里我们选择抛出错误,因为通常这意味着数据不足或过滤条件太严格 699 | logger.warning("根据最小词频过滤后,没有足够的词汇来生成词云。") 700 | raise ValueError("过滤后无有效词频数据,无法生成词云") 701 | 702 | # 获取图片存储路径 703 | image_path = get_image_path(session_id, timestamp) 704 | 705 | # 创建锁的键名 706 | lock_key = f"wordcloud_{session_id}_{timestamp}" 707 | 708 | # 获取锁对象 709 | lock = _get_lock_for_key(lock_key) 710 | 711 | # 尝试获取锁 712 | if not lock.acquire(blocking=False): 713 | logger.warning( 714 | f"已有其他线程正在生成相同的词云 {session_id}_{timestamp},跳过本次生成" 715 | ) 716 | 717 | # 如果文件已存在,直接返回路径 718 | if image_path.exists(): 719 | logger.info(f"使用已存在的词云图片: {image_path}") 720 | return str(image_path), image_path 721 | 722 | # 等待一段时间看是否生成了 723 | try: 724 | wait_start = time.time() 725 | while time.time() - wait_start < 5.0: # 最多等待5秒 726 | time.sleep(0.5) 727 | if image_path.exists(): 728 | logger.info(f"等待后找到了词云图片: {image_path}") 729 | return str(image_path), image_path 730 | 731 | # 如果等待超时仍未生成,则抛出异常 732 | raise ValueError("等待词云生成超时,请稍后再试") 733 | except Exception as e: 734 | logger.error(f"等待词云生成时出错: {e}") 735 | raise ValueError("词云生成被其他任务占用,请稍后再试") 736 | 737 | try: 738 | # 生成词云 739 | self.wordcloud.generate_from_frequencies( 740 | filtered_word_counts 741 | ) # 使用过滤后的词频 742 | 743 | # 确保目录存在 744 | image_path.parent.mkdir(parents=True, exist_ok=True) 745 | 746 | # 先保存词云图像到临时文件,避免直接操作wordcloud对象导致维度不匹配 747 | temp_path = image_path.parent / 
f"temp_{image_path.name}" 748 | self.wordcloud.to_file(str(temp_path)) 749 | 750 | # 读取保存的图像 751 | wordcloud_img = np.array(Image.open(temp_path)) 752 | 753 | # 使用matplotlib创建带标题的完整图像 754 | fig_width, fig_height = 10, 6.5 755 | dpi = 150 756 | 757 | # 创建带有背景色的图表 758 | fig = plt.figure( 759 | figsize=(fig_width, fig_height), 760 | facecolor=self.background_color, 761 | dpi=dpi, 762 | ) 763 | plt.rcParams.update({"figure.autolayout": True}) 764 | ax = plt.axes() 765 | ax.set_facecolor(self.background_color) 766 | ax.set_position([0, 0, 1, 0.9]) # 为标题留出少量空间 767 | 768 | # 去除边框和刻度 769 | plt.axis("off") 770 | plt.box(False) 771 | plt.tight_layout(pad=0.1) # 减少内边距 772 | 773 | # 绘制词云图像 774 | plt.imshow(wordcloud_img, interpolation="bilinear") 775 | 776 | # 设置标题,使用对比色 777 | if title: 778 | # 选择与背景相反的颜色 779 | title_color = ( 780 | "white" if self._is_dark_color(self.background_color) else "black" 781 | ) 782 | 783 | logger.info( 784 | f"设置词云标题: {title}, 背景色: {self.background_color}, 标题颜色: {title_color}" 785 | ) 786 | 787 | # 设置中文标题字体 788 | if self.font_path and os.path.exists(self.font_path): 789 | try: 790 | font_prop = FontProperties(fname=self.font_path) 791 | plt.title( 792 | title, 793 | fontproperties=font_prop, 794 | fontsize=16, 795 | pad=10, 796 | color=title_color, 797 | ) 798 | except Exception as e: 799 | logger.warning(f"使用自定义字体设置标题失败: {e}") 800 | plt.title(title, fontsize=16, pad=10, color=title_color) 801 | else: 802 | plt.title(title, fontsize=16, pad=10, color=title_color) 803 | 804 | # 如果是深色背景,添加文字边框增强可读性 805 | if self._is_dark_color(self.background_color): 806 | try: 807 | # 将当前标题获取出来 808 | title_obj = ax.get_title() 809 | # 清除原标题 810 | ax.set_title("") 811 | # 重新设置带边框的标题 812 | plt.title( 813 | title, 814 | fontproperties=font_prop 815 | if "font_prop" in locals() 816 | else None, 817 | fontsize=16, 818 | pad=10, 819 | color=title_color, 820 | bbox=dict( 821 | facecolor=self.background_color, 822 | alpha=0.8, 823 | edgecolor="white", 824 | 
boxstyle="round,pad=0.5", 825 | ), 826 | ) 827 | except Exception as title_ex: 828 | logger.warning(f"设置标题边框失败: {title_ex}") 829 | # 恢复原标题 830 | if "title_obj" in locals(): 831 | ax.set_title(title_obj) 832 | 833 | # 保存图片 834 | plt.savefig( 835 | image_path, 836 | bbox_inches="tight", 837 | pad_inches=0.2, # 减少边距 838 | dpi=dpi, 839 | facecolor=self.background_color, 840 | ) 841 | plt.close(fig) 842 | 843 | # 删除临时文件 844 | try: 845 | if temp_path.exists(): 846 | os.remove(temp_path) 847 | except Exception as e: 848 | logger.warning(f"删除临时文件失败: {e}") 849 | 850 | # 添加时间戳水印 851 | img = Image.open(image_path) 852 | final_image = self._add_timestamp_to_image(img, timestamp) 853 | final_image.save(image_path) 854 | 855 | # 输出图片信息 856 | logger.info(f"词云图片已保存至: {image_path}") 857 | 858 | return str(image_path), image_path 859 | except Exception as e: 860 | logger.error(f"生成词云时出错: {e}") 861 | logger.error(traceback.format_exc()) 862 | raise 863 | finally: 864 | # 释放锁 865 | lock.release() 866 | 867 | def _is_dark_color(self, color_str: str) -> bool: 868 | """ 869 | 判断颜色是否为深色 870 | 871 | Args: 872 | color_str: 颜色字符串,可以是颜色名称或十六进制值 873 | 874 | Returns: 875 | 是否为深色 876 | """ 877 | # 处理常见颜色名称 878 | dark_color_names = [ 879 | "black", 880 | "darkblue", 881 | "darkgreen", 882 | "darkcyan", 883 | "darkred", 884 | "darkmagenta", 885 | "darkgray", 886 | "darkgrey", 887 | "navy", 888 | "green", 889 | "teal", 890 | "maroon", 891 | "purple", 892 | "indigo", 893 | "midnightblue", 894 | "darkslategray", 895 | "darkslategrey", 896 | "dimgray", 897 | "dimgrey", 898 | ] 899 | 900 | light_color_names = [ 901 | "white", 902 | "lightgray", 903 | "lightgrey", 904 | "whitesmoke", 905 | "snow", 906 | "ivory", 907 | "floralwhite", 908 | "linen", 909 | "cornsilk", 910 | "seashell", 911 | "lavenderblush", 912 | "papayawhip", 913 | "blanchedalmond", 914 | ] 915 | 916 | # 首先检查确定的颜色名称 917 | color_lower = color_str.lower() 918 | if color_lower in dark_color_names: 919 | logger.debug(f"颜色 {color_str} 
在已知深色列表中") 920 | return True 921 | if color_lower in light_color_names: 922 | logger.debug(f"颜色 {color_str} 在已知浅色列表中") 923 | return False 924 | 925 | # 处理十六进制颜色值 926 | if color_str.startswith("#"): 927 | try: 928 | # 去掉#号并解析RGB值 929 | r, g, b = ( 930 | int(color_str[1:3], 16), 931 | int(color_str[3:5], 16), 932 | int(color_str[5:7], 16), 933 | ) 934 | # 计算亮度 (使用更精确的亮度计算公式) 935 | # 这个公式来自W3C标准:https://www.w3.org/TR/WCAG20-TECHS/G17.html 936 | brightness = (r * 299 + g * 587 + b * 114) / 1000 937 | is_dark = brightness < 128 938 | logger.debug( 939 | f"颜色 {color_str} 亮度值: {brightness:.1f}, 判定为{'深色' if is_dark else '浅色'}" 940 | ) 941 | return is_dark 942 | except Exception as e: 943 | logger.warning(f"解析十六进制颜色失败: {color_str}, {e}") 944 | return False # 解析失败,默认为浅色 945 | 946 | # 尝试使用matplotlib的颜色名称 947 | try: 948 | from matplotlib.colors import to_rgb 949 | 950 | rgb = to_rgb(color_str) 951 | r, g, b = int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255) 952 | brightness = (r * 299 + g * 587 + b * 114) / 1000 953 | is_dark = brightness < 128 954 | logger.debug( 955 | f"颜色名称 {color_str} 转换为RGB: {r},{g},{b}, 亮度值: {brightness:.1f}, 判定为{'深色' if is_dark else '浅色'}" 956 | ) 957 | return is_dark 958 | except Exception as e: 959 | logger.warning(f"解析颜色名称失败: {color_str}, {e}") 960 | return False # 解析失败,默认为浅色 961 | 962 | def _filter_word_frequencies(self, word_counts: Dict[str, int]) -> Dict[str, int]: 963 | """ 964 | 过滤词频,移除频率过低的词汇,让词云更加利落 965 | 966 | Args: 967 | word_counts: 原始词频统计 968 | 969 | Returns: 970 | 过滤后的词频统计 971 | """ 972 | if not word_counts: 973 | return word_counts 974 | 975 | # 计算词频统计信息 976 | frequencies = list(word_counts.values()) 977 | total_words = len(frequencies) 978 | max_freq = max(frequencies) 979 | 980 | # 动态计算最小频率阈值 981 | # 如果词汇总数很多,设置更严格的过滤条件 982 | if total_words > self.max_words * 3: 983 | # 词汇过多时,使用更严格的过滤 984 | min_freq_threshold = max(2, max_freq * 0.02) # 至少2次,或最高频的2% 985 | elif total_words > self.max_words * 2: 986 | # 词汇较多时,适中过滤 987 | 
min_freq_threshold = max(1, max_freq * 0.01) # 至少1次,或最高频的1% 988 | else: 989 | # 词汇不多时,轻度过滤 990 | min_freq_threshold = 1 991 | 992 | # 过滤低频词 993 | filtered_counts = { 994 | word: count 995 | for word, count in word_counts.items() 996 | if count >= min_freq_threshold 997 | } 998 | 999 | # 如果过滤后词汇仍然过多,取频率最高的词汇 1000 | if len(filtered_counts) > self.max_words: 1001 | # 按频率排序,取前max_words个 1002 | sorted_words = sorted( 1003 | filtered_counts.items(), key=lambda x: x[1], reverse=True 1004 | ) 1005 | filtered_counts = dict(sorted_words[: self.max_words]) 1006 | 1007 | logger.info( 1008 | f"词频过滤: 原始词汇{total_words}个 -> 过滤后{len(filtered_counts)}个,最小频率阈值: {min_freq_threshold}" 1009 | ) 1010 | 1011 | return filtered_counts 1012 | -------------------------------------------------------------------------------- /wordcloud_core/history_manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | 聊天历史记录管理器 3 | """ 4 | import re 5 | import asyncio 6 | from typing import List, Dict, Any, Optional, Tuple 7 | import traceback 8 | 9 | from sqlalchemy import Column, Integer, String, Boolean, Index, select, func 10 | from sqlalchemy.orm import DeclarativeBase 11 | from sqlalchemy.ext.asyncio import AsyncSession 12 | 13 | from astrbot.api import logger 14 | from astrbot.api.event import AstrMessageEvent 15 | from astrbot.api.star import Context 16 | 17 | from ..utils import get_current_timestamp, get_day_start_end_timestamps 18 | 19 | 20 | class Base(DeclarativeBase): 21 | pass 22 | 23 | 24 | class MessageHistory(Base): 25 | """聊天消息历史记录模型""" 26 | __tablename__ = 'wordcloud_message_history' 27 | 28 | id = Column(Integer, primary_key=True, autoincrement=True) 29 | session_id = Column(String, nullable=False) 30 | sender_id = Column(String, nullable=False) 31 | sender_name = Column(String) 32 | message = Column(String, nullable=False) 33 | timestamp = Column(Integer, nullable=False) 34 | is_group = Column(Boolean, nullable=False) 35 | 36 | # 索引 37 | 
def __init__(self, context: "Context"):
    """
    Initialise the history manager and schedule creation of its table.

    Args:
        context: AstrBot context; the object returned by its ``get_db()`` is
            stored as ``self.db``.

    Raises:
        RuntimeError: If called while no event loop is running, because
            ``asyncio.create_task`` needs a running loop.
    """
    self.context = context
    self.db = self.context.get_db()

    # Schedule table creation in the background.  The task is stored on the
    # instance because the event loop only keeps weak references to tasks; an
    # unreferenced task may be garbage-collected before it finishes.
    self._init_task = asyncio.create_task(self._ensure_table())
async def get_history_messages(
    self, session_id: str, days: int = 7, limit: int = 1000
) -> List[Dict[str, Any]]:
    """
    Fetch recent history records of one session, newest first.

    Args:
        session_id: Session identifier to filter on.
        days: Size of the look-back window in days, counted from now.
        limit: Maximum number of records to return.

    Returns:
        One dict per record with the keys ``session_id``, ``sender_id``,
        ``sender_name``, ``message``, ``timestamp`` and ``is_group``.
        An empty list is returned if anything goes wrong.
    """
    exported_fields = (
        "session_id",
        "sender_id",
        "sender_name",
        "message",
        "timestamp",
        "is_group",
    )
    try:
        # Lower bound of the window as a timestamp.
        start_time = get_current_timestamp() - (days * 24 * 60 * 60)

        query = (
            select(MessageHistory)
            .where(
                MessageHistory.session_id == session_id,
                MessageHistory.timestamp >= start_time,
            )
            .order_by(MessageHistory.timestamp.desc())
            .limit(limit)
        )

        async with self.db.get_db() as session:
            records = (await session.execute(query)).scalars().all()

            # Flatten the ORM objects into plain dictionaries.
            message_list = [
                {field: getattr(record, field) for field in exported_fields}
                for record in records
            ]

            logger.debug(
                f"获取到{len(message_list)}条历史消息(会话ID: {session_id}, 天数: {days})"
            )
            return message_list

    except Exception as e:
        logger.error(f"获取历史消息失败: {e}")
        return []
async def get_todays_message_texts(self, session_id: str, limit: int = 1000) -> List[str]:
    """
    Fetch the text of today's messages for one session, oldest first.

    Args:
        session_id: Session identifier to filter on.
        limit: Maximum number of rows to read from the database.

    Returns:
        The non-blank message texts inside the range reported by
        ``get_day_start_end_timestamps()``; an empty list on any error.
    """
    try:
        day_range = get_day_start_end_timestamps()
        start_timestamp, end_timestamp = day_range
        logger.info(
            f"获取今日消息 - 会话ID: {session_id}, 时间范围: {start_timestamp} 到 {end_timestamp}"
        )

        query = (
            select(MessageHistory.message)
            .where(
                MessageHistory.session_id == session_id,
                MessageHistory.timestamp >= start_timestamp,
                MessageHistory.timestamp <= end_timestamp,
            )
            .order_by(MessageHistory.timestamp.asc())
            .limit(limit)
        )

        async with self.db.get_db() as session:
            fetched = (await session.execute(query)).scalars().all()

            # Keep only real, non-blank strings.
            messages = []
            for text in fetched:
                if text and isinstance(text, str) and text.strip():
                    messages.append(text)

            logger.info(
                f"今日消息获取成功 - 会话ID: {session_id}, 消息数量: {len(messages)}"
            )
            return messages

    except Exception as e:
        logger.error(f"获取今日消息文本失败: {e}")
        return []
async def get_active_users(
    self, session_id: str, days: int = 1, limit: int = 10
) -> List[Tuple[str, str, int]]:
    """
    Rank the senders of one session by number of messages.

    Args:
        session_id: Session identifier to filter on.
        days: ``1`` selects the range from ``get_day_start_end_timestamps()``;
            any other value selects the last ``days`` * 24 hours up to now.
        limit: Maximum number of users to return.

    Returns:
        ``(sender_id, sender_name, message_count)`` tuples, most active first.
        The sender id stands in for a missing name.  Empty list on error.
    """
    try:
        if days != 1:
            # Rolling window that ends at the current moment.
            end_timestamp = get_current_timestamp()
            start_timestamp = end_timestamp - (days * 24 * 60 * 60)
        else:
            start_timestamp, end_timestamp = get_day_start_end_timestamps()

        query = (
            select(
                MessageHistory.sender_id,
                MessageHistory.sender_name,
                func.count().label('message_count'),
            )
            .where(
                MessageHistory.session_id == session_id,
                MessageHistory.timestamp >= start_timestamp,
                MessageHistory.timestamp <= end_timestamp,
            )
            .group_by(MessageHistory.sender_id, MessageHistory.sender_name)
            .order_by(func.count().desc())
            .limit(limit)
        )

        async with self.db.get_db() as session:
            rows = (await session.execute(query)).all()

            ranking = []
            for row in rows:
                # Fall back to the id when no display name was stored.
                display_name = row.sender_name or row.sender_id
                ranking.append((row.sender_id, display_name, row.message_count))
            return ranking
    except Exception as e:
        logger.error(f"获取活跃用户失败: {e}, session_id={session_id}, days={days}")
        return []
async def get_total_users_for_date_range(
    self, session_id: str, start_timestamp: int, end_timestamp: int
) -> int:
    """
    Count the distinct senders of one session inside a timestamp range.

    Args:
        session_id: Session identifier to filter on.
        start_timestamp: Inclusive lower bound.
        end_timestamp: Inclusive upper bound.

    Returns:
        Number of distinct ``sender_id`` values; ``0`` when there are none or
        when the query fails.
    """
    try:
        distinct_senders = func.count(func.distinct(MessageHistory.sender_id))
        query = select(distinct_senders).where(
            MessageHistory.session_id == session_id,
            MessageHistory.timestamp >= start_timestamp,
            MessageHistory.timestamp <= end_timestamp,
        )

        async with self.db.get_db() as session:
            count = (await session.execute(query)).scalar()
            if not count:
                return 0
            logger.debug(
                f"会话 {session_id} 在 {start_timestamp}-{end_timestamp} 范围内总用户数: {count}"
            )
            return count

    except Exception as e:
        logger.error(f"获取指定日期范围总用户数失败 (会话 {session_id}): {e}")
        return 0
async def extract_group_id_from_session(self, session_id: str) -> Optional[str]:
    """
    Extract the group number from a session ID.

    Two layouts are recognised:

    * ``platform:GroupMessage:group_id`` — the colon-separated origin format;
    * ``platform_group_group_id`` — the format ``save_message`` stores for
      group chats.

    Args:
        session_id: Session identifier.

    Returns:
        The group number, or None if the session is not a group chat or the
        ID cannot be parsed.
    """
    try:
        parts = session_id.split(":")
        if len(parts) >= 3:
            # Colon-separated origin: only "GroupMessage" origins are groups.
            return parts[2] if "GroupMessage" in parts[1] else None

        # Storage format written by save_message: "<platform>_group_<id>".
        platform, marker, group_id = session_id.rpartition("_group_")
        if marker and platform and group_id:
            return group_id
        return None
    except Exception as e:
        logger.error(f"从会话ID提取群号失败: {e}")
        return None
async def _clean_message(self, message: str, sender_name: Optional[str] = None) -> str:
    """
    Strip everything from a message that should not feed the word cloud.

    Commands and word-cloud requests are dropped entirely.  Otherwise CQ
    codes, URLs and @-mentions are removed, and every run of characters that
    is not a CJK ideograph (U+4E00-U+9FA5), ASCII letter or digit becomes a
    single space.

    Args:
        message: Raw message text.
        sender_name: Sender nickname; accepted for interface compatibility
            and not used by the current implementation.

    Returns:
        The cleaned text, or ``""`` if the message is a command.
    """
    # Commands and word-cloud keywords never count towards the statistics.
    message_lower = message.strip().lower()
    if (
        message_lower.startswith(('#', '/', 'wc', '词云'))
        or '生成词云' in message_lower
        or '/wordcloud' in message_lower
    ):
        return ""

    # CQ codes go first: they may contain "@" or URLs, and stripping those
    # first would break the bracket pair and leak the code's keywords.
    message = re.sub(r"\[CQ:[^\]]+\]", "", message)

    # URLs: ASCII URL characters only, so percent-escapes, fragments and
    # ports are removed in full while CJK text glued to the URL is kept.
    message = re.sub(r"https?://[A-Za-z0-9\-._~:/?#@&=+%]+", "", message)

    # Mentions: "@nick(12345)" or "@nick".
    message = re.sub(r"@\s*\S+\s*\(\d+\)|@\s*\S+", "", message)

    # Collapse punctuation and other symbols into single spaces.
    message = re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9]+", " ", message).strip()

    return message
def __init__(
    self,
    context,
    main_loop: asyncio.AbstractEventLoop,
    debug_mode: bool = False,
    timezone: pytz.BaseTzInfo = pytz.utc,
):
    """
    Initialise the scheduler, reusing a running one for the same context.

    Schedulers are registered in the module-level ``_SCHEDULER_INSTANCES``
    dict under ``id(context)``.  If a registered scheduler for this context
    is running, its attributes are copied onto this object and nothing new
    is registered; otherwise a fresh scheduler state is created and stored.

    Args:
        context: AstrBot context.
        main_loop: Reference to the main event loop.
        debug_mode: Whether debug logging is enabled.
        timezone: Timezone object used for cron calculations.
    """
    with _SCHEDULER_LOCK:
        # The identity of the context object is the registry key.
        context_id = id(context)
        existing_scheduler = _SCHEDULER_INSTANCES.get(context_id)

        if existing_scheduler is not None:
            if existing_scheduler.running:
                logger.warning(
                    f"已存在运行中的调度器实例(ID: {context_id}),正在复用该实例。"
                )
                # Mirror the running scheduler's state onto this object.
                # NOTE(review): ``running`` is copied by value, so the two
                # objects can diverge afterwards — confirm this is intended.
                for attr in (
                    "context",
                    "tasks",
                    "running",
                    "thread",
                    "main_loop",
                    "debug_mode",
                ):
                    setattr(self, attr, getattr(existing_scheduler, attr))
                self.timezone = getattr(existing_scheduler, "timezone", pytz.utc)
                self._event_loop = getattr(existing_scheduler, "_event_loop", None)
                self._poller_task = getattr(existing_scheduler, "_poller_task", None)
                return

            # A stale, stopped scheduler is simply replaced below.
            logger.info(f"发现未运行的调度器实例(ID: {context_id}),将替换它。")

        # No reusable scheduler: set up fresh state.
        self.context = context
        self.main_loop = main_loop
        self.debug_mode = debug_mode
        self.timezone = timezone
        self.tasks: Dict[str, Dict[str, Any]] = {}
        self.running = False
        self.thread = None
        self._event_loop: Optional[asyncio.AbstractEventLoop] = None
        self._poller_task: Optional[asyncio.Task] = None

        # Register the new scheduler in the global dict.
        _SCHEDULER_INSTANCES[context_id] = self

    logger.info(
        f"TaskScheduler initialized with main loop ID: {id(self.main_loop)}, Debug Mode: {self.debug_mode}, Timezone: {self.timezone}"
    )
def start(self) -> None:
    """
    Start the scheduler in a new daemon thread.

    Does nothing except log a warning if the scheduler is already running.
    If a thread from an earlier run is still alive, it is given up to two
    seconds to finish before the new thread is started.
    """
    if self.running:
        logger.warning("调度器已经在运行")
        return

    # Wait for a leftover thread while ``self.running`` is still False, so
    # its poller sees the stop signal.  The flag is raised only afterwards,
    # so a failing join() can no longer leave the scheduler flagged as
    # stopped while its new thread is being started.
    if self.thread and self.thread.is_alive():
        logger.warning("调度器已有线程正在运行,尝试停止它")
        try:
            self.thread.join(timeout=2.0)
        except Exception as e:
            logger.error(f"停止旧线程时出错: {e}")

    self.running = True

    # Daemon thread, so it never blocks interpreter shutdown.
    self.thread = threading.Thread(
        target=self._run_scheduler, name=f"TaskScheduler-{id(self)}"
    )
    self.thread.daemon = True
    self.thread.start()
    logger.info("调度器已启动")
async def _async_poller(self, loop: asyncio.AbstractEventLoop):
    """
    Poll the task table once per second and dispatch tasks that are due.

    Runs until ``self.running`` becomes false.  A due task is submitted to
    ``self.main_loop`` through ``asyncio.run_coroutine_threadsafe`` and its
    next run time is recomputed from the time it was *scheduled* for, not
    from "now", so a slightly late poll does not make the schedule drift.
    A task whose rescheduling fails is removed from the table.

    Args:
        loop: The scheduler thread's event loop; accepted for interface
            compatibility and not used by the poller itself.
    """
    logger.info("SCHED ASYNC_POLLER: Async poller task started.")
    last_heartbeat = time.time()
    heartbeat_interval = 600  # seconds (10 minutes)
    task_check_interval = 1.0  # seconds between polls

    try:
        while self.running:
            current_time = time.time()  # POSIX timestamp

            if (
                self.debug_mode
                and current_time - last_heartbeat > heartbeat_interval
            ):
                # datetime.utcfromtimestamp() is deprecated; an aware UTC
                # datetime formats to the same text.
                heartbeat_utc = datetime.datetime.fromtimestamp(
                    current_time, datetime.timezone.utc
                )
                logger.debug(
                    f"SCHED ASYNC_POLLER: Heartbeat. Current UTC time: {heartbeat_utc.strftime('%Y-%m-%d %H:%M:%S UTC')}"
                )
                last_heartbeat = current_time

            # Iterate over a snapshot: tasks may be removed inside the loop.
            for task_id, task_info in list(self.tasks.items()):
                if task_info.get("running", False):
                    continue

                if current_time < task_info["next_run"]:
                    continue

                if self.debug_mode:
                    logger.debug(f"SCHED ASYNC_POLLER: Executing task {task_id}")

                # Hand the task over to the main event loop.
                asyncio.run_coroutine_threadsafe(
                    self._execute_task(task_id, task_info), self.main_loop
                )

                # Compute the next run time for this task.
                # NOTE(review): after a long pause the new time can still be
                # in the past, so missed runs are replayed one poll at a
                # time — confirm whether that catch-up is wanted.
                try:
                    # Turn the stored timestamp back into an aware datetime
                    # in the configured timezone and advance from there.
                    last_scheduled_run_dt = datetime.datetime.fromtimestamp(
                        task_info["next_run"], self.timezone
                    )
                    cron = croniter(
                        task_info["cron_expression"], last_scheduled_run_dt
                    )
                    next_run_datetime_aware = cron.get_next(datetime.datetime)
                    task_info["next_run"] = next_run_datetime_aware.timestamp()
                    task_info["cron_ref_dt"] = next_run_datetime_aware

                    if self.debug_mode:
                        next_run_str_local = next_run_datetime_aware.astimezone(
                            self.timezone
                        ).strftime("%Y-%m-%d %H:%M:%S %Z%z")
                        logger.debug(
                            f"SCHED ASYNC_POLLER: Task {task_id} rescheduled. Next run: {next_run_str_local}"
                        )
                except Exception as e:
                    logger.error(
                        f"SCHED ASYNC_POLLER: Error rescheduling task {task_id}: {e} - Task will be removed."
                    )
                    logger.error(f"Details: {traceback.format_exc()}")
                    self.tasks.pop(task_id, None)  # Drop the broken task.

            await asyncio.sleep(task_check_interval)
    except asyncio.CancelledError:
        # Cancellation is the normal shutdown path; it is logged, not re-raised.
        logger.info("SCHED ASYNC_POLLER: Async poller task cancelled.")
    except Exception as e:
        logger.error(f"SCHED ASYNC_POLLER: Error in async poller: {e}")
        logger.error(f"Details: {traceback.format_exc()}")
    finally:
        logger.info("SCHED ASYNC_POLLER: Async poller task stopped.")
Starting event loop with run_forever().") 351 | loop.run_forever() # This blocks until loop.stop() is called 352 | logger.info("SCHED: Event loop run_forever() has exited.") 353 | 354 | except asyncio.CancelledError: 355 | logger.info( 356 | "SCHED: _run_scheduler's run_forever() was cancelled (likely during stop)." 357 | ) 358 | except Exception as e_outer: 359 | logger.error(f"SCHED: _run_scheduler outer error: {e_outer}") 360 | logger.error( 361 | f"SCHED: _run_scheduler outer traceback: {traceback.format_exc()}" 362 | ) 363 | finally: 364 | logger.info("SCHED: _run_scheduler finally block entered.") 365 | 366 | if self._poller_task and not self._poller_task.done(): 367 | logger.info("SCHED: Cancelling poller task in finally.") 368 | self._poller_task.cancel() 369 | if ( 370 | loop and not loop.is_closed() and not loop.is_running() 371 | ): # if run_forever exited 372 | # Need to run the loop briefly to process the cancellation 373 | try: 374 | logger.info( 375 | "SCHED: Running loop briefly to process poller cancellation." 376 | ) 377 | loop.run_until_complete(self._poller_task) 378 | except asyncio.CancelledError: 379 | logger.info( 380 | "SCHED: Poller task successfully cancelled in finally." 381 | ) 382 | except Exception as e_poll_cancel_wait: 383 | logger.error( 384 | f"SCHED: Exception waiting for poller task cancellation in finally: {e_poll_cancel_wait}" 385 | ) 386 | 387 | if loop and not loop.is_closed(): 388 | logger.info( 389 | "SCHED: Shutting down remaining tasks in event loop (finally)." 390 | ) 391 | 392 | # Ensure loop is stopped if it was running (e.g. if run_forever exited due to error) 393 | if loop.is_running(): 394 | logger.info( 395 | "SCHED: Loop was still running in finally, stopping it." 
396 | ) 397 | loop.stop() 398 | 399 | # Gather all remaining tasks 400 | pending_tasks = [ 401 | t 402 | for t in asyncio.all_tasks(loop) 403 | if t is not self._poller_task and not t.done() 404 | ] 405 | if pending_tasks: 406 | logger.info( 407 | f"SCHED: {len(pending_tasks)} other pending tasks to cancel/gather." 408 | ) 409 | for t in pending_tasks: 410 | t.cancel() 411 | try: 412 | # Run loop to process cancellations and gather results 413 | loop.run_until_complete( 414 | asyncio.gather(*pending_tasks, return_exceptions=True) 415 | ) 416 | logger.info("SCHED: Gathered other pending tasks in finally.") 417 | except Exception as e_gather_final: 418 | logger.error( 419 | f"SCHED: Error during final gather in finally: {e_gather_final}" 420 | ) 421 | 422 | if hasattr(loop, "shutdown_asyncgens") and callable( 423 | loop.shutdown_asyncgens 424 | ): 425 | try: 426 | logger.info("SCHED: Shutting down asyncgens in finally.") 427 | loop.run_until_complete(loop.shutdown_asyncgens()) 428 | except RuntimeError as e_gens_runtime: 429 | logger.warning( 430 | f"SCHED: Runtime error shutting down asyncgens in finally (may be ok if loop closed): {e_gens_runtime}" 431 | ) 432 | except Exception as e_gens: 433 | logger.error( 434 | f"SCHED: Error shutting down asyncgens in finally: {e_gens}" 435 | ) 436 | 437 | if not loop.is_closed(): 438 | logger.info("SCHED: Closing event loop in finally.") 439 | loop.close() 440 | else: 441 | logger.info("SCHED: Event loop was already closed in finally.") 442 | 443 | self._event_loop = None # Clear the loop reference 444 | self._poller_task = None # Clear task reference 445 | logger.info("调度器线程已退出 (end of _run_scheduler)") 446 | 447 | async def _execute_task(self, task_id: str, task: Dict[str, Any]) -> None: 448 | """ 449 | 执行定时任务 450 | 451 | Args: 452 | task_id: 任务ID 453 | task: 任务信息 454 | """ 455 | current_loop_id = None 456 | try: 457 | current_loop_id = id(asyncio.get_running_loop()) 458 | except RuntimeError: 459 | if self.debug_mode: 460 
| logger.debug( 461 | f"SCHED: [{task_id}] _execute_task: Cannot get current running loop." 462 | ) 463 | 464 | if self.debug_mode: 465 | logger.debug( 466 | f"SCHED: [{task_id}] _execute_task ENTERED. Will run in loop ID: {current_loop_id if current_loop_id else 'Unknown'}" 467 | ) 468 | try: 469 | # Keep essential start log at INFO level 470 | start_time_str = time.strftime( 471 | "%Y-%m-%d %H:%M:%S", time.localtime(time.time()) 472 | ) 473 | logger.info(f"[{task_id}] 开始执行定时任务,开始时间: {start_time_str}") 474 | execution_start = time.time() 475 | 476 | callback = task.get("callback") 477 | if not callback or not callable(callback): 478 | logger.error(f"[{task_id}] 任务回调函数无效或不可调用") # Keep as error 479 | if self.debug_mode: 480 | logger.debug( 481 | f"SCHED: [{task_id}] Callback is invalid or not callable." 482 | ) 483 | return 484 | 485 | if self.debug_mode: 486 | logger.debug( 487 | f"SCHED: [{task_id}] Callback obtained: {callback.__name__ if hasattr(callback, '__name__') else str(callback)}" 488 | ) 489 | 490 | try: 491 | import inspect 492 | 493 | if inspect.iscoroutinefunction(callback): 494 | if self.debug_mode: 495 | logger.debug( 496 | f"SCHED: [{task_id}] Callback is a coroutine function. Preparing to call it to get coroutine object." 
497 | ) 498 | coro = None 499 | try: 500 | coro = callback() 501 | if self.debug_mode: 502 | logger.debug( 503 | f"SCHED: [{task_id}] Successfully CALLED callback function, got coroutine object: {type(coro)}" 504 | ) 505 | except Exception as coro_creation_e: 506 | logger.error( 507 | f"[{task_id}] 调用回调函数创建协程对象时出错: {coro_creation_e}" 508 | ) # Keep as error 509 | import traceback 510 | 511 | logger.error( 512 | f"[{task_id}] 协程创建错误详情: {traceback.format_exc()}" 513 | ) # Keep as error 514 | if self.debug_mode: 515 | logger.debug( 516 | f"SCHED: [{task_id}] EXCEPTION during calling callback() to get coroutine object: {coro_creation_e}" 517 | ) 518 | raise 519 | 520 | if coro is not None: 521 | if self.debug_mode: 522 | logger.debug( 523 | f"SCHED: [{task_id}] Preparing to AWAIT the coroutine object." 524 | ) 525 | try: 526 | # 使用超时来防止协程长时间运行 527 | # import asyncio # Already imported at top 528 | # 设置一个合理的超时时间,这里使用30分钟 529 | timeout = 30 * 60 # 30分钟 530 | try: 531 | await asyncio.wait_for(coro, timeout=timeout) 532 | if self.debug_mode: 533 | logger.debug( 534 | f"SCHED: [{task_id}] Successfully AWAITED the coroutine." 535 | ) 536 | logger.info(f"[{task_id}] 成功执行协程回调函数") 537 | except asyncio.TimeoutError: 538 | logger.error( 539 | f"[{task_id}] 协程执行超时(超过{timeout}秒)" 540 | ) 541 | except Exception as await_error: 542 | logger.error( 543 | f"[{task_id}] 等待协程执行时出错: {await_error}" 544 | ) 545 | import traceback 546 | 547 | logger.error( 548 | f"[{task_id}] 协程执行错误详情: {traceback.format_exc()}" 549 | ) 550 | else: 551 | logger.error(f"[{task_id}] 协程对象为None,无法执行") 552 | else: 553 | # 如果不是协程函数,直接调用 554 | if self.debug_mode: 555 | logger.debug( 556 | f"SCHED: [{task_id}] Callback is NOT a coroutine function. Will call directly." 557 | ) 558 | result = callback() 559 | if self.debug_mode: 560 | logger.debug( 561 | f"SCHED: [{task_id}] Successfully called regular function. 
Result: {result}" 562 | ) 563 | logger.info(f"[{task_id}] 成功执行普通回调函数") 564 | except Exception as call_error: 565 | logger.error(f"[{task_id}] 执行回调函数时出错: {call_error}") 566 | import traceback 567 | 568 | logger.error(f"[{task_id}] 执行错误详情: {traceback.format_exc()}") 569 | if self.debug_mode: 570 | logger.debug( 571 | f"SCHED: [{task_id}] EXCEPTION during execution: {call_error}" 572 | ) 573 | 574 | # 计算执行时间 575 | execution_time = time.time() - execution_start 576 | logger.info(f"[{task_id}] 任务执行完成,耗时: {execution_time:.2f}秒") 577 | if self.debug_mode: 578 | logger.debug( 579 | f"SCHED: [{task_id}] Task execution completed in {execution_time:.2f} seconds" 580 | ) 581 | except Exception as e: 582 | logger.error(f"[{task_id}] 执行任务过程中出错: {e}") 583 | import traceback 584 | 585 | logger.error(f"[{task_id}] 任务执行错误详情: {traceback.format_exc()}") 586 | if self.debug_mode: 587 | logger.debug(f"SCHED: [{task_id}] EXCEPTION in _execute_task: {e}") 588 | finally: 589 | # 无论成功失败,都重置任务状态 590 | try: 591 | if task_id in self.tasks: 592 | self.tasks[task_id]["running"] = False 593 | if self.debug_mode: 594 | logger.debug( 595 | f"SCHED: [{task_id}] Reset task running state to False" 596 | ) 597 | except Exception as reset_error: 598 | logger.error(f"[{task_id}] 重置任务状态时出错: {reset_error}") 599 | if self.debug_mode: 600 | logger.debug( 601 | f"SCHED: [{task_id}] EXCEPTION when resetting task state: {reset_error}" 602 | ) 603 | 604 | if self.debug_mode: 605 | logger.debug(f"SCHED: [{task_id}] _execute_task EXITED") 606 | 607 | async def send_to_session( 608 | self, session_id: str, message_text: str, image_path: Optional[str] = None 609 | ) -> bool: 610 | """ 611 | 向指定会话发送消息 612 | 613 | Args: 614 | session_id: 会话ID 615 | message_text: 消息文本 616 | image_path: 可选的图片路径 617 | 618 | Returns: 619 | 是否成功发送消息 620 | """ 621 | try: 622 | logger.info(f"准备发送消息到会话: {session_id}") 623 | 624 | # 尝试多种会话ID格式 625 | attempted_session_ids = [] 626 | success = False 627 | 628 | # 检查图片路径是否存在 629 | if image_path 
and not os.path.exists(image_path): 630 | logger.error(f"图片路径不存在: {image_path}") 631 | # 尝试查找可能存在的图片文件 632 | if os.path.dirname(image_path): 633 | dir_path = os.path.dirname(image_path) 634 | if os.path.exists(dir_path): 635 | files = os.listdir(dir_path) 636 | logger.info(f"目录 {dir_path} 中存在的文件: {files}") 637 | 638 | # 尝试找到类似名称的图片文件 639 | basename = os.path.basename(image_path) 640 | for file in files: 641 | if file.startswith(basename.split(".")[0]): 642 | logger.info( 643 | f"找到可能的替代图片: {os.path.join(dir_path, file)}" 644 | ) 645 | image_path = os.path.join(dir_path, file) 646 | break 647 | 648 | # 创建消息链 649 | message_components = [Comp.Plain(message_text)] 650 | 651 | # 如果提供了图片路径,添加图片组件 652 | if image_path and os.path.exists(image_path): 653 | try: 654 | logger.info(f"添加图片到消息: {image_path}") 655 | message_components.append(Comp.Image.fromFileSystem(image_path)) 656 | except Exception as img_error: 657 | logger.error(f"添加图片到消息链失败: {img_error}") 658 | logger.error(f"添加图片错误详情: {traceback.format_exc()}") 659 | # 继续发送纯文本消息 660 | 661 | # 创建消息链 662 | message_chain = MessageChain(message_components) 663 | 664 | # 首先尝试使用原始会话ID 665 | logger.info(f"尝试使用原始会话ID发送: {session_id}") 666 | attempted_session_ids.append(session_id) 667 | success = await self.context.send_message(session_id, message_chain) 668 | 669 | # 如果失败,尝试使用其他会话ID格式 670 | if not success: 671 | # 检查是否是群号,如果是,尝试构建完整会话ID 672 | if session_id.isdigit() or (":" not in session_id): 673 | # 从session_id提取可能的群号 674 | group_id = session_id 675 | if ":" in session_id: 676 | # 可能是部分会话ID,尝试提取最后部分作为群号 677 | parts = session_id.split(":") 678 | group_id = parts[-1] 679 | 680 | # 尝试QQ常见会话ID格式 681 | for platform in ["aiocqhttp", "qqofficial"]: 682 | for msg_type in ["GroupMessage", "group"]: 683 | fixed_id = f"{platform}:{msg_type}:{group_id}" 684 | if fixed_id not in attempted_session_ids: 685 | logger.info(f"尝试使用构造会话ID发送: {fixed_id}") 686 | attempted_session_ids.append(fixed_id) 687 | success = await self.context.send_message( 
688 | fixed_id, message_chain 689 | ) 690 | if success: 691 | logger.info(f"使用会话ID {fixed_id} 发送成功") 692 | break 693 | if success: 694 | break 695 | 696 | # 如果仍未成功,尝试直接获取平台实例并发送 697 | if not success and group_id.isdigit(): 698 | try: 699 | # 尝试使用aiocqhttp平台直接发送 700 | platform = self.context.get_platform("aiocqhttp") 701 | if platform and hasattr(platform, "send_group_msg"): 702 | logger.info( 703 | f"尝试使用aiocqhttp平台直接发送到群: {group_id}" 704 | ) 705 | try: 706 | await platform.send_group_msg( 707 | group_id=group_id, message=message_chain 708 | ) 709 | logger.info("使用aiocqhttp平台发送成功") 710 | success = True 711 | except Exception as e: 712 | logger.error(f"使用aiocqhttp平台发送失败: {e}") 713 | 714 | # 尝试使用qqofficial平台 715 | if not success: 716 | platform = self.context.get_platform("qqofficial") 717 | if platform and hasattr(platform, "send_group_msg"): 718 | logger.info( 719 | f"尝试使用qqofficial平台直接发送到群: {group_id}" 720 | ) 721 | try: 722 | await platform.send_group_msg( 723 | group_id=group_id, message=message_chain 724 | ) 725 | logger.info("使用qqofficial平台发送成功") 726 | success = True 727 | except Exception as e: 728 | logger.error(f"使用qqofficial平台发送失败: {e}") 729 | except Exception as platform_error: 730 | logger.error(f"尝试直接使用平台发送失败: {platform_error}") 731 | 732 | if success: 733 | logger.info(f"成功发送消息到会话: {session_id}") 734 | else: 735 | logger.warning(f"所有尝试都失败,无法发送消息到会话: {session_id}") 736 | logger.warning(f"尝试过的会话ID: {attempted_session_ids}") 737 | 738 | return success 739 | except Exception as e: 740 | logger.error(f"发送消息到会话失败: {session_id}, 错误: {e}") 741 | logger.error(f"发送消息错误详情: {traceback.format_exc()}") 742 | return False 743 | --------------------------------------------------------------------------------