├── gcp-autostart ├── app │ ├── requirements.txt │ └── main.py ├── Dockerfile └── README.md ├── oracle-lookbusy ├── app │ ├── speedtest.sh │ └── docker-entrypoint.sh ├── docker-compose.yml └── Dockerfile ├── maccms-tool ├── requirements.txt ├── word.py ├── main.py └── config.yml ├── README.md ├── mail-api ├── template │ ├── theme_5 │ │ ├── author.html │ │ └── reply.html │ ├── theme_4 │ │ ├── author.html │ │ └── reply.html │ ├── theme_3 │ │ ├── reply.html │ │ └── author.html │ ├── theme_6 │ │ ├── author.html │ │ └── reply.html │ ├── theme_1 │ │ ├── author.html │ │ └── reply.html │ ├── theme_2 │ │ ├── author.html │ │ └── reply.html │ └── theme_7 │ │ └── reply.html ├── PHPMailer │ ├── Exception.php │ ├── OAuthTokenProvider.php │ ├── OAuth.php │ └── POP3.php └── index.php ├── wallpaper-dl ├── remove_person_pic.py ├── image_uploader.py ├── 360.py └── wallhaven.py ├── ddns-scripts └── cloudflare │ ├── ddns.sh │ └── domain-ddns.sh ├── mtab-import ├── bing-wp.py └── website-info.py └── LICENSE /gcp-autostart/app/requirements.txt: -------------------------------------------------------------------------------- 1 | google-cloud-compute==1.24.0 -------------------------------------------------------------------------------- /oracle-lookbusy/app/speedtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pkill speedtest 3 | /usr/bin/speedtest --accept-license 4 | -------------------------------------------------------------------------------- /maccms-tool/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fog-Forest/scripts/HEAD/maccms-tool/requirements.txt -------------------------------------------------------------------------------- /gcp-autostart/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | LABEL authors="Kinoko" 3 | 4 | WORKDIR /app 5 | COPY ./app/* . 6 | 7 | # 安装依赖 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | ENV PYTHONUNBUFFERED=1 11 | 12 | CMD ["python", "main.py"] 13 | -------------------------------------------------------------------------------- /gcp-autostart/README.md: -------------------------------------------------------------------------------- 1 | # GCP 抢占式实例自动开机脚本 2 | 3 | ## 运行容器 4 | 5 | ```bash 6 | # 先申请GCP账号密钥,类型选JSON, 7 | 8 | mkdir /root/key # 密钥文件放到 /root/key 里面 9 | docker run -d --name gcp-autostart \ 10 | -e GCP_KEY_PATH=/app/key \ 11 | -e GCP_LOOP_INTERVAL=300 \ 12 | -v /root/key:/app/key \ 13 | fogforest/gcp-autostart 14 | ``` 15 | 16 | ## 查看日志 17 | 18 | ```bash 19 | docker logs -f gcp-autostart 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /oracle-lookbusy/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | lookbusy: 5 | image: fogforest/lookbusy:latest 6 | container_name: lookbusy 7 | hostname: lookbusy 8 | restart: always 9 | environment: 10 | - TZ=Asia/Shanghai 11 | - CPU_UTIL=10-20 # CPU占用,单位%,不可省略,支持固定值或范围 12 | - CPU_CORE=1 # CPU占用核心数,不指定默认跑全核,出现CPU打满的情况可以指定为1核 13 | - MEM_UTIL=15 # 内存占用,单位%,不跑内存可省略 14 | - SPEEDTEST_INTERVAL=120 # 网络测速间隔,单位分钟,不跑网络可省略 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 📖 Scripts 2 | 3 | 🤪 随手写的一些乱七八糟脚本,持续更新中,觉得好用请点个⭐~ 4 | 5 | ## 仓库简介 6 | 7 | - **ddns-scripts** 8 | 简易 DDNS 脚本,兼容 Cloudflare 服务商。 9 | - **gcp-autostart** 10 | 谷歌云抢占式实例状态实时监控及自动重启工具。 11 | - **maccms-tool** 12 | MacCMS 视频信息整理工具。 13 | - **mail-api** 14 | 发送邮件接口,支持多种模板。 15 | - **mtab-import** 16 | mTab新标签页数据丰富脚本(壁纸、书签)。 17 | - **oracle-lookbusy** 18 | 甲骨文云免费机器保活工具,采用智能资源占用策略。 19 | - **wallpaper-dl** 20 | 多平台壁纸下载工具,适配 Wallhaven、360壁纸。 21 | 22 | ## 免责声明 23 | 24 | - 本项目中的脚本仅供学习和个人使用,严禁用于任何商业用途。 25 | - 学习使用时请遵守您所在国家的法律,任何非法行为由使用者自行承担。 26 | 27 | -------------------------------------------------------------------------------- /oracle-lookbusy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:12 2 | LABEL authors="Kinoko" 3 | 4 | COPY ./app /app 5 | WORKDIR /app 6 | 7 | RUN apt update \ 8 | && apt install -y curl wget cron git cmake build-essential \ 9 | && git clone https://github.com/flow2000/lookbusy.git \ 10 | && curl -s https://packagecloud.io/install/repositories/ookla/speedtest-cli/script.deb.sh | bash \ 11 | && apt install -y speedtest \ 12 | && cd lookbusy && chmod +x ./configure && ./configure && make && make install \ 13 | && rm -rf /app/lookbusy && chmod +x /app/*.sh \ 14 | && apt autoremove -y && apt autoclean && apt remove -y && apt clean 15 | 16 | CMD [ "/app/docker-entrypoint.sh" ] 17 | 18 | USER root 19 | -------------------------------------------------------------------------------- /mail-api/template/theme_5/author.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

{blogName}:《{title}》一文有新的评论啦!

4 |
5 |
6 |

{author}在《{title}》评论:

7 |

{text}

8 | IP地址:{ip}
9 | 评论邮箱:{mail}
10 | 评论状态:{status}
11 | 管理评论 12 |
13 |
-------------------------------------------------------------------------------- /mail-api/template/theme_5/reply.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

您在《{title}》一文上的留言有回复啦!

4 |
5 |
6 |

{author},您好!

7 |

您在文章《{title}》上的评论:

8 |

{text}

9 |

{replyAuthor}给您的回复如下:

10 |

{replyText}

11 |

您可以点击 查看回复的完整內容

12 |

感谢您对 {blogName} 的关注,如您有任何疑问,欢迎来我网站留言。

13 |

(注:此邮件由系统自动发出,请勿回复。)

14 |
15 |
-------------------------------------------------------------------------------- /mail-api/PHPMailer/Exception.php: -------------------------------------------------------------------------------- 1 | 10 | * @author Jim Jagielski (jimjag) 11 | * @author Andy Prevost (codeworxtech) 12 | * @author Brent R. Matzelle (original founder) 13 | * @copyright 2012 - 2020 Marcus Bointon 14 | * @copyright 2010 - 2012 Jim Jagielski 15 | * @copyright 2004 - 2009 Andy Prevost 16 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 17 | * @note This program is distributed in the hope that it will be useful - WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. 20 | */ 21 | 22 | namespace PHPMailer\PHPMailer; 23 | 24 | /** 25 | * PHPMailer exception handler. 26 | * 27 | * @author Marcus Bointon 28 | */ 29 | class Exception extends \Exception 30 | { 31 | /** 32 | * Prettify error message output. 33 | * 34 | * @return string 35 | */ 36 | public function errorMessage() 37 | { 38 | return '' . htmlspecialchars($this->getMessage(), ENT_COMPAT | ENT_HTML401) . "
\n"; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /oracle-lookbusy/app/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # speedtest 5 | if [ $SPEEDTEST_INTERVAL ]; then 6 | # create crontab 7 | if [ $SPEEDTEST_INTERVAL -eq 0 ]; then 8 | echo "Interval is 0, no task will be created" 9 | elif [ $SPEEDTEST_INTERVAL -le 59 ]; then 10 | echo -e "*/${SPEEDTEST_INTERVAL}\t*\t*\t*\t*\tnohup bash /app/speedtest.sh >/app/speedtest.log 2>&1 &" | crontab 11 | echo "Scheduled task created successfully" 12 | service cron restart 13 | elif [[ $SPEEDTEST_INTERVAL -gt 59 && $SPEEDTEST_INTERVAL -lt 1440 ]]; then 14 | hour=$(($SPEEDTEST_INTERVAL / 60)) 15 | minute=$(($SPEEDTEST_INTERVAL % 60)) 16 | echo -e "$minute\t*/$hour\t*\t*\t*\tnohup bash /app/speedtest.sh >/app/speedtest.log 2>&1 &" | crontab 17 | echo "Scheduled task created successfully" 18 | service cron restart 19 | else 20 | echo "Interval limit exceeded" 21 | fi 22 | 23 | else 24 | echo "SPEEDTEST_INTERVAL is not exists" 25 | fi 26 | 27 | # lookbusy 28 | MemTotal=$(awk '($1 == "MemTotal:"){printf "%d\n",$2/1024}' /proc/meminfo) # total memory 29 | if [ $CPU_CORE ]; then 30 | if [ $MEM_UTIL ]; then 31 | MemUsage=$(($MemTotal / 100 * $MEM_UTIL)) 32 | lookbusy -c $CPU_UTIL -n $CPU_CORE -m ${MemUsage}MB 33 | else 34 | lookbusy -c $CPU_UTIL -n $CPU_CORE 35 | fi 36 | else 37 | if [ $MEM_UTIL ]; then 38 | MemUsage=$(($MemTotal / 100 * $MEM_UTIL)) 39 | lookbusy -c $CPU_UTIL -r curve -m ${MemUsage}MB 40 | else 41 | lookbusy -c $CPU_UTIL -r curve 42 | fi 43 | fi 44 | -------------------------------------------------------------------------------- /mail-api/template/theme_4/author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 57 | 58 | 59 | 60 |
61 |
62 | {blogName}:文章 《{title}》 有新的评论 63 |
64 |
65 | 用户 {author} 的评论内容为: 66 |

{text}

67 |
68 |
69 | 评论用户:{author}
70 | 评论  IP:{ip}
71 | 评论邮箱:{mail}
72 | 评论状态:{status}

73 | 管理评论 74 |
75 | 76 |
77 | 78 | 79 | -------------------------------------------------------------------------------- /mail-api/PHPMailer/OAuthTokenProvider.php: -------------------------------------------------------------------------------- 1 | 10 | * @author Jim Jagielski (jimjag) 11 | * @author Andy Prevost (codeworxtech) 12 | * @author Brent R. Matzelle (original founder) 13 | * @copyright 2012 - 2020 Marcus Bointon 14 | * @copyright 2010 - 2012 Jim Jagielski 15 | * @copyright 2004 - 2009 Andy Prevost 16 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 17 | * @note This program is distributed in the hope that it will be useful - WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. 20 | */ 21 | 22 | namespace PHPMailer\PHPMailer; 23 | 24 | /** 25 | * OAuthTokenProvider - OAuth2 token provider interface. 26 | * Provides base64 encoded OAuth2 auth strings for SMTP authentication. 27 | * 28 | * @see OAuth 29 | * @see SMTP::authenticate() 30 | * 31 | * @author Peter Scopes (pdscopes) 32 | * @author Marcus Bointon (Synchro/coolbru) 33 | */ 34 | interface OAuthTokenProvider 35 | { 36 | /** 37 | * Generate a base64-encoded OAuth token ensuring that the access token has not expired. 38 | * The string to be base 64 encoded should be in the form: 39 | * "user=\001auth=Bearer \001\001" 40 | * 41 | * @return string 42 | */ 43 | public function getOauth64(); 44 | } 45 | -------------------------------------------------------------------------------- /mail-api/template/theme_3/reply.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 47 | 48 | 49 | 50 |
51 |
52 | 用户:{replyAuthor}在{blogName} 《{title}》 博文中对你的评论进行了回复 53 |
54 |
55 | 你的评论内容为: 56 |

{text}

57 |
58 |
59 |
60 | 用户:{replyAuthor}对你的评论回复: 61 |

{replyText}

62 |
63 |
64 | 文章链接:查看文章
65 |

本邮件为{blogName} 自动发送,请勿直接回复. 66 |

67 |
68 | 69 | -------------------------------------------------------------------------------- /mail-api/template/theme_6/author.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |

15 | > 16 | 您在{blogName}的文章有新评论啦! 17 |

18 |
19 |

{author}在 20 | 《{title}》中发表评论:

21 |

{text}

22 | 评论状态:{status}
23 | 评论IP:{ip}
24 | 评论邮箱:{mail}
25 | 查看完整的回复内容
26 |
27 |
28 |
29 |

2019 30 | {blogName} 31 |

32 |
-------------------------------------------------------------------------------- /mail-api/template/theme_4/reply.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 57 | 58 | 59 | 60 |
61 |
62 | {replyAuthor} 在{blogName} 《{title}》 博文中对你的评论进行了回复 63 |
64 |
65 | 你的评论内容为: 66 |

{text}

67 |
68 |
69 | {replyAuthor} 回复了你的评论: 70 |

{replyText}

71 |
72 |
73 | 文章链接:查看文章 74 |

本邮件为{blogName}自动发送,请勿直接回复 | 查看文章 | {blogName}

75 |
76 |
77 | 78 | 79 | -------------------------------------------------------------------------------- /mail-api/template/theme_3/author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 54 | 55 | 56 | 57 |
58 |
59 | {blogName}:文章 《{title}》有新的评论 60 |
61 |
62 | [{author}] 的评论内容为: 63 |

{text}

64 |
65 |
66 | 评论用户:{author}
67 | 评论  IP:{ip}
68 | 评论邮箱:{mail}
69 | 评论地址:查看评论
70 | 评论状态:{status}

操作:管理评论

71 |
72 |
73 | 74 | -------------------------------------------------------------------------------- /mail-api/template/theme_6/reply.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |

15 | > 16 | {author},您在{blogName}的评论有新回复啦! 17 |

18 |
19 |

{author},您曾在 20 | 《{title}》中发表评论:

21 |

{text}

22 |

{replyAuthor}给您的回复如下:

23 |

{replyText}

24 |

您可以 25 | 查看完整的回复内容,欢迎再次光临 26 | {blogName}

27 |
28 |
29 |
30 |

2019 31 | {blogName} - 邮件自动生成,请勿直接回复!

32 |
33 |
-------------------------------------------------------------------------------- /mail-api/template/theme_1/author.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |

> 您的文章《{title}》有了新的回复耶~

5 |
6 |

{author} 给您的评论:

7 |

{text}

8 |

其他信息:

9 |

IP:{ip}
邮箱:{mail}
状态:{status} [管理评论]

10 |
11 |
12 | 查看回复的完整內容 13 |
14 |

©2021 Copyright {author}

15 |
16 |
-------------------------------------------------------------------------------- /mail-api/template/theme_2/author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | 17 |
5 |
6 |

您在 [{blogName}] 发表的文章有新评论!

7 |
8 |

{author} 在您的《{title}》上发表评论:

9 |

{text}

10 |

请注意:此邮件由 {blogName} 自动发送,请勿直接回复。

11 |

若此邮件不是您请求的,请忽略并删除!

12 |
13 |
14 |
-------------------------------------------------------------------------------- /mail-api/template/theme_2/reply.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | 20 |
5 |
6 |

您在 [{blogName}] 的评论有了新的回复!

7 |
8 |

{author},您曾在文章《{title}》上发表评论:

9 |

{text}

10 |

{replyAuthor} 给您的回复如下:

11 |

{replyText}

12 |

您可以 查看回复完整内容,欢迎再次光临 {blogName}

13 |

请注意:此邮件由 {blogName} 自动发送,请勿直接回复。

14 |

若此邮件不是您请求的,请忽略并删除!

15 |
16 |
17 |
-------------------------------------------------------------------------------- /mail-api/template/theme_1/reply.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
Dear: {author}
4 |
5 |

> 您在《{title}》的评论有了新的回复呐~

6 |
7 |

您的评论:

8 |

{text}

9 |

{replyAuthor} 给您的回复:

10 |

{replyText}

11 |
12 |
13 |
14 |

树在,山在,大地在,岁月在,我在,你还要怎样更好的世界?——张晓风《我在》

15 |
16 | 查看回复的完整內容 17 |
18 |

本邮件为系统自动发送,请勿直接回复~

19 |
20 |
21 |

©2021 Copyright {author}

22 |
23 |
-------------------------------------------------------------------------------- /mail-api/index.php: -------------------------------------------------------------------------------- 1 | SMTPDebug = 0; //E//Enable SMTP debugging, SMTP::DEBUG_OFF = off (for production use) 17 | $mail->isSMTP(); //Send using SMTP 18 | $mail->CharSet = "UTF-8"; //GBK|GB2312 19 | $mail->Encoding = "base64"; 20 | $mail->Host = 'smtp.example.com'; //Set the SMTP server to send through 21 | $mail->SMTPAuth = true; //Enable SMTP authentication 22 | $mail->Username = 'user@example.com'; //SMTP username 23 | $mail->Password = 'secret'; //SMTP password 24 | $mail->SMTPSecure = 'ssl'; //Enable implicit TLS encryption 25 | $mail->Port = 465; //TCP port to connect to; use 587 if you have set `SMTPSecure = PHPMailer::ENCRYPTION_STARTTLS` 26 | 27 | //收件信息,请修改 from@example.com 为你的发信邮箱 28 | $mail->setFrom('from@example.com', $nickname); //设置发件人信息 29 | $mail->addAddress($address); //设置收件人邮箱地址 30 | //$mail->addAddress('joe@example.net', 'Joe User'); //收件人昵称可选 31 | //$mail->addReplyTo('info@example.com', 'Information'); //自定义邮件的回复地址 32 | 33 | //附件 34 | //$mail->addAttachment('/var/tmp/file.tar.gz'); //添加邮件附件 35 | //$mail->addAttachment('/tmp/image.jpg', 'new.jpg'); //附件名称可选 36 | 37 | //正文 38 | $mail->isHTML(true); //是否为HTML格式 39 | //$mail->Subject = 'Here is the subject'; // 邮件标题示例 40 | //$mail->Body = 'This is the HTML message body in bold!'; // 邮件内容示例 41 | 42 | //邮件模板使用示例 43 | $mail->Subject = $title; 44 | $mail->Body = << 47 |
48 |

⚠️服务器探针告警!

49 |
50 |
51 |

🐣描述:

52 |

$content


53 |

请注意:此邮件由 探针平台 自动发送,请勿直接回复。

54 |
55 | 56 | EOF; 57 | 58 | //发送邮件 59 | if (!$mail->send()) { 60 | echo '邮件发送失败:' . $mail->ErrorInfo; 61 | } else { 62 | echo '邮件已送达!'; 63 | } 64 | -------------------------------------------------------------------------------- /maccms-tool/word.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2022/11/14 5 | # @Desc : 导出 MacCMS mac_vod 表对应字段词库 6 | 7 | import pymysql 8 | 9 | from main import MacCMS 10 | 11 | 12 | class Word(MacCMS): 13 | 14 | def __init__(self, field): 15 | """ 16 | :param field: mac_vod 中的字段 17 | """ 18 | self.field = field 19 | 20 | def __select_field(self) -> tuple: 21 | """ 22 | 查询数据库字段 23 | 24 | :return: 查询结果 25 | """ 26 | config = self.read_config()['db'] 27 | db = pymysql.connect(host=config['host'], user=config['user'], password=config['password'], 28 | database=config['database']) 29 | # 使用cursor()方法获取操作游标 30 | cursor = db.cursor() 31 | sql = 'SELECT %s FROM mac_vod GROUP BY %s' % (self.field, self.field) 32 | try: 33 | cursor.execute(sql) 34 | # 获取所有记录列表 35 | results = cursor.fetchall() 36 | return results 37 | except: 38 | print("Error: unable to fetch data") 39 | # 关闭数据库连接 40 | db.close() 41 | 42 | @classmethod 43 | def clean_up(cls, alist): 44 | """ 45 | 列表去重排序 46 | 47 | :param alist: 需整理的列表 48 | :return: 去重排序后的列表 49 | """ 50 | temp_list = [] 51 | for a in alist: 52 | for b in a.split(','): 53 | if b not in temp_list: 54 | temp_list.append(b) 55 | else: 56 | pass 57 | temp_list.sort() 58 | for word in temp_list: 59 | print(word) 60 | 61 | def replace_test(self): 62 | """ 63 | 同义词替换测试 64 | 65 | :return: 替换后的列表 66 | """ 67 | temp_list = [] # 临时列表 68 | data = self.__select_field() 69 | # print(data) 70 | for a in data: 71 | # print(a) 72 | for b in a[0].split(','): 73 | # print(b) 74 | word_list = b.split(',') # 以 ',' 分割相应字段字符串 75 | ''' 替换同义词开始 ''' 76 | for word in word_list: # 遍历词库 77 | # print(word) 78 | if word == '': # 空字符设置为其他 79 | word = '其他' 80 | elif len(word) < 2: # 过滤单字符 81 | continue 82 | else: 83 | replace_word = self.read_config()['word'][self.field.replace('vod_', '')][0] # 对应的替换同义词字典 84 | # print(replace_word) 85 | times = len(replace_word) # 需替换的次数 86 | for replace_num in range(times): 87 | word = word.replace(list(replace_word.keys())[replace_num], 88 | list(replace_word.values())[replace_num]) 89 | # print(word) 90 | temp_list.append(word) 91 | ''' 替换同义词结束 ''' 92 | # 整理词库 93 | self.clean_up(temp_list) 94 | 95 | 96 | if __name__ == '__main__': 97 | obj = Word('vod_class') # vod_class,vod_area,vod_lang 98 | obj.replace_test() 99 | -------------------------------------------------------------------------------- /mail-api/template/theme_7/reply.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 52 | 53 | 54 |
5 |
14 |
15 | 16 |
17 |
18 |

Dear {author}

26 |
27 |

您有一条来自{blogName}的回复

28 |
29 |

您在文章《{title}》上发表的评论:

30 |

{text}

31 |

{replyAuthor} 给您的回复如下:

32 |

{replyText}

33 | 34 | 47 |

本邮件为系统自动发出,请勿直接回复
48 | ©{blogName}

49 |
50 |
51 |
55 | -------------------------------------------------------------------------------- /wallpaper-dl/remove_person_pic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/08/09 5 | # @Desc : 移除包含人像的图片 6 | 7 | import os 8 | from pathlib import Path 9 | 10 | import cv2 11 | import numpy as np 12 | 13 | # ---------------------- 配置项 ---------------------- 14 | # 目标目录路径(要处理的图片所在目录) 15 | TARGET_DIRECTORY = "D:/DL/自然风景" # 替换为实际的目录路径 16 | 17 | # 支持的图片格式 18 | SUPPORTED_IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'] 19 | 20 | # 人脸检测参数 21 | FACE_DETECTION_SCALE_FACTOR = 1.1 # 图像缩放比例 22 | FACE_DETECTION_MIN_NEIGHBORS = 5 # 每个候选矩形应保留的邻居数 23 | FACE_DETECTION_MIN_SIZE = (30, 30) # 可能的最小人脸大小 24 | 25 | 26 | # ----------------------------------------------------- 27 | 28 | 29 | def is_person_present(image_path): 30 | """检测图片中是否有人像(人脸),支持中文路径""" 31 | # 加载预训练的 Haar 级联分类器 32 | face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') 33 | 34 | # 读取图片(支持中文路径) 35 | try: 36 | # 使用numpy从文件读取数据,再转换为OpenCV图像 37 | raw_data = np.fromfile(image_path, dtype=np.uint8) 38 | image = cv2.imdecode(raw_data, cv2.IMREAD_COLOR) 39 | except Exception as e: 40 | print(f"读取图片 {image_path} 失败: {str(e)}") 41 | return False # 无法读取图片 42 | 43 | if image is None: 44 | return False # 无法读取图片 45 | 46 | # 转换为灰度图 47 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 48 | 49 | # 检测人脸(使用配置项参数) 50 | faces = face_cascade.detectMultiScale( 51 | gray, 52 | scaleFactor=FACE_DETECTION_SCALE_FACTOR, 53 | minNeighbors=FACE_DETECTION_MIN_NEIGHBORS, 54 | minSize=FACE_DETECTION_MIN_SIZE 55 | ) 56 | 57 | # 如果检测到人脸,返回True 58 | return len(faces) > 0 59 | 60 | 61 | def process_images(directory): 62 | """遍历目录中的图片,删除包含人像的图片""" 63 | # 遍历目录 64 | for root, dirs, files in os.walk(directory): 65 | for file in files: 66 | # 检查文件扩展名(使用配置项) 67 | file_ext = Path(file).suffix.lower() 68 | if file_ext in SUPPORTED_IMAGE_EXTENSIONS: 69 | file_path = os.path.join(root, file) 70 | 71 | # 检查文件是否存在 72 | if not os.path.exists(file_path): 73 | print(f"文件不存在: {file_path}") 74 | continue 75 | 76 | # 检查是否为文件 77 | if not os.path.isfile(file_path): 78 | print(f"不是有效的文件: {file_path}") 79 | continue 80 | 81 | try: 82 | # 检测是否有人像 83 | has_person = is_person_present(file_path) 84 | 85 | if has_person: 86 | # 直接删除包含人像的图片 87 | os.remove(file_path) 88 | print(f"已删除包含人像的图片: {file_path}") 89 | else: 90 | print(f"未检测到人像: {file_path}") 91 | except Exception as e: 92 | print(f"处理文件 {file_path} 时出错: {str(e)}") 93 | 94 | 95 | def main(): 96 | # 检查目录是否存在 97 | if not os.path.isdir(TARGET_DIRECTORY): 98 | print(f"错误: 目录 '{TARGET_DIRECTORY}' 不存在。") 99 | return 100 | 101 | print(f"开始处理目录: {TARGET_DIRECTORY}") 102 | process_images(TARGET_DIRECTORY) 103 | print("处理完成。") 104 | 105 | 106 | if __name__ == "__main__": 107 | main() 108 | -------------------------------------------------------------------------------- /mail-api/PHPMailer/OAuth.php: -------------------------------------------------------------------------------- 1 | 10 | * @author Jim Jagielski (jimjag) 11 | * @author Andy Prevost (codeworxtech) 12 | * @author Brent R. Matzelle (original founder) 13 | * @copyright 2012 - 2020 Marcus Bointon 14 | * @copyright 2010 - 2012 Jim Jagielski 15 | * @copyright 2004 - 2009 Andy Prevost 16 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 17 | * @note This program is distributed in the hope that it will be useful - WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. 20 | */ 21 | 22 | namespace PHPMailer\PHPMailer; 23 | 24 | use League\OAuth2\Client\Grant\RefreshToken; 25 | use League\OAuth2\Client\Provider\AbstractProvider; 26 | use League\OAuth2\Client\Token\AccessToken; 27 | 28 | /** 29 | * OAuth - OAuth2 authentication wrapper class. 30 | * Uses the oauth2-client package from the League of Extraordinary Packages. 31 | * 32 | * @see http://oauth2-client.thephpleague.com 33 | * 34 | * @author Marcus Bointon (Synchro/coolbru) 35 | */ 36 | class OAuth implements OAuthTokenProvider 37 | { 38 | /** 39 | * An instance of the League OAuth Client Provider. 40 | * 41 | * @var AbstractProvider 42 | */ 43 | protected $provider; 44 | 45 | /** 46 | * The current OAuth access token. 47 | * 48 | * @var AccessToken 49 | */ 50 | protected $oauthToken; 51 | 52 | /** 53 | * The user's email address, usually used as the login ID 54 | * and also the from address when sending email. 55 | * 56 | * @var string 57 | */ 58 | protected $oauthUserEmail = ''; 59 | 60 | /** 61 | * The client secret, generated in the app definition of the service you're connecting to. 62 | * 63 | * @var string 64 | */ 65 | protected $oauthClientSecret = ''; 66 | 67 | /** 68 | * The client ID, generated in the app definition of the service you're connecting to. 69 | * 70 | * @var string 71 | */ 72 | protected $oauthClientId = ''; 73 | 74 | /** 75 | * The refresh token, used to obtain new AccessTokens. 76 | * 77 | * @var string 78 | */ 79 | protected $oauthRefreshToken = ''; 80 | 81 | /** 82 | * OAuth constructor. 83 | * 84 | * @param array $options Associative array containing 85 | * `provider`, `userName`, `clientSecret`, `clientId` and `refreshToken` elements 86 | */ 87 | public function __construct($options) 88 | { 89 | $this->provider = $options['provider']; 90 | $this->oauthUserEmail = $options['userName']; 91 | $this->oauthClientSecret = $options['clientSecret']; 92 | $this->oauthClientId = $options['clientId']; 93 | $this->oauthRefreshToken = $options['refreshToken']; 94 | } 95 | 96 | /** 97 | * Get a new RefreshToken. 98 | * 99 | * @return RefreshToken 100 | */ 101 | protected function getGrant() 102 | { 103 | return new RefreshToken(); 104 | } 105 | 106 | /** 107 | * Get a new AccessToken. 108 | * 109 | * @return AccessToken 110 | */ 111 | protected function getToken() 112 | { 113 | return $this->provider->getAccessToken( 114 | $this->getGrant(), 115 | ['refresh_token' => $this->oauthRefreshToken] 116 | ); 117 | } 118 | 119 | /** 120 | * Generate a base64-encoded OAuth token. 121 | * 122 | * @return string 123 | */ 124 | public function getOauth64() 125 | { 126 | //Get a new token if it's not available or has expired 127 | if (null === $this->oauthToken || $this->oauthToken->hasExpired()) { 128 | $this->oauthToken = $this->getToken(); 129 | } 130 | 131 | return base64_encode( 132 | 'user=' . 133 | $this->oauthUserEmail . 134 | "\001auth=Bearer " . 135 | $this->oauthToken . 136 | "\001\001" 137 | ); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /maccms-tool/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2022/11/14 5 | # @Desc : MacCMS 扩展分类&地区&语言词库自动整理合并 6 | import os 7 | 8 | import pymysql 9 | import yaml 10 | 11 | 12 | class MacCMS: 13 | 14 | @classmethod 15 | def read_config(cls) -> dict: 16 | """ 17 | 读取配置文件内容 18 | 19 | :return: 配置文件内容 20 | """ 21 | # 获取yaml配置文件路径 22 | path = os.path.join(os.path.dirname(__file__), 'config.yml') 23 | with open(path, 'r', encoding='utf-8') as f: 24 | data = f.read() 25 | config = yaml.load(data, Loader=yaml.FullLoader) # 转字典 26 | return config 27 | 28 | @classmethod 29 | def __select_db(cls) -> tuple: 30 | """ 31 | 查询数据库 32 | 33 | :return: 数据库查询结果(vod_id,vod_class,vod_area,vod_lang) 34 | """ 35 | config = cls.read_config()['db'] 36 | num = cls.read_config()['num'] 37 | db = pymysql.connect(host=config['host'], user=config['user'], password=config['password'], 38 | database=config['database']) 39 | # 使用cursor()方法获取操作游标 40 | cursor = db.cursor() 41 | # SQL 查询语句 42 | if num == 'all': 43 | sql = 'SELECT vod_id,vod_class,vod_area,vod_lang FROM mac_vod ORDER BY vod_id DESC' 44 | else: 45 | sql = 'SELECT vod_id,vod_class,vod_area,vod_lang FROM mac_vod ORDER BY vod_time DESC LIMIT ' + str(num) 46 | try: 47 | cursor.execute(sql) 48 | # 获取所有记录列表 49 | results = cursor.fetchall() 50 | return results 51 | except: 52 | print("Error: unable to fetch data") 53 | # 关闭数据库连接 54 | db.close() 55 | 56 | @classmethod 57 | def update_db(cls, sql: str) -> None: 58 | """ 59 | 更新数据库 60 | 61 | :param sql: SQL语句 62 | :return: None 63 | """ 64 | config = cls.read_config()['db'] 65 | db = pymysql.connect(host=config['host'], user=config['user'], password=config['password'], 66 | database=config['database']) 67 | # 使用cursor()方法获取操作游标 68 | cursor = db.cursor() 69 | try: 70 | # 执行SQL语句 71 | cursor.execute(sql) 72 | print(sql) 73 | # 提交到数据库执行 74 | db.commit() 75 | except: 76 | # 发生错误时回滚 77 | db.rollback() 78 | # 关闭数据库连接 79 | db.close() 80 | 81 | @classmethod 82 | def __replace_word(cls, data: tuple) -> str: 83 | """ 84 | 替换同义词 85 | 86 | :return: SQL语句 87 | """ 88 | sql_param = [] # SQL更新参数列表 vod_class, vod_area, vod_lang 89 | replace_word_list = list(cls.read_config()['word'].values()) 90 | # print(replace_word_list) 91 | for field_num in range(len(data)): # 遍历传入字段 92 | save_list = [] # 最终词库列表 93 | temp_list = [] # 临时列表 94 | if field_num == 0: # 剔除 vod_id 字段 95 | continue 96 | else: 97 | word_list = data[field_num].split(',') # 以 ',' 分割相应字段字符串 98 | ''' 替换同义词开始 ''' 99 | for word in word_list: # 遍历词库 100 | # print(word) 101 | if word == '': # 空字符设置为其他 102 | word = '其他' 103 | elif len(word) < 2: # 过滤单字符 104 | continue 105 | else: 106 | replace_word = replace_word_list[field_num - 1][0] # 对应的同义词替换字典 107 | # print(replace_word) 108 | times = len(replace_word) # 需替换的次数 109 | for replace_num in range(times): 110 | word = word.replace(list(replace_word.keys())[replace_num], 111 | list(replace_word.values())[replace_num]) 112 | temp_list.append(word) 113 | ''' 替换同义词结束 ''' 114 | '''词库去重开始''' 115 | # print(temp_list) 116 | for a in temp_list: 117 | for b in a.split(','): 118 | if b not in save_list: 119 | save_list.append(b) 120 | else: 121 | pass 122 | sql_param.append(','.join(save_list)) 123 | '''词库去重结束''' 124 | # 最终的SQL更新语句 125 | sql = 'UPDATE mac_vod SET vod_class="' + sql_param[0] + '",vod_area="' + sql_param[1] + '",vod_lang="' + \ 126 | sql_param[2] + '" WHERE vod_id=' + str(data[0]) 127 | return sql 128 | 129 | @classmethod 130 | def main(cls): 131 | result = cls.__select_db() 132 | for data in result: # 遍历视频数据 133 | sql = cls.__replace_word(data) 134 | cls.update_db(sql) 135 | 136 | 137 | if __name__ == "__main__": 138 | MacCMS.main() 139 | # 删除无图片的文章 140 | # MacCMS.update_db('DELETE FROM mac_art WHERE art_pic = ""') 141 | # 替换视频图片海报链接 142 | # MacCMS.update_db('UPDATE mac_vod SET vod_pic = REPLACE(vod_pic, "http://", "https://")') 143 | -------------------------------------------------------------------------------- /gcp-autostart/app/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/02/08 5 | # @Desc : GCP 抢占式实例自动开机 6 | import json 7 | import os 8 | import time 9 | from typing import Dict, List 10 | 11 | from google.api_core.exceptions import GoogleAPICallError, RetryError 12 | from google.cloud import compute_v1 13 | 14 | # 环境变量配置 15 | KEY_PATH = os.getenv("GCP_KEY_PATH", "/app/key") 16 | LOOP_INTERVAL = int(os.getenv("GCP_LOOP_INTERVAL", "300")) # 循环间隔秒数 17 | DEFAULT_TIMEOUT = int(os.getenv("GCP_TIMEOUT", "30")) 18 | 19 | 20 | def load_gcp_credentials() -> List[Dict]: 21 | """加载并验证GCP凭证文件""" 22 | cred_list = [] 23 | 24 | # 处理目录或单个文件 25 | if os.path.isdir(KEY_PATH): 26 | for file in os.listdir(KEY_PATH): 27 | if file.endswith('.json'): 28 | file_path = os.path.join(KEY_PATH, file) 29 | try: 30 | with open(file_path, 'r') as f: 31 | cred = json.load(f) 32 | if 'project_id' in cred: 33 | cred['file_path'] = file_path 34 | cred_list.append(cred) 35 | except (json.JSONDecodeError, PermissionError): 36 | continue 37 | elif os.path.isfile(KEY_PATH) and KEY_PATH.endswith('.json'): 38 | try: 39 | with open(KEY_PATH, 'r') as f: 40 | cred = json.load(f) 41 | if 'project_id' in cred: 42 | cred['file_path'] = KEY_PATH 43 | cred_list.append(cred) 44 | except (json.JSONDecodeError, PermissionError): 45 | pass 46 | 47 | return cred_list 48 | 49 | 50 | def start_instance_if_not_running(project_id: str, zone: str, instance_name: str) -> str: 51 | """启动指定实例(如果未运行)""" 52 | try: 53 | instance_client = compute_v1.InstancesClient() 54 | instance = instance_client.get(project=project_id, zone=zone, instance=instance_name) 55 | 56 | if instance.status != "RUNNING": 57 | operation = instance_client.start(project=project_id, zone=zone, instance=instance_name) 58 | wait_for_operation(project_id, zone, instance_name, operation.name) 59 | return "RUNNING" 60 | return "RUNNING" 61 | except (GoogleAPICallError, RetryError) as e: 62 | print(f"[{project_id}] {instance_name} 启动失败: {str(e)}") 63 | return "ERROR" 64 | 65 | 66 | def wait_for_operation(project_id: str, zone: str, instance_name: str, operation_name: str) -> None: 67 | """等待云操作完成""" 68 | operation_client = compute_v1.ZoneOperationsClient() 69 | start_time = time.time() 70 | 71 | while time.time() - start_time < DEFAULT_TIMEOUT: 72 | operation = operation_client.get(project=project_id, zone=zone, operation=operation_name) 73 | if operation.status == "RUNNING": 74 | return 75 | if operation.status == "DONE": 76 | if operation.error: 77 | raise RuntimeError(f"操作失败: {operation.error}") 78 | return 79 | time.sleep(5) 80 | raise TimeoutError("操作超时") 81 | 82 | 83 | def process_account(cred: Dict): 84 | """处理单个账号""" 85 | project_id = cred['project_id'] 86 | try: 87 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cred['file_path'] 88 | compute = compute_v1.InstancesClient() 89 | zones_client = compute_v1.ZonesClient() 90 | 91 | print(f"\n🔍 开始处理项目: {project_id}") 92 | for zone in zones_client.list(project=project_id): 93 | instances = compute.list(project=project_id, zone=zone.name) 94 | for instance in instances: 95 | status = start_instance_if_not_running(project_id, zone.name, instance.name) 96 | print(f" {instance.name} ({zone.name}): {'✅' if status == 'RUNNING' else '❌'}") 97 | 98 | finally: 99 | if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: 100 | del os.environ["GOOGLE_APPLICATION_CREDENTIALS"] 101 | print(f"🏁 项目 {project_id} 处理完成\n") # 新增完成提示 102 | 103 | 104 | if __name__ == "__main__": 105 | print("🚀 GCP 抢占式实例自动维护服务启动") 106 | print(f"📁 凭证路径: {KEY_PATH}") 107 | print(f"⏱ 循环间隔: {LOOP_INTERVAL}秒") 108 | 109 | try: 110 | while True: 111 | start_time = time.time() 112 | credentials = load_gcp_credentials() 113 | 114 | if not credentials: 115 | print("⚠️ 未找到有效凭证文件") 116 | time.sleep(LOOP_INTERVAL) 117 | continue 118 | 119 | print(f"\n🔄 发现 {len(credentials)} 个账号") 120 | for cred in credentials: 121 | try: 122 | process_account(cred) 123 | except Exception as e: 124 | print(f"❌ 处理账号 {cred.get('project_id', '未知')} 失败: {str(e)}") 125 | 126 | print("🎉 所有项目处理完成,等待下次轮询") 127 | sleep_time = LOOP_INTERVAL - (time.time() - start_time) 128 | if sleep_time > 0: 129 | time.sleep(sleep_time) 130 | 131 | except KeyboardInterrupt: 132 | print("\n🛑 用户中断操作") 133 | except Exception as e: 134 | print(f"💥 严重错误: {str(e)}") 135 | finally: 136 | print("🔚 服务已停止") 137 | -------------------------------------------------------------------------------- /ddns-scripts/cloudflare/ddns.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o nounset 4 | set -o pipefail 5 | 6 | # Automatically update your CloudFlare DNS record to the IP, Dynamic DNS 7 | # Can retrieve cloudflare Domain id and list zone's, because, lazy 8 | 9 | # Place at: 10 | # curl https://raw.githubusercontent.com/yulewang/cloudflare-api-v4-ddns/master/cf-v4-ddns.sh > /usr/local/bin/cf-ddns.sh && chmod +x /usr/local/bin/cf-ddns.sh 11 | # run `crontab -e` and add next line: 12 | # */1 * * * * /usr/local/bin/cf-ddns.sh >/dev/null 2>&1 13 | # or you need log: 14 | # */1 * * * * /usr/local/bin/cf-ddns.sh >> /var/log/cf-ddns.log 2>&1 15 | 16 | # Usage: 17 | # cf-ddns.sh -k cloudflare-api-key \ 18 | # -u user@example.com \ 19 | # -h host.example.com \ # fqdn of the record you want to update 20 | # -z example.com \ # will show you all zones if forgot, but you need this 21 | # -t A|AAAA # specify ipv4/ipv6, default: ipv4 22 | 23 | # Optional flags: 24 | # -f false|true \ # force dns update, disregard local stored ip 25 | 26 | # default config 27 | 28 | # API key, see https://www.cloudflare.com/a/account/my-account, 29 | # incorrect api-key results in E_UNAUTH error 30 | CFKEY= 31 | 32 | # Username, eg: user@example.com 33 | CFUSER= 34 | 35 | # Zone name, eg: example.com 36 | CFZONE_NAME= 37 | 38 | # Hostname to update, eg: homeserver.example.com 39 | CFRECORD_NAME= 40 | 41 | # Record type, A(IPv4)|AAAA(IPv6), default IPv4 42 | CFRECORD_TYPE=A 43 | 44 | # Cloudflare TTL for record, between 120 and 86400 seconds 45 | CFTTL=120 46 | 47 | # Ignore local file, update ip anyway 48 | FORCE=false 49 | 50 | WANIPSITE="http://ipv4.icanhazip.com" 51 | 52 | # Site to retrieve WAN ip, other examples are: bot.whatismyipaddress.com, https://api.ipify.org/ ... 53 | if [ "$CFRECORD_TYPE" = "A" ]; then 54 | : 55 | elif [ "$CFRECORD_TYPE" = "AAAA" ]; then 56 | WANIPSITE="http://ipv6.icanhazip.com" 57 | else 58 | echo "$CFRECORD_TYPE specified is invalid, CFRECORD_TYPE can only be A(for IPv4)|AAAA(for IPv6)" 59 | exit 2 60 | fi 61 | 62 | # get parameter 63 | while getopts k:u:h:z:t:f: opts; do 64 | case ${opts} in 65 | k) CFKEY=${OPTARG} ;; 66 | u) CFUSER=${OPTARG} ;; 67 | h) CFRECORD_NAME=${OPTARG} ;; 68 | z) CFZONE_NAME=${OPTARG} ;; 69 | t) CFRECORD_TYPE=${OPTARG} ;; 70 | f) FORCE=${OPTARG} ;; 71 | esac 72 | done 73 | 74 | # If required settings are missing just exit 75 | if [ "$CFKEY" = "" ]; then 76 | echo "Missing api-key, get at: https://www.cloudflare.com/a/account/my-account" 77 | echo "and save in ${0} or using the -k flag" 78 | exit 2 79 | fi 80 | if [ "$CFUSER" = "" ]; then 81 | echo "Missing username, probably your email-address" 82 | echo "and save in ${0} or using the -u flag" 83 | exit 2 84 | fi 85 | if [ "$CFRECORD_NAME" = "" ]; then 86 | echo "Missing hostname, what host do you want to update?" 87 | echo "save in ${0} or using the -h flag" 88 | exit 2 89 | fi 90 | 91 | # If the hostname is not a FQDN 92 | if [ "$CFRECORD_NAME" != "$CFZONE_NAME" ] && ! [ -z "${CFRECORD_NAME##*$CFZONE_NAME}" ]; then 93 | CFRECORD_NAME="$CFRECORD_NAME.$CFZONE_NAME" 94 | echo " => Hostname is not a FQDN, assuming $CFRECORD_NAME" 95 | fi 96 | 97 | # Get current and old WAN ip 98 | WAN_IP=$(curl -s ${WANIPSITE}) 99 | WAN_IP_FILE=$HOME/.cf-wan_ip_$CFRECORD_NAME.txt 100 | if [ -f $WAN_IP_FILE ]; then 101 | OLD_WAN_IP=$(cat $WAN_IP_FILE) 102 | else 103 | echo "No file, need IP" 104 | OLD_WAN_IP="" 105 | fi 106 | 107 | # If WAN IP is unchanged an not -f flag, exit here 108 | if [ "$WAN_IP" = "$OLD_WAN_IP" ] && [ "$FORCE" = false ]; then 109 | echo "WAN IP Unchanged, to update anyway use flag -f true" 110 | exit 0 111 | fi 112 | 113 | # Get zone_identifier & record_identifier 114 | ID_FILE=$HOME/.cf-id_$CFRECORD_NAME.txt 115 | if [ -f $ID_FILE ] && [ $(wc -l $ID_FILE | cut -d " " -f 1) == 4 ] && 116 | [ "$(sed -n '3,1p' "$ID_FILE")" == "$CFZONE_NAME" ] && 117 | [ "$(sed -n '4,1p' "$ID_FILE")" == "$CFRECORD_NAME" ]; then 118 | CFZONE_ID=$(sed -n '1,1p' "$ID_FILE") 119 | CFRECORD_ID=$(sed -n '2,1p' "$ID_FILE") 120 | else 121 | echo "Updating zone_identifier & record_identifier" 122 | CFZONE_ID=$(curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=$CFZONE_NAME" -H "X-Auth-Email: $CFUSER" -H "X-Auth-Key: $CFKEY" -H "Content-Type: application/json" | grep -Po '(?<="id":")[^"]*' | head -1) 123 | CFRECORD_ID=$(curl -s -X GET "https://api.cloudflare.com/client/v4/zones/$CFZONE_ID/dns_records?name=$CFRECORD_NAME" -H "X-Auth-Email: $CFUSER" -H "X-Auth-Key: $CFKEY" -H "Content-Type: application/json" | grep -Po '(?<="id":")[^"]*' | head -1) 124 | echo "$CFZONE_ID" >$ID_FILE 125 | echo "$CFRECORD_ID" >>$ID_FILE 126 | echo "$CFZONE_NAME" >>$ID_FILE 127 | echo "$CFRECORD_NAME" >>$ID_FILE 128 | fi 129 | 130 | # If WAN is changed, update cloudflare 131 | echo "Updating DNS to $WAN_IP" 132 | 133 | RESPONSE=$(curl -s -X PUT "https://api.cloudflare.com/client/v4/zones/$CFZONE_ID/dns_records/$CFRECORD_ID" \ 134 | -H "X-Auth-Email: $CFUSER" \ 135 | -H "X-Auth-Key: $CFKEY" \ 136 | -H "Content-Type: application/json" \ 137 | --data "{\"id\":\"$CFZONE_ID\",\"type\":\"$CFRECORD_TYPE\",\"name\":\"$CFRECORD_NAME\",\"content\":\"$WAN_IP\", \"ttl\":$CFTTL}") 138 | 139 | if [ "$RESPONSE" != "${RESPONSE%success*}" ] && [ "$(echo $RESPONSE | grep "\"success\":true")" != "" ]; then 140 | echo "Updated succesfuly!" 141 | echo $WAN_IP >$WAN_IP_FILE 142 | exit 143 | else 144 | echo 'Something went wrong :(' 145 | echo "Response: $RESPONSE" 146 | exit 1 147 | fi 148 | -------------------------------------------------------------------------------- /ddns-scripts/cloudflare/domain-ddns.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o nounset 4 | set -o pipefail 5 | 6 | # Automatically update your CloudFlare DNS record to the IP, Dynamic DNS 7 | # Can retrieve cloudflare Domain id and list zone's, because, lazy 8 | 9 | # Place at: 10 | # curl https://raw.githubusercontent.com/yulewang/cloudflare-api-v4-ddns/master/cf-v4-ddns.sh > /usr/local/bin/cf-ddns.sh && chmod +x /usr/local/bin/cf-ddns.sh 11 | # run `crontab -e` and add next line: 12 | # */1 * * * * /usr/local/bin/cf-ddns.sh >/dev/null 2>&1 13 | # or you need log: 14 | # */1 * * * * /usr/local/bin/cf-ddns.sh >> /var/log/cf-ddns.log 2>&1 15 | # 16 | # Help: https://blog.natcloud.net/cf-ddns.html 17 | 18 | # Usage: 19 | # cf-ddns.sh -k cloudflare-api-key \ 20 | # -u user@example.com \ 21 | # -h host.example.com \ # fqdn of the record you want to update 22 | # -z example.com \ # will show you all zones if forgot, but you need this 23 | # -t A|AAAA # specify ipv4/ipv6, default: ipv4 24 | 25 | # Optional flags: 26 | # -f false|true \ # force dns update, disregard local stored ip 27 | 28 | # default config 29 | SAVEPATH= 30 | 31 | # API key, see https://www.cloudflare.com/a/account/my-account, 32 | # incorrect api-key results in E_UNAUTH error 33 | CFKEY= 34 | 35 | # Username, eg: user@example.com 36 | CFUSER= 37 | 38 | # Zone name, eg: example.com 39 | CFZONE_NAME= 40 | 41 | # Hostname to update, eg: homeserver.example.com 42 | CFRECORD_NAME= 43 | 44 | # Record type, A(IPv4)|AAAA(IPv6), default IPv4 45 | CFRECORD_TYPE=A 46 | 47 | # Cloudflare TTL for record, between 120 and 86400 seconds 48 | CFTTL=120 49 | 50 | # Ignore local file, update ip anyway 51 | FORCE=false 52 | 53 | # Domain select list 54 | DOMAIN_LIST=(1.demon.com 2.demon.com 3.demon.com) 55 | 56 | # select_domain 57 | selectDomain() { 58 | for element in ${DOMAIN_LIST[@]}; do 59 | ping -c 1 ${element} >/dev/null 2>&1 60 | if [ $? -eq 0 ]; then 61 | DOMAIN=${element} 62 | return 0 63 | fi 64 | done 65 | return 1 66 | } 67 | 68 | # get_domain_ip 69 | getDomainIp() { 70 | selectDomain 71 | if [ $? -eq 1 ]; then 72 | exit 3 73 | fi 74 | if [ "$CFRECORD_TYPE" = "A" ]; then 75 | PING=$(ping ${DOMAIN} -c 1 | sed '1{s/[^(]*(//;s/).*//;q}') 76 | echo $PING 77 | elif [ "$CFRECORD_TYPE" = "AAAA" ]; then 78 | PING=$(ping6 ${DOMAIN} -c 1 | sed '1{s/[^(]*(//;s/).*//;q}') 79 | echo $PING 80 | else 81 | echo "$CFRECORD_TYPE specified is invalid, CFRECORD_TYPE can only be A(for IPv4)|AAAA(for IPv6)" 82 | exit 2 83 | fi 84 | } 85 | 86 | # get parameter 87 | while getopts k:u:h:z:t:f: opts; do 88 | case ${opts} in 89 | k) CFKEY=${OPTARG} ;; 90 | u) CFUSER=${OPTARG} ;; 91 | h) CFRECORD_NAME=${OPTARG} ;; 92 | z) CFZONE_NAME=${OPTARG} ;; 93 | t) CFRECORD_TYPE=${OPTARG} ;; 94 | f) FORCE=${OPTARG} ;; 95 | esac 96 | done 97 | 98 | # If required settings are missing just exit 99 | if [ "$CFKEY" = "" ]; then 100 | echo "Missing api-key, get at: https://www.cloudflare.com/a/account/my-account" 101 | echo "and save in ${0} or using the -k flag" 102 | exit 2 103 | fi 104 | if [ "$CFUSER" = "" ]; then 105 | echo "Missing username, probably your email-address" 106 | echo "and save in ${0} or using the -u flag" 107 | exit 2 108 | fi 109 | if [ "$CFRECORD_NAME" = "" ]; then 110 | echo "Missing hostname, what host do you want to update?" 111 | echo "save in ${0} or using the -h flag" 112 | exit 2 113 | fi 114 | 115 | # If the hostname is not a FQDN 116 | if [ "$CFRECORD_NAME" != "$CFZONE_NAME" ] && ! [ -z "${CFRECORD_NAME##*$CFZONE_NAME}" ]; then 117 | CFRECORD_NAME="$CFRECORD_NAME.$CFZONE_NAME" 118 | echo " => Hostname is not a FQDN, assuming $CFRECORD_NAME" 119 | fi 120 | 121 | # Get current and old DOMAIN ip 122 | DOMAIN_IP=$(getDomainIp) 123 | DOMAIN_IP_FILE=$SAVEPATH/.cf-domain_ip_$CFRECORD_NAME.txt 124 | if [ -f $DOMAIN_IP_FILE ]; then 125 | OLD_DOMAIN_IP=$(cat $DOMAIN_IP_FILE) 126 | else 127 | echo "No file, need IP" 128 | OLD_DOMAIN_IP="" 129 | fi 130 | 131 | # If DOMAIN IP is unchanged an not -f flag, exit here 132 | if [ "$DOMAIN_IP" = "$OLD_DOMAIN_IP" ] && [ "$FORCE" = false ]; then 133 | echo "DOMAIN IP Unchanged, to update anyway use flag -f true" 134 | exit 0 135 | fi 136 | 137 | # Get zone_identifier & record_identifier 138 | ID_FILE=$SAVEPATH/.cf-id_$CFRECORD_NAME.txt 139 | if [ -f $ID_FILE ] && [ $(wc -l $ID_FILE | cut -d " " -f 1) == 4 ] && 140 | [ "$(sed -n '3,1p' "$ID_FILE")" == "$CFZONE_NAME" ] && 141 | [ "$(sed -n '4,1p' "$ID_FILE")" == "$CFRECORD_NAME" ]; then 142 | CFZONE_ID=$(sed -n '1,1p' "$ID_FILE") 143 | CFRECORD_ID=$(sed -n '2,1p' "$ID_FILE") 144 | else 145 | echo "Updating zone_identifier & record_identifier" 146 | CFZONE_ID=$(curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=$CFZONE_NAME" -H "X-Auth-Email: $CFUSER" -H "X-Auth-Key: $CFKEY" -H "Content-Type: application/json" | grep -Po '(?<="id":")[^"]*' | head -1) 147 | CFRECORD_ID=$(curl -s -X GET "https://api.cloudflare.com/client/v4/zones/$CFZONE_ID/dns_records?name=$CFRECORD_NAME" -H "X-Auth-Email: $CFUSER" -H "X-Auth-Key: $CFKEY" -H "Content-Type: application/json" | grep -Po '(?<="id":")[^"]*' | head -1) 148 | echo "$CFZONE_ID" >$ID_FILE 149 | echo "$CFRECORD_ID" >>$ID_FILE 150 | echo "$CFZONE_NAME" >>$ID_FILE 151 | echo "$CFRECORD_NAME" >>$ID_FILE 152 | fi 153 | 154 | # If DOMAIN is changed, update cloudflare 155 | echo "Updating DNS to $DOMAIN_IP" 156 | 157 | RESPONSE=$(curl -s -X PUT "https://api.cloudflare.com/client/v4/zones/$CFZONE_ID/dns_records/$CFRECORD_ID" \ 158 | -H "X-Auth-Email: $CFUSER" \ 159 | -H "X-Auth-Key: $CFKEY" \ 160 | -H "Content-Type: application/json" \ 161 | --data "{\"id\":\"$CFZONE_ID\",\"type\":\"$CFRECORD_TYPE\",\"name\":\"$CFRECORD_NAME\",\"content\":\"$DOMAIN_IP\", \"ttl\":$CFTTL}") 162 | 163 | if [ "$RESPONSE" != "${RESPONSE%success*}" ] && [ "$(echo $RESPONSE | grep "\"success\":true")" != "" ]; then 164 | echo "Updated succesfuly!" 165 | echo $DOMAIN_IP >$DOMAIN_IP_FILE 166 | exit 167 | else 168 | echo 'Something went wrong :(' 169 | echo "Response: $RESPONSE" 170 | exit 1 171 | fi 172 | -------------------------------------------------------------------------------- /mtab-import/bing-wp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/07/17 5 | # @Desc : mTab必应壁纸导入脚本,设置必应壁纸分类为2 6 | import re 7 | import time 8 | from datetime import datetime 9 | 10 | import requests 11 | 12 | # API请求基础URL 13 | BASE_URL = "https://api.codelife.cc/bing/list?lang=cn" 14 | 15 | # 设置请求头 16 | headers = { 17 | 'Origin': 'https://go.itab.link', 18 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' 19 | } 20 | 21 | 22 | def process_url(url): 23 | """处理壁纸URL,移除冗余参数并统一分辨率""" 24 | if not url: 25 | return url 26 | 27 | # 按顺序处理URL 28 | url = re.sub(r'&rf=[^&]*&pid=hp', '', url) # 移除广告参数 29 | url = url.replace('www4.bing.com', 'www.bing.com') # 统一域名 30 | url = re.sub(r'(?= max_wallpapers: 96 | print(f"已获取{len(wallpapers)}张壁纸,达到最大限制") 97 | break 98 | 99 | print(f"第{page}页: 成功处理 {processed_count}/{len(wallpaper_list)} 条数据") 100 | print(f"累计已获取: {len(wallpapers)} 张壁纸") 101 | 102 | # 检查是否需要继续 103 | if len(wallpapers) >= max_wallpapers: 104 | break 105 | 106 | page += 1 107 | time.sleep(1) # 控制爬取频率 108 | 109 | except Exception as e: 110 | print(f"发生致命错误,终止爬取: {e}") 111 | 112 | # 自动增补逻辑:如果去重后数量不足,继续爬取更多页面 113 | if len(wallpapers) < max_wallpapers: 114 | print(f"去重后壁纸数量不足,需要增补 {max_wallpapers - len(wallpapers)} 张") 115 | 116 | try: 117 | while len(wallpapers) < max_wallpapers: 118 | print(f"\n=== 正在增补数据,请求第{page}页 ===") 119 | wallpaper_list = fetch_wallpapers(page, page_size) 120 | 121 | processed_count = 0 122 | for wallpaper in wallpaper_list: 123 | thumb = wallpaper.get("thumb", "") 124 | raw = wallpaper.get("raw", "") 125 | 126 | thumb = process_url(thumb) 127 | raw = process_url(raw) 128 | 129 | if thumb and raw and (raw not in processed_urls) and (thumb not in processed_urls): 130 | wallpapers.append({"raw": raw, "thumb": thumb}) 131 | processed_urls.add(raw) 132 | processed_urls.add(thumb) 133 | processed_count += 1 134 | 135 | if len(wallpapers) >= max_wallpapers: 136 | print(f"已获取{len(wallpapers)}张壁纸,达到最大限制") 137 | break 138 | 139 | print(f"第{page}页: 成功增补 {processed_count}/{len(wallpaper_list)} 条数据") 140 | print(f"累计已获取: {len(wallpapers)} 张壁纸") 141 | 142 | if len(wallpapers) >= max_wallpapers: 143 | break 144 | 145 | page += 1 146 | time.sleep(1) 147 | 148 | except Exception as e: 149 | print(f"增补过程中发生错误: {e}") 150 | 151 | # 生成SQL文件 152 | if wallpapers: 153 | create_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 154 | filename = f"bing_wallpapers_{datetime.now().strftime('%Y%m%d_%H%M%S')}.sql" 155 | 156 | with open(filename, "w", encoding="utf-8") as f: 157 | f.write(f"-- 必应壁纸SQL插入语句\n-- 生成时间: {create_time}\n\n") 158 | 159 | for wallpaper in wallpapers: 160 | raw = wallpaper["raw"].replace("'", "''") 161 | thumb = wallpaper["thumb"].replace("'", "''") 162 | sql = (f"INSERT INTO `mtab`.`wallpaper` " 163 | f"(`type`, `folder`, `mime`, `url`, `cover`, `create_time`, `name`, `sort`) " 164 | f"VALUES (0, 2, 0, '{raw}', '{thumb}', '2025-01-01 00:00:00', NULL, 999);\n") 165 | f.write(sql) 166 | 167 | print(f"\n成功生成SQL文件: {filename},共{len(wallpapers)}条记录") 168 | else: 169 | print("\n未获取到任何壁纸数据") 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /wallpaper-dl/image_uploader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/08/09 5 | # @Desc : 图片批量上传(黑猫图床) 6 | import os 7 | import threading 8 | import time 9 | from datetime import datetime 10 | from queue import Queue 11 | from threading import Lock 12 | 13 | import requests 14 | 15 | # ================= 配置参数 - 所有配置项集中在此处 ================= 16 | TARGET_DIRECTORY = "D:/DL/动物萌宠" # 硬编码的目标图片目录路径 17 | IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.bmp'] # 支持的图片格式 18 | MAX_RETRIES = 2 # 最大重试次数 19 | SUCCESS_URLS = 'success_urls.txt' # 成功上传的URL列表 20 | THREAD_COUNT = 4 # 线程数量,可根据需要调整 21 | ALBUM_ID = 'zB7u' # 相册ID album_id 22 | API_URL = 'https://img.hmvod.cc/json' # API地址 23 | AUTH_TOKEN = '你的Token' # 认证令牌 24 | 25 | # API请求头 26 | headers = { 27 | 'Cookie': '你的Cookie' 28 | } 29 | # ================================================================== 30 | 31 | # 线程安全的数据结构和锁 32 | success_urls = [] 33 | processed_count = 0 # 已处理文件数量 34 | total_count = 0 # 总文件数量 35 | results_lock = Lock() # 用于保护共享数据的锁 36 | 37 | 38 | def get_current_timestamp(): 39 | """获取当前时间戳(毫秒)""" 40 | return int(datetime.now().timestamp() * 1000) 41 | 42 | 43 | def is_image_file(filename): 44 | """判断文件是否为图片""" 45 | ext = os.path.splitext(filename)[1].lower() 46 | return ext in IMAGE_EXTENSIONS 47 | 48 | 49 | def upload_image(file_path): 50 | """上传单个图片到API""" 51 | try: 52 | # 准备表单数据 53 | form_data = { 54 | 'type': 'file', 55 | 'action': 'upload', 56 | 'timestamp': str(get_current_timestamp()), 57 | 'auth_token': AUTH_TOKEN, 58 | 'expiration': '', 59 | 'nsfw': '', 60 | 'album_id': ALBUM_ID, # 使用提取出的配置项 61 | 'mimetype': f'image/{os.path.splitext(file_path)[1][1:].lower()}' # 提取文件扩展名作为mimetype 62 | } 63 | 64 | # 准备文件数据 65 | files = { 66 | 'source': (os.path.basename(file_path), open(file_path, 'rb'), form_data['mimetype']) 67 | } 68 | 69 | # 发送请求 70 | response = requests.post( 71 | API_URL, # 使用配置的API地址 72 | headers=headers, 73 | data=form_data, 74 | files=files, 75 | timeout=30 76 | ) 77 | 78 | # 解析响应 79 | response_json = response.json() 80 | 81 | # 提取结果信息 82 | result = { 83 | 'status_code': response.status_code, 84 | 'response': response.text 85 | } 86 | 87 | # 处理成功响应 88 | if response.status_code == 200: 89 | # 从成功响应中提取image url 90 | result['url'] = response_json.get('image', {}).get('url') 91 | else: 92 | # 处理错误响应 93 | result['error'] = response_json.get('error', {}).get('message', '未知错误') 94 | 95 | return result 96 | 97 | except Exception as e: 98 | return { 99 | 'status_code': None, 100 | 'url': None, 101 | 'error': str(e) 102 | } 103 | finally: 104 | # 确保文件被关闭 105 | if 'files' in locals() and 'source' in files: 106 | files['source'][1].close() 107 | 108 | 109 | def worker(queue): 110 | """线程工作函数,处理队列中的文件上传任务""" 111 | global processed_count 112 | while not queue.empty(): 113 | file_path = queue.get() 114 | try: 115 | # 上传文件,带重试机制 116 | result = None 117 | for attempt in range(MAX_RETRIES + 1): 118 | result = upload_image(file_path) 119 | 120 | if result['status_code'] == 200: 121 | print(f"上传成功 (尝试 {attempt + 1}/{MAX_RETRIES + 1}) - {os.path.basename(file_path)}") 122 | 123 | # 线程安全地更新共享数据 124 | with results_lock: 125 | success_urls.append(result['url']) 126 | break 127 | else: 128 | error_msg = result.get('error', f"状态码: {result['status_code']}") 129 | print( 130 | f"上传失败 (尝试 {attempt + 1}/{MAX_RETRIES + 1}) - {os.path.basename(file_path)}: {error_msg}") 131 | if attempt < MAX_RETRIES: 132 | time.sleep(2) # 重试前等待2秒 133 | 134 | # 更新处理计数并显示进度 135 | with results_lock: 136 | processed_count += 1 137 | progress = (processed_count / total_count) * 100 138 | print(f"进度: {processed_count}/{total_count} ({progress:.1f}%)") 139 | 140 | except Exception as e: 141 | print(f"处理 {os.path.basename(file_path)} 时出错: {str(e)}") 142 | finally: 143 | queue.task_done() 144 | 145 | 146 | def process_directory(): 147 | """处理目录下的所有图片文件,使用多线程上传""" 148 | global total_count, processed_count 149 | 150 | # 检查目录是否存在 151 | if not os.path.isdir(TARGET_DIRECTORY): 152 | print(f"错误: 目录 '{TARGET_DIRECTORY}' 不存在") 153 | return 154 | 155 | # 收集所有图片文件路径 156 | image_files = [] 157 | for root, dirs, files in os.walk(TARGET_DIRECTORY): 158 | for file in files: 159 | if is_image_file(file): 160 | file_path = os.path.join(root, file) 161 | image_files.append(file_path) 162 | 163 | total_count = len(image_files) 164 | processed_count = 0 165 | 166 | print(f"发现 {total_count} 个图片文件,准备上传...") 167 | if not image_files: 168 | print("没有找到图片文件,程序退出。") 169 | return 170 | 171 | # 创建任务队列 172 | queue = Queue() 173 | for file_path in image_files: 174 | queue.put(file_path) 175 | 176 | # 创建并启动线程 177 | threads = [] 178 | for i in range(THREAD_COUNT): 179 | thread = threading.Thread(target=worker, args=(queue,), name=f"Thread-{i + 1}") 180 | threads.append(thread) 181 | thread.start() 182 | 183 | # 等待所有任务完成 184 | queue.join() 185 | 186 | # 等待所有线程结束 187 | for thread in threads: 188 | thread.join() 189 | 190 | # 保存成功的URL到TXT文件(只保存URL) 191 | with open(SUCCESS_URLS, 'w', encoding='utf-8') as f: 192 | for url in success_urls: 193 | f.write(f"{url}\n") 194 | 195 | print(f"\n处理完成") 196 | print(f"成功上传的URL已保存到 {SUCCESS_URLS}") 197 | print(f"成功上传: {len(success_urls)} 个文件") 198 | print(f"上传失败: {total_count - len(success_urls)} 个文件") 199 | 200 | 201 | if __name__ == "__main__": 202 | process_directory() 203 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /mail-api/PHPMailer/POP3.php: -------------------------------------------------------------------------------- 1 | 10 | * @author Jim Jagielski (jimjag) 11 | * @author Andy Prevost (codeworxtech) 12 | * @author Brent R. Matzelle (original founder) 13 | * @copyright 2012 - 2020 Marcus Bointon 14 | * @copyright 2010 - 2012 Jim Jagielski 15 | * @copyright 2004 - 2009 Andy Prevost 16 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 17 | * @note This program is distributed in the hope that it will be useful - WITHOUT 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 | * FITNESS FOR A PARTICULAR PURPOSE. 20 | */ 21 | 22 | namespace PHPMailer\PHPMailer; 23 | 24 | /** 25 | * PHPMailer POP-Before-SMTP Authentication Class. 26 | * Specifically for PHPMailer to use for RFC1939 POP-before-SMTP authentication. 27 | * 1) This class does not support APOP authentication. 28 | * 2) Opening and closing lots of POP3 connections can be quite slow. If you need 29 | * to send a batch of emails then just perform the authentication once at the start, 30 | * and then loop through your mail sending script. Providing this process doesn't 31 | * take longer than the verification period lasts on your POP3 server, you should be fine. 32 | * 3) This is really ancient technology; you should only need to use it to talk to very old systems. 33 | * 4) This POP3 class is deliberately lightweight and incomplete, implementing just 34 | * enough to do authentication. 35 | * If you want a more complete class there are other POP3 classes for PHP available. 36 | * 37 | * @author Richard Davey (original author) 38 | * @author Marcus Bointon (Synchro/coolbru) 39 | * @author Jim Jagielski (jimjag) 40 | * @author Andy Prevost (codeworxtech) 41 | */ 42 | class POP3 43 | { 44 | /** 45 | * The POP3 PHPMailer Version number. 46 | * 47 | * @var string 48 | */ 49 | const VERSION = '6.6.0'; 50 | 51 | /** 52 | * Default POP3 port number. 53 | * 54 | * @var int 55 | */ 56 | const DEFAULT_PORT = 110; 57 | 58 | /** 59 | * Default timeout in seconds. 60 | * 61 | * @var int 62 | */ 63 | const DEFAULT_TIMEOUT = 30; 64 | 65 | /** 66 | * POP3 class debug output mode. 67 | * Debug output level. 68 | * Options: 69 | * @see POP3::DEBUG_OFF: No output 70 | * @see POP3::DEBUG_SERVER: Server messages, connection/server errors 71 | * @see POP3::DEBUG_CLIENT: Client and Server messages, connection/server errors 72 | * 73 | * @var int 74 | */ 75 | public $do_debug = self::DEBUG_OFF; 76 | 77 | /** 78 | * POP3 mail server hostname. 79 | * 80 | * @var string 81 | */ 82 | public $host; 83 | 84 | /** 85 | * POP3 port number. 86 | * 87 | * @var int 88 | */ 89 | public $port; 90 | 91 | /** 92 | * POP3 Timeout Value in seconds. 93 | * 94 | * @var int 95 | */ 96 | public $tval; 97 | 98 | /** 99 | * POP3 username. 100 | * 101 | * @var string 102 | */ 103 | public $username; 104 | 105 | /** 106 | * POP3 password. 107 | * 108 | * @var string 109 | */ 110 | public $password; 111 | 112 | /** 113 | * Resource handle for the POP3 connection socket. 114 | * 115 | * @var resource 116 | */ 117 | protected $pop_conn; 118 | 119 | /** 120 | * Are we connected? 121 | * 122 | * @var bool 123 | */ 124 | protected $connected = false; 125 | 126 | /** 127 | * Error container. 128 | * 129 | * @var array 130 | */ 131 | protected $errors = []; 132 | 133 | /** 134 | * Line break constant. 135 | */ 136 | const LE = "\r\n"; 137 | 138 | /** 139 | * Debug level for no output. 140 | * 141 | * @var int 142 | */ 143 | const DEBUG_OFF = 0; 144 | 145 | /** 146 | * Debug level to show server -> client messages 147 | * also shows clients connection errors or errors from server 148 | * 149 | * @var int 150 | */ 151 | const DEBUG_SERVER = 1; 152 | 153 | /** 154 | * Debug level to show client -> server and server -> client messages. 155 | * 156 | * @var int 157 | */ 158 | const DEBUG_CLIENT = 2; 159 | 160 | /** 161 | * Simple static wrapper for all-in-one POP before SMTP. 162 | * 163 | * @param string $host The hostname to connect to 164 | * @param int|bool $port The port number to connect to 165 | * @param int|bool $timeout The timeout value 166 | * @param string $username 167 | * @param string $password 168 | * @param int $debug_level 169 | * 170 | * @return bool 171 | */ 172 | public static function popBeforeSmtp( 173 | $host, 174 | $port = false, 175 | $timeout = false, 176 | $username = '', 177 | $password = '', 178 | $debug_level = 0 179 | ) { 180 | $pop = new self(); 181 | 182 | return $pop->authorise($host, $port, $timeout, $username, $password, $debug_level); 183 | } 184 | 185 | /** 186 | * Authenticate with a POP3 server. 187 | * A connect, login, disconnect sequence 188 | * appropriate for POP-before SMTP authorisation. 189 | * 190 | * @param string $host The hostname to connect to 191 | * @param int|bool $port The port number to connect to 192 | * @param int|bool $timeout The timeout value 193 | * @param string $username 194 | * @param string $password 195 | * @param int $debug_level 196 | * 197 | * @return bool 198 | */ 199 | public function authorise($host, $port = false, $timeout = false, $username = '', $password = '', $debug_level = 0) 200 | { 201 | $this->host = $host; 202 | //If no port value provided, use default 203 | if (false === $port) { 204 | $this->port = static::DEFAULT_PORT; 205 | } else { 206 | $this->port = (int) $port; 207 | } 208 | //If no timeout value provided, use default 209 | if (false === $timeout) { 210 | $this->tval = static::DEFAULT_TIMEOUT; 211 | } else { 212 | $this->tval = (int) $timeout; 213 | } 214 | $this->do_debug = $debug_level; 215 | $this->username = $username; 216 | $this->password = $password; 217 | //Reset the error log 218 | $this->errors = []; 219 | //Connect 220 | $result = $this->connect($this->host, $this->port, $this->tval); 221 | if ($result) { 222 | $login_result = $this->login($this->username, $this->password); 223 | if ($login_result) { 224 | $this->disconnect(); 225 | 226 | return true; 227 | } 228 | } 229 | //We need to disconnect regardless of whether the login succeeded 230 | $this->disconnect(); 231 | 232 | return false; 233 | } 234 | 235 | /** 236 | * Connect to a POP3 server. 237 | * 238 | * @param string $host 239 | * @param int|bool $port 240 | * @param int $tval 241 | * 242 | * @return bool 243 | */ 244 | public function connect($host, $port = false, $tval = 30) 245 | { 246 | //Are we already connected? 247 | if ($this->connected) { 248 | return true; 249 | } 250 | 251 | //On Windows this will raise a PHP Warning error if the hostname doesn't exist. 252 | //Rather than suppress it with @fsockopen, capture it cleanly instead 253 | set_error_handler([$this, 'catchWarning']); 254 | 255 | if (false === $port) { 256 | $port = static::DEFAULT_PORT; 257 | } 258 | 259 | //Connect to the POP3 server 260 | $errno = 0; 261 | $errstr = ''; 262 | $this->pop_conn = fsockopen( 263 | $host, //POP3 Host 264 | $port, //Port # 265 | $errno, //Error Number 266 | $errstr, //Error Message 267 | $tval 268 | ); //Timeout (seconds) 269 | //Restore the error handler 270 | restore_error_handler(); 271 | 272 | //Did we connect? 273 | if (false === $this->pop_conn) { 274 | //It would appear not... 275 | $this->setError( 276 | "Failed to connect to server $host on port $port. errno: $errno; errstr: $errstr" 277 | ); 278 | 279 | return false; 280 | } 281 | 282 | //Increase the stream time-out 283 | stream_set_timeout($this->pop_conn, $tval, 0); 284 | 285 | //Get the POP3 server response 286 | $pop3_response = $this->getResponse(); 287 | //Check for the +OK 288 | if ($this->checkResponse($pop3_response)) { 289 | //The connection is established and the POP3 server is talking 290 | $this->connected = true; 291 | 292 | return true; 293 | } 294 | 295 | return false; 296 | } 297 | 298 | /** 299 | * Log in to the POP3 server. 300 | * Does not support APOP (RFC 2828, 4949). 301 | * 302 | * @param string $username 303 | * @param string $password 304 | * 305 | * @return bool 306 | */ 307 | public function login($username = '', $password = '') 308 | { 309 | if (!$this->connected) { 310 | $this->setError('Not connected to POP3 server'); 311 | return false; 312 | } 313 | if (empty($username)) { 314 | $username = $this->username; 315 | } 316 | if (empty($password)) { 317 | $password = $this->password; 318 | } 319 | 320 | //Send the Username 321 | $this->sendString("USER $username" . static::LE); 322 | $pop3_response = $this->getResponse(); 323 | if ($this->checkResponse($pop3_response)) { 324 | //Send the Password 325 | $this->sendString("PASS $password" . static::LE); 326 | $pop3_response = $this->getResponse(); 327 | if ($this->checkResponse($pop3_response)) { 328 | return true; 329 | } 330 | } 331 | 332 | return false; 333 | } 334 | 335 | /** 336 | * Disconnect from the POP3 server. 337 | */ 338 | public function disconnect() 339 | { 340 | $this->sendString('QUIT'); 341 | 342 | // RFC 1939 shows POP3 server sending a +OK response to the QUIT command. 343 | // Try to get it. Ignore any failures here. 344 | try { 345 | $this->getResponse(); 346 | } catch (Exception $e) { 347 | //Do nothing 348 | } 349 | 350 | //The QUIT command may cause the daemon to exit, which will kill our connection 351 | //So ignore errors here 352 | try { 353 | @fclose($this->pop_conn); 354 | } catch (Exception $e) { 355 | //Do nothing 356 | } 357 | 358 | // Clean up attributes. 359 | $this->connected = false; 360 | $this->pop_conn = false; 361 | } 362 | 363 | /** 364 | * Get a response from the POP3 server. 365 | * 366 | * @param int $size The maximum number of bytes to retrieve 367 | * 368 | * @return string 369 | */ 370 | protected function getResponse($size = 128) 371 | { 372 | $response = fgets($this->pop_conn, $size); 373 | if ($this->do_debug >= self::DEBUG_SERVER) { 374 | echo 'Server -> Client: ', $response; 375 | } 376 | 377 | return $response; 378 | } 379 | 380 | /** 381 | * Send raw data to the POP3 server. 382 | * 383 | * @param string $string 384 | * 385 | * @return int 386 | */ 387 | protected function sendString($string) 388 | { 389 | if ($this->pop_conn) { 390 | if ($this->do_debug >= self::DEBUG_CLIENT) { //Show client messages when debug >= 2 391 | echo 'Client -> Server: ', $string; 392 | } 393 | 394 | return fwrite($this->pop_conn, $string, strlen($string)); 395 | } 396 | 397 | return 0; 398 | } 399 | 400 | /** 401 | * Checks the POP3 server response. 402 | * Looks for for +OK or -ERR. 403 | * 404 | * @param string $string 405 | * 406 | * @return bool 407 | */ 408 | protected function checkResponse($string) 409 | { 410 | if (strpos($string, '+OK') !== 0) { 411 | $this->setError("Server reported an error: $string"); 412 | 413 | return false; 414 | } 415 | 416 | return true; 417 | } 418 | 419 | /** 420 | * Add an error to the internal error store. 421 | * Also display debug output if it's enabled. 422 | * 423 | * @param string $error 424 | */ 425 | protected function setError($error) 426 | { 427 | $this->errors[] = $error; 428 | if ($this->do_debug >= self::DEBUG_SERVER) { 429 | echo '
';
430 |             foreach ($this->errors as $e) {
431 |                 print_r($e);
432 |             }
433 |             echo '
'; 434 | } 435 | } 436 | 437 | /** 438 | * Get an array of error messages, if any. 439 | * 440 | * @return array 441 | */ 442 | public function getErrors() 443 | { 444 | return $this->errors; 445 | } 446 | 447 | /** 448 | * POP3 connection error handler. 449 | * 450 | * @param int $errno 451 | * @param string $errstr 452 | * @param string $errfile 453 | * @param int $errline 454 | */ 455 | protected function catchWarning($errno, $errstr, $errfile, $errline) 456 | { 457 | $this->setError( 458 | 'Connecting to the POP3 server raised a PHP warning:' . 459 | "errno: $errno errstr: $errstr; errfile: $errfile; errline: $errline" 460 | ); 461 | } 462 | } 463 | -------------------------------------------------------------------------------- /maccms-tool/config.yml: -------------------------------------------------------------------------------- 1 | # 数据库连接信息 2 | db: 3 | host: 10.0.0.11 4 | database: demo 5 | user: demo 6 | password: b26ZMnAAZMjjsmMc 7 | # 处理数量,'all' = 全部更新 8 | num: all 9 | # 替换词库 10 | word: 11 | class: # 扩展分类 12 | - { "7-12岁": "少儿,儿童", 13 | "13-17岁": "少年", 14 | "18岁及以上": "成人", 15 | "Hong": "", 16 | "Kong": "", 17 | "ITV": "电视,卫视", 18 | "LOLI": "萝莉", 19 | "VIP尊享": "", 20 | "VIP集": "剧集,剧情", 21 | "VIP": "", 22 | "中国动漫": "国创", 23 | "中国台湾": "台湾", 24 | "中国大陆": "内地", 25 | "中国香港": "香港", 26 | "中国": "国创", 27 | "儿歌精选": "儿歌,精选", 28 | "儿童搞笑": "儿童,搞笑", 29 | "儿童教育": "儿童,教育", 30 | "其他动漫": "其他,动漫", 31 | "其他综艺": "其他,综艺", 32 | "其它": "其他", 33 | "内地剧场": "内地,剧情", 34 | "动态漫": "动态漫画", 35 | "动态漫画画": "动态漫画", 36 | "动漫电影": "动漫,电影", 37 | "动漫音乐": "动漫,音乐", 38 | "动画电影": "动画,动漫,电影", 39 | "医疗健康": "医疗,健康", 40 | "卡通动漫": "卡通,动漫", 41 | "卫视剧": "电视,卫视", 42 | "即兴喜剧": "即兴,喜剧", 43 | "历史人文": "历史,人文", 44 | "历史文化": "历史,文化", 45 | "历史革命": "历史,革命", 46 | "反腐扫黑": "反腐,扫黑", 47 | "台剧": "台湾,剧集", 48 | "台湾剧": "台湾,剧集", 49 | "合作活动": "合作,活动", 50 | "启蒙英语": "启蒙,英语,学习,幼教", 51 | "唱唱跳跳": "唱跳,舞蹈", 52 | "国产综艺": "内地,综艺", 53 | "国产动漫": "国创,动漫", 54 | "国产动画": "国创,动画", 55 | "国产剧": "内地", 56 | "国产": "内地", 57 | "国学精粹": "国学,精粹", 58 | "国家地理": "地理", 59 | "外语学习": "外语,学习", 60 | "大陆综艺": "内地,综艺", 61 | "天津卫视": "天津,卫视,电视", 62 | "太空宇宙": "太空,宇宙", 63 | "女孩爱看": "女孩", 64 | "娱乐节目": "娱乐,明星", 65 | "婚恋情感": "婚恋,情感", 66 | "学前教育": "早教,教育,学习,幼儿", 67 | "学英语": "学习,英语", 68 | "安徽卫视": "安徽,卫视,电视", 69 | "幼儿认知": "早教,教育,学习,幼儿", 70 | "幼小教育": "早教,教育,学习,幼儿", 71 | "幽默集锦": "幽默,集锦,搞笑", 72 | "强档热播": "热播,院线", 73 | "情感交友": "情感,交友", 74 | "情景喜剧": "情景,喜剧,搞笑", 75 | "情景喜": "情景,喜剧,搞笑", 76 | "手工绘画": "手工,绘画", 77 | "抗疫救灾": "抗疫,救灾", 78 | "搜狐视频大视野": "搜狐出品", 79 | "搞笑幽默": "搞笑,幽默", 80 | "播报-专访": "播报,专访", 81 | "播报-出品": "播报,出品", 82 | "播报-明星": "播报,明星,娱乐", 83 | "播报-独家": "播报,独家", 84 | "播报-现场": "播报,现场", 85 | "收藏鉴宝": "收藏,鉴宝", 86 | "文化艺术": "文化,艺术", 87 | "日常生活": "日常,生活", 88 | "日本动漫": "日本,番剧", 89 | "日本动画": "日本,番剧,动画", 90 | "日本综艺": "日本,综艺", 91 | "日漫": "日本,番剧", 92 | "早教益智": "早教,益智,教育,学习", 93 | "明星八卦": "明星,八卦,娱乐", 94 | "明星访谈": "明星,访谈", 95 | "极限运动": "极限,运动,体育", 96 | "棚内真人秀": "真人秀", 97 | "欧美剧": "欧美,剧集", 98 | "欧美动漫": "欧美,动漫", 99 | "欧美动画": "欧美,动画", 100 | "欧美综艺": "欧美,综艺", 101 | "武术散打": "武术,散打", 102 | "母婴护理": "母婴,护理", 103 | "民族音乐": "音乐,民族音乐", 104 | "流行音乐": "音乐,流行,流行音乐", 105 | "浙江卫视": "浙江,卫视,电视", 106 | "港剧": "香港,剧集", 107 | "港台动漫": "香港,台湾,动漫", 108 | "港台综艺": "香港,台湾,综艺", 109 | "港台": "香港,台湾", 110 | "港澳剧": "香港,澳门,剧集", 111 | "港澳": "香港,澳门", 112 | "游戏改编": "游戏改", 113 | "游戏竞技": "游戏,竞技", 114 | "演唱会": "音乐,演唱会", 115 | "漫画改编": "漫画改", 116 | "潮流文化": "潮流,文化,流行", 117 | "热门综艺": "热门,综艺", 118 | "父母课堂": "父母,学习,课堂,教育", 119 | "瑞士Switzerland": "瑞士", 120 | "Switzerland": "瑞士", 121 | "生活娱乐": "生活,娱乐", 122 | "生活技巧": "生活,技巧", 123 | "生活服务": "生活,服务", 124 | "生活消费": "生活,消费", 125 | "生活百科": "生活,百科", 126 | "电视剧": "剧集", 127 | "电音": "电音,音乐", 128 | "男孩爱看": "男孩", 129 | "相声小品": "相声,小品", 130 | "真人特摄": "真人,特摄", 131 | "社会题材": "社会", 132 | "竖短片": "竖屏,短片", 133 | "竖短": "竖屏,短片", 134 | "童话绘本": "童话,绘本", 135 | "篮球": "篮球,体育,运动", 136 | "精选短": "精选,短片", 137 | "纪录片": "记录", 138 | "绘画手工": "绘画,手工", 139 | "网络剧": "网络,剧集,网剧", 140 | "网络游戏": "网络,游戏", 141 | "网络电影": "网络,电影", 142 | "罪案": "犯罪,凶案,罪案", 143 | "美少女": "美女,少女,美少女", 144 | "美食教学": "美食,教学", 145 | "美食文化": "美食,文化", 146 | "美食旅游": "美食,旅游", 147 | "职业技能": "职业,技能", 148 | "自制节目": "自制", 149 | "自然科学": "自然,科学", 150 | "西班牙SPain": "西班牙", 151 | "记录片": "记录", 152 | "语言表达": "语言,表达", 153 | "课堂知识": "课堂,知识,学习", 154 | "超级网剧": "网络,剧集,网剧", 155 | "轻小说改编": "轻小说,小说改", 156 | "韩国动漫": "韩国,动漫", 157 | "韩国动画": "韩国,动画", 158 | "韩国综艺": "韩国,综艺", 159 | "音乐亚洲": "音乐,亚洲", 160 | "音乐剧": "音乐,剧集", 161 | "预告&剧八卦": "预告,剧集,八卦" } 162 | area: # 地区 163 | - { ",": "", 164 | "(": "", 165 | ")": "", 166 | "/": ",", 167 | " ": "", 168 | ":中国大陆": "内地", 169 | "马来西亚Malaysia": "马来西亚", 170 | "馬來西亞Malay": "马来西亚", 171 | "马拉西亚": "马来西亚", 172 | "马来西": "马来西亚", 173 | "马来西亚亚": "马来西亚", 174 | "超级飞侠每一集飞往世界各地不同的城市或地": "其他", 175 | "电视剧以改革初期陕北地区的城乡生活为时空": "其他", 176 | "《果味香村》以黄桃、蜜瓜、刺梨、苹果、蜜": "其他", 177 | "在西南地区的一个刚刚脱贫的小村落——高石": "其他", 178 | "《我是冒险王》是青海卫视的一档探险栏目": "其他", 179 | "大型活动《芒果新童星》关注贫困地区儿童": "其他", 180 | "以日本东北地区和东京为舞台描写了命运悲": "其他", 181 | "刘昴星(小当家)是史上最年轻的通过中国": "日本", 182 | "记录了7个少数民族的当下生活体现的是少": "其他", 183 | "《坐庄2操盘手》是王珈执导的悬疑犯罪片": "其他", 184 | "清朝道光年间皇家御用烧锅“同盛金”埋藏": "其他", 185 | "适逢改革开放40周年美国格律文化传媒集": "其他", 186 | "以六盘山为切入点用鲜活的故事充分展示": "其他", 187 | "《不老乡音第二季》穿行湘西大地用镜头": "其他", 188 | "7月瓜果飘香是大力开展农产品销售和迎": "其他", 189 | "北京电影制片厂年出品": "北京", 190 | "蘇聯": "苏联", 191 | "菲律宾Philippines": "菲律宾", 192 | "荷兰Netherlands": "荷兰", 193 | "芬兰Finland": "芬兰", 194 | "美國USA": "美国", 195 | "美国南非加拿大": "美国,南非,加拿大", 196 | "美国、英国、德国": "美国,英国,德国", 197 | "瑞士Switzerland": "瑞士", 198 | "瑞典Sweden": "瑞典", 199 | "澳大利亚Australia": "澳大利亚", 200 | "港台": "香港,台湾", 201 | "泰國": "泰国", 202 | "法国德国日本": "法国,德国,日本", 203 | "沙特阿拉伯SaudiAra": "沙特阿拉伯", 204 | "比利时Belgium": "比利时", 205 | "比利": "比利时", 206 | "比利时时": "比利时", 207 | "英语": "其他", 208 | "皆可": "其他", 209 | "波黑": "波斯尼亚,黑塞哥维那", 210 | "欧美地区": "欧美", 211 | "欧美其他": "欧美,其他", 212 | "未知": "其他", 213 | "智利Chile": "智利", 214 | "日韩地区": "日本,韩国", 215 | "日韩": "日本,韩国", 216 | "日本日本剧": "日本", 217 | "新马": "新加坡,马来西亚", 218 | "新加坡美国": "新加坡,美国", 219 | "新加坡Singapore": "新加坡", 220 | "摩纳": "摩纳哥", 221 | "摩纳哥哥": "摩纳哥", 222 | "捷克斯洛伐克Czechoslovaki": "捷克斯洛伐克", 223 | "捷克美国": "捷克,美国", 224 | "德國": "德国", 225 | "德语": "德国", 226 | "意大": "意大利", 227 | "意大利利": "意大利", 228 | "巴勒斯坦被占领区": "巴勒斯坦", 229 | "委内瑞拉Venezuela": "委内瑞拉", 230 | "国外": "其他", 231 | "塞尔维": "塞尔维亚", 232 | "塞尔维亚亚": "塞尔维亚", 233 | "埃塞俄比亚Ethi": "埃塞俄比亚", 234 | "土耳其Turkey": "土耳其", 235 | "Turkey": "土耳其", 236 | "土耳": "土耳其", 237 | "土耳其其": "土耳其", 238 | "哈萨克斯": "哈萨克斯坦", 239 | "哈萨克斯坦坦": "哈萨克斯坦", 240 | "台灣Taiwan": "台湾", 241 | "古巴Cuba": "古巴", 242 | "叙利亚Syria": "叙利亚", 243 | "印度Indian": "印度", 244 | "印度India": "印度", 245 | "India": "印度", 246 | "印尼Indonesia": "印尼", 247 | "北京": "内地", 248 | "匈牙利Hungary": "匈牙利", 249 | "动漫": "日本", 250 | "加拿大Canada": "加拿大", 251 | "利比": "利比里亚", 252 | "利比里亚里亚": "利比里亚", 253 | "其它": "其他", 254 | "俄罗斯哈萨克斯坦": "俄罗斯,哈萨克斯坦", 255 | "俄罗斯Russia": "俄罗斯", 256 | "俄国Russia": "俄国", 257 | "中国香港": "香港", 258 | "中国香": "香港", 259 | "中国澳门": "澳门", 260 | "中国大陆法国": "内地,法国", 261 | "中国大陆": "内地", 262 | "中国内地": "内地", 263 | "中国大": "内地", 264 | "中国台湾": "台湾", 265 | "不详": "其他", 266 | "中国": "内地", 267 | "大陆": "内地", 268 | "USA": "美国", 269 | "UK": "英国", 270 | "U.S.A": "美国", 271 | "SouthAfrica": "南非", 272 | "NZ": "荷兰", 273 | "Mexico": "墨西哥", 274 | "Germany": "德国", 275 | "Denmark": "丹麦", 276 | "Canada加拿大": "加拿大", 277 | "Canada": "加拿大", 278 | "Australia": "澳大利亚", 279 | "Switzerland": "瑞士" } 280 | lang: # 语言 281 | - { "/": ",", 282 | " ": "", 283 | "55": "", 284 | ":汉语普通话": "普通话", 285 | ":韩语": "韩语", 286 | "马拉地语Marat": "马拉地语", 287 | "马来西亚": "马来语", 288 | "马来西": "马来语", 289 | "马来": "马来语", 290 | "音乐": "", 291 | "丹麦语Danish": "丹麦语", 292 | "丹麦语D": "丹麦语", 293 | "丹麦": "丹麦语", 294 | "乌克兰语Ukari": "乌克兰语", 295 | "乌克兰": "乌克兰语", 296 | "乌尔都": "乌尔都语", 297 | "俄罗斯语Russi": "俄语", 298 | "俄罗斯语": "俄语", 299 | "俄罗斯": "俄语", 300 | "俄语Russian": "俄语", 301 | "俄语Russina": "俄语", 302 | "俄語": "俄语", 303 | "克丘亚": "克丘亚语", 304 | "兰州": "", 305 | "其它": "其他", 306 | "冰岛语Icelan": "冰岛语", 307 | "比印度语Hindi": "印地语", 308 | "北印度语Hindi": "印地语", 309 | "北印度语": "印地语", 310 | "印度语Hindi": "印地语", 311 | "印度语": "印地语", 312 | "印地语Hindi": "印地语", 313 | "印地语h": "印地语", 314 | "印地": "印地语", 315 | "印度India": "印地语", 316 | "印度尼西亚语": "印尼语", 317 | "印度尼西亚": "印尼语", 318 | "印尼语Indone": "印尼语", 319 | "印度": "印地语", 320 | "南非語": "南非语", 321 | "四川方言": "四川话", 322 | "泰国语": "泰语", 323 | "泰国": "泰语", 324 | "泰語": "泰语", 325 | "泰米尔语Tamil": "泰米尔语", 326 | "土耳其语Turke": "土耳其语", 327 | "土耳其语Turki": "土耳其语", 328 | "土尔其语": "土耳其语", 329 | "土耳其": "土耳其语", 330 | "塞尔维亚克罗地亚语": "塞尔维亚语,克罗地亚语", 331 | "塞尔维亚-克罗地亚语": "塞尔维亚语,克罗地亚语", 332 | "塞尔维亚-": "塞尔维亚语", 333 | "塞尔维亚": "塞尔维亚语", 334 | "暂无": "其他", 335 | "未知": "其他", 336 | "国产": "普通话", 337 | "国语大陆": "普通话", 338 | "国语": "普通话", 339 | "中国大陆": "普通话", 340 | "中国大": "普通话", 341 | "中国香港": "粤语", 342 | "中国香": "粤语", 343 | "中国台湾": "闽南语", 344 | "台湾": "闽南语", 345 | "台语": "闽南语", 346 | "中国": "普通话", 347 | "中文": "普通话", 348 | "湖南方言": "湖南话,方言", 349 | "闽南方言": "闽南语,方言", 350 | "云南方言": "云南语,方言", 351 | "云南语": "云南语,方言", 352 | "闽南话": "闽南语", 353 | "上海": "上海话,方言", 354 | "南京": "南京话,方言", 355 | "北京": "北京话,方言", 356 | "四川": "四川话,方言", 357 | "山东": "山东话,方言", 358 | "客家": "客家话,方言", 359 | "徐州": "徐州话,方言", 360 | "武汉": "武汉话,方言", 361 | "河南": "河南话,方言", 362 | "福建": "福建话,方言", 363 | "胶辽": "胶辽话,方言", 364 | "重庆": "重庆话,方言", 365 | "陕西": "陕西话,方言", 366 | "闽南": "闽南语", 367 | "吴越": "吴语,方言", 368 | "维吾": "维语", 369 | "太湖": "", 370 | "河南越调": "越剧", 371 | "汉语普通话Mand": "普通话", 372 | "汉语普通话": "普通话", 373 | "汉语普通": "普通话", 374 | "汉语普": "普通话", 375 | "汉语方言及普通话": "普通话,方言", 376 | "汉语方言": "普通话,方言", 377 | "汉语四川话": "普通话,四川话,方言", 378 | "汉语": "普通话", 379 | "方言越调": "越剧", 380 | "方言话": "方言", 381 | "佛兰德斯语": "荷兰语", 382 | "佛兰德语": "荷兰语", 383 | "南非荷兰语": "南非语", 384 | "南非": "南非语", 385 | "加泰罗尼亚": "加泰罗尼亚语", 386 | "加泰罗": "加泰罗尼亚语", 387 | "加泰罗尼亚语尼亚语": "加泰罗尼亚语", 388 | "加拿大": "英语", 389 | "古希腊": "希腊语", 390 | "塔伽洛": "塔伽洛语", 391 | "墨西哥": "西班牙语", 392 | "西班牙": "西班牙语", 393 | "奥地利": "德语", 394 | "巴西": "葡萄牙语", 395 | "希伯来语Hebre": "希伯来语", 396 | "希伯来": "希伯来语", 397 | "库尔德": "库尔德语", 398 | "德国": "德语", 399 | "德語": "德语", 400 | "意大利": "意大利语", 401 | "意大": "意大利语", 402 | "意大利语利语": "意大利语", 403 | "挪威语Norweg": "挪威语", 404 | "挪威": "挪威语", 405 | "捷克斯洛伐克": "捷克语", 406 | "捷克语Czech": "捷克语", 407 | "捷克": "捷克语", 408 | "斯洛文尼亚": "斯洛文尼亚语", 409 | "无声": "默片", 410 | "无对白": "默片", 411 | "日本": "日语", 412 | "日語": "日语", 413 | "智利": "西班牙语", 414 | "朝鲜": "朝鲜语", 415 | "法国": "法语", 416 | "波兰语Polish": "波兰语", 417 | "波利尼西亚": "波利尼西亚语", 418 | "波斯语Persia": "波斯语", 419 | "波斯": "波斯语", 420 | "波黑": "波斯尼亚语", 421 | "波斯语尼亚语": "波斯尼亚语", 422 | "泰卢固语Te": "泰卢固语", 423 | "泰卢固": "泰卢固语", 424 | "斯洛伐": "斯洛伐克语", 425 | "斯洛伐克语克语": "斯洛伐克语", 426 | "比利时": "法语", 427 | "澳大利亚": "英语", 428 | "爱尔兰盖尔": "爱尔兰语", 429 | "爱沙尼亚": "爱沙尼亚语", 430 | "瑞典语Swedis": "瑞典语", 431 | "瑞典语S": "瑞典语", 432 | "瑞典": "瑞典语", 433 | "瑞士德语Swiss-": "德语", 434 | "瑞士德语": "德语", 435 | "瑞士语言": "德语", 436 | "瑞士语": "德语", 437 | "瑞士": "德语", 438 | "粵語": "粤语", 439 | "罗马尼亚语Roma": "罗马尼亚语", 440 | "罗马尼": "罗马尼亚语", 441 | "罗马尼亚语亚语": "罗马尼亚语", 442 | "美国手语": "英语,手语", 443 | "美国": "英语", 444 | "芬兰语Finnis": "芬兰语", 445 | "芬兰": "芬兰语", 446 | "艾马拉": "马拉语", 447 | "苏格兰盖尔": "苏格兰盖尔语", 448 | "苏联": "俄语", 449 | "英語英语": "英语", 450 | "英语英语": "英语", 451 | "英国": "英语", 452 | "荷兰语Dutch": "荷兰语", 453 | "荷蘭語": "荷兰语", 454 | "荷兰": "荷兰语", 455 | "菲律宾语Filip": "菲律宾语", 456 | "菲律宾塔加": "菲律宾语", 457 | "菲律宾": "菲律宾语", 458 | "葡萄牙语Portu": "葡萄牙语", 459 | "葡萄牙": "葡萄牙语", 460 | "蒙古语": "蒙语", 461 | "蒙古": "蒙语", 462 | "越南语Vietna": "越南语", 463 | "越南": "越南语", 464 | "越语": "越南语", 465 | "赣语Gan": "赣语", 466 | "西西里": "西西里语", 467 | "高棉": "高棉语", 468 | "普听话": "普通话", 469 | "阿姆哈": "阿姆哈拉语", 470 | "阿姆哈拉语拉语": "阿姆哈拉语", 471 | "阿拉伯": "阿拉伯语", 472 | "阿拉": "阿拉伯语", 473 | "阿拉伯语伯语": "阿拉伯语", 474 | "阿布": "其他", 475 | "韩国": "韩语", 476 | "马拉地语‎": "马拉地语", 477 | "Afrikaans": "荷兰语", 478 | "Athap": "其他", 479 | "Cantonese": "粤语", 480 | "Danish": "丹麦语", 481 | "Dari": "达里语", 482 | "English": "英语", 483 | "French": "法语", 484 | "Galic": "加利奇语", 485 | "German": "德语", 486 | "Hindi": "印地语", 487 | "India": "印地语", 488 | "Icelandic": "冰岛语", 489 | "Luxembourg": "卢森堡语", 490 | "Malayalam": "马拉雅拉姆语", 491 | "Pasht": "其他", 492 | "Persian": "波斯语", 493 | "Russian": "俄语", 494 | "Silent": "默片", 495 | "Swahi": "斯瓦希里语", 496 | "SwissGerm": "瑞士语", 497 | "Tamil": "泰米尔语", 498 | "Telugu": "泰卢固语", 499 | "Telu": "泰卢固语", 500 | "Turkish": "土耳其语", 501 | "Ukrai": "乌克兰语", 502 | "Welsh": "威尔士语", 503 | "Zulu": "祖鲁语", 504 | "spanish": "西班牙语", 505 | "话话": "话", 506 | "语语": "语" } -------------------------------------------------------------------------------- /wallpaper-dl/360.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Modified based on Kinoko's script 4 | # @Date : 2025/08/10 5 | # @Desc : 360壁纸批量下载脚本 - 支持过滤黑白、纯色背景、偏暗和相似图片 6 | import logging 7 | import os 8 | import re 9 | import time 10 | from concurrent.futures import ThreadPoolExecutor, as_completed 11 | from io import BytesIO 12 | 13 | import numpy as np 14 | import requests 15 | from PIL import Image 16 | from sklearn.cluster import KMeans 17 | from tqdm import tqdm 18 | 19 | # ===================== 配置项 ===================== 20 | # API基础地址 21 | API_BASE_URL = "http://wallpaper.apc.360.cn/index.php" 22 | 23 | # 分类映射关系 (cid: 分类名称) 24 | CATEGORY_MAPPING = { 25 | "14": "动物萌宠" 26 | } 27 | 28 | # 每页图片数量 29 | PAGE_SIZE = 100 30 | 31 | # 自定义下载根目录 32 | DOWNLOAD_ROOT_DIR = "D:/DL" 33 | 34 | # 并发下载线程数 35 | MAX_WORKERS = 5 36 | 37 | # 请求超时时间(秒) 38 | TIMEOUT = 10 39 | 40 | # 请求头 41 | HEADERS = { 42 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 43 | "Accept": "application/json, text/plain, */*", 44 | "Connection": "keep-alive" 45 | } 46 | 47 | # 下载失败重试次数 48 | MAX_RETRIES = 3 49 | 50 | # 重试延迟时间(秒) 51 | RETRY_DELAY = 2 52 | 53 | # 图片过滤配置 54 | BLACK_WHITE_THRESHOLD = 20 # 黑白判断阈值 55 | SOLID_BACKGROUND_THRESHOLD = 0.6 # 纯色背景判断阈值 56 | BRIGHTNESS_THRESHOLD = 50 # 亮度阈值(0-255) 57 | SIMILARITY_THRESHOLD = 5 # 相似图片判断阈值(汉明距离),值越小要求越相似 58 | 59 | # 日志配置 60 | logging.basicConfig( 61 | level=logging.INFO, 62 | format='%(asctime)s - %(levelname)s - %(message)s', 63 | datefmt='%Y-%m-%d %H:%M:%S' 64 | ) 65 | logger = logging.getLogger(__name__) 66 | 67 | # 存储已下载图片的哈希值,用于相似性检查 68 | image_hashes = {} # 结构: {category_name: [hash_values]} 69 | 70 | 71 | # ================================================= 72 | 73 | 74 | def calculate_perceptual_hash(image, hash_size=16): 75 | """计算图片的感知哈希值""" 76 | try: 77 | # 缩小图片尺寸并转为灰度图 78 | img = image.resize((hash_size, hash_size), Image.LANCZOS).convert('L') 79 | img_array = np.array(img) 80 | 81 | # 计算平均亮度 82 | avg_brightness = img_array.mean() 83 | 84 | # 生成哈希值:像素亮度高于平均为1,否则为0 85 | hash_array = (img_array > avg_brightness).flatten() 86 | 87 | # 转换为整数哈希值 88 | hash_value = 0 89 | for bit in hash_array: 90 | hash_value = (hash_value << 1) | (1 if bit else 0) 91 | 92 | return hash_value 93 | except Exception as e: 94 | logger.error(f"计算哈希值失败: {str(e)}") 95 | return None 96 | 97 | 98 | def hamming_distance(hash1, hash2): 99 | """计算两个哈希值的汉明距离""" 100 | if hash1 is None or hash2 is None: 101 | return float('inf') # 无法计算时视为差异极大 102 | # 计算两个哈希值的异或结果中1的个数 103 | return bin(hash1 ^ hash2).count('1') 104 | 105 | 106 | def is_similar_to_existing(image, category_name): 107 | """判断图片是否与同分类中已下载的图片相似""" 108 | if category_name not in image_hashes: 109 | return False, None 110 | 111 | current_hash = calculate_perceptual_hash(image) 112 | if current_hash is None: 113 | return False, None 114 | 115 | # 与同分类中所有已下载图片比较 116 | for existing_hash in image_hashes[category_name]: 117 | distance = hamming_distance(current_hash, existing_hash) 118 | if distance < SIMILARITY_THRESHOLD: 119 | return True, distance 120 | 121 | return False, None 122 | 123 | 124 | def is_black_white(image): 125 | """判断图片是否为黑白""" 126 | try: 127 | img_rgb = image.convert('RGB') 128 | img_array = np.array(img_rgb) 129 | 130 | r, g, b = img_array[:, :, 0], img_array[:, :, 1], img_array[:, :, 2] 131 | diff1 = np.abs(r - g) 132 | diff2 = np.abs(r - b) 133 | diff3 = np.abs(g - b) 134 | 135 | total_pixels = img_array.shape[0] * img_array.shape[1] 136 | bw_pixels = np.sum((diff1 < BLACK_WHITE_THRESHOLD) & 137 | (diff2 < BLACK_WHITE_THRESHOLD) & 138 | (diff3 < BLACK_WHITE_THRESHOLD)) 139 | 140 | return bw_pixels / total_pixels > 0.95 141 | except Exception as e: 142 | logger.error(f"黑白判断失败: {str(e)}") 143 | return False 144 | 145 | 146 | def has_solid_background(image): 147 | """判断图片是否有纯色背景""" 148 | try: 149 | img_rgb = image.convert('RGB') 150 | img_array = np.array(img_rgb) 151 | pixels = img_array.reshape(-1, 3) 152 | 153 | kmeans = KMeans(n_clusters=min(10, len(pixels)), random_state=42) 154 | kmeans.fit(pixels) 155 | 156 | cluster_counts = np.bincount(kmeans.labels_) 157 | max_cluster_ratio = np.max(cluster_counts) / len(pixels) 158 | 159 | return max_cluster_ratio > SOLID_BACKGROUND_THRESHOLD 160 | except Exception as e: 161 | logger.error(f"纯色背景判断失败: {str(e)}") 162 | return False 163 | 164 | 165 | def is_too_dark(image): 166 | """判断图片是否偏暗""" 167 | try: 168 | img_gray = image.convert('L') 169 | img_array = np.array(img_gray) 170 | average_brightness = np.mean(img_array) 171 | return average_brightness < BRIGHTNESS_THRESHOLD 172 | except Exception as e: 173 | logger.error(f"亮度判断失败: {str(e)}") 174 | return False 175 | 176 | 177 | def download_and_filter_image(url, save_path, category_name): 178 | """下载图片并进行过滤""" 179 | for attempt in range(MAX_RETRIES): 180 | try: 181 | logger.debug(f"尝试下载 {url} (第 {attempt + 1} 次)") 182 | response = requests.get( 183 | url, 184 | headers=HEADERS, 185 | timeout=TIMEOUT, 186 | stream=True 187 | ) 188 | response.raise_for_status() 189 | 190 | image_data = BytesIO(response.content) 191 | 192 | try: 193 | with Image.open(image_data) as img: 194 | # 检查是否为黑白图片 195 | if is_black_white(img): 196 | logger.debug(f"过滤黑白图片: {url}") 197 | return False, "黑白图片" 198 | 199 | # 检查是否为纯色背景图片 200 | if has_solid_background(img): 201 | logger.debug(f"过滤纯色背景图片: {url}") 202 | return False, "纯色背景图片" 203 | 204 | # 检查是否为偏暗图片 205 | if is_too_dark(img): 206 | logger.debug(f"过滤偏暗图片: {url}") 207 | return False, "偏暗图片" 208 | 209 | # 检查是否与已下载图片相似 210 | is_similar, distance = is_similar_to_existing(img, category_name) 211 | if is_similar: 212 | logger.debug(f"过滤相似图片 (距离: {distance}): {url}") 213 | return False, f"相似图片 (距离: {distance})" 214 | 215 | except Exception as e: 216 | logger.warning(f"图片分析失败 {url} (格式可能异常): {str(e)}") 217 | return False, "图片格式异常" 218 | 219 | # 保存图片 220 | os.makedirs(os.path.dirname(save_path), exist_ok=True) 221 | with open(save_path, 'wb') as f: 222 | f.write(response.content) 223 | 224 | # 计算并保存哈希值 225 | with Image.open(save_path) as saved_img: 226 | img_hash = calculate_perceptual_hash(saved_img) 227 | if img_hash is not None: 228 | if category_name not in image_hashes: 229 | image_hashes[category_name] = [] 230 | image_hashes[category_name].append(img_hash) 231 | 232 | logger.debug(f"成功下载: {save_path}") 233 | return True, "成功" 234 | 235 | except Exception as e: 236 | if attempt < MAX_RETRIES - 1: 237 | logger.warning( 238 | f"下载失败 {url} (第 {attempt + 1} 次): {str(e)},将重试..." 239 | ) 240 | time.sleep(RETRY_DELAY * (attempt + 1)) 241 | continue 242 | 243 | logger.error(f"下载失败 {url} (已达最大重试次数): {str(e)}") 244 | return False, f"下载失败: {str(e)}" 245 | return None 246 | 247 | 248 | def fetch_page_images(category_id, start_index): 249 | """获取指定分类和起始位置的图片列表""" 250 | try: 251 | params = { 252 | "c": "WallPaper", 253 | "a": "getAppsByCategory", 254 | "cid": category_id, 255 | "start": start_index, 256 | "count": PAGE_SIZE, 257 | "from": "360chrome" 258 | } 259 | 260 | logger.debug(f"请求URL: {API_BASE_URL}, 参数: {params}") 261 | response = requests.get(API_BASE_URL, params=params, headers=HEADERS, timeout=TIMEOUT) 262 | response.raise_for_status() 263 | return response.json() 264 | except Exception as e: 265 | logger.error(f"获取起始位置 {start_index} 的数据失败: {str(e)}") 266 | return None 267 | 268 | 269 | def collect_all_image_urls(): 270 | """收集所有分类的图片URL,进行全局去重""" 271 | logger.info("====== 开始收集所有分类的图片URL ======") 272 | 273 | all_images = {} 274 | total_count = 0 275 | 276 | for category_id, category_name in CATEGORY_MAPPING.items(): 277 | logger.info(f"开始收集分类: {category_name} (ID: {category_id}) 的图片URL") 278 | save_dir = os.path.join(DOWNLOAD_ROOT_DIR, category_name) 279 | 280 | try: 281 | # 获取第一页数据以确定总数 282 | first_page_data = fetch_page_images(category_id, 0) 283 | if not first_page_data or first_page_data.get("errno") != "0": 284 | error_msg = first_page_data.get("errmsg", "未知错误") if first_page_data else "无法获取数据" 285 | logger.error(f"API请求失败: {error_msg}") 286 | continue 287 | 288 | total_images = int(first_page_data.get("total", 0)) 289 | total_count += total_images 290 | 291 | if total_images == 0: 292 | logger.info(f"分类 {category_name} 没有找到壁纸") 293 | continue 294 | 295 | logger.info(f"分类 {category_name} 发现 {total_images} 张壁纸") 296 | 297 | # 计算需要请求的页数 298 | pages = (total_images + PAGE_SIZE - 1) // PAGE_SIZE 299 | 300 | for page in range(pages): 301 | start_index = page * PAGE_SIZE 302 | # 避免请求超出总数 303 | if start_index >= total_images: 304 | break 305 | 306 | page_data = fetch_page_images(category_id, start_index) 307 | if not page_data or page_data.get("errno") != "0": 308 | error_msg = page_data.get("errmsg", "未知错误") if page_data else "无法获取数据" 309 | logger.warning(f"获取起始位置 {start_index} 失败: {error_msg},将跳过该页") 310 | continue 311 | 312 | for item in page_data.get("data", []): 313 | raw_url = item.get("url", "") 314 | # 移除了URL清理逻辑,直接使用原始URL 315 | if not raw_url: 316 | continue 317 | 318 | # 生成图片名称 319 | image_id = item.get("id", str(int(time.time() * 1000))) 320 | # 从URL提取扩展名 321 | ext_match = re.search(r'\.(\w+)(?:\?|$)', raw_url) 322 | ext = ext_match.group(1) if ext_match else 'jpg' 323 | image_name = f"{image_id}.{ext}" 324 | image_name = re.sub(r'[\\/*?:"<>|]', "", image_name) 325 | save_path = os.path.join(save_dir, image_name) 326 | 327 | if raw_url not in all_images: 328 | all_images[raw_url] = (category_name, save_path) 329 | 330 | logger.info(f"已收集分类 {category_name} 第 {page + 1}/{pages} 页的图片链接") 331 | time.sleep(1) # 避免请求过于频繁 332 | 333 | except Exception as e: 334 | logger.error(f"收集分类 {category_name} URL时出错: {str(e)}", exc_info=True) 335 | 336 | duplicate_count = total_count - len(all_images) 337 | logger.info( 338 | f"URL收集完成,原始总计 {total_count} 张,去重后剩余 {len(all_images)} 张,移除了 {duplicate_count} 个重复链接") 339 | 340 | categorized_images = {} 341 | for url, (category_name, save_path) in all_images.items(): 342 | if category_name not in categorized_images: 343 | categorized_images[category_name] = [] 344 | categorized_images[category_name].append((url, save_path)) 345 | 346 | return categorized_images 347 | 348 | 349 | def download_categorized_images(categorized_images): 350 | """按分类下载整理好的图片""" 351 | logger.info("====== 开始按分类下载图片 ======") 352 | os.makedirs(DOWNLOAD_ROOT_DIR, exist_ok=True) 353 | 354 | # 初始化哈希存储 355 | global image_hashes 356 | image_hashes = {category: [] for category in categorized_images.keys()} 357 | 358 | total_stats = {"total": 0, "success": 0, "failed": 0, 359 | "filtered_black_white": 0, "filtered_solid_bg": 0, 360 | "filtered_dark": 0, "filtered_similar": 0} 361 | 362 | for category_name, image_list in categorized_images.items(): 363 | logger.info(f"开始处理分类: {category_name},共 {len(image_list)} 张图片") 364 | cat_stats = { 365 | "total": len(image_list), 366 | "success": 0, 367 | "failed": 0, 368 | "filtered_black_white": 0, 369 | "filtered_solid_bg": 0, 370 | "filtered_dark": 0, 371 | "filtered_similar": 0 372 | } 373 | 374 | with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: 375 | # 提交任务时传递分类名称 376 | futures = { 377 | executor.submit(download_and_filter_image, url, path, category_name): (url, path) 378 | for url, path in image_list 379 | } 380 | 381 | for future in tqdm(as_completed(futures), total=len(futures), desc=f"下载 {category_name}"): 382 | url, path = futures[future] 383 | result, reason = future.result() 384 | if result: 385 | cat_stats["success"] += 1 386 | else: 387 | if reason == "黑白图片": 388 | cat_stats["filtered_black_white"] += 1 389 | logger.info(f"已过滤 {reason}: {url}") 390 | elif reason == "纯色背景图片": 391 | cat_stats["filtered_solid_bg"] += 1 392 | logger.info(f"已过滤 {reason}: {url}") 393 | elif reason == "偏暗图片": 394 | cat_stats["filtered_dark"] += 1 395 | logger.info(f"已过滤 {reason}: {url}") 396 | elif reason.startswith("相似图片"): 397 | cat_stats["filtered_similar"] += 1 398 | logger.info(f"已过滤 {reason}: {url}") 399 | else: 400 | cat_stats["failed"] += 1 401 | logger.info(f"下载失败 {reason}: {url}") 402 | 403 | for key in total_stats: 404 | total_stats[key] += cat_stats[key] 405 | 406 | logger.info( 407 | f"分类 {category_name} 处理完成: " 408 | f"成功 {cat_stats['success']} 张, " 409 | f"失败 {cat_stats['failed']} 张, " 410 | f"过滤黑白 {cat_stats['filtered_black_white']} 张, " 411 | f"过滤纯色背景 {cat_stats['filtered_solid_bg']} 张, " 412 | f"过滤偏暗图片 {cat_stats['filtered_dark']} 张, " 413 | f"过滤相似图片 {cat_stats['filtered_similar']} 张\n" 414 | ) 415 | 416 | logger.info( 417 | f"====== 所有分类处理完毕 ======\n" 418 | f"总计: {total_stats['total']} 张\n" 419 | f"成功下载: {total_stats['success']} 张\n" 420 | f"下载失败: {total_stats['failed']} 张\n" 421 | f"过滤黑白图片: {total_stats['filtered_black_white']} 张\n" 422 | f"过滤纯色背景图片: {total_stats['filtered_solid_bg']} 张\n" 423 | f"过滤偏暗图片: {total_stats['filtered_dark']} 张\n" 424 | f"过滤相似图片: {total_stats['filtered_similar']} 张" 425 | ) 426 | 427 | 428 | def main(): 429 | """主函数""" 430 | logger.info("====== 360壁纸批量下载脚本启动 ======") 431 | logger.info(f"配置信息: 并发数={MAX_WORKERS}, 每页数量={PAGE_SIZE}") 432 | logger.info(f"下载根目录: {os.path.abspath(DOWNLOAD_ROOT_DIR)}") 433 | logger.info(f"图片过滤: 黑白图片阈值={BLACK_WHITE_THRESHOLD}, " 434 | f"纯色背景阈值={SOLID_BACKGROUND_THRESHOLD}, " 435 | f"亮度阈值={BRIGHTNESS_THRESHOLD}, " 436 | f"相似图片阈值={SIMILARITY_THRESHOLD}") 437 | 438 | categorized_images = collect_all_image_urls() 439 | if categorized_images: 440 | download_categorized_images(categorized_images) 441 | else: 442 | logger.info("没有收集到任何图片URL,程序退出") 443 | 444 | 445 | if __name__ == "__main__": 446 | main() 447 | -------------------------------------------------------------------------------- /wallpaper-dl/wallhaven.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/08/08 5 | # @Desc : Wallhaven 壁纸批量下载脚本 - 支持过滤黑白、纯色背景、偏暗和相似图片 6 | import logging 7 | import os 8 | import re 9 | import time 10 | from concurrent.futures import ThreadPoolExecutor, as_completed 11 | from io import BytesIO 12 | 13 | import numpy as np 14 | import requests 15 | from PIL import Image 16 | from sklearn.cluster import KMeans 17 | from tqdm import tqdm 18 | 19 | # ===================== 配置项 ===================== 20 | # API基础地址 21 | API_BASE_URL = "https://api.codelife.cc/wallpaper/wallhaven" 22 | 23 | # 分类映射关系 (id: 分类名称) 24 | CATEGORY_MAPPING = { 25 | # "1": "二次元", 26 | # "5": "二次元", 27 | "37": "自然风景", 28 | "711": "自然风景", 29 | "1748": "吉卜力", 30 | "2321": "像素风" 31 | } 32 | 33 | # 自定义下载根目录 34 | DOWNLOAD_ROOT_DIR = "D:/DL" 35 | 36 | # 并发下载线程数 37 | MAX_WORKERS = 5 38 | 39 | # 请求超时时间(秒) 40 | TIMEOUT = 10 41 | 42 | # 请求头 43 | HEADERS = { 44 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", 45 | "Accept": "application/json, text/plain, */*", 46 | "Connection": "keep-alive" 47 | } 48 | 49 | # 下载失败重试次数 50 | MAX_RETRIES = 10 51 | 52 | # 重试延迟时间(秒) 53 | RETRY_DELAY = 2 54 | 55 | # 图片过滤配置 56 | BLACK_WHITE_THRESHOLD = 10 # 黑白判断阈值 57 | SOLID_BACKGROUND_THRESHOLD = 0.7 # 纯色背景判断阈值 58 | SOLID_COLOR_TOLERANCE = 15 # 颜色容差 59 | BRIGHTNESS_THRESHOLD = 20 # 亮度阈值(0-255) 60 | SIMILARITY_THRESHOLD = 5 # 相似图片判断阈值(汉明距离),值越小要求越相似 61 | 62 | # 域名配置 - 主域名和备用域名列表 63 | PRIMARY_DOMAIN = "https://w.wallhaven.cc/" 64 | BACKUP_DOMAINS = [ 65 | "https://w.wallhaven.wpcoder.cn/", 66 | "https://w.wallhaven.clbug.com/", 67 | "https://w.wallhaven.1lou.top/", 68 | "https://files.codelife.cc/wallhaven/" 69 | ] 70 | 71 | # 日志配置 72 | logging.basicConfig( 73 | level=logging.INFO, 74 | format='%(asctime)s - %(levelname)s - %(message)s', 75 | datefmt='%Y-%m-%d %H:%M:%S' 76 | ) 77 | logger = logging.getLogger(__name__) 78 | 79 | # 存储已下载图片的哈希值,用于相似性检查 80 | image_hashes = {} # 结构: {category_name: [hash_values]} 81 | 82 | 83 | # ================================================= 84 | 85 | 86 | def get_domain_url(raw_url, domain): 87 | """使用指定域名生成URL,只提取full/.../wallhaven-....[图片格式]部分""" 88 | if not raw_url: 89 | return "" 90 | 91 | # 修正正则表达式:精准匹配从full/开始到图片扩展名结束的路径 92 | path_match = re.search(r'(full/[^?]+\.(?:jpg|jpeg|png|gif|webp))', raw_url) 93 | if path_match: 94 | path = path_match.group(1) 95 | if not domain.endswith('/'): 96 | domain += '/' 97 | return f"{domain}{path}" 98 | 99 | logger.warning(f"无法提取有效路径: {raw_url}") 100 | return raw_url.split('?')[0] 101 | 102 | 103 | def clean_url(raw_url): 104 | """默认URL清理:使用主域名""" 105 | return get_domain_url(raw_url, PRIMARY_DOMAIN) 106 | 107 | 108 | def calculate_perceptual_hash(image, hash_size=16): 109 | """计算图片的感知哈希值""" 110 | try: 111 | # 缩小图片尺寸并转为灰度图 112 | img = image.resize((hash_size, hash_size), Image.LANCZOS).convert('L') 113 | img_array = np.array(img) 114 | 115 | # 计算平均亮度 116 | avg_brightness = img_array.mean() 117 | 118 | # 生成哈希值:像素亮度高于平均为1,否则为0 119 | hash_array = (img_array > avg_brightness).flatten() 120 | 121 | # 转换为整数哈希值 122 | hash_value = 0 123 | for bit in hash_array: 124 | hash_value = (hash_value << 1) | (1 if bit else 0) 125 | 126 | return hash_value 127 | except Exception as e: 128 | logger.error(f"计算哈希值失败: {str(e)}") 129 | return None 130 | 131 | 132 | def hamming_distance(hash1, hash2): 133 | """计算两个哈希值的汉明距离""" 134 | if hash1 is None or hash2 is None: 135 | return float('inf') # 无法计算时视为差异极大 136 | # 计算两个哈希值的异或结果中1的个数 137 | return bin(hash1 ^ hash2).count('1') 138 | 139 | 140 | def is_similar_to_existing(image, category_name): 141 | """判断图片是否与同分类中已下载的图片相似""" 142 | if category_name not in image_hashes: 143 | return False, None 144 | 145 | current_hash = calculate_perceptual_hash(image) 146 | if current_hash is None: 147 | return False, None 148 | 149 | # 与同分类中所有已下载图片比较 150 | for existing_hash in image_hashes[category_name]: 151 | distance = hamming_distance(current_hash, existing_hash) 152 | if distance < SIMILARITY_THRESHOLD: 153 | return True, distance 154 | 155 | return False, None 156 | 157 | 158 | def is_black_white(image): 159 | """判断图片是否为黑白""" 160 | try: 161 | img_rgb = image.convert('RGB') 162 | img_array = np.array(img_rgb) 163 | 164 | r, g, b = img_array[:, :, 0], img_array[:, :, 1], img_array[:, :, 2] 165 | diff1 = np.abs(r - g) 166 | diff2 = np.abs(r - b) 167 | diff3 = np.abs(g - b) 168 | 169 | total_pixels = img_array.shape[0] * img_array.shape[1] 170 | bw_pixels = np.sum((diff1 < BLACK_WHITE_THRESHOLD) & 171 | (diff2 < BLACK_WHITE_THRESHOLD) & 172 | (diff3 < BLACK_WHITE_THRESHOLD)) 173 | 174 | return bw_pixels / total_pixels > 0.95 175 | except Exception as e: 176 | logger.error(f"黑白判断失败: {str(e)}") 177 | return False 178 | 179 | 180 | def has_solid_background(image): 181 | """判断图片是否有纯色背景""" 182 | try: 183 | img_rgb = image.convert('RGB') 184 | img_array = np.array(img_rgb) 185 | pixels = img_array.reshape(-1, 3) 186 | 187 | kmeans = KMeans(n_clusters=min(10, len(pixels)), random_state=42) 188 | kmeans.fit(pixels) 189 | 190 | cluster_counts = np.bincount(kmeans.labels_) 191 | max_cluster_ratio = np.max(cluster_counts) / len(pixels) 192 | 193 | return max_cluster_ratio > SOLID_BACKGROUND_THRESHOLD 194 | except Exception as e: 195 | logger.error(f"纯色背景判断失败: {str(e)}") 196 | return False 197 | 198 | 199 | def is_too_dark(image): 200 | """判断图片是否偏暗""" 201 | try: 202 | img_gray = image.convert('L') 203 | img_array = np.array(img_gray) 204 | average_brightness = np.mean(img_array) 205 | return average_brightness < BRIGHTNESS_THRESHOLD 206 | except Exception as e: 207 | logger.error(f"亮度判断失败: {str(e)}") 208 | return False 209 | 210 | 211 | def download_and_filter_image(url, save_path, category_name): 212 | """下载图片并进行过滤(支持多域名切换重试)""" 213 | all_domains = [PRIMARY_DOMAIN] + BACKUP_DOMAINS 214 | current_domain_index = 0 215 | 216 | for attempt in range(MAX_RETRIES): 217 | try: 218 | current_domain = all_domains[current_domain_index] 219 | current_url = get_domain_url(url, current_domain) 220 | 221 | logger.debug(f"尝试下载 {current_url} (第 {attempt + 1} 次,使用域名: {current_domain})") 222 | response = requests.get( 223 | current_url, 224 | headers=HEADERS, 225 | timeout=TIMEOUT, 226 | stream=True 227 | ) 228 | response.raise_for_status() 229 | 230 | image_data = BytesIO(response.content) 231 | 232 | try: 233 | with Image.open(image_data) as img: 234 | # 检查是否为黑白图片 235 | if is_black_white(img): 236 | logger.debug(f"过滤黑白图片: {current_url}") 237 | return False, "黑白图片" 238 | 239 | # 检查是否为纯色背景图片 240 | if has_solid_background(img): 241 | logger.debug(f"过滤纯色背景图片: {current_url}") 242 | return False, "纯色背景图片" 243 | 244 | # 检查是否为偏暗图片 245 | if is_too_dark(img): 246 | logger.debug(f"过滤偏暗图片: {current_url}") 247 | return False, "偏暗图片" 248 | 249 | # 检查是否与已下载图片相似 250 | is_similar, distance = is_similar_to_existing(img, category_name) 251 | if is_similar: 252 | logger.debug(f"过滤相似图片 (距离: {distance}): {current_url}") 253 | return False, f"相似图片 (距离: {distance})" 254 | 255 | except Exception as e: 256 | logger.warning(f"图片分析失败 {current_url} (格式可能异常): {str(e)}") 257 | return False, "图片格式异常" 258 | 259 | # 保存图片 260 | os.makedirs(os.path.dirname(save_path), exist_ok=True) 261 | with open(save_path, 'wb') as f: 262 | f.write(response.content) 263 | 264 | # 计算并保存哈希值 265 | with Image.open(save_path) as saved_img: 266 | img_hash = calculate_perceptual_hash(saved_img) 267 | if img_hash is not None: 268 | if category_name not in image_hashes: 269 | image_hashes[category_name] = [] 270 | image_hashes[category_name].append(img_hash) 271 | 272 | logger.debug(f"成功下载: {save_path} (来源: {current_url})") 273 | return True, "成功" 274 | 275 | except Exception as e: 276 | current_domain_index = (current_domain_index + 1) % len(all_domains) 277 | 278 | if attempt < MAX_RETRIES - 1: 279 | next_domain = all_domains[current_domain_index] 280 | logger.warning( 281 | f"下载失败 {current_url} (第 {attempt + 1} 次): {str(e)}," 282 | f"将尝试域名 {next_domain} 重试..." 283 | ) 284 | time.sleep(RETRY_DELAY * (attempt + 1)) 285 | continue 286 | 287 | logger.error(f"下载失败 {current_url} (已达最大重试次数): {str(e)}") 288 | return False, f"下载失败: {str(e)}" 289 | return None 290 | 291 | 292 | def fetch_page_images(category_id, page_num): 293 | """获取指定分类和页码的图片列表""" 294 | try: 295 | url = f"{API_BASE_URL}?lang=cn&page={page_num}&size=50&q=id:{category_id}" 296 | logger.debug(f"请求URL: {url}") 297 | response = requests.get(url, headers=HEADERS, timeout=TIMEOUT) 298 | response.raise_for_status() 299 | return response.json() 300 | except Exception as e: 301 | logger.error(f"获取第 {page_num} 页数据失败: {str(e)}") 302 | return None 303 | 304 | 305 | def collect_all_image_urls(): 306 | """收集所有分类的图片URL,进行全局去重""" 307 | logger.info("====== 开始收集所有分类的图片URL ======") 308 | 309 | all_images = {} 310 | total_count = 0 311 | 312 | for category_id, category_name in CATEGORY_MAPPING.items(): 313 | logger.info(f"开始收集分类: {category_name} (ID: {category_id}) 的图片URL") 314 | save_dir = os.path.join(DOWNLOAD_ROOT_DIR, category_name) 315 | 316 | try: 317 | first_page_data = fetch_page_images(category_id, 1) 318 | if not first_page_data or first_page_data.get("code") != 200: 319 | error_msg = first_page_data.get("msg", "未知错误") if first_page_data else "无法获取数据" 320 | logger.error(f"API请求失败: {error_msg}") 321 | continue 322 | 323 | total_pages = first_page_data.get("pages", 0) 324 | cat_count = first_page_data.get("count", 0) 325 | total_count += cat_count 326 | 327 | if total_pages == 0: 328 | logger.info(f"分类 {category_name} 没有找到壁纸") 329 | continue 330 | 331 | logger.info(f"分类 {category_name} 发现 {cat_count} 张壁纸,共 {total_pages} 页") 332 | 333 | for page in range(1, total_pages + 1): 334 | page_data = fetch_page_images(category_id, page) 335 | if not page_data or page_data.get("code") != 200: 336 | error_msg = page_data.get("msg", "未知错误") if page_data else "无法获取数据" 337 | logger.warning(f"获取第 {page} 页失败: {error_msg},将跳过该页") 338 | continue 339 | 340 | for item in page_data.get("data", []): 341 | raw_url = item.get("raw", "") 342 | clean_img_url = clean_url(raw_url) 343 | if not clean_img_url: 344 | continue 345 | 346 | image_name = f"{item.get('name', '')}.jpg" 347 | image_name = re.sub(r'[\\/*?:"<>|]', "", image_name) 348 | save_path = os.path.join(save_dir, image_name) 349 | 350 | if clean_img_url not in all_images: 351 | all_images[clean_img_url] = (category_name, save_path) 352 | 353 | logger.info(f"已收集分类 {category_name} 第 {page}/{total_pages} 页的图片链接") 354 | time.sleep(1) 355 | 356 | except Exception as e: 357 | logger.error(f"收集分类 {category_name} URL时出错: {str(e)}", exc_info=True) 358 | 359 | duplicate_count = total_count - len(all_images) 360 | logger.info( 361 | f"URL收集完成,原始总计 {total_count} 张,去重后剩余 {len(all_images)} 张,移除了 {duplicate_count} 个重复链接") 362 | 363 | categorized_images = {} 364 | for url, (category_name, save_path) in all_images.items(): 365 | if category_name not in categorized_images: 366 | categorized_images[category_name] = [] 367 | categorized_images[category_name].append((url, save_path)) 368 | 369 | return categorized_images 370 | 371 | 372 | def download_categorized_images(categorized_images): 373 | """按分类下载整理好的图片""" 374 | logger.info("====== 开始按分类下载图片 ======") 375 | logger.info(f"使用的域名列表: 主域名={PRIMARY_DOMAIN}, 备用域名={BACKUP_DOMAINS}") 376 | os.makedirs(DOWNLOAD_ROOT_DIR, exist_ok=True) 377 | 378 | # 初始化哈希存储 379 | global image_hashes 380 | image_hashes = {category: [] for category in categorized_images.keys()} 381 | 382 | total_stats = {"total": 0, "success": 0, "failed": 0, 383 | "filtered_black_white": 0, "filtered_solid_bg": 0, 384 | "filtered_dark": 0, "filtered_similar": 0} 385 | 386 | for category_name, image_list in categorized_images.items(): 387 | logger.info(f"开始处理分类: {category_name},共 {len(image_list)} 张图片") 388 | cat_stats = { 389 | "total": len(image_list), 390 | "success": 0, 391 | "failed": 0, 392 | "filtered_black_white": 0, 393 | "filtered_solid_bg": 0, 394 | "filtered_dark": 0, 395 | "filtered_similar": 0 396 | } 397 | 398 | with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: 399 | # 提交任务时传递分类名称 400 | futures = { 401 | executor.submit(download_and_filter_image, url, path, category_name): (url, path) 402 | for url, path in image_list 403 | } 404 | 405 | for future in tqdm(as_completed(futures), total=len(futures), desc=f"下载 {category_name}"): 406 | url, path = futures[future] 407 | cleaned_url = clean_url(url) 408 | result, reason = future.result() 409 | if result: 410 | cat_stats["success"] += 1 411 | else: 412 | if reason == "黑白图片": 413 | cat_stats["filtered_black_white"] += 1 414 | logger.info(f"已过滤 {reason}: {cleaned_url}") 415 | elif reason == "纯色背景图片": 416 | cat_stats["filtered_solid_bg"] += 1 417 | logger.info(f"已过滤 {reason}: {cleaned_url}") 418 | elif reason == "偏暗图片": 419 | cat_stats["filtered_dark"] += 1 420 | logger.info(f"已过滤 {reason}: {cleaned_url}") 421 | elif reason.startswith("相似图片"): 422 | cat_stats["filtered_similar"] += 1 423 | logger.info(f"已过滤 {reason}: {cleaned_url}") 424 | else: 425 | cat_stats["failed"] += 1 426 | logger.info(f"下载失败 {reason}: {cleaned_url}") 427 | 428 | for key in total_stats: 429 | total_stats[key] += cat_stats[key] 430 | 431 | logger.info( 432 | f"分类 {category_name} 处理完成: " 433 | f"成功 {cat_stats['success']} 张, " 434 | f"失败 {cat_stats['failed']} 张, " 435 | f"过滤黑白 {cat_stats['filtered_black_white']} 张, " 436 | f"过滤纯色背景 {cat_stats['filtered_solid_bg']} 张, " 437 | f"过滤偏暗图片 {cat_stats['filtered_dark']} 张, " 438 | f"过滤相似图片 {cat_stats['filtered_similar']} 张\n" 439 | ) 440 | 441 | logger.info( 442 | f"====== 所有分类处理完毕 ======\n" 443 | f"总计: {total_stats['total']} 张\n" 444 | f"成功下载: {total_stats['success']} 张\n" 445 | f"下载失败: {total_stats['failed']} 张\n" 446 | f"过滤黑白图片: {total_stats['filtered_black_white']} 张\n" 447 | f"过滤纯色背景图片: {total_stats['filtered_solid_bg']} 张\n" 448 | f"过滤偏暗图片: {total_stats['filtered_dark']} 张\n" 449 | f"过滤相似图片: {total_stats['filtered_similar']} 张" 450 | ) 451 | 452 | 453 | def main(): 454 | """主函数""" 455 | logger.info("====== Wallhaven 壁纸批量下载脚本启动 ======") 456 | logger.info(f"配置信息: 并发数={MAX_WORKERS}, 每页数量=50") 457 | logger.info(f"下载根目录: {os.path.abspath(DOWNLOAD_ROOT_DIR)}") 458 | logger.info(f"图片过滤: 黑白图片阈值={BLACK_WHITE_THRESHOLD}, " 459 | f"纯色背景阈值={SOLID_BACKGROUND_THRESHOLD}, " 460 | f"亮度阈值={BRIGHTNESS_THRESHOLD}, " 461 | f"相似图片阈值={SIMILARITY_THRESHOLD}") 462 | logger.info(f"域名配置: 主域名={PRIMARY_DOMAIN}, 备用域名={BACKUP_DOMAINS}") 463 | 464 | categorized_images = collect_all_image_urls() 465 | if categorized_images: 466 | download_categorized_images(categorized_images) 467 | else: 468 | logger.info("没有收集到任何图片URL,程序退出") 469 | 470 | 471 | if __name__ == "__main__": 472 | main() 473 | -------------------------------------------------------------------------------- /mtab-import/website-info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # coding=utf8 3 | # @Author: Kinoko 4 | # @Date : 2025/08/01 5 | # @Desc : mTab多分类网站书签导入工具(AI) 6 | # @Func : 批量获取网站信息,处理URL去重、AI生成标题、描述和分类,图标下载转换压缩SVG,生成JSON 7 | 8 | 9 | # ============================== 公共配置参数区 ============================== 10 | # 线程数量配置 11 | MAX_WORKERS = 40 # 并发处理URL的线程数量 12 | 13 | # AI模型配置 14 | AI_CONFIG = { 15 | "api_key_env_var": "AI_API_KEY", 16 | "base_url": "https://www.gptapi.us/v1", 17 | "model": "gpt-4o-mini", 18 | "temperature": 0.7, 19 | "max_tokens": 1024, 20 | "max_retries": 2, # AI调用失败最大重试次数 21 | "retry_delay": 1 # 重试延迟时间(秒) 22 | } 23 | 24 | # 分类配置 25 | CATEGORIES = [ 26 | "ai", "app", "news", "music", "tech", "photos", "life", "education", 27 | "entertainment", "shopping", "social", "read", "sports", "finance", "others" 28 | ] 29 | CATEGORY_IDS = { 30 | "生活&出行&地图&交通&美食&健康&母婴": 1, 31 | "新闻&资讯&财经资讯&地方资讯": 2, 32 | "社交&互动&论坛&社区&邮箱&即时通讯": 3, 33 | "购物&消费&电商&跨境购&二手交易": 4, 34 | "影音&媒体&影视&音乐&短视频&直播": 5, 35 | "阅读&出版&书籍&小说&漫画&百科&文献": 6, 36 | "游戏&娱乐&电竞&手游&休闲游戏": 7, 37 | "应用&工具&办公工具&效率工具&系统工具&AI": 8, 38 | "教育&学习&课程&大学&职业教育&语言学习": 9, 39 | "设计&图片&素材&创意&模板&UI设计": 10, 40 | "开发&编程&Web&框架&编程语言&IDE&技术&文档&API文档&开源手册&教程文档": 11, 41 | "职场&就业&招聘&创业&职场技能": 12, 42 | "金融&投资&银行&理财&保险&支付": 13, 43 | "体育&运动&健身&赛事&运动装备": 14, 44 | "其他": 15 45 | } 46 | 47 | # 供AI参考的分类列表 48 | AI_CATEGORY_OPTIONS = list(CATEGORY_IDS.keys()) 49 | 50 | # 域名过滤配置 51 | DOMAIN_BLACKLIST = { 52 | "trae.cn", "trae.ai", "js.design", "zenvideo.qq.com" 53 | } 54 | DOMAIN_WHITELIST = { 55 | "x.com", "qq.com", "gmail.com", "google.com", "github.com", "youtube.com", "facebook.com", 56 | "yandex.com", "www.iqiyi.com", "yiyan.baidu.com", "outlook.live.com" 57 | } 58 | 59 | # 域名映射配置 - 键为需要映射的域名或URL,值为目标域名或URL 60 | DOMAIN_MAPPING = { 61 | "https://tj.shshinfo.com/tz/pcw/kimi10.html": "https://www.kimi.com", 62 | } 63 | 64 | # 网络请求配置 65 | HTTP_CONFIG = { 66 | 'timeout': 20, # 请求超时时间(秒) 67 | 'headers': { 68 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 69 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', 70 | 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', 71 | 'Connection': 'keep-alive' 72 | } 73 | } 74 | 75 | # URL跳转配置 76 | REDIRECT_CONFIG = { 77 | 'max_redirects': 10, # 最大跳转次数,防止无限循环 78 | 'js_redirect_patterns': [ 79 | r'window\.location\.href\s*=\s*["\'](.*?)["\']', 80 | r'window\.location\s*=\s*["\'](.*?)["\']', 81 | r'location\.href\s*=\s*["\'](.*?)["\']', 82 | r'location\s*=\s*["\'](.*?)["\']', 83 | r'redirect\s*\(\s*["\'](.*?)["\']\s*\)', 84 | r'window\.open\s*\(\s*["\'](.*?)["\']\s*\)' 85 | ] 86 | } 87 | 88 | # 文件路径配置 89 | ICON_DIRECTORY = 'icons' 90 | JSON_OUTPUT_FILE = "mtab_data.json" 91 | 92 | # 图片下载配置 93 | MAX_IMAGE_RETRIES = 3 # 最大重试次数 94 | INITIAL_RETRY_DELAY = 1 # 初始重试延迟(秒) 95 | # ======================================================================== 96 | 97 | 98 | # 导入依赖库 99 | import io 100 | import json 101 | import logging 102 | import os 103 | import random 104 | import re 105 | import threading 106 | import time 107 | from base64 import b64encode 108 | from collections import Counter 109 | from concurrent.futures import ThreadPoolExecutor, as_completed 110 | from dataclasses import dataclass, asdict 111 | from typing import List, Dict, Set, Tuple, Optional 112 | from urllib.parse import quote, urlparse, urljoin 113 | 114 | import requests 115 | import validators 116 | from PIL import Image 117 | from openai import OpenAI 118 | from openai.types.chat import ( 119 | ChatCompletionSystemMessageParam, 120 | ChatCompletionUserMessageParam, 121 | ChatCompletionMessageParam 122 | ) 123 | from tldextract import extract 124 | from tqdm import tqdm 125 | 126 | 127 | # 数据结构定义 128 | @dataclass 129 | class WebsiteData: 130 | name: str # 网站名称(AI生成) 131 | url: str # 网站URL(小写处理后) 132 | description: str # 网站描述(AI生成) 133 | img_src: str # 图标原始URL 134 | local_filename: str # 本地存储的图标文件名 135 | category: str # 所属分类(中文) 136 | category_id: int # 分类ID 137 | background_color: str # 背景颜色 138 | 139 | 140 | # 初始化AI客户端 - 从环境变量获取API密钥 141 | def get_ai_client(): 142 | """从环境变量获取API密钥并初始化AI客户端""" 143 | api_key = os.getenv(AI_CONFIG["api_key_env_var"]) 144 | if not api_key: 145 | raise EnvironmentError(f"未设置环境变量 {AI_CONFIG['api_key_env_var']},请配置API密钥") 146 | 147 | return OpenAI( 148 | api_key=api_key, 149 | base_url=AI_CONFIG["base_url"] 150 | ) 151 | 152 | 153 | # 初始化AI客户端 154 | ai_client = get_ai_client() 155 | 156 | 157 | # ============================== 日志配置 ============================== 158 | def setup_logger() -> logging.Logger: 159 | """配置并返回日志记录器""" 160 | logger = logging.getLogger('mtab_exporter') 161 | logger.setLevel(logging.INFO) 162 | 163 | ch = logging.StreamHandler() 164 | ch.setLevel(logging.INFO) 165 | 166 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 167 | ch.setFormatter(formatter) 168 | 169 | if logger.handlers: 170 | logger.handlers = [] 171 | logger.addHandler(ch) 172 | 173 | return logger 174 | 175 | 176 | # 初始化日志 177 | logger = setup_logger() 178 | 179 | 180 | # ============================== URL处理工具函数 ============================== 181 | def apply_domain_mapping(url: str) -> str: 182 | """应用域名映射规则,将URL转换为目标URL""" 183 | url_lower = url.lower() 184 | 185 | # 检查完整URL匹配 186 | if url_lower in DOMAIN_MAPPING: 187 | mapped_url = DOMAIN_MAPPING[url_lower] 188 | logger.info(f"URL映射: {url_lower} -> {mapped_url}") 189 | return mapped_url 190 | 191 | # 检查域名级别匹配 192 | parsed = urlparse(url_lower) 193 | domain = parsed.netloc 194 | 195 | # 检查子域名+主域名匹配 196 | if domain in DOMAIN_MAPPING: 197 | mapped_domain = DOMAIN_MAPPING[domain] 198 | mapped_parsed = urlparse(mapped_domain) 199 | # 保留路径和参数,但使用新的域名和协议 200 | new_url = urljoin(mapped_domain, parsed.path) 201 | if parsed.query: 202 | new_url += f"?{parsed.query}" 203 | logger.info(f"域名映射: {domain} -> {mapped_domain}, 完整URL: {url_lower} -> {new_url}") 204 | return new_url 205 | 206 | # 检查主域名匹配 207 | ext = extract(domain) 208 | main_domain = f"{ext.domain}.{ext.suffix}" 209 | if main_domain in DOMAIN_MAPPING: 210 | mapped_domain = DOMAIN_MAPPING[main_domain] 211 | # 替换主域名但保留子域名 212 | subdomain = ext.subdomain 213 | new_netloc = f"{subdomain}.{mapped_domain}" if subdomain else mapped_domain 214 | new_url = f"{parsed.scheme}://{new_netloc}{parsed.path}" 215 | if parsed.query: 216 | new_url += f"?{parsed.query}" 217 | logger.info(f"主域名映射: {main_domain} -> {mapped_domain}, 完整URL: {url_lower} -> {new_url}") 218 | return new_url 219 | 220 | # 无匹配的映射规则 221 | return url_lower 222 | 223 | 224 | def normalize_url(url: str) -> str: 225 | """标准化URL格式并转换为小写""" 226 | parsed = urlparse(url) 227 | normalized = f"{parsed.scheme.lower()}://{parsed.netloc.lower()}{parsed.path}".rstrip('/') 228 | return normalized 229 | 230 | 231 | def extract_domain(url: str) -> str: 232 | """提取URL中的域名(小写)""" 233 | parsed = urlparse(url) 234 | return parsed.netloc.lower() 235 | 236 | 237 | def is_domain_whitelisted(url: str) -> bool: 238 | """检查域名是否在白名单中(使用小写域名检查)""" 239 | ext = extract(url) 240 | domain_parts = [part for part in [ext.subdomain, ext.domain, ext.suffix] if part] 241 | full_domain = ".".join(domain_parts).lower() 242 | 243 | if full_domain in DOMAIN_WHITELIST: 244 | return True 245 | if ext.registered_domain.lower() in DOMAIN_WHITELIST: 246 | return True 247 | for i in range(1, len(domain_parts)): 248 | if ".".join(domain_parts[i:]).lower() in DOMAIN_WHITELIST: 249 | return True 250 | return False 251 | 252 | 253 | def is_domain_blocked(url: str) -> bool: 254 | """检查域名是否在黑名单中(使用小写域名检查)""" 255 | domain = extract_domain(url).lower() 256 | parts = domain.split('.') 257 | 258 | if domain in DOMAIN_BLACKLIST: 259 | return True 260 | for i in range(len(parts) - 1): 261 | if '.'.join(parts[i:]).lower() in DOMAIN_BLACKLIST: 262 | return True 263 | return False 264 | 265 | 266 | def is_url_acceptable(url: str) -> Tuple[bool, str]: 267 | """检查URL是否符合处理条件(使用小写URL检查)""" 268 | lower_url = url.lower() 269 | if is_domain_blocked(lower_url): 270 | return False, f"URL在黑名单中: {extract_domain(lower_url)}" 271 | return True, "URL符合处理条件" 272 | 273 | 274 | def validate_and_process_url(url: str) -> Tuple[Optional[str], Optional[str]]: 275 | """验证并处理URL格式(确保返回小写URL)""" 276 | url = url.lower() 277 | 278 | if not url.startswith(('http://', 'https://')): 279 | return None, "URL缺少协议前缀" 280 | 281 | parsed = urlparse(url) 282 | base_url = f"{parsed.scheme}://{parsed.netloc}" 283 | 284 | # 强制使用HTTPS 285 | if base_url.startswith('http://'): 286 | base_url = base_url.replace('http://', 'https://') 287 | 288 | if not base_url.endswith('/'): 289 | base_url += '/' 290 | 291 | if not validators.url(base_url.rstrip('/')): 292 | return None, "URL格式无效" 293 | 294 | return base_url, None 295 | 296 | 297 | def get_preferred_url(original_url: str, redirect_history: List[str]) -> str: 298 | """根据跳转历史选择最优URL(返回小写URL)""" 299 | if not redirect_history: 300 | return original_url.lower() 301 | 302 | url_info = [] 303 | for url in redirect_history: 304 | url_lower = url.lower() 305 | parsed = urlparse(url_lower) 306 | domain = parsed.netloc 307 | ext = extract(domain) 308 | 309 | url_info.append({ 310 | "url": url_lower, 311 | "main_domain": ext.domain.lower(), 312 | "registered_domain": ext.registered_domain.lower(), 313 | "subdomain": ext.subdomain.lower(), 314 | "is_www": ext.subdomain.lower() == "www", 315 | "suffix": ext.suffix.lower(), 316 | "suffix_length": len(ext.suffix.split('.')) 317 | }) 318 | 319 | base_main_domain = url_info[0]["main_domain"] 320 | same_main_domain_urls = [ 321 | info for info in url_info 322 | if info["main_domain"] == base_main_domain 323 | ] 324 | 325 | if len(same_main_domain_urls) != len(url_info): 326 | return redirect_history[-1].lower() 327 | 328 | # 优先保留带www的URL 329 | www_urls = [info for info in same_main_domain_urls if info["is_www"]] 330 | if www_urls: 331 | www_urls_sorted = sorted(www_urls, key=lambda x: x["suffix_length"]) 332 | return www_urls_sorted[0]["url"] 333 | 334 | # 无www时保留后缀最短的URL 335 | non_www_urls_sorted = sorted(same_main_domain_urls, key=lambda x: x["suffix_length"]) 336 | shortest_suffix_urls = [ 337 | info for info in non_www_urls_sorted 338 | if info["suffix_length"] == non_www_urls_sorted[0]["suffix_length"] 339 | ] 340 | 341 | # 后缀长度相同时保留最早出现的URL 342 | return shortest_suffix_urls[0]["url"] 343 | 344 | 345 | def follow_redirects(url: str) -> Tuple[str, int, str, List[str]]: 346 | """跟踪URL跳转,包括HTTP重定向和JS跳转(返回小写URL)""" 347 | # 首先应用域名映射 348 | url = apply_domain_mapping(url) 349 | 350 | visited_urls = set() 351 | current_url = url.lower() 352 | redirect_history = [current_url] 353 | redirect_count = 0 354 | 355 | while redirect_count < REDIRECT_CONFIG['max_redirects']: 356 | if current_url in visited_urls: 357 | return current_url, 302, f"循环跳转 detected after {redirect_count} steps", redirect_history 358 | visited_urls.add(current_url) 359 | 360 | try: 361 | response = requests.get( 362 | current_url, 363 | headers=HTTP_CONFIG['headers'], 364 | timeout=HTTP_CONFIG['timeout'], 365 | allow_redirects=False, 366 | stream=True 367 | ) 368 | 369 | # 处理HTTP重定向 370 | if 300 <= response.status_code < 400 and 'Location' in response.headers: 371 | next_url = response.headers['Location'].lower() 372 | next_url = urljoin(current_url, next_url) 373 | # 对重定向的URL也应用映射规则 374 | next_url = apply_domain_mapping(next_url) 375 | logger.info(f"HTTP重定向: {current_url} -> {next_url}") 376 | current_url = next_url 377 | redirect_history.append(current_url) 378 | redirect_count += 1 379 | continue 380 | 381 | # 处理JS跳转 382 | if response.status_code == 200 and 'text/html' in response.headers.get('Content-Type', ''): 383 | content = response.raw.read(8192).decode('utf-8', errors='ignore') 384 | 385 | for pattern in REDIRECT_CONFIG['js_redirect_patterns']: 386 | match = re.search(pattern, content, re.IGNORECASE) 387 | if match: 388 | js_redirect_url = match.group(1).lower() 389 | js_redirect_url = urljoin(current_url, js_redirect_url) 390 | # 对JS跳转的URL也应用映射规则 391 | js_redirect_url = apply_domain_mapping(js_redirect_url) 392 | logger.info(f"JS跳转检测: {current_url} -> {js_redirect_url}") 393 | current_url = js_redirect_url 394 | redirect_history.append(current_url) 395 | redirect_count += 1 396 | response.close() 397 | break 398 | else: 399 | return current_url, response.status_code, f"最终URL,经过{redirect_count}次跳转", redirect_history 400 | continue 401 | 402 | return current_url, response.status_code, f"最终URL,经过{redirect_count}次跳转", redirect_history 403 | 404 | except requests.exceptions.SSLError: 405 | return current_url, 495, "HTTPS证书错误", redirect_history 406 | except Exception as e: 407 | return current_url, 500, f"请求错误: {str(e)}", redirect_history 408 | 409 | return current_url, 302, f"达到最大跳转次数 ({REDIRECT_CONFIG['max_redirects']})", redirect_history 410 | 411 | 412 | def check_url_accessibility(url: str) -> Tuple[bool, Optional[str], Optional[str], Optional[str]]: 413 | """检查URL可访问性并处理跳转,返回最终小写URL""" 414 | try: 415 | url = url.lower() 416 | # 应用域名映射 417 | url = apply_domain_mapping(url) 418 | 419 | if url.startswith('http://'): 420 | url = url.replace('http://', 'https://') 421 | 422 | final_url, status_code, status_msg, redirect_history = follow_redirects(url) 423 | preferred_url = get_preferred_url(url, redirect_history) 424 | logger.info(f"URL跳转跟踪结果: {preferred_url} (状态码: {status_code}, {status_msg})") 425 | 426 | if status_code == 495 or status_code >= 500: 427 | return False, f"URL访问失败: {status_msg} (状态码: {status_code})", url, None 428 | 429 | is_acceptable, reason = is_url_acceptable(preferred_url) 430 | if not is_acceptable: 431 | return False, f"URL不符合处理条件: {reason}", url, None 432 | 433 | processed_url, error = validate_and_process_url(preferred_url) 434 | if not processed_url: 435 | return False, f"URL格式验证失败: {error}", url, None 436 | 437 | normalized = normalize_url(processed_url) 438 | return True, None, processed_url, normalized 439 | 440 | except Exception as e: 441 | normalized = normalize_url(url.lower()) 442 | return False, f"URL处理异常: {str(e)[:20]}", url, normalized 443 | 444 | 445 | # ============================== 网站信息处理函数 ============================== 446 | def is_valid_text(text: str) -> bool: 447 | """检查文本是否有效(不是乱码),兼容中文、英文和俄文""" 448 | if not text or not text.strip(): 449 | return False 450 | 451 | text_clean = re.sub(r'[\x00-\x1F\x7F]', '', text) 452 | if not text_clean: 453 | return False 454 | 455 | valid_chars = re.findall( 456 | r'[\u4e00-\u9fa5\u0400-\u04FFa-zA-Z0-9,。,.;:!?()()《》“”‘’«»\s]', 457 | text_clean 458 | ) 459 | 460 | return len(valid_chars) / len(text_clean) > 0.5 461 | 462 | 463 | def clean_html_entities(text: str) -> str: 464 | """清理HTML实体编码,保留单引号转换""" 465 | text = text.replace(''', "'") 466 | return re.sub(r'&#x[0-9a-fA-F]+;', '', text) 467 | 468 | 469 | def fetch_api(api, url: str) -> Optional[Dict[str, str]]: 470 | """调用API获取网站标题和描述""" 471 | try: 472 | encoded_url = quote(url) 473 | api_url = api['url_template'].format(encoded_url) 474 | 475 | response = requests.get( 476 | api_url, 477 | headers=HTTP_CONFIG['headers'], 478 | timeout=HTTP_CONFIG['timeout'], 479 | allow_redirects=False 480 | ) 481 | response.raise_for_status() 482 | return api['parse_func'](response.json()) 483 | 484 | except Exception: 485 | return None 486 | 487 | 488 | def fetch_website_info(url: str) -> Optional[Dict[str, str]]: 489 | """通过多个API获取网站标题和描述""" 490 | if not url: 491 | return None 492 | 493 | invalid_values = {"null", "暂无标题", "暂无描述"} 494 | 495 | api_list = [ 496 | { 497 | "name": "amogu", 498 | "url_template": "https://api.amogu.cn/api/tdk?url={}", 499 | "parse_func": lambda data: { 500 | "title": title, 501 | "description": desc 502 | } if data.get('code') == 1 503 | and (data_dict := data.get('data', {})) 504 | and (desc := data_dict.get('description', '')) 505 | and (title := data_dict.get('title', '')) 506 | and (title.strip() and title not in invalid_values or 507 | desc.strip() and desc not in invalid_values) 508 | and (not desc.strip() or is_valid_text(desc)) 509 | else None 510 | }, 511 | { 512 | "name": "shanhe", 513 | "url_template": "https://shanhe.kim/api/wz/web_tdk.php?url={}", 514 | "parse_func": lambda data: { 515 | "title": title, 516 | "description": desc 517 | } if (desc := data.get('description', '')) 518 | and (title := data.get('title', '')) 519 | and data.get('code') == 1 520 | and (title.strip() and title not in invalid_values or 521 | desc.strip() and desc not in invalid_values) 522 | and (not desc.strip() or is_valid_text(desc)) 523 | else None 524 | }, 525 | { 526 | "name": "suol", 527 | "url_template": "https://api.suol.cc/v1/zs_wzxx.php?url={}", 528 | "parse_func": lambda data: { 529 | "title": title, 530 | "description": desc 531 | } if (desc := data.get('description', '')) 532 | and (title := data.get('title', '')) 533 | and data.get('code') == 1 534 | and (title.strip() and title not in invalid_values or 535 | desc.strip() and desc not in invalid_values) 536 | and (not desc.strip() or is_valid_text(desc)) 537 | else None 538 | }, 539 | { 540 | "name": "ahfi", 541 | "url_template": "https://api.ahfi.cn/api/websiteinfo?url={}", 542 | "parse_func": lambda data: { 543 | "title": title, 544 | "description": desc 545 | } if (desc := data.get('data', {}).get('description', '')) 546 | and (title := data.get('data', {}).get('title', '')) 547 | and (title.strip() and title not in invalid_values or 548 | desc.strip() and desc not in invalid_values) 549 | and (not desc.strip() or is_valid_text(desc)) 550 | else None 551 | } 552 | ] 553 | 554 | for api in api_list: 555 | try: 556 | if website_info := fetch_api(api, url): 557 | cleaned_title = website_info["title"].strip().replace('\n', ' ').replace('\r', ' ') 558 | cleaned_desc = website_info["description"].strip().replace('\n', ' ').replace('\r', ' ') 559 | return { 560 | "title": cleaned_title, 561 | "description": cleaned_desc 562 | } 563 | except Exception as e: 564 | logger.warning(f"{api['name']} API失败: {str(e)}") 565 | continue 566 | 567 | return None 568 | 569 | 570 | def ask_openai(question: str) -> Optional[Dict[str, str]]: 571 | """调用AI接口生成标题、描述和分类(带重试机制)""" 572 | system_msg: ChatCompletionSystemMessageParam = { 573 | "role": "system", 574 | "content": "我会给你一个网址、网站标题和网站描述,帮我生成网站收藏的标题、中文描述和分类。" 575 | "1. 标题要求简短最好一个词,优先从我给你的标题中取,不要翻译;" 576 | "2. 描述长度控制在120字符内,尽量精简,不要有多余空格,末尾不要带标点;" 577 | "3. 分类必须从以下选项中选择一个:" + str(AI_CATEGORY_OPTIONS) + ",如果未找到则返回 其他;" 578 | "返回给我包含三个字段 title、description、category 的JSON格式。" 579 | } 580 | 581 | user_msg: ChatCompletionUserMessageParam = { 582 | "role": "user", 583 | "content": question 584 | } 585 | 586 | messages: List[ChatCompletionMessageParam] = [system_msg, user_msg] 587 | 588 | for attempt in range(1, AI_CONFIG["max_retries"] + 1): 589 | try: 590 | response = ai_client.chat.completions.create( 591 | model=AI_CONFIG["model"], 592 | messages=messages, 593 | temperature=AI_CONFIG["temperature"], 594 | max_tokens=AI_CONFIG["max_tokens"] 595 | ) 596 | 597 | result = response.choices[0].message.content.strip() 598 | if result == "不知道": 599 | return None 600 | 601 | result = re.sub(r'^```json\s*', '', result) 602 | result = re.sub(r'\s*```$', '', result) 603 | 604 | json_result = json.loads(result) 605 | if "title" in json_result and "description" in json_result and "category" in json_result: 606 | return { 607 | "title": json_result["title"].strip(), 608 | "description": json_result["description"].strip(), 609 | "category": json_result["category"].strip() 610 | } 611 | return None 612 | 613 | except json.JSONDecodeError: 614 | logger.warning(f"AI返回的不是有效的JSON: {result}") 615 | if attempt < AI_CONFIG["max_retries"]: 616 | time.sleep(AI_CONFIG["retry_delay"] * attempt) 617 | continue 618 | return None 619 | except Exception as e: 620 | logger.warning(f"AI调用失败 (尝试 {attempt}/{AI_CONFIG['max_retries']}): {str(e)}") 621 | if attempt < AI_CONFIG["max_retries"]: 622 | time.sleep(AI_CONFIG["retry_delay"] * attempt) 623 | 624 | logger.error(f"AI调用超过最大重试次数 ({AI_CONFIG['max_retries']}次),放弃请求") 625 | return None 626 | 627 | 628 | def clean_website_info(url: str, original_title: str = "", original_desc: str = "") -> Optional[Dict[str, str]]: 629 | """清理并优化网站标题、描述和分类(结合API和AI)""" 630 | invalid_values = {"null", "暂无标题", "暂无描述"} 631 | 632 | cleaned_original_title = clean_html_entities(original_title).strip() if original_title else "" 633 | cleaned_original_desc = clean_html_entities(original_desc).strip() if original_desc else "" 634 | 635 | if cleaned_original_title in invalid_values: 636 | cleaned_original_title = "" 637 | if cleaned_original_desc in invalid_values: 638 | cleaned_original_desc = "" 639 | 640 | # 尝试通过API获取信息 641 | api_info = fetch_website_info(url) 642 | domain = extract_domain(url) 643 | 644 | # 处理API获取到的信息 645 | if api_info: 646 | api_title = api_info["title"] if api_info["title"] not in invalid_values else "" 647 | api_desc = api_info["description"] if api_info["description"] not in invalid_values else "" 648 | 649 | if api_title or api_desc: 650 | if ai_info := ask_openai(f"网址:{domain}\n网站标题:{api_title}\n网站描述:{api_desc}"): 651 | if re.search(r'[\u0400-\u04FF]', ai_info["title"]): 652 | logger.warning(f"AI生成俄文标题,丢弃URL: {url}") 653 | return None 654 | # 验证分类是否有效 655 | if ai_info["category"] not in AI_CATEGORY_OPTIONS: 656 | logger.warning(f"AI返回无效分类 {ai_info['category']},使用默认分类") 657 | ai_info["category"] = "其他" 658 | return ai_info 659 | logger.warning(f"API获取到信息但都无效,丢弃URL: {url}") 660 | return None 661 | 662 | # 白名单域名直接调用AI 663 | if is_domain_whitelisted(url): 664 | prompt = f"网址:{domain}" 665 | if cleaned_original_title: 666 | prompt += f"\n网站标题:{cleaned_original_title}" 667 | if cleaned_original_desc: 668 | prompt += f"\n网站描述:{cleaned_original_desc}" 669 | 670 | if ai_info := ask_openai(prompt): 671 | if re.search(r'[\u0400-\u04FF]', ai_info["title"]): 672 | logger.warning(f"AI生成俄文标题,丢弃URL: {url}") 673 | return None 674 | if ai_info["category"] not in AI_CATEGORY_OPTIONS: 675 | logger.warning(f"AI返回无效分类 {ai_info['category']},使用默认分类") 676 | ai_info["category"] = "其他" 677 | return ai_info 678 | logger.warning(f"白名单域名但AI调用失败,丢弃URL: {url}") 679 | return None 680 | 681 | # 只要有原始标题或描述,就尝试使用AI处理 682 | if cleaned_original_title or cleaned_original_desc: 683 | prompt = f"网址:{domain}" 684 | if cleaned_original_title: 685 | prompt += f"\n网站标题:{cleaned_original_title}" 686 | if cleaned_original_desc: 687 | prompt += f"\n网站描述:{cleaned_original_desc}" 688 | 689 | if ai_info := ask_openai(prompt): 690 | if re.search(r'[\u0400-\u04FF]', ai_info["title"]): 691 | logger.warning(f"AI生成俄文标题,丢弃URL: {url}") 692 | return None 693 | if ai_info["category"] not in AI_CATEGORY_OPTIONS: 694 | logger.warning(f"AI返回无效分类 {ai_info['category']},使用默认分类") 695 | ai_info["category"] = "其他" 696 | return ai_info 697 | logger.warning(f"有原始信息但AI调用失败,丢弃URL: {url}") 698 | return None 699 | 700 | return None 701 | 702 | 703 | # ============================== 图像处理函数 ============================== 704 | def compress_svg(svg_content: str) -> str: 705 | """压缩SVG内容,移除注释和多余空格""" 706 | try: 707 | svg_content = re.sub(r'', '', svg_content, flags=re.DOTALL) 708 | lines = [] 709 | for line in svg_content.split('\n'): 710 | line = line.strip() 711 | if line: 712 | lines.append(' '.join(line.split())) 713 | return ''.join(lines) 714 | except Exception as e: 715 | logger.error(f"SVG压缩失败: {e}") 716 | return svg_content 717 | 718 | 719 | def image_to_svg(img_response: requests.Response) -> str: 720 | """将图片转换为SVG格式""" 721 | try: 722 | img = Image.open(io.BytesIO(img_response.content)) 723 | img_base64 = b64encode(img_response.content).decode('utf-8') 724 | 725 | content_type = img_response.headers.get('Content-Type', 'png') 726 | img_format = content_type.split('/')[-1].lower() 727 | if img_format not in ['png', 'jpeg', 'jpg', 'gif']: 728 | img_format = 'png' 729 | 730 | svg_template = """ 731 | 732 | """ 733 | 734 | return compress_svg(svg_template.format( 735 | img.width, img.height, img_format, img_base64, img.width, img.height 736 | )) 737 | except Exception as e: 738 | logger.error(f"图片转换失败: {e}") 739 | raise ValueError(f"图片转换失败: {e}") 740 | 741 | 742 | def validate_svg(svg_content: str) -> bool: 743 | """验证SVG内容有效性""" 744 | return all(tag in svg_content for tag in ['', ' Tuple[bool, str]: 748 | """下载并保存图片(带重试机制)""" 749 | for attempt in range(1, MAX_IMAGE_RETRIES + 1): 750 | try: 751 | img_src = img_src.lower() 752 | if img_src.startswith('http://'): 753 | img_src = img_src.replace('http://', 'https://') 754 | 755 | log_msg = f"重试下载图片 (尝试 {attempt}/{MAX_IMAGE_RETRIES}): {img_src}" if attempt > 1 else f"开始下载图片: {img_src}" 756 | logger.info(log_msg) 757 | 758 | img_response = requests.get( 759 | img_src, 760 | headers=HTTP_CONFIG['headers'], 761 | timeout=HTTP_CONFIG['timeout'], 762 | allow_redirects=False 763 | ) 764 | img_response.raise_for_status() 765 | 766 | file_path = os.path.join(ICON_DIRECTORY, filename) 767 | 768 | # 处理SVG文件 769 | if img_src.endswith('.svg'): 770 | svg_content = compress_svg(img_response.text) 771 | with open(file_path, 'w', encoding='utf-8') as f: 772 | f.write(svg_content) 773 | return True, "SVG已压缩保存" 774 | 775 | # 处理其他图片格式 776 | svg_content = image_to_svg(img_response) 777 | if validate_svg(svg_content): 778 | with open(file_path, 'w', encoding='utf-8') as f: 779 | f.write(svg_content) 780 | return True, "已转换为SVG" 781 | 782 | logger.warning(f"生成的SVG文件无效: {filename}") 783 | if attempt == MAX_IMAGE_RETRIES: 784 | return False, "生成的SVG文件无效" 785 | time.sleep(INITIAL_RETRY_DELAY * attempt) 786 | 787 | except Exception as e: 788 | error_msg = f"图片下载失败 (尝试 {attempt}/{MAX_IMAGE_RETRIES}) {img_src}: {str(e)}" 789 | logger.warning(error_msg) 790 | 791 | if attempt < MAX_IMAGE_RETRIES: 792 | time.sleep(INITIAL_RETRY_DELAY * attempt) 793 | 794 | return False, f"超过最大重试次数 ({MAX_IMAGE_RETRIES}次)" 795 | 796 | 797 | # ============================== 文件操作函数 ============================== 798 | def clear_directory(directory: str) -> None: 799 | """清理目录(删除所有文件),如果目录不存在则创建""" 800 | if os.path.exists(directory): 801 | for filename in os.listdir(directory): 802 | file_path = os.path.join(directory, filename) 803 | try: 804 | if os.path.isfile(file_path): 805 | os.unlink(file_path) 806 | except Exception as e: 807 | logger.error(f"无法删除 {file_path}: {e}") 808 | else: 809 | os.makedirs(directory) 810 | logger.info(f"创建图标存储目录: {directory}") 811 | 812 | 813 | def save_file(content: str, file_path: str) -> None: 814 | """保存内容到文件""" 815 | try: 816 | with open(file_path, 'w', encoding='utf-8') as f: 817 | f.write(content) 818 | except Exception as e: 819 | logger.error(f"保存文件失败: {file_path}, 错误: {e}") 820 | 821 | 822 | # ============================== 数据处理辅助函数 ============================== 823 | def generate_filename( 824 | url: str, 825 | processed_domains: Dict[str, Dict[str, str]], 826 | processed_data: List[WebsiteData], 827 | lock: threading.Lock 828 | ) -> Tuple[Optional[str], Optional[str]]: 829 | """生成唯一的图标文件名,处理域名冲突""" 830 | url_without_slash = url.rstrip('/').lower() 831 | ext = extract(url_without_slash) 832 | 833 | subdomain = ext.subdomain.lower() 834 | main_domain = ext.domain.lower() 835 | suffix = ext.suffix.lower() 836 | 837 | is_www = subdomain == "www" 838 | is_main_domain = subdomain in ("", "www") 839 | base_key = main_domain if is_main_domain else f"{subdomain}-{main_domain}" 840 | 841 | with lock: 842 | if is_main_domain: 843 | if base_key in processed_domains: 844 | existing = processed_domains[base_key] 845 | 846 | # 已存在www版本,丢弃非www版本 847 | if existing.get("is_www", False) and not is_www: 848 | return None, f"主域名{main_domain}已存在www前缀版本,当前域名被丢弃" 849 | 850 | # 后缀相同,使用现有文件名 851 | if existing["suffix"] == suffix: 852 | return existing["filename"], None 853 | 854 | # 处理不同后缀的情况 855 | old_filename = existing["filename"] 856 | old_suffix = existing["suffix"] 857 | new_existing_filename = f"{main_domain}-{old_suffix}.svg" 858 | 859 | # 重命名现有文件 860 | try: 861 | old_path = os.path.join(ICON_DIRECTORY, old_filename) 862 | new_path = os.path.join(ICON_DIRECTORY, new_existing_filename) 863 | os.rename(old_path, new_path) 864 | except Exception as e: 865 | logger.error(f"重命名文件失败: {old_filename} → {new_existing_filename}, 错误: {e}") 866 | return None, "文件重命名失败" 867 | 868 | # 更新已有数据的文件名 869 | for item in processed_data: 870 | if item.local_filename == old_filename: 871 | item.local_filename = new_existing_filename 872 | break 873 | 874 | # 记录新文件名 875 | existing["filename"] = new_existing_filename 876 | existing["suffix"] = old_suffix 877 | new_filename = f"{main_domain}-{suffix}.svg" 878 | processed_domains[base_key] = { 879 | "filename": new_filename, 880 | "suffix": suffix, 881 | "is_www": is_www, 882 | "base_key": base_key 883 | } 884 | return new_filename, f"主域名相同但后缀不同,新文件名为{new_filename}" 885 | 886 | # 首次处理主域名 887 | filename = f"{main_domain}.svg" 888 | processed_domains[base_key] = { 889 | "filename": filename, 890 | "suffix": suffix, 891 | "is_www": is_www, 892 | "base_key": base_key 893 | } 894 | return filename, f"首次处理主域名,文件名为{filename}" 895 | 896 | else: 897 | # 非主域名情况,处理子域名 898 | if base_key in processed_domains: 899 | existing = processed_domains[base_key] 900 | if existing["suffix"] == suffix: 901 | return existing["filename"], None 902 | 903 | # 不同后缀的子域名 904 | new_filename = f"{subdomain}-{main_domain}-{suffix}.svg" 905 | processed_domains[base_key] = { 906 | "filename": new_filename, 907 | "suffix": suffix, 908 | "is_www": False, 909 | "base_key": base_key 910 | } 911 | return new_filename, f"子域名,前缀相同但后缀不同,文件名为{new_filename}" 912 | 913 | # 首次处理子域名 914 | filename = f"{subdomain}-{main_domain}.svg" 915 | processed_domains[base_key] = { 916 | "filename": filename, 917 | "suffix": suffix, 918 | "is_www": False, 919 | "base_key": base_key 920 | } 921 | return filename, f"首次处理子域名 {subdomain},文件名为{filename}" 922 | 923 | 924 | def expand_color_format(color: str) -> str: 925 | """标准化颜色格式(3位HEX转6位)""" 926 | if not color: 927 | return '' 928 | 929 | if not color.startswith('#'): 930 | return color 931 | 932 | color = color.lstrip('#') 933 | if len(color) == 3: 934 | return f"#{color[0]}{color[0]}{color[1]}{color[1]}{color[2]}{color[2]}" 935 | elif len(color) == 6: 936 | return f"#{color}" 937 | return color 938 | 939 | 940 | # ============================== 核心处理函数 ============================== 941 | def process_url( 942 | item, 943 | processed_normalized_urls: Set[str], 944 | processed_domains: Dict[str, Dict[str, str]], 945 | processed_data: List[WebsiteData], 946 | lock: threading.Lock 947 | ): 948 | """处理单个URL,包括验证、去重、标题描述生成和图标下载""" 949 | original_title = item.get('name', '').strip() 950 | url = item.get('url', '').lower() 951 | # 对原始URL应用域名映射 952 | url = apply_domain_mapping(url) 953 | img_src = item.get('imgSrc', '').lower() 954 | background_color = item.get('backgroundColor', '') 955 | original_desc = item.get('description', '') 956 | 957 | if not url: 958 | logger.warning("丢弃url为空的条目") 959 | return 960 | 961 | if not background_color: 962 | logger.warning(f"丢弃颜色为空的条目: {url}") 963 | return 964 | 965 | # 检查URL可访问性和跳转处理 966 | accessible, error, final_url, normalized_url = check_url_accessibility(url) 967 | if not accessible: 968 | logger.warning(f"不可处理URL: {url} - {error}") 969 | return 970 | 971 | if not normalized_url: 972 | logger.warning(f"无法标准化URL: {final_url}") 973 | return 974 | 975 | # 检查重复URL 976 | with lock: 977 | if normalized_url in processed_normalized_urls: 978 | return 979 | 980 | # 处理标题、描述和分类 981 | website_info = clean_website_info(final_url, original_title, original_desc) 982 | if not website_info: 983 | logger.warning(f"无法生成有效的标题、描述和分类,丢弃URL: {final_url}") 984 | return 985 | 986 | # 处理颜色 987 | expanded_color = expand_color_format(background_color) 988 | if not expanded_color: 989 | logger.warning(f"丢弃扩展后颜色为空的条目: {url}") 990 | return 991 | 992 | # 获取分类ID 993 | category = website_info["category"] 994 | category_id = CATEGORY_IDS.get(category, 14) # 默认14为"其他" 995 | 996 | # 生成文件名 997 | filename, conflict_msg = generate_filename(final_url, processed_domains, processed_data, lock) 998 | if filename is None: 999 | logger.info(f"URL被丢弃: {final_url} - {conflict_msg}") 1000 | return 1001 | 1002 | # 下载并保存图片 1003 | success, status = download_and_save_image(img_src, filename) 1004 | if not success: 1005 | logger.warning(f"图片最终下载失败,丢弃条目: {url} - {status}") 1006 | return 1007 | 1008 | # 保存处理结果 1009 | with lock: 1010 | processed_normalized_urls.add(normalized_url) 1011 | domain = extract(final_url.rstrip('/').lower()) 1012 | processed_domains[domain.domain] = { 1013 | 'suffix': domain.suffix, 1014 | 'filename': filename 1015 | } 1016 | processed_data.append(WebsiteData( 1017 | name=website_info["title"], 1018 | url=final_url, 1019 | description=website_info["description"], 1020 | img_src=img_src, 1021 | local_filename=filename, 1022 | category=category, 1023 | category_id=category_id, 1024 | background_color=expanded_color 1025 | )) 1026 | 1027 | 1028 | def process_category(category: str, url_queue: list, lock: threading.Lock, seen_urls: Set[str]): 1029 | """获取指定分类的所有URL并加入处理队列,并检查是否已存在""" 1030 | logger.info(f"开始获取分类[{category}]的URL") 1031 | base_url = 'https://api.codelife.cc/website/list' 1032 | lang = 'zh' 1033 | name = '' 1034 | source = 'itab' 1035 | page = 1 1036 | 1037 | while True: 1038 | full_url = f"{base_url}?lang={lang}&type={category}&page={page}&name={name}&source={source}" 1039 | try: 1040 | response = requests.get( 1041 | full_url, 1042 | headers=HTTP_CONFIG['headers'], 1043 | timeout=HTTP_CONFIG['timeout'], 1044 | allow_redirects=False 1045 | ) 1046 | response.raise_for_status() 1047 | data = response.json() 1048 | 1049 | # 没有更多数据时退出 1050 | if not data.get('data', []): 1051 | logger.info(f"分类[{category}]的URL获取完成") 1052 | break 1053 | 1054 | # 添加到队列,将URL转为小写,并检查是否已存在 1055 | with lock: 1056 | new_items_count = 0 1057 | for item in data['data']: 1058 | url = item.get('url', '').lower() 1059 | # 对获取到的URL应用域名映射 1060 | url = apply_domain_mapping(url) 1061 | 1062 | if url.startswith('http://'): 1063 | url = url.replace('http://', 'https://') 1064 | 1065 | # 标准化URL用于去重检查 1066 | normalized_url = normalize_url(url) 1067 | 1068 | # 检查URL是否已存在,不存在才添加 1069 | if normalized_url not in seen_urls: 1070 | seen_urls.add(normalized_url) 1071 | item['url'] = url 1072 | item['imgSrc'] = item.get('imgSrc', '').lower() 1073 | url_queue.append(item) 1074 | new_items_count += 1 1075 | 1076 | logger.info(f"分类[{category}]第{page}页添加了{new_items_count}个新URL") 1077 | 1078 | page += 1 1079 | time.sleep(1 + random.uniform(0, 1)) # 随机延迟避免请求过于频繁 1080 | 1081 | except Exception as e: 1082 | logger.error(f"分类[{category}]第{page}页URL获取失败: {e}") 1083 | page += 1 1084 | time.sleep(2 + random.uniform(0, 1)) 1085 | 1086 | 1087 | def generate_json_data(websites: List[WebsiteData]) -> str: 1088 | """生成JSON数据""" 1089 | # 转换为字典列表 1090 | websites_dict = [asdict(website) for website in websites] 1091 | # 按分类ID和名称排序 1092 | websites_dict.sort(key=lambda x: (x['category_id'], x['name'])) 1093 | # 转换为JSON 1094 | return json.dumps(websites_dict, ensure_ascii=False, indent=2) 1095 | 1096 | 1097 | # ============================== 主函数 ============================== 1098 | def main() -> None: 1099 | """程序主入口""" 1100 | logger.info("\n" + "=" * 60) 1101 | logger.info("开始执行mTab多分类网站书签导出工具") 1102 | logger.info("=" * 60 + "\n") 1103 | 1104 | # 显示配置信息 1105 | logger.info(f"URL处理线程数量: {MAX_WORKERS}") 1106 | logger.info(f"AI模型: {AI_CONFIG['model']}") 1107 | logger.info(f"AI API密钥环境变量: {AI_CONFIG['api_key_env_var']}") 1108 | logger.info(f"AI最大重试次数: {AI_CONFIG['max_retries']}") 1109 | logger.info(f"最大URL跳转次数: {REDIRECT_CONFIG['max_redirects']}") 1110 | logger.info(f"图片下载最大重试次数: {MAX_IMAGE_RETRIES}") 1111 | logger.info(f"处理分类数量: {len(CATEGORIES)}") 1112 | logger.info(f"处理分类顺序: {CATEGORIES}") 1113 | logger.info(f"域名黑名单数量: {len(DOMAIN_BLACKLIST)}") 1114 | logger.info(f"域名映射规则数量: {len(DOMAIN_MAPPING)}") 1115 | logger.info("\n") 1116 | 1117 | # 初始化数据结构 1118 | clear_directory(ICON_DIRECTORY) 1119 | processed_data: List[WebsiteData] = [] 1120 | processed_normalized_urls: Set[str] = set() 1121 | processed_domains: Dict[str, Dict[str, str]] = {} 1122 | url_queue = [] # 存储所有待处理的URL任务 1123 | queue_lock = threading.Lock() # 队列操作锁 1124 | data_lock = threading.Lock() # 数据操作锁 1125 | seen_urls = set() # 用于跟踪已获取的URL,确保去重 1126 | 1127 | # 第一步:多线程获取所有分类的URL(按原始顺序) 1128 | logger.info("===== 开始收集所有分类的URL =====") 1129 | with ThreadPoolExecutor(max_workers=min(len(CATEGORIES), 2)) as category_executor: 1130 | futures = [ 1131 | category_executor.submit(process_category, category, url_queue, queue_lock, seen_urls) 1132 | for category in CATEGORIES 1133 | ] 1134 | for future in as_completed(futures): 1135 | try: 1136 | future.result() 1137 | except Exception as e: 1138 | logger.error(f"分类URL获取线程出错: {e}") 1139 | 1140 | logger.info(f"\n共收集到 {len(url_queue)} 个不重复的URL待处理\n") 1141 | 1142 | # 第二步:多线程处理所有URL 1143 | logger.info("===== 开始多线程处理URL =====") 1144 | with ThreadPoolExecutor(max_workers=MAX_WORKERS) as url_executor: 1145 | pbar = tqdm(total=len(url_queue), desc="处理URL进度") 1146 | 1147 | def process_with_progress(item): 1148 | process_url(item, processed_normalized_urls, processed_domains, processed_data, data_lock) 1149 | pbar.update(1) 1150 | 1151 | futures = [ 1152 | url_executor.submit(process_with_progress, item) 1153 | for item in url_queue 1154 | ] 1155 | 1156 | for future in as_completed(futures): 1157 | try: 1158 | future.result() 1159 | except Exception as e: 1160 | logger.error(f"URL处理线程出错: {e}") 1161 | 1162 | pbar.close() 1163 | 1164 | # 后续处理与结果展示 1165 | logger.info("\n" + "=" * 60) 1166 | logger.info(f"所有URL处理完成,共获取 {len(processed_data)} 条不重复数据") 1167 | logger.info("=" * 60 + "\n") 1168 | 1169 | if processed_data: 1170 | # 分类统计 1171 | print("\n按分类统计:") 1172 | category_counts = Counter(item.category for item in processed_data) 1173 | for cat, count in category_counts.items(): 1174 | print(f"- {cat}: {count} 条") 1175 | 1176 | # 数据示例 1177 | print("\n前5条数据示例:") 1178 | for i, item in enumerate(processed_data[:5], 1): 1179 | print(f"{i}. [{item.category}] {item.name}") 1180 | print(f" URL: {item.url}") 1181 | print(f" 描述: {item.description}") 1182 | print(f" 分类ID: {item.category_id}") 1183 | print(f" 本地文件: {item.local_filename}") 1184 | print(f" 背景颜色: {item.background_color}\n") 1185 | 1186 | # 生成JSON文件 1187 | json_content = generate_json_data(processed_data) 1188 | save_file(json_content, JSON_OUTPUT_FILE) 1189 | print(f"JSON数据文件已生成: {JSON_OUTPUT_FILE}") 1190 | else: 1191 | logger.warning("未处理任何数据") 1192 | 1193 | logger.info("\n" + "=" * 60) 1194 | logger.info("mTab多分类网站书签导出工具执行完成") 1195 | logger.info("=" * 60 + "\n") 1196 | 1197 | 1198 | if __name__ == "__main__": 1199 | main() 1200 | --------------------------------------------------------------------------------