├── api
└── __init__.py
├── utils
├── __init__.py
├── constants.py
├── logger.py
├── types.py
├── anti_risk_manager.py
├── config_manager.py
├── fetcher.py
└── csv_manager.py
├── webui
├── __init__.py
├── static
│ ├── favicon.ico
│ └── css
│ │ └── custom.css
└── templates
│ ├── base.html
│ ├── index.html
│ ├── config.html
│ └── tasks.html
├── requirements.txt
├── pictures
├── p1.png
├── p3.png
├── p4.png
├── Logo.ico
├── Logo.png
├── logo2.ico
├── logo2.png
├── p2-1.png
├── p2-2.png
├── example-1.png
├── example-2.png
├── example-3.png
└── example-4.png
├── webui_requirements.txt
├── LICENSE.txt
├── .gitignore
├── start_webui.py
├── README_ZH.md
├── tools
└── dir_tree_size.py
├── main.py
├── flatten.py
├── README.md
└── extractors.py
/api/__init__.py:
--------------------------------------------------------------------------------
1 | # API package
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Utils package
--------------------------------------------------------------------------------
/webui/__init__.py:
--------------------------------------------------------------------------------
1 | # WebUI Package
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | httpx>=0.24.0
2 | # asyncio intentionally omitted: it is part of the Python standard library; the obsolete PyPI "asyncio" package breaks on modern Python
3 | PyYAML>=6.0
4 | psutil>=5.8.0
--------------------------------------------------------------------------------
/pictures/p1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/p1.png
--------------------------------------------------------------------------------
/pictures/p3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/p3.png
--------------------------------------------------------------------------------
/pictures/p4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/p4.png
--------------------------------------------------------------------------------
/pictures/Logo.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/Logo.ico
--------------------------------------------------------------------------------
/pictures/Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/Logo.png
--------------------------------------------------------------------------------
/pictures/logo2.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/logo2.ico
--------------------------------------------------------------------------------
/pictures/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/logo2.png
--------------------------------------------------------------------------------
/pictures/p2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/p2-1.png
--------------------------------------------------------------------------------
/pictures/p2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/p2-2.png
--------------------------------------------------------------------------------
/webui_requirements.txt:
--------------------------------------------------------------------------------
1 | flask>=2.3.0
2 | flask-socketio>=5.3.0
3 | eventlet>=0.33.0
4 | PyYAML>=6.0
--------------------------------------------------------------------------------
/pictures/example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/example-1.png
--------------------------------------------------------------------------------
/pictures/example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/example-2.png
--------------------------------------------------------------------------------
/pictures/example-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/example-3.png
--------------------------------------------------------------------------------
/pictures/example-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/pictures/example-4.png
--------------------------------------------------------------------------------
/webui/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dongshuyan/BiliSyncer/HEAD/webui/static/favicon.ico
--------------------------------------------------------------------------------
/utils/constants.py:
--------------------------------------------------------------------------------
"""
Shared constants.
"""

# Prefixes used when naming task folders on disk: each downloaded task's
# directory starts with one of these (content type + "-"), followed by the
# task title. Presumably consumers match on these prefixes to recognise
# previously created task directories — confirm against the callers.
TASK_FOLDER_PREFIXES = [
    "投稿视频-",
    "番剧-",
    "收藏夹-",
    "视频列表-",
    "视频合集-",
    "UP主-",
    "稍后再看-",
    "课程-",
]
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 [Your Name]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/webui/static/css/custom.css:
--------------------------------------------------------------------------------
/* Custom styles */
.fade-in {
    animation: fadeIn 0.5s ease-in;
}

@keyframes fadeIn {
    from { opacity: 0; }
    to { opacity: 1; }
}

.pulse-animation {
    animation: pulse 2s infinite;
}

@keyframes pulse {
    0% { transform: scale(1); }
    50% { transform: scale(1.05); }
    100% { transform: scale(1); }
}

/* Task card animation (lift + shadow on hover) */
.task-card {
    transition: all 0.3s ease;
}

.task-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}

/* Progress bar animation */
.progress-bar {
    transition: width 0.6s ease;
}

/* Log container scrollbar styling (WebKit-only selectors) */
.log-container::-webkit-scrollbar {
    width: 8px;
}

.log-container::-webkit-scrollbar-track {
    background: #2d2d2d;
}

.log-container::-webkit-scrollbar-thumb {
    background: #666;
    border-radius: 4px;
}

.log-container::-webkit-scrollbar-thumb:hover {
    background: #888;
}

/* Responsive adjustments: hide the sidebar on narrow screens */
@media (max-width: 768px) {
    .sidebar {
        display: none;
    }
    
    .main-content {
        margin-left: 0 !important;
    }
}
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | """
2 | 简化版日志模块
3 | """
4 |
5 | import sys
6 | from datetime import datetime
7 | from typing import Literal, Callable, Optional
8 |
LogLevel = Literal["INFO", "WARNING", "ERROR", "DEBUG"]


class Logger:
    """Minimal console logger with an optional global callback hook.

    Console output is coloured with ANSI escapes; when a callback is
    registered (e.g. by the WebUI), every record is also forwarded to it
    as ``callback(level, message, category)``.
    """

    # ANSI colour per level; unknown levels render uncoloured.
    _COLORS = {
        "INFO": "\033[92m",     # green
        "WARNING": "\033[93m",  # yellow
        "ERROR": "\033[91m",    # red
        "DEBUG": "\033[94m",    # blue
    }
    _RESET = "\033[0m"

    # Global sink shared by all call sites; None means console-only.
    _callback: Optional[Callable] = None

    @classmethod
    def set_callback(cls, callback: Optional[Callable]):
        """Register (or clear, with None) the global log callback."""
        cls._callback = callback

    @classmethod
    def _send_to_callback(cls, level: str, message: str, category: Optional[str] = None):
        """Forward one record to the callback; sink errors are swallowed."""
        sink = cls._callback
        if not sink:
            return
        try:
            sink(level, message, category)
        except Exception:
            # A broken callback must never take down the logging caller.
            pass

    @staticmethod
    def _format_message(level: LogLevel, message: str) -> str:
        """Render one line as ``[HH:MM:SS] LEVEL: message`` with ANSI colour."""
        stamp = datetime.now().strftime("%H:%M:%S")
        colour = Logger._COLORS.get(level, "")
        return f"[{stamp}] {colour}{level}{Logger._RESET}: {message}"

    @classmethod
    def info(cls, message: str):
        """Log an informational message."""
        print(cls._format_message("INFO", message))
        cls._send_to_callback("info", message)

    @classmethod
    def warning(cls, message: str):
        """Log a warning message."""
        print(cls._format_message("WARNING", message))
        cls._send_to_callback("warning", message)

    @classmethod
    def error(cls, message: str):
        """Log an error message (console output goes to stderr)."""
        print(cls._format_message("ERROR", message), file=sys.stderr)
        cls._send_to_callback("error", message)

    @classmethod
    def debug(cls, message: str):
        """Log a debug message."""
        print(cls._format_message("DEBUG", message))
        cls._send_to_callback("debug", message)

    @classmethod
    def custom(cls, title: str, badge: str):
        """Log a message tagged with a custom badge instead of a level."""
        print(f"[{badge}] {title}")
        cls._send_to_callback("custom", title, badge)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | .cunzhi-memory/
30 |
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | # BiliSyncer specific ignores
134 | # Configuration files (含有敏感信息如SESSDATA)
135 | config/
136 | *.yaml
137 |
138 | # 下载目录
139 | downloads/
140 | output/
141 | temp/
142 |
143 | # 状态文件
144 | *.csv
145 | logs/
146 | *.log
147 |
148 | # IDE files
149 | .vscode/
150 | .idea/
151 | *.swp
152 | *.swo
153 | *~
154 |
155 | # OS files
156 | .DS_Store
157 | .DS_Store?
158 | ._*
159 | .Spotlight-V100
160 | .Trashes
161 | ehthumbs.db
162 | Thumbs.db
163 |
164 | # Temporary files
165 | *.tmp
166 | *.temp
167 | *.bak
168 | *.backup
169 | .cunzhi-memory
170 | .cunzhi-memory/
171 |
--------------------------------------------------------------------------------
/utils/types.py:
--------------------------------------------------------------------------------
1 | """
2 | 类型定义
3 | """
4 |
5 | from typing import TypedDict, NamedTuple, Union, Optional
6 | from pathlib import Path
7 |
8 |
class BilibiliId(NamedTuple):
    """Base class for every bilibili identifier; wraps the raw id string."""
    value: str

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        # Render like __str__ so ids also read cleanly inside containers.
        return self.value

    def to_dict(self) -> dict[str, str]:
        raise NotImplementedError("请不要直接使用 BilibiliId")
21 |
22 |
class AvId(BilibiliId):
    """Abstract base for video ids (aid/bvid); use AId or BvId instead."""

    def to_dict(self) -> dict[str, str]:
        raise NotImplementedError("请不要直接使用 AvId")

    def to_url(self) -> str:
        raise NotImplementedError("请不要直接使用 AvId")
31 |
32 |
class AId(AvId):
    """Numeric av-style video id (aid)."""

    def to_dict(self):
        # aid is set, bvid left empty — the two are mutually exclusive.
        return dict(aid=self.value, bvid="")

    def to_url(self) -> str:
        return "https://www.bilibili.com/video/av" + self.value
41 |
42 |
class BvId(AvId):
    """BV-style video id (bvid)."""

    def to_dict(self):
        # bvid is set, aid left empty — the two are mutually exclusive.
        return dict(aid="", bvid=self.value)

    def to_url(self) -> str:
        return "https://www.bilibili.com/video/" + self.value
51 |
52 |
class CId(BilibiliId):
    """Video content id (API key: cid)."""

    def to_dict(self):
        return {"cid": self.value}
58 |
59 |
class EpisodeId(BilibiliId):
    """Bangumi episode id (API key: episode_id)."""

    def to_dict(self):
        return {"episode_id": self.value}
65 |
66 |
class MediaId(BilibiliId):
    """Bangumi media id (API key: media_id)."""

    def to_dict(self):
        return {"media_id": self.value}
72 |
73 |
class SeasonId(BilibiliId):
    """Bangumi season id (API key: season_id)."""

    def to_dict(self):
        return {"season_id": self.value}
79 |
80 |
class MId(BilibiliId):
    """User id (API key: mid)."""

    def to_dict(self):
        return {"mid": self.value}
86 |
87 |
class FId(BilibiliId):
    """Favorite-folder id (API key: fid)."""

    def to_dict(self):
        return {"fid": self.value}
93 |
94 |
class SeriesId(BilibiliId):
    """Video series/collection id (API key: series_id)."""

    def to_dict(self):
        return {"series_id": self.value}
100 |
101 |
class VideoInfo(TypedDict):
    """Metadata for a single video entry."""
    id: int
    name: str
    avid: AvId
    cid: CId
    title: str
    path: Path
    pubdate: int  # publish time (Unix timestamp)
    status: Optional[str]  # video state: pending / ready / unavailable
    episode_id: Optional[str]  # bangumi episode id (bangumi videos only)
    author: Optional[str]  # uploader/author name
    duration: Optional[int]  # duration; unit not stated here — presumably seconds, confirm against producer
    is_multi_part: Optional[bool]  # whether this is a multi-part (multi-P) video
    total_parts: Optional[int]  # total number of parts (P)
    folder_size: Optional[int]  # downloaded folder size, in bytes
118 |
119 |
class VideoListData(TypedDict):
    """A titled list of video entries."""
    title: str
    videos: list[VideoInfo]
124 |
125 |
class DownloadOptions(TypedDict):
    """Options handed to the downloader."""
    output_dir: Path
    sessdata: str | None  # SESSDATA cookie value; None for anonymous access
130 |
--------------------------------------------------------------------------------
/start_webui.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | BiliSyncer WebUI 启动脚本
4 | """
5 |
6 | import sys
7 | import os
8 | import socket
9 | import random
10 | import argparse
11 | import webbrowser
12 | import threading
13 | import time
14 | from pathlib import Path
15 |
def find_available_port(start_port=15000, max_port=65535):
    """Pick a free localhost TCP port in [start_port, max_port].

    Tries up to 100 random ports first (to spread chosen ports around),
    then falls back to a linear scan of the whole range.

    Raises:
        RuntimeError: when no port in the range can be bound.
    """
    def _can_bind(candidate):
        # A successful bind on localhost means the port is currently free.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(('localhost', candidate))
            except OSError:
                return False
            return True

    # Phase 1: random probing.
    for _ in range(100):
        candidate = random.randint(start_port, max_port)
        if _can_bind(candidate):
            return candidate

    # Phase 2: deterministic sequential scan as a fallback.
    for candidate in range(start_port, max_port + 1):
        if _can_bind(candidate):
            return candidate

    raise RuntimeError("无法找到可用端口")
37 |
def is_port_available(port: int) -> bool:
    """Return True when `port` can currently be bound on localhost."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('localhost', port))
    except OSError:
        # Bind refused — the port is taken (or otherwise unusable).
        return False
    else:
        return True
    finally:
        probe.close()
46 |
def open_browser(url, delay=2):
    """Open `url` in the default browser after a blocking `delay`-second sleep.

    Meant to run in a background thread so the server can start meanwhile.
    """
    time.sleep(delay)
    webbrowser.open(url)
51 |
def main():
    """CLI entry point: check deps, pick a port, then launch the WebUI server."""
    parser = argparse.ArgumentParser(description="启动 BiliSyncer WebUI")
    parser.add_argument("-p", "--port", type=int, help="指定 WebUI 使用的端口号")
    args = parser.parse_args()

    # Verify the WebUI dependencies are importable before doing anything else.
    try:
        import flask
        import flask_socketio
    except ImportError as e:
        print(f"缺少依赖: {e}")
        print("请先安装WebUI依赖:")
        print("pip install -r webui_requirements.txt")
        sys.exit(1)

    # Run relative to this script so relative paths (config/, webui/) resolve.
    script_dir = Path(__file__).parent
    os.chdir(script_dir)

    # Port selection: honour --port when valid and free, otherwise search.
    # NOTE(review): `if args.port:` also treats an explicit `--port 0` as
    # "not specified" (0 is falsy) — harmless here since 0 is invalid anyway.
    if args.port:
        if not (1 <= args.port <= 65535):
            print(f"❌ 端口 {args.port} 无效,请使用 1~65535")
            sys.exit(1)
        if not is_port_available(args.port):
            print(f"❌ 端口 {args.port} 已被占用,请选择其他端口")
            sys.exit(1)
        port = args.port
    else:
        try:
            port = find_available_port()
        except RuntimeError as e:
            print(f"❌ {e}")
            sys.exit(1)

    # Imported here so the dependency check above runs first.
    from webui.app import app, socketio

    url = f"http://localhost:{port}"

    print("=" * 60)
    print("🎉 BiliSyncer WebUI 启动中...")
    print("📂 工作目录:", script_dir)
    print(f"🌐 访问地址: {url}")
    print(f"🔌 使用端口: {port}")
    print("🔄 支持功能:")
    print("   • 批量下载 B站视频/收藏夹/空间等")
    print("   • 断点续传和任务管理")
    print("   • 批量更新所有任务")
    print("   • 实时日志显示")
    print("=" * 60)
    print("🚀 正在自动打开浏览器...")

    # Open the browser from a daemon thread after a short delay, so the
    # server below has time to come up first.
    browser_thread = threading.Thread(target=open_browser, args=(url,))
    browser_thread.daemon = True
    browser_thread.start()

    try:
        socketio.run(app, host='0.0.0.0', port=port, debug=False)
    except KeyboardInterrupt:
        print("\n👋 WebUI已关闭")
    except Exception as e:
        print(f"❌ 启动失败: {e}")
        sys.exit(1)
117 |
118 | if __name__ == '__main__':
119 | main()
120 |
--------------------------------------------------------------------------------
/utils/anti_risk_manager.py:
--------------------------------------------------------------------------------
1 | """
2 | 风控管理器
3 | 用于检测和管理Bilibili的风控状态
4 | """
5 |
6 | from typing import List, Dict, Any, Optional
7 | from .logger import Logger
8 | from .fetcher import Fetcher
9 | from extractors import extract_video_list
10 | from api.bilibili import RISK_CONTROL_DETECTED
11 |
12 |
class AntiRiskManager:
    """Tracks Bilibili risk-control state using previously successful URLs as probes."""

    def __init__(self):
        # True while we believe requests are being blocked by risk control.
        self.is_risk_controlled = False
        # Known-good URLs kept for probing the current risk-control state.
        self.successful_urls: List[Dict[str, Any]] = []
        self.max_urls = 10  # keep at most this many probe URLs

    def add_successful_url(self, url: str, url_type: str) -> None:
        """Remember a URL that worked (deduplicated, FIFO-capped at max_urls)."""
        if any(entry['url'] == url for entry in self.successful_urls):
            return

        self.successful_urls.append({
            'url': url,
            'type': url_type,
            'timestamp': None  # placeholder; a real timestamp could be recorded here
        })

        # Evict the oldest entries once the cap is exceeded.
        while len(self.successful_urls) > self.max_urls:
            self.successful_urls.pop(0)

        Logger.debug(f"已添加成功URL: {url} (类型: {url_type})")

    async def check_risk_control(self, fetcher: Fetcher) -> bool:
        """Probe with a known-good URL; return True when risk control appears active."""
        if not self.successful_urls:
            Logger.warning("没有可用的测试URL,无法检测风控状态")
            return False

        # Probe with the oldest stored URL.
        probe = self.successful_urls[0]
        Logger.info(f"使用测试URL检测风控状态: {probe['url']} (类型: {probe['type']})")

        try:
            # Fetch the first page of the video list for the probe URL.
            result = await extract_video_list(fetcher, probe['url'])

            # The extractor hands back a sentinel when risk control triggers.
            if result == RISK_CONTROL_DETECTED:
                Logger.warning("测试URL返回风控检测指令,确认受到风控")
                return True

            if result.get("videos", []):
                Logger.info("测试URL可以正常获取视频列表,未受到风控")
                return False

            Logger.warning("测试URL无法获取视频列表,可能受到风控")
            return True

        except Exception as exc:
            # Any failure while probing is treated as evidence of risk control.
            Logger.warning(f"测试URL时出错,可能受到风控: {exc}")
            return True

    async def check_risk_resolved(self, fetcher: Fetcher) -> bool:
        """Probe again; return True (and clear the flag) once risk control has lifted."""
        if not self.successful_urls:
            Logger.warning("没有可用的测试URL,无法检测风控解除状态")
            return False

        probe = self.successful_urls[0]
        Logger.info(f"使用测试URL检测风控解除状态: {probe['url']} (类型: {probe['type']})")

        try:
            result = await extract_video_list(fetcher, probe['url'])

            if result == RISK_CONTROL_DETECTED:
                Logger.warning("测试URL返回风控检测指令,风控仍未解除")
                return False

            fetched = result.get("videos", [])

            # Require several videos before declaring recovery, so a
            # partially-throttled response does not count as resolved.
            if fetched and len(fetched) >= 5:
                Logger.info("检测到风控已解除,可以继续获取视频列表")
                self.is_risk_controlled = False
                return True

            Logger.warning("风控仍未解除,继续等待")
            return False

        except Exception as exc:
            Logger.warning(f"检测风控解除状态时出错: {exc}")
            return False

    def set_risk_controlled(self, status: bool) -> None:
        """Record the risk-control flag and log the transition."""
        self.is_risk_controlled = status
        if status:
            Logger.warning("已设置风控状态为True")
        else:
            Logger.info("已设置风控状态为False")

    def get_risk_status(self) -> Dict[str, Any]:
        """Snapshot of the current risk-control state for display/reporting."""
        return {
            "is_risk_controlled": self.is_risk_controlled,
            "successful_urls_count": len(self.successful_urls),
            "successful_urls": self.successful_urls
        }

    def get_test_urls(self) -> List[Dict[str, Any]]:
        """Shallow copy of the stored probe URLs."""
        return list(self.successful_urls)

    def clear_test_urls(self) -> None:
        """Forget every stored probe URL."""
        del self.successful_urls[:]
        Logger.info("已清空测试URL列表")
130 |
131 |
132 | # 全局风控管理器实例
133 | _anti_risk_manager: Optional[AntiRiskManager] = None
134 |
135 |
def get_anti_risk_manager() -> AntiRiskManager:
    """Return the process-wide AntiRiskManager, creating it lazily on first call."""
    global _anti_risk_manager
    if _anti_risk_manager is None:
        _anti_risk_manager = AntiRiskManager()
    return _anti_risk_manager
142 |
143 |
def reset_anti_risk_manager() -> None:
    """Drop the global manager; the next get_anti_risk_manager() builds a fresh one."""
    global _anti_risk_manager
    _anti_risk_manager = None
148 |
--------------------------------------------------------------------------------
/utils/config_manager.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | 配置文件管理模块
4 | """
5 |
6 | import os
7 | import yaml
8 | from pathlib import Path
9 | from typing import Dict, List, Any, Optional
10 | from datetime import datetime
11 |
class ConfigManager:
    """Manages named YAML configuration files stored in one directory."""

    def __init__(self, config_dir: Optional[Path] = None):
        """Remember (and create, if missing) the configuration directory.

        Args:
            config_dir: directory holding the YAML files; defaults to the
                ``config`` folder beside this package.
        """
        default_dir = Path(__file__).parent.parent / "config"
        self.config_dir = config_dir if config_dir is not None else default_dir
        self.config_dir.mkdir(exist_ok=True)

    def list_configs(self) -> List[Dict[str, Any]]:
        """Load every ``*.yaml`` config; unreadable entries are skipped with a warning."""
        found: List[Dict[str, Any]] = []

        for path in self.config_dir.glob("*.yaml"):
            try:
                loaded = self.load_config(path.stem)
                if loaded:
                    # Expose which file the config came from.
                    loaded['filename'] = path.stem
                    found.append(loaded)
            except Exception as exc:
                print(f"警告: 无法加载配置文件 {path}: {exc}")

        return found

    def load_config(self, name: str) -> Optional[Dict[str, Any]]:
        """Read and parse ``<name>.yaml``; None when missing or unreadable."""
        path = self.config_dir / f"{name}.yaml"

        if not path.exists():
            return None

        try:
            with open(path, 'r', encoding='utf-8') as handle:
                return yaml.safe_load(handle)
        except Exception as exc:
            print(f"错误: 无法读取配置文件 {path}: {exc}")
            return None

    def save_config(self, name: str, config: Dict[str, Any]) -> bool:
        """Write ``config`` to ``<name>.yaml``; returns success.

        Note: mutates the passed-in dict — ``updated_at`` is refreshed and
        ``created_at`` is stamped on first save.
        """
        path = self.config_dir / f"{name}.yaml"

        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        config['updated_at'] = stamp
        config.setdefault('created_at', stamp)

        try:
            with open(path, 'w', encoding='utf-8') as handle:
                yaml.dump(config, handle, default_flow_style=False,
                          allow_unicode=True, indent=2)
        except Exception as exc:
            print(f"错误: 无法保存配置文件 {path}: {exc}")
            return False
        return True

    def delete_config(self, name: str) -> bool:
        """Remove ``<name>.yaml``; False when absent or deletion fails."""
        path = self.config_dir / f"{name}.yaml"

        if not path.exists():
            return False

        try:
            path.unlink()
        except Exception as exc:
            print(f"错误: 无法删除配置文件 {path}: {exc}")
            return False
        return True

    def create_default_config(self) -> Dict[str, Any]:
        """Fresh template dict for a brand-new configuration."""
        return {
            'name': '新配置',
            'description': '用户自定义配置',
            'output_dir': '~/Downloads',
            'sessdata': '',
            'vip_strict': False,
            'save_cover': False,
            'debug': False,
            'extra_args': []
        }

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """Return a list of human-readable problems; an empty list means valid."""
        errors: List[str] = []

        # Presence checks first, in a fixed order.
        for required in ('name', 'output_dir'):
            if required not in config:
                errors.append(f"缺少必需字段: {required}")

        # Then type checks, only for keys that are present.
        type_rules = (
            ('name', str, "name 必须是字符串"),
            ('output_dir', str, "output_dir 必须是字符串"),
            ('vip_strict', bool, "vip_strict 必须是布尔值"),
            ('save_cover', bool, "save_cover 必须是布尔值"),
            ('debug', bool, "debug 必须是布尔值"),
            ('extra_args', list, "extra_args 必须是列表"),
        )
        for key, expected, message in type_rules:
            if key in config and not isinstance(config[key], expected):
                errors.append(message)

        return errors

    def get_config_for_download(self, name: str) -> Optional[Dict[str, Any]]:
        """Project the named config down to the parameters the downloader needs."""
        config = self.load_config(name)
        if not config:
            return None

        # Fresh defaults per call so callers can safely mutate the result.
        defaults: Dict[str, Any] = {
            'output_dir': '~/Downloads',
            'sessdata': '',
            'vip_strict': False,
            'save_cover': False,
            'debug': False,
            'extra_args': [],
        }
        return {key: config.get(key, fallback) for key, fallback in defaults.items()}
137 |
138 |
def get_config_manager() -> ConfigManager:
    """Return the shared ConfigManager instance.

    Bug fix: this helper is documented as a singleton accessor, but it used
    to construct a brand-new ConfigManager on every call. The first instance
    is now cached on the function object, so all callers share one manager
    and the config directory is only created once.
    """
    instance = getattr(get_config_manager, "_instance", None)
    if instance is None:
        instance = ConfigManager()
        get_config_manager._instance = instance
    return instance
--------------------------------------------------------------------------------
/utils/fetcher.py:
--------------------------------------------------------------------------------
1 | """
2 | HTTP请求工具类
3 | """
4 |
5 | import asyncio
6 | import httpx
7 | from typing import Any, Dict, Optional
8 | from .logger import Logger
9 |
10 |
11 | class Fetcher:
12 | """HTTP请求工具"""
13 |
14 | def __init__(self, sessdata: Optional[str] = None, proxy: Optional[str] = None, max_retries: int = 3, retry_delay: float = 1.0):
15 | """初始化"""
16 | self.cookies = {}
17 | if sessdata:
18 | self.cookies["SESSDATA"] = sessdata
19 |
20 | self.proxy = proxy
21 | self.max_retries = max_retries
22 | self.retry_delay = retry_delay
23 | self._client: Optional[httpx.AsyncClient] = None
24 |
25 | async def __aenter__(self):
26 | """异步上下文管理器入口"""
27 | self._client = httpx.AsyncClient(
28 | cookies=self.cookies,
29 | proxy=self.proxy,
30 | timeout=httpx.Timeout(30.0, connect=10.0), # 设置连接和总超时
31 | follow_redirects=False, # 不自动跟随重定向
32 | headers={
33 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
34 | "Referer": "https://www.bilibili.com"
35 | },
36 | limits=httpx.Limits(max_keepalive_connections=10, max_connections=20) # 连接池限制
37 | )
38 | return self
39 |
40 | async def __aexit__(self, exc_type, exc_val, exc_tb):
41 | """异步上下文管理器退出"""
42 | if self._client:
43 | await self._client.aclose()
44 |
45 | async def fetch_json(self, url: str, params: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
46 | """获取JSON数据(带重试机制)"""
47 | if not self._client:
48 | raise RuntimeError("Fetcher not initialized. Use 'async with' syntax.")
49 |
50 | last_exception = None
51 |
52 | for attempt in range(self.max_retries + 1):
53 | try:
54 | # 在重试时添加延迟
55 | if attempt > 0:
56 | await asyncio.sleep(self.retry_delay * attempt)
57 | Logger.debug(f"重试请求 ({attempt}/{self.max_retries}): {url}")
58 |
59 | response = await self._client.get(url, params=params)
60 |
61 | if response.status_code == 200:
62 | return response.json()
63 | elif response.status_code == 429:
64 | Logger.warning(f"请求频率限制 (429),等待后重试: {url}")
65 | await asyncio.sleep(5.0) # 频率限制时等待更久
66 | continue
67 | elif response.status_code in [502, 503, 504]:
68 | Logger.warning(f"服务器暂时不可用 ({response.status_code}),重试: {url}")
69 | continue
70 | else:
71 | Logger.error(f"HTTP错误 {response.status_code}: {url}")
72 | return None
73 |
74 | except httpx.ReadTimeout as e:
75 | last_exception = e
76 | Logger.warning(f"读取超时 (尝试 {attempt + 1}/{self.max_retries + 1}): {e}")
77 | continue
78 | except (httpx.ConnectError, httpx.TimeoutException, httpx.RemoteProtocolError) as e:
79 | last_exception = e
80 | Logger.warning(f"网络连接错误 (尝试 {attempt + 1}/{self.max_retries + 1}): {e}")
81 | continue
82 | except Exception as e:
83 | last_exception = e
84 | Logger.error(f"请求失败: {e}")
85 | break
86 |
87 | # 所有重试都失败了
88 | Logger.error(f"请求最终失败 ({url}),已重试 {self.max_retries} 次")
89 | if last_exception:
90 | Logger.error(f"最后一次错误: {last_exception}")
91 | return None
92 |
async def get_redirected_url(self, url: str) -> str:
    """Resolve *url* by following redirects and return the final URL.

    Falls back to returning the original *url* on any error.

    Raises:
        RuntimeError: when the fetcher was not entered via ``async with``.
    """
    if not self._client:
        raise RuntimeError("Fetcher not initialized. Use 'async with' syntax.")

    try:
        # The main client has redirects disabled, so spin up a short-lived
        # client that follows them, reusing the same cookies/proxy.
        redirect_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
            "Referer": "https://www.bilibili.com"
        }
        async with httpx.AsyncClient(
            cookies=self.cookies,
            proxy=self.proxy,
            timeout=httpx.Timeout(30.0, connect=10.0),
            follow_redirects=True,
            headers=redirect_headers,
        ) as redirect_client:
            response = await redirect_client.get(url)
            return str(response.url)
    except Exception as e:
        Logger.error(f"获取重定向URL失败: {e}")
        return url
115 |
async def touch_url(self, url: str) -> bool:
    """Hit *url* once and report whether it answered with HTTP 200.

    Used as a cheap login/session validity probe; any exception counts
    as failure.

    Raises:
        RuntimeError: when the fetcher was not entered via ``async with``.
    """
    if not self._client:
        raise RuntimeError("Fetcher not initialized. Use 'async with' syntax.")

    try:
        response = await self._client.get(url)
        return response.status_code == 200
    except Exception:
        return False
--------------------------------------------------------------------------------
/README_ZH.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # BiliSyncer
5 |
6 | 🎯 **智能的B站内容同步工具** - 自动同步管理、增量更新、批量下载
7 |
8 |
9 | [](https://python.org)
10 | [](LICENSE)
11 | [](webui)
12 |
13 | 🇨🇳 中文 | [🇺🇸 English](README.md)
14 |
15 | ## 🌟 项目简介
16 |
17 | BiliSyncer 是一个专为B站内容持续更新资源设计的智能同步管理工具。专注于解决用户收藏夹、UP主投稿、新番动画等持续更新内容的自动化同步难题,在 yutto 基础上构建了完整的资源管理生态,让内容管理变得简单高效。
18 |
19 | ## ✨ 核心优势
20 |
21 | ### 🎯 智能资源获取与识别
22 | - **精准资源定位** - 自动获取收藏夹、UP主投稿等完整准确的视频清单,无遗漏无冗余
23 | - **智能更新检测** - 自动识别自上次同步后的所有新增内容,避免重复请求和无效操作
24 | - **多平台内容支持** - 全面支持投稿视频、番剧、电影、课程、收藏夹、合集等多种内容类型
25 |
26 | ### 🔄 先进的同步管理机制
27 | - **增量同步技术** - 仅同步新增和变更内容,大幅节省时间和带宽资源
28 | - **断点续传保障** - 网络中断或意外停止后自动恢复,确保下载任务的连续性
29 | - **状态持久化跟踪** - 基于CSV文件的进度管理,确保同步记录永不丢失
30 |
31 | ### 🧹 一键清理与空间管理
32 | - **智能清理功能** - 备份完成后支持一键清理所有下载文件,释放存储空间
33 | - **记录保留机制** - 清理文件的同时完整保留下载记录,为后续增量同步提供基础
34 | - **存储优化策略** - 灵活的文件管理策略,适应不同的存储需求场景
35 |
36 | ### ⚙️ 多样化配置管理
37 | - **多账号支持** - 支持多个B站账号的独立配置和管理,满足不同权限需求
38 | - **差异化配置** - 针对不同下载需求提供独立的参数配置方案
39 | - **配置模板化** - 预设常用配置模板,快速应用到不同任务场景
40 |
41 | ### 📊 可视化监控与分析
42 | - **实时任务监控** - 直观显示所有同步任务的执行状态和进度信息
43 | - **历史记录分析** - 自动统计同步历史,提供详细的任务执行报告
44 | - **资源状态总览** - 一目了然地查看所有资源的同步状态和存储信息
45 |
46 | ### 🔧 精细化任务调度
47 | - **并发任务管理** - 支持多任务并行执行,最大化利用系统资源
48 | - **任务生命周期控制** - 提供任务的启动、暂停、停止、重启等完整控制功能
49 | - **优先级调度** - 支持任务优先级设置,优先处理重要资源
50 |
51 | ### 🎨 直观友好的Web界面
52 | - **现代化设计** - 简洁美观的响应式Web界面,适配各种设备
53 | - **操作简便性** - 直观的操作流程,降低学习成本,提升用户体验
54 | - **功能集成度** - 所有管理功能集中在统一界面,避免复杂的命令行操作
55 |
56 | ### ⚡ 高效的命令行接口
57 | - **批处理能力** - 强大的CLI支持,方便脚本调用和自动化集成
58 | - **参数灵活性** - 丰富的命令行参数,满足高级用户的精细化控制需求
59 | - **程序集成友好** - 易于集成到其他自动化系统和工作流中
60 |
61 | ## 🆚 BiliSyncer vs Yutto vs Yutto-uiya
62 |
63 | | 功能特性 | BiliSyncer | Yutto | Yutto-uiya |
64 | |---------|------------|-------|------------|
65 | | **核心定位** | 持续同步管理 | 强大的CLI下载器 | 简单的网页封装 |
66 | | **同步能力** | ✅ 智能增量同步 | ➖ 需手动重新执行 | ➖ 需手动重新执行 |
67 | | **资源管理** | ✅ 完整生命周期管理 | ➖ 仅下载功能 | ➖ 仅下载功能 |
68 | | **界面类型** | 专业Web仪表板 | 强大命令行界面 | 友好的Streamlit界面 |
69 | | **下载引擎** | 基于yutto构建 | 原创强大引擎 | 基于yutto构建 |
70 | | **批量操作** | ✅ 多任务管理 | ✅ 批量下载支持 | ✅ 基础批量支持 |
71 | | **断点续传** | ✅ 自动检测恢复 | ✅ 内置续传功能 | ✅ 继承yutto续传 |
72 | | **状态持久化** | ✅ CSV文件跟踪 | ➖ 仅会话状态 | ➖ 仅会话状态 |
73 | | **配置管理** | ✅ Web + YAML管理 | ✅ 丰富CLI选项 | ✅ 简单网页表单 |
74 | | **内容组织** | ✅ 结构化文件夹命名 | ✅ 灵活路径模板 | ✅ 基础组织方式 |
75 | | **学习曲线** | 🟢 新手友好 | 🟡 技术用户 | 🟢 非常简单 |
76 | | **适用场景** | 持续内容管理 | 专业用户下载 | 休闲下载使用 |
77 |
78 | ### 🎯 各工具特色
79 |
80 | **Yutto**: 强大基础 - 高性能、高可配置的CLI工具,为技术用户提供最大控制力和性能。
81 |
82 | **Yutto-uiya**: 易用桥梁 - 通过简洁的Web界面将yutto的强大功能带给普通用户,无需复杂配置。
83 |
84 | **BiliSyncer**: 管理层面 - 专注于持续更新内容的自动化同步管理,提供完整的资源生命周期解决方案。
85 |
86 | ## 📱 界面预览
87 |
88 | ### 下载管理界面
89 | 
90 |
91 | ### 批量更新界面
92 | 
93 | 
94 |
95 | ### 任务状态界面
96 | 
97 |
98 | ### 配置管理界面
99 | 
100 |
101 | ## 🚀 快速开始
102 |
103 | ### 环境准备
104 | ```bash
105 | # 安装依赖
106 | pip install yutto
107 | pip install -r requirements.txt
108 | ```
109 |
110 | ### 启动Web界面
111 | ```bash
112 | python start_webui.py
113 | # 访问 http://localhost:5000
114 | ```
115 |
116 | ### 命令行使用
117 | ```bash
118 | # 单次下载
119 | python main.py "https://www.bilibili.com/video/BV1xx411c7mD"
120 |
121 | # 带附加选项
122 | python main.py "URL" --vip-strict --save-cover
123 |
124 | # 批量更新所有配置任务
125 | python main.py --update -c "SESSDATA"
126 |
127 | # 使用自定义配置
128 | python main.py "URL" --config vip
129 | ```
130 |
131 | ## 🔧 配置说明
132 |
133 | 创建 `config/your_config.yaml`:
134 | ```yaml
135 | name: "我的配置"
136 | output_dir: "~/Downloads"
137 | sessdata: "your_sessdata_here"
138 | vip_strict: true
139 | save_cover: true
140 | extra_args: ["--quality", "8K"]
141 | ```
142 |
143 | **获取SESSDATA**:登录 bilibili.com → F12 → Application → Cookies → 复制 `SESSDATA` 值
144 |
145 | ## 🛠️ 辅助工具
146 |
147 | ### 目录占用分析工具
148 |
149 | `tools/dir_tree_size.py` 是一个目录占用分析工具,可以以 tree 风格显示目录结构并按占用空间排序。
150 |
151 | **功能特性**:
152 | - 📊 **Tree 风格显示** - 以树形结构展示目录和文件
153 | - 📈 **智能排序** - 按占用空间从小到大排序,方便快速定位占用空间小的项目
154 | - 🔍 **自动检查** - 自动检查叶子目录(最底层目录)中的视频文件状态
155 | - 📝 **问题报告** - 自动生成检查报告,记录存在问题的目录
156 |
157 | **使用方法**:
158 | ```bash
159 | # 基本使用(会自动检查并生成报告)
160 | python3 tools/dir_tree_size.py "/要分析的目录路径"
161 |
162 | # 不进行检查,仅显示目录结构
163 | python3 tools/dir_tree_size.py "/要分析的目录路径" --no-check
164 | ```
165 |
166 | **检查功能**:
167 | - ✅ **缺少 mp4 文件** - 检测叶子目录中是否缺少 `.mp4` 文件
168 | - ✅ **存在 m4s 文件** - 检测叶子目录中是否存在 `.m4s` 文件(通常是未完成的下载或分片文件)
169 |
170 | **报告生成**:
171 | - 检查报告会自动生成在被检查目录中
172 | - 文件名格式:`目录名_检查报告_时间戳.log`
173 | - 报告包含:问题目录列表、具体问题类型、统计信息
174 |
175 | **示例输出**:
176 | ```
177 | /Volumes/Data-12T-mybook/多媒体资料/视频/Bilibili/ (1.23 TB)
178 | ├── 番剧-33415-名侦探柯南(中配) (50.2 GB)
179 | │ ├── BV1xx411c7mD-第1集 (500 MB)
180 | │ └── BV1xx411c7mE-第2集 (480 MB)
181 | └── 收藏夹-123456-我的收藏 (30.5 GB)
182 |
183 | 已生成检查报告: /path/to/xxx_检查报告_20250116_123456.log
184 | 发现 5 个叶子目录存在问题
185 | ```
186 |
187 | ### 目录扁平化工具
188 |
189 | `flatten.py` 是一个目录扁平化工具,可以将指定层级的目录结构扁平化,将子目录中的文件移动到父目录,并删除空子目录。
190 |
191 | **功能特性**:
192 | - 📁 **层级扁平化** - 将指定深度的目录结构扁平化,简化目录层级
193 | - ⚡ **多线程处理** - 支持多线程并行移动文件,提高处理速度
194 | - 🔍 **预览模式** - 支持 `--dry-run` 模式,预览操作而不实际执行
195 | - 🗑️ **隐藏文件处理** - 可选择删除或保留隐藏文件(以 `.` 开头的文件)
196 | - 🔒 **安全保护** - 自动处理同名文件冲突,避免文件覆盖
197 |
198 | **使用方法**:
199 | ```bash
200 | # 扁平化第 1 层目录(默认删除隐藏文件)
201 | python3 flatten.py "/path/to/directory" -n 1
202 |
203 | # 预览模式,不实际执行操作
204 | python3 flatten.py "/path/to/directory" -n 1 --dry-run
205 |
206 | # 扁平化第 2 层目录,保留隐藏文件,8 线程处理
207 | python3 flatten.py "/path/to/directory" -n 2 --keep-hidden --jobs 8
208 |
209 | # 扁平化第 1 层目录,删除隐藏文件,4 线程处理,预览模式
210 | python3 flatten.py "/path/to/directory" -n 1 --delete-hidden --jobs 4 --dry-run
211 | ```
212 |
213 | **参数说明**:
214 | - `-n, --level` - 要扁平化的层级(相对于根目录,根目录深度为 0),默认 0
215 | - `-j, --jobs` - 最大并行线程数,默认 8
216 | - `--delete-hidden` - 删除所有以 `.` 开头的隐藏文件(默认行为)
217 | - `--keep-hidden` - 保留隐藏文件,不删除
218 | - `--dry-run` - 预览模式,仅打印将要执行的操作,不实际移动/删除文件
219 |
220 | **使用场景**:
221 | - 📦 **简化目录结构** - 将多层嵌套的目录结构扁平化,便于管理
222 | - 🎬 **视频文件整理** - 将下载的视频文件从子目录移动到主目录
223 | - 🧹 **清理空目录** - 扁平化后自动删除空子目录,保持目录整洁
224 |
225 | **注意事项**:
226 | - 工具会自动跳过隐藏目录(以 `.` 开头),保护 `.git` 等系统目录
227 | - 如果目标位置存在同名文件,会自动重命名为 `name__dup1.ext`、`name__dup2.ext` 等
228 | - 建议先使用 `--dry-run` 模式预览操作结果,确认无误后再执行
229 |
230 | ## 🎯 适用场景
231 |
232 | - **内容创作者** - 持续跟踪和备份关注UP主的最新投稿
233 | - **教育工作者** - 自动同步课程更新和教育资源
234 | - **媒体收藏者** - 智能管理收藏夹和追番列表的更新
235 | - **研究人员** - 自动化收集和整理研究相关的视频资料
236 |
237 | ## 🛠️ 技术栈
238 |
239 | 基于 Python 3.8+、Flask、yutto 和现代Web技术构建,确保可靠性和性能。
240 |
241 | ## 🤝 贡献指南
242 |
243 | 欢迎贡献!提交 Issues 或 Pull Requests 来帮助改进 BiliSyncer。
244 |
245 | ## 📜 许可证
246 |
247 | MIT 许可证 - 详见 [LICENSE](LICENSE) 文件。
248 |
249 | ---
250 |
251 | ⭐ **如果这个项目帮助你管理B站内容,请给个Star支持!**
--------------------------------------------------------------------------------
/tools/dir_tree_size.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | 目录占用分析工具:以 tree 风格输出每个子目录/文件的大小
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | import argparse
9 | from pathlib import Path
10 | from typing import List, Tuple, Optional, Dict
11 | from datetime import datetime
12 |
13 |
14 | Entry = Tuple[str, str, int, Optional[List["Entry"]]]
15 |
16 |
def format_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string (B through TB)."""
    if size_bytes <= 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    magnitude = 0
    amount = float(size_bytes)
    # Scale down by 1024 until under one unit (or we hit TB).
    while amount >= 1024 and magnitude < len(units) - 1:
        amount /= 1024
        magnitude += 1
    return f"{amount:.2f} {units[magnitude]}"
29 |
30 |
def check_leaf_directory(dir_path: Path) -> Dict[str, bool]:
    """Inspect a leaf directory for .mp4 / .m4s files (case-insensitive).

    Returns:
        Dict with keys ``has_mp4`` and ``has_m4s``; both False when the
        directory cannot be read.
    """
    flags = {'has_mp4': False, 'has_m4s': False}

    try:
        for entry in dir_path.iterdir():
            if not entry.is_file():
                continue
            lowered = entry.name.lower()
            if lowered.endswith('.mp4'):
                flags['has_mp4'] = True
            elif lowered.endswith('.m4s'):
                flags['has_m4s'] = True
    except (PermissionError, OSError):
        # Unreadable directory: report neither file kind present.
        pass

    return flags
52 |
53 |
def scan_directory(path: Path, leaf_issues: Optional[List[Dict[str, str]]] = None) -> Tuple[int, List[Entry]]:
    """Recursively compute directory sizes and collect child entries.

    Fixes vs. previous version: the ``leaf_issues`` annotation now carries
    ``Optional`` (its default is ``None``), and the leaf-directory test is a
    single check — the old inner ``has_any_subdir`` scan could never differ
    from ``has_subdirs`` because a "dir" entry is only appended on the same
    branch that sets ``has_subdirs``.

    Args:
        path: Directory to scan.
        leaf_issues: Optional accumulator; leaf directories (no
            subdirectories) that are missing an .mp4 file or still contain
            .m4s fragments are appended to it.

    Returns:
        Tuple of (total size in bytes, entries sorted by size ascending).
    """
    if leaf_issues is None:
        leaf_issues = []

    total_size = 0
    entries: List[Entry] = []
    has_subdirs = False

    try:
        items = list(path.iterdir())
    except PermissionError:
        # Unreadable directory counts as empty.
        return 0, []

    for item in items:
        try:
            # Skip symlinks entirely so cycles / external trees are not counted.
            if item.is_symlink():
                continue
            if item.is_dir():
                has_subdirs = True
                child_size, child_entries = scan_directory(item, leaf_issues)
                entries.append(("dir", item.name, child_size, child_entries))
                total_size += child_size
            else:
                size = item.stat().st_size
                entries.append(("file", item.name, size, None))
                total_size += size
        except (PermissionError, OSError):
            continue

    # A non-empty directory with no subdirectories is a leaf: check its
    # video files and record any problems.
    if entries and not has_subdirs:
        check_result = check_leaf_directory(path)
        issues = []
        if not check_result['has_mp4']:
            issues.append("缺少mp4文件")
        if check_result['has_m4s']:
            issues.append("存在m4s文件")

        if issues:
            leaf_issues.append({
                'path': str(path),
                'issues': issues
            })

    # Sort children by occupied space, smallest first.
    entries.sort(key=lambda entry: entry[2])

    return total_size, entries
114 |
115 |
def print_tree(entries: List[Entry], prefix: str = "") -> None:
    """Print *entries* as an ASCII tree, one line per file/directory."""
    last_index = len(entries) - 1
    for position, (entry_type, name, size, children) in enumerate(entries):
        at_end = position == last_index
        branch = "└── " if at_end else "├── "
        print(f"{prefix}{branch}{name} ({format_size(size)})")
        if children is not None:
            # Keep the vertical rule only while siblings remain below.
            print_tree(children, prefix + ("    " if at_end else "│   "))
125 |
126 |
def generate_log_file(target_dir: Path, leaf_issues: List[Dict[str, str]]) -> Optional[Path]:
    """Write a check report listing problematic leaf directories.

    The report is stored inside the checked directory itself, named
    ``<dirname>_检查报告_<timestamp>.log``.

    Returns:
        The report path, or ``None`` when there is nothing to report.
    """
    if not leaf_issues:
        return None

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    report_path = target_dir / f"{target_dir.name}_检查报告_{stamp}.log"

    # Pre-compute the summary statistics.
    lacking_mp4 = sum(1 for info in leaf_issues if "缺少mp4文件" in info['issues'])
    leftover_m4s = sum(1 for info in leaf_issues if "存在m4s文件" in info['issues'])
    both_problems = sum(1 for info in leaf_issues if len(info['issues']) == 2)

    with open(report_path, 'w', encoding='utf-8') as report:
        report.write("目录检查报告\n")
        report.write(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        report.write(f"检查目录: {target_dir}\n")
        report.write("=" * 80 + "\n\n")
        report.write(f"发现 {len(leaf_issues)} 个叶子目录存在问题:\n\n")

        for number, info in enumerate(leaf_issues, 1):
            report.write(f"{number}. {info['path']}\n")
            report.write(f"   问题: {', '.join(info['issues'])}\n\n")

        report.write("=" * 80 + "\n")
        report.write("统计信息:\n\n")
        report.write(f"缺少mp4文件的目录: {lacking_mp4} 个\n")
        report.write(f"存在m4s文件的目录: {leftover_m4s} 个\n")
        report.write(f"同时存在两种问题的目录: {both_problems} 个\n")

    return report_path
161 |
162 |
def parse_args() -> Tuple[Path, bool]:
    """Parse CLI arguments.

    Returns:
        Tuple of (resolved target directory, whether leaf checking is on).

    Raises:
        FileNotFoundError: when the path does not exist.
        NotADirectoryError: when the path is not a directory.
    """
    parser = argparse.ArgumentParser(description="目录占用分析工具(tree 格式输出)")
    parser.add_argument("directory", help="需要分析的目录路径")
    parser.add_argument("--no-check", action="store_true", help="不检查叶子目录的mp4/m4s文件")
    options = parser.parse_args()

    target = Path(options.directory).expanduser().resolve()
    if not target.exists():
        raise FileNotFoundError(f"目录不存在: {target}")
    if not target.is_dir():
        raise NotADirectoryError(f"不是有效目录: {target}")
    return target, not options.no_check
174 |
175 |
def main() -> None:
    """CLI entry point: scan, print the size tree, then report leaf issues."""
    target_dir, enable_check = parse_args()
    leaf_issues: List[Dict[str, str]] = []

    # Only pass the accumulator when checking is enabled.
    if enable_check:
        total, entries = scan_directory(target_dir, leaf_issues)
    else:
        total, entries = scan_directory(target_dir)

    print(f"{target_dir} ({format_size(total)})")
    print_tree(entries)

    if enable_check:
        if leaf_issues:
            # Persist the findings next to the scanned directory.
            log_path = generate_log_file(target_dir, leaf_issues)
            if log_path:
                print(f"\n已生成检查报告: {log_path}")
                print(f"发现 {len(leaf_issues)} 个叶子目录存在问题")
        else:
            print("\n所有叶子目录检查正常,未发现问题")


if __name__ == "__main__":
    main()
200 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Yutto-Batch: 精简版B站批量下载工具
4 | """
5 |
6 | import sys
7 | import asyncio
8 | from typing import Optional
9 | from pathlib import Path
10 |
11 | from batch_downloader import BatchDownloader
12 | from utils.logger import Logger
13 | from utils.config_manager import ConfigManager
14 |
15 |
def print_help():
    """Print CLI usage text: modes, supported URL types, options, examples."""
    help_text = """
BiliSyncer - 精简版B站批量下载工具

用法:
    python main.py [选项]                          # 单个下载模式
    python main.py --update -o <输出目录> [选项]   # 批量更新模式
    python main.py --update -d <任务目录> [选项]   # 定向更新模式
    python main.py --delete -o <输出目录> [选项]   # 批量删除模式
    python main.py --delete -d <任务目录> [选项]   # 定向删除模式

支持的URL类型:
    - 投稿视频: https://www.bilibili.com/video/BV1xx411c7mD
    - 番剧: https://www.bilibili.com/bangumi/play/ss12345
    - 课程: https://www.bilibili.com/cheese/play/ss12345
    - 收藏夹: https://space.bilibili.com/123456/favlist?fid=789012
    - 视频列表: https://space.bilibili.com/123456/lists/789012?type=series
    - 视频合集: https://space.bilibili.com/123456/lists/789012?type=season
    - 个人空间: https://space.bilibili.com/123456
    - 稍后再看: https://www.bilibili.com/watchlater

选项:
    -h, --help           显示此帮助信息
    -o, --output DIR     指定下载目录 (默认: ~/Downloads)
    -c, --cookie STR     设置SESSDATA cookie
    --config NAME        使用指定的配置文件 (不含.yaml扩展名)
    --update             更新模式:检查并下载新增内容
    --delete             删除模式:删除视频文件但保留CSV记录
    -d, --directory DIR  定向模式目录:指定单个任务目录
    --vip-strict         启用严格VIP模式(传递给yutto)
    --save-cover         保存视频封面(传递给yutto)

模式说明:
    单个下载模式    下载指定URL的内容到输出目录
    批量更新模式    扫描输出目录下所有任务,检查并下载新增内容
    定向更新模式    只更新指定的单个任务目录
    批量删除模式    扫描输出目录下所有任务,删除视频文件但保留CSV记录
    定向删除模式    只删除指定单个任务目录的视频文件但保留CSV记录

示例:
    # 单个下载
    python main.py "https://www.bilibili.com/video/BV1xx411c7mD"
    python main.py "https://space.bilibili.com/123456/favlist?fid=789012" -o ./my_downloads

    # 批量更新(扫描~/Downloads下所有任务并更新)
    python main.py --update -o "~/Downloads" -c "cookie"

    # 定向更新(只更新指定任务目录)
    python main.py --update -d "~/Downloads/收藏夹-123456-我的收藏"

    # 批量删除(删除~/Downloads下所有任务的视频文件,保留CSV)
    python main.py --delete -o "~/Downloads"

    # 定向删除(只删除指定任务的视频文件,保留CSV)
    python main.py --delete -d "~/Downloads/收藏夹-123456-我的收藏"

    # 使用配置文件
    python main.py "https://www.bilibili.com/video/BV1xx411c7mD" --config vip
"""
    print(help_text)
77 |
78 |
def _config_extra_args(config_data: dict) -> list:
    """Build the initial yutto pass-through args from a loaded config dict."""
    extra_args = list(config_data.get('extra_args', []))
    if config_data.get('vip_strict', False):
        extra_args.append('--vip-strict')
    if config_data.get('save_cover', False):
        extra_args.append('--save-cover')
    if config_data.get('debug', False):
        extra_args.append('--debug')
    return extra_args


def parse_args():
    """Parse command-line arguments for all run modes.

    Fixes vs. previous version: the normal download mode now applies
    ``expanduser()`` to ``output_dir`` (both the config value and ``-o``),
    matching update/delete mode and the documented ``~`` paths; the
    duplicated config-flag and option-loop handling is factored out.

    Returns:
        Tuple of (url, output_dir, sessdata, extra_args, update_mode,
        delete_mode, target_directory). ``url`` is None in update/delete
        mode; ``target_directory`` is None unless ``-d`` was given.
    """
    args = sys.argv[1:]

    if not args or '-h' in args or '--help' in args:
        print_help()
        sys.exit(0)

    # Optional --config NAME: load defaults from the named YAML config.
    config_name = None
    config_manager = ConfigManager()
    if '--config' in args:
        config_index = args.index('--config')
        if config_index + 1 < len(args):
            config_name = args[config_index + 1]
            # Strip --config NAME from the remaining args.
            args = args[:config_index] + args[config_index + 2:]

    config_data = {}
    if config_name:
        config_data = config_manager.get_config_for_download(config_name) or {}
        if not config_data:
            Logger.error(f"无法加载配置文件: {config_name}")
            sys.exit(1)
        Logger.info(f"使用配置文件: {config_name}")

    update_mode = '--update' in args
    delete_mode = '--delete' in args
    target_directory = None  # Only set by -d/--directory in update/delete mode.

    if update_mode and delete_mode:
        Logger.error("不能同时使用 --update 和 --delete 模式")
        sys.exit(1)

    sessdata = config_data.get('sessdata', None)
    extra_args = _config_extra_args(config_data)

    if update_mode or delete_mode:
        url = None
        output_dir = Path(config_data.get('output_dir', '~/Downloads')).expanduser()
        start = 0
    else:
        # Normal download mode: the first positional argument is the URL.
        url = args[0]
        # expanduser so '~' in configs works here too (previously only
        # update/delete mode expanded it).
        output_dir = Path(config_data.get('output_dir', './downloads')).expanduser()
        start = 1

    i = start
    while i < len(args):
        arg = args[i]
        if arg in ('--update', '--delete'):
            # Mode flags were already consumed above.
            i += 1
        elif arg in ('-o', '--output') and i + 1 < len(args):
            output_dir = Path(args[i + 1]).expanduser()
            i += 2
        elif (update_mode or delete_mode) and arg in ('-d', '--directory') and i + 1 < len(args):
            # Targeted mode: operate on a single task directory.
            target_directory = Path(args[i + 1]).expanduser()
            i += 2
        elif arg in ('-c', '--cookie') and i + 1 < len(args):
            sessdata = args[i + 1]
            i += 2
        else:
            # Anything else (including --vip-strict/--save-cover) is
            # forwarded to yutto untouched.
            extra_args.append(arg)
            i += 1

    return url, output_dir, sessdata, extra_args, update_mode, delete_mode, target_directory
196 |
197 |
async def main():
    """CLI entry point: parse args, then run the chosen flow.

    Dispatches between targeted/batch update, targeted/batch delete, and
    plain single-URL download. Exits with status 1 on user interrupt or
    any unhandled error.
    """
    try:
        url, output_dir, sessdata, extra_args, update_mode, delete_mode, target_directory = parse_args()

        # Ensure the output directory exists before any mode runs.
        output_dir.mkdir(parents=True, exist_ok=True)

        # One downloader instance serves every mode.
        downloader = BatchDownloader(
            output_dir=output_dir,
            sessdata=sessdata,
            extra_args=extra_args,
            original_url=url
        )

        if update_mode:
            if target_directory:
                # Targeted update: refresh a single task directory.
                Logger.info("=== 定向更新模式 ===")
                Logger.info(f"目标任务目录: {target_directory}")
                if extra_args:
                    Logger.info(f"额外参数传递给yutto: {' '.join(extra_args)}")

                await downloader.update_single_task(target_directory)
            else:
                # Batch update: scan output_dir and refresh every task found.
                Logger.info("=== 批量更新模式 ===")
                Logger.info(f"扫描目录: {output_dir}")
                if extra_args:
                    Logger.info(f"额外参数传递给yutto: {' '.join(extra_args)}")

                await downloader.update_all_tasks()

        elif delete_mode:
            if target_directory:
                # Targeted delete: remove one task's video files, keep its CSV.
                Logger.info("=== 定向删除模式 ===")
                Logger.info(f"目标任务目录: {target_directory}")

                await downloader.delete_single_task(target_directory)
            else:
                # Batch delete: remove every task's video files, keep CSVs.
                Logger.info("=== 批量删除模式 ===")
                Logger.info(f"扫描目录: {output_dir}")

                await downloader.delete_all_tasks()

        else:
            # Plain download mode requires a URL.
            if url is None:
                Logger.error("普通下载模式需要提供URL")
                sys.exit(1)

            Logger.info(f"URL: {url}")
            Logger.info(f"输出目录: {output_dir}")
            if extra_args:
                Logger.info(f"额外参数传递给yutto: {' '.join(extra_args)}")

            # Kick off the batch download for the given URL.
            await downloader.download_from_url(url)

        Logger.info("任务完成!")

    except KeyboardInterrupt:
        Logger.info("用户中断操作")
        sys.exit(1)
    except Exception as e:
        Logger.error(f"操作失败: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
--------------------------------------------------------------------------------
/flatten.py:
--------------------------------------------------------------------------------
1 | """
2 | 使用示例:
3 |
4 | 真正执行(默认删除隐藏文件):
5 |
6 | python flatten.py /path/to/p -n 1
7 |
8 |
9 | 只看效果,不动真实文件:
10 |
11 | python flatten.py /path/to/p -n 1 --dry-run
12 |
13 |
14 | 扁平化第 2 层目录,同时保留所有隐藏文件,8 线程 dry-run:
15 |
16 | python flatten.py /path/to/p -n 2 --keep-hidden --jobs 8 --dry-run
17 | python flatten.py /Volumes/Data-12T-mybook/多媒体资料/视频/Bilibili -n 1 --keep-hidden --jobs 8 --dry-run
18 |
19 |
20 | 如需将日志输出到文件(方便审计),可自行扩展一个 --log-file 参数。
21 | """
22 | #!/usr/bin/env python3
23 | # -*- coding: utf-8 -*-
24 |
25 | import argparse
26 | import logging
27 | import os
28 | import shutil
29 | import threading
30 | from concurrent.futures import ThreadPoolExecutor, as_completed
31 | from pathlib import Path
32 |
# Global registry of per-directory locks so concurrent safe_move calls into
# the same target directory cannot race on destination-name collisions.
_dir_locks = {}
_dir_locks_lock = threading.Lock()


def get_dir_lock(directory: Path) -> threading.Lock:
    """Return the process-wide lock guarding *directory* (created lazily)."""
    key = str(directory.resolve())
    # Registry access itself is serialized by _dir_locks_lock.
    with _dir_locks_lock:
        return _dir_locks.setdefault(key, threading.Lock())
48 |
49 |
def safe_move(src: Path, target_dir: Path, dry_run: bool = False) -> None:
    """Move *src* into *target_dir* without clobbering existing files.

    - No-op when *src* already lives in *target_dir* or has vanished.
    - On a name clash, tries name__dup1.ext, name__dup2.ext, ... instead.
    - Serialized per target directory via get_dir_lock() so concurrent
      workers cannot race on the same destination name.
    - With dry_run=True only logs what would happen.
    """
    src = src.resolve()
    target_dir = target_dir.resolve()

    if not src.exists():
        # A previous step may have moved/deleted it already; just note it.
        logging.warning("Source file vanished before move: %s", src)
        return

    if src.parent == target_dir:
        # Already in place; nothing to do.
        return

    with get_dir_lock(target_dir):
        stem, suffix = os.path.splitext(src.name)

        # Candidate 0 is the unchanged name, then __dupN variants.
        attempt = 0
        while True:
            if attempt == 0:
                destination = target_dir / src.name
            else:
                destination = target_dir / f"{stem}__dup{attempt}{suffix}"

            if not destination.exists():
                if dry_run:
                    logging.info("[DRY-RUN] Would move %s -> %s", src, destination)
                else:
                    try:
                        shutil.move(str(src), str(destination))
                    except Exception as e:
                        logging.error("Failed to move %s -> %s: %s", src, destination, e)
                return

            attempt += 1
100 |
101 |
def flatten_all_into(
    root_dir: Path,
    executor: ThreadPoolExecutor,
    delete_hidden: bool,
    dry_run: bool = False,
) -> None:
    """Move every non-hidden file in root_dir's subtree up into root_dir,
    then remove the (now empty) subdirectories.

    delete_hidden=True also deletes every regular file whose name starts
    with "." encountered during the walk. Hidden directories ("."-prefixed)
    are pruned from the walk entirely and left untouched.

    dry_run=True performs no actual moves/deletions; it only logs the
    operations that would be carried out.
    """
    root_dir = root_dir.resolve()
    logging.info(
        "Flattening subtree into: %s%s",
        root_dir,
        " (DRY-RUN)" if dry_run else "",
    )

    files_to_move = []
    hidden_files_to_delete = []
    all_dirs = []

    # Phase 1 - scan: record all pending work without modifying anything yet.
    for dirpath, dirnames, filenames in os.walk(root_dir, topdown=True, followlinks=False):
        current_dir = Path(dirpath)

        # Prune hidden directories ("."-prefixed) so .git etc. stay intact.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]

        # Remember subdirectories for the removal phase.
        if current_dir != root_dir:
            all_dirs.append(current_dir)

        # Classify the files of the current directory.
        for name in filenames:
            file_path = current_dir / name

            # Hidden files: optionally queued for deletion, never moved.
            if name.startswith("."):
                if delete_hidden:
                    hidden_files_to_delete.append(file_path)
                continue

            # Non-hidden file below root_dir must be moved up into it.
            if current_dir != root_dir:
                files_to_move.append(file_path)

    # Phase 2 - delete hidden files (if requested).
    if delete_hidden and hidden_files_to_delete:
        logging.info(
            "%sDeleting %d hidden files under %s",
            "[DRY-RUN] " if dry_run else "",
            len(hidden_files_to_delete),
            root_dir,
        )
        for fpath in hidden_files_to_delete:
            if dry_run:
                logging.info("[DRY-RUN] Would delete hidden file %s", fpath)
            else:
                try:
                    if fpath.exists():
                        fpath.unlink()
                except Exception as e:
                    logging.error("Failed to delete hidden file %s: %s", fpath, e)

    # Phase 3 - move files in parallel via the shared executor.
    if files_to_move:
        logging.info(
            "%sMoving %d files into %s",
            "[DRY-RUN] " if dry_run else "",
            len(files_to_move),
            root_dir,
        )
        futures = [executor.submit(safe_move, src, root_dir, dry_run) for src in files_to_move]
        for fut in as_completed(futures):
            exc = fut.exception()
            if exc:
                logging.error("Error during moving files: %s", exc)

    # Phase 4 - remove subdirectories bottom-up (deepest first).
    if all_dirs:
        # Sort by descending depth so children go before their parents.
        all_dirs.sort(key=lambda d: len(d.relative_to(root_dir).parts), reverse=True)
        for d in all_dirs:
            if dry_run:
                logging.info("[DRY-RUN] Would remove directory: %s", d)
                continue
            try:
                d.rmdir()
                logging.info("Removed directory: %s", d)
            except OSError as e:
                # Non-empty or permission denied: leave it and record why.
                logging.debug("Directory not removed (not empty or permission denied): %s (%s)", d, e)
198 |
199 |
def collect_target_dirs(root: Path, level: int):
    """Collect every directory exactly *level* deep below *root*.

    level == 0 yields just [root]. Hidden directories ('.'-prefixed) are
    pruned from the walk and never returned.
    """
    root = root.resolve()
    if level == 0:
        return [root]

    targets = []
    for dirpath, dirnames, filenames in os.walk(root, topdown=True, followlinks=False):
        # Prune hidden directories before descending.
        dirnames[:] = [name for name in dirnames if not name.startswith(".")]

        here = Path(dirpath)
        depth = 0 if here == root else len(here.relative_to(root).parts)

        if depth == level:
            targets.append(here)
            # Nothing deeper can match; stop walking this subtree.
            dirnames[:] = []

    return targets
228 |
229 |
def parse_args():
    """Build and run the CLI argument parser for the flatten tool."""
    parser = argparse.ArgumentParser(
        description=(
            "将指定目录树在第 n 层进行扁平化:"
            "把所有深度为 n 的目录的子树文件集中到该目录下,并删除其下子目录。"
        )
    )
    parser.add_argument("path", help="要处理的根目录路径 p")
    parser.add_argument(
        "-n", "--level",
        type=int,
        default=0,
        help="要扁平化的层级 n(相对于 p,p 的深度为 0),默认 0",
    )
    parser.add_argument(
        "-j", "--jobs",
        type=int,
        default=8,
        help="最大并行线程数,默认 8",
    )

    # --delete-hidden / --keep-hidden are mutually exclusive;
    # deleting hidden files is the default.
    hidden = parser.add_mutually_exclusive_group()
    hidden.add_argument(
        "--delete-hidden",
        dest="delete_hidden",
        action="store_true",
        help="删除所有以 '.' 开头的隐藏文件(默认行为)",
    )
    hidden.add_argument(
        "--keep-hidden",
        dest="delete_hidden",
        action="store_false",
        help="保留以 '.' 开头的隐藏文件(不删除)",
    )
    parser.set_defaults(delete_hidden=True)

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="仅打印将要执行的操作,不实际移动/删除任何文件或目录",
    )

    return parser.parse_args()
278 |
279 |
def main():
    """CLI entry point: validate arguments, then flatten each target level-n dir."""
    args = parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="[%(levelname)s] %(message)s",
    )

    root = Path(args.path).expanduser().resolve()

    # Guard clauses: bail out early on invalid input.
    if not root.exists():
        logging.error("Path does not exist: %s", root)
        return
    if not root.is_dir():
        logging.error("Path is not a directory: %s", root)
        return
    if args.level < 0:
        logging.error("Level n must be >= 0, got %d", args.level)
        return

    worker_count = max(1, args.jobs)

    # Every directory at exactly depth n is flattened independently.
    targets = collect_target_dirs(root, args.level)
    if not targets:
        logging.info("No directories found at level %d under %s. Nothing to do.", args.level, root)
        return

    logging.info(
        "Found %d target directories at level %d under %s%s",
        len(targets),
        args.level,
        root,
        " (DRY-RUN)" if args.dry_run else "",
    )

    # One shared pool serves the file moves of every target directory.
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        for target in targets:
            flatten_all_into(target, executor, args.delete_hidden, args.dry_run)

    logging.info("Done%s", " (DRY-RUN)" if args.dry_run else "")


if __name__ == "__main__":
    main()
329 |
--------------------------------------------------------------------------------
/webui/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {% block title %}BiliSyncer WebUI{% endblock %}
7 |
8 |
9 |
10 |
63 |
64 |
65 |
66 |
67 |
68 |
113 |
114 |
115 |
116 |
128 |
129 |
130 | {% block content %}{% endblock %}
131 |
132 |
133 |
134 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
265 |
266 | {% block scripts %}{% endblock %}
267 |
268 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # BiliSyncer
5 |
6 | 🎯 **Intelligent Bilibili Content Synchronization Tool** - Automated sync management, incremental updates, and batch downloads
7 |
8 |
9 | [](https://python.org)
10 | [](LICENSE)
11 | [](webui)
12 |
13 | [🇨🇳 中文文档](README_ZH.md) | 🇺🇸 English
14 |
15 | ## 🌟 Overview
16 |
17 | BiliSyncer is an intelligent synchronization management tool specifically designed for continuously updating Bilibili content. It focuses on solving the automated synchronization challenges of user favorites, UP master uploads, anime series, and other continuously updating resources. Built upon yutto, it provides a complete resource management ecosystem that makes content management simple and efficient.
18 |
19 | ## ✨ Core Advantages
20 |
21 | ### 🎯 Intelligent Resource Discovery & Recognition
22 | - **Precise Resource Targeting** - Automatically retrieves complete and accurate video lists from favorites, UP master uploads, etc., without omissions or redundancy
23 | - **Smart Update Detection** - Automatically identifies all new content since the last sync, avoiding redundant requests and invalid operations
24 | - **Comprehensive Content Support** - Full support for user videos, anime, movies, courses, favorites, collections, and more
25 |
26 | ### 🔄 Advanced Synchronization Management
27 | - **Incremental Sync Technology** - Only synchronizes new and changed content, dramatically saving time and bandwidth
28 | - **Resume Protection** - Automatically recovers from network interruptions or unexpected stops, ensuring download continuity
29 | - **Persistent Status Tracking** - CSV-based progress management ensures sync records are never lost
30 |
31 | ### 🧹 One-Click Cleanup & Space Management
32 | - **Smart Cleanup Function** - Supports one-click cleanup of all downloaded files after backup completion, freeing storage space
33 | - **Record Retention Mechanism** - Preserves complete download records while cleaning files, providing foundation for future incremental syncs
34 | - **Storage Optimization Strategy** - Flexible file management strategies adapted to different storage scenarios
35 |
36 | ### ⚙️ Versatile Configuration Management
37 | - **Multi-Account Support** - Supports independent configuration and management of multiple Bilibili accounts for different permission needs
38 | - **Differentiated Configuration** - Provides independent parameter configuration schemes for different download requirements
39 | - **Configuration Templating** - Preset common configuration templates for quick application to different task scenarios
40 |
41 | ### 📊 Visual Monitoring & Analytics
42 | - **Real-Time Task Monitoring** - Intuitively displays execution status and progress information of all sync tasks
43 | - **Historical Record Analysis** - Automatically tracks sync history and provides detailed task execution reports
44 | - **Resource Status Overview** - At-a-glance view of sync status and storage information for all resources
45 |
46 | ### 🔧 Granular Task Scheduling
47 | - **Concurrent Task Management** - Supports parallel execution of multiple tasks, maximizing system resource utilization
48 | - **Task Lifecycle Control** - Provides complete control functions for task start, pause, stop, restart, etc.
49 | - **Priority Scheduling** - Supports task priority settings to prioritize important resources
50 |
51 | ### 🎨 Intuitive & User-Friendly Web Interface
52 | - **Modern Design** - Clean and beautiful responsive web interface adapted to various devices
53 | - **Operational Simplicity** - Intuitive operation flow reduces learning costs and enhances user experience
54 | - **Feature Integration** - All management functions centralized in a unified interface, avoiding complex command-line operations
55 |
56 | ### ⚡ Efficient Command Line Interface
57 | - **Batch Processing Capability** - Powerful CLI support for easy script invocation and automation integration
58 | - **Parameter Flexibility** - Rich command-line parameters meeting advanced users' fine-grained control needs
59 | - **Program Integration Friendly** - Easy integration into other automation systems and workflows
60 |
61 | ## 🆚 BiliSyncer vs Yutto vs Yutto-uiya
62 |
63 | | Feature | BiliSyncer | Yutto | Yutto-uiya |
64 | |---------|------------|-------|------------|
65 | | **Core Purpose** | Continuous sync management | Versatile CLI downloader | Simple WebUI wrapper |
66 | | **Sync Capability** | ✅ Smart incremental sync | ➖ Manual re-execution required | ➖ Manual re-execution required |
67 | | **Resource Management** | ✅ Complete lifecycle management | ➖ Download-only functionality | ➖ Download-only functionality |
68 | | **Interface Type** | Professional Web Dashboard | Powerful Command Line | User-friendly Streamlit UI |
69 | | **Download Engine** | Built on yutto | Original robust engine | Built on yutto |
70 | | **Batch Operations** | ✅ Multi-task management | ✅ Batch download support | ✅ Basic batch support |
71 | | **Resume Downloads** | ✅ Automatic detection | ✅ Built-in resume capability | ✅ Inherits yutto's resume |
72 | | **Status Persistence** | ✅ CSV-based tracking | ➖ Session-based only | ➖ Session-based only |
73 | | **Configuration** | ✅ Web + YAML management | ✅ Rich CLI options | ✅ Simple web forms |
74 | | **Content Organization** | ✅ Structured folder naming | ✅ Flexible path templates | ✅ Basic organization |
75 | | **Learning Curve** | 🟢 Beginner-friendly | 🟡 Technical users | 🟢 Very easy |
76 | | **Use Case** | Continuous content management | Power user downloads | Casual downloading |
77 |
78 | ### 🎯 Each Tool's Strength
79 |
80 | **Yutto**: The robust foundation - powerful, fast, and highly configurable CLI tool perfect for technical users who need maximum control and performance.
81 |
82 | **Yutto-uiya**: The accessibility bridge - brings yutto's power to casual users through a clean, simple web interface without complexity.
83 |
84 | **BiliSyncer**: The management layer - focuses on automated synchronization management of continuously updating content, providing complete resource lifecycle solutions.
85 |
86 | ## 📱 Interface Preview
87 |
88 | ### Download Management Interface
89 | 
90 |
91 | ### Batch Update Interface
92 | 
93 | 
94 |
95 | ### Task Status Interface
96 | 
97 |
98 | ### Configuration Management Interface
99 | 
100 |
101 | ## 🚀 Quick Start
102 |
103 | ### Prerequisites
104 | ```bash
105 | # Install dependencies
106 | pip install yutto
107 | pip install -r requirements.txt
108 | ```
109 |
110 | ### Launch Web Interface
111 | ```bash
112 | python start_webui.py
113 | # Visit http://localhost:5000
114 | ```
115 |
116 | ### Command Line Usage
117 | ```bash
118 | # Single download
119 | python main.py "https://www.bilibili.com/video/BV1xx411c7mD"
120 |
121 | # With additional options
122 | python main.py "URL" --vip-strict --save-cover
123 |
124 | # Batch update all configured tasks
125 | python main.py --update -c "SESSDATA"
126 |
127 | # Use custom configuration
128 | python main.py "URL" --config vip
129 | ```
130 |
131 | ## 🔧 Configuration
132 |
133 | Create `config/your_config.yaml`:
134 | ```yaml
135 | name: "My Config"
136 | output_dir: "~/Downloads"
137 | sessdata: "your_sessdata_here"
138 | vip_strict: true
139 | save_cover: true
140 | extra_args: ["--quality", "8K"]
141 | ```
142 |
143 | **Getting SESSDATA**: Login to bilibili.com → F12 → Application → Cookies → Copy `SESSDATA` value
144 |
145 | ## 🛠️ Utility Tools
146 |
147 | ### Directory Size Analysis Tool
148 |
149 | `tools/dir_tree_size.py` is a directory size analysis tool that displays directory structure in tree format and sorts by size.
150 |
151 | **Features**:
152 | - 📊 **Tree-style Display** - Shows directories and files in a tree structure
153 | - 📈 **Smart Sorting** - Sorts by size from smallest to largest for easy identification
154 | - 🔍 **Auto Check** - Automatically checks video file status in leaf directories (bottom-level directories)
155 | - 📝 **Issue Report** - Automatically generates check reports listing problematic directories
156 |
157 | **Usage**:
158 | ```bash
159 | # Basic usage (automatically checks and generates report)
160 | python3 tools/dir_tree_size.py "/path/to/directory"
161 |
162 | # Skip checking, only show directory structure
163 | python3 tools/dir_tree_size.py "/path/to/directory" --no-check
164 | ```
165 |
166 | **Check Features**:
167 | - ✅ **Missing mp4 Files** - Detects if leaf directories are missing `.mp4` files
168 | - ✅ **m4s Files Present** - Detects if `.m4s` files exist in leaf directories (usually incomplete downloads or fragmented files)
169 |
170 | **Report Generation**:
171 | - Check reports are automatically generated in the checked directory
172 | - Filename format: `directory_name_检查报告_timestamp.log`
173 | - Report includes: List of problematic directories, specific issue types, statistics
174 |
175 | **Example Output**:
176 | ```
177 | /Volumes/Data-12T-mybook/多媒体资料/视频/Bilibili/ (1.23 TB)
178 | ├── 番剧-33415-名侦探柯南(中配) (50.2 GB)
179 | │ ├── BV1xx411c7mD-第1集 (500 MB)
180 | │ └── BV1xx411c7mE-第2集 (480 MB)
181 | └── 收藏夹-123456-我的收藏 (30.5 GB)
182 |
183 | Report generated: /path/to/xxx_检查报告_20250116_123456.log
184 | Found 5 leaf directories with issues
185 | ```
186 |
187 | ### Directory Flattening Tool
188 |
189 | `flatten.py` is a directory flattening tool that flattens directory structures at a specified level, moving files from subdirectories to parent directories and removing empty subdirectories.
190 |
191 | **Features**:
192 | - 📁 **Level-based Flattening** - Flattens directory structures at specified depth levels
193 | - ⚡ **Multi-threaded Processing** - Supports parallel file moving with multiple threads for faster processing
194 | - 🔍 **Preview Mode** - Supports `--dry-run` mode to preview operations without actually executing them
195 | - 🗑️ **Hidden File Handling** - Option to delete or keep hidden files (files starting with `.`)
196 | - 🔒 **Safe Operation** - Automatically handles filename conflicts to prevent file overwriting
197 |
198 | **Usage**:
199 | ```bash
200 | # Flatten level 1 directories (default: delete hidden files)
201 | python3 flatten.py "/path/to/directory" -n 1
202 |
203 | # Preview mode, don't actually execute operations
204 | python3 flatten.py "/path/to/directory" -n 1 --dry-run
205 |
206 | # Flatten level 2 directories, keep hidden files, 8 threads
207 | python3 flatten.py "/path/to/directory" -n 2 --keep-hidden --jobs 8
208 |
209 | # Flatten level 1, delete hidden files, 4 threads, preview mode
210 | python3 flatten.py "/path/to/directory" -n 1 --delete-hidden --jobs 4 --dry-run
211 | ```
212 |
213 | **Parameters**:
214 | - `-n, --level` - Level to flatten (relative to root, root depth is 0), default 0
215 | - `-j, --jobs` - Maximum parallel threads, default 8
216 | - `--delete-hidden` - Delete all hidden files starting with `.` (default behavior)
217 | - `--keep-hidden` - Keep hidden files, don't delete
218 | - `--dry-run` - Preview mode, only print operations to be performed, don't actually move/delete files
219 |
220 | **Use Cases**:
221 | - 📦 **Simplify Directory Structure** - Flatten multi-level nested directory structures for easier management
222 | - 🎬 **Video File Organization** - Move downloaded video files from subdirectories to main directory
223 | - 🧹 **Clean Empty Directories** - Automatically remove empty subdirectories after flattening
224 |
225 | **Notes**:
226 | - Tool automatically skips hidden directories (starting with `.`) to protect system directories like `.git`
227 | - If files with the same name exist at destination, they will be automatically renamed to `name__dup1.ext`, `name__dup2.ext`, etc.
228 | - It's recommended to use `--dry-run` mode first to preview results before actual execution
229 |
230 | ## 🎯 Perfect For
231 |
232 | - **Content Creators** - Continuously track and backup latest uploads from followed UP masters
233 | - **Educators** - Automatically sync course updates and educational resources
234 | - **Media Collectors** - Intelligently manage updates from favorites and watchlists
235 | - **Researchers** - Automate collection and organization of research-related video materials
236 |
237 | ## 🛠️ Tech Stack
238 |
239 | Built with Python 3.8+, Flask, yutto, and modern web technologies for reliability and performance.
240 |
241 | ## 🤝 Contributing
242 |
243 | We welcome contributions! Submit Issues or Pull Requests to help improve BiliSyncer.
244 |
245 | ## 📜 License
246 |
247 | MIT Licensed - see [LICENSE](LICENSE.txt) for details.
248 |
249 | ---
250 |
251 | ⭐ **Star this project if it helps you manage your Bilibili content!**
--------------------------------------------------------------------------------
/webui/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}下载管理 - BiliSyncer WebUI{% endblock %}
4 | {% block page_title %}下载管理{% endblock %}
5 |
6 | {% block content %}
7 |
8 |
96 |
97 |
98 |
99 |
100 |
105 |
106 |
107 | -
108 |
109 | 投稿视频
110 | 支持多P视频批量下载
111 |
112 | -
113 |
114 | 番剧/电影
115 | 支持所有集数下载
116 |
117 | -
118 |
119 | 课程
120 | B站课程完整下载
121 |
122 | -
123 |
124 | 收藏夹
125 | 用户收藏夹内容
126 |
127 | -
128 |
129 | 视频列表
130 | 视频列表和合集
131 |
132 | -
133 |
134 | 个人空间
135 | UP主所有投稿
136 |
137 | -
138 |
139 | 稍后再看
140 | 稍后再看列表
141 |
142 |
143 |
144 |
145 |
146 |
147 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
173 |
174 |
投稿视频
175 | https://www.bilibili.com/video/BV1xx411c7mD
176 |
177 | 番剧
178 | https://www.bilibili.com/bangumi/play/ss12345
179 |
180 | 课程
181 | https://www.bilibili.com/cheese/play/ep1643581
182 |
183 | 收藏夹
184 | https://space.bilibili.com/123456/favlist?fid=789012
185 |
186 | 个人空间
187 | https://space.bilibili.com/123456
188 |
189 |
190 |
191 |
192 | {% endblock %}
193 |
194 | {% block scripts %}
195 |
361 | {% endblock %}
--------------------------------------------------------------------------------
/webui/templates/config.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}配置管理 - BiliSyncer WebUI{% endblock %}
4 | {% block page_title %}配置管理{% endblock %}
5 |
6 | {% block content %}
7 |
8 |
9 |
10 |
11 |
19 |
20 |
21 |
22 |
23 | 加载中...
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
39 |
40 |
41 | -
42 | name: 配置名称,用于识别
43 |
44 | -
45 | output_dir: 默认输出目录
46 |
47 | -
48 | sessdata: B站登录Cookie
49 |
50 | -
51 | vip_strict: 是否启用VIP严格模式
52 |
53 | -
54 | save_cover: 是否保存视频封面
55 |
56 | -
57 | debug: 是否启用调试模式
58 |
59 | -
60 | extra_args: 传递给yutto的额外参数
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
73 |
74 |
命令行使用:
75 |
python main.py "url" --config 配置名
76 |
77 |
WebUI使用:
78 |
79 | 在下载管理和批量更新页面选择对应的配置文件即可
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
171 | {% endblock %}
172 |
173 | {% block scripts %}
174 |
383 | {% endblock %}
--------------------------------------------------------------------------------
/extractors.py:
--------------------------------------------------------------------------------
1 | """
2 | 视频提取器模块
3 | 用于从不同类型的B站URL中提取视频信息
4 | """
5 |
6 | import re
7 | import asyncio
8 | from pathlib import Path
9 | from typing import Dict, List, Any
10 | from utils.logger import Logger
11 | from utils.fetcher import Fetcher
12 | from api.bilibili import (
13 | get_favourite_avids,
14 | get_favourite_avids_incremental,
15 | get_favourite_info,
16 | get_user_space_videos,
17 | get_user_space_videos_incremental,
18 | get_series_videos,
19 | get_watch_later_avids,
20 | get_bangumi_list,
21 | get_season_id_by_media_id,
22 | get_season_id_by_episode_id,
23 | get_user_name,
24 | get_ugc_video_list,
25 | get_bangumi_episode_list,
26 | get_bangumi_episode_info,
27 | get_cheese_episode_list,
28 | get_cheese_season_id_by_episode_id,
29 | RISK_CONTROL_DETECTED,
30 | )
31 | from abc import ABC, abstractmethod
32 | from typing import List, Optional, Tuple
33 |
34 | from utils.types import *
35 | from utils.fetcher import Fetcher
36 | from utils.logger import Logger
37 | from api.bilibili import *
38 |
39 |
class URLExtractor(ABC):
    """Abstract base class for Bilibili URL extractors.

    Subclasses declare which URLs they handle via :meth:`match` and produce a
    video list via :meth:`extract`.  Project types in signatures are written
    as string annotations (lazy) so the class itself has no import-time
    dependency on them.
    """

    @abstractmethod
    def match(self, url: str) -> bool:
        """Return True if this extractor can handle *url*."""
        ...

    @abstractmethod
    async def extract(self, fetcher: "Fetcher", url: str) -> "VideoListData":
        """Fetch and return the video list for *url*."""
        ...

    async def extract_incremental(self, fetcher: "Fetcher", url: str, existing_urls: set) -> "VideoListData":
        """Incremental extraction with live de-duplication.

        Default implementation ignores *existing_urls* and simply falls back
        to a full :meth:`extract`.
        """
        return await self.extract(fetcher, url)

    def resolve_shortcut(self, url: str) -> Tuple[bool, str]:
        """Expand a shortcut form of *url*.

        Returns ``(matched, resolved_url)``; the base implementation matches
        nothing and returns the URL unchanged.
        """
        return False, url
61 |
62 |
class UgcVideoExtractor(URLExtractor):
    """Extractor for regular user-uploaded (UGC) videos.

    Fix: the named capture groups had been stripped from the regex patterns,
    leaving invalid ``(?P...)`` syntax that re.compile rejects; the names are
    restored to match the ``match_obj.group(...)`` calls below.
    """

    REGEX_AV = re.compile(r"https?://www\.bilibili\.com/video/av(?P<aid>\d+)/?")
    REGEX_BV = re.compile(r"https?://www\.bilibili\.com/video/(?P<bvid>(bv|BV)\w+)/?")
    REGEX_AV_ID = re.compile(r"av(?P<aid>\d+)")
    REGEX_BV_ID = re.compile(r"(?P<bvid>(bv|BV)\w+)")

    def resolve_shortcut(self, url: str) -> Tuple[bool, str]:
        """Expand a bare ``avXXX`` / ``BVXXX`` shortcut into a full video URL."""
        if match_obj := self.REGEX_AV_ID.match(url):
            return True, f"https://www.bilibili.com/video/av{match_obj.group('aid')}"
        elif match_obj := self.REGEX_BV_ID.match(url):
            return True, f"https://www.bilibili.com/video/{match_obj.group('bvid')}"
        return False, url

    def match(self, url: str) -> bool:
        """Return True if *url* is an av/BV video page."""
        return bool(self.REGEX_AV.match(url) or self.REGEX_BV.match(url))

    def _extract_avid(self, url: str) -> AvId:
        """Parse the avid (AId or BvId) out of *url*; raise ValueError if absent."""
        if match_obj := self.REGEX_AV.match(url):
            return AId(match_obj.group("aid"))
        elif match_obj := self.REGEX_BV.match(url):
            return BvId(match_obj.group("bvid"))
        raise ValueError(f"无法从URL提取AVID: {url}")

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Fetch the multi-part video list for a single upload."""
        avid = self._extract_avid(url)
        Logger.info(f"提取投稿视频: {avid}")
        video_data = await get_ugc_video_list(fetcher, avid)

        # Folder naming scheme: 投稿视频-<BV id>-<overall title>
        folder_name = f"投稿视频-{avid}-{video_data['title']}"

        # Rewrite each part's output path under the task folder.
        for video in video_data["videos"]:
            video["path"] = Path(folder_name) / f"{avid}-{video['title']}"

        return {"title": folder_name, "videos": video_data["videos"]}
105 |
106 |
class BangumiExtractor(URLExtractor):
    """Extractor for bangumi (anime) seasons: batches every episode.

    Fix: the named capture groups had been stripped from the regex patterns,
    leaving invalid ``(?P...)`` syntax; the names are restored to match the
    ``match_obj.group(...)`` calls in ``_parse_season_id``.
    """

    REGEX_MD = re.compile(r"https?://www\.bilibili\.com/bangumi/media/md(?P<media_id>\d+)")
    REGEX_EP = re.compile(r"https?://www\.bilibili\.com/bangumi/play/ep(?P<episode_id>\d+)")
    REGEX_SS = re.compile(r"https?://www\.bilibili\.com/bangumi/play/ss(?P<season_id>\d+)")

    def match(self, url: str) -> bool:
        """Return True if *url* is a bangumi media/episode/season page."""
        return bool(self.REGEX_MD.match(url) or self.REGEX_EP.match(url) or self.REGEX_SS.match(url))

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder episode list for the whole season."""
        # Normalise md/ep/ss URL forms to a single season_id first.
        season_id = await self._parse_season_id(fetcher, url)
        Logger.info(f"提取番剧: {season_id}")

        # Only the season title and episode ids are fetched here; per-episode
        # details are resolved lazily at download time.
        bangumi_title, episode_ids = await get_bangumi_episode_list(fetcher, season_id)

        # Folder naming scheme: 番剧-<season id>-<title>
        folder_name = f"番剧-{season_id}-{bangumi_title}"

        videos = []
        for i, episode_id in enumerate(episode_ids):
            # Placeholder entry; real metadata is fetched when downloading.
            video = {
                "avid": BvId("BV1"),   # placeholder, resolved at download time
                "cid": CId("0"),       # placeholder, resolved at download time
                "title": "",           # resolved at download time
                "name": "",            # resolved at download time
                "pubdate": 0,          # bangumi episodes carry no pubdate here
                "author": "",          # resolved at download time
                "duration": 0,         # resolved at download time
                "path": Path(f"{folder_name}/第{i+1}话"),  # temporary, updated on download
                "status": "pending",   # details fetched on demand
                "episode_id": episode_id  # kept for the later detail lookup
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}

    async def _parse_season_id(self, fetcher: Fetcher, url: str) -> str:
        """Resolve the season_id from any supported bangumi URL form."""
        if match_obj := self.REGEX_MD.match(url):
            media_id = match_obj.group("media_id")
            return await get_season_id_by_media_id(fetcher, media_id)
        elif match_obj := self.REGEX_EP.match(url):
            episode_id = match_obj.group("episode_id")
            return await get_season_id_by_episode_id(fetcher, episode_id)
        elif match_obj := self.REGEX_SS.match(url):
            return match_obj.group("season_id")
        else:
            raise ValueError(f"无法解析番剧URL: {url}")
161 |
162 |
class FavouriteExtractor(URLExtractor):
    """Extractor for a user's favourites folder.

    Fix: the named capture groups had been stripped from the regex pattern,
    leaving invalid ``(?P...)`` syntax; ``mid``/``fid`` are restored
    (``fid`` is the group actually read below).
    """

    REGEX_FAV = re.compile(r"https?://space\.bilibili\.com/(?P<mid>\d+)/favlist\?fid=(?P<fid>\d+)((&ftype=create)|$)")

    def match(self, url: str) -> bool:
        """Return True if *url* is a favourites-list page."""
        return bool(self.REGEX_FAV.match(url))

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder video list for every item in the folder."""
        match_obj = self.REGEX_FAV.match(url)
        if not match_obj:
            raise ValueError(f"无法解析收藏夹URL: {url}")

        fid = FId(match_obj.group("fid"))
        Logger.info(f"提取收藏夹: {fid}")

        fav_info = await get_favourite_info(fetcher, fid)
        avids = await get_favourite_avids(fetcher, fid)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: 收藏夹-<fid>-<folder title>
        folder_name = f"收藏夹-{fid}-{fav_info['title']}"

        videos = []
        for avid in avids:
            # Placeholder entry; details are fetched on demand at download time.
            video = {
                "avid": avid,
                "cid": CId("0"),      # placeholder
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": "",         # resolved later
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary; becomes avid-title on download
                "status": "pending"   # details fetched on demand
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}

    async def extract_incremental(self, fetcher: Fetcher, url: str, existing_urls: set) -> VideoListData:
        """Incrementally extract the folder, skipping URLs already recorded."""
        match_obj = self.REGEX_FAV.match(url)
        if not match_obj:
            raise ValueError(f"无法解析收藏夹URL: {url}")

        fid = FId(match_obj.group("fid"))
        Logger.info(f"增量提取收藏夹: {fid}")

        fav_info = await get_favourite_info(fetcher, fid)
        avids = await get_favourite_avids_incremental(fetcher, fid, existing_urls)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: 收藏夹-<fid>-<folder title>
        folder_name = f"收藏夹-{fid}-{fav_info['title']}"

        videos = []
        for avid in avids:
            # Placeholder entry; details are fetched on demand at download time.
            video = {
                "avid": avid,
                "cid": CId("0"),      # placeholder
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": "",         # resolved later
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary; becomes avid-title on download
                "status": "pending"   # details fetched on demand
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}
245 |
246 |
class SeriesExtractor(URLExtractor):
    """Extractor for user series / collections (lists pages).

    Fix: the named capture groups had been stripped from the regex pattern,
    leaving invalid ``(?P...)`` syntax; ``mid``/``series_id``/``type`` are
    restored to match the ``match_obj.group(...)`` calls below.
    """

    REGEX_SERIES = re.compile(r"https?://space\.bilibili\.com/(?P<mid>\d+)/lists/(?P<series_id>\d+)\?type=(?P<type>series|season)")

    def match(self, url: str) -> bool:
        """Return True if *url* is a series/season list page."""
        return bool(self.REGEX_SERIES.match(url))

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder video list for the whole series/collection."""
        match_obj = self.REGEX_SERIES.match(url)
        if not match_obj:
            raise ValueError(f"无法解析视频列表URL: {url}")

        mid = MId(match_obj.group("mid"))
        series_id = SeriesId(match_obj.group("series_id"))
        list_type = match_obj.group("type")

        Logger.info(f"提取{'视频列表' if list_type == 'series' else '视频合集'}: {series_id}")

        avids = await get_series_videos(fetcher, series_id, mid)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: <type>-<series id>-<name>; the id doubles as
        # the name for now since the real list name is not fetched here.
        type_name = "视频列表" if list_type == "series" else "视频合集"
        folder_name = f"{type_name}-{series_id}-{type_name}{series_id}"  # 暂时使用ID作为名称,后续可能需要获取实际名称

        videos = []
        for avid in avids:
            # Placeholder entry; details are fetched on demand at download time.
            video = {
                "avid": avid,
                "cid": CId("0"),      # placeholder
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": "",         # resolved later
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary; becomes avid-title on download
                "status": "pending"   # details fetched on demand
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}
295 |
296 |
class UserSpaceExtractor(URLExtractor):
    """Extractor for a user's whole upload space.

    Fix: the named capture group had been stripped from the regex pattern,
    leaving invalid ``(?P...)`` syntax; ``mid`` is restored to match the
    ``match_obj.group("mid")`` calls below.
    """

    REGEX_SPACE = re.compile(r"https?://space\.bilibili\.com/(?P<mid>\d+)(/video)?/?(?:\?.*)?")

    def match(self, url: str) -> bool:
        """Return True if *url* is a user-space page."""
        return bool(self.REGEX_SPACE.match(url))

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder video list for every upload of the user."""
        match_obj = self.REGEX_SPACE.match(url)
        if not match_obj:
            raise ValueError(f"无法解析用户空间URL: {url}")

        mid = MId(match_obj.group("mid"))
        Logger.info(f"提取用户空间: {mid}")

        # Fetch the display name and the full id list (ids only).
        username = await get_user_name(fetcher, mid)
        avids = await get_user_space_videos(fetcher, mid)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: UP主-<uid>-<username>
        folder_name = f"UP主-{mid}-{username}"

        videos = []
        for avid in avids:
            # Placeholder entry; details are fetched on demand at download time.
            video = {
                "avid": avid,
                "cid": CId("0"),      # placeholder, resolved at download time
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": username,   # already known from get_user_name
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary; becomes avid-title on download
                "status": "pending"   # details fetched on demand
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}

    async def extract_incremental(self, fetcher: Fetcher, url: str, existing_urls: set) -> VideoListData:
        """Incrementally extract uploads, skipping URLs already recorded."""
        match_obj = self.REGEX_SPACE.match(url)
        if not match_obj:
            raise ValueError(f"无法解析用户空间URL: {url}")

        mid = MId(match_obj.group("mid"))
        Logger.info(f"增量提取用户空间: {mid}")

        # Fetch the display name and only the new ids (ids only).
        username = await get_user_name(fetcher, mid)
        avids = await get_user_space_videos_incremental(fetcher, mid, existing_urls)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: UP主-<uid>-<username>
        folder_name = f"UP主-{mid}-{username}"

        videos = []
        for avid in avids:
            # Placeholder entry; details are fetched on demand at download time.
            video = {
                "avid": avid,
                "cid": CId("0"),      # placeholder, resolved at download time
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": username,   # already known from get_user_name
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary; becomes avid-title on download
                "status": "pending"   # details fetched on demand
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}
381 |
382 |
class WatchLaterExtractor(URLExtractor):
    """Extractor for the account's "watch later" queue."""

    REGEX_WATCH_LATER = re.compile(r"https?://www\.bilibili\.com/(watchlater|list/watchlater)")

    def match(self, url: str) -> bool:
        """Return True for watch-later page URLs."""
        return self.REGEX_WATCH_LATER.match(url) is not None

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder video list from the watch-later queue."""
        Logger.info("提取稍后再看列表")

        avids = await get_watch_later_avids(fetcher)

        # Propagate the risk-control sentinel unchanged to the caller.
        if avids == RISK_CONTROL_DETECTED:
            return RISK_CONTROL_DETECTED

        # Folder naming scheme: 稍后再看-<id>-<name> (fixed for this source).
        folder_name = "稍后再看-watchlater-稍后再看"

        # Placeholder entries; per-video details are resolved lazily at
        # download time, when each path also becomes "<avid>-<title>".
        videos = [
            {
                "avid": avid,
                "cid": CId("0"),      # placeholder
                "title": "",          # resolved later
                "name": "",           # resolved later
                "pubdate": 0,         # resolved later
                "author": "",         # resolved later
                "duration": 0,        # resolved later
                "path": Path(f"{folder_name}/{avid}"),  # temporary path
                "status": "pending",  # details fetched on demand
            }
            for avid in avids
        ]

        return {"title": folder_name, "videos": videos}
422 |
423 |
class CheeseExtractor(URLExtractor):
    """Extractor for paid courses (cheese): batches every lesson.

    Fix: the named capture groups had been stripped from the regex patterns,
    leaving invalid ``(?P...)`` syntax; ``episode_id``/``season_id`` are
    restored to match the ``match_obj.group(...)`` calls below.
    """

    REGEX_EP = re.compile(r"https?://www\.bilibili\.com/cheese/play/ep(?P<episode_id>\d+)")
    REGEX_SS = re.compile(r"https?://www\.bilibili\.com/cheese/play/ss(?P<season_id>\d+)")

    def match(self, url: str) -> bool:
        """Return True if *url* is a course episode/season page."""
        return bool(self.REGEX_EP.match(url) or self.REGEX_SS.match(url))

    async def extract(self, fetcher: Fetcher, url: str) -> VideoListData:
        """Build a placeholder lesson list for the whole course."""
        # Normalise ep/ss URL forms to a single season_id first.
        season_id = await self._parse_season_id(fetcher, url)
        Logger.info(f"提取课程: {season_id}")

        # Only the course title and lesson ids are fetched here; per-lesson
        # details are resolved lazily at download time.
        course_title, episode_ids = await get_cheese_episode_list(fetcher, season_id)

        # Folder naming scheme: 课程-<season id>-<course title>
        folder_name = f"课程-{season_id}-{course_title}"

        videos = []
        for i, episode_id in enumerate(episode_ids):
            # Placeholder entry; real metadata is fetched when downloading.
            video = {
                "avid": AId("1"),     # placeholder, resolved at download time
                "cid": CId("0"),      # placeholder, resolved at download time
                "title": "",          # resolved at download time
                "name": "",           # resolved at download time
                "pubdate": 0,         # courses carry no pubdate here
                "author": "",         # resolved at download time
                "duration": 0,        # resolved at download time
                "path": Path(f"{folder_name}/第{i+1}课时"),  # temporary, updated on download
                "status": "pending",  # details fetched on demand
                "episode_id": episode_id  # kept for the later detail lookup
            }
            videos.append(video)

        return {"title": folder_name, "videos": videos}

    async def _parse_season_id(self, fetcher: Fetcher, url: str) -> str:
        """Resolve the season_id from any supported course URL form."""
        if match_obj := self.REGEX_EP.match(url):
            episode_id = match_obj.group("episode_id")
            return await get_cheese_season_id_by_episode_id(fetcher, episode_id)
        elif match_obj := self.REGEX_SS.match(url):
            return match_obj.group("season_id")
        else:
            raise ValueError(f"无法解析课程URL: {url}")
474 |
475 |
# Extractor registry, ordered by matching priority.  UserSpaceExtractor has the
# loosest regex and must therefore be the very last entry; the previous order
# placed CheeseExtractor after it, contradicting that stated intent.
EXTRACTORS = [
    UgcVideoExtractor(),    # user uploads
    BangumiExtractor(),     # bangumi / anime
    FavouriteExtractor(),   # favourites folders
    SeriesExtractor(),      # series / collections
    WatchLaterExtractor(),  # watch-later queue
    CheeseExtractor(),      # paid courses
    UserSpaceExtractor(),   # user space — broadest regex, keep last
]
486 |
487 |
async def extract_video_list(fetcher: Fetcher, url: str) -> VideoListData | str:
    """Resolve *url* and dispatch to the first matching extractor.

    Returns the extracted video list, or the RISK_CONTROL_DETECTED sentinel
    when the chosen extractor fails for any reason.  Raises ValueError when
    no registered extractor recognises the URL.
    """
    original_url = url

    # Expand shortcut forms (e.g. a bare "BVxxxx") before matching.
    for extractor in EXTRACTORS:
        matched, resolved_url = extractor.resolve_shortcut(url)
        if matched:
            url = resolved_url
            Logger.info(f"快捷方式解析: {original_url} -> {url}")
            break

    # Follow HTTP redirects so the extractor regexes see the canonical URL.
    url = await fetcher.get_redirected_url(url)
    if url != original_url:
        Logger.info(f"URL重定向: {original_url} -> {url}")

    for extractor in EXTRACTORS:
        if not extractor.match(url):
            continue
        Logger.info(f"使用提取器: {extractor.__class__.__name__}")
        try:
            return await extractor.extract(fetcher, url)
        except Exception as e:
            Logger.warning(f"提取器 {extractor.__class__.__name__} 执行失败: {e}")
            # Any failure while listing videos maps to the risk-control sentinel.
            return RISK_CONTROL_DETECTED

    raise ValueError(f"不支持的URL类型: {url}")
516 |
517 |
async def extract_video_list_incremental(fetcher: Fetcher, url: str, existing_urls: set) -> VideoListData | str:
    """Resolve *url* and dispatch to the first matching incremental extractor.

    Like extract_video_list(), but forwards *existing_urls* so extractors can
    skip already-recorded videos.  Returns the extracted list, or the
    RISK_CONTROL_DETECTED sentinel on extractor failure.  Raises ValueError
    when no registered extractor recognises the URL.
    """
    original_url = url

    # Expand shortcut forms (e.g. a bare "BVxxxx") before matching.
    for extractor in EXTRACTORS:
        matched, resolved_url = extractor.resolve_shortcut(url)
        if matched:
            url = resolved_url
            Logger.info(f"快捷方式解析: {original_url} -> {url}")
            break

    # Follow HTTP redirects so the extractor regexes see the canonical URL.
    url = await fetcher.get_redirected_url(url)
    if url != original_url:
        Logger.info(f"URL重定向: {original_url} -> {url}")

    for extractor in EXTRACTORS:
        if not extractor.match(url):
            continue
        Logger.info(f"使用增量提取器: {extractor.__class__.__name__}")
        try:
            return await extractor.extract_incremental(fetcher, url, existing_urls)
        except Exception as e:
            Logger.warning(f"增量提取器 {extractor.__class__.__name__} 执行失败: {e}")
            # Any failure while listing videos maps to the risk-control sentinel.
            return RISK_CONTROL_DETECTED

    raise ValueError(f"不支持的URL类型: {url}")
546 |
--------------------------------------------------------------------------------
/webui/templates/tasks.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}任务状态 - BiliSyncer WebUI{% endblock %}
4 | {% block page_title %}任务状态{% endblock %}
5 |
6 | {% block content %}
7 |
8 |
9 |
10 |
45 |
46 |
47 |
76 |
77 |
78 |
90 |
91 |
92 |
93 |
94 |
95 |
100 |
101 |
102 |
103 |
104 | 正在扫描目录...
105 | 0 / 0
106 |
107 |
111 |
准备开始...
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
123 |
126 |
127 |
128 |
129 |
130 |
138 |
139 |
140 |
141 |
151 |
152 |
153 |
156 |
157 |
158 |
159 | 显示 0 / 0 个任务
160 |
161 |
162 |
163 |
164 |
165 |
点击"扫描本地任务"来查看已存在的任务
166 |
167 |
168 |
169 |
170 | 没有找到符合条件的任务
171 |
172 | 尝试调整过滤条件或点击重置按钮
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
195 | {% endblock %}
196 |
197 | {% block scripts %}
198 |
717 | {% endblock %}
--------------------------------------------------------------------------------
/utils/csv_manager.py:
--------------------------------------------------------------------------------
1 | """
2 | CSV文件管理模块
3 | 用于保存和管理视频下载状态
4 | """
5 |
6 | import csv
7 | import glob
8 | import re
9 | import shutil
10 | from datetime import datetime
11 | from pathlib import Path
12 | from typing import List, Dict, Optional, Tuple, Any
13 |
14 | from utils.types import VideoInfo
15 | from utils.logger import Logger
16 | from utils.constants import TASK_FOLDER_PREFIXES
17 |
18 |
19 | class CSVManager:
20 | """CSV文件管理器"""
21 |
    def __init__(self, task_dir: Path):
        """
        Initialize the CSV manager.

        task_dir: directory of the current task (e.g. the "收藏夹-声音" folder);
            created together with its parents if it does not exist yet.
        """
        self.task_dir = task_dir
        self.task_dir.mkdir(parents=True, exist_ok=True)
29 |
30 | def _extract_main_folder_from_path(self, path_value: Any) -> str:
31 | """根据路径提取任务主目录名称"""
32 | if isinstance(path_value, Path):
33 | path_str = path_value.as_posix()
34 | else:
35 | path_str = str(path_value or "")
36 |
37 | if path_str:
38 | parts = [part for part in path_str.replace("\\", "/").split("/") if part]
39 | for part in parts:
40 | if any(part.startswith(prefix) for prefix in TASK_FOLDER_PREFIXES):
41 | return part
42 | if parts:
43 | return parts[-2] if len(parts) >= 2 else parts[0]
44 |
45 | return self.task_dir.name
46 |
47 | def _get_video_url_and_identifier(self, video: VideoInfo) -> Tuple[str, str]:
48 | """根据VideoInfo生成统一的video_url和标识"""
49 | episode_id = video.get('episode_id')
50 | main_folder = self._extract_main_folder_from_path(video.get('path'))
51 | if episode_id:
52 | if main_folder.startswith('课程-'):
53 | video_url = f"https://www.bilibili.com/cheese/play/ep{episode_id}"
54 | else:
55 | video_url = f"https://www.bilibili.com/bangumi/play/ep{episode_id}"
56 | avid_str = episode_id
57 | else:
58 | video_url = video['avid'].to_url()
59 | avid_str = str(video['avid'])
60 |
61 | return video_url, avid_str
62 |
63 | def _derive_title_from_video(self, video: VideoInfo) -> str:
64 | """推断标题,避免写入空值"""
65 | candidates = [
66 | str(video.get('title') or "").strip(),
67 | str(video.get('name') or "").strip(),
68 | ]
69 | path_value = video.get('path')
70 | if path_value:
71 | if isinstance(path_value, Path):
72 | candidates.append(path_value.name)
73 | else:
74 | candidates.append(Path(str(path_value)).name)
75 | for candidate in candidates:
76 | if candidate:
77 | return candidate
78 | return "未命名视频"
79 |
    def _video_to_csv_row(self, video: VideoInfo, downloaded_override: Optional[str] = None) -> Dict[str, str]:
        """Convert a VideoInfo into a standard CSV row (all values as strings).

        downloaded_override: when given, forces the 'downloaded' column to
            this value; otherwise unavailable videos are marked 'True' (so
            they are never retried) and everything else 'False'.
        """
        video_url, avid_str = self._get_video_url_and_identifier(video)
        title_value = self._derive_title_from_video(video)
        name_value = video.get('name') or title_value

        # Render the publish timestamp; 0/absent means unknown.
        pubdate_unix = video.get('pubdate', 0)
        if pubdate_unix:
            pubdate_str = datetime.fromtimestamp(pubdate_unix).strftime('%Y-%m-%d %H:%M:%S')
        else:
            pubdate_str = "未知"

        # A cid of "0" is a placeholder; store it as empty instead.
        cid_value = video.get('cid', '')
        if str(cid_value) == "0":
            cid_value = ""

        download_path = self._format_download_path(video.get('path'))

        # Unavailable videos are recorded as already downloaded so the
        # downloader skips them.
        is_unavailable = video.get('status') == 'unavailable'
        downloaded_flag = downloaded_override if downloaded_override is not None else ('True' if is_unavailable else 'False')

        folder_size_value = int(video.get('folder_size', 0) or 0)

        return {
            'video_url': video_url,
            'title': title_value,
            'name': name_value,
            'download_path': download_path,
            'folder_size': self._format_folder_size_value(folder_size_value),
            'downloaded': downloaded_flag,
            'avid': avid_str,
            'cid': str(cid_value),
            'pubdate': pubdate_str,
            'status': video.get('status', 'normal'),
            'is_multi_part': str(video.get('is_multi_part', False)),
            'total_parts': str(video.get('total_parts', 1))
        }
117 |
118 | def _format_download_path(self, path_value: Any) -> str:
119 | """保证下载路径以绝对路径形式存储"""
120 | if isinstance(path_value, Path):
121 | path_obj = path_value
122 | elif path_value:
123 | path_str = str(path_value).strip()
124 | if not path_str:
125 | # 空字符串,使用task_dir
126 | path_obj = self.task_dir
127 | else:
128 | path_obj = Path(path_str)
129 | else:
130 | # 如果path_value为空,使用task_dir作为基础路径
131 | path_obj = self.task_dir
132 |
133 | # 确保路径是绝对路径
134 | if not path_obj.is_absolute():
135 | # 如果是相对路径,将其转换为相对于task_dir的绝对路径
136 | # 先尝试直接拼接,如果失败则使用resolve
137 | try:
138 | path_obj = (self.task_dir / path_obj).resolve()
139 | except (OSError, ValueError):
140 | # 如果resolve失败,至少确保是相对于task_dir的绝对路径
141 | path_obj = self.task_dir / path_obj
142 | path_obj = path_obj.resolve()
143 | else:
144 | # 如果已经是绝对路径,确保它是解析后的路径
145 | try:
146 | path_obj = path_obj.resolve()
147 | except (OSError, ValueError):
148 | # 如果resolve失败,保持原路径
149 | pass
150 |
151 | # 返回绝对路径的字符串表示(使用正斜杠)
152 | return path_obj.as_posix()
153 |
154 | @staticmethod
155 | def _format_folder_size_value(size_bytes: int) -> str:
156 | """将字节大小转为人类可读格式(智能选择合适的单位)"""
157 | if size_bytes <= 0:
158 | return "0 B"
159 |
160 | units = ["B", "KB", "MB", "GB", "TB"]
161 | import math
162 |
163 | # 计算应该使用的单位索引
164 | if size_bytes < 1024:
165 | # 小于1KB,使用B
166 | return f"{size_bytes} B"
167 | else:
168 | # 计算单位索引(使用对数避免循环)
169 | idx = min(int(math.floor(math.log(size_bytes, 1024))), len(units) - 1)
170 | value = size_bytes / (1024 ** idx)
171 | # 根据大小选择合适的精度
172 | if idx == 0: # B
173 | return f"{int(value)} {units[idx]}"
174 | elif idx == 1: # KB
175 | return f"{value:.2f} {units[idx]}"
176 | elif idx == 2: # MB
177 | return f"{value:.2f} {units[idx]}"
178 | elif idx == 3: # GB
179 | return f"{value:.2f} {units[idx]}"
180 | else: # TB
181 | return f"{value:.2f} {units[idx]}"
182 |
183 | @staticmethod
184 | def parse_folder_size_value(size_value: str) -> int:
185 | """将带单位的大小字符串还原为字节"""
186 | if size_value is None:
187 | return 0
188 | # 统一格式,移除空格和千位分隔符,兼容“字节”中文单位
189 | normalized = str(size_value).strip().upper().replace('字节', 'B')
190 | normalized = normalized.replace(',', '')
191 | normalized = re.sub(r'\s+', '', normalized)
192 | if not normalized:
193 | return 0
194 |
195 | units = {
196 | 'B': 1,
197 | 'KB': 1024,
198 | 'MB': 1024 ** 2,
199 | 'GB': 1024 ** 3,
200 | 'TB': 1024 ** 4,
201 | }
202 | # 处理单字母后缀(如历史数据中的"1.2G")
203 | alias_map = {
204 | 'K': 'KB',
205 | 'M': 'MB',
206 | 'G': 'GB',
207 | 'T': 'TB',
208 | }
209 | for alias, full_unit in alias_map.items():
210 | if normalized.endswith(alias) and not normalized.endswith(full_unit):
211 | normalized = normalized[:-len(alias)] + full_unit
212 | break
213 |
214 | match = re.match(r'^([0-9]+(?:\.[0-9]+)?)(B|KB|MB|GB|TB)?$', normalized)
215 | if match:
216 | number_part = match.group(1)
217 | unit = match.group(2) or 'B'
218 | try:
219 | value = float(number_part) * units[unit]
220 | return int(value)
221 | except (KeyError, ValueError):
222 | return 0
223 |
224 | # 尽量解析任何纯数字形式
225 | try:
226 | return int(float(normalized))
227 | except ValueError:
228 | return 0
229 |
230 | def _normalize_csv_row_for_write(self, row: Dict[str, str]) -> Dict[str, str]:
231 | """确保写入CSV的数据格式统一"""
232 | normalized = row.copy()
233 | normalized['download_path'] = self._format_download_path(normalized.get('download_path', ''))
234 | size_value = self.parse_folder_size_value(normalized.get('folder_size', '0'))
235 | normalized['folder_size'] = self._format_folder_size_value(size_value)
236 |
237 | title_raw = str(normalized.get('title', '')).strip()
238 | name_raw = str(normalized.get('name', '')).strip()
239 | if not title_raw:
240 | path_candidate = Path(normalized['download_path'])
241 | title_raw = name_raw or path_candidate.name or "未命名视频"
242 | normalized['title'] = title_raw
243 | if not name_raw:
244 | normalized['name'] = title_raw
245 | else:
246 | normalized['name'] = name_raw
247 |
248 | return normalized
249 |
250 | def _detect_csv_encoding(self, file_path: Path) -> str:
251 | """智能检测CSV文件编码"""
252 | encodings = ['utf-8-sig', 'utf-8', 'gbk', 'gb2312']
253 |
254 | for encoding in encodings:
255 | try:
256 | with open(file_path, 'r', encoding=encoding) as f:
257 | # 尝试读取前几行
258 | f.readline()
259 | return encoding
260 | except UnicodeDecodeError:
261 | continue
262 | except Exception:
263 | continue
264 |
265 | # 如果都失败了,默认使用utf-8
266 | Logger.warning(f"无法检测CSV文件编码,使用默认utf-8: {file_path}")
267 | return 'utf-8'
268 |
269 | def _generate_csv_filename(self) -> str:
270 | """生成基于当前时间的CSV文件名"""
271 | now = datetime.now()
272 | return f"{now.strftime('%y-%m-%d-%H-%M')}.csv"
273 |
274 | def _find_latest_csv(self) -> Optional[Path]:
275 | """查找任务目录下最新的CSV文件"""
276 | pattern = str(self.task_dir / "??-??-??-??-??.csv")
277 | csv_files = glob.glob(pattern)
278 |
279 | if not csv_files:
280 | Logger.info(f"未找到现有的CSV文件:{self.task_dir}")
281 | return None
282 |
283 | # 按文件名排序,最新的在最后
284 | csv_files.sort()
285 | latest_file = Path(csv_files[-1])
286 | Logger.info(f"找到现有CSV文件:{latest_file.name}")
287 | return latest_file
288 |
289 | def save_video_list(self, videos: List[VideoInfo], original_url: Optional[str] = None) -> Path:
290 | """保存视频列表到CSV文件(仅用于新任务)"""
291 | csv_filename = self._generate_csv_filename()
292 | csv_path = self.task_dir / csv_filename
293 | temp_path = self.task_dir / f"temp_{csv_filename}"
294 |
295 | try:
296 | # 先写入临时文件,使用UTF-8-BOM编码确保Excel正确识别
297 | with open(temp_path, 'w', newline='', encoding='utf-8-sig') as f:
298 | # 第一行写入原始URL(如果提供)
299 | if original_url:
300 | f.write(f"# Original URL: {original_url}\n")
301 |
302 | writer = csv.DictWriter(f, fieldnames=[
303 | 'video_url', 'title', 'name', 'download_path', 'folder_size',
304 | 'downloaded', 'avid', 'cid', 'pubdate', 'status',
305 | 'is_multi_part', 'total_parts'
306 | ])
307 | writer.writeheader()
308 |
309 | for video in videos:
310 | writer.writerow(self._video_to_csv_row(video))
311 |
312 | # 验证临时文件写入成功后,移动到正式位置
313 | shutil.move(str(temp_path), str(csv_path))
314 | Logger.info(f"已保存视频列表到: {csv_path}")
315 | return csv_path
316 |
317 | except Exception as e:
318 | # 清理临时文件
319 | if temp_path.exists():
320 | temp_path.unlink()
321 | Logger.error(f"保存CSV文件失败: {e}")
322 | raise
323 |
324 | def update_video_list(self, new_videos: List[VideoInfo], original_url: str) -> Path:
325 | """更新现有的视频列表,合并新视频并保持已下载状态"""
326 | current_csv = self._find_latest_csv()
327 |
328 | if current_csv is None:
329 | # 如果没有现有CSV,直接创建新的
330 | return self.save_video_list(new_videos, original_url)
331 |
332 | try:
333 | # 读取现有数据
334 | existing_videos = self.load_video_list()
335 | if existing_videos is None:
336 | existing_videos = []
337 |
338 | # 创建现有视频的URL映射(保留下载状态)
339 | existing_video_map = {video['video_url']: video for video in existing_videos}
340 |
341 | # 生成新的CSV文件名(带时间戳)
342 | new_csv_filename = self._generate_csv_filename()
343 | new_csv_path = self.task_dir / new_csv_filename
344 | temp_path = self.task_dir / f"temp_{new_csv_filename}"
345 |
346 | merged_videos = []
347 | for video in new_videos:
348 | video_url, _ = self._get_video_url_and_identifier(video)
349 | if video_url in existing_video_map:
350 | existing_data = self._normalize_csv_row_for_write(existing_video_map[video_url])
351 | merged_videos.append(existing_data)
352 | else:
353 | merged_videos.append(self._video_to_csv_row(video))
354 |
355 | # 定义统一的字段列表
356 | fieldnames = [
357 | 'video_url', 'title', 'name', 'download_path', 'folder_size',
358 | 'downloaded', 'avid', 'cid', 'pubdate', 'status',
359 | 'is_multi_part', 'total_parts'
360 | ]
361 |
362 | # 安全写入新的CSV文件
363 | with open(temp_path, 'w', newline='', encoding='utf-8-sig') as f:
364 | # 写入原始URL
365 | f.write(f"# Original URL: {original_url}\n")
366 |
367 | if merged_videos:
368 | writer = csv.DictWriter(f, fieldnames=fieldnames)
369 | writer.writeheader()
370 | writer.writerows(merged_videos)
371 |
372 | # 验证写入成功后,替换原文件
373 | shutil.move(str(temp_path), str(new_csv_path))
374 |
375 | # 删除旧的CSV文件
376 | if current_csv != new_csv_path:
377 | current_csv.unlink()
378 | Logger.debug(f"已删除旧CSV文件: {current_csv.name}")
379 |
380 | Logger.info(f"已更新视频列表到: {new_csv_path}")
381 | return new_csv_path
382 |
383 | except Exception as e:
384 | # 清理临时文件
385 | if temp_path.exists():
386 | temp_path.unlink()
387 | Logger.error(f"更新CSV文件失败: {e}")
388 | raise
389 |
    def load_video_list(self) -> Optional[List[Dict[str, str]]]:
        """Load the video list from the newest CSV file.

        Returns None when no CSV exists or it cannot be parsed at all;
        returns [] when the file parses but has no usable rows. Rows are
        repaired in place: missing columns get defaults, paths and sizes
        are canonicalized, and empty titles are backfilled.
        """
        csv_path = self._find_latest_csv()

        if csv_path is None:
            return None

        try:
            # Detect the file encoding before parsing.
            encoding = self._detect_csv_encoding(csv_path)

            videos = []
            with open(csv_path, 'r', encoding=encoding) as f:
                # Skip the leading original-URL comment line if present.
                first_line = f.readline()
                if not first_line.startswith("# Original URL:"):
                    # First line is real data; rewind to the file start.
                    f.seek(0)

                reader = csv.DictReader(f)

                # The CSV must have a header row.
                if not reader.fieldnames:
                    Logger.error("CSV文件缺少标题行")
                    return None

                # Verify all required columns are present.
                required_fields = ['video_url', 'title', 'downloaded']
                missing_fields = [field for field in required_fields if field not in reader.fieldnames]
                if missing_fields:
                    Logger.error(f"CSV文件缺少必需字段: {missing_fields}")
                    Logger.error(f"当前字段: {list(reader.fieldnames)}")
                    return None

                row_count = 0
                error_rows = 0
                missing_title_count = 0

                for row_num, row in enumerate(reader, start=2):  # start at 2: line 1 is the header
                    try:
                        row_count += 1

                        # Rows without a video_url are unusable; skip them.
                        if not row.get('video_url', '').strip():
                            Logger.warning(f"第{row_num}行:video_url为空,跳过")
                            error_rows += 1
                            continue

                        # Backfill an empty title from name/path, else a placeholder.
                        if not row.get('title', '').strip():
                            fallback_source = row.get('name', '') or row.get('download_path', '')
                            if fallback_source:
                                fallback_title = Path(str(fallback_source)).name or f"未命名视频_{row_count}"
                            else:
                                fallback_title = f"未命名视频_{row_count}"
                            row['title'] = fallback_title
                            missing_title_count += 1
                            # No per-row log; a single summary below avoids log spam.

                        # Ensure every expected column exists with a default.
                        row.setdefault('is_multi_part', 'False')
                        row.setdefault('total_parts', '1')
                        row.setdefault('status', 'normal')
                        row.setdefault('downloaded', 'False')
                        row.setdefault('name', row.get('title', ''))
                        row.setdefault('download_path', '')
                        row.setdefault('folder_size', '0')

                        row['download_path'] = self._format_download_path(row['download_path'])
                        folder_size_bytes = self.parse_folder_size_value(row['folder_size'])
                        row['folder_size'] = self._format_folder_size_value(folder_size_bytes)

                        # Validate and repair the remaining field formats.
                        self._validate_and_fix_row_data(row, row_num)

                        videos.append(row)

                    except Exception as e:
                        error_rows += 1
                        Logger.warning(f"第{row_num}行数据处理失败,跳过: {e}")
                        continue

                if missing_title_count > 0:
                    Logger.info(f"CSV文件中有 {missing_title_count} 行缺少标题,已自动填充")
                if error_rows > 0:
                    Logger.warning(f"CSV文件中有 {error_rows} 行数据存在问题")

            if not videos:
                Logger.warning("CSV文件中没有有效的视频记录")
                return []

            Logger.info(f"从CSV文件加载了 {len(videos)} 个视频记录")
            return videos

        except UnicodeDecodeError as e:
            Logger.error(f"CSV文件编码错误: {e}")
            Logger.error("建议:检查文件编码格式,支持的编码: UTF-8, GBK, GB2312")
            return None
        except csv.Error as e:
            Logger.error(f"CSV文件格式错误: {e}")
            Logger.error("建议:检查CSV文件格式,确保使用正确的分隔符和引号")
            return None
        except FileNotFoundError:
            Logger.error(f"CSV文件不存在: {csv_path}")
            return None
        except PermissionError:
            Logger.error(f"没有权限读取CSV文件: {csv_path}")
            return None
        except Exception as e:
            Logger.error(f"读取CSV文件失败: {e}")
            Logger.error(f"文件路径: {csv_path}")
            return None
504 |
505 | def _validate_and_fix_row_data(self, row: Dict[str, str], row_num: int) -> None:
506 | """验证和修复行数据"""
507 | # 验证video_url格式
508 | video_url = row.get('video_url', '')
509 | if video_url and not (video_url.startswith('http') and 'bilibili.com' in video_url):
510 | Logger.warning(f"第{row_num}行:video_url格式可能不正确: {video_url}")
511 |
512 | # 验证和修复downloaded字段
513 | downloaded = row.get('downloaded', '').lower()
514 | if downloaded not in ['true', 'false']:
515 | Logger.warning(f"第{row_num}行:downloaded字段值不正确 '{row['downloaded']}',设置为False")
516 | row['downloaded'] = 'False'
517 | else:
518 | row['downloaded'] = 'True' if downloaded == 'true' else 'False'
519 |
520 | # 验证和修复is_multi_part字段
521 | is_multi_part = row.get('is_multi_part', '').lower()
522 | if is_multi_part not in ['true', 'false']:
523 | if is_multi_part: # 如果有值但不是true/false
524 | Logger.warning(f"第{row_num}行:is_multi_part字段值不正确 '{row['is_multi_part']}',设置为False")
525 | row['is_multi_part'] = 'False'
526 | else:
527 | row['is_multi_part'] = 'True' if is_multi_part == 'true' else 'False'
528 |
529 | # 验证和修复total_parts字段
530 | total_parts = row.get('total_parts', '1')
531 | try:
532 | parts_num = int(total_parts)
533 | if parts_num < 1:
534 | Logger.warning(f"第{row_num}行:total_parts值不正确 '{total_parts}',设置为1")
535 | row['total_parts'] = '1'
536 | except ValueError:
537 | Logger.warning(f"第{row_num}行:total_parts不是数字 '{total_parts}',设置为1")
538 | row['total_parts'] = '1'
539 |
540 | folder_size = row.get('folder_size', '0')
541 | size_value = self.parse_folder_size_value(folder_size)
542 | if size_value < 0:
543 | Logger.warning(f"第{row_num}行:folder_size为负数,设置为0")
544 | size_value = 0
545 | row['folder_size'] = self._format_folder_size_value(size_value)
546 |
547 | def get_pending_videos(self) -> Optional[List[Dict[str, str]]]:
548 | """获取未下载的视频列表"""
549 | videos = self.load_video_list()
550 | if videos is None:
551 | return None
552 |
553 | pending_videos = [v for v in videos if v['downloaded'].lower() != 'true']
554 |
555 | if pending_videos:
556 | Logger.info(f"发现 {len(pending_videos)} 个未下载的视频")
557 | else:
558 | Logger.info("所有视频已下载完成")
559 |
560 | return pending_videos
561 |
562 | def mark_video_downloaded(self, video_url: str, folder_size: Optional[int] = None) -> None:
563 | """标记视频为已下载并更新CSV文件"""
564 | current_csv = self._find_latest_csv()
565 |
566 | if current_csv is None:
567 | Logger.warning("未找到CSV文件,无法标记下载状态")
568 | return
569 |
570 | try:
571 | # 智能检测文件编码
572 | encoding = self._detect_csv_encoding(current_csv)
573 |
574 | # 读取现有数据
575 | videos = []
576 | url_line = None
577 | with open(current_csv, 'r', encoding=encoding) as f:
578 | # 检查第一行是否为原始URL
579 | first_line = f.readline()
580 | if first_line.startswith("# Original URL:"):
581 | url_line = first_line
582 | else:
583 | # 如果第一行不是URL,重新回到文件开头
584 | f.seek(0)
585 |
586 | reader = csv.DictReader(f)
587 | for row in reader:
588 | row.setdefault('is_multi_part', 'False')
589 | row.setdefault('total_parts', '1')
590 | row.setdefault('status', 'normal')
591 | # 保留原有的folder_size值,不要用setdefault覆盖
592 | if 'folder_size' not in row or not row['folder_size']:
593 | row['folder_size'] = '0'
594 |
595 | # 先保存原有的folder_size值(如果存在且有效)
596 | existing_folder_size = row.get('folder_size', '0')
597 | existing_size_bytes = self.parse_folder_size_value(existing_folder_size)
598 |
599 | normalized_row = self._normalize_csv_row_for_write(row)
600 |
601 | if normalized_row['video_url'] == video_url:
602 | normalized_row['downloaded'] = 'True'
603 | if folder_size is not None:
604 | # 更新当前视频的folder_size
605 | normalized_row['folder_size'] = self._format_folder_size_value(folder_size)
606 | elif existing_size_bytes > 0:
607 | # 如果当前视频没有新的folder_size,但已有值,保留原有值
608 | normalized_row['folder_size'] = self._format_folder_size_value(existing_size_bytes)
609 | else:
610 | # 对于其他视频,保留原有的folder_size值
611 | if existing_size_bytes > 0:
612 | normalized_row['folder_size'] = self._format_folder_size_value(existing_size_bytes)
613 |
614 | videos.append(normalized_row)
615 |
616 | # 生成新的CSV文件名
617 | new_csv_filename = self._generate_csv_filename()
618 | new_csv_path = self.task_dir / new_csv_filename
619 | temp_path = self.task_dir / f"temp_{new_csv_filename}"
620 |
621 | # 定义统一的字段列表
622 | fieldnames = [
623 | 'video_url', 'title', 'name', 'download_path', 'folder_size',
624 | 'downloaded', 'avid', 'cid', 'pubdate', 'status',
625 | 'is_multi_part', 'total_parts'
626 | ]
627 |
628 | # 先写入临时文件,使用UTF-8-BOM编码确保Excel正确识别
629 | with open(temp_path, 'w', newline='', encoding='utf-8-sig') as f:
630 | # 写入原始URL行(如果存在)
631 | if url_line:
632 | f.write(url_line)
633 |
634 | if videos:
635 | writer = csv.DictWriter(f, fieldnames=fieldnames)
636 | writer.writeheader()
637 | writer.writerows(videos)
638 |
639 | # 验证写入成功后,替换文件
640 | shutil.move(str(temp_path), str(new_csv_path))
641 |
642 | # 删除旧的CSV文件
643 | if current_csv != new_csv_path:
644 | current_csv.unlink()
645 | Logger.debug(f"已删除旧CSV文件: {current_csv.name}")
646 |
647 | Logger.debug(f"已更新CSV文件并标记下载: {video_url}")
648 |
649 | except Exception as e:
650 | # 清理临时文件
651 | temp_path = self.task_dir / f"temp_{self._generate_csv_filename()}"
652 | if temp_path.exists():
653 | temp_path.unlink()
654 | Logger.error(f"更新CSV文件失败: {e}")
655 |
656 | def get_download_stats(self) -> Dict[str, int]:
657 | """获取下载统计信息"""
658 | videos = self.load_video_list()
659 | if not videos:
660 | return {'total': 0, 'downloaded': 0, 'pending': 0}
661 |
662 | total = len(videos)
663 | downloaded = sum(1 for v in videos if v['downloaded'].lower() == 'true')
664 | pending = total - downloaded
665 |
666 | return {
667 | 'total': total,
668 | 'downloaded': downloaded,
669 | 'pending': pending
670 | }
671 |
672 | def get_existing_video_urls(self) -> set:
673 | """获取现有视频的URL集合,用于增量获取时的查重"""
674 | videos = self.load_video_list()
675 | if not videos:
676 | return set()
677 |
678 | # 提取所有video_url
679 | existing_urls = set()
680 | for video in videos:
681 | video_url = video.get('video_url', '').strip()
682 | if video_url:
683 | existing_urls.add(video_url)
684 |
685 | Logger.debug(f"现有视频URL数量: {len(existing_urls)}")
686 | return existing_urls
687 |
688 | def get_original_url(self) -> Optional[str]:
689 | """从CSV文件中获取原始URL"""
690 | csv_path = self._find_latest_csv()
691 |
692 | if csv_path is None:
693 | return None
694 |
695 | try:
696 | # 智能检测文件编码
697 | encoding = self._detect_csv_encoding(csv_path)
698 |
699 | with open(csv_path, 'r', encoding=encoding) as f:
700 | first_line = f.readline().strip()
701 | if first_line.startswith("# Original URL:"):
702 | return first_line[15:].strip() # 去掉"# Original URL:"前缀
703 | return None
704 |
705 | except Exception as e:
706 | Logger.error(f"读取原始URL失败: {e}")
707 | return None
708 |
709 | def update_video_info(self, video_url: str, updated_info: Dict[str, str]) -> None:
710 | """更新视频的详细信息"""
711 | current_csv = self._find_latest_csv()
712 |
713 | if current_csv is None:
714 | Logger.warning("未找到CSV文件,无法更新视频信息")
715 | return
716 |
717 | try:
718 | # 智能检测文件编码
719 | encoding = self._detect_csv_encoding(current_csv)
720 |
721 | # 读取现有数据
722 | videos = []
723 | url_line = None
724 | with open(current_csv, 'r', encoding=encoding) as f:
725 | # 检查第一行是否为原始URL
726 | first_line = f.readline()
727 | if first_line.startswith("# Original URL:"):
728 | url_line = first_line
729 | else:
730 | # 如果第一行不是URL,重新回到文件开头
731 | f.seek(0)
732 |
733 | reader = csv.DictReader(f)
734 | for row in reader:
735 | row.setdefault('is_multi_part', 'False')
736 | row.setdefault('total_parts', '1')
737 | row.setdefault('status', 'normal')
738 | row.setdefault('folder_size', '0')
739 |
740 | normalized_row = self._normalize_csv_row_for_write(row)
741 | if normalized_row['video_url'] == video_url:
742 | updated_copy = updated_info.copy()
743 | if 'download_path' in updated_copy:
744 | updated_copy['download_path'] = self._format_download_path(updated_copy['download_path'])
745 | if 'folder_size' in updated_copy:
746 | size_bytes = self.parse_folder_size_value(str(updated_copy['folder_size']))
747 | updated_copy['folder_size'] = self._format_folder_size_value(size_bytes)
748 | normalized_row.update(updated_copy)
749 | normalized_row = self._normalize_csv_row_for_write(normalized_row)
750 | videos.append(normalized_row)
751 |
752 | # 生成新的CSV文件名
753 | new_csv_filename = self._generate_csv_filename()
754 | new_csv_path = self.task_dir / new_csv_filename
755 | temp_path = self.task_dir / f"temp_{new_csv_filename}"
756 |
757 | # 定义统一的字段列表
758 | fieldnames = [
759 | 'video_url', 'title', 'name', 'download_path', 'folder_size',
760 | 'downloaded', 'avid', 'cid', 'pubdate', 'status',
761 | 'is_multi_part', 'total_parts'
762 | ]
763 |
764 | # 先写入临时文件,使用UTF-8-BOM编码确保Excel正确识别
765 | with open(temp_path, 'w', newline='', encoding='utf-8-sig') as f:
766 | # 写入原始URL行(如果存在)
767 | if url_line:
768 | f.write(url_line)
769 |
770 | if videos:
771 | writer = csv.DictWriter(f, fieldnames=fieldnames)
772 | writer.writeheader()
773 | writer.writerows(videos)
774 |
775 | # 验证写入成功后,替换文件
776 | shutil.move(str(temp_path), str(new_csv_path))
777 |
778 | # 删除旧的CSV文件
779 | if current_csv != new_csv_path:
780 | current_csv.unlink()
781 | Logger.debug(f"已删除旧CSV文件: {current_csv.name}")
782 |
783 | Logger.debug(f"已更新视频信息: {video_url}")
784 |
785 | except Exception as e:
786 | # 清理临时文件
787 | temp_path = self.task_dir / f"temp_{self._generate_csv_filename()}"
788 | if temp_path.exists():
789 | temp_path.unlink()
790 | Logger.error(f"更新视频信息失败: {e}")
791 |
--------------------------------------------------------------------------------