├── .gitignore ├── BilibiliMangaDownload.py ├── LICENSE ├── README.md ├── cli.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | Download/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # data 108 | sessdata.txt 109 | downloads 110 | .idea 
"""Download chapters of a bilibili manga into ``downloads/<title>/<chapter>/``."""
import json
import os
import re
import zipfile
from io import BytesIO
from pathlib import Path
import requests
from tenacity import retry, stop_after_attempt
from time import sleep
from tqdm import tqdm
from urllib3.exceptions import InsecureRequestWarning
from typing import Literal, List

requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

URL_DETAIL = "https://manga.bilibili.com/twirp/comic.v2.Comic/ComicDetail?device=pc&platform=web"
URL_IMAGE_INDEX = "https://manga.bilibili.com/twirp/comic.v1.Comic/GetImageIndex?device=pc&platform=web"
URL_MANGA_HOST = "https://manga.hdslb.com"
URL_IMAGE_TOKEN = "https://manga.bilibili.com/twirp/comic.v1.Comic/ImageToken?device=pc&platform=web"

# Seconds to wait for a connection / a chunk of response data before giving
# up, so a stalled server cannot hang the download forever.
REQUEST_TIMEOUT = 30

# Cookies sent with the API requests; ``download`` stores SESSDATA here.
cookies = {}

# TODO: support consecutive downloads
# TODO: support setting session_data from a file
# TODO: handle session expiry
# TODO: naming format for downloaded images


@retry(stop=stop_after_attempt(3))
def download_image(url: str, path: str):
    """Fetch ``url`` and write the response body to ``path``.

    The call is attempted up to three times (tenacity); an HTTP error status
    now counts as a failure, so an error page is never saved as an image.

    :param url: fully tokenised image URL.
    :param path: destination file (``str`` or ``Path``).
    :raises tenacity.RetryError: when all three attempts fail.
    """
    r = requests.get(url, cookies=cookies, verify=False, timeout=REQUEST_TIMEOUT)
    # Check the status before touching the file: ``download`` skips files
    # that already exist, so a saved error body would never be re-fetched.
    r.raise_for_status()
    with open(path, 'wb') as f:
        f.write(r.content)


def get_manga_info(comic_id: int):
    """Return the decoded JSON detail response for ``comic_id``.

    :raises Exception: when the detail endpoint does not answer with 200.
    """
    r = requests.post(URL_DETAIL, data={'comic_id': comic_id}, cookies=cookies,
                      timeout=REQUEST_TIMEOUT)
    if r.status_code != 200:
        raise Exception("未找到该漫画ID")
    return r.json()


def get_images(comic_id: int, ep_id: int):
    """Return the ``pics`` list stored in the image index of one chapter.

    The index file is downloaded, its first 9 bytes are dropped, the rest is
    XOR-ed with an 8-byte key and the result is read as a zip archive that
    contains ``index.dat`` (JSON).

    :param comic_id: manga id, supplies key bytes 4-7.
    :param ep_id: chapter id, supplies key bytes 0-3.
    """
    index = requests.post(URL_IMAGE_INDEX, data={'ep_id': ep_id}, cookies=cookies,
                          timeout=REQUEST_TIMEOUT).json()['data']
    # NOTE(review): the 9 skipped bytes look like a file header - confirm.
    raw = requests.get(index['host'] + index['path'], timeout=REQUEST_TIMEOUT).content[9:]

    # Key = low four bytes of ep_id followed by low four bytes of comic_id,
    # least significant byte first.
    shifts = (0, 8, 16, 24)
    key = [(ep_id >> s) & 0xff for s in shifts] + [(comic_id >> s) & 0xff for s in shifts]
    decoded = bytes(byte ^ key[i % 8] for i, byte in enumerate(raw))

    with zipfile.ZipFile(BytesIO(decoded)) as zf:
        return json.loads(zf.read('index.dat'))['pics']


def get_token(url: str):
    """Return ``url`` resolved to a downloadable ``<url>?token=<token>`` string."""
    # json.dumps keeps the payload valid JSON even if the path contains
    # characters that need escaping.
    data = requests.post(URL_IMAGE_TOKEN, data={"urls": json.dumps([url])}, cookies=cookies,
                         timeout=REQUEST_TIMEOUT).json()["data"][0]
    return f'{data["url"]}?token={data["token"]}'


def filter_str(name):
    """Remove ``\\ / : * ? " < > |`` from ``name``, then strip surrounding
    whitespace and trailing dots, so it can be used as a directory name."""
    return re.sub(r'[\\/:*?"<>|]', '', name).strip().rstrip('.')


def download(comic_id: int, mode: Literal["ep", "ord"], ids: List[int], sessdata: str):
    """Download the selected chapters of a manga.

    :param comic_id: manga id.
    :param mode: ``"ep"`` matches ``ids`` against each chapter's ``id``;
        any other value matches against its ``ord``.
    :param ids: chapter ids or ordinals to download.
    :param sessdata: SESSDATA cookie value; may be empty or ``None``.
    :return: ``0`` when no chapter matches, otherwise ``None``.
    """
    if sessdata and sessdata.strip():
        # Surrounding whitespace (e.g. a trailing newline from a file) would
        # end up inside the cookie header.
        cookies['SESSDATA'] = sessdata.strip()
    else:
        # Do not send an empty cookie, and do not reuse one from an earlier call.
        cookies.pop('SESSDATA', None)

    Path("downloads").mkdir(exist_ok=True)

    manga_info = get_manga_info(comic_id)
    title = filter_str(manga_info["data"]["title"])
    ep_list = manga_info["data"]["ep_list"]

    print('[INFO]', title)

    match_key = "id" if mode == "ep" else "ord"
    wanted = set(ids)
    ep_data_list = [data for data in ep_list if data[match_key] in wanted]

    if not ep_data_list:
        print('[ERROR] 未找到相应章节')
        return 0

    for ep_data in ep_data_list:
        if ep_data["is_locked"]:
            print(f"INFO 你尚未解锁第 {ep_data['short_title']} 话,无法下载")
            continue

        image_list = get_images(comic_id, ep_data["id"])
        print(f'[INFO] 第 {ep_data["short_title"]} 话开始下载')
        dir_path = Path(f'downloads/{title}/{filter_str(ep_data["short_title"]) + filter_str(ep_data["title"])}')
        dir_path.mkdir(parents=True, exist_ok=True)

        for index, image_url in enumerate(tqdm(image_list), 1):
            path = dir_path / f'{index:04d}.jpg'
            if path.exists():
                # Already on disk: skip it without spending a token request.
                continue
            download_image(get_token(image_url), path)

        # NOTE(review): pause between chapters; the original indentation of
        # this call was lost, confirm it was not meant to run per image.
        sleep(1)

    print("下载完毕")
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 renmu123 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 介绍 2 | bilibili 漫画批量下载,付费内容可在购买登录后下载 3 | 4 | # 安装 5 | ## cli 6 | 从 [release](https://github.com/renmu123/bilibili-manga-downloader/releases) 下载最新版cli 7 | ``` 8 | bili-comic-download.exe --help 9 | 10 | # 下载漫画 id 为 27923 的 ep438701 话 11 | bili-comic-download.exe bili-comic-download 27923 --mode=ep --ids=438701 12 | 13 | # 下载漫画 id 为 25514 的第1-5和7话 14 | bili-comic-download.exe bili-comic-download 25514 --mode=ord --ids=1-5,7 15 | ``` 16 | 17 | ## 源码 18 | ``` 19 | git clone https://github.com/renmu123/bilibili-manga-downloader 20 | cd bilibili-manga-downloader 21 | pip install -r requirements.txt 22 | python cli.py bili-comic-download --help 23 | ``` 24 | 25 | # 使用 26 | ``` 27 | bili-comic-download.exe bili-comic-download 35514 28 | # 35514 是某个 comic 的 id 29 | # 然后根据提示处理就可以了 30 | # 31 | ``` 32 | 33 | # 概念解释 34 | ep模式是指后面输入的 id 为漫画的章节号 35 | ord模式是指后面输入的 id 为漫画的序号 36 | 37 | # 获取 comic_id & ep 38 | 运行后根据提示操作 例如漫画章节页url https://manga.bilibili.com/mc26742/334263 中 39 | 40 | mc号(漫画id)为26742 章节号为334263 41 | 42 | ## 啥是 ord? 
"""Command line front end for the bilibili manga downloader."""
from cleo import Application, Command
from BilibiliMangaDownload import download
from pathlib import Path
from loguru import logger


def _parse_ids(ids):
    """Expand a comma separated id list such as ``"1-5,7"`` into integers.

    Each item is either a single integer or an inclusive ``start-end`` range.

    :raises ValueError: when an item is not an integer or has more than one ``-``.
    """
    id_array = []
    for part in ids.split(","):
        bounds = part.split("-")
        if len(bounds) == 1:
            id_array.append(int(bounds[0]))
        elif len(bounds) == 2:
            start, end = int(bounds[0]), int(bounds[1])
            id_array.extend(range(start, end + 1))
        else:
            # "1-2-3" used to be read silently as just "1".
            raise ValueError(f"invalid id range: {part!r}")
    return id_array


class Download(Command):
    """
    bilibili 漫画下载命令行工具

    bili-comic-download
        {comicId : 想要下载的漫画mc号?}
        {--m|mode= : 下载模式,ep: 根据ep_id来进行下载, ord: 根据顺序来进行下载?}
        {--e|ids= : ep_id 或者 ord_id}
        {--s|sessdata= : 如果要下载已购买的漫画,这个参数是必要的}
    """
    # The docstring above is read by cleo as the command description and
    # signature, so its text is part of the program and is kept unchanged.

    def handle(self):
        # Collect comic id, mode, chapter ids and sessdata from the command
        # line (asking for whatever is missing), then start the download.
        try:
            comic_id = int(self.argument('comicId'))
        except (TypeError, ValueError):
            self.line("comicId只能为数字")
            raise

        mode = self.option('mode')
        if not mode:
            mode = self.choice("选择下载模式", ["ep", "ord"])
        elif mode not in {"ep", "ord"}:
            mode = self.choice("1选择下载模式", ["ep", "ord"])

        ids = self.option('ids')
        if not ids:
            # TODO: validate the answer
            ids = self.ask('请输入需要下载的漫画章节,支持用“,”分隔:\n > ')
        try:
            if not ids:
                raise ValueError("no chapter ids given")
            id_array = _parse_ids(ids)
        except ValueError as e:
            # Pass the values as format arguments: the exception text may
            # contain braces and must not be used as the format template.
            logger.error("{} (ids={!r}, mode={!r})", e, ids, mode)
            self.line("输入的ids格式有误,请重新输入")
            raise

        sessdata = self.option("sessdata")
        if not sessdata:
            sessdata_path = Path("sessdata.txt")
            if sessdata_path.exists():
                # Strip the trailing newline most editors add, otherwise it
                # becomes part of the cookie value.
                sessdata = sessdata_path.read_text().strip()
            else:
                sessdata = self.ask('请输入sessdata(如果下载的漫画无需登录即可下载,可以留空),获取方式请查询文档:\n > ')

        download(comic_id, mode, id_array, sessdata)


application = Application("bili-comic-download", "0.91")
application.add(Download())

if __name__ == '__main__':
    application.run()