├── .gitignore
├── BilibiliMangaDownload.py
├── LICENSE
├── README.md
├── cli.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | Download/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | .hypothesis/
 49 | .pytest_cache/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | db.sqlite3
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # Environments
 86 | .env
 87 | .venv
 88 | env/
 89 | venv/
 90 | ENV/
 91 | env.bak/
 92 | venv.bak/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 
107 | # data
108 | sessdata.txt
109 | downloads
110 | .idea


--------------------------------------------------------------------------------
/BilibiliMangaDownload.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import re
  4 | import zipfile
  5 | from io import BytesIO
  6 | from pathlib import Path
  7 | import requests
  8 | from tenacity import retry, stop_after_attempt
  9 | from time import sleep
 10 | from tqdm import tqdm
 11 | from urllib3.exceptions import InsecureRequestWarning
 12 | from typing import Literal, List
 13 | 
# download_image() calls requests with verify=False; silence the
# InsecureRequestWarning that urllib3 would otherwise emit for those requests.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

# Endpoint queried by get_manga_info() for a comic's title and episode list.
URL_DETAIL = "https://manga.bilibili.com/twirp/comic.v2.Comic/ComicDetail?device=pc&platform=web"
# Endpoint queried by get_images() for the location of an episode's image index.
URL_IMAGE_INDEX = "https://manga.bilibili.com/twirp/comic.v1.Comic/GetImageIndex?device=pc&platform=web"
# NOTE(review): not referenced anywhere in this module — possibly a leftover; confirm before removing.
URL_MANGA_HOST = "https://manga.hdslb.com"
# Endpoint queried by get_token() to obtain a tokenised download URL for an image.
URL_IMAGE_TOKEN = "https://manga.bilibili.com/twirp/comic.v1.Comic/ImageToken?device=pc&platform=web"

# Module-wide cookie dict sent with the API requests; download() stores the
# caller's SESSDATA value in it before any request is made.
cookies = {}

# TODO: support continuous downloading
# TODO: support configuring session_data from a file
# TODO: handle an expired session
# TODO: naming format for downloaded images
 27 | 
 28 | 
 29 | @retry(stop=stop_after_attempt(3))
 30 | def download_image(url: str, path: str):
 31 |     r = requests.get(url, cookies=cookies, verify=False)
 32 |     with open(path, 'wb') as f:
 33 |         f.write(r.content)
 34 | 
 35 | 
 36 | def get_manga_info(comic_id: int):
 37 |     r = requests.post(URL_DETAIL, data={'comic_id': comic_id}, cookies=cookies)
 38 |     if r.status_code == 200:
 39 |         return r.json()
 40 |     else:
 41 |         raise Exception("未找到该漫画ID")
 42 | 
 43 | 
 44 | def get_images(comic_id: int, ep_id: int):
 45 |     data = requests.post(URL_IMAGE_INDEX, data={'ep_id': ep_id}, cookies=cookies).json()['data']
 46 |     data = bytearray(requests.get(data['host'] + data['path']).content[9:])
 47 |     key = [ep_id & 0xff, ep_id >> 8 & 0xff, ep_id >> 16 & 0xff, ep_id >> 24 & 0xff, comic_id & 0xff,
 48 |            comic_id >> 8 & 0xff,
 49 |            comic_id >> 16 & 0xff, comic_id >> 24 & 0xff]
 50 |     for i in range(len(data)):
 51 |         data[i] ^= key[i % 8]
 52 |     file = BytesIO(data)
 53 |     zf = zipfile.ZipFile(file)
 54 |     data = json.loads(zf.read('index.dat'))
 55 |     zf.close()
 56 |     file.close()
 57 |     return data['pics']
 58 | 
 59 | 
 60 | def get_token(url: str):
 61 |     data = requests.post(URL_IMAGE_TOKEN, data={"urls": f'["{url}"]'}, cookies=cookies).json()["data"][0]
 62 |     return f'{data["url"]}?token={data["token"]}'
 63 | 
 64 | 
 65 | def filter_str(name):
 66 |     return re.sub(r'[\\/:*?"<>|]', '', name).strip().rstrip('.')
 67 | 
 68 | 
 69 | def download(comic_id: int, mode: Literal["ep", "ord"], ids: List[int], sessdata: str):
 70 |     cookies['SESSDATA'] = sessdata
 71 | 
 72 |     if not (Path.exists(Path("downloads"))):
 73 |         os.mkdir('downloads')
 74 | 
 75 |     mange_info = get_manga_info(comic_id)
 76 |     title = filter_str(mange_info["data"]["title"])
 77 |     ep_list = mange_info["data"]["ep_list"]
 78 | 
 79 |     print('[INFO]', title)
 80 | 
 81 |     if mode == "ep":
 82 |         ep_data_list = [data for data in ep_list if data["id"] in ids]
 83 |     else:
 84 |         ep_data_list = [data for data in ep_list if data["ord"] in ids]
 85 | 
 86 |     if len(ep_data_list) == 0:
 87 |         print(f'[ERROR] 未找到相应章节')
 88 |         return 0
 89 |     for ep_data in ep_data_list:
 90 |         if ep_data["is_locked"]:
 91 |             print(f"INFO 你尚未解锁第 {ep_data['short_title']} 话，无法下载")
 92 |             continue
 93 | 
 94 |         image_list = get_images(comic_id, ep_data["id"])
 95 |         print(f'[INFO] 第 {ep_data["short_title"]} 话开始下载')
 96 |         dir_path = Path(f'downloads/{title}/{filter_str(ep_data["short_title"]) + filter_str(ep_data["title"])}')
 97 |         dir_path.mkdir(parents=True, exist_ok=True)
 98 | 
 99 |         for index, image_url in enumerate(tqdm(image_list), 1):
100 |             full_url = get_token(image_url)
101 |             path = dir_path / f'{index:04d}.jpg'
102 |             if not path.exists():
103 |                 download_image(full_url, path)
104 | 
105 |         sleep(1)
106 | 
107 |     print("下载完毕")
108 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 renmu123
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 介绍
 2 | bilibili 漫画批量下载，付费内容可在购买登录后下载
 3 | 
 4 | # 安装
 5 | ## cli
 6 | 从 [release](https://github.com/renmu123/bilibili-manga-downloader/releases) 下载最新版cli
 7 | ```
 8 | bili-comic-download.exe --help
 9 | 
10 | # 下载漫画 id 为 27923 的 ep438701 话
11 | bili-comic-download.exe bili-comic-download 27923 --mode=ep --ids=438701
12 | 
13 | # 下载漫画 id 为 25514 的第1-5和7话
14 | bili-comic-download.exe bili-comic-download 25514 --mode=ord --ids=1-5,7
15 | ```
16 | 
17 | ## 源码
18 | ```
19 | git clone https://github.com/renmu123/bilibili-manga-downloader
20 | cd bilibili-manga-downloader
21 | pip install -r requirements.txt
22 | python cli.py bili-comic-download --help
23 | ```
24 | 
25 | # 使用
26 | ```
27 | bili-comic-download.exe bili-comic-download 35514
28 | # 35514 是某个 comic 的 id
29 | # 然后根据提示处理就可以了
30 | # 
31 | ```
32 | 
33 | # 概念解释
34 | ep模式是指后面输入的 id 为漫画的章节号  
35 | ord模式是指后面输入的 id 为漫画的序号
36 | 
37 | # 获取 comic_id & ep
38 | 运行后根据提示操作。例如，在漫画章节页 url https://manga.bilibili.com/mc26742/334263 中：
39 | 
40 | mc号（漫画id）为 26742，章节号为 334263
41 | 
42 | ## 啥是 ord？
43 | ord 理论上就是漫画的序号，比如第一话就是1，但有时候还会有作者的节日插画，我测试过《高木同学》的漫画，基本都是对应的，如果有不对应，欢迎提 issue
44 | 
45 | # 获取 sessdata
46 | 
47 | 如要下载的内容中包含付费章节，则需输入SESSDATA，请按以下方式获取SESSDATA
48 | 
49 | 1.在浏览器中登录bilibili漫画，并购买好相应的章节
50 | 
51 | 2.找到Cookie中的SESSDATA，复制其内容。
52 | 
53 | ![image.png](https://i.loli.net/2020/10/26/RBhmXZdl9jJC7pw.png)
54 | 
55 | **也可以在该 exe 文件夹下新建 sessdata.txt，将上述的 sessdata 复制到其中，可以避免每次都要复制**
56 | 
57 | 图片解析部分来自 https://github.com/flaribbit/bilibili-manga-spider
58 | 
59 | # TODOS
60 | - [x] 支持同时下载多个
61 | - [x] 支持文件设置 session_data
62 | - [ ] session 过期支持
63 | - [x] 下载的照片格式自动补位
64 | - [x] 未解锁的漫画下载提醒
65 | - [ ] 不进行重复下载


--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
 1 | from cleo import Application, Command
 2 | from BilibiliMangaDownload import download
 3 | from pathlib import Path
 4 | from loguru import logger
 5 | 
 6 | 
 7 | class Download(Command):
 8 |     """
 9 |     bilibili 漫画下载命令行工具
10 | 
11 |     bili-comic-download
12 |         {comicId : 想要下载的漫画mc号?}
13 |         {--m|mode= : 下载模式，ep: 根据ep_id来进行下载, ord: 根据顺序来进行下载?}
14 |         {--e|ids= : ep_id 或者 ord_id}
15 |         {--s|sessdata= : 如果要下载已购买的漫画，这个参数是必要的}
16 |     """
17 | 
18 |     def handle(self):
19 |         try:
20 |             comic_id = int(self.argument('comicId'))
21 |         except ValueError as e:
22 |             self.line("comicId只能为数字")
23 |             raise e
24 |         if mode := self.option('mode'):
25 |             if mode not in {"ep", "ord"}:
26 |                 mode = self.choice("1选择下载模式", ["ep", "ord"])
27 |         else:
28 |             mode = self.choice("选择下载模式", ["ep", "ord"])
29 | 
30 |         ids = self.option('ids')
31 |         id_array = []
32 |         if not ids:
33 |             # TODO: ask 增加验证
34 |             ids = self.ask('请输入需要下载的漫画章节，支持用“,”分隔：\n > ')
35 |         try:
36 |             for split_ids in ids.split(","):
37 |                 split_id_array = split_ids.split("-")
38 |                 if len(split_id_array) == 2:
39 |                     start, end = int(split_id_array[0]), int(split_id_array[1])
40 |                     id_array += [i for i in range(start, end + 1)]
41 |                 else:
42 |                     id_array.append(int(split_id_array[0]))
43 |         except Exception as e:
44 |             logger.error(str(e), ids, mode)
45 |             self.line("输入的ids格式有误，请重新输入")
46 |             raise e
47 | 
48 |         sessdata = self.option("sessdata")
49 |         if not sessdata:
50 |             sessdata_path = Path("sessdata.txt")
51 |             if Path.exists(Path(sessdata_path)):
52 |                 with open(sessdata_path, "r") as f:
53 |                     sessdata = f.read()
54 |             else:
55 |                 sessdata = self.ask('请输入sessdata（如果下载的漫画无需登录即可下载，可以留空），获取方式请查询文档：\n > ')
56 | 
57 |         download(comic_id, mode, id_array, sessdata)
58 | 
59 | 
# Build the cleo console application (name and version string) and register
# the Download command with it.
application = Application("bili-comic-download", "0.91")
application.add(Download())

# Start the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    application.run()
65 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | altgraph==0.17
 2 | certifi==2020.12.5
 3 | chardet==4.0.0
 4 | cleo==0.8.1
 5 | clikit==0.6.2
 6 | colorama==0.4.4
 7 | crashtest==0.3.1
 8 | future==0.18.2
 9 | idna==2.10
10 | loguru==0.5.3
11 | pastel==0.2.1
12 | pefile==2019.4.18
13 | pyinstaller==4.2
14 | pyinstaller-hooks-contrib==2020.11
15 | pylev==1.3.0
16 | pywin32-ctypes==0.2.0
17 | requests==2.25.1
18 | six==1.15.0
19 | tenacity==6.3.1
20 | tqdm==4.57.0
21 | urllib3==1.26.3
22 | win32-setctime==1.0.3
23 | 


--------------------------------------------------------------------------------