├── .github └── workflows │ ├── doc-deploy.yml │ ├── python-app.yml │ └── python-publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── CONTRIBUTING_EN.md ├── LICENSE ├── README.md ├── bilix ├── __init__.py ├── __main__.py ├── _process.py ├── cli │ ├── assign.py │ └── main.py ├── download │ ├── base_downloader.py │ ├── base_downloader_m3u8.py │ ├── base_downloader_part.py │ └── utils.py ├── exception.py ├── ffmpeg.py ├── log.py ├── progress │ ├── abc.py │ ├── cli_progress.py │ └── ws_progress.py ├── sites │ ├── bilibili │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ ├── downloader_test.py │ │ ├── informer.py │ │ ├── informer_test.py │ │ ├── utils.py │ │ └── utils_test.py │ ├── cctv │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── douyin │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── downloader_test.py │ ├── hanime1 │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── jable │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── tiktok │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── downloader_test.py │ ├── yhdmp │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── yhdmp.js │ ├── yinghuacd │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ └── youtube │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py └── utils.py ├── docs ├── .vitepress │ ├── config.ts │ └── theme │ │ ├── index.ts │ │ └── style │ │ └── var.css ├── advance_guide.md ├── api_examples.md ├── async.md ├── download_examples.md ├── en │ ├── advance_guide.md │ ├── api_examples.md │ ├── async.md │ ├── download_examples.md │ ├── index.md │ ├── install.md │ ├── more.md │ └── quickstart.md ├── index.md ├── install.md ├── more.md ├── package-lock.json ├── package.json └── quickstart.md ├── examples ├── a_very_simple_example.py ├── download_by_timerange.py ├── limit_download_rate.py ├── multi_site_download_same_time.py ├── multi_type_tasks.py └── use_of_api.py └── pyproject.toml /.github/workflows/doc-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Document Deploy 2 | on: 3 | workflow_dispatch: { } 4 | push: 5 | paths: 6 | - 'docs/**' 7 | branches: 8 | - master 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | pages: write 14 | id-token: write 15 | environment: 16 | name: github-pages 17 | url: ${{ steps.deployment.outputs.page_url }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-node@v3 23 | with: 24 | node-version: 16 25 | cache: 'npm' 26 | cache-dependency-path: docs/package-lock.json 27 | - name: Install dependencies and build 28 | run: | 29 | npm ci 30 | npm run docs:build 31 | working-directory: docs 32 | - uses: actions/configure-pages@v2 33 | - uses: actions/upload-pages-artifact@v1 34 | with: 35 | path: docs/.vitepress/dist 36 | - name: Deploy 37 | id: deployment 38 | uses: actions/deploy-pages@v1 39 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: 
https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | paths: 9 | - '.github/workflows/python-app.yml' 10 | - 'bilix/**' 11 | - 'pyproject.toml' 12 | branches: [ "master" ] 13 | pull_request: 14 | paths: 15 | - '.github/workflows/python-app.yml' 16 | - 'bilix/**' 17 | - 'pyproject.toml' 18 | branches: [ "master" ] 19 | 20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | # You can use PyPy versions in python-version. 28 | # For example, pypy-2.7 and pypy-3.8 29 | matrix: 30 | python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] 31 | 32 | steps: 33 | - uses: actions/checkout@v3 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 42 | pip install -e . 43 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .fleet 4 | .pytest_cache 5 | videos 6 | __pycache__/ 7 | *.egg-info/ 8 | *.pyc 9 | venv*/ 10 | build/ 11 | dist/ 12 | docs/.vitepress/dist 13 | docs/.vitepress/cache 14 | node_modules 15 | .venv 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # bilix 开发指南 2 | 3 | 感谢你对贡献bilix有所兴趣,在你开始之前可以阅读下面的一些提示。请注意,bilix正快速迭代, 4 | 如果你在阅读本文档时发现有些内容已经过时,请以master分支的代码为准。 5 | 6 | # 开始之前 7 | 8 | 在一切开始之前,你需要先 **fork** 本仓库,然后clone你fork的仓库到你的本地: 9 | 10 | ```shell 11 | git clone https://github.com/your_user_name/bilix 12 | ``` 13 | 14 | 拉取至本地后,我**建议**你在独立的python环境中进行测试和开发,确认后进行本地源码可编辑安装: 15 | 16 | ```shell 17 | pip install -e . 
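# Optional sanity check (a suggested addition, not part of the original steps):
# after the editable install above, confirm the CLI entry point works by printing the version
bilix -v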
18 | ``` 19 | 20 | 试试bilix命令能否正常执行。通过测试了?至此,你可以在本地开发bilix了🍻 21 | 22 | # bilix 结构 23 | 24 | 在动手改动代码之前你需要对bilix的结构有一定的了解,下面是bilix的大致目录和各模块相应功能: 25 | 26 | ```text 27 | bilix 28 | ├── __init__.py 29 | ├── __main__.py 30 | ├── _process.py # 多进程相关 31 | ├── cli 32 | │   ├── assign.py # 分配任务,动态导入相关 33 | │   └── main.py # 命令行入口 34 | ├── download 35 | │   ├── base_downloader.py 36 | │   ├── base_downloader_m3u8.py # 基础m3u8下载器 37 | │   ├── base_downloader_part.py # 基础分段文件下载器 38 | │   └── utils.py # 下载相关的一些工具函数 39 | ├── exception.py 40 | ├── log.py 41 | ├── progress 42 | │   ├── abc.py # 进度条抽象类 43 | │   ├── cli_progress.py # 命令行进度条 44 | │   └── ws_progress.py 45 | ├── serve 46 | │   ├── __init__.py 47 | │   ├── app.py 48 | │   ├── auth.py 49 | │   ├── serve.py 50 | │   └── user.py 51 | ├── sites # 站点扩展目录,稍后介绍 52 | └── utils.py # 通用工具函数 53 | ``` 54 | 55 | ## 基础下载器 56 | 57 | bilix在`bilix.download`中提供了两种基础下载器,m3u8下载器和分段文件下载器。 58 | 它们基于`httpx`乃至更底层的`asyncio`及IO多路复用,并且集成了速度控制,并发控制,断点续传,时间段切片,进度条显示等许多实用功能。 59 | bilix的站点扩展下载功能都将基于这些基础下载器完成,基础下载器本身也提供cli服务 60 | 61 | ## 下载器是如何提供cli服务的 62 | 63 | 在bilix中,一个类只要实现了`handle`方法,就可以被注册到命令行(cli)中,`handle`方法的函数签名为 64 | 65 | ```python 66 | @classmethod 67 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 68 | ... 69 | ``` 70 | 71 | handle函数的实现应该满足下面三个原则: 72 | 73 | 1. 如果类根据`method` `keys` `options`认为自己不应该承担下载任务,`handle`函数应该返回`None` 74 | 2. 如果类可以承担任务,但发现`method`不在自己的可接受范围内,应该抛出`HandleMethodError`异常 75 | 3. 如果类可以承担任务,且`method`在自己的可接受范围内,应该返回两个值,第一个值为下载器实例,第二个值为下载coroutine 76 | 77 | Q:🙋为什么我看到有的下载器返回的是类本身,以及下载函数对象? 78 | 79 | ```python 80 | @classmethod 81 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 82 | if method == 'f' or method == 'get_file': 83 | return cls, cls.get_file 84 | ``` 85 | 86 | A:为了偷懒,如果返回值是类以及下载函数对象,将根据命令行参数及type hint自动组装为实例和coroutine, 87 | 适用于当命令行options的名字和方法,类参数名字、类型一致的情况 88 | 89 | 其实`handle`函数给你了较大的自由,你可以根据自己的需求,自由的组合出适合你的下载器的cli服务 90 | 91 | ## 如何快速添加一个站点的支持 92 | 93 | 在`bilix/sites`下,已经有一些站点的支持,如果你想要添加一个新的站点支持,可以按照下面的步骤进行: 94 | 95 | 1. 在`sites`文件夹下新建一个站点文件夹,例如`example` 96 | 2. 在`example`文件夹下添加站点的api模块`api.py`,仿照其他站点的格式实现从输入网页url到输出视频url,视频title的各种api 97 | 3. 在`example`文件夹下添加站点api模块的测试`api_test.py`,让大家随时测试站点是否可用 98 | 4. 在`example`文件夹下添加站点的下载器`donwloader.py`,定义`DownloaderExample` 99 | 类,根据该站点使用的传输方法选择相应的`BaseDownloader`进行继承,然后在类中定义好下载视频的方法,并实现`handle` 100 | 方法。另外你还可以添加`downloader_test.py`来验证你的下载器是否可用 101 | 5. 在`example`文件夹下添加`__init__.py`,将`DownloaderExample`类导入,并且在`__all__`中添加`DownloaderExample`以方便bilix找到你的下载器 102 | 103 | 搞定,使用bilix命令测试一下吧 104 | 105 | 当前已经有其他开发者为bilix对其他站点的适配做出了贡献🎉, 106 | 或许被接受的[New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22)也能为你提供帮助 107 | 108 | -------------------------------------------------------------------------------- /CONTRIBUTING_EN.md: -------------------------------------------------------------------------------- 1 | # Development guide of bilix 2 | 3 | Thank you for your interest in contributing to bilix. Before you start, you can read some tips below. 4 | Please note that bilix is rapidly iterating, if you find some content outdated while reading this document, 5 | please refer to the code of the master branch. 
6 | 7 | # Before starting 8 | 9 | Before anything else, you need to **fork** this repository and then clone your fork: 10 | 11 | ```shell 12 | git clone https://github.com/your_user_name/bilix 13 | ``` 14 | 15 | After cloning, I **recommend** that you test and develop in an isolated Python environment, 16 | and then install the local source in editable mode: 17 | 18 | ```shell 19 | pip install -e . 20 | ``` 21 | 22 | Check whether the `bilix` command runs normally. Passed the test? At this point, 23 | you can develop bilix locally🍻 24 | 25 | # Structure of bilix 26 | 27 | Before making any changes to the code, you should have some understanding of the structure of bilix. 28 | 29 | ```text 30 | bilix 31 | ├── __init__.py 32 | ├── __main__.py 33 | ├── _process.py # related to multiprocessing 34 | ├── cli 35 | │   ├── assign.py # assign tasks, dynamically import related 36 | │   └── main.py # command line entry 37 | ├── download 38 | │   ├── base_downloader.py 39 | │   ├── base_downloader_m3u8.py # basic m3u8 downloader 40 | │   ├── base_downloader_part.py # basic segmented file downloader 41 | │   └── utils.py # some utils for download 42 | ├── exception.py 43 | ├── log.py 44 | ├── progress 45 | │   ├── abc.py # abstract class of progress 46 | │   ├── cli_progress.py # progress for cli 47 | │   └── ws_progress.py 48 | ├── serve 49 | │   ├── __init__.py 50 | │   ├── app.py 51 | │   ├── auth.py 52 | │   ├── serve.py 53 | │   └── user.py 54 | ├── sites # site support 55 | └── utils.py # some utils 56 | ``` 57 | 58 | # BaseDownloader 59 | 60 | bilix provides two base downloaders in `bilix.download`: an m3u8 downloader and a content-range (segmented) file downloader. 61 | They are built on `httpx` and, at a lower level, `asyncio` with I/O multiplexing, and integrate many practical features 62 | such as speed control, concurrency control, resumable downloads, time-range clipping, and progress display. 63 | bilix's site extensions are built on these base downloaders, and the base downloaders 64 | themselves also provide CLI services 65 | 66 | 67 | # How does the downloader provide CLI service 68 | 69 | In bilix, any class that implements the `handle` method can be registered in the command line interface (CLI). 70 | The signature of the `handle` method is 71 | 72 | ```python 73 | @classmethod 74 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 75 | ... 76 | ``` 77 | 78 | The implementation of the `handle` function should follow three principles: 79 | 80 | 1. If, based on `method`, `keys` and `options`, the class decides it should not take the download task, `handle` should return `None` 81 | 2. If the class can take the task, but `method` is not within its acceptable range, it should raise a `HandleMethodError` exception 82 | 3. If the class can take the task and `method` is within its acceptable range, it should return two values: the first is the downloader instance and the second is the download coroutine 83 | 84 | Q: 🙋Why do I see that some downloaders return the class itself and the download function object? 
85 | 86 | ```python 87 | @classmethod 88 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 89 | if method == 'f' or method == 'get_file': 90 | return cls, cls.get_file 91 | ``` 92 | 93 | A: For convenience. If the return value is a class and a function object, they will be automatically assembled into an 94 | instance and a coroutine according to the command line arguments, options and type hints. This shortcut works when the names and types of the command line options match the parameters of the class and the method. 95 | 96 | 97 | # How to add support for a site 98 | 99 | Under `bilix/sites` a number of sites are already supported. If you want to add support for a new site, you can follow the steps below: 100 | 101 | 1. Create a new site folder under the `sites` folder, for example `example` 102 | 2. Add the site's API module `api.py` under the `example` folder, following the format of the other sites, to implement the APIs that turn an input webpage url into the output video url and video title 103 | 3. Add a test for the site API module, `api_test.py`, under the `example` folder, so that everyone can check at any time whether the site is still available 104 | 4. Add the site downloader `downloader.py` under the `example` folder and define a `DownloaderExample` 105 | class. Choose the appropriate `BaseDownloader` to inherit from according to the transport method the site uses, define the video download methods in the class, and implement the `handle` 106 | method. You can also add a `downloader_test.py` to verify that your downloader works. 107 | 5. Add `__init__.py` under the `example` folder, import the `DownloaderExample` class, and add `DownloaderExample` to `__all__` so that bilix can find your downloader 108 | 109 | Done. Test it with the `bilix` command 110 | 111 | Other developers have already contributed support for more sites🎉, 112 | and the accepted [New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22) list may also help you -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [HFrost0] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bilix 2 | 3 | [![GitHub license](https://img.shields.io/github/license/HFrost0/bilix?style=flat-square)](https://github.com/HFrost0/bilix/blob/master/LICENSE) 4 | ![PyPI](https://img.shields.io/pypi/v/bilix?style=flat-square&color=blue) 5 | ![GitHub commit activity](https://img.shields.io/github/commit-activity/m/HFrost0/bilix) 6 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/bilix?label=pypi%20downloads&style=flat-square) 7 | 8 | ⚡️Lightning-fast asynchronous download tool for bilibili and more 9 | 10 | 11 | ## Features 12 | 13 | ### ⚡️ Fast & Async 14 | 15 | Asynchronous high-concurrency support, with controllable concurrency and speed settings. 16 | 17 | ### 😉 Lightweight & User-friendly 18 | 19 | Lightweight, user-friendly CLI with progress notifications, focused on core functionality. 20 | 21 | ### 📝 Fully-featured 22 | 23 | Submissions, anime, TV series, video clips, audio, favourites, danmaku, covers... 24 | 25 | ### 🔨 Extensible 26 | 27 | Extensible Python module suitable for more download scenarios. 28 | 29 | ## Install 30 | 31 | ```shell 32 | pip install bilix 33 | ``` 34 | 35 | For macOS, you can also install `bilix` with `brew`: 36 | 37 | ```shell 38 | brew install bilix 39 | ``` 40 | 41 | ## Usage Example 42 | 43 | * If you prefer to use the command line interface (CLI) 44 | 45 | ```shell 46 | bilix v 'url' 47 | ``` 48 | 49 | > `v` is a short method alias for `get_video` 50 | 51 | * If you prefer to code with Python 52 | 53 | ```python 54 | from bilix.sites.bilibili import DownloaderBilibili 55 | import asyncio 56 | 57 | 58 | async def main(): 59 | async with DownloaderBilibili() as d: 60 | await d.get_video('url') 61 | 62 | 63 | asyncio.run(main()) 64 | ``` 65 | 66 | ## Community 67 | 68 | If you find any bugs or other issues, feel free to raise an [Issue](https://github.com/HFrost0/bilix/issues). 69 | 70 | If you have new ideas or feature requests👍, you are welcome to participate in 71 | the [Discussion](https://github.com/HFrost0/bilix/discussions) 72 | 73 | If you find this project helpful, you can support the author with a [Star](https://github.com/HFrost0/bilix/stargazers)🌟 74 | 75 | ## Contribute 76 | 77 | ❤️ Welcome! 
Details can be found in [Contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING_EN.md) 78 | -------------------------------------------------------------------------------- /bilix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lighting-fast async download tool inspired by w 3 | """ 4 | 5 | __version__ = "0.18.9" 6 | __url__ = "https://github.com/HFrost0/bilix" 7 | -------------------------------------------------------------------------------- /bilix/__main__.py: -------------------------------------------------------------------------------- 1 | from bilix.cli.main import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /bilix/_process.py: -------------------------------------------------------------------------------- 1 | import signal 2 | import sys 3 | from concurrent.futures import ProcessPoolExecutor 4 | from functools import partial 5 | 6 | 7 | def _init(): 8 | def shutdown(*args): 9 | sys.exit(0) 10 | 11 | signal.signal(signal.SIGINT, shutdown) 12 | 13 | 14 | def singleton(cls): 15 | _instance = {} 16 | 17 | def inner(*args, **kwargs): 18 | if cls not in _instance: 19 | _instance[cls] = cls(*args, **kwargs) 20 | return _instance[cls] 21 | 22 | return inner 23 | 24 | 25 | # singleton ProcessPoolExecutor to avoid recreation in spawn process 26 | SingletonPPE = singleton(partial(ProcessPoolExecutor, initializer=_init)) 27 | 28 | if __name__ == '__main__': 29 | p = SingletonPPE(max_workers=5) 30 | p.shutdown() 31 | -------------------------------------------------------------------------------- /bilix/cli/assign.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import inspect 3 | import re 4 | import time 5 | from functools import wraps 6 | from pathlib import Path 7 | from typing import Callable, Union, Tuple 8 | from importlib import import_module 9 | 10 | from bilix.exception import HandleMethodError, HandleError 11 | from bilix.log import logger 12 | 13 | 14 | def kwargs_filter(obj: Union[type, Callable], kwargs: dict): 15 | """ 16 | 17 | :param obj: 18 | :param kwargs: 19 | :return: 20 | """ 21 | sig = inspect.signature(obj) 22 | obj_require = set(sig.parameters.keys()) 23 | 24 | def check(k): 25 | if k in obj_require: 26 | p = sig.parameters[k] 27 | # check type hint 28 | try: 29 | if p.annotation is inspect.Signature.empty or \ 30 | isinstance(kwargs[k], p.annotation): 31 | return True 32 | else: 33 | logger.debug(f"kwarg {k}:{kwargs[k]} has been drop due to type hint missmatch") 34 | return False 35 | except TypeError: # https://peps.python.org/pep-0604/#isinstance-and-issubclass 36 | # lower than 3.10, Union 37 | # TypeError: Subscripted generics cannot be used with class and instance checks 38 | return True 39 | return False 40 | 41 | kwargs = {k: kwargs[k] for k in filter(check, kwargs)} 42 | return kwargs 43 | 44 | 45 | def module_handle_funcs(module): 46 | """find and yield all handle func in module""" 47 | attrs = getattr(module, '__all__', None) 48 | attrs = attrs or dir(module) 49 | for attr_name in attrs: 50 | if attr_name.startswith('__'): 51 | continue 52 | executor_cls = getattr(module, attr_name) 53 | if not inspect.isclass(executor_cls): 54 | continue 55 | handle_func = getattr(executor_cls, 'handle', None) 56 | if handle_func is None: 57 | continue 58 | yield handle_func 59 | 60 | 61 | def auto_assemble(handle_func): 62 | @wraps(handle_func) 63 | def wrapped(cls, method: str, 
keys: Tuple[str, ...], options: dict): 64 | res = handle_func(cls, method, keys, options) 65 | if res is NotImplemented or res is None: 66 | return res 67 | executor, cor = res 68 | # handle func return class instead of instance 69 | if inspect.isclass(executor): 70 | kwargs = kwargs_filter(executor, options) 71 | executor = executor(**kwargs) 72 | logger.debug(f"auto assemble {executor} by {kwargs}") 73 | # handle func return async function instead of coroutine 74 | if inspect.iscoroutinefunction(cor): 75 | kwargs = kwargs_filter(cor, options) 76 | cors = [] 77 | for key in keys: 78 | if not hasattr(cor, '__self__'): # coroutine function has not bound to instance 79 | cors.append(cor(executor, key, **kwargs)) # bound executor to self 80 | else: 81 | cors.append(cor(key, **kwargs)) 82 | logger.debug(f"auto assemble {cor} by {kwargs}") 83 | cor = asyncio.gather(*cors) 84 | return executor, cor 85 | 86 | return wrapped 87 | 88 | 89 | def longest_common_len(str1, str2): 90 | m, n = len(str1), len(str2) 91 | dp = [[0] * (n + 1) for _ in range(m + 1)] 92 | max_length = 0 93 | for i in range(1, m + 1): 94 | for j in range(1, n + 1): 95 | if str1[i - 1] == str2[j - 1]: 96 | dp[i][j] = dp[i - 1][j - 1] + 1 97 | max_length = max(max_length, dp[i][j]) 98 | return max_length 99 | 100 | 101 | def find_sites(): 102 | sites_path = Path(__file__).parent.parent / 'sites' 103 | for site in sites_path.iterdir(): 104 | if not site.is_dir() or not (site / '__init__.py').exists(): 105 | continue 106 | yield site 107 | 108 | 109 | def assign(cli_kwargs): 110 | method = cli_kwargs.pop('method') 111 | keys = cli_kwargs.pop('keys') 112 | options = cli_kwargs 113 | modules = [ 114 | # path, cmp_key 115 | ('download.base_downloader_m3u8', 'm3u8'), 116 | ('download.base_downloader_part', 'file'), 117 | ] 118 | for site in find_sites(): 119 | modules.append((f"sites.{site.name}", site.name)) 120 | pattern = re.compile(r"https?://(?:[\w-]*\.)?([\w-]+)\.([\w-]+)") 121 | if g := pattern.search(keys[0]): 122 | cmp_base = g.group(1) 123 | else: 124 | cmp_base = keys[0] 125 | 126 | def key(x: Tuple[str, str]): 127 | if x[0].startswith("sites"): 128 | return longest_common_len(cmp_base, x[-1]) 129 | else: # base_downloader 130 | return longest_common_len(method, x[-1]) 131 | 132 | for module, _ in sorted(modules, key=key, reverse=True): 133 | a = time.time() 134 | try: 135 | module = import_module(f"bilix.{module}") 136 | except ImportError as e: 137 | logger.debug(f"duo to ImportError <{e}>, skip ") 138 | continue 139 | logger.debug(f"import cost {time.time() - a:.6f} s ") 140 | exc = None 141 | for handle_func in module_handle_funcs(module): 142 | try: 143 | res = handle_func(method, keys, options) 144 | except HandleMethodError as e: 145 | exc = e 146 | continue 147 | if res is NotImplemented or res is None: 148 | continue 149 | executor, cor = res 150 | logger.debug(f"Assign to {executor.__class__.__name__}") 151 | return executor, cor 152 | if exc is not None: # for the module, some handler can handle, but method miss match 153 | raise exc 154 | raise HandleError(f"Can't find any handler for method: '{method}' keys: {keys}") 155 | -------------------------------------------------------------------------------- /bilix/cli/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import typing 3 | from pathlib import Path 4 | import click 5 | import rich 6 | from rich.panel import Panel 7 | from rich.table import Table 8 | 9 | from .. 
import __version__ 10 | from ..log import logger 11 | from .assign import assign 12 | from ..progress.cli_progress import CLIProgress 13 | from ..utils import parse_bytes_str, s2t 14 | from ..exception import HandleError 15 | 16 | 17 | def handle_help(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: 18 | if not value or ctx.resilient_parsing: 19 | return 20 | print_help() 21 | ctx.exit() 22 | 23 | 24 | def handle_version(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: 25 | if not value or ctx.resilient_parsing: 26 | return 27 | print(f"Version {__version__}") 28 | ctx.exit() 29 | 30 | 31 | def handle_debug(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ): 32 | if not value or ctx.resilient_parsing: 33 | return 34 | from rich.traceback import install 35 | install() 36 | logger.setLevel('DEBUG') 37 | logger.debug("Debug on, more information will be shown") 38 | 39 | 40 | def print_help(): 41 | console = rich.console.Console() 42 | console.print(f"\n[bold]bilix {__version__}", justify="center") 43 | console.print("⚡️快如闪电的bilibili下载工具,基于Python现代Async特性,高速批量下载整部动漫,电视剧,up投稿等\n", 44 | justify="center") 45 | console.print("使用方法: bilix [cyan] [OPTIONS][/cyan] ", justify="left") 46 | table = Table.grid(padding=1, pad_edge=False) 47 | table.add_column("Parameter", no_wrap=True, justify="left", style="bold") 48 | table.add_column("Description") 49 | 50 | table.add_row( 51 | "[cyan]", 52 | 'get_series 或 s: 获取整个系列的视频(包括多p投稿,动漫,电视剧,电影,纪录片),也可以下载单个视频\n' 53 | 'get_video 或 v: 获取特定的单个视频,在用户不希望下载系列其他视频的时候可以使用\n' 54 | 'get_up 或 up: 获取某个up的所有投稿视频,支持数量选择,关键词搜索,排序\n' 55 | 'get_cate 或 cate: 获取分区视频,支持数量选择,关键词搜索,排序\n' 56 | 'get_favour 或 fav: 获取收藏夹内视频,支持数量选择,关键词搜索\n' 57 | 'get_collect 或 col:获取合集或视频列表内视频\n' 58 | 'info: 打印url所属资源的详细信息(例如点赞数,画质,编码格式等)' 59 | ) 60 | table.add_row( 61 | "[cyan][/cyan]", 62 | '如使用get_video/get_series,填写视频的url\n' 63 | '如使用get_up,填写b站用户空间页url或用户id\n' 64 | '如使用get_cate,填写分区名称\n' 65 | '如使用get_favour,填写收藏夹页url或收藏夹id\n' 66 | '如使用get_collect,填写合集或者视频列表详情页url\n' 67 | '如使用info,填写任意资源url' 68 | ) 69 | console.print(table) 70 | # console.rule("OPTIONS参数") 71 | table = Table(highlight=True, box=None, show_header=False) 72 | table.add_column("OPTIONS", no_wrap=True, justify="left", style="bold") 73 | table.add_column("type", no_wrap=True, justify="left", style="bold") 74 | table.add_column("Description", ) 75 | table.add_row( 76 | "-d --dir", 77 | '[dark_cyan]str', 78 | "文件的下载目录,默认当前路径下的videos文件夹下,不存在会自动创建" 79 | ) 80 | table.add_row( 81 | "-q --quality", 82 | '[dark_cyan]int | str', 83 | "视频画面质量,默认0为最高画质,越大画质越低,超出范围时自动选最低画质,或者直接使用字符串指定'1080p'等名称" 84 | ) 85 | table.add_row( 86 | "-vc --video-con", 87 | '[dark_cyan]int', 88 | "控制最大同时下载的视频数量,理论上网络带宽越高可以设的越高,默认3", 89 | ) 90 | table.add_row( 91 | "-pc --part-con", 92 | '[dark_cyan]int', 93 | "控制每个媒体的分段并发数,默认10", 94 | ) 95 | table.add_row( 96 | '--cookie', 97 | '[dark_cyan]str', 98 | '有条件的用户可以提供大会员的SESSDATA来下载会员视频' 99 | ) 100 | table.add_row( 101 | "-fb --from-browser", '[dark_cyan]str', 102 | '从哪个浏览器中导入cookies,例如safari,chrome,edge...默认无', 103 | ) 104 | table.add_row( 105 | '--days', 106 | '[dark_cyan]int', 107 | '过去days天中的结果,默认为7,仅get_up, get_cate时生效' 108 | ) 109 | table.add_row( 110 | "-n --num", 111 | '[dark_cyan]int', 112 | "下载前多少个投稿,仅get_up,get_cate,get_favor时生效", 113 | ) 114 | table.add_row( 115 | "--order", 116 | '[dark_cyan]str', 117 | '何种排序,pubdate发布时间(默认), click播放数,scores评论数,stow收藏数,coin硬币数,dm弹幕数, 
仅get_up, get_cate时生效', 118 | ) 119 | table.add_row( 120 | "--keyword", 121 | '[dark_cyan]str', 122 | '搜索关键词, 仅get_up, get_cate,get_favor时生效', 123 | ) 124 | table.add_row( 125 | "-ns --no-series", '', 126 | '只下载搜索结果每个视频的第一p,仅get_up,get_cate,get_favour时生效', 127 | ) 128 | table.add_row( 129 | "-nh --no-hierarchy", '', 130 | '不使用层次目录,所有视频统一保存在下载目录下' 131 | ) 132 | table.add_row( 133 | "--image", '', 134 | '下载视频封面' 135 | ) 136 | table.add_row( 137 | "--subtitle", '', 138 | '下载srt字幕', 139 | ) 140 | table.add_row( 141 | "--dm", '', 142 | '下载弹幕', 143 | ) 144 | table.add_row( 145 | "-oa --only-audio", '', 146 | '仅下载音频,下载的音质固定为最高音质', 147 | ) 148 | table.add_row( 149 | "-p", '[dark_cyan]int, int', 150 | '下载集数范围,例如-p 1 3 只下载P1至P3,仅get_series时生效', 151 | ) 152 | table.add_row( 153 | "--codec", '[dark_cyan]str', 154 | '视频及音频编码(可使用info查看后填写,使用:分隔),可使用完整名称(例如avc1.640032,fLaC)或部分名称(例如avc,hev)', 155 | ) 156 | table.add_row( 157 | "-sl --speed-limit", '[dark_cyan]str', 158 | '最大下载速度,默认无限制。例如:-sl 1.5MB', 159 | ) 160 | table.add_row( 161 | "-sr --stream-retry", '[dark_cyan]int', 162 | '下载过程中发生网络错误后最大重试数,默认5', 163 | ) 164 | table.add_row( 165 | "-tr --time-range", '[dark_cyan]str', 166 | r'下载视频的时间范围,格式如 h:m:s-h:m:s 或 s-s,默认无,仅get_video时生效', 167 | ) 168 | table.add_row("-h --help", '', "帮助信息") 169 | table.add_row("-v --version", '', "版本信息") 170 | table.add_row("--debug", '', "显示debug信息") 171 | console.print(Panel(table, border_style="dim", title="Options", title_align="left")) 172 | 173 | 174 | class BasedQualityType(click.ParamType): 175 | name = "quality" 176 | 177 | def convert(self, value, param, ctx): 178 | try: 179 | value = int(value) 180 | except ValueError: 181 | return value # str 182 | if value in {1080, 720, 480, 360}: 183 | return str(value) 184 | else: 185 | return value # relative choice like 0, 1, 2, 999... 
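# Illustrative behaviour of BasedQualityType.convert above (comments only, a sketch rather than executed code):
#   "-q 0"     -> 0        relative choice, 0 selects the highest available quality
#   "-q 2"     -> 2        relative choice, larger values mean lower quality
#   "-q 1080"  -> "1080"   known resolution numbers (1080/720/480/360) are normalized to strings
#   "-q 1080p" -> "1080p"  non-numeric names are passed through unchanged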
186 | 187 | 188 | class BasedSpeedLimit(click.ParamType): 189 | name = "speed_limit" 190 | 191 | def convert(self, value, param, ctx): 192 | if value is not None: 193 | return parse_bytes_str(value) 194 | 195 | 196 | class BasedTimeRange(click.ParamType): 197 | name = "time_range" 198 | 199 | def convert(self, value, param, ctx): 200 | start_time, end_time = map(s2t, value.split('-')) 201 | return start_time, end_time 202 | 203 | 204 | @click.command(add_help_option=False) 205 | @click.argument("method", type=str) 206 | @click.argument("keys", type=str, nargs=-1, required=True) 207 | @click.option( 208 | "-d", 209 | "--dir", 210 | "path", 211 | type=Path, 212 | default='videos', 213 | ) 214 | @click.option( 215 | '-q', 216 | '--quality', 217 | 'quality', 218 | type=BasedQualityType(), 219 | default=0, # default relatively choice 220 | ) 221 | @click.option( 222 | '-vc', 223 | '--video-con', 224 | 'video_concurrency', 225 | type=int, 226 | default=3, 227 | ) 228 | @click.option( 229 | '-pc', 230 | "--part-con", 231 | "part_concurrency", 232 | type=int, 233 | default=10, 234 | ) 235 | @click.option( 236 | '--cookie', 237 | 'cookie', 238 | type=str, 239 | ) 240 | @click.option( 241 | '--days', 242 | 'days', 243 | type=int, 244 | default=7, 245 | ) 246 | @click.option( 247 | '-n', 248 | '--num', 249 | type=int, 250 | default=10, 251 | ) 252 | @click.option( 253 | '--order', 254 | 'order', 255 | type=str, 256 | default='pubdate', 257 | ) 258 | @click.option( 259 | '--keyword', 260 | 'keyword', 261 | type=str 262 | ) 263 | @click.option( 264 | '-ns', 265 | '--no-series', 266 | 'series', 267 | is_flag=True, 268 | default=True, 269 | ) 270 | @click.option( 271 | '-nh', 272 | '--no-hierarchy', 273 | 'hierarchy', 274 | is_flag=True, 275 | default=True, 276 | ) 277 | @click.option( 278 | '--image', 279 | 'image', 280 | is_flag=True, 281 | default=False, 282 | ) 283 | @click.option( 284 | '--subtitle', 285 | 'subtitle', 286 | is_flag=True, 287 | default=False, 288 | ) 289 | @click.option( 290 | '--dm', 291 | 'dm', 292 | is_flag=True, 293 | default=False, 294 | ) 295 | @click.option( 296 | '-oa', 297 | '--only-audio', 298 | 'only_audio', 299 | is_flag=True, 300 | default=False, 301 | ) 302 | @click.option( 303 | '-p', 304 | 'p_range', 305 | type=(int, int), 306 | ) 307 | @click.option( 308 | '--codec', 309 | 'codec', 310 | type=str, 311 | default='' 312 | ) 313 | @click.option( 314 | '--speed-limit', 315 | '-sl', 316 | 'speed_limit', 317 | type=BasedSpeedLimit(), 318 | default=None, 319 | ) 320 | @click.option( 321 | '--stream-retry', 322 | '-sr', 323 | 'stream_retry', 324 | type=int, 325 | default=5 326 | ) 327 | @click.option( 328 | '--from-browser', 329 | '-fb', 330 | 'browser', 331 | type=str, 332 | ) 333 | @click.option( 334 | '--time-range', 335 | '-tr', 336 | 'time_range', 337 | type=BasedTimeRange(), 338 | default=None, 339 | ) 340 | @click.option( 341 | '-h', 342 | "--help", 343 | is_flag=True, 344 | is_eager=True, 345 | expose_value=False, 346 | callback=handle_help, 347 | ) 348 | @click.option( 349 | '-v', 350 | "--version", 351 | is_flag=True, 352 | is_eager=True, 353 | expose_value=False, 354 | callback=handle_version, 355 | ) 356 | @click.option( 357 | "--debug", 358 | is_flag=True, 359 | is_eager=True, 360 | expose_value=False, 361 | callback=handle_debug, 362 | ) 363 | def main(**kwargs): 364 | loop = asyncio.new_event_loop() # avoid deprecated warning in 3.11 365 | asyncio.set_event_loop(loop) 366 | logger.debug(f'CLI KEY METHOD and OPTIONS: {kwargs}') 367 | try: 368 | # 
CLIProgress.switch_theme(gs="cyan", bs="dark_cyan") 369 | CLIProgress.start() # start progress 370 | if not kwargs['path'].exists(): 371 | kwargs['path'].mkdir(parents=True) 372 | logger.info(f'Directory {kwargs["path"]} not exists, auto created') 373 | executor, cor = assign(kwargs) 374 | loop.run_until_complete(cor) 375 | except HandleError as e: # method no match 376 | logger.error(e) 377 | except KeyboardInterrupt: 378 | logger.info('[cyan]提示:用户中断,重复执行命令可继续下载') 379 | finally: 380 | CLIProgress.stop() # stop rich progress to ensure cursor is repositioned 381 | -------------------------------------------------------------------------------- /bilix/download/base_downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import inspect 3 | import logging 4 | import re 5 | import time 6 | from functools import wraps 7 | from typing import Union, Optional, Tuple 8 | from contextlib import asynccontextmanager 9 | from urllib.parse import urlparse 10 | import aiofiles 11 | import httpx 12 | 13 | from bilix.cli.assign import auto_assemble 14 | from bilix.log import logger as dft_logger 15 | from bilix.download.utils import req_retry, path_check 16 | from bilix.progress.abc import Progress 17 | from bilix.progress.cli_progress import CLIProgress 18 | from bilix.exception import HandleMethodError 19 | from pathlib import Path, PurePath 20 | 21 | __all__ = ['BaseDownloader'] 22 | 23 | 24 | class BaseDownloaderMeta(type): 25 | def __new__(cls, name, bases, dct): 26 | dct['_cli_info'] = {} 27 | dct['_cli_map'] = {} 28 | for method_name, method in dct.items(): 29 | if not method_name.startswith('_') and asyncio.iscoroutinefunction(method): 30 | if 'path' in (sig := inspect.signature(method)).parameters: 31 | dct[method_name] = cls.ensure_path(method, sig) 32 | 33 | if cls.check_unique_method(method, bases): 34 | cli_info = cls.parse_cli_doc(method) 35 | if cli_info: 36 | dct['_cli_info'][method] = cli_info 37 | dct['_cli_map'][method_name] = method 38 | if cli_info['short']: 39 | dct['_cli_map'][cli_info['short']] = method 40 | 41 | return super().__new__(cls, name, bases, dct) 42 | 43 | @staticmethod 44 | def check_unique_method(method_name: str, bases: Tuple[type, ...]): 45 | for base in bases: 46 | if method_name in base.__dict__: 47 | return False 48 | return True 49 | 50 | @staticmethod 51 | def parse_cli_doc(func) -> Optional[dict]: 52 | docstring = func.__doc__ 53 | if not docstring or ':cli:' not in docstring: 54 | return 55 | params_matches = re.findall(r":param (\w+): (.+)", docstring) 56 | params = {param: description for param, description in params_matches} 57 | 58 | cli_short_match = re.search(r":cli: short: (\w+)", docstring) 59 | short_name = cli_short_match.group(1) if cli_short_match else None 60 | 61 | return {"short": short_name, "params": params} 62 | 63 | @staticmethod 64 | def ensure_path(func, sig): 65 | path_index = next(i for i, name in enumerate(sig.parameters) if name == 'path') 66 | 67 | @wraps(func) 68 | async def wrapper(*args, **kwargs): 69 | new_args = list(args) 70 | if path_index < len(args) and isinstance(args[path_index], str): 71 | new_args[path_index] = Path(args[path_index]) 72 | elif 'path' in kwargs and isinstance(kwargs['path'], str): 73 | kwargs['path'] = Path(kwargs['path']) 74 | 75 | return await func(*new_args, **kwargs) 76 | 77 | wrapper.__annotations__['path'] = Union[Path, str] 78 | return wrapper 79 | 80 | 81 | class BaseDownloader(metaclass=BaseDownloaderMeta): 82 | pattern: re.Pattern = 
None 83 | cookie_domain: str = "" 84 | _cli_info: dict 85 | _cli_map: dict 86 | 87 | def __init__( 88 | self, 89 | *, 90 | client: httpx.AsyncClient = None, 91 | browser: str = None, 92 | speed_limit: Union[float, int] = None, 93 | stream_retry: int = 5, 94 | progress: Progress = None, 95 | logger: logging.Logger = None, 96 | ): 97 | """ 98 | 99 | :param client: client used for http request 100 | :param browser: load cookies from which browser 101 | :param speed_limit: global download rate for the downloader, should be a number (Byte/s unit) 102 | :param progress: progress obj 103 | """ 104 | # use cli progress by default 105 | self.progress = progress or CLIProgress() 106 | self.logger = logger or dft_logger 107 | self.client = client if client else httpx.AsyncClient(headers={'user-agent': 'PostmanRuntime/7.29.0'}) 108 | if browser: # load cookies from browser, may need auth 109 | self.update_cookies_from_browser(browser) 110 | assert speed_limit is None or speed_limit > 0 111 | self.speed_limit = speed_limit 112 | self.stream_retry = stream_retry 113 | # active stream number 114 | self._stream_num = 0 115 | 116 | async def __aenter__(self): 117 | await self.client.__aenter__() 118 | return self 119 | 120 | async def __aexit__(self, exc_type, exc_val, exc_tb): 121 | await self.client.__aexit__(exc_type, exc_val, exc_tb) 122 | 123 | async def aclose(self): 124 | """Close transport and proxies for httpx client""" 125 | await self.client.aclose() 126 | 127 | async def get_static(self, url: str, path: Union[str, Path], convert_func=None) -> Path: 128 | """ 129 | 130 | :param url: 131 | :param path: file path without suffix 132 | :param convert_func: function used to convert http bytes content, must be named like ...2... 133 | :return: downloaded file path 134 | """ 135 | # use suffix from convert_func's name 136 | if convert_func: 137 | suffix = '.' + convert_func.__name__.split('2')[-1] 138 | # try to find suffix from url 139 | else: 140 | suffix = PurePath(urlparse(url).path).suffix 141 | path = path.with_name(path.name + suffix) 142 | exist, path = path_check(path) 143 | if exist: 144 | self.logger.info(f'[green]已存在[/green] {path.name}') 145 | return path 146 | res = await req_retry(self.client, url) 147 | content = convert_func(res.content) if convert_func else res.content 148 | async with aiofiles.open(path, 'wb') as f: 149 | await f.write(content) 150 | self.logger.info(f'[cyan]已完成[/cyan] {path.name}') 151 | return path 152 | 153 | @asynccontextmanager 154 | async def _stream_context(self, times: int): 155 | """ 156 | contextmanager to print log, slow down streaming and count active stream number 157 | 158 | :param times: error occur times which is related to sleep time 159 | :return: 160 | """ 161 | self._stream_num += 1 162 | try: 163 | yield 164 | except httpx.HTTPStatusError as e: 165 | if e.response.status_code == 403: 166 | self.logger.warning(f"STREAM slowing down since 403 forbidden {e}") 167 | await asyncio.sleep(10. 
* (times + 1)) 168 | else: 169 | self.logger.warning(f"STREAM {e}") 170 | await asyncio.sleep(.5 * (times + 1)) 171 | raise 172 | except httpx.TransportError as e: 173 | msg = f'STREAM {e.__class__.__name__} 异常可能由于网络条件不佳或并发数过大导致,若重复出现请考虑降低并发数' 174 | self.logger.warning(msg) if times > 2 else self.logger.debug(msg) 175 | await asyncio.sleep(.1 * (times + 1)) 176 | raise 177 | except Exception as e: 178 | self.logger.warning(f'STREAM Unexpected Exception class:{e.__class__.__name__} {e}') 179 | raise 180 | finally: 181 | self._stream_num -= 1 182 | 183 | @property 184 | def stream_num(self): 185 | """current activate network stream number""" 186 | return self._stream_num 187 | 188 | @property 189 | def chunk_size(self) -> Optional[int]: 190 | if self.speed_limit and self.speed_limit < 1e5: # 1e5 limit bound 191 | # only restrict chunk_size when speed_limit is too low 192 | return int(self.speed_limit * 0.1) # 0.1 delay slope 193 | # default to None setup 194 | return None 195 | 196 | async def _check_speed(self, content_size): 197 | if self.speed_limit and (cur_speed := self.progress.active_speed) > self.speed_limit: 198 | t_tgt = content_size / self.speed_limit * self.stream_num 199 | t_real = content_size / cur_speed 200 | t = t_tgt - t_real 201 | await asyncio.sleep(t) 202 | 203 | def update_cookies_from_browser(self, browser: str): 204 | try: 205 | a = time.time() 206 | import browser_cookie3 207 | f = getattr(browser_cookie3, browser.lower()) 208 | self.logger.debug(f"trying to load cookies from {browser}: {self.cookie_domain}, may need auth") 209 | self.client.cookies.update(f(domain_name=self.cookie_domain)) 210 | self.logger.debug(f"load complete, consumed time: {time.time() - a} s") 211 | except AttributeError: 212 | raise AttributeError(f"Invalid Browser {browser}") 213 | 214 | @classmethod 215 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool: 216 | """check if the cls can be handled by this downloader""" 217 | if cls.pattern: 218 | return cls.pattern.match(keys[0]) is not None 219 | else: 220 | return method in cls._cli_map 221 | 222 | @classmethod 223 | @auto_assemble 224 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 225 | if cls._decide_handle(method, keys, options): 226 | try: 227 | method = cls._cli_map[method] 228 | except KeyError: 229 | raise HandleMethodError(cls, method) 230 | return cls, method 231 | -------------------------------------------------------------------------------- /bilix/download/base_downloader_m3u8.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import uuid 3 | from pathlib import Path, PurePath 4 | from typing import Tuple, Union 5 | from urllib.parse import urlparse 6 | import aiofiles 7 | import httpx 8 | import os 9 | import m3u8 10 | from Crypto.Cipher import AES 11 | from m3u8 import Segment 12 | from bilix.download.base_downloader import BaseDownloader 13 | from bilix.download.utils import path_check, merge_files 14 | from bilix import ffmpeg 15 | from .utils import req_retry 16 | 17 | __all__ = ['BaseDownloaderM3u8'] 18 | 19 | 20 | class BaseDownloaderM3u8(BaseDownloader): 21 | """Base Async http m3u8 Downloader""" 22 | 23 | def __init__( 24 | self, 25 | *, 26 | client: httpx.AsyncClient = None, 27 | browser: str = None, 28 | speed_limit: Union[float, int] = None, 29 | stream_retry: int = 5, 30 | progress=None, 31 | logger=None, 32 | # unique params 33 | part_concurrency: int = 10, 34 | video_concurrency: Union[int, 
asyncio.Semaphore] = 3, 35 | ): 36 | super(BaseDownloaderM3u8, self).__init__( 37 | client=client, 38 | browser=browser, 39 | stream_retry=stream_retry, 40 | speed_limit=speed_limit, 41 | progress=progress, 42 | logger=logger 43 | ) 44 | self.v_sema = asyncio.Semaphore(video_concurrency) if isinstance(video_concurrency, int) else video_concurrency 45 | self.part_concurrency = part_concurrency 46 | self.decrypt_cache = {} 47 | 48 | async def _decrypt(self, seg: m3u8.Segment, content: bytearray): 49 | async def get_key(): 50 | key_bytes = (await req_retry(self.client, uri)).content 51 | iv = bytes.fromhex(seg.key.iv.replace('0x', '')) if seg.key.iv is not None else \ 52 | seg.custom_parser_values['iv'] 53 | return AES.new(key_bytes, AES.MODE_CBC, iv) 54 | 55 | uri = seg.key.absolute_uri 56 | if uri not in self.decrypt_cache: 57 | self.decrypt_cache[uri] = asyncio.ensure_future(get_key()) 58 | self.decrypt_cache[uri] = await self.decrypt_cache[uri] 59 | elif asyncio.isfuture(self.decrypt_cache[uri]): 60 | await self.decrypt_cache[uri] 61 | cipher = self.decrypt_cache[uri] 62 | return cipher.decrypt(content) 63 | 64 | async def to_invariant_m3u8(self, m3u8_url: str) -> m3u8.M3U8: 65 | res = await req_retry(self.client, m3u8_url, follow_redirects=True) 66 | m3u8_info = m3u8.loads(res.text) 67 | if not m3u8_info.base_uri: 68 | m3u8_info.base_uri = m3u8_url 69 | if m3u8_info.is_variant: 70 | self.logger.debug(f"m3u8 is variant, use first playlist: {m3u8_info.playlists[0].absolute_uri}") 71 | return await self.to_invariant_m3u8(m3u8_info.playlists[0].absolute_uri) 72 | return m3u8_info 73 | 74 | async def get_m3u8_video(self, m3u8_url: str, path: Union[str, Path], time_range: Tuple[int, int] = None) -> Path: 75 | """ 76 | download video from m3u8 url 77 | :cli: short: m3u8 78 | :param m3u8_url: 79 | :param path: file path or file dir, if dir, filename will be set according to m3u8_url 80 | :param time_range: (start, end) in seconds, if provided, only download the clip and add start-end to filename 81 | :return: downloaded file path 82 | """ 83 | if path.is_dir(): 84 | path = (path / PurePath(urlparse(m3u8_url).path).stem).with_suffix('.mp4') 85 | if time_range: 86 | path = path.with_stem(f"{path.stem}-{time_range[0]}-{time_range[1]}") 87 | exist, path = path_check(path) 88 | if exist: 89 | self.logger.info(f"[green]已存在[/green] {path.name}") 90 | return path 91 | async with self.v_sema: 92 | task_id = await self.progress.add_task(total=None, description=path.name) 93 | m3u8_info = await self.to_invariant_m3u8(m3u8_url) 94 | cors = [] 95 | p_sema = asyncio.Semaphore(self.part_concurrency) 96 | total_time = 0 97 | if time_range: 98 | current_time = 0 99 | start_time, end_time = time_range 100 | inside = False 101 | else: 102 | inside = True 103 | for idx, seg in enumerate(m3u8_info.segments): 104 | if time_range: 105 | current_time += seg.duration 106 | if not inside and current_time > start_time: 107 | inside = True 108 | s = seg.duration - (current_time - start_time) 109 | elif current_time > end_time: 110 | break 111 | if inside: 112 | total_time += seg.duration 113 | # https://stackoverflow.com/questions/50628791/decrypt-m3u8-playlist-encrypted-with-aes-128-without-iv 114 | if seg.key and seg.key.iv is None: 115 | seg.custom_parser_values['iv'] = idx.to_bytes(16, 'big') 116 | cors.append(self._get_seg(seg, path.with_name(f"{path.stem}-{idx}.ts"), task_id, p_sema)) 117 | if len(cors) == 0 and time_range: 118 | raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>") 119 | 
if init_sec := m3u8_info.segments[0].init_section: 120 | async def _get_init(): 121 | r = await req_retry(self.client, init_sec.absolute_uri) 122 | async with aiofiles.open(fn := path.with_name(f"{path.stem}-init"), 'wb') as f: 123 | await f.write(r.content) 124 | return fn 125 | 126 | cors.insert(0, _get_init()) 127 | merge_fn = merge_files 128 | else: 129 | merge_fn = ffmpeg.concat 130 | await self.progress.update(task_id, total_time=total_time) 131 | file_list = await asyncio.gather(*cors) 132 | 133 | await merge_fn(file_list, path) 134 | if time_range: 135 | path_tmp = path.with_stem(str(uuid.uuid4())) 136 | # to save key frame, use 0 as start time instead of s, clip will be a little longer than expected 137 | await ffmpeg.time_range_clip(path, 0, end_time - start_time + s, path_tmp) 138 | os.rename(path_tmp, path) 139 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 140 | await self.progress.update(task_id, visible=False) 141 | return path 142 | 143 | async def _update_task_total(self, task_id, time_part: float, update_size: int): 144 | task = self.progress.tasks[task_id] 145 | if task.total is None: 146 | confirmed_t = time_part 147 | confirmed_b = update_size 148 | else: 149 | confirmed_t = time_part + task.fields['confirmed_t'] 150 | confirmed_b = update_size + task.fields['confirmed_b'] 151 | predicted_total = task.fields['total_time'] * confirmed_b / confirmed_t 152 | await self.progress.update(task_id, total=predicted_total, confirmed_t=confirmed_t, confirmed_b=confirmed_b) 153 | 154 | async def _get_seg(self, seg: Segment, path: Path, task_id, p_sema: asyncio.Semaphore) -> Path: 155 | exists, path = path_check(path) 156 | if exists: 157 | downloaded = os.path.getsize(path) 158 | await self._update_task_total(task_id, time_part=seg.duration, update_size=downloaded) 159 | await self.progress.update(task_id, advance=downloaded) 160 | return path 161 | seg_url = seg.absolute_uri 162 | async with p_sema: 163 | content = None 164 | for times in range(1 + self.stream_retry): 165 | content = bytearray() 166 | try: 167 | async with self.client.stream("GET", seg_url, 168 | follow_redirects=True) as r, self._stream_context(times): 169 | r.raise_for_status() 170 | # pre-update total if content-length is provided and first time to get content 171 | if 'content-length' in r.headers and not content: 172 | await self._update_task_total( 173 | task_id, time_part=seg.duration, update_size=int(r.headers['content-length'])) 174 | async for chunk in r.aiter_bytes(chunk_size=self.chunk_size): 175 | content.extend(chunk) 176 | await self.progress.update(task_id, advance=len(chunk)) 177 | await self._check_speed(len(chunk)) 178 | if 'content-length' not in r.headers: # after-update total if content-length is not provided 179 | await self._update_task_total(task_id, time_part=seg.duration, update_size=len(content)) 180 | break 181 | except (httpx.HTTPStatusError, httpx.TransportError): 182 | continue 183 | else: 184 | raise Exception(f"STREAM 超过重复次数 {seg_url}") 185 | content = self._after_seg(seg, content) 186 | # in case encrypted 187 | if seg.key: 188 | content = await self._decrypt(seg, content) 189 | async with aiofiles.open(path, 'wb') as f: 190 | await f.write(content) 191 | return path 192 | 193 | def _after_seg(self, seg: Segment, content: bytearray) -> bytearray: 194 | """hook for subclass to modify segment content, happened before decrypt""" 195 | return content 196 | -------------------------------------------------------------------------------- /bilix/download/base_downloader_part.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path, PurePath 3 | from typing import Union, List, Iterable, Tuple 4 | from urllib.parse import urlparse 5 | import aiofiles 6 | import httpx 7 | import uuid 8 | import random 9 | import os 10 | from email.message import Message 11 | from pymp4.parser import Box 12 | from bilix.download.base_downloader import BaseDownloader 13 | from bilix.download.utils import path_check, merge_files 14 | from bilix import ffmpeg 15 | from .utils import req_retry 16 | 17 | __all__ = ['BaseDownloaderPart'] 18 | 19 | 20 | class BaseDownloaderPart(BaseDownloader): 21 | """Base Async http Content-Range Downloader""" 22 | 23 | def __init__( 24 | self, 25 | *, 26 | client: httpx.AsyncClient = None, 27 | browser: str = None, 28 | speed_limit: Union[float, int, None] = None, 29 | stream_retry: int = 5, 30 | progress=None, 31 | logger=None, 32 | # unique params 33 | part_concurrency: int = 10, 34 | ): 35 | super(BaseDownloaderPart, self).__init__( 36 | client=client, 37 | browser=browser, 38 | stream_retry=stream_retry, 39 | speed_limit=speed_limit, 40 | progress=progress, 41 | logger=logger 42 | ) 43 | self.part_concurrency = part_concurrency 44 | 45 | async def _pre_req(self, urls: List[str]) -> Tuple[int, str]: 46 | # use GET instead of HEAD due to 404 bug https://github.com/HFrost0/bilix/issues/16 47 | res = await req_retry(self.client, urls[0], follow_redirects=True, headers={'Range': 'bytes=0-1'}) 48 | total = int(res.headers['Content-Range'].split('/')[-1]) 49 | # get filename 50 | if content_disposition := res.headers.get('Content-Disposition', None): 51 | m = Message() 52 | m['content-type'] = content_disposition 53 | filename = m.get_param('filename', '') 54 | else: 55 | filename = '' 56 | # change origin url to redirected position to avoid twice redirect 57 | if res.history: 58 | urls[0] = str(res.url) 59 | return total, filename 60 | 61 | async def get_media_clip( 62 | self, 63 | url_or_urls: Union[str, Iterable[str]], 64 | path: Union[Path, str], 65 | time_range: Tuple[int, int], 66 | init_range: str, 67 | seg_range: str, 68 | get_s: asyncio.Future = None, 69 | set_s: asyncio.Future = None, 70 | task_id=None, 71 | ): 72 | """ 73 | 74 | :param url_or_urls: 75 | :param path: 76 | :param time_range: (start_time, end_time) 77 | :param init_range: xxx-xxx 78 | :param seg_range: xxx-xxx 79 | :param get_s: 80 | :param set_s: 81 | :param task_id: 82 | :return: 83 | """ 84 | upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None) 85 | exist, path = path_check(path) 86 | if exist: 87 | if not upper: 88 | self.logger.info(f'[green]已存在[/green] {path.name}') 89 | return path 90 | 91 | urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls] 92 | init_start, init_end = map(int, init_range.split('-')) 93 | seg_start, seg_end = map(int, seg_range.split('-')) 94 | res = await req_retry(self.client, urls[0], follow_redirects=True, 95 | headers={'Range': f'bytes={seg_start}-{seg_end}'}) 96 | container = Box.parse(res.content) 97 | assert container.type == b'sidx' 98 | if get_s: 99 | start_time = await get_s 100 | end_time = time_range[1] 101 | else: 102 | start_time, end_time = time_range 103 | pre_time, pre_byte = 0, seg_end + 1 104 | inside = False 105 | parts = [(init_start, init_end)] 106 | total = init_end - init_start + 1 107 | s = 0 108 | for idx, ref in enumerate(container.references): 109 | if ref.reference_type != "MEDIA": 110 | 
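# non-MEDIA sidx references point to nested index boxes rather than media segments,
# so they carry no downloadable byte range and are skipped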
self.logger.debug("not a media", ref) 111 | continue 112 | seg_duration = ref.segment_duration / container.timescale 113 | if not inside and start_time < pre_time + seg_duration: 114 | s = start_time - pre_time 115 | inside = True 116 | if inside and end_time < pre_time: 117 | break 118 | if inside: 119 | total += ref.referenced_size 120 | parts.append((pre_byte, pre_byte + ref.referenced_size - 1)) 121 | pre_time += seg_duration 122 | pre_byte += ref.referenced_size 123 | if len(parts) == 1: 124 | raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>") 125 | if set_s: 126 | set_s.set_result(start_time - s) 127 | if task_id is not None: 128 | await self.progress.update( 129 | task_id, 130 | total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total) 131 | else: 132 | task_id = await self.progress.add_task(description=path.name, total=total) 133 | p_sema = asyncio.Semaphore(self.part_concurrency) 134 | 135 | async def get_seg(part_range: Tuple[int, int]): 136 | async with p_sema: 137 | return await self._get_file_part(urls, path=path, part_range=part_range, task_id=task_id) 138 | 139 | file_list = await asyncio.gather(*[get_seg(part_range) for part_range in parts]) 140 | path_tmp = path.with_name(str(uuid.uuid4())) 141 | await merge_files(file_list, path_tmp) 142 | if set_s: 143 | await ffmpeg.time_range_clip(path_tmp, start=0, t=end_time - start_time + s, output_path=path) 144 | else: 145 | await ffmpeg.time_range_clip(path_tmp, start=s, t=end_time - start_time, output_path=path) 146 | if not upper: # no upstream task 147 | await self.progress.update(task_id, visible=False) 148 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 149 | return path 150 | 151 | async def get_file(self, url_or_urls: Union[str, Iterable[str]], path: Union[Path, str], task_id=None) -> Path: 152 | """ 153 | download file by http content-range 154 | :cli: short: f 155 | :param url_or_urls: file url or urls with backups 156 | :param path: file path or dir path, if dir path, filename will be extracted from url 157 | :param task_id: if not provided, a new progress task will be created 158 | :return: downloaded file path 159 | """ 160 | urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls] 161 | upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None) 162 | 163 | if not path.is_dir(): 164 | exist, path = path_check(path) 165 | if exist: 166 | if not upper: 167 | self.logger.info(f'[green]已存在[/green] {path.name}') 168 | return path 169 | 170 | total, req_filename = await self._pre_req(urls) 171 | 172 | if path.is_dir(): 173 | file_name = req_filename if req_filename else PurePath(urlparse(urls[0]).path).name 174 | path /= file_name 175 | exist, path = path_check(path) 176 | if exist: 177 | if not upper: 178 | self.logger.info(f'[green]已存在[/green] {path.name}') 179 | return path 180 | 181 | if task_id is not None: 182 | await self.progress.update( 183 | task_id, 184 | total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total) 185 | else: 186 | task_id = await self.progress.add_task(description=path.name, total=total) 187 | part_length = total // self.part_concurrency 188 | cors = [] 189 | for i in range(self.part_concurrency): 190 | start = i * part_length 191 | end = (i + 1) * part_length - 1 if i < self.part_concurrency - 1 else total - 1 192 | cors.append(self._get_file_part(urls, path=path, part_range=(start, end), task_id=task_id)) 193 | file_list = 
await asyncio.gather(*cors) 194 | await merge_files(file_list, new_path=path) 195 | if not upper: 196 | await self.progress.update(task_id, visible=False) 197 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 198 | return path 199 | 200 | async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[int, int], 201 | task_id) -> Path: 202 | start, end = part_range 203 | part_path = path.with_name(f'{path.name}.{part_range[0]}-{part_range[1]}') 204 | exist, part_path = path_check(part_path) 205 | if exist: 206 | downloaded = os.path.getsize(part_path) 207 | start += downloaded 208 | await self.progress.update(task_id, advance=downloaded) 209 | if start > end: 210 | return part_path # skip already finished 211 | url_idx = random.randint(0, len(urls) - 1) 212 | 213 | for times in range(1 + self.stream_retry): 214 | try: 215 | async with \ 216 | self.client.stream("GET", urls[url_idx], follow_redirects=True, 217 | headers={'Range': f'bytes={start}-{end}'}) as r, \ 218 | self._stream_context(times), \ 219 | aiofiles.open(part_path, 'ab') as f: 220 | r.raise_for_status() 221 | if r.history: # avoid twice redirect 222 | urls[url_idx] = r.url 223 | async for chunk in r.aiter_bytes(chunk_size=self.chunk_size): 224 | await f.write(chunk) 225 | start += len(chunk) 226 | await self.progress.update(task_id, advance=len(chunk)) 227 | await self._check_speed(len(chunk)) 228 | break 229 | except (httpx.HTTPStatusError, httpx.TransportError): 230 | continue 231 | else: 232 | raise Exception(f"STREAM 超过重复次数 {part_path.name}") 233 | return part_path 234 | -------------------------------------------------------------------------------- /bilix/download/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import errno 3 | import os 4 | import random 5 | from functools import wraps 6 | from pathlib import Path 7 | 8 | import aiofiles 9 | import httpx 10 | from typing import Union, Sequence, Tuple, List 11 | from bilix.exception import APIError, APIParseError 12 | from bilix.log import logger 13 | 14 | 15 | async def merge_files(file_list: List[Path], new_path: Path): 16 | first_file = file_list[0] 17 | async with aiofiles.open(first_file, 'ab') as f: 18 | for idx in range(1, len(file_list)): 19 | async with aiofiles.open(file_list[idx], 'rb') as fa: 20 | await f.write(await fa.read()) 21 | os.remove(file_list[idx]) 22 | os.rename(first_file, new_path) 23 | 24 | 25 | async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, Sequence[str]], method='GET', 26 | follow_redirects=False, retry=5, **kwargs) -> httpx.Response: 27 | """Client request with multiple backup urls and retry""" 28 | pre_exc = None # predefine to avoid warning 29 | for times in range(1 + retry): 30 | url = url_or_urls if type(url_or_urls) is str else random.choice(url_or_urls) 31 | try: 32 | res = await client.request(method, url, follow_redirects=follow_redirects, **kwargs) 33 | res.raise_for_status() 34 | except httpx.TransportError as e: 35 | msg = f'{method} {e.__class__.__name__} url: {url}' 36 | logger.warning(msg) if times > 0 else logger.debug(msg) 37 | pre_exc = e 38 | await asyncio.sleep(.1 * (times + 1)) 39 | except httpx.HTTPStatusError as e: 40 | logger.warning(f'{method} {e.response.status_code} {url}') 41 | pre_exc = e 42 | await asyncio.sleep(1. 
* (times + 1)) 43 | except Exception as e: 44 | logger.warning(f'{method} {e.__class__.__name__} 未知异常 url: {url}') 45 | raise e 46 | else: 47 | return res 48 | logger.error(f"{method} 超过重复次数 {url_or_urls}") 49 | raise pre_exc 50 | 51 | 52 | def eclipse_str(s: str, max_len: int = 100): 53 | if len(s) <= max_len: 54 | return s 55 | else: 56 | half_len = (max_len - 1) // 2 57 | return f"{s[:half_len]}…{s[-half_len:]}" 58 | 59 | 60 | def path_check(path: Path, retry: int = 100) -> Tuple[bool, Path]: 61 | """ 62 | check whether path exist, if filename too long, truncate and return valid path 63 | 64 | :param path: path to check 65 | :param retry: max retry times 66 | :return: exist, path 67 | """ 68 | for times in range(retry): 69 | try: 70 | exist = path.exists() 71 | return exist, path 72 | except OSError as e: 73 | if e.errno == errno.ENAMETOOLONG: # filename too long for os 74 | if times == 0: 75 | logger.warning(f"filename too long for os, truncate will be applied. filename: {path.name}") 76 | else: 77 | logger.debug(f"filename too long for os {path.name}") 78 | path = path.with_stem(eclipse_str(path.stem, int(len(path.stem) * .8))) 79 | else: 80 | raise e 81 | raise OSError(f"filename too long for os {path.name}") 82 | 83 | 84 | def raise_api_error(func): 85 | """Decorator to catch exceptions except APIError and HTTPError and raise APIParseError""" 86 | 87 | @wraps(func) 88 | async def wrapped(client: httpx.AsyncClient, *args, **kwargs): 89 | try: 90 | return await func(client, *args, **kwargs) 91 | except (APIError, httpx.HTTPError): 92 | raise 93 | except Exception as e: 94 | raise APIParseError(e, func) from e 95 | 96 | return wrapped 97 | -------------------------------------------------------------------------------- /bilix/exception.py: -------------------------------------------------------------------------------- 1 | class APIError(Exception): 2 | """API Error during request to website""" 3 | 4 | def __init__(self, msg: str, resource): 5 | self.msg = msg 6 | self.resource = resource 7 | 8 | def __str__(self): 9 | return f"{self.msg} resource: {self.resource}" 10 | 11 | 12 | class APIParseError(APIError): 13 | """API Parse Error, maybe cased by website interface change, raise by decorator""" 14 | 15 | def __init__(self, e, func): 16 | self.e = e 17 | self.func = func 18 | 19 | def __str__(self): 20 | return f"APIParseError Caused by {self.e.__class__.__name__} in <{self.func.__module__}:{self.func.__name__}>" 21 | 22 | 23 | class APIResourceError(APIError): 24 | """API Error that resource is not available (like deleted by uploader)""" 25 | 26 | 27 | class APIUnsupportedError(APIError): 28 | """The resource parse is not supported yet""" 29 | 30 | 31 | class APIInvalidError(APIError): 32 | """API request is invalid""" 33 | 34 | 35 | class HandleError(Exception): 36 | """the error related to bilix cli handle""" 37 | 38 | 39 | class HandleMethodError(HandleError): 40 | """the error that handler can not recognize the method""" 41 | 42 | def __init__(self, executor_cls, method): 43 | self.executor_cls = executor_cls 44 | self.method = method 45 | 46 | def __str__(self): 47 | return f"For {self.executor_cls.__name__} method '{self.method}' is not available" 48 | -------------------------------------------------------------------------------- /bilix/ffmpeg.py: -------------------------------------------------------------------------------- 1 | """ 2 | just some useful ffmpeg commands wrapped in python 3 | """ 4 | import os 5 | from anyio import run_process 6 | from typing import List 7 | 
from pathlib import Path 8 | import tempfile 9 | 10 | 11 | async def concat(path_lst: List[Path], output_path: Path, remove=True): 12 | with tempfile.NamedTemporaryFile('w', dir=output_path.parent, delete=False) as fp: 13 | for path in path_lst: 14 | fp.write(f"file '{path.name}'\n") 15 | cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', fp.name, '-c', 'copy', '-loglevel', 'quiet', 16 | str(output_path)] 17 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 18 | await run_process(cmd) 19 | os.remove(fp.name) 20 | if remove: 21 | for path in path_lst: 22 | os.remove(path) 23 | 24 | 25 | async def combine(path_lst: List[Path], output_path: Path, remove=True): 26 | cmd = ['ffmpeg'] 27 | for path in path_lst: 28 | cmd.extend(['-i', str(path)]) 29 | # for flac, use -strict -2 30 | cmd.extend(['-c', 'copy', '-strict', '-2', '-loglevel', 'quiet', str(output_path)]) 31 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 32 | await run_process(cmd) 33 | if remove: 34 | for path in path_lst: 35 | os.remove(path) 36 | 37 | 38 | async def time_range_clip(input_path: Path, start: int, t: int, output_path: Path, remove=True): 39 | # for flac, use -strict -2 40 | cmd = ['ffmpeg', '-ss', f'{start:.1f}', '-t', f'{t:.1f}', '-i', str(input_path), '-codec', 'copy', '-strict', '-2', 41 | '-loglevel', 'quiet', '-f', 'mp4', str(output_path)] 42 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 43 | await run_process(cmd) 44 | if remove: 45 | os.remove(input_path) 46 | -------------------------------------------------------------------------------- /bilix/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rich.logging import RichHandler 3 | 4 | 5 | def get_logger(): 6 | bilix_logger = logging.getLogger("bilix") 7 | # 如果logger已经配置过handler,直接返回logger实例 8 | if bilix_logger.hasHandlers(): 9 | return bilix_logger 10 | bilix_logger.setLevel(logging.INFO) 11 | # 创建自定义的RichHandler 12 | custom_rich_handler = RichHandler( 13 | show_time=False, 14 | show_path=False, 15 | markup=True, 16 | keywords=RichHandler.KEYWORDS + ['STREAM'], 17 | rich_tracebacks=True 18 | ) 19 | # 设置日志格式 20 | formatter = logging.Formatter("{message}", style="{", datefmt="[%X]") 21 | custom_rich_handler.setFormatter(formatter) 22 | # 为logger添加自定义的RichHandler 23 | bilix_logger.addHandler(custom_rich_handler) 24 | return bilix_logger 25 | 26 | 27 | logger = get_logger() 28 | -------------------------------------------------------------------------------- /bilix/progress/abc.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Optional, Any 3 | 4 | 5 | class Progress(ABC): 6 | """Abstract Class for bilix download progress, checkout to design your own progress""" 7 | 8 | @classmethod 9 | @abstractmethod 10 | def start(cls): 11 | """start to show the progress""" 12 | 13 | @classmethod 14 | @abstractmethod 15 | def stop(cls): 16 | """stop to show the progress""" 17 | 18 | @abstractmethod 19 | def tasks(self): 20 | """return the tasks""" 21 | 22 | @abstractmethod 23 | def active_speed(self) -> Optional[float]: 24 | """return current active speed (bit/s)""" 25 | 26 | @abstractmethod 27 | async def add_task( 28 | self, 29 | description: str, 30 | start: bool = True, 31 | total: Optional[float] = None, 32 | completed: int = 0, 33 | visible: bool = True, 34 | **fields, 35 | ): 36 | """async add a task to progress""" 37 | 38 | @abstractmethod 39 | async def update( 40 | self, 41 | task_id, 42 | *, 43 | 
total: Optional[float] = None, 44 | completed: Optional[float] = None, 45 | advance: Optional[float] = None, 46 | description: Optional[str] = None, 47 | visible: Optional[bool] = None, 48 | refresh: bool = False, 49 | **fields: Any 50 | ): 51 | """async update a task status""" 52 | -------------------------------------------------------------------------------- /bilix/progress/cli_progress.py: -------------------------------------------------------------------------------- 1 | from bilix.progress.abc import Progress 2 | from typing import Optional, Any, Set 3 | from rich.theme import Theme 4 | from rich.style import Style 5 | from rich.spinner import Spinner 6 | from rich.progress import Progress as RichProgress, TaskID, \ 7 | TextColumn, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn, ProgressColumn 8 | 9 | 10 | class SpinnerColumn(ProgressColumn): 11 | def __init__(self, style="progress.spinner", speed: float = 1.0): 12 | self.waiting = Spinner("dqpb", style=style) 13 | self.downloading = Spinner("dots", style=style, speed=speed) 14 | self.merging = Spinner("line", style=style, speed=speed) 15 | super().__init__() 16 | 17 | def render(self, task): 18 | t = task.get_time() 19 | if task.total is None: 20 | return self.waiting.render(t) 21 | elif task.finished: 22 | return self.merging.render(t) 23 | else: 24 | return self.downloading.render(t) 25 | 26 | 27 | class CLIProgress(Progress): 28 | # Only one live display may be active at once 29 | _progress = RichProgress( 30 | SpinnerColumn(speed=2.), 31 | TextColumn("[progress.description]{task.description}"), 32 | TextColumn("[progress.percentage]{task.percentage:>4.1f}%"), 33 | BarColumn(), 34 | DownloadColumn(), 35 | TransferSpeedColumn(), 36 | TextColumn('ETA'), 37 | TimeRemainingColumn(), 38 | transient=True, 39 | ) 40 | 41 | def __init__(self): 42 | self._active_ids: Set[TaskID] = set() 43 | 44 | @classmethod 45 | def start(cls): 46 | cls._progress.start() 47 | 48 | @classmethod 49 | def stop(cls): 50 | cls._progress.stop() 51 | 52 | @property 53 | def tasks(self): 54 | return self._progress.tasks 55 | 56 | @staticmethod 57 | def _cat_description(description, max_length=33): 58 | mid = (max_length - 3) // 2 59 | return description if len(description) < max_length else f'{description[:mid]}...{description[-mid:]}' 60 | 61 | async def add_task( 62 | self, 63 | description: str, 64 | start: bool = True, 65 | total: Optional[float] = None, 66 | completed: int = 0, 67 | visible: bool = True, 68 | **fields: Any, 69 | ) -> TaskID: 70 | task_id = self._progress.add_task(description=self._cat_description(description), 71 | start=start, total=total, completed=completed, visible=visible, **fields) 72 | self._active_ids.add(task_id) 73 | return task_id 74 | 75 | @property 76 | def active_speed(self): 77 | return sum(self._progress.tasks[task_id].speed for task_id in self._active_ids 78 | if self._progress.tasks[task_id].speed) 79 | 80 | async def update( 81 | self, 82 | task_id: TaskID, 83 | *, 84 | total: Optional[float] = None, 85 | completed: Optional[float] = None, 86 | advance: Optional[float] = None, 87 | description: Optional[str] = None, 88 | visible: Optional[bool] = None, 89 | refresh: bool = False, 90 | **fields: Any, 91 | ) -> None: 92 | if description: 93 | description = self._cat_description(description) 94 | self._progress.update(task_id, total=total, completed=completed, advance=advance, 95 | description=description, visible=visible, refresh=refresh, **fields) 96 | if self._progress.tasks[task_id].finished and 
task_id in self._active_ids: 97 | self._active_ids.remove(task_id) 98 | 99 | @classmethod 100 | def switch_theme(cls, bs="rgb(95,138,239)", gs="rgb(65,165,189)"): 101 | cls._progress.console.push_theme(Theme({ 102 | # "progress.data.speed": Style(color=bs), 103 | "progress.download": Style(color=gs), 104 | "progress.percentage": Style(color=gs), 105 | "progress.spinner": Style(color=bs), 106 | "progress.remaining": Style(color=gs), 107 | # "bar.back": Style(color="grey23"), 108 | "bar.complete": Style(color=bs), 109 | "bar.finished": Style(color=gs), 110 | "bar.pulse": Style(color=bs), 111 | })) 112 | -------------------------------------------------------------------------------- /bilix/progress/ws_progress.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from bilix.progress.cli_progress import CLIProgress 5 | 6 | 7 | class WebSocketProgress(CLIProgress): 8 | def __init__(self, sockets): 9 | super().__init__() 10 | self._sockets = sockets 11 | 12 | async def broadcast(self, msg: str): 13 | cors = [s.send_text(msg) for s in self._sockets] 14 | await asyncio.gather(*cors) 15 | 16 | async def add_task(self, **kwargs): 17 | task_id = await super().add_task(**kwargs) 18 | asyncio.create_task( 19 | self.broadcast(json.dumps({'method': 'add_task', 'task_id': task_id, **kwargs})) 20 | ) 21 | return task_id 22 | 23 | async def update(self, task_id, **kwargs) -> None: 24 | await super().update(task_id, **kwargs) 25 | asyncio.create_task( 26 | self.broadcast(json.dumps({'method': 'update', "task_id": task_id, **kwargs})) 27 | ) 28 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderBilibili 2 | from .informer import InformerBilibili 3 | 4 | __all__ = ['DownloaderBilibili', 'InformerBilibili'] 5 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | from urllib.parse import quote 5 | import httpx 6 | from pydantic import field_validator, BaseModel, Field 7 | from typing import Union, List, Tuple, Dict, Optional 8 | import json5 9 | from danmakuC.bilibili import parse_view 10 | from bilix.download.utils import req_retry, raise_api_error 11 | from bilix.sites.bilibili.utils import parse_ids_from_url 12 | from bilix.utils import legal_title 13 | from bilix.exception import APIInvalidError, APIError, APIResourceError, APIUnsupportedError 14 | import hashlib 15 | import time 16 | 17 | dft_client_settings = { 18 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', 'referer': 'https://www.bilibili.com'}, 19 | 'cookies': {'CURRENT_FNVAL': '4048'}, 20 | 'http2': True 21 | } 22 | 23 | 24 | @raise_api_error 25 | async def get_cate_meta(client: httpx.AsyncClient) -> dict: 26 | """ 27 | 获取b站分区元数据 28 | 29 | :param client: 30 | :return: 31 | """ 32 | cate_info = {} 33 | res = await req_retry(client, 'https://s1.hdslb.com/bfs/static/laputa-channel/client/assets/index.c0ea30e6.js') 34 | cate_data = re.search('Za=([^;]*);', res.text).groups()[0] 35 | cate_data = json5.loads(cate_data)['channelList'] 36 | for i in cate_data: 37 | if 'sub' in i: 38 | for j in i['sub']: 39 | cate_info[j['name']] = j 40 | cate_info[i['name']] = i 41 | return cate_info 42 | 43 | 44 | 
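# usage sketch (illustrative only): the api helpers in this module all take a
# pre-configured httpx.AsyncClient, e.g.
#   client = httpx.AsyncClient(**dft_client_settings)
#   cate_meta = await get_cate_meta(client)  # inside an async function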
@raise_api_error 45 | async def get_list_info(client: httpx.AsyncClient, url_or_sid: str, ): 46 | """ 47 | 获取视频列表信息 48 | 49 | :param url_or_sid: 50 | :param client: 51 | :return: 52 | """ 53 | if url_or_sid.startswith('http'): 54 | sid = re.search(r'sid=(\d+)', url_or_sid).groups()[0] 55 | else: 56 | sid = url_or_sid 57 | res = await req_retry(client, f'https://api.bilibili.com/x/series/series?series_id={sid}') # meta api 58 | meta = json.loads(res.text) 59 | mid = meta['data']['meta']['mid'] 60 | params = {'mid': mid, 'series_id': sid, 'ps': meta['data']['meta']['total']} 61 | list_res, up_info = await asyncio.gather( 62 | req_retry(client, 'https://api.bilibili.com/x/series/archives', params=params), 63 | get_up_info(client, str(mid)), 64 | ) 65 | list_info = json.loads(list_res.text) 66 | list_name = meta['data']['meta']['name'] 67 | up_name = up_info.get('name', '') 68 | bvids = [i['bvid'] for i in list_info['data']['archives']] 69 | return list_name, up_name, bvids 70 | 71 | 72 | @raise_api_error 73 | async def get_collect_info(client: httpx.AsyncClient, url_or_sid: str): 74 | """ 75 | 获取合集信息 76 | 77 | :param url_or_sid: 78 | :param client: 79 | :return: 80 | """ 81 | sid = re.search(r'sid=(\d+)', url_or_sid).groups()[0] if url_or_sid.startswith('http') else url_or_sid 82 | params = {'season_id': sid} 83 | res = await req_retry(client, 'https://api.bilibili.com/x/space/fav/season/list', params=params) 84 | data = json.loads(res.text) 85 | medias = data['data']['medias'] 86 | info = data['data']['info'] 87 | col_name, up_name = info['title'], medias[0]['upper']['name'] 88 | bvids = [i['bvid'] for i in data['data']['medias']] 89 | return col_name, up_name, bvids 90 | 91 | 92 | @raise_api_error 93 | async def get_favour_page_info(client: httpx.AsyncClient, url_or_fid: str, pn=1, ps=20, keyword=''): 94 | """ 95 | 获取收藏夹信息(分页) 96 | 97 | :param url_or_fid: 98 | :param pn: 99 | :param ps: 100 | :param keyword: 101 | :param client: 102 | :return: 103 | """ 104 | if url_or_fid.startswith('http'): 105 | fid = re.findall(r'fid=(\d+)', url_or_fid)[0] 106 | else: 107 | fid = url_or_fid 108 | params = {'media_id': fid, 'pn': pn, 'ps': ps, 'keyword': keyword, 'order': 'mtime'} 109 | res = await req_retry(client, 'https://api.bilibili.com/x/v3/fav/resource/list', params=params) 110 | data = json.loads(res.text)['data'] 111 | fav_name, up_name = data['info']['title'], data['info']['upper']['name'] 112 | bvids = [i['bvid'] for i in data['medias'] if i['title'] != '已失效视频'] 113 | total_size = data['info']['media_count'] 114 | return fav_name, up_name, total_size, bvids 115 | 116 | 117 | @raise_api_error 118 | async def get_cate_page_info(client: httpx.AsyncClient, cate_id, time_from, time_to, pn=1, ps=30, 119 | order='click', keyword=''): 120 | """ 121 | 获取分区视频信息(分页) 122 | 123 | :param cate_id: 124 | :param pn: 125 | :param ps: 126 | :param order: 127 | :param keyword: 128 | :param time_from: 129 | :param time_to: 130 | :param client: 131 | :return: 132 | """ 133 | params = {'search_type': 'video', 'view_type': 'hot_rank', 'cate_id': cate_id, 'pagesize': ps, 134 | 'keyword': keyword, 'page': pn, 'order': order, 'time_from': time_from, 'time_to': time_to} 135 | res = await req_retry(client, 'https://s.search.bilibili.com/cate/search', params=params) 136 | info = json.loads(res.text) 137 | bvids = [i['bvid'] for i in info['result']] 138 | return bvids 139 | 140 | 141 | async def _add_sign(client: httpx.AsyncClient, params: dict): 142 | """添加b站api签名到params中 143 | :param params: 144 | :return: 145 | """ 146 | 
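# WBI signing as implemented below: img_key and sub_key are read from the nav api response,
# concatenated, reordered by the OE index table and truncated to 32 chars to form the mixin
# key; the params (plus a wts timestamp) are sorted, joined as key=value pairs, suffixed with
# the mixin key and md5-hashed to produce w_rid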
OE = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 147 | 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 148 | 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 149 | 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 150 | 20, 34, 44, 52] 151 | res = await req_retry( 152 | client, "https://api.bilibili.com/x/web-interface/nav" 153 | ) 154 | info = json.loads(res.text) 155 | img_val = info['data']['wbi_img']['img_url'].split('/')[-1].split('.')[0] 156 | sub_val = info['data']['wbi_img']['sub_url'].split('/')[-1].split('.')[0] 157 | val = img_val + sub_val 158 | request_token = ''.join([val[v] for v in OE])[:32] 159 | 160 | wts = int(time.time()) 161 | params["wts"] = wts 162 | data = dict(sorted(params.items())) 163 | data_str = "&".join([f"{k}={v}" for k, v in data.items()]) + request_token 164 | md5 = hashlib.md5(data_str.encode("utf-8")).hexdigest() 165 | params["w_rid"] = md5 166 | return params 167 | 168 | 169 | def _find_mid(space_url: str): 170 | return re.search(r'^https://space.bilibili.com/(\d+)/?', space_url).group(1) 171 | 172 | 173 | @raise_api_error 174 | async def get_up_video_info(client: httpx.AsyncClient, url_or_mid: str, pn=1, ps=30, order="pubdate", keyword=""): 175 | """ 176 | 获取up主信息 177 | 178 | :param url_or_mid: 179 | :param pn: 180 | :param ps: 181 | :param order: 182 | :param keyword: 183 | :param client: 184 | :return: 185 | """ 186 | if url_or_mid.startswith("http"): 187 | mid = re.findall(r"/(\d+)", url_or_mid)[0] 188 | else: 189 | mid = url_or_mid 190 | 191 | params = {"mid": mid, "order": order, "ps": ps, "pn": pn, "keyword": quote(keyword or "")} 192 | await _add_sign(client, params) 193 | 194 | res = await req_retry(client, "https://api.bilibili.com/x/space/wbi/arc/search", params=params) 195 | info = json.loads(res.text) 196 | up_name = info["data"]["list"]["vlist"][0]["author"] 197 | total_size = info["data"]["page"]["count"] 198 | bv_ids = [i["bvid"] for i in info["data"]["list"]["vlist"]] 199 | return up_name, total_size, bv_ids 200 | 201 | 202 | async def get_up_info(client: httpx.AsyncClient, url_or_mid: str): 203 | if url_or_mid.startswith("http"): 204 | mid = _find_mid(url_or_mid) 205 | else: 206 | mid = url_or_mid 207 | params = {"mid": mid} 208 | await _add_sign(client, params) 209 | res = await req_retry(client, "https://api.bilibili.com/x/space/wbi/acc/info", params=params) 210 | data = json.loads(res.text)['data'] 211 | return data 212 | 213 | 214 | class Media(BaseModel): 215 | base_url: str 216 | backup_url: Optional[List[str]] = None 217 | size: Optional[int] = None 218 | width: Optional[int] = None 219 | height: Optional[int] = None 220 | suffix: Optional[str] = None 221 | quality: Optional[str] = None 222 | codec: Optional[str] = None 223 | segment_base: Optional[dict] = None 224 | 225 | @property 226 | def urls(self): 227 | """the copy of all url including backup""" 228 | return [self.base_url, *self.backup_url] if self.backup_url else [self.base_url] 229 | 230 | 231 | class Dash(BaseModel): 232 | duration: int 233 | videos: List[Media] 234 | audios: List[Media] 235 | video_formats: Dict[str, Dict[str, Media]] 236 | audio_formats: Dict[str, Optional[Media]] 237 | 238 | @classmethod 239 | def from_dict(cls, play_info: dict): 240 | dash = play_info['dash'] # may raise KeyError 241 | video_formats = {} 242 | quality_map = {} 243 | for d in play_info['support_formats']: 244 | quality_map[d['quality']] = d['new_description'] 245 | video_formats[d['new_description']] = {} 246 | videos = [] 247 | for d in 
dash['video']: 248 | if d['id'] not in quality_map: 249 | continue # https://github.com/HFrost0/bilix/issues/93 250 | quality = quality_map[d['id']] 251 | m = Media(quality=quality, codec=d['codecs'], **d) 252 | video_formats[quality][m.codec] = m 253 | videos.append(m) 254 | 255 | audios = [] 256 | audio_formats = {} 257 | if dash.get('audio', None): # some video have NO audio 258 | d = dash['audio'][0] 259 | m = Media(quality="default", suffix='.aac', codec=d['codecs'], **d) 260 | audios.append(m) 261 | audio_formats[m.quality] = m 262 | if dash['dolby']['type'] != 0: 263 | quality = "dolby" 264 | audio_formats[quality] = None 265 | if dash['dolby'].get('audio', None): 266 | d = dash['dolby']['audio'][0] 267 | m = Media(quality=quality, suffix='.eac3', codec=d['codecs'], **d) 268 | audios.append(m) 269 | audio_formats[m.quality] = m 270 | if dash.get('flac', None): 271 | quality = "flac" 272 | audio_formats[quality] = None 273 | if d := dash['flac']['audio']: 274 | m = Media(quality=quality, suffix='.flac', codec=d['codecs'], **d) 275 | audios.append(m) 276 | audio_formats[m.quality] = m 277 | return cls(duration=dash['duration'], videos=videos, audios=audios, 278 | video_formats=video_formats, audio_formats=audio_formats) 279 | 280 | def choose_video(self, quality: Union[int, str], video_codec: str) -> Media: 281 | # 1. absolute choice with quality name like 4k 1080p '1080p 60帧' 282 | if isinstance(quality, str): 283 | for k in self.video_formats: 284 | if k.upper().startswith(quality.upper()): # incase 1080P->1080p 285 | for c in self.video_formats[k]: 286 | if c.startswith(video_codec): 287 | return self.video_formats[k][c] 288 | # 2. relative choice 289 | else: 290 | keys = [k for k in self.video_formats.keys() if self.video_formats[k]] 291 | quality = min(quality, len(keys) - 1) 292 | k = keys[quality] 293 | for c in self.video_formats[k]: 294 | if c.startswith(video_codec): 295 | return self.video_formats[k][c] 296 | raise KeyError(f"no match for video quality: {quality} codec: {video_codec}") 297 | 298 | def choose_audio(self, audio_codec: str) -> Optional[Media]: 299 | if len(self.audios) == 0: # some video has no audio 300 | return 301 | for k in self.audio_formats: 302 | if self.audio_formats[k] and self.audio_formats[k].codec.startswith(audio_codec): 303 | return self.audio_formats[k] 304 | raise KeyError(f'no match for audio codec: {audio_codec}') 305 | 306 | def choose_quality(self, quality: Union[str, int], codec: str = '') -> Tuple[Media, Optional[Media]]: 307 | v_codec, a_codec, *_ = codec.split(':') + [""] 308 | video, audio = self.choose_video(quality, v_codec), self.choose_audio(a_codec) 309 | return video, audio 310 | 311 | 312 | class Status(BaseModel): 313 | view: int = Field(description="播放量") 314 | danmaku: int = Field(description="弹幕数") 315 | coin: int = Field(description="硬币数") 316 | like: int = Field(description="点赞数") 317 | reply: int = Field(description="回复数") 318 | favorite: int = Field(description="收藏数") 319 | share: int = Field(description="分享数") 320 | follow: Optional[int] = Field(default=None, description="追剧数/追番数") 321 | 322 | @field_validator('view', mode="before") 323 | @classmethod 324 | def no_view(cls, v): 325 | return 0 if v == '--' else v 326 | 327 | 328 | class Page(BaseModel): 329 | p_name: str 330 | p_url: str 331 | 332 | 333 | class VideoInfo(BaseModel): 334 | title: str 335 | aid: int 336 | cid: int 337 | ep_id: Optional[int] = None 338 | p: int 339 | pages: List[Page] # [[p_name, p_url], ...] 
340 | img_url: str 341 | status: Status 342 | bvid: Optional[str] = None 343 | dash: Optional[Dash] = None 344 | other: Optional[List[Media]] = None # durl resource: flv, mp4. 345 | desc: Optional[str] = None 346 | tags: Optional[List[str]] = None 347 | 348 | 349 | def _parse_bv_html(url, html: str) -> VideoInfo: 350 | init_info = re.search(r'', html).groups()[0] 397 | data = json.loads(data) 398 | queries = data['props']['pageProps']['dehydratedState']['queries'] 399 | season_info = queries[0]['state']['data']['seasonInfo'] 400 | media_info = season_info['mediaInfo'] 401 | stat = media_info['stat'] 402 | status = Status(coin=stat['coins'], view=stat['views'], danmaku=stat['danmakus'], share=stat['share'], 403 | like=stat['likes'], reply=stat['reply'], favorite=stat['favorite'], follow=stat['favorites']) 404 | title = legal_title(media_info['title']) 405 | desc = media_info['evaluate'] 406 | episodes = media_info['episodes'] 407 | path: str = url.split('?')[0].split('/')[-1] 408 | ep_id = path[2:] if path.startswith('ep') else str(episodes[0]["ep_id"]) 409 | p = 0 410 | aid, cid, bvid = 0, 0, "" 411 | pages = [] 412 | img_url = '' 413 | for i, ep in enumerate(episodes): 414 | if str(ep["ep_id"]) == ep_id: 415 | p = i 416 | aid, cid, bvid = ep["aid"], ep["cid"], ep["bvid"] 417 | img_url = ep["cover"] 418 | pages.append(Page(p_name=legal_title(ep["playerEpTitle"]), p_url=ep["link"])) 419 | video_info = VideoInfo( 420 | title=title, status=status, desc=desc, 421 | aid=aid, cid=cid, bvid=bvid, p=p, pages=pages, 422 | img_url=img_url, ep_id=ep_id, 423 | ) 424 | return video_info 425 | 426 | 427 | @raise_api_error 428 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 429 | try: 430 | # try to get video info from web front-end first 431 | return await _get_video_info_from_html(client, url) 432 | except APIInvalidError: 433 | # try to get video info from api if web front-end is banned 434 | return await _get_video_info_from_api(client, url) 435 | 436 | 437 | async def _get_video_info_from_html(client: httpx.AsyncClient, url: str) -> VideoInfo: 438 | res = await req_retry(client, url, follow_redirects=True) 439 | if str(res.url).startswith("https://www.bilibili.com/festival"): 440 | raise APIInvalidError("特殊节日页面", url) 441 | html = res.text 442 | if "window._riskdata_" in html: 443 | raise APIInvalidError("web 前端访问被风控", url) 444 | if "window.__INITIAL_STATE__" in html: 445 | return _parse_bv_html(url, html) 446 | elif "__NEXT_DATA__" in html: 447 | video_info = _parse_ep_html(url, html) 448 | await _attach_ep_dash(client, video_info) 449 | return video_info 450 | else: 451 | raise APIUnsupportedError("未知页面类型", url) 452 | 453 | 454 | async def _get_video_info_from_api(client: httpx.AsyncClient, url: str) -> VideoInfo: 455 | assert '/av' in url or '/BV' in url # TODO: only support BV or av url 456 | video_info = await _get_video_basic_info_from_api(client, url) 457 | # can not be parallelized since we need to get cid first 458 | await _attach_dash_and_durl_from_api(client, video_info) 459 | return video_info 460 | 461 | 462 | async def _attach_ep_dash(client: httpx.AsyncClient, video_info: VideoInfo): 463 | params = { 464 | 'support_multi_audio': True, 465 | 'avid': video_info.aid, 466 | 'cid': video_info.cid, 467 | 'fnver': 0, 468 | 'fnval': 4048, 469 | 'fourk': 1, 470 | 'ep_id': video_info.ep_id, 471 | } 472 | res = await req_retry(client, 'https://api.bilibili.com/pgc/player/web/v2/playurl', params=params) 473 | res = json.loads(res.text) 474 | data = 
res['result']['video_info'] 475 | if "dash" in data: 476 | video_info.dash = Dash.from_dict(data) 477 | if "durl" in data: 478 | other = [] 479 | for i in data['durl']: 480 | suffix = re.search(r'\.([a-zA-Z0-9]+)\?', i['url']).group(1) 481 | other.append(Media(base_url=i['url'], backup_url=i['backup_url'], size=i['size'], suffix=suffix)) 482 | video_info.other = other 483 | 484 | 485 | async def _attach_dash_and_durl_from_api(client: httpx.AsyncClient, video_info: VideoInfo): 486 | params = {'cid': video_info.cid, 'bvid': video_info.bvid, 487 | 'qn': 120, # 如无 dash 资源(少数老视频),fallback 到 4K 超清 durl 488 | 'fnval': 4048, # 如 dash 资源可用,请求 dash 格式的全部可用流 489 | 'fourk': 1, # 请求 4k 资源 490 | 'fnver': 0, 'platform': 'pc', 'otype': 'json'} 491 | dash_response = await req_retry(client, 'https://api.bilibili.com/x/player/playurl', 492 | params=params, follow_redirects=True) 493 | dash_json = json.loads(dash_response.text) 494 | if dash_json['code'] != 0: 495 | raise APIResourceError(dash_json['message'], video_info.bvid) 496 | dash, other = None, [] 497 | if 'dash' in dash_json['data']: 498 | dash = Dash.from_dict(dash_json['data']) 499 | if 'durl' in dash_json['data']: 500 | for i in dash_json['data']['durl']: 501 | suffix = re.search(r'\.([a-zA-Z0-9]+)\?', i['url']).group(1) 502 | other.append(Media(base_url=i['url'], backup_url=i['backup_url'], size=i['size'], suffix=suffix)) 503 | video_info.dash, video_info.other = dash, other 504 | 505 | 506 | async def _get_video_basic_info_from_api(client: httpx.AsyncClient, url) -> VideoInfo: 507 | """通过 view api 获取视频的基本信息,不包括 dash 或 durl(other) 视频流资源""" 508 | aid, bvid, selected_page_num = parse_ids_from_url(url) 509 | params = {'bvid': bvid} if bvid else {'aid': aid} 510 | r = await req_retry(client, 'https://api.bilibili.com/x/web-interface/view', 511 | params=params, follow_redirects=True) 512 | raw_json = json.loads(r.text) 513 | if raw_json['code'] != 0: 514 | raise APIResourceError(raw_json['message'], raw_json['message']) 515 | title = legal_title(raw_json['data']['title']) 516 | h1_title = title # TODO: 根据视频类型,使 h1_title 与实际网页标题的格式一致 517 | aid = raw_json['data']['aid'] 518 | bvid = raw_json['data']['bvid'] 519 | base_url = f"https://www.bilibili.com/video/{bvid}/" 520 | status = Status(**raw_json['data']['stat']) 521 | pages = [] 522 | p = None 523 | cid = None 524 | for idx, i in enumerate(raw_json['data']['pages']): 525 | page_num = int(i['page']) 526 | if page_num == selected_page_num: 527 | p = idx # selected_page_num 的分p 在 pages 列表中的 index 位置 528 | cid = int(i['cid']) # selected_page_num 的分p 的 cid 529 | p_url = f"{base_url}?p={page_num}" 530 | p_name = f"P{page_num}-{i['part']}" 531 | pages.append(Page(p_name=p_name, p_url=p_url)) 532 | assert p is not None, f"没有找到分P: p{selected_page_num},请检查输入" # cid 也会是 None 533 | img_url = raw_json['data']['pic'] 534 | basic_video_info = VideoInfo(title=title, h1_title=h1_title, aid=aid, cid=cid, status=status, 535 | p=p, pages=pages, img_url=img_url, bvid=bvid, dash=None, other=None) 536 | return basic_video_info 537 | 538 | 539 | @raise_api_error 540 | async def get_subtitle_info(client: httpx.AsyncClient, bvid, cid): 541 | params = {'bvid': bvid, 'cid': cid} 542 | res = await req_retry(client, 'https://api.bilibili.com/x/player/v2', params=params) 543 | info = json.loads(res.text) 544 | if info['code'] == -400: 545 | raise APIError(f'未找到字幕信息', params) 546 | return [[f'http:{i["subtitle_url"]}', i['lan_doc']] for i in info['data']['subtitle']['subtitles']] 547 | 548 | 549 | @raise_api_error 550 | async def 
get_dm_urls(client: httpx.AsyncClient, aid, cid) -> List[str]: 551 | params = {'oid': cid, 'pid': aid, 'type': 1} 552 | res = await req_retry(client, f'https://api.bilibili.com/x/v2/dm/web/view', params=params) 553 | view = parse_view(res.content) 554 | total = int(view['dmSge']['total']) 555 | return [f'https://api.bilibili.com/x/v2/dm/web/seg.so?oid={cid}&type=1&segment_index={i + 1}' for i in range(total)] 556 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | import asyncio 4 | from datetime import datetime, timedelta 5 | from bilix.sites.bilibili import api 6 | 7 | client = httpx.AsyncClient(**api.dft_client_settings) 8 | 9 | 10 | # https://stackoverflow.com/questions/61022713/pytest-asyncio-has-a-closed-event-loop-but-only-when-running-all-tests 11 | @pytest.fixture(scope="session") 12 | def event_loop(): 13 | try: 14 | loop = asyncio.get_running_loop() 15 | except RuntimeError: 16 | loop = asyncio.new_event_loop() 17 | yield loop 18 | loop.close() 19 | 20 | 21 | @pytest.mark.asyncio 22 | async def test_get_cate_meta(): 23 | data = await api.get_cate_meta(client) 24 | assert '舞蹈' in data and "sub" in data["舞蹈"] 25 | assert "宅舞" in data and 'tid' in data['宅舞'] 26 | 27 | 28 | @pytest.mark.asyncio 29 | async def test_get_list_info(): 30 | list_name, up_name, bvids = await api.get_list_info( 31 | client, 32 | "https://space.bilibili.com/369750017/channel/seriesdetail?sid=2458228") 33 | assert list_name == '瘦腰腹跟练' 34 | assert len(bvids) > 0 and bvids[0].startswith('BV') 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_get_collect_info(): 39 | list_name, up_name, bvids = await api.get_collect_info( 40 | client, 41 | "https://space.bilibili.com/54296062/channel/collectiondetail?sid=412818&ctype=0") 42 | assert list_name == 'asyncio协程' 43 | assert len(bvids) > 0 and bvids[0].startswith('BV') 44 | 45 | 46 | @pytest.mark.asyncio 47 | async def test_get_favour_page_info(): 48 | fav_name, up_name, total_size, bvids = await api.get_favour_page_info(client, "69072721") 49 | assert fav_name == '默认收藏夹' 50 | assert len(bvids) > 0 and bvids[0].startswith('BV') 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_get_cate_page_info(): 55 | time_to = datetime.now() 56 | time_from = time_to - timedelta(days=7) 57 | time_from, time_to = time_from.strftime('%Y%m%d'), time_to.strftime('%Y%m%d') 58 | meta = await api.get_cate_meta(client) 59 | bvids = await api.get_cate_page_info(client, cate_id=meta['宅舞']['tid'], time_from=time_from, time_to=time_to) 60 | assert len(bvids) > 0 and bvids[0].startswith('BV') 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_get_up_video_info(): 65 | up_name, total_size, bvids = await api.get_up_video_info(client, "316568752", keyword="什么") 66 | assert len(bvids) > 0 and bvids[0].startswith('BV') 67 | 68 | 69 | # GitHub actions problem... 
70 | # @pytest.mark.asyncio 71 | # async.md def test_get_special_audio(): 72 | # # Dolby 73 | # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV13L4y1K7th') 74 | # assert data.dash['dolby']['type'] != 0 75 | # # Hi-Res 76 | # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV16K411S7sk') 77 | # assert data.dash['flac']['display'] 78 | 79 | 80 | @pytest.mark.asyncio 81 | async def test_get_video_info(): 82 | methods = (api._get_video_info_from_html, api._get_video_info_from_api) 83 | for method in methods: 84 | # 单个bv视频 85 | data = await method(client, "https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0") 86 | assert len(data.pages) == 1 87 | assert data.p == 0 88 | assert data.bvid 89 | assert data.img_url.startswith('http://') or data.img_url.startswith('https://') 90 | assert data.dash 91 | # 多个bv视频 92 | data = await method(client, "https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 93 | assert len(data.pages) > 1 94 | assert data.p == 4 95 | assert data.bvid 96 | if method is api._get_video_info_from_api: 97 | continue 98 | # 电视剧 99 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss24053?spm_id_from=333.337.0.0") 100 | assert len(data.pages) > 1 101 | assert data.status.follow 102 | # 动漫 103 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss5043?spm_id_from=333.337.0.0") 104 | assert len(data.pages) > 1 105 | assert data.status.follow 106 | # 电影 107 | data = await method(client, 108 | "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") 109 | assert data.title == '天气之子' 110 | assert data.status.follow 111 | # 纪录片 112 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss40509?from_spmid=666.9.hotlist.3") 113 | assert len(data.pages) > 1 114 | assert data.status.follow 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_get_subtitle_info(): 119 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV1hS4y1m7Ma") 120 | data = await api.get_subtitle_info(client, data.bvid, data.cid) 121 | assert data[0][0].startswith('http') 122 | assert data[0][1] 123 | 124 | 125 | @pytest.mark.asyncio 126 | async def test_get_dm_info(): 127 | data = await api.get_video_info(client, 128 | "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") 129 | data = await api.get_dm_urls(client, data.aid, data.cid) 130 | assert len(data) > 0 131 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/downloader_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.bilibili import DownloaderBilibili 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_get_collect_or_list(): 8 | d = DownloaderBilibili() 9 | await d.get_collect_or_list('https://space.bilibili.com/54296062/channel/collectiondetail?sid=412818&ctype=0', 10 | quality=999) 11 | await d.get_collect_or_list('https://space.bilibili.com/8251621/channel/seriesdetail?sid=2323334&ctype=0', 12 | quality=999) 13 | await d.aclose() 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_get_favour(): 18 | d = DownloaderBilibili() 19 | await d.get_favour("69072721", num=1, quality=999) 20 | await d.aclose() 21 | 22 | 23 | @pytest.mark.asyncio 24 | async def test_get_cate(): 25 | d = DownloaderBilibili() 26 | await d.get_cate("宅舞", num=1, order="click", keyword="jk", quality=1) 27 | await d.aclose() 28 | 29 | 30 | 
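# note: an integer quality is a relative index (see Dash.choose_video), clamped to the number
# of available formats, so a large value such as 999 simply selects the last listed stream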
@pytest.mark.asyncio 31 | async def test_get_up(): 32 | d = DownloaderBilibili() 33 | await d.get_up("455511061", num=1, order="pubdate", quality=1) 34 | await d.aclose() 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_get_series(): 39 | d = DownloaderBilibili() 40 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), quality=999) 41 | # only audio 42 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), only_audio=True) 43 | # image 44 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(1, 1), image=True, quality=999) 45 | # 单个视频 46 | await d.get_series("https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0", quality=999) 47 | await d.aclose() 48 | 49 | 50 | @pytest.mark.asyncio 51 | async def test_get_dm(): 52 | d = DownloaderBilibili() 53 | await d.get_dm('https://www.bilibili.com/video/BV11Z4y1z7s8?spm_id_from=333.337.search-card.all.click') 54 | await d.aclose() 55 | 56 | 57 | @pytest.mark.asyncio 58 | async def test_get_subtitle(): 59 | d = DownloaderBilibili() 60 | await d.get_subtitle("https://www.bilibili.com/video/BV1hS4y1m7Ma") 61 | await d.aclose() 62 | 63 | 64 | @pytest.mark.asyncio 65 | async def test_choose_quality(): 66 | import os 67 | from bilix.sites.bilibili import api 68 | 69 | client = httpx.AsyncClient() 70 | client.cookies.set('SESSDATA', os.getenv('BILI_TOKEN')) 71 | # dolby 72 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV13L4y1K7th") 73 | try: 74 | video, audio = data.dash.choose_quality(quality=999, codec=":ec-3") 75 | except KeyError: 76 | assert not os.getenv("BILI_TOKEN") 77 | # normal 78 | data.dash.choose_quality(quality="360P", codec="hev") 79 | # hi-res 80 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV16K411S7sk") 81 | try: 82 | video, audio = data.dash.choose_quality(quality='1080P', codec="hev:fLaC") 83 | except KeyError: 84 | assert not os.getenv("BILI_TOKEN") 85 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/informer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Tuple 3 | from rich.tree import Tree 4 | from .downloader import DownloaderBilibili 5 | from . 
import api 6 | from bilix.log import logger 7 | from rich import print as rprint 8 | from bilix.utils import convert_size 9 | from bilix.download.utils import req_retry 10 | from bilix.cli.assign import kwargs_filter 11 | 12 | 13 | class InformerBilibili(DownloaderBilibili): 14 | """A special downloader with functionality to log info of bilibili resources""" 15 | 16 | @classmethod 17 | def parse_url(cls, url: str): 18 | res = super().parse_url(url) 19 | func_name = res.__name__.replace("get_", "info_") 20 | return getattr(cls, func_name) 21 | 22 | async def info_key(self, key): 23 | await self.parse_url(key)(self, key) 24 | 25 | async def info_up(self, url: str): 26 | up_name, total_size, bvids = await api.get_up_video_info(self.client, url) 27 | rprint(up_name) 28 | 29 | async def info_favour(self, url: str): 30 | pass 31 | 32 | async def info_collect_or_list(self, url: str): 33 | pass 34 | 35 | async def info_video(self, url: str): 36 | video_info = await api.get_video_info(self.client, url) 37 | if video_info.dash is None and video_info.other is None: 38 | return logger.warning(f'{video_info.title} 需要大会员或该地区不支持') 39 | elif video_info.other and video_info.dash is None: 40 | return rprint(video_info.other) # todo: beautify durl info 41 | 42 | async def ensure_size(m: api.Media): 43 | if m.size is None: 44 | res = await req_retry(self.client, m.base_url, method='GET', headers={'Range': 'bytes=0-1'}) 45 | m.size = int(res.headers['Content-Range'].split('/')[-1]) 46 | 47 | dash = video_info.dash 48 | cors = [ensure_size(m) for m in dash.videos] + [ensure_size(m) for m in dash.audios] 49 | await asyncio.gather(*cors) 50 | 51 | tree = Tree( 52 | f"[bold reverse] {video_info.title}-{video_info.pages[video_info.p].p_name} [/]" 53 | f" {video_info.status.view:,}👀 {video_info.status.like:,}👍 {video_info.status.coin:,}🪙", 54 | guide_style="bold cyan") 55 | video_tree = tree.add("[bold]画面 Video") 56 | audio_tree = tree.add("[bold]声音 Audio") 57 | leaf_fmt = "codec: {codec:32} size: {size}" 58 | # for video 59 | for quality in dash.video_formats: 60 | p_tree = video_tree.add(quality) 61 | for c in dash.video_formats[quality]: 62 | m = dash.video_formats[quality][c] 63 | p_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) 64 | if len(p_tree.children) == 0: 65 | p_tree.style = "rgb(242,93,142)" 66 | p_tree.add("需要登录或大会员") 67 | # for audio 68 | name_map = {"default": "默认音质", "dolby": "杜比全景声 Dolby", "flac": "Hi-Res无损"} 69 | for k in dash.audio_formats: 70 | sub_tree = audio_tree.add(name_map[k]) 71 | if m := dash.audio_formats[k]: 72 | sub_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) 73 | else: 74 | sub_tree.style = "rgb(242,93,142)" 75 | sub_tree.add("需要登录或大会员") 76 | rprint(tree) 77 | 78 | @classmethod 79 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 80 | if cls.pattern.match(keys[0]) and 'info' == method: 81 | informer = InformerBilibili(sess_data=options['cookie'], **kwargs_filter(cls, options)) 82 | 83 | # in order to maintain order 84 | async def temp(): 85 | for key in keys: 86 | if len(keys) > 1: 87 | logger.info(f"For {key}") 88 | await informer.info_key(key) 89 | 90 | return informer, temp() 91 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/informer_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.bilibili import InformerBilibili 3 | 4 | informer = InformerBilibili() 5 | 6 | 7 | 
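# these tests make live requests and render a rich Tree of the available streams; they carry
# no assertions and mainly verify that parsing and output do not raise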
@pytest.mark.asyncio 8 | async def test_bilibili_informer(): 9 | await informer.info_video('https://www.bilibili.com/video/BV1sG411A7r3') 10 | await informer.info_video('https://www.bilibili.com/video/BV1oG4y1Z7fx') 11 | await informer.info_video('https://www.bilibili.com/video/BV1eV411W7tt') 12 | await informer.info_video("https://www.bilibili.com/bangumi/play/ep508404/") 13 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def parse_ids_from_url(url_or_string: str): 5 | bvid, aid, page_num = None, None, 1 6 | if re.match(r'https?://www.bilibili.com/video/BV\w+', url_or_string) or re.match(r'BV\w+', url_or_string): 7 | bvid = re.search(r'(BV\w+)', url_or_string).groups()[0] 8 | assert bvid.isalnum() 9 | elif re.match(r'https?://www.bilibili.com/video/av\d+', url_or_string) or re.match(r'av\d+', url_or_string): 10 | aid = re.search(r'av(\d+)', url_or_string).groups()[0] 11 | assert aid.isdigit() 12 | aid = int(aid) 13 | else: 14 | raise ValueError(f"{url_or_string} is not a valid bilibili video url") 15 | # ?p=123 or &p=123 16 | if m := re.match(r'.*[?&]p=(\d+)', url_or_string): 17 | page_num = int(m.groups()[0]) 18 | assert page_num >= 1 19 | return aid, bvid, page_num 20 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/utils_test.py: -------------------------------------------------------------------------------- 1 | from bilix.sites.bilibili.utils import parse_ids_from_url 2 | 3 | 4 | def test_parse_ids_from_url(): 5 | strings = [ 6 | "https://www.bilibili.com/video/av170001", 7 | "http://www.bilibili.com/video/BV1Xx41117Tz/?ba=labala&p=3#time=1234", 8 | "av170001", 9 | "BV1sE411w7tQ?p=2&from=search", 10 | "https://www.bilibili.com/video/BV1xx411c7HW?p=1" 11 | ] 12 | results = [ 13 | (170001, None, 1), 14 | (None, 'BV1Xx41117Tz', 3), 15 | (170001, None, 1), 16 | (None, 'BV1sE411w7tQ', 2), 17 | (None, 'BV1xx411c7HW', 1) 18 | ] 19 | for index, string in enumerate(strings): 20 | assert parse_ids_from_url(string) == results[index] 21 | -------------------------------------------------------------------------------- /bilix/sites/cctv/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderCctv 2 | 3 | __all__ = ['DownloaderCctv'] 4 | -------------------------------------------------------------------------------- /bilix/sites/cctv/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | import json 4 | from typing import Sequence, Tuple 5 | 6 | import httpx 7 | import m3u8 8 | 9 | from bilix.download.utils import req_retry, raise_api_error 10 | from bilix.utils import legal_title 11 | 12 | dft_client_settings = { 13 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 14 | 'http2': True 15 | } 16 | 17 | 18 | @raise_api_error 19 | async def get_id(client: httpx.AsyncClient, url: str) -> Tuple[str, str, str]: 20 | res_web = await req_retry(client, url) 21 | pid = re.findall(r'guid ?= ?"(\w+)"', res_web.text)[0] 22 | vide = re.findall(r'/(VIDE\w+)\.', url)[0] 23 | try: 24 | vida = re.findall(r'videotvCodes ?= ?"(\w+)"', res_web.text)[0] 25 | except IndexError: 26 | vida = None 27 | return pid, vide, vida 28 | 29 | 30 | @raise_api_error 31 | async def get_media_info(client: httpx.AsyncClient, pid: str) -> Tuple[str, 
Sequence[str]]: 32 | """ 33 | 34 | :param pid: 35 | :param client: 36 | :return: title and m3u8 urls sorted by quality 37 | """ 38 | res = await req_retry(client, f'https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={pid}') 39 | info_data = json.loads(res.text) 40 | # extract 41 | title = legal_title(info_data['title']) 42 | m3u8_main_url = info_data['hls_url'] 43 | res = await req_retry(client, m3u8_main_url) 44 | m3u8_info = m3u8.loads(res.text) 45 | if m3u8_info.base_uri is None: 46 | m3u8_info.base_uri = re.match(r'(https?://[^/]*)/', m3u8_main_url).groups()[0] 47 | m3u8_urls = list(sorted((i.absolute_uri for i in m3u8_info.playlists), reverse=True, 48 | key=lambda s: int(re.findall(r'/(\d+).m3u8', s)[0]))) 49 | return title, m3u8_urls 50 | 51 | 52 | @raise_api_error 53 | async def get_series_info(client: httpx.AsyncClient, vide: str, vida: str) -> Tuple[str, Sequence[str]]: 54 | """ 55 | 56 | :param vide: 57 | :param vida: 58 | :param client: 59 | :return: title and list of guid(pid) 60 | """ 61 | params = {'mode': 0, 'id': vida, 'serviceId': 'tvcctv', 'p': 1, 'n': 999} 62 | res_meta, res_list = await asyncio.gather( 63 | req_retry(client, f"https://api.cntv.cn/NewVideoset/getVideoAlbumInfoByVideoId?id={vide}&serviceId=tvcctv"), 64 | req_retry(client, f'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew', params=params) 65 | ) 66 | meta_data = json.loads(res_meta.text) 67 | list_data = json.loads(res_list.text) 68 | # extract 69 | title = legal_title(meta_data['data']['title']) 70 | pids = [i['guid'] for i in list_data['data']['list']] 71 | return title, pids 72 | -------------------------------------------------------------------------------- /bilix/sites/cctv/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.cctv import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | pid, vide, vida = await api.get_id(client, "https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml") 11 | data = await api.get_media_info(client, pid) 12 | data = await api.get_series_info(client, vide, vida) 13 | pass 14 | -------------------------------------------------------------------------------- /bilix/sites/cctv/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | 7 | from . 
import api 8 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 9 | 10 | 11 | class DownloaderCctv(BaseDownloaderM3u8): 12 | pattern = re.compile(r'https?://(?:tv\.cctv\.com|tv\.cctv\.cn)/?[?/](?:pid=)?(\d+)(?:&vid=(\d+))?(?:&v=(\d+))?') 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 25 | # unique params 26 | hierarchy: bool = True, 27 | ): 28 | client = client or httpx.AsyncClient(**api.dft_client_settings) 29 | super(DownloaderCctv, self).__init__( 30 | client=client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.hierarchy = hierarchy 40 | 41 | async def get_series(self, url: str, path=Path('.'), quality: int = 0): 42 | """ 43 | :cli: short: s 44 | :param url: 45 | :param path: 46 | :param quality: 47 | :return: 48 | """ 49 | pid, vide, vida = await api.get_id(self.client, url) 50 | if vida is None: # 单个视频 51 | await self.get_video(pid, quality=quality) 52 | else: # 剧集 53 | title, pids = await api.get_series_info(self.client, vide, vida) 54 | if self.hierarchy: 55 | path /= title 56 | path.mkdir(parents=True, exist_ok=True) 57 | await asyncio.gather(*[self.get_video(pid, path, quality) for pid in pids]) 58 | 59 | async def get_video(self, url_or_pid: str, path=Path('.'), quality: int = 0, time_range: Tuple[int, int] = None): 60 | """ 61 | :cli: short: v 62 | :param url_or_pid: 63 | :param path: 64 | :param quality: 65 | :param time_range: 66 | :return: 67 | """ 68 | if url_or_pid.startswith('http'): 69 | pid, _, _ = await api.get_id(self.client, url_or_pid) 70 | else: 71 | pid = url_or_pid 72 | title, m3u8_urls = await api.get_media_info(self.client, pid) 73 | m3u8_url = m3u8_urls[min(quality, len(m3u8_urls) - 1)] 74 | file_path = await self.get_m3u8_video(m3u8_url, path / f"{title}.mp4", time_range=time_range) 75 | return file_path 76 | -------------------------------------------------------------------------------- /bilix/sites/douyin/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderDouyin 2 | 3 | __all__ = ['DownloaderDouyin'] 4 | -------------------------------------------------------------------------------- /bilix/sites/douyin/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Originally From 3 | @Author: https://github.com/Evil0ctal/ 4 | https://github.com/Evil0ctal/Douyin_TikTok_Download_API 5 | 6 | Modified by 7 | @Author: https://github.com/HFrost0/ 8 | """ 9 | import asyncio 10 | import re 11 | import json 12 | from typing import List 13 | import httpx 14 | from pydantic import BaseModel 15 | from bilix.utils import legal_title 16 | from bilix.download.utils import req_retry, raise_api_error 17 | 18 | dft_client_settings = { 19 | 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' 20 | ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' 21 | ' Safari/537.36 Edg/87.0.664.66'}, 22 | 'http2': True 23 | } 24 | 25 | 26 | class VideoInfo(BaseModel): 27 | title: str 28 | author_name: str 29 | wm_urls: List[str] 30 | nwm_urls: List[str] 31 | 
cover: str 32 | dynamic_cover: str 33 | origin_cover: str 34 | 35 | 36 | @raise_api_error 37 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 38 | if short_url := re.findall(r'https://v.douyin.com/\w+/', url): 39 | res = await req_retry(client, short_url[0], follow_redirects=True) 40 | url = str(res.url) 41 | if key := re.search(r'/video/(\d+)', url): 42 | key = key.groups()[0] 43 | else: 44 | key = re.search(r"modal_id=(\d+)", url).groups()[0] 45 | res = await req_retry(client, f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={key}') 46 | data = json.loads(res.text) 47 | data = data['item_list'][0] 48 | # 视频标题 49 | title = legal_title(data['desc']) 50 | # 视频作者昵称 51 | author_name = data['author']['nickname'] 52 | # 有水印视频链接 53 | wm_urls = data['video']['play_addr']['url_list'] 54 | # 无水印视频链接 (在回执JSON中将关键字'playwm'替换为'play'即可获得无水印地址) 55 | nwm_urls = list(map(lambda x: x.replace('playwm', 'play'), wm_urls)) 56 | # 视频封面 57 | cover = data['video']['cover']['url_list'][0] 58 | # 视频动态封面 59 | dynamic_cover = data['video']['dynamic_cover']['url_list'][0] 60 | # 视频原始封面 61 | origin_cover = data['video']['origin_cover']['url_list'][0] 62 | video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, 63 | dynamic_cover=dynamic_cover, origin_cover=origin_cover) 64 | return video_info 65 | 66 | 67 | if __name__ == '__main__': 68 | async def main(): 69 | client = httpx.AsyncClient(**dft_client_settings) 70 | data = await get_video_info(client, 'https://www.douyin.com/video/7132430286415252773') 71 | print(data) 72 | 73 | 74 | asyncio.run(main()) 75 | -------------------------------------------------------------------------------- /bilix/sites/douyin/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.douyin import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.douyin.com/video/7132430286415252773") 11 | pass 12 | -------------------------------------------------------------------------------- /bilix/sites/douyin/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.utils import legal_title 9 | 10 | 11 | class DownloaderDouyin(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(douyin\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int, None] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | ): 25 | client = client or httpx.AsyncClient(**api.dft_client_settings) 26 | super(DownloaderDouyin, self).__init__( 27 | client=client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | ) 35 | 36 | async def get_video(self, url: str, path=Path('.'), image=False): 37 | """ 38 | :cli: short: v 39 | :param url: 40 | :param path: 41 | :param image: 42 | :return: 43 | """ 44 | video_info = await api.get_video_info(self.client, url) 45 | title = legal_title(video_info.author_name, video_info.title) 46 | cors = [self.get_file(video_info.nwm_urls, path=path / f"{title}.mp4")] 47 | if image: 48 | cors.append(self.get_static(video_info.cover, path / title)) 49 | await asyncio.gather(*cors) 50 | -------------------------------------------------------------------------------- /bilix/sites/douyin/downloader_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.douyin import DownloaderDouyin 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_get_video(): 7 | async with DownloaderDouyin() as d: 8 | await d.get_video('https://v.douyin.com/r4tm4Pe/') 9 | 10 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderHanime1 2 | 3 | __all__ = ['DownloaderHanime1'] 4 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/api.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import httpx 3 | from bilix.utils import legal_title 4 | from bilix.download.utils import req_retry, raise_api_error 5 | from bs4 import BeautifulSoup 6 | 7 | BASE_URL = "https://hanime1.me" 8 | dft_client_settings = { 9 | 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' 10 | ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' 11 | ' Safari/537.36 Edg/87.0.664.66', "Referer": BASE_URL}, 12 | 'http2': False 13 | } 14 | 15 | 16 | class VideoInfo(BaseModel): 17 | url: str 18 | avid: str 19 | title: str 20 | video_url: str 21 | img_url: str 22 | 23 | 24 | @raise_api_error 25 | async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: 26 | if url_or_avid.startswith('http'): 27 | url = url_or_avid 28 | avid = url.split('=')[-1] 29 | else: 30 | url = f'{BASE_URL}/watch?v={url_or_avid}' 31 | avid = url_or_avid 32 | res = await req_retry(client, url) 33 | soup = BeautifulSoup(res.text, "html.parser") 34 | title = soup.find('meta', property="og:title")['content'] 35 | title = legal_title(title) 36 | img_url = soup.find('meta', property="og:image")['content'] 37 | video_url = soup.find('input', {'id': 'video-sd'})['value'] 38 | video_info = VideoInfo(url=url, avid=avid, 
title=title, img_url=img_url, video_url=video_url) 39 | return video_info 40 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.hanime1 import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://hanime1.me/watch?v=39123") 11 | assert data.title 12 | data = await api.get_video_info(client, "https://hanime1.me/watch?v=13658") 13 | assert data.title 14 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | from . import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 9 | 10 | 11 | class DownloaderHanime1(BaseDownloaderM3u8, BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(hanime1\.me)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 25 | ): 26 | self.client = client or httpx.AsyncClient(**api.dft_client_settings) 27 | super().__init__( 28 | client=self.client, 29 | browser=browser, 30 | speed_limit=speed_limit, 31 | stream_retry=stream_retry, 32 | progress=progress, 33 | logger=logger, 34 | part_concurrency=part_concurrency, 35 | video_concurrency=video_concurrency, 36 | ) 37 | 38 | async def get_video(self, url: str, path=Path('.'), image=False, time_range: Tuple[int, int] = None): 39 | """ 40 | :cli: short: v 41 | :param url: 42 | :param path: 43 | :param image: 44 | :param time_range: 45 | :return: 46 | """ 47 | video_info = await api.get_video_info(self.client, url) 48 | video_url = video_info.video_url 49 | cors = [ 50 | self.get_m3u8_video( 51 | video_url, path=path / f'{video_info.title}.mp4', time_range=time_range) if '.m3u8' in video_url else 52 | self.get_file(video_url, path=path / f'{video_info.title}.mp4')] 53 | if image: 54 | cors.append(self.get_static(video_info.img_url, path=path / video_info.title)) 55 | await asyncio.gather(*cors) 56 | -------------------------------------------------------------------------------- /bilix/sites/jable/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderJable 2 | 3 | __all__ = ['DownloaderJable'] 4 | -------------------------------------------------------------------------------- /bilix/sites/jable/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pydantic import BaseModel 3 | import httpx 4 | from bs4 import BeautifulSoup 5 | from bilix.utils import legal_title 6 | from bilix.download.utils import raise_api_error, req_retry 7 | 8 | BASE_URL = "https://jable.tv" 9 | dft_client_settings = { 10 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 11 | 'http2': False 12 | } 13 | 14 | 15 | class 
VideoInfo(BaseModel): 16 | url: str 17 | avid: str 18 | title: str 19 | actor_name: str 20 | m3u8_url: str 21 | img_url: str 22 | 23 | 24 | @raise_api_error 25 | async def get_actor_info(client: httpx.AsyncClient, url: str): 26 | res = await req_retry(client, url) 27 | soup = BeautifulSoup(res.text, "html.parser") 28 | actor_name = soup.find('h2', class_='h3-md mb-1').text 29 | urls = [h6.a['href'] for h6 in soup.find('section', class_='pb-3 pb-e-lg-40').find_all('h6')] 30 | return {'actor_name': actor_name, 'urls': urls} 31 | 32 | 33 | @raise_api_error 34 | async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: 35 | if url_or_avid.startswith('http'): 36 | url = url_or_avid 37 | avid = url.split('/')[-2] 38 | else: 39 | url = f'{BASE_URL}/videos/{url_or_avid}/' 40 | avid = url_or_avid 41 | avid = avid.upper() 42 | res = await req_retry(client, url) # proxies default global in httpx 43 | soup = BeautifulSoup(res.text, "html.parser") 44 | title = soup.find('meta', property="og:title")['content'] 45 | title = legal_title(title) 46 | if span := soup.find("span", class_="placeholder rounded-circle"): 47 | actor_name = span['title'] 48 | else: # https://github.com/HFrost0/bilix/issues/45 for some video actor name in different place 49 | actor_name = soup.find("img", class_="avatar rounded-circle")['title'] 50 | img_url = soup.find('meta', property="og:image")['content'] 51 | m3u8_url = re.findall(r'http.*m3u8', res.text)[0] 52 | video_info = VideoInfo(url=url, avid=avid, title=title, img_url=img_url, m3u8_url=m3u8_url, actor_name=actor_name) 53 | return video_info 54 | -------------------------------------------------------------------------------- /bilix/sites/jable/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.jable import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://jable.tv/videos/ssis-533/") 11 | assert data.actor_name 12 | data = await api.get_video_info(client, "https://jable.tv/videos/ssis-448/") 13 | assert data.actor_name 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_get_actor_info(): 18 | data = await api.get_actor_info(client, 'https://jable.tv/models/393ec3548aecc34004d54e03becd2ea9/') 19 | assert data['actor_name'].encode('utf8') == b'\xe4\xbd\x90\xe4\xb9\x85\xe8\x89\xaf\xe5\x92\xb2\xe5\xb8\x8c' 20 | assert data['urls'] 21 | -------------------------------------------------------------------------------- /bilix/sites/jable/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 8 | 9 | 10 | class DownloaderJable(BaseDownloaderM3u8): 11 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(jable\.tv)") 12 | 13 | def __init__( 14 | self, 15 | *, 16 | client: httpx.AsyncClient = None, 17 | browser: str = None, 18 | speed_limit: Union[float, int] = None, 19 | stream_retry: int = 5, 20 | progress=None, 21 | logger=None, 22 | part_concurrency: int = 10, 23 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 24 | # unique params 25 | hierarchy: bool = True, 26 | 27 | ): 28 | client = client or httpx.AsyncClient(**api.dft_client_settings) 29 | super(DownloaderJable, self).__init__( 30 | client=client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.hierarchy = hierarchy 40 | 41 | async def get_actor(self, url: str, path=Path("."), image=True): 42 | """ 43 | download videos of a actor 44 | :cli: short: a 45 | :param url: actor page url 46 | :param path: save path 47 | :param image: download cover 48 | :return: 49 | """ 50 | data = await api.get_actor_info(self.client, url) 51 | if self.hierarchy: 52 | path /= data['actor_name'] 53 | path.mkdir(parents=True, exist_ok=True) 54 | await asyncio.gather(*[self.get_video(url, path, image) for url in data['urls']]) 55 | 56 | async def get_video(self, url: str, path=Path("."), image=True, time_range: Tuple[int, int] = None): 57 | """ 58 | :cli: short: v 59 | :param url: 60 | :param path: 61 | :param image: 62 | :param time_range: 63 | :return: 64 | """ 65 | video_info = await api.get_video_info(self.client, url) 66 | if self.hierarchy: 67 | path /= f"{video_info.avid} {video_info.actor_name}" 68 | path.mkdir(parents=True, exist_ok=True) 69 | cors = [self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f"{video_info.title}.mp4", 70 | time_range=time_range)] 71 | if image: 72 | cors.append(self.get_static(video_info.img_url, path=path / video_info.title, )) 73 | await asyncio.gather(*cors) 74 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderTiktok 2 | 3 | __all__ = ['DownloaderTiktok'] 4 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Originally From 3 | @Author: https://github.com/Evil0ctal/ 4 | https://github.com/Evil0ctal/Douyin_TikTok_Download_API 5 | """ 6 | 7 | import re 8 | import json 9 | import random 10 | from typing import List 11 | import httpx 12 | from pydantic import BaseModel 13 | from bilix.utils import legal_title 14 | from bilix.download.utils import req_retry, raise_api_error 15 | 16 | dft_client_settings = { 17 | 'headers': {'user-agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;' 18 | '+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+' 19 | '(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'}, 20 | 'http2': True 21 | } 22 | 23 | 24 | class VideoInfo(BaseModel): 25 | title: str 26 | author_name: str 27 | wm_urls: List[str] 28 | nwm_urls: List[str] 29 | cover: str 30 | dynamic_cover: str 31 | origin_cover: str 32 | 33 | 34 | @raise_api_error 35 | async def 
get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 36 | if short_url := re.findall(r'https://www.tiktok.com/t/\w+/', url): 37 | res = await req_retry(client, short_url[0], follow_redirects=True) 38 | url = str(res.url) 39 | if key := re.search(r'/video/(\d+)', url): 40 | key = key.groups()[0] 41 | else: 42 | key = re.search(r"/v/(\d+)", url).groups()[0] 43 | params = {'aweme_id': key, 'aid': 1180, 'iid': 6165993682518218889, 44 | 'device_id': random.randint(10 * 10 * 10, 9 * 10 ** 10)} 45 | res = await req_retry(client, 'https://api16-normal-c-useast1a.tiktokv.com/aweme/v1/feed/', params=params) 46 | data = json.loads(res.text) 47 | data = data['aweme_list'][0] 48 | # 视频标题 (如果为空则使用分享标题) 49 | title = legal_title(data['desc'] if data['desc'] != '' else data['share_info']['share_title']) 50 | # 视频作者昵称 51 | author_name = data['author']['nickname'] 52 | # 有水印视频链接 53 | wm_urls = data['video']['download_addr']['url_list'] 54 | # 无水印视频链接 55 | nwm_urls = data['video']['bit_rate'][0]['play_addr']['url_list'] 56 | # 视频封面 57 | cover = data['video']['cover']['url_list'][0] 58 | # 视频动态封面 59 | dynamic_cover = data['video']['dynamic_cover']['url_list'][0] 60 | # 视频原始封面 61 | origin_cover = data['video']['origin_cover']['url_list'][0] 62 | video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, 63 | dynamic_cover=dynamic_cover, origin_cover=origin_cover) 64 | return video_info 65 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.tiktok import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.tiktok.com/@lindaselection/video/7171715528124271877") 11 | assert data.nwm_urls 12 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.utils import legal_title 9 | 10 | 11 | class DownloaderTiktok(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(tiktok\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int, None] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | ): 25 | client = client or httpx.AsyncClient(**api.dft_client_settings) 26 | super(DownloaderTiktok, self).__init__( 27 | client=client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | ) 35 | 36 | async def get_video(self, url: str, path=Path('.'), image=False): 37 | """ 38 | :cli: short: v 39 | :param url: 40 | :param path: 41 | :param image: 42 | :return: 43 | """ 44 | video_info = await api.get_video_info(self.client, url) 45 | title = legal_title(video_info.author_name, video_info.title) 46 | # since the TikTok backup urls are sometimes slow, use the first one 47 | cors = [self.get_file(video_info.nwm_urls[0], path / f'{title}.mp4')] 48 | if image: 49 | cors.append(self.get_static(video_info.cover, path=path / title, )) 50 | await asyncio.gather(*cors) 51 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/downloader_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.tiktok import DownloaderTiktok 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_get_video(): 7 | async with DownloaderTiktok() as d: 8 | await d.get_video('https://www.tiktok.com/@evil0ctal/video/7168978761973550378') 9 | 10 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYhdmp 2 | 3 | __all__ = ['DownloaderYhdmp'] 4 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import random 4 | import re 5 | from pathlib import Path 6 | from pydantic import BaseModel 7 | from typing import Union, List 8 | import httpx 9 | import execjs 10 | from bs4 import BeautifulSoup 11 | from bilix.utils import legal_title 12 | from bilix.download.utils import req_retry as rr, raise_api_error 13 | 14 | BASE_URL = "https://www.yhdmp.cc" 15 | dft_client_settings = { 16 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 17 | 'http2': False 18 | } 19 | _js = None 20 | 21 | 22 | def _get_js(): 23 | global _js 24 | if _js is None: 25 | with open(Path(__file__).parent / 'yhdmp.js', 'r') as f: 26 | _js = execjs.compile(f.read()) 27 | return _js 28 | 29 | 30 | def _get_t2_k2(t1: str, k1: str) -> dict: 31 | new_cookies = _get_js().call("get_t2_k2", t1, k1) 32 | return new_cookies 33 | 34 | 35 | def _decode(data: str) -> str: 36 | return _get_js().call('__getplay_rev_data', data) 37 | 38 | 39 | async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, List[str]], 40 | method: str = 'GET', 41 | follow_redirects: bool = False, 42 | **kwargs): 43 | if 't1' in client.cookies and 'k1' in client.cookies: 44 |
new_cookies = _get_t2_k2(client.cookies['t1'], client.cookies['k1']) 45 | if 't2' in client.cookies: 46 | client.cookies.delete('t2') 47 | if 'k2' in client.cookies: 48 | client.cookies.delete('k2') 49 | client.cookies.update(new_cookies) 50 | 51 | res = await rr(client, url_or_urls, method, follow_redirects, **kwargs) 52 | return res 53 | 54 | 55 | class VideoInfo(BaseModel): 56 | aid: Union[str, int] 57 | play_idx: int 58 | ep_idx: int 59 | title: str 60 | sub_title: str 61 | play_info: List[Union[List[str], List]] # may be empty 62 | m3u8_url: str 63 | 64 | 65 | @raise_api_error 66 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 67 | aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') 68 | play_idx, ep_idx = int(play_idx), int(ep_idx) 69 | # request 70 | res_web = req_retry(client, url) 71 | m3u8_url = get_m3u8_url(url=url, client=client) 72 | if 't1' in client.cookies and 'k1' in client.cookies: 73 | res_web, m3u8_url = await asyncio.gather(res_web, m3u8_url) 74 | else: 75 | res_web, m3u8_url = await res_web, await m3u8_url 76 | # extract 77 | title, sub_title = map(legal_title, 78 | re.search(r'target="_self">([^<]+):([^<]+)', res_web.text).groups()) 79 | soup = BeautifulSoup(res_web.text, 'html.parser') 80 | divs = soup.find_all('div', class_="movurl") 81 | play_info = [] 82 | for div in divs: 83 | play_info.append([[legal_title(a["title"]), f"{BASE_URL}/{a['href']}"] for a in div.find_all("a")]) 84 | video_info = VideoInfo(aid=aid, play_idx=play_idx, ep_idx=ep_idx, title=title, sub_title=sub_title, 85 | play_info=play_info, m3u8_url=m3u8_url) 86 | return video_info 87 | 88 | 89 | @raise_api_error 90 | async def get_m3u8_url(client: httpx.AsyncClient, url): 91 | aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') 92 | params = {"aid": aid, "playindex": play_idx, "epindex": ep_idx, "r": random.random()} 93 | res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) 94 | if res_play.text.startswith("err"): # maybe first time 95 | res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) 96 | data = json.loads(res_play.text) 97 | purl, vurl = _decode(data['purl']), _decode(data['vurl']) 98 | m3u8_url = purl.split("url=")[-1] + vurl 99 | return m3u8_url 100 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.yhdmp import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.yhdmp.cc/vp/22224-1-0.html") 11 | data = await api.get_m3u8_url(client, "https://www.yhdmp.cc/vp/22224-1-0.html") 12 | pass 13 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import httpx 4 | from typing import Sequence, Union, Tuple 5 | from . 
import api 6 | from bilix.utils import legal_title, cors_slice 7 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 8 | 9 | 10 | class DownloaderYhdmp(BaseDownloaderM3u8): 11 | def __init__( 12 | self, 13 | *, 14 | api_client: httpx.AsyncClient = None, 15 | stream_client: httpx.AsyncClient = None, 16 | browser: str = None, 17 | speed_limit: Union[float, int] = None, 18 | stream_retry: int = 5, 19 | progress=None, 20 | logger=None, 21 | part_concurrency: int = 10, 22 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 23 | hierarchy: bool = True, 24 | ): 25 | stream_client = stream_client or httpx.AsyncClient() 26 | super(DownloaderYhdmp, self).__init__( 27 | client=stream_client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | video_concurrency=video_concurrency, 35 | ) 36 | self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) 37 | self.hierarchy = hierarchy 38 | 39 | async def get_series(self, url: str, path=Path('.'), p_range: Sequence[int] = None): 40 | """ 41 | :cli: short: s 42 | :param url: 43 | :param path: 44 | :param p_range: 45 | :return: 46 | """ 47 | video_info = await api.get_video_info(self.api_client, url) 48 | ep_idx = video_info.ep_idx 49 | play_idx = video_info.play_idx 50 | title = video_info.title 51 | if self.hierarchy: 52 | path = path / title 53 | path.mkdir(parents=True, exist_ok=True) 54 | 55 | # no need to reuse get_video since we only need m3u8_url 56 | async def get_video(page_url, name): 57 | m3u8_url = await api.get_m3u8_url(self.api_client, page_url) 58 | await self.get_m3u8_video(m3u8_url=m3u8_url, path=path / name) 59 | 60 | cors = [] 61 | for idx, (sub_title, url) in enumerate(video_info.play_info[play_idx]): 62 | if ep_idx == idx: 63 | cors.append(self.get_m3u8_video(m3u8_url=video_info.m3u8_url, 64 | path=path / f'{legal_title(title, sub_title)}.mp4')) 65 | else: 66 | cors.append(get_video(url, legal_title(title, sub_title))) 67 | if p_range: 68 | cors = cors_slice(cors, p_range) 69 | await asyncio.gather(*cors) 70 | 71 | async def get_video(self, url: str, path=Path('.'), time_range=None): 72 | """ 73 | :cli: short: v 74 | :param url: 75 | :param path: 76 | :param time_range: 77 | :return: 78 | """ 79 | video_info = await api.get_video_info(self.api_client, url) 80 | name = legal_title(video_info.title, video_info.sub_title) 81 | await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) 82 | 83 | @classmethod 84 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool: 85 | return 'yhdmp' in keys[0] 86 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYinghuacd 2 | 3 | __all__ = ['DownloaderYinghuacd'] 4 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pydantic import BaseModel 3 | from typing import Union, List 4 | import httpx 5 | from bs4 import BeautifulSoup 6 | from bilix.download.utils import req_retry, raise_api_error 7 | 8 | BASE_URL = "http://www.yinghuacd.com" 9 | dft_client_settings = { 10 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 11 | 
'http2': False 12 | } 13 | 14 | 15 | class VideoInfo(BaseModel): 16 | title: str 17 | sub_title: str 18 | play_info: List[Union[List[str], List]] # may be empty 19 | m3u8_url: str 20 | 21 | 22 | @raise_api_error 23 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 24 | # request 25 | res = await req_retry(client, url) 26 | m3u8_url = re.search(r'http.*m3u8', res.text)[0] 27 | soup = BeautifulSoup(res.text, 'html.parser') 28 | h1 = soup.find('h1') 29 | title, sub_title = h1.a.text, h1.span.text[1:] 30 | 31 | # extract 32 | play_info = [[a.text, f"{BASE_URL}{a['href']}"] for a in soup.find('div', class_="movurls").find_all('a')] 33 | video_info = VideoInfo(title=title, sub_title=sub_title, play_info=play_info, m3u8_url=m3u8_url) 34 | return video_info 35 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.yinghuacd import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "http://www.yinghuacd.com/v/5606-7.html") 11 | pass 12 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import httpx 4 | import re 5 | from m3u8 import Segment 6 | from typing import Sequence, Union, Tuple 7 | from . import api 8 | from bilix.utils import legal_title, cors_slice 9 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 10 | from bilix.exception import APIError 11 | 12 | 13 | class DownloaderYinghuacd(BaseDownloaderM3u8): 14 | def __init__( 15 | self, 16 | *, 17 | stream_client: httpx.AsyncClient = None, 18 | api_client: httpx.AsyncClient = None, 19 | browser: str = None, 20 | speed_limit: Union[float, int] = None, 21 | stream_retry: int = 5, 22 | progress=None, 23 | logger=None, 24 | part_concurrency: int = 10, 25 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 26 | hierarchy: bool = True, 27 | ): 28 | stream_client = stream_client or httpx.AsyncClient() 29 | super(DownloaderYinghuacd, self).__init__( 30 | client=stream_client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) 40 | self.hierarchy = hierarchy 41 | 42 | def _after_seg(self, seg: Segment, content: bytearray) -> bytearray: 43 | # in case .png 44 | if re.fullmatch(r'.*\.png', seg.absolute_uri): 45 | _, _, content = content.partition(b'\x47\x40') 46 | return content 47 | 48 | async def get_series(self, url: str, path=Path("."), p_range: Sequence[int] = None): 49 | """ 50 | :cli: short: s 51 | :param url: 52 | :param path: 53 | :param p_range: 54 | :return: 55 | """ 56 | video_info = await api.get_video_info(self.api_client, url) 57 | if self.hierarchy: 58 | path /= video_info.title 59 | path.mkdir(parents=True, exist_ok=True) 60 | cors = [self.get_video(u, path=path, video_info=video_info if u == url else None) 61 | for _, u in video_info.play_info] 62 | if p_range: 63 | cors = cors_slice(cors, p_range) 64 | await 
asyncio.gather(*cors) 65 | 66 | async def get_video(self, url: str, path=Path('.'), time_range=None, video_info=None): 67 | """ 68 | :cli: short: v 69 | :param url: 70 | :param path: 71 | :param time_range: 72 | :param video_info: 73 | :return: 74 | """ 75 | if video_info is None: 76 | try: 77 | video_info = await api.get_video_info(self.api_client, url) 78 | except APIError as e: 79 | return self.logger.error(e) 80 | else: 81 | video_info = video_info 82 | name = legal_title(video_info.title, video_info.sub_title) 83 | await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) 84 | 85 | @classmethod 86 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict): 87 | return 'yinghuacd' in keys[0] 88 | -------------------------------------------------------------------------------- /bilix/sites/youtube/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYoutube 2 | 3 | __all__ = ['DownloaderYoutube'] 4 | -------------------------------------------------------------------------------- /bilix/sites/youtube/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from pydantic import BaseModel 4 | import httpx 5 | from bilix.download.utils import req_retry 6 | from bilix.utils import legal_title 7 | 8 | dft_client_settings = { 9 | 'headers': { 10 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' 11 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36', 12 | 'referer': 'https://www.youtube.com/' 13 | }, 14 | } 15 | 16 | 17 | class VideoInfo(BaseModel): 18 | # url: str 19 | title: str 20 | video_url: str 21 | audio_url: str 22 | # img_url: str 23 | 24 | 25 | async def get_video_info(client: httpx.AsyncClient, url: str): 26 | response = await req_retry(client=client, url_or_urls=url) 27 | # 解析 28 | json_str = re.findall('var ytInitialPlayerResponse = (.*?);var', response.text)[0] 29 | json_data = json.loads(json_str) 30 | video_url = json_data['streamingData']['adaptiveFormats'][0]['url'] 31 | audio_url = json_data['streamingData']['adaptiveFormats'][-2]['url'] 32 | title = legal_title(json_data['videoDetails']['title']) 33 | video_info = VideoInfo(video_url=video_url, audio_url=audio_url, title=title) 34 | return video_info 35 | -------------------------------------------------------------------------------- /bilix/sites/youtube/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.youtube import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.youtube.com/watch?v=26lanyBFXw8") 11 | assert data.video_url and data.audio_url and data.title 12 | -------------------------------------------------------------------------------- /bilix/sites/youtube/downloader.py: -------------------------------------------------------------------------------- 1 | import re 2 | import asyncio 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix import ffmpeg 9 | 10 | 11 | class DownloaderYoutube(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(youtube\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | # unique params 25 | video_concurrency: Union[int, asyncio.Semaphore] = 3 26 | ): 27 | client = client or httpx.AsyncClient(**api.dft_client_settings) 28 | super(DownloaderYoutube, self).__init__( 29 | client=client, 30 | browser=browser, 31 | speed_limit=speed_limit, 32 | stream_retry=stream_retry, 33 | progress=progress, 34 | logger=logger, 35 | part_concurrency=part_concurrency 36 | ) 37 | self.video_sema = asyncio.Semaphore(video_concurrency) if type(video_concurrency) is int else video_concurrency 38 | 39 | async def get_video(self, url: str, path=Path('.')): 40 | """ 41 | :cli: short: v 42 | :param url: 43 | :param path: 44 | :return: 45 | """ 46 | async with self.video_sema: 47 | video_info = await api.get_video_info(self.client, url) 48 | video_path = path / (video_info.title + '.mp4') 49 | if video_path.exists(): 50 | return self.logger.info(f'[green]已存在[/green] {video_path.name}') 51 | task_id = await self.progress.add_task(description=video_info.title, upper=True) 52 | path_lst = await asyncio.gather( 53 | self.get_file(url_or_urls=video_info.video_url, path=path / (video_info.title + '-v'), task_id=task_id), 54 | self.get_file(url_or_urls=video_info.audio_url, path=path / (video_info.title + '-a'), task_id=task_id) 55 | ) 56 | await ffmpeg.combine(path_lst, output_path=path / (video_info.title + '.mp4')) 57 | self.logger.info(f'[cyan]已完成[/cyan] {video_path.name}') 58 | await self.progress.update(task_id=task_id, visible=False) 59 | -------------------------------------------------------------------------------- /bilix/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | some useful functions 3 | """ 4 | import html 5 | import json 6 | import re 7 | import time 8 | from functools import wraps 9 | from urllib.parse import quote_plus 10 | from typing import Union, Sequence, Coroutine, List, Tuple, Optional 11 | from bilix.log import logger 12 | 13 | 14 | def cors_slice(cors: Sequence[Coroutine], p_range: Sequence[int]): 15 | h, t = p_range[0] - 1, p_range[1] 16 | assert 0 <= h <= t 17 | [cor.close() for idx, cor in enumerate(cors) if idx < h or idx >= t] # avoid runtime warning 18 | cors = cors[h:t] 19 | return cors 20 | 21 | 22 | def legal_title(*parts: str, join_str: str = '-'): 23 | """ 24 | join several string parts into an os-legal file/dir name (no illegal character and not too long). 25 | auto skip empty. 26 | 27 | :param parts: 28 | :param join_str: the string to join each part 29 | :return: 30 | """ 31 | return join_str.join(filter(lambda x: len(x) > 0, map(replace_illegal, parts))) 32 | 33 | 34 | def replace_illegal(s: str): 35 | """strip, unescape html and replace os illegal character in s""" 36 | s = s.strip() 37 | s = html.unescape(s) # handle & "...
38 | s = re.sub(r"[/\\:*?\"<>|\n\t]", '', s) # replace illegal filename character 39 | return s 40 | 41 | 42 | def convert_size(total_bytes: int) -> str: 43 | unit, suffix = pick_unit_and_suffix( 44 | total_bytes, ["bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"], 1000 45 | ) 46 | return f"{total_bytes / unit:,.2f}{suffix}" 47 | 48 | 49 | def pick_unit_and_suffix(size: int, suffixes: List[str], base: int) -> Tuple[int, str]: 50 | """Borrowed from rich.filesize. Pick a suffix and base for the given size.""" 51 | for i, suffix in enumerate(suffixes): 52 | unit = base ** i 53 | if size < unit * base: 54 | break 55 | else: 56 | raise ValueError('Invalid input') 57 | return unit, suffix 58 | 59 | 60 | def parse_bytes_str(s: str) -> float: 61 | """Parse a string byte quantity (e.g. 1.5MB) into a number of bytes""" 62 | units_map = {unit: i for i, unit in enumerate(['', *'KMGTPEZY'])} 63 | units_re = '|'.join(units_map.keys()) 64 | m = re.fullmatch(rf'(?P<num>\d+(?:\.\d+)?)\s*(?P<unit>{units_re})B?', s) 65 | if not m: 66 | raise ValueError(f"Invalid bytes str {s} to parse to number") 67 | num = float(m.group('num')) 68 | mult = 1000 ** units_map[m.group('unit')] 69 | return num * mult 70 | 71 | 72 | def valid_sess_data(sess_data: Optional[str]) -> str: 73 | """check and encode sess_data""" 74 | # url-encoding sess_data if it's not encoded 75 | # https://github.com/HFrost0/bilix/pull/114 76 | if sess_data and not re.search(r'(%[0-9A-Fa-f]{2})|(\+)', sess_data): 77 | sess_data = quote_plus(sess_data) 78 | logger.debug(f"sess_data encoded: {sess_data}") 79 | return sess_data 80 | 81 | 82 | def t2s(t: int) -> str: 83 | return str(t) 84 | 85 | 86 | def s2t(s: str) -> int: 87 | """ 88 | :param s: hour:minute:second or xx(s) format input 89 | :return: 90 | """ 91 | if ':' not in s: 92 | return int(s) 93 | h, m, s = map(int, s.split(':')) 94 | return h * 60 * 60 + m * 60 + s 95 | 96 | 97 | def json2srt(data: Union[bytes, str, dict]): 98 | b = False 99 | if type(data) is bytes: 100 | data = data.decode('utf-8') 101 | b = True 102 | if type(data) is str: 103 | data = json.loads(data) 104 | 105 | def t2str(t): 106 | ms = int(round(t % 1, 3) * 1000) 107 | s = int(t) 108 | m = s // 60 109 | h = m // 60 110 | m, s = m % 60, s % 60 111 | t_str = f'{h:0>2}:{m:0>2}:{s:0>2},{ms:0>3}' 112 | return t_str 113 | 114 | res = '' 115 | for idx, i in enumerate(data['body']): 116 | from_time, to_time = t2str(i['from']), t2str(i['to']) 117 | content = i['content'] 118 | res += f"{idx + 1}\n{from_time} --> {to_time}\n{content}\n\n" 119 | return res.encode('utf-8') if b else res 120 | 121 | 122 | def timer(func): 123 | @wraps(func) 124 | def wrapper(*args, **kwargs): 125 | start = time.monotonic_ns() 126 | res = func(*args, **kwargs) 127 | logger.debug( 128 | f"{func.__name__} cost {time.monotonic_ns() - start} ns with args: {args}, kwargs: {kwargs} result: {res}") 129 | return res 130 | 131 | return wrapper 132 | -------------------------------------------------------------------------------- /docs/.vitepress/config.ts: -------------------------------------------------------------------------------- 1 | import {defineConfig} from 'vitepress' 2 | 3 | // https://vitepress.dev/reference/site-config 4 | export default defineConfig({ 5 | title: "bilix", 6 | description: "bilix download", 7 | base: '/bilix/', 8 | lastUpdated: true, 9 | themeConfig: { 10 | // https://vitepress.dev/reference/default-theme-config 11 | editLink: { 12 | pattern: 'https://github.com/HFrost0/bilix/edit/master/docs/:path' 13 | }, 14 |
algolia: { 15 | appId: 'F4ZDY9KUXU', 16 | apiKey: '30aaace8ddea0d6f25ac39ea70ce8bd8', 17 | indexName: 'bilix' 18 | }, 19 | footer: { 20 | message: 'Released under the Apache 2.0 License.', 21 | copyright: 'Copyright © 2022-present HFrost0' 22 | }, 23 | socialLinks: [ 24 | {icon: 'github', link: 'https://github.com/HFrost0/bilix'} 25 | ] 26 | }, 27 | 28 | locales: { 29 | root: { 30 | label: '中文', 31 | lang: 'zh', 32 | themeConfig: { 33 | nav: [ 34 | {text: 'Home', link: '/'}, 35 | {text: '安装', link: '/install'}, 36 | {text: '快速上手', link: '/quickstart'} 37 | ], 38 | sidebar: [ 39 | {text: '安装', link: '/install'}, 40 | {text: '快速上手', link: '/quickstart'}, 41 | {text: '进阶使用', link: '/advance_guide'}, 42 | { 43 | text: 'Python调用', 44 | items: [ 45 | {text: '异步基础', link: '/async'}, 46 | {text: '下载案例', link: '/download_examples'}, 47 | {text: 'API案例', link: '/api_examples'} 48 | ] 49 | }, 50 | {text: '更多', link: '/more'}, 51 | ], 52 | } 53 | }, 54 | 55 | en: { 56 | label: 'English', 57 | lang: 'en', // optional, will be added as `lang` attribute on `html` tag 58 | themeConfig: { 59 | nav: [ 60 | {text: 'Home', link: '/en/'}, 61 | {text: 'Install', link: '/en/install'}, 62 | {text: 'Quickstart', link: '/en/quickstart'} 63 | ], 64 | sidebar: [ 65 | {text: 'Install', link: '/en/install'}, 66 | {text: 'Quickstart', link: '/en/quickstart'}, 67 | {text: 'Advance Guide', link: '/en/advance_guide'}, 68 | { 69 | text: 'Python API', 70 | items: [ 71 | {text: 'Async basic', link: '/en/async'}, 72 | {text: 'Download Examples', link: '/en/download_examples'}, 73 | {text: 'API Examples', link: '/en/api_examples'} 74 | ] 75 | }, 76 | {text: 'More', link: '/en/more'}, 77 | ], 78 | }, 79 | } 80 | }, 81 | }) 82 | -------------------------------------------------------------------------------- /docs/.vitepress/theme/index.ts: -------------------------------------------------------------------------------- 1 | import Theme from 'vitepress/theme' 2 | import './style/var.css' 3 | 4 | export default { 5 | extends: Theme, 6 | } 7 | -------------------------------------------------------------------------------- /docs/.vitepress/theme/style/var.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --vp-home-hero-name-color: transparent; 3 | --vp-home-hero-name-background: linear-gradient( 135deg, #79F1A4 10%, #0E5CAD 100%);; 4 | } 5 | -------------------------------------------------------------------------------- /docs/advance_guide.md: -------------------------------------------------------------------------------- 1 | # 进阶使用 2 | 请使用`bilix -h`查看更多参数提示,包括方法名简写,视频画面质量选择,并发量控制,下载速度限制,下载目录等。 3 | 4 | ## 方法名简写 5 | 6 | 觉得`get_series`,`get_video`这些方法名写起来太麻烦了?同感!你可以使用他们的简写,这样快多了: 7 | 8 | ```shell 9 | bilix s 'url' 10 | bilix v 'url' 11 | ... 
12 | ``` 13 | 更多简写请查看`bilix -h` 14 | 15 | ## 登录 16 | 17 | 你是大会员?🥸,两种方式登录 18 | 19 | * 直接填写cookie 20 | 21 | 在`--cookie`参数中填写浏览器缓存的`SESSDATA`cookie,填写后可以下载需要大会员的视频 22 | 23 | * 从浏览器载入cookie 24 | 25 | 在浏览器中登录之后,使用`-fb --from-browser`参数从浏览器中读取cookie,例如`-fb chrome`,使用这种方法可能需要授权,bilix读取浏览器cookie的 26 | 方式为开源项目[browser_cookie3](https://github.com/borisbabic/browser_cookie3)。 27 | :::tip 28 | 如果你总是需要保持登录,在linux和mac系统中你可以使用`alias bilix=bilix --cookie xxxxxx`或`alias bilix=bilix -fb chrome`来为`bilix`命令创建别名 29 | ::: 30 | 31 | ## 画质,音质和编码选择 32 | 33 | 你可以使用`--quality`即`-q`参数选择画面分辨率,bilix支持两种不同的选择方式: 34 | 35 | * 相对选择(默认) 36 | 37 | bilix在默认情况下会为你选择可选的最高画质进行下载(即`-q 0`),如果你想下载第二清晰的可使用`-q 1`进行指定,以此类推,指定序号越大画质越低, 38 | 当超过可选择范围时,默认选择到最低画质,例如你总是可以通过`-q 999`来选择到最低画质。 39 | * 绝对选择 40 | 41 | 在某些时候,你只希望下载720P的视频,但是720P在相对选择中并不总是处于固定的位置,这在下载收藏夹,合集等等场景中经常出现。 42 | 另外有可能你就是喜欢通过`-q 1080P`这样的方式来指定画质。 43 | 没问题,bilix同时也支持通过`-q 4K` `-q '1080P 高码率'`等字符串的形式来直接指定画质,字符串为b站显示的画质名称的子串即可。 44 | 45 | 在更加专业用户的需求中,可能需要指定特定的视频编码进行下载,而b站支持的编码在网页或app中是不可见的,bilix为此设计了方法`info` 46 | , 通过它你可以完全了解该视频的所有信息: 47 | 48 | ```text 49 | bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 50 | 51 | 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 52 | ┣━━ 画面 Video 53 | ┃ ┣━━ HDR 真彩 54 | ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB 55 | ┃ ┣━━ 4K 超清 56 | ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB 57 | ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB 58 | ┃ ┣━━ 1080P 60帧 59 | ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB 60 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB 61 | ┃ ┣━━ 1080P 高清 62 | ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB 63 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB 64 | ┃ ┣━━ 720P 高清 65 | ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB 66 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB 67 | ┃ ┣━━ 480P 清晰 68 | ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB 69 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB 70 | ┃ ┗━━ 360P 流畅 71 | ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB 72 | ┃ ┗━━ codec: avc1.64001E total: 11.59MB 73 | ┗━━ 声音 Audio 74 | ┣━━ 默认音质 75 | ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB 76 | ┗━━ Hi-Res无损 77 | ┗━━ codec: fLaC total: 94.55MB 78 | ``` 79 | 80 | 看上去不错😇,那么我要怎么才能下到指定编码的视频呢? 
81 | 82 | bilix提供了另一个参数`--codec`来指定编码格式,例如你可以通过组合`-q 480P --codec hev1.1.6.L120.90`来指定下载7.61MB的那个。 83 | `--codec`参数与`-q`参数类似,也支持子串指定,例如你可以通过`--codec hev`来使得所有视频都选择`hev`开头的编码。 84 | 85 | 对于音质,部分视频会含有大会员专享的杜比全景声和Hi-Res无损音质,利用`--codec`参数可以指定这些音频,例如 86 | 87 | ```shell 88 | bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC 89 | ``` 90 | 91 | `--codec hev:fLaC`中使用`:`将画质编码和音频编码隔开,如只指定音频编码,可使用`--codec :fLaC` 92 | 93 | ## 关于断点重连 94 | 95 | 用户可以通过Ctrl+C中断任务,对于未完成的文件,重新执行命令会在之前的进度基础上下载,已完成的文件会进行跳过。 96 | 但是对于未完成的文件,以下情况建议清除未完成任务的临时文件再执行命令,否则可能残留部分临时文件。 97 | 98 | - 中断后改变画面质量`-q`或编码`--codec` 99 | - 中断后改变分段并发数`--part-con` 100 | - 中断后改变时间范围`--time-range` 101 | 102 | ## 一次提供多个url 103 | bilix的所有方法都支持提供多个`url` 104 | ```shell 105 | bilix v 'url1' 'url2' 'url3' 106 | bilix up 'up_url1' 'up_url2' 107 | ``` 108 | 当你提供多个`url`时,并发控制当然也正常工作 109 | 110 | 111 | ## 更多站点支持 112 | bilix除了b站以外也支持了一些别的站点,但作者精力有限,所以失效也不奇怪。具体可见[discussion](https://github.com/HFrost0/bilix/discussions/39) 113 | 114 | ## 基本下载方法 115 | 对于一些基本的下载场景 116 | * 你可以直接通过文件链接下载 117 | ```shell 118 | bilix f 'https://xxxx.com/xxxx.mp4' 119 | ``` 120 | * 你可以通过m3u8 url直接下载m3u8视频 121 | ```shell 122 | bilix m3u8 'https://xxxx.com/xxxx.m3u8' 123 | ``` 124 | 125 | ## 代理 126 | bilix默认使用系统代理 127 | -------------------------------------------------------------------------------- /docs/api_examples.md: -------------------------------------------------------------------------------- 1 | # API案例 2 | bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 3 | ```python 4 | import asyncio 5 | from bilix.sites.bilibili import api 6 | from httpx import AsyncClient 7 | 8 | 9 | async def main(): 10 | # 需要先实例化一个用来进行http请求的client 11 | client = AsyncClient(**api.dft_client_settings) 12 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 13 | print(data) 14 | 15 | 16 | asyncio.run(main()) 17 | 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/async.md: -------------------------------------------------------------------------------- 1 | # 异步基础 2 | 异步无疑是python中处理网络请求的最佳技术,因为它可以承载极高的并发量。 3 | 在python中使用bilix之前,你需要先对python中的异步编程有一些了解。python官方使用[asyncio](https://docs.python.org/3/library/asyncio.html) 4 | 提供异步I/O的支持。 5 | 6 | ```python 7 | async def hello(): 8 | print("hello world") 9 | ``` 10 | 11 | 对于一个async函数(`def`变为`async def`)来说,调用不会直接执行函数,而是返回一个协程(coroutine)对象 12 | 13 | ```python 14 | c = hello() 15 | >>> c 16 | <coroutine object hello at 0x...> 17 | 18 | ``` 19 | 20 | 我们可以将这个coroutine提交到asyncio的事件循环中执行它 21 | 22 | ```python 23 | import asyncio 24 | 25 | >>> asyncio.run(c) 26 | "hello world" 27 | ``` 28 | 29 | bilix的所有下载方法都是异步的,所以你也可以这样执行他们 30 | ```python 31 | import asyncio 32 | from bilix.sites.bilibili import DownloaderBilibili 33 | 34 | d = DownloaderBilibili() 35 | asyncio.run(d.get_video('url')) 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/download_examples.md: -------------------------------------------------------------------------------- 1 | # 下载案例 2 | 3 | 觉得命令行太麻烦,不够强大?bilix可作为python的库调用,并且接口设计易用,功能更强大,这给了你很大的扩展空间 4 | 5 | ## 从最简单的开始 6 | 7 | ```python 8 | import asyncio 9 | # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | 12 | 13 | async def main(): 14 | # 你可以使用async with上下文管理器来开启和关闭一个下载器 15 | async with DownloaderBilibili() as d: 16 | # 然后用await异步等待下载完成 17 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 18 | 19 | 20 | if __name__ == '__main__': 21
| asyncio.run(main()) 22 | 23 | ``` 24 | 25 | ## 组合多种任务 / 控制并发量 26 | 27 | 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 28 | 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | 34 | async def main(): 35 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 36 | cor1 = d.get_series( 37 | 'https://www.bilibili.com/bangumi/play/ss28277' 38 | , quality=999) 39 | cor2 = d.get_up(url_or_mid='436482484', quality=999) 40 | cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122', quality=999) 41 | await asyncio.gather(cor1, cor2, cor3) 42 | await d.aclose() 43 | 44 | 45 | if __name__ == '__main__': 46 | asyncio.run(main()) 47 | 48 | 49 | ``` 50 | 51 | ## 下载切片 52 | 53 | 你可以只下视频的一小段 54 | 55 | ```python 56 | import asyncio 57 | from bilix.sites.bilibili import DownloaderBilibili 58 | 59 | 60 | async def main(): 61 | """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" 62 | async with DownloaderBilibili() as d: 63 | # time_range (start_time, end_time) 64 | await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) 65 | 66 | 67 | if __name__ == '__main__': 68 | asyncio.run(main()) 69 | 70 | ``` 71 | 72 | ## 同时下载多个站点 73 | 74 | 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 75 | 76 | ```python 77 | import asyncio 78 | from bilix.sites.bilibili import DownloaderBilibili 79 | from bilix.sites.cctv import DownloaderCctv 80 | 81 | 82 | async def main(): 83 | async with DownloaderBilibili() as d_bl, DownloaderCctv() as d_tv: 84 | await asyncio.gather( 85 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 86 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 87 | ) 88 | 89 | 90 | if __name__ == '__main__': 91 | asyncio.run(main()) 92 | 93 | ``` 94 | 95 | ## 限制下载速度 96 | 97 | 限制下载速度很简单,下面的例子限制了b站点总下载速度在1MB/s以下 98 | 99 | ```python 100 | import asyncio 101 | from bilix.sites.bilibili import DownloaderBilibili 102 | from bilix.sites.cctv import DownloaderCctv 103 | 104 | 105 | async def main(): 106 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 107 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 108 | 109 | 110 | if __name__ == '__main__': 111 | asyncio.run(main()) 112 | 113 | ``` 114 | 115 | 另外,多个下载器之间的速度设置也是独立的 116 | 117 | ```python 118 | async def main(): 119 | # 就像并发控制一样,每个downloader的速度设置也是独立的 120 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 121 | await asyncio.gather( 122 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 123 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 124 | ) 125 | ``` 126 | 127 | ## 显示进度条 128 | 129 | 使用python模块时,进度条默认不显示,如需显示,可以 130 | 131 | ```python 132 | from bilix.progress.cli_progress import CLIProgress 133 | 134 | CLIProgress.start() 135 | ``` 136 | 137 | 或者通过任意下载器内部的`progress`对象打开 138 | 139 | ```python 140 | d.progress.start() 141 | ``` 142 | -------------------------------------------------------------------------------- /docs/en/advance_guide.md: -------------------------------------------------------------------------------- 1 | # Advance Guide 2 | Please use `bilix -h` for more help,including method short alias,video quality selection,concurrency control, 3 | download speed control,download directory... 4 | 5 | ## Method short alias 6 | 7 | Method names like `get_series` and `get_video` are too cumbersome to write? 
Agreed! You can use their 8 | short aliases for faster access: 9 | 10 | ```shell 11 | bilix s 'url' 12 | bilix v 'url' 13 | ... 14 | ``` 15 | Please check `bilix -h` for all short aliases 16 | 17 | ## Login 18 | 19 | There are two ways to log in 20 | 21 | * cookie option 22 | 23 | By adding the `SESSDATA` cookie from your browser's cache in the `--cookie` option, you can download videos that require a premium membership. 24 | 25 | * load cookies from browser 26 | 27 | After logging in through the browser, use the `-fb --from-browser` option to read cookies from the browser, 28 | such as `-fb chrome`. Using this method may require authorization. The method that `bilix` uses to read browser 29 | cookies is the open-source project [browser_cookie3](https://github.com/borisbabic/browser_cookie3). 30 | 31 | :::tip 32 | If you want to stay logged in, you can use `alias bilix=bilix --cookie xxxxxx` or `alias bilix=bilix -fb chrome` 33 | to create an alias for the `bilix` command 34 | ::: 35 | 36 | ## Video and audio quality, codec selection 37 | 38 | You can use the `--quality -q` option to choose the video resolution. bilix supports two different selection modes: 39 | 40 | * relative selection (default) 41 | 42 | By default, bilix selects the highest accessible quality for you (that is, `-q 0`). For the second highest, use `-q 1`, and so on: the larger the number, the lower the resolution. 43 | When the number is out of range, the lowest quality is selected, so you can always pick the lowest quality with `-q 999`. 44 | * absolute selection 45 | 46 | You can use `-q 1080P` to specify a resolution; the string just needs to be a substring of the resolution name shown on bilibili. 47 | 48 | More advanced users may need to specify a particular video codec for download, but the codecs supported by Bilibili are not visible on the website or in the app. For this purpose, bilix has designed the `info` method. By using it, you can see all the information about the video: 49 | 50 | ```text 51 | bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 52 | 53 | 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 54 | ┣━━ 画面 Video 55 | ┃ ┣━━ HDR 真彩 56 | ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB 57 | ┃ ┣━━ 4K 超清 58 | ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB 59 | ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB 60 | ┃ ┣━━ 1080P 60帧 61 | ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB 62 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB 63 | ┃ ┣━━ 1080P 高清 64 | ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB 65 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB 66 | ┃ ┣━━ 720P 高清 67 | ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB 68 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB 69 | ┃ ┣━━ 480P 清晰 70 | ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB 71 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB 72 | ┃ ┗━━ 360P 流畅 73 | ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB 74 | ┃ ┗━━ codec: avc1.64001E total: 11.59MB 75 | ┗━━ 声音 Audio 76 | ┣━━ 默认音质 77 | ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB 78 | ┗━━ Hi-Res无损 79 | ┗━━ codec: fLaC total: 94.55MB 80 | ``` 81 | 82 | Looks good😇, so how can I download the video with a specific codec? 83 | 84 | bilix provides another option, `--codec`. For example, you can use a combination like `-q 480P --codec hev1.1.6.L120.90` 85 | to specify downloading the 7.61MB one. The `--codec` option is similar to the `-q` option in that it also supports substring specification; 86 | for example, using `--codec hev` makes all videos choose codecs that start with `hev`.
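For instance, putting both options together for the video inspected above, the full command would look something like this (with the same placeholder cookie as before):

```shell
bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' -q 480P --codec hev1.1.6.L120.90
```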
87 | 88 | For audio quality, some videos may contain Dolby and Hi-Res audio. You can use the `--codec` option to specify these 89 | audio formats, for example: 90 | 91 | ```shell 92 | bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC 93 | ``` 94 | 95 | in `--codec hev:fLaC`, use`:` to split video and audio codec, if you just want to specify audio codec,you can use`--codec :fLaC` 96 | 97 | ## Resuming Interrupted Downloads 98 | 99 | Users can interrupt tasks by pressing `Ctrl+C`. For unfinished files, re-executing the command will resume the download 100 | based on the previous progress, and completed files will be skipped. However, for unfinished files, it is recommended 101 | to clear the temporary files of the unfinished tasks before executing the command again in the following situations, 102 | otherwise some temporary files may remain: 103 | 104 | * Changing the video quality `-q` or `--codec` after interruption 105 | * Changing the `--part-con` after interruption 106 | * Changing the `--time-range` after interruption 107 | 108 | ## Provide multiple urls at once 109 | All methods of bilix support providing multiple `url` 110 | ```shell 111 | bilix v 'url1' 'url2' 'url3' 112 | bilix up 'up_url1' 'up_url2' 113 | ``` 114 | Concurrency, speed control also works fine when you provide multiple `url` of course 115 | 116 | 117 | ## Support for More Sites 118 | 119 | bilix also supports some other websites, but their availability may vary as the author is currently busy. 120 | For further information, please refer to the following [discussion](https://github.com/HFrost0/bilix/discussions/39). 121 | 122 | ## Basic Download method 123 | For some basic download scenarios 124 | * You can directly download a file through the file url 125 | ```shell 126 | bilix f 'https://xxxx.com/xxxx.mp4' 127 | ``` 128 | * you can directly download m3u8 video by url 129 | ```shell 130 | bilix m3u8 'https:/xxxx.com/xxxx.m3u8' 131 | ``` 132 | 133 | ## Proxy 134 | bilix will use system proxy by default 135 | -------------------------------------------------------------------------------- /docs/en/api_examples.md: -------------------------------------------------------------------------------- 1 | # API Examples 2 | bilix provides the APIs of various websites, and they are all asynchronous 3 | ```python 4 | import asyncio 5 | from bilix.sites.bilibili import api 6 | from httpx import AsyncClient 7 | 8 | 9 | async def main(): 10 | # instantiate a httpx client for making http requests 11 | client = AsyncClient(**api.dft_client_settings) 12 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 13 | print(data) 14 | 15 | 16 | asyncio.run(main()) 17 | 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/en/async.md: -------------------------------------------------------------------------------- 1 | # Async basic 2 | Asynchronous programming in Python excels at handling network requests with high concurrency. 3 | Before using bilix in Python, you need to have some understanding of asynchronous programming in Python. 4 | The official Python [asyncio](https://docs.python.org/3/library/asyncio.html) library provides support for asynchronous I/O. 5 | 6 | ```python 7 | async def hello(): 8 | print("hello world") 9 | ``` 10 | 11 | For an async function (async def), calling it will not directly execute the function but instead return a coroutine object. 
12 | ```python 13 | c = hello() 14 | >>> c 15 | <coroutine object hello at 0x...> 16 | 17 | ``` 18 | 19 | We can submit the coroutine object to asyncio's event loop to execute it: 20 | 21 | ```python 22 | import asyncio 23 | 24 | >>> asyncio.run(c) 25 | "hello world" 26 | ``` 27 | 28 | All download methods of bilix are asynchronous, so you can execute them like this: 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | d = DownloaderBilibili() 34 | asyncio.run(d.get_video('url')) 35 | ``` 36 | -------------------------------------------------------------------------------- /docs/en/download_examples.md: -------------------------------------------------------------------------------- 1 | # Download Examples 2 | 3 | Is the command line too cumbersome and not powerful enough for you? bilix can be used as a Python library 4 | with user-friendly interfaces and enhanced functionality for greater flexibility. 5 | 6 | ## Start with the simplest 7 | 8 | ```python 9 | import asyncio 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | 12 | 13 | async def main(): 14 | # you can use the async with context manager to open and close a downloader 15 | async with DownloaderBilibili() as d: 16 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 17 | 18 | 19 | if __name__ == '__main__': 20 | asyncio.run(main()) 21 | 22 | ``` 23 | 24 | ## Combine multiple tasks and control concurrency 25 | 26 | You can combine the coroutine objects returned by the downloader and use gather to execute them concurrently. 27 | The concurrency is strictly restricted by the downloader object, ensuring no unexpected burden on the server. 28 | 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | 34 | async def main(): 35 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 36 | cor1 = d.get_series( 37
77 | 78 | ```python 79 | import asyncio 80 | from bilix.sites.bilibili import DownloaderBilibili 81 | from bilix.sites.cctv import DownloaderCctv 82 | 83 | 84 | async def main(): 85 | async with DownloaderBilibili() as d_bl, DownloaderCctv() as d_tv: 86 | await asyncio.gather( 87 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 88 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 89 | ) 90 | 91 | 92 | if __name__ == '__main__': 93 | asyncio.run(main()) 94 | 95 | ``` 96 | 97 | ## Limit download speed 98 | 99 | Limiting the download speed is very simple. 100 | The following example limits the total download speed below 1MB/s 101 | 102 | ```python 103 | import asyncio 104 | from bilix.sites.bilibili import DownloaderBilibili 105 | from bilix.sites.cctv import DownloaderCctv 106 | 107 | 108 | async def main(): 109 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 110 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 111 | 112 | 113 | if __name__ == '__main__': 114 | asyncio.run(main()) 115 | 116 | ``` 117 | 118 | In addition, the speed settings between downloaders are also independent 119 | 120 | ```python 121 | async def main(): 122 | # 就像并发控制一样,每个downloader的速度设置也是独立的 123 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 124 | await asyncio.gather( 125 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 126 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 127 | ) 128 | ``` 129 | 130 | ## Show progress bar 131 | 132 | When using the python module, the progress bar is not displayed by default. If you want to display it, you can 133 | 134 | ```python 135 | from bilix.progress.cli_progress import CLIProgress 136 | 137 | CLIProgress.start() 138 | ``` 139 | 140 | or open via the `progress` object inside any downloader 141 | 142 | ```python 143 | d.progress.start() 144 | ``` 145 | 146 | 147 | -------------------------------------------------------------------------------- /docs/en/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | # https://vitepress.dev/reference/default-theme-home-page 3 | layout: home 4 | 5 | hero: 6 | name: "bilix" 7 | tagline: Lightning-fast asynchronous download tool for bilibili and more 8 | actions: 9 | - theme: brand 10 | text: Quickstart 11 | link: /en/quickstart 12 | - theme: alt 13 | text: Python API 14 | link: /en/async 15 | 16 | features: 17 | - icon: ⚡️ 18 | title: Fast & Async 19 | details: Asynchronous high concurrency support, controllable concurrency and speed settings 20 | - icon: 😉 21 | title: Lightweight & User-friendly 22 | details: Lightweight user-friendly CLI with progress notification, focusing on core functionality 23 | - icon: 📝 24 | title: Fully-featured 25 | details: Submissions, anime, TV Series, video clip, audio, favourite, danmaku ,cover... 26 | - icon: 🔨 27 | title: Extensible 28 | details: Extensible Python module suitable for more download scenarios 29 | --- 30 | -------------------------------------------------------------------------------- /docs/en/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | bilix is a powerful Python asynchronous video download tool that requires two steps to install: 3 | 4 | 1. 
pip install (requires Python >= 3.8) 5 | ```shell 6 | pip install bilix 7 | ``` 8 | If you are a macOS user, you can also use `brew` to install: 9 | ```shell 10 | brew install bilix 11 | ``` 12 | 13 | 2. [FFmpeg](https://ffmpeg.org): a command-line video tool used to merge the downloaded audio and video 14 | 15 | * For macOS, it can be installed via `brew install ffmpeg` 16 | * For Windows, please download it from the official website https://ffmpeg.org/download.html#build-windows ; you need to configure the environment variables after installation 17 | 18 | ::: info 19 | Just make sure that you can call the `ffmpeg` command from the command line in the end. 20 | ::: 21 | -------------------------------------------------------------------------------- /docs/en/more.md: -------------------------------------------------------------------------------- 1 | # More 2 | 3 | ## Community 4 | 5 | If you find any bugs or other issues, feel free to raise an [Issue](https://github.com/HFrost0/bilix/issues). 6 | 7 | If you have new ideas or feature requests, you are welcome to participate in 8 | the [Discussion](https://github.com/HFrost0/bilix/discussions) 9 | 10 | If you find this project helpful, you can support the author with a [Star](https://github.com/HFrost0/bilix/stargazers)🌟 11 | 12 | ## Contribute 13 | 14 | ❤️ Welcome~ Details can be found in [Contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING_EN.md) 15 | 16 | ## Known Bugs 🤡 17 | 18 | When two video names are exactly the same, task conflicts occur but no error is reported. 19 | -------------------------------------------------------------------------------- /docs/en/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | 3 | bilix offers a simple command line interface, so open the terminal and start downloading now! 4 | 5 | ## Batch download 6 | 7 | Batch download entire anime series, TV shows, movies, and UP submissions... just replace the `url` in the 8 | command with the web link of any video in the series you want to download. 9 | 10 | Head over to bilibili and find one to try (like [this](https://www.bilibili.com/video/BV1JE411g7XF)); 11 | `bilix` will download the files to the `videos` folder in the current directory of the command line, which is automatically created by default. 12 | 13 | ```shell 14 | bilix get_series 'url' 15 | ``` 16 | 17 | `get_series` is powerful, as it automatically recognizes and downloads all videos in a series. 18 | 19 | ::: info 20 | * What is a series: For example, all parts of a multi-part submission, or all episodes of an anime or TV show. 21 | * Some URLs containing parameters need to be wrapped in `''` when used in the terminal. 22 | The Windows cmd does not support `''`, but you can use PowerShell or Windows Terminal as an alternative. 23 | ::: 24 | 25 | ## Single download 26 | 27 | User😨: I don't want to download that many, just a single video. No problem, try this, just provide the web link of that video: 28 | 29 | ```shell 30 | bilix get_video 'url' 31 | ``` 32 | :::info 33 | Did you know?
Methods like `get_series` and `get_video` all have a [short alias](/en/advance_guide) 34 | ::: 35 | 36 | 37 | ## Audio download 38 | 39 | If you like the music and only want to download the audio, you can use the optional parameter `--only-audio` 40 | 41 | ```shell 42 | bilix get_series 'url' --only-audio 43 | ``` 44 | 45 | ## Clip download 46 | 47 | The video or live recording is too long and you only need the clip you are interested in✂️? Then you can use the 48 | `--time-range -tr` parameter to specify the time range 49 | 50 | ```shell 51 | bilix get_video 'url' -tr 0:16:53-0:17:49 52 | ``` 53 | 54 | In this example, a time range from 16 minutes 53 seconds to 17 minutes 49 seconds is specified. 55 | The format can be `h:m:s-h:m:s` or `s-s` 56 | 57 | This option is only available in `get_video`; you can combine `-tr` with `--only-audio` to download an audio clip 58 | 59 | ## Uploader download 60 | 61 | If you want to download the latest 100 submissions from an uploader 62 | 63 | ```shell 64 | bilix get_up 'https://space.bilibili.com/672328094' --num 100 65 | ``` 66 | 67 | `https://space.bilibili.com/672328094` is the uploader's space URL; you can also use the uploader id `672328094` in place of the `url` 68 | 69 | 70 | ## Download Videos by Category 71 | 72 | Suppose you enjoy watching the dance category👍 and want to download the top 20 超级敏感 宅舞 videos with 73 | the highest play count in the last 30 days, then you can use: 74 | 75 | ```shell 76 | bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 77 | ``` 78 | 79 | `get_cate` supports every sub-category on bilibili and offers options for sorting and keyword searching. 80 | For more details, please refer to `bilix -h` or the code comments. 81 | 82 | ## Download Videos from Favorites 83 | 84 | If you need to download videos from your own or someone else's favorites, you can use the `get_favour` method 85 | 86 | ```shell 87 | bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 88 | ``` 89 | 90 | `https://space.bilibili.com/11499954/favlist?fid=1445680654` is the URL of the favorites list. If you want to find 91 | the URL of a favorites list, the easiest way is to click on it in the left-side menu of the Bilibili web page, and the URL 92 | will appear in the browser's address bar. Alternatively, you can directly replace the URL with the fid `1445680654` 93 | 94 | ## Download collection or video list 95 | 96 | If you want to download a collection or video list released by an uploader, you can use the `get_collect` method 97 | 98 | ```shell 99 | bilix get_collect 'url' 100 | ``` 101 | 102 | Replace `url` with the URL of a collection or video list details page ([for example](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630)) 103 | 104 | 105 | ## Download subtitle, danmaku, cover...
106 | 107 | Add options `--subtitle` `--dm` `--image` according to your need to download these additional files 108 | 109 | ```shell 110 | bilix get_series 'url' --subtitle --dm --image 111 | ``` 112 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | # https://vitepress.dev/reference/default-theme-home-page 3 | layout: home 4 | 5 | hero: 6 | name: "bilix" 7 | tagline: 快如闪电的异步下载工具,支持bilibili及更多 8 | actions: 9 | - theme: brand 10 | text: 快速上手 11 | link: /quickstart 12 | - theme: alt 13 | text: Python调用 14 | link: /async 15 | 16 | features: 17 | - icon: ⚡️ 18 | title: 高速异步 19 | details: 异步高并发支持,可控的并发量和速度设置 20 | - icon: 😉 21 | title: 轻量易用 22 | details: 友好的CLI及进度提示,专注核心功能 23 | - icon: 📝 24 | title: 功能齐全 25 | details: 投稿,弹幕,收藏夹,分区,动漫,电视剧,切片,封面,音频... 26 | - icon: 🔨 27 | title: 可拓展 28 | details: 可扩展的Python模块适应更多下载场景 29 | --- 30 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # 安装 2 | bilix是一个强大的Python异步视频下载工具,安装它需要完成两个步骤: 3 | 4 | 1. pip安装(需要python3.8及以上) 5 | ```shell 6 | pip install bilix 7 | ``` 8 | 9 | 如果你是macOS用户,也可以使用`brew`安装: 10 | ```shell 11 | brew install bilix 12 | ``` 13 | 14 | 2. [FFmpeg](https://ffmpeg.org) :一个命令行视频工具,用于合成下载的音频和视频 15 | 16 | * macOS 下可以通过`brew install ffmpeg`进行安装。 17 | * Windows 下载请至官网 https://ffmpeg.org/download.html#build-windows ,安装好后需要配置环境变量。 18 | 19 | ::: info 20 | 最终确保在命令行中可以调用`ffmpeg`命令即可。 21 | ::: 22 | -------------------------------------------------------------------------------- /docs/more.md: -------------------------------------------------------------------------------- 1 | # 更多 2 | 3 | ## 欢迎提问 4 | 5 | 如果你发现任何bug或者其他问题,欢迎提[Issue](https://github.com/HFrost0/bilix/issues)。 6 | 7 | 如果你有新想法或新的功能请求,欢迎在[Discussion](https://github.com/HFrost0/bilix/discussions)中参与讨论 8 | 9 | 如果觉得该项目对你有所帮助,可以给作者一个小小的[Star](https://github.com/HFrost0/bilix/stargazers)🌟 10 | 11 | 12 | ## 参与贡献 13 | 14 | ❤️ 非常欢迎~详情可见[contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING.md) 15 | 16 | ## 已知的bug 🤡 17 | 18 | 当两个视频名字完全一样时,任务冲突但不会报错 19 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "docs:dev": "vitepress dev", 4 | "docs:build": "vitepress build", 5 | "docs:preview": "vitepress preview" 6 | }, 7 | "devDependencies": { 8 | "vitepress": "^1.0.0-alpha.63" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # 快速上手 2 | 3 | bilix提供了简单的命令行使用方式,打开终端开始下载吧~ 4 | 5 | ## 批量下载 6 | 7 | 批量下载整部动漫,电视剧,纪录片,电影,up投稿.....只需要把命令中的`url`替换成你要下载的系列中任意一个视频的网页链接。\ 8 | 到 bilibili 上找一个来试试吧~,比如这个李宏毅老师的机器学习视频:[链接](https://www.bilibili.com/video/BV1JE411g7XF), 9 | `bilix`会下载文件至命令行当前目录的`videos`文件夹中,默认自动创建。 10 | 11 | ```shell 12 | bilix get_series 'url' 13 | ``` 14 | 15 | `get_series`很强大,会自动识别系列所有视频并下载,当然,如果该系列只有一个视频(比如单p投稿)也是可以正常下载的。 16 | 17 | ::: info 18 | * 什么是一个系列(series):比如一个多p投稿的所有p,一部动漫,电视剧的所有集。 19 | 20 | * 某些含有参数的url在终端中要用`''`包住,而windows的命令提示符不支持`''`,可用powershell或windows terminal代替。 21 | ::: 22 | 23 | ## 单个下载 24 | 25 | 用户😨:我不想下载那么多,只想下载单个视频。没问题,试试这个,只需要提供那个视频的网页链接: 26 | 27 | ```shell 28 | bilix get_video 'url' 29 | 
``` 30 | :::info 31 | 你知道吗?`get_series` `get_video`方法名都有[简写](/advance_guide) 32 | ::: 33 | 34 | 35 | ## 下载音频 36 | 37 | 假设你喜欢音乐区,只想下载音频,那么可以使用可选参数`--only-audio`,例如下面是下载[A叔](https://space.bilibili.com/6075139) 38 | 一个钢琴曲合集音频的例子 39 | 40 | ```shell 41 | bilix get_series 'https://www.bilibili.com/video/BV1ts411D7mf' --only-audio 42 | ``` 43 | 44 | ## 切片下载 45 | 46 | 视频,直播录像太长,我需要下载我感兴趣的片段✂️,那么可以使用`--time-range -tr`参数指定时间段下载切片 47 | 48 | ```shell 49 | bilix get_video 'url' -tr 0:16:53-0:17:49 50 | ``` 51 | 52 | 这个例子中指定了16分53秒至17分49秒的片段。 `-tr`参数的格式为`h:m:s-h:m:s`,起始时间和结束时间以`-`分割,时分秒以`:` 53 | 分割。或者`s-s`格式,例如1013秒至1069秒`1013-1069` 54 | 55 | 该参数仅在`get_video`中生效,仅下载音频也支持该参数 56 | 57 | ## 下载特定up主的投稿 58 | 59 | 假设你是一个嘉心糖,想要下载嘉然小姐最新投稿的100个视频,那么你可以使用命令: 60 | 61 | ```shell 62 | bilix get_up 'https://space.bilibili.com/672328094' --num 100 63 | ``` 64 | 65 | `https://space.bilibili.com/672328094` 是up空间页url,另外用up主id`672328094`替换url同样也是可以的 66 | 67 | ## 下载分区视频 68 | 69 | 假设你喜欢看舞蹈区👍,想要下载最近30天播放量最高的20个超级敏感宅舞视频,那么你可以使用 70 | 71 | ```shell 72 | bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 73 | ``` 74 | 75 | `get_cate`支持b站的每个子分区,可以使用排序,关键词搜索等,详细请参考`bilix -h`或代码注释 76 | 77 | ## 下载收藏夹视频 78 | 79 | 如果你需要下载自己或者其他人收藏夹中的视频,你可以使用`get_favour`方法 80 | 81 | ```shell 82 | bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 83 | ``` 84 | 85 | `https://space.bilibili.com/11499954/favlist?fid=1445680654` 是收藏夹url,如果要知道一个收藏夹的url是什么, 86 | 最简单的办法是在b站网页左侧列表中点击切换到该收藏夹,url就会出现在浏览器的地址栏中。另外直接使用url中的fid`1445680654` 87 | 替换url也是可以的。 88 | 89 | ## 下载合集或视频列表 90 | 91 | 如果你需要下载up主发布的合集或视频列表,你可以使用`get_collect`方法 92 | 93 | ```shell 94 | bilix get_collect 'url' 95 | ``` 96 | 97 | 将`url`替换为某个合集或视频列表详情页的url(例如[这个](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630))即可下载合集或列表内所有视频 98 | 99 | :::info 100 | 合集和视频列表有什么区别?b站的合集可以订阅,列表则没有这个功能,但是他们都在up主空间页面的合集和列表菜单中,例如[这个](https://space.bilibili.com/369750017/channel/series) 101 | ,`get_collect`会根据详情页url中的信息判断这个链接是合集还是列表 102 | ::: 103 | 104 | ## 下载字幕,弹幕,封面... 105 | 106 | 在命令中加入可选参数`--subtitle`(字幕) `--dm`(弹幕) `--image`(封面),即可下载这些附属文件 107 | 108 | ```shell 109 | bilix get_series 'url' --subtitle --dm --image 110 | ``` 111 | -------------------------------------------------------------------------------- /examples/a_very_simple_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用bilix在python中最简单的实践🤖 3 | The simplest practice of using bilix in python 4 | """ 5 | import asyncio 6 | # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等,总能找到符合你需求的 7 | # downloader with many methods like get_series, get_video...
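# 注意:这些方法都是异步协程函数,直接调用只会得到协程对象,必须 await 才会真正执行(见下方 main)
# note: these methods are coroutine functions; calling them only creates a coroutine object that must be awaited (see main below)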
8 | from bilix.sites.bilibili import DownloaderBilibili 9 | 10 | 11 | async def main(): 12 | # 你可以使用with上下文管理器来开启和关闭一个下载器 13 | # you can use with to open and close a downloader 14 | async with DownloaderBilibili() as d: 15 | # 然后用await等待下载完成 16 | # and use await to download 17 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 18 | 19 | 20 | async def main2(): 21 | d = DownloaderBilibili() 22 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 23 | # 或者,手动关闭,一样很简单 24 | # or you can call aclose() manually 25 | await d.aclose() 26 | 27 | 28 | if __name__ == '__main__': 29 | asyncio.run(main()) 30 | -------------------------------------------------------------------------------- /examples/download_by_timerange.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以只下视频的一小段 3 | You can download just a small clip of the video 4 | """ 5 | import asyncio 6 | 7 | from bilix.sites.bilibili import DownloaderBilibili 8 | 9 | 10 | async def main(): 11 | """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" 12 | async with DownloaderBilibili() as d: 13 | # time_range (start_time, end_time) 14 | await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) 15 | 16 | 17 | if __name__ == '__main__': 18 | asyncio.run(main()) 19 | -------------------------------------------------------------------------------- /examples/limit_download_rate.py: -------------------------------------------------------------------------------- 1 | """ 2 | 限制下载速度很简单 3 | limit download rate is simple 4 | """ 5 | import asyncio 6 | from bilix.sites.bilibili import DownloaderBilibili 7 | from bilix.sites.cctv import DownloaderCctv 8 | 9 | 10 | async def main(): 11 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 12 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 13 | 14 | 15 | async def main2(): 16 | # 就像并发控制一样,每个downloader的速度设置也是独立的 17 | # Like concurrency control, the speed settings of each downloader are independent 18 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 19 | await asyncio.gather( 20 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 21 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 22 | ) 23 | 24 | 25 | if __name__ == '__main__': 26 | asyncio.run(main()) 27 | -------------------------------------------------------------------------------- /examples/multi_site_download_same_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。 3 | 各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 4 | 5 | You can initialize the downloaders of different websites at the same time, and use the coroutine objects returned by 6 | their methods to download concurrently. The concurrency control between each downloader is independent, so you can 7 | maximize the use of your network resources. 
8 | """ 9 | import asyncio 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | from bilix.sites.douyin import DownloaderDouyin 12 | from bilix.sites.cctv import DownloaderCctv 13 | 14 | 15 | async def main(): 16 | async with DownloaderBilibili() as d_bl, DownloaderDouyin() as d_dy, DownloaderCctv() as d_tv: 17 | await asyncio.gather( 18 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 19 | d_dy.get_video('https://www.douyin.com/video/7132430286415252773'), 20 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 21 | ) 22 | 23 | 24 | if __name__ == '__main__': 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /examples/multi_type_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 3 | 4 | You can combine coroutine objects returned by the downloader and use gather to execute them concurrently. 5 | The concurrency is strictly constrained by the downloader object, so it will not cause unexpected burden on 6 | the site server. 7 | """ 8 | import asyncio 9 | from bilix.sites.bilibili import DownloaderBilibili 10 | 11 | 12 | async def main(): 13 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 14 | cor1 = d.get_series( 15 | 'https://www.bilibili.com/bangumi/play/ss28277?spm_id_from=333.337.0.0', 16 | quality=999) 17 | cor2 = d.get_up(url_or_mid='436482484', quality=999) 18 | cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122?from_spmid=666.4.0.0', quality=999) 19 | await asyncio.gather(cor1, cor2, cor3) 20 | await d.aclose() 21 | 22 | 23 | if __name__ == '__main__': 24 | asyncio.run(main()) 25 | -------------------------------------------------------------------------------- /examples/use_of_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 3 | 4 | bilix provides api for various websites. 
You can use them if you need, and they are asynchronous 5 | """ 6 | import asyncio 7 | 8 | from bilix.sites.bilibili import api 9 | from httpx import AsyncClient 10 | 11 | 12 | async def main(): 13 | # 需要先实例化一个用来进行http请求的client 14 | # first we should initialize a http client 15 | client = AsyncClient(**api.dft_client_settings) 16 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 17 | print(data) 18 | 19 | 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "bilix" 7 | dynamic = ["version"] 8 | description = "⚡️Lightning-fast asynchronous download tool for bilibili and more" 9 | readme = "README.md" 10 | license = "Apache-2.0" 11 | requires-python = ">=3.8" 12 | authors = [ 13 | { name = "HFrost0", email = "hhlfrost@gmail.com" }, 14 | ] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3 :: Only", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | dependencies = [ 25 | "aiofiles>=0.8.0", 26 | "anyio", 27 | "danmakuC>=0.3.5", 28 | "bs4", 29 | "click>=8.0.3", 30 | "httpx[http2]>=0.23.3", 31 | "json5", 32 | "m3u8>=3.5.0", 33 | "pycryptodome", 34 | "pydantic>=2.5.3", 35 | "rich", 36 | "browser_cookie3>=0.17.1", 37 | "pymp4>=1.2.0", 38 | ] 39 | 40 | [project.scripts] 41 | bilix = "bilix.cli.main:main" 42 | 43 | [project.urls] 44 | Homepage = "https://github.com/HFrost0/bilix" 45 | 46 | [tool.hatch.version] 47 | path = "bilix/__init__.py" 48 | 49 | [tool.hatch.build.targets.sdist] 50 | include = [ 51 | "/bilix", 52 | ] 53 | --------------------------------------------------------------------------------