├── .github └── workflows │ ├── doc-deploy.yml │ ├── python-app.yml │ └── python-publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── CONTRIBUTING_EN.md ├── LICENSE ├── README.md ├── bilix ├── __init__.py ├── __main__.py ├── _process.py ├── cli │ ├── assign.py │ └── main.py ├── download │ ├── base_downloader.py │ ├── base_downloader_m3u8.py │ ├── base_downloader_part.py │ └── utils.py ├── exception.py ├── ffmpeg.py ├── log.py ├── progress │ ├── abc.py │ ├── cli_progress.py │ └── ws_progress.py ├── sites │ ├── bilibili │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ ├── downloader_test.py │ │ ├── informer.py │ │ ├── informer_test.py │ │ ├── utils.py │ │ └── utils_test.py │ ├── cctv │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── douyin │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── downloader_test.py │ ├── hanime1 │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── jable │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ ├── tiktok │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── downloader_test.py │ ├── yhdmp │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ ├── downloader.py │ │ └── yhdmp.js │ ├── yinghuacd │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ └── youtube │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py └── utils.py ├── docs ├── .vitepress │ ├── config.ts │ └── theme │ │ ├── index.ts │ │ └── style │ │ └── var.css ├── advance_guide.md ├── api_examples.md ├── async.md ├── download_examples.md ├── en │ ├── advance_guide.md │ ├── api_examples.md │ ├── async.md │ ├── download_examples.md │ ├── index.md │ ├── install.md │ ├── more.md │ └── quickstart.md ├── index.md ├── install.md ├── more.md ├── package-lock.json ├── package.json └── quickstart.md ├── examples ├── a_very_simple_example.py ├── download_by_timerange.py ├── limit_download_rate.py ├── multi_site_download_same_time.py ├── multi_type_tasks.py └── use_of_api.py └── pyproject.toml /.github/workflows/doc-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Document Deploy 2 | on: 3 | workflow_dispatch: { } 4 | push: 5 | paths: 6 | - 'docs/**' 7 | branches: 8 | - master 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | pages: write 14 | id-token: write 15 | environment: 16 | name: github-pages 17 | url: ${{ steps.deployment.outputs.page_url }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-node@v3 23 | with: 24 | node-version: 16 25 | cache: 'npm' 26 | cache-dependency-path: docs/package-lock.json 27 | - name: Install dependencies and build 28 | run: | 29 | npm ci 30 | npm run docs:build 31 | working-directory: docs 32 | - uses: actions/configure-pages@v2 33 | - uses: actions/upload-pages-artifact@v1 34 | with: 35 | path: docs/.vitepress/dist 36 | - name: Deploy 37 | id: deployment 38 | uses: actions/deploy-pages@v1 39 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: 
https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | paths: 9 | - '.github/workflows/python-app.yml' 10 | - 'bilix/**' 11 | - 'pyproject.toml' 12 | branches: [ "master" ] 13 | pull_request: 14 | paths: 15 | - '.github/workflows/python-app.yml' 16 | - 'bilix/**' 17 | - 'pyproject.toml' 18 | branches: [ "master" ] 19 | 20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | # You can use PyPy versions in python-version. 28 | # For example, pypy-2.7 and pypy-3.8 29 | matrix: 30 | python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] 31 | 32 | steps: 33 | - uses: actions/checkout@v3 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 42 | pip install -e . 43 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .fleet 4 | .pytest_cache 5 | videos 6 | __pycache__/ 7 | *.egg-info/ 8 | *.pyc 9 | venv*/ 10 | build/ 11 | dist/ 12 | docs/.vitepress/dist 13 | docs/.vitepress/cache 14 | node_modules 15 | .venv 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # bilix 开发指南 2 | 3 | 感谢你对贡献bilix有所兴趣,在你开始之前可以阅读下面的一些提示。请注意,bilix正快速迭代, 4 | 如果你在阅读本文档时发现有些内容已经过时,请以master分支的代码为准。 5 | 6 | # 开始之前 7 | 8 | 在一切开始之前,你需要先 **fork** 本仓库,然后clone你fork的仓库到你的本地: 9 | 10 | ```shell 11 | git clone https://github.com/your_user_name/bilix 12 | ``` 13 | 14 | 拉取至本地后,我**建议**你在独立的python环境中进行测试和开发,确认后进行本地源码可编辑安装: 15 | 16 | ```shell 17 | pip install -e . 
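# Optional sanity check (a suggested addition, not part of the original steps):
# after the editable install above, confirm the CLI entry point works by printing the version
bilix -v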
18 | ``` 19 | 20 | 试试bilix命令能否正常执行。通过测试了?至此,你可以在本地开发bilix了🍻 21 | 22 | # bilix 结构 23 | 24 | 在动手改动代码之前你需要对bilix的结构有一定的了解,下面是bilix的大致目录和各模块相应功能: 25 | 26 | ```text 27 | bilix 28 | ├── __init__.py 29 | ├── __main__.py 30 | ├── _process.py # 多进程相关 31 | ├── cli 32 | │   ├── assign.py # 分配任务,动态导入相关 33 | │   └── main.py # 命令行入口 34 | ├── download 35 | │   ├── base_downloader.py 36 | │   ├── base_downloader_m3u8.py # 基础m3u8下载器 37 | │   ├── base_downloader_part.py # 基础分段文件下载器 38 | │   └── utils.py # 下载相关的一些工具函数 39 | ├── exception.py 40 | ├── log.py 41 | ├── progress 42 | │   ├── abc.py # 进度条抽象类 43 | │   ├── cli_progress.py # 命令行进度条 44 | │   └── ws_progress.py 45 | ├── serve 46 | │   ├── __init__.py 47 | │   ├── app.py 48 | │   ├── auth.py 49 | │   ├── serve.py 50 | │   └── user.py 51 | ├── sites # 站点扩展目录,稍后介绍 52 | └── utils.py # 通用工具函数 53 | ``` 54 | 55 | ## 基础下载器 56 | 57 | bilix在`bilix.download`中提供了两种基础下载器,m3u8下载器和分段文件下载器。 58 | 它们基于`httpx`乃至更底层的`asyncio`及IO多路复用,并且集成了速度控制,并发控制,断点续传,时间段切片,进度条显示等许多实用功能。 59 | bilix的站点扩展下载功能都将基于这些基础下载器完成,基础下载器本身也提供cli服务 60 | 61 | ## 下载器是如何提供cli服务的 62 | 63 | 在bilix中,一个类只要实现了`handle`方法,就可以被注册到命令行(cli)中,`handle`方法的函数签名为 64 | 65 | ```python 66 | @classmethod 67 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 68 | ... 69 | ``` 70 | 71 | handle函数的实现应该满足下面三个原则: 72 | 73 | 1. 如果类根据`method` `keys` `options`认为自己不应该承担下载任务,`handle`函数应该返回`None` 74 | 2. 如果类可以承担任务,但发现`method`不在自己的可接受范围内,应该抛出`HandleMethodError`异常 75 | 3. 如果类可以承担任务,且`method`在自己的可接受范围内,应该返回两个值,第一个值为下载器实例,第二个值为下载coroutine 76 | 77 | Q:🙋为什么我看到有的下载器返回的是类本身,以及下载函数对象? 78 | 79 | ```python 80 | @classmethod 81 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 82 | if method == 'f' or method == 'get_file': 83 | return cls, cls.get_file 84 | ``` 85 | 86 | A:为了偷懒,如果返回值是类以及下载函数对象,将根据命令行参数及type hint自动组装为实例和coroutine, 87 | 适用于当命令行options的名字和方法,类参数名字、类型一致的情况 88 | 89 | 其实`handle`函数给你了较大的自由,你可以根据自己的需求,自由的组合出适合你的下载器的cli服务 90 | 91 | ## 如何快速添加一个站点的支持 92 | 93 | 在`bilix/sites`下,已经有一些站点的支持,如果你想要添加一个新的站点支持,可以按照下面的步骤进行: 94 | 95 | 1. 在`sites`文件夹下新建一个站点文件夹,例如`example` 96 | 2. 在`example`文件夹下添加站点的api模块`api.py`,仿照其他站点的格式实现从输入网页url到输出视频url,视频title的各种api 97 | 3. 在`example`文件夹下添加站点api模块的测试`api_test.py`,让大家随时测试站点是否可用 98 | 4. 在`example`文件夹下添加站点的下载器`donwloader.py`,定义`DownloaderExample` 99 | 类,根据该站点使用的传输方法选择相应的`BaseDownloader`进行继承,然后在类中定义好下载视频的方法,并实现`handle` 100 | 方法。另外你还可以添加`downloader_test.py`来验证你的下载器是否可用 101 | 5. 在`example`文件夹下添加`__init__.py`,将`DownloaderExample`类导入,并且在`__all__`中添加`DownloaderExample`以方便bilix找到你的下载器 102 | 103 | 搞定,使用bilix命令测试一下吧 104 | 105 | 当前已经有其他开发者为bilix对其他站点的适配做出了贡献🎉, 106 | 或许被接受的[New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22)也能为你提供帮助 107 | 108 | -------------------------------------------------------------------------------- /CONTRIBUTING_EN.md: -------------------------------------------------------------------------------- 1 | # Development guide of bilix 2 | 3 | Thank you for your interest in contributing to bilix. Before you start, you can read some tips below. 4 | Please note that bilix is rapidly iterating, if you find some content outdated while reading this document, 5 | please refer to the code of the master branch. 
6 | 7 | # Before starting 8 | 9 | Before anything else, you need to **fork** this repository and then clone your fork: 10 | 11 | ```shell 12 | git clone https://github.com/your_user_name/bilix 13 | ``` 14 | 15 | After cloning, I **recommend** that you test and develop in an isolated Python environment, 16 | and then install the local source in editable mode: 17 | 18 | ```shell 19 | pip install -e . 20 | ``` 21 | 22 | Check whether the `bilix` command runs normally. Passed the test? At this point, 23 | you can develop bilix locally🍻 24 | 25 | # Structure of bilix 26 | 27 | Before making any changes to the code, you should have some understanding of the structure of bilix. 28 | 29 | ```text 30 | bilix 31 | ├── __init__.py 32 | ├── __main__.py 33 | ├── _process.py # related to multiprocessing 34 | ├── cli 35 | │   ├── assign.py # assign tasks, dynamically import related 36 | │   └── main.py # command line entry 37 | ├── download 38 | │   ├── base_downloader.py 39 | │   ├── base_downloader_m3u8.py # basic m3u8 downloader 40 | │   ├── base_downloader_part.py # basic segmented file downloader 41 | │   └── utils.py # some utils for download 42 | ├── exception.py 43 | ├── log.py 44 | ├── progress 45 | │   ├── abc.py # abstract class of progress 46 | │   ├── cli_progress.py # progress for cli 47 | │   └── ws_progress.py 48 | ├── serve 49 | │   ├── __init__.py 50 | │   ├── app.py 51 | │   ├── auth.py 52 | │   ├── serve.py 53 | │   └── user.py 54 | ├── sites # site support 55 | └── utils.py # some utils 56 | ``` 57 | 58 | # BaseDownloader 59 | 60 | bilix provides two base downloaders in `bilix.download`: an m3u8 downloader and a content-range (segmented) file downloader. 61 | They are built on `httpx` and, at a lower level, `asyncio` with I/O multiplexing, and integrate many practical features 62 | such as speed control, concurrency control, resumable downloads, time-range clipping, and progress display. 63 | bilix's site extensions are built on these base downloaders, and the base downloaders 64 | themselves also provide CLI services 65 | 66 | 67 | # How does the downloader provide CLI service 68 | 69 | In bilix, any class that implements the `handle` method can be registered in the command line interface (CLI). 70 | The signature of the `handle` method is 71 | 72 | ```python 73 | @classmethod 74 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 75 | ... 76 | ``` 77 | 78 | The implementation of the `handle` function should follow three principles: 79 | 80 | 1. If, based on `method`, `keys` and `options`, the class decides it should not take the download task, `handle` should return `None` 81 | 2. If the class can take the task, but `method` is not within its acceptable range, it should raise a `HandleMethodError` exception 82 | 3. If the class can take the task and `method` is within its acceptable range, it should return two values: the first is the downloader instance and the second is the download coroutine 83 | 84 | Q: 🙋Why do I see that some downloaders return the class itself and the download function object? 
85 | 86 | ```python 87 | @classmethod 88 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 89 | if method == 'f' or method == 'get_file': 90 | return cls, cls.get_file 91 | ``` 92 | 93 | A: For convenience. If the return value is a class and a function object, they will be automatically assembled into an 94 | instance and a coroutine according to the command line arguments, options and type hints. This shortcut works when the names and types of the command line options match the parameters of the class and the method. 95 | 96 | 97 | # How to add support for a site 98 | 99 | Under `bilix/sites` a number of sites are already supported. If you want to add support for a new site, you can follow the steps below: 100 | 101 | 1. Create a new site folder under the `sites` folder, for example `example` 102 | 2. Add the site's API module `api.py` under the `example` folder, following the format of the other sites, to implement the APIs that turn an input webpage url into the output video url and video title 103 | 3. Add a test for the site API module, `api_test.py`, under the `example` folder, so that everyone can check at any time whether the site is still available 104 | 4. Add the site downloader `downloader.py` under the `example` folder and define a `DownloaderExample` 105 | class. Choose the appropriate `BaseDownloader` to inherit from according to the transport method the site uses, define the video download methods in the class, and implement the `handle` 106 | method. You can also add a `downloader_test.py` to verify that your downloader works. 107 | 5. Add `__init__.py` under the `example` folder, import the `DownloaderExample` class, and add `DownloaderExample` to `__all__` so that bilix can find your downloader 108 | 109 | Done. Test it with the `bilix` command 110 | 111 | Other developers have already contributed support for more sites🎉, 112 | and the accepted [New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22) list may also help you -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [HFrost0] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bilix 2 | 3 | [![GitHub license](https://img.shields.io/github/license/HFrost0/bilix?style=flat-square)](https://github.com/HFrost0/bilix/blob/master/LICENSE) 4 | ![PyPI](https://img.shields.io/pypi/v/bilix?style=flat-square&color=blue) 5 | ![GitHub commit activity](https://img.shields.io/github/commit-activity/m/HFrost0/bilix) 6 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/bilix?label=pypi%20downloads&style=flat-square) 7 | 8 | ⚡️Lightning-fast asynchronous download tool for bilibili and more 9 | 10 | 11 | ## Features 12 | 13 | ### ⚡️ Fast & Async 14 | 15 | Asynchronous high-concurrency support, with controllable concurrency and speed settings. 16 | 17 | ### 😉 Lightweight & User-friendly 18 | 19 | Lightweight, user-friendly CLI with progress notifications, focused on core functionality. 20 | 21 | ### 📝 Fully-featured 22 | 23 | Submissions, anime, TV series, video clips, audio, favourites, danmaku, covers... 24 | 25 | ### 🔨 Extensible 26 | 27 | Extensible Python module suitable for more download scenarios. 28 | 29 | ## Install 30 | 31 | ```shell 32 | pip install bilix 33 | ``` 34 | 35 | For macOS, you can also install `bilix` with `brew`: 36 | 37 | ```shell 38 | brew install bilix 39 | ``` 40 | 41 | ## Usage Example 42 | 43 | * If you prefer to use the command line interface (CLI) 44 | 45 | ```shell 46 | bilix v 'url' 47 | ``` 48 | 49 | > `v` is a short method alias for `get_video` 50 | 51 | * If you prefer to code with Python 52 | 53 | ```python 54 | from bilix.sites.bilibili import DownloaderBilibili 55 | import asyncio 56 | 57 | 58 | async def main(): 59 | async with DownloaderBilibili() as d: 60 | await d.get_video('url') 61 | 62 | 63 | asyncio.run(main()) 64 | ``` 65 | 66 | ## Community 67 | 68 | If you find any bugs or other issues, feel free to raise an [Issue](https://github.com/HFrost0/bilix/issues). 69 | 70 | If you have new ideas or feature requests👍, you are welcome to participate in 71 | the [Discussion](https://github.com/HFrost0/bilix/discussions) 72 | 73 | If you find this project helpful, you can support the author with a [Star](https://github.com/HFrost0/bilix/stargazers)🌟 74 | 75 | ## Contribute 76 | 77 | ❤️ Welcome! 
Details can be found in [Contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING_EN.md) 78 | -------------------------------------------------------------------------------- /bilix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lighting-fast async download tool inspired by w 3 | """ 4 | 5 | __version__ = "0.18.9" 6 | __url__ = "https://github.com/HFrost0/bilix" 7 | -------------------------------------------------------------------------------- /bilix/__main__.py: -------------------------------------------------------------------------------- 1 | from bilix.cli.main import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /bilix/_process.py: -------------------------------------------------------------------------------- 1 | import signal 2 | import sys 3 | from concurrent.futures import ProcessPoolExecutor 4 | from functools import partial 5 | 6 | 7 | def _init(): 8 | def shutdown(*args): 9 | sys.exit(0) 10 | 11 | signal.signal(signal.SIGINT, shutdown) 12 | 13 | 14 | def singleton(cls): 15 | _instance = {} 16 | 17 | def inner(*args, **kwargs): 18 | if cls not in _instance: 19 | _instance[cls] = cls(*args, **kwargs) 20 | return _instance[cls] 21 | 22 | return inner 23 | 24 | 25 | # singleton ProcessPoolExecutor to avoid recreation in spawn process 26 | SingletonPPE = singleton(partial(ProcessPoolExecutor, initializer=_init)) 27 | 28 | if __name__ == '__main__': 29 | p = SingletonPPE(max_workers=5) 30 | p.shutdown() 31 | -------------------------------------------------------------------------------- /bilix/cli/assign.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import inspect 3 | import re 4 | import time 5 | from functools import wraps 6 | from pathlib import Path 7 | from typing import Callable, Union, Tuple 8 | from importlib import import_module 9 | 10 | from bilix.exception import HandleMethodError, HandleError 11 | from bilix.log import logger 12 | 13 | 14 | def kwargs_filter(obj: Union[type, Callable], kwargs: dict): 15 | """ 16 | 17 | :param obj: 18 | :param kwargs: 19 | :return: 20 | """ 21 | sig = inspect.signature(obj) 22 | obj_require = set(sig.parameters.keys()) 23 | 24 | def check(k): 25 | if k in obj_require: 26 | p = sig.parameters[k] 27 | # check type hint 28 | try: 29 | if p.annotation is inspect.Signature.empty or \ 30 | isinstance(kwargs[k], p.annotation): 31 | return True 32 | else: 33 | logger.debug(f"kwarg {k}:{kwargs[k]} has been drop due to type hint missmatch") 34 | return False 35 | except TypeError: # https://peps.python.org/pep-0604/#isinstance-and-issubclass 36 | # lower than 3.10, Union 37 | # TypeError: Subscripted generics cannot be used with class and instance checks 38 | return True 39 | return False 40 | 41 | kwargs = {k: kwargs[k] for k in filter(check, kwargs)} 42 | return kwargs 43 | 44 | 45 | def module_handle_funcs(module): 46 | """find and yield all handle func in module""" 47 | attrs = getattr(module, '__all__', None) 48 | attrs = attrs or dir(module) 49 | for attr_name in attrs: 50 | if attr_name.startswith('__'): 51 | continue 52 | executor_cls = getattr(module, attr_name) 53 | if not inspect.isclass(executor_cls): 54 | continue 55 | handle_func = getattr(executor_cls, 'handle', None) 56 | if handle_func is None: 57 | continue 58 | yield handle_func 59 | 60 | 61 | def auto_assemble(handle_func): 62 | @wraps(handle_func) 63 | def wrapped(cls, method: str, 
keys: Tuple[str, ...], options: dict): 64 | res = handle_func(cls, method, keys, options) 65 | if res is NotImplemented or res is None: 66 | return res 67 | executor, cor = res 68 | # handle func return class instead of instance 69 | if inspect.isclass(executor): 70 | kwargs = kwargs_filter(executor, options) 71 | executor = executor(**kwargs) 72 | logger.debug(f"auto assemble {executor} by {kwargs}") 73 | # handle func return async function instead of coroutine 74 | if inspect.iscoroutinefunction(cor): 75 | kwargs = kwargs_filter(cor, options) 76 | cors = [] 77 | for key in keys: 78 | if not hasattr(cor, '__self__'): # coroutine function has not bound to instance 79 | cors.append(cor(executor, key, **kwargs)) # bound executor to self 80 | else: 81 | cors.append(cor(key, **kwargs)) 82 | logger.debug(f"auto assemble {cor} by {kwargs}") 83 | cor = asyncio.gather(*cors) 84 | return executor, cor 85 | 86 | return wrapped 87 | 88 | 89 | def longest_common_len(str1, str2): 90 | m, n = len(str1), len(str2) 91 | dp = [[0] * (n + 1) for _ in range(m + 1)] 92 | max_length = 0 93 | for i in range(1, m + 1): 94 | for j in range(1, n + 1): 95 | if str1[i - 1] == str2[j - 1]: 96 | dp[i][j] = dp[i - 1][j - 1] + 1 97 | max_length = max(max_length, dp[i][j]) 98 | return max_length 99 | 100 | 101 | def find_sites(): 102 | sites_path = Path(__file__).parent.parent / 'sites' 103 | for site in sites_path.iterdir(): 104 | if not site.is_dir() or not (site / '__init__.py').exists(): 105 | continue 106 | yield site 107 | 108 | 109 | def assign(cli_kwargs): 110 | method = cli_kwargs.pop('method') 111 | keys = cli_kwargs.pop('keys') 112 | options = cli_kwargs 113 | modules = [ 114 | # path, cmp_key 115 | ('download.base_downloader_m3u8', 'm3u8'), 116 | ('download.base_downloader_part', 'file'), 117 | ] 118 | for site in find_sites(): 119 | modules.append((f"sites.{site.name}", site.name)) 120 | pattern = re.compile(r"https?://(?:[\w-]*\.)?([\w-]+)\.([\w-]+)") 121 | if g := pattern.search(keys[0]): 122 | cmp_base = g.group(1) 123 | else: 124 | cmp_base = keys[0] 125 | 126 | def key(x: Tuple[str, str]): 127 | if x[0].startswith("sites"): 128 | return longest_common_len(cmp_base, x[-1]) 129 | else: # base_downloader 130 | return longest_common_len(method, x[-1]) 131 | 132 | for module, _ in sorted(modules, key=key, reverse=True): 133 | a = time.time() 134 | try: 135 | module = import_module(f"bilix.{module}") 136 | except ImportError as e: 137 | logger.debug(f"duo to ImportError <{e}>, skip ") 138 | continue 139 | logger.debug(f"import cost {time.time() - a:.6f} s ") 140 | exc = None 141 | for handle_func in module_handle_funcs(module): 142 | try: 143 | res = handle_func(method, keys, options) 144 | except HandleMethodError as e: 145 | exc = e 146 | continue 147 | if res is NotImplemented or res is None: 148 | continue 149 | executor, cor = res 150 | logger.debug(f"Assign to {executor.__class__.__name__}") 151 | return executor, cor 152 | if exc is not None: # for the module, some handler can handle, but method miss match 153 | raise exc 154 | raise HandleError(f"Can't find any handler for method: '{method}' keys: {keys}") 155 | -------------------------------------------------------------------------------- /bilix/cli/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import typing 3 | from pathlib import Path 4 | import click 5 | import rich 6 | from rich.panel import Panel 7 | from rich.table import Table 8 | 9 | from .. 
import __version__ 10 | from ..log import logger 11 | from .assign import assign 12 | from ..progress.cli_progress import CLIProgress 13 | from ..utils import parse_bytes_str, s2t 14 | from ..exception import HandleError 15 | 16 | 17 | def handle_help(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: 18 | if not value or ctx.resilient_parsing: 19 | return 20 | print_help() 21 | ctx.exit() 22 | 23 | 24 | def handle_version(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: 25 | if not value or ctx.resilient_parsing: 26 | return 27 | print(f"Version {__version__}") 28 | ctx.exit() 29 | 30 | 31 | def handle_debug(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ): 32 | if not value or ctx.resilient_parsing: 33 | return 34 | from rich.traceback import install 35 | install() 36 | logger.setLevel('DEBUG') 37 | logger.debug("Debug on, more information will be shown") 38 | 39 | 40 | def print_help(): 41 | console = rich.console.Console() 42 | console.print(f"\n[bold]bilix {__version__}", justify="center") 43 | console.print("⚡️快如闪电的bilibili下载工具,基于Python现代Async特性,高速批量下载整部动漫,电视剧,up投稿等\n", 44 | justify="center") 45 | console.print("使用方法: bilix [cyan] [OPTIONS][/cyan] ", justify="left") 46 | table = Table.grid(padding=1, pad_edge=False) 47 | table.add_column("Parameter", no_wrap=True, justify="left", style="bold") 48 | table.add_column("Description") 49 | 50 | table.add_row( 51 | "[cyan]", 52 | 'get_series 或 s: 获取整个系列的视频(包括多p投稿,动漫,电视剧,电影,纪录片),也可以下载单个视频\n' 53 | 'get_video 或 v: 获取特定的单个视频,在用户不希望下载系列其他视频的时候可以使用\n' 54 | 'get_up 或 up: 获取某个up的所有投稿视频,支持数量选择,关键词搜索,排序\n' 55 | 'get_cate 或 cate: 获取分区视频,支持数量选择,关键词搜索,排序\n' 56 | 'get_favour 或 fav: 获取收藏夹内视频,支持数量选择,关键词搜索\n' 57 | 'get_collect 或 col:获取合集或视频列表内视频\n' 58 | 'info: 打印url所属资源的详细信息(例如点赞数,画质,编码格式等)' 59 | ) 60 | table.add_row( 61 | "[cyan][/cyan]", 62 | '如使用get_video/get_series,填写视频的url\n' 63 | '如使用get_up,填写b站用户空间页url或用户id\n' 64 | '如使用get_cate,填写分区名称\n' 65 | '如使用get_favour,填写收藏夹页url或收藏夹id\n' 66 | '如使用get_collect,填写合集或者视频列表详情页url\n' 67 | '如使用info,填写任意资源url' 68 | ) 69 | console.print(table) 70 | # console.rule("OPTIONS参数") 71 | table = Table(highlight=True, box=None, show_header=False) 72 | table.add_column("OPTIONS", no_wrap=True, justify="left", style="bold") 73 | table.add_column("type", no_wrap=True, justify="left", style="bold") 74 | table.add_column("Description", ) 75 | table.add_row( 76 | "-d --dir", 77 | '[dark_cyan]str', 78 | "文件的下载目录,默认当前路径下的videos文件夹下,不存在会自动创建" 79 | ) 80 | table.add_row( 81 | "-q --quality", 82 | '[dark_cyan]int | str', 83 | "视频画面质量,默认0为最高画质,越大画质越低,超出范围时自动选最低画质,或者直接使用字符串指定'1080p'等名称" 84 | ) 85 | table.add_row( 86 | "-vc --video-con", 87 | '[dark_cyan]int', 88 | "控制最大同时下载的视频数量,理论上网络带宽越高可以设的越高,默认3", 89 | ) 90 | table.add_row( 91 | "-pc --part-con", 92 | '[dark_cyan]int', 93 | "控制每个媒体的分段并发数,默认10", 94 | ) 95 | table.add_row( 96 | '--cookie', 97 | '[dark_cyan]str', 98 | '有条件的用户可以提供大会员的SESSDATA来下载会员视频' 99 | ) 100 | table.add_row( 101 | "-fb --from-browser", '[dark_cyan]str', 102 | '从哪个浏览器中导入cookies,例如safari,chrome,edge...默认无', 103 | ) 104 | table.add_row( 105 | '--days', 106 | '[dark_cyan]int', 107 | '过去days天中的结果,默认为7,仅get_up, get_cate时生效' 108 | ) 109 | table.add_row( 110 | "-n --num", 111 | '[dark_cyan]int', 112 | "下载前多少个投稿,仅get_up,get_cate,get_favor时生效", 113 | ) 114 | table.add_row( 115 | "--order", 116 | '[dark_cyan]str', 117 | '何种排序,pubdate发布时间(默认), click播放数,scores评论数,stow收藏数,coin硬币数,dm弹幕数, 
仅get_up, get_cate时生效', 118 | ) 119 | table.add_row( 120 | "--keyword", 121 | '[dark_cyan]str', 122 | '搜索关键词, 仅get_up, get_cate,get_favor时生效', 123 | ) 124 | table.add_row( 125 | "-ns --no-series", '', 126 | '只下载搜索结果每个视频的第一p,仅get_up,get_cate,get_favour时生效', 127 | ) 128 | table.add_row( 129 | "-nh --no-hierarchy", '', 130 | '不使用层次目录,所有视频统一保存在下载目录下' 131 | ) 132 | table.add_row( 133 | "--image", '', 134 | '下载视频封面' 135 | ) 136 | table.add_row( 137 | "--subtitle", '', 138 | '下载srt字幕', 139 | ) 140 | table.add_row( 141 | "--dm", '', 142 | '下载弹幕', 143 | ) 144 | table.add_row( 145 | "-oa --only-audio", '', 146 | '仅下载音频,下载的音质固定为最高音质', 147 | ) 148 | table.add_row( 149 | "-p", '[dark_cyan]int, int', 150 | '下载集数范围,例如-p 1 3 只下载P1至P3,仅get_series时生效', 151 | ) 152 | table.add_row( 153 | "--codec", '[dark_cyan]str', 154 | '视频及音频编码(可使用info查看后填写,使用:分隔),可使用完整名称(例如avc1.640032,fLaC)或部分名称(例如avc,hev)', 155 | ) 156 | table.add_row( 157 | "-sl --speed-limit", '[dark_cyan]str', 158 | '最大下载速度,默认无限制。例如:-sl 1.5MB', 159 | ) 160 | table.add_row( 161 | "-sr --stream-retry", '[dark_cyan]int', 162 | '下载过程中发生网络错误后最大重试数,默认5', 163 | ) 164 | table.add_row( 165 | "-tr --time-range", '[dark_cyan]str', 166 | r'下载视频的时间范围,格式如 h:m:s-h:m:s 或 s-s,默认无,仅get_video时生效', 167 | ) 168 | table.add_row("-h --help", '', "帮助信息") 169 | table.add_row("-v --version", '', "版本信息") 170 | table.add_row("--debug", '', "显示debug信息") 171 | console.print(Panel(table, border_style="dim", title="Options", title_align="left")) 172 | 173 | 174 | class BasedQualityType(click.ParamType): 175 | name = "quality" 176 | 177 | def convert(self, value, param, ctx): 178 | try: 179 | value = int(value) 180 | except ValueError: 181 | return value # str 182 | if value in {1080, 720, 480, 360}: 183 | return str(value) 184 | else: 185 | return value # relative choice like 0, 1, 2, 999... 
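# Illustrative behaviour of BasedQualityType.convert above (comments only, a sketch rather than executed code):
#   "-q 0"     -> 0        relative choice, 0 selects the highest available quality
#   "-q 2"     -> 2        relative choice, larger values mean lower quality
#   "-q 1080"  -> "1080"   known resolution numbers (1080/720/480/360) are normalized to strings
#   "-q 1080p" -> "1080p"  non-numeric names are passed through unchanged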
186 | 187 | 188 | class BasedSpeedLimit(click.ParamType): 189 | name = "speed_limit" 190 | 191 | def convert(self, value, param, ctx): 192 | if value is not None: 193 | return parse_bytes_str(value) 194 | 195 | 196 | class BasedTimeRange(click.ParamType): 197 | name = "time_range" 198 | 199 | def convert(self, value, param, ctx): 200 | start_time, end_time = map(s2t, value.split('-')) 201 | return start_time, end_time 202 | 203 | 204 | @click.command(add_help_option=False) 205 | @click.argument("method", type=str) 206 | @click.argument("keys", type=str, nargs=-1, required=True) 207 | @click.option( 208 | "-d", 209 | "--dir", 210 | "path", 211 | type=Path, 212 | default='videos', 213 | ) 214 | @click.option( 215 | '-q', 216 | '--quality', 217 | 'quality', 218 | type=BasedQualityType(), 219 | default=0, # default relatively choice 220 | ) 221 | @click.option( 222 | '-vc', 223 | '--video-con', 224 | 'video_concurrency', 225 | type=int, 226 | default=3, 227 | ) 228 | @click.option( 229 | '-pc', 230 | "--part-con", 231 | "part_concurrency", 232 | type=int, 233 | default=10, 234 | ) 235 | @click.option( 236 | '--cookie', 237 | 'cookie', 238 | type=str, 239 | ) 240 | @click.option( 241 | '--days', 242 | 'days', 243 | type=int, 244 | default=7, 245 | ) 246 | @click.option( 247 | '-n', 248 | '--num', 249 | type=int, 250 | default=10, 251 | ) 252 | @click.option( 253 | '--order', 254 | 'order', 255 | type=str, 256 | default='pubdate', 257 | ) 258 | @click.option( 259 | '--keyword', 260 | 'keyword', 261 | type=str 262 | ) 263 | @click.option( 264 | '-ns', 265 | '--no-series', 266 | 'series', 267 | is_flag=True, 268 | default=True, 269 | ) 270 | @click.option( 271 | '-nh', 272 | '--no-hierarchy', 273 | 'hierarchy', 274 | is_flag=True, 275 | default=True, 276 | ) 277 | @click.option( 278 | '--image', 279 | 'image', 280 | is_flag=True, 281 | default=False, 282 | ) 283 | @click.option( 284 | '--subtitle', 285 | 'subtitle', 286 | is_flag=True, 287 | default=False, 288 | ) 289 | @click.option( 290 | '--dm', 291 | 'dm', 292 | is_flag=True, 293 | default=False, 294 | ) 295 | @click.option( 296 | '-oa', 297 | '--only-audio', 298 | 'only_audio', 299 | is_flag=True, 300 | default=False, 301 | ) 302 | @click.option( 303 | '-p', 304 | 'p_range', 305 | type=(int, int), 306 | ) 307 | @click.option( 308 | '--codec', 309 | 'codec', 310 | type=str, 311 | default='' 312 | ) 313 | @click.option( 314 | '--speed-limit', 315 | '-sl', 316 | 'speed_limit', 317 | type=BasedSpeedLimit(), 318 | default=None, 319 | ) 320 | @click.option( 321 | '--stream-retry', 322 | '-sr', 323 | 'stream_retry', 324 | type=int, 325 | default=5 326 | ) 327 | @click.option( 328 | '--from-browser', 329 | '-fb', 330 | 'browser', 331 | type=str, 332 | ) 333 | @click.option( 334 | '--time-range', 335 | '-tr', 336 | 'time_range', 337 | type=BasedTimeRange(), 338 | default=None, 339 | ) 340 | @click.option( 341 | '-h', 342 | "--help", 343 | is_flag=True, 344 | is_eager=True, 345 | expose_value=False, 346 | callback=handle_help, 347 | ) 348 | @click.option( 349 | '-v', 350 | "--version", 351 | is_flag=True, 352 | is_eager=True, 353 | expose_value=False, 354 | callback=handle_version, 355 | ) 356 | @click.option( 357 | "--debug", 358 | is_flag=True, 359 | is_eager=True, 360 | expose_value=False, 361 | callback=handle_debug, 362 | ) 363 | def main(**kwargs): 364 | loop = asyncio.new_event_loop() # avoid deprecated warning in 3.11 365 | asyncio.set_event_loop(loop) 366 | logger.debug(f'CLI KEY METHOD and OPTIONS: {kwargs}') 367 | try: 368 | # 
CLIProgress.switch_theme(gs="cyan", bs="dark_cyan") 369 | CLIProgress.start() # start progress 370 | if not kwargs['path'].exists(): 371 | kwargs['path'].mkdir(parents=True) 372 | logger.info(f'Directory {kwargs["path"]} not exists, auto created') 373 | executor, cor = assign(kwargs) 374 | loop.run_until_complete(cor) 375 | except HandleError as e: # method no match 376 | logger.error(e) 377 | except KeyboardInterrupt: 378 | logger.info('[cyan]提示:用户中断,重复执行命令可继续下载') 379 | finally: 380 | CLIProgress.stop() # stop rich progress to ensure cursor is repositioned 381 | -------------------------------------------------------------------------------- /bilix/download/base_downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import inspect 3 | import logging 4 | import re 5 | import time 6 | from functools import wraps 7 | from typing import Union, Optional, Tuple 8 | from contextlib import asynccontextmanager 9 | from urllib.parse import urlparse 10 | import aiofiles 11 | import httpx 12 | 13 | from bilix.cli.assign import auto_assemble 14 | from bilix.log import logger as dft_logger 15 | from bilix.download.utils import req_retry, path_check 16 | from bilix.progress.abc import Progress 17 | from bilix.progress.cli_progress import CLIProgress 18 | from bilix.exception import HandleMethodError 19 | from pathlib import Path, PurePath 20 | 21 | __all__ = ['BaseDownloader'] 22 | 23 | 24 | class BaseDownloaderMeta(type): 25 | def __new__(cls, name, bases, dct): 26 | dct['_cli_info'] = {} 27 | dct['_cli_map'] = {} 28 | for method_name, method in dct.items(): 29 | if not method_name.startswith('_') and asyncio.iscoroutinefunction(method): 30 | if 'path' in (sig := inspect.signature(method)).parameters: 31 | dct[method_name] = cls.ensure_path(method, sig) 32 | 33 | if cls.check_unique_method(method, bases): 34 | cli_info = cls.parse_cli_doc(method) 35 | if cli_info: 36 | dct['_cli_info'][method] = cli_info 37 | dct['_cli_map'][method_name] = method 38 | if cli_info['short']: 39 | dct['_cli_map'][cli_info['short']] = method 40 | 41 | return super().__new__(cls, name, bases, dct) 42 | 43 | @staticmethod 44 | def check_unique_method(method_name: str, bases: Tuple[type, ...]): 45 | for base in bases: 46 | if method_name in base.__dict__: 47 | return False 48 | return True 49 | 50 | @staticmethod 51 | def parse_cli_doc(func) -> Optional[dict]: 52 | docstring = func.__doc__ 53 | if not docstring or ':cli:' not in docstring: 54 | return 55 | params_matches = re.findall(r":param (\w+): (.+)", docstring) 56 | params = {param: description for param, description in params_matches} 57 | 58 | cli_short_match = re.search(r":cli: short: (\w+)", docstring) 59 | short_name = cli_short_match.group(1) if cli_short_match else None 60 | 61 | return {"short": short_name, "params": params} 62 | 63 | @staticmethod 64 | def ensure_path(func, sig): 65 | path_index = next(i for i, name in enumerate(sig.parameters) if name == 'path') 66 | 67 | @wraps(func) 68 | async def wrapper(*args, **kwargs): 69 | new_args = list(args) 70 | if path_index < len(args) and isinstance(args[path_index], str): 71 | new_args[path_index] = Path(args[path_index]) 72 | elif 'path' in kwargs and isinstance(kwargs['path'], str): 73 | kwargs['path'] = Path(kwargs['path']) 74 | 75 | return await func(*new_args, **kwargs) 76 | 77 | wrapper.__annotations__['path'] = Union[Path, str] 78 | return wrapper 79 | 80 | 81 | class BaseDownloader(metaclass=BaseDownloaderMeta): 82 | pattern: re.Pattern = 
None 83 | cookie_domain: str = "" 84 | _cli_info: dict 85 | _cli_map: dict 86 | 87 | def __init__( 88 | self, 89 | *, 90 | client: httpx.AsyncClient = None, 91 | browser: str = None, 92 | speed_limit: Union[float, int] = None, 93 | stream_retry: int = 5, 94 | progress: Progress = None, 95 | logger: logging.Logger = None, 96 | ): 97 | """ 98 | 99 | :param client: client used for http request 100 | :param browser: load cookies from which browser 101 | :param speed_limit: global download rate for the downloader, should be a number (Byte/s unit) 102 | :param progress: progress obj 103 | """ 104 | # use cli progress by default 105 | self.progress = progress or CLIProgress() 106 | self.logger = logger or dft_logger 107 | self.client = client if client else httpx.AsyncClient(headers={'user-agent': 'PostmanRuntime/7.29.0'}) 108 | if browser: # load cookies from browser, may need auth 109 | self.update_cookies_from_browser(browser) 110 | assert speed_limit is None or speed_limit > 0 111 | self.speed_limit = speed_limit 112 | self.stream_retry = stream_retry 113 | # active stream number 114 | self._stream_num = 0 115 | 116 | async def __aenter__(self): 117 | await self.client.__aenter__() 118 | return self 119 | 120 | async def __aexit__(self, exc_type, exc_val, exc_tb): 121 | await self.client.__aexit__(exc_type, exc_val, exc_tb) 122 | 123 | async def aclose(self): 124 | """Close transport and proxies for httpx client""" 125 | await self.client.aclose() 126 | 127 | async def get_static(self, url: str, path: Union[str, Path], convert_func=None) -> Path: 128 | """ 129 | 130 | :param url: 131 | :param path: file path without suffix 132 | :param convert_func: function used to convert http bytes content, must be named like ...2... 133 | :return: downloaded file path 134 | """ 135 | # use suffix from convert_func's name 136 | if convert_func: 137 | suffix = '.' + convert_func.__name__.split('2')[-1] 138 | # try to find suffix from url 139 | else: 140 | suffix = PurePath(urlparse(url).path).suffix 141 | path = path.with_name(path.name + suffix) 142 | exist, path = path_check(path) 143 | if exist: 144 | self.logger.info(f'[green]已存在[/green] {path.name}') 145 | return path 146 | res = await req_retry(self.client, url) 147 | content = convert_func(res.content) if convert_func else res.content 148 | async with aiofiles.open(path, 'wb') as f: 149 | await f.write(content) 150 | self.logger.info(f'[cyan]已完成[/cyan] {path.name}') 151 | return path 152 | 153 | @asynccontextmanager 154 | async def _stream_context(self, times: int): 155 | """ 156 | contextmanager to print log, slow down streaming and count active stream number 157 | 158 | :param times: error occur times which is related to sleep time 159 | :return: 160 | """ 161 | self._stream_num += 1 162 | try: 163 | yield 164 | except httpx.HTTPStatusError as e: 165 | if e.response.status_code == 403: 166 | self.logger.warning(f"STREAM slowing down since 403 forbidden {e}") 167 | await asyncio.sleep(10. 
* (times + 1)) 168 | else: 169 | self.logger.warning(f"STREAM {e}") 170 | await asyncio.sleep(.5 * (times + 1)) 171 | raise 172 | except httpx.TransportError as e: 173 | msg = f'STREAM {e.__class__.__name__} 异常可能由于网络条件不佳或并发数过大导致,若重复出现请考虑降低并发数' 174 | self.logger.warning(msg) if times > 2 else self.logger.debug(msg) 175 | await asyncio.sleep(.1 * (times + 1)) 176 | raise 177 | except Exception as e: 178 | self.logger.warning(f'STREAM Unexpected Exception class:{e.__class__.__name__} {e}') 179 | raise 180 | finally: 181 | self._stream_num -= 1 182 | 183 | @property 184 | def stream_num(self): 185 | """current activate network stream number""" 186 | return self._stream_num 187 | 188 | @property 189 | def chunk_size(self) -> Optional[int]: 190 | if self.speed_limit and self.speed_limit < 1e5: # 1e5 limit bound 191 | # only restrict chunk_size when speed_limit is too low 192 | return int(self.speed_limit * 0.1) # 0.1 delay slope 193 | # default to None setup 194 | return None 195 | 196 | async def _check_speed(self, content_size): 197 | if self.speed_limit and (cur_speed := self.progress.active_speed) > self.speed_limit: 198 | t_tgt = content_size / self.speed_limit * self.stream_num 199 | t_real = content_size / cur_speed 200 | t = t_tgt - t_real 201 | await asyncio.sleep(t) 202 | 203 | def update_cookies_from_browser(self, browser: str): 204 | try: 205 | a = time.time() 206 | import browser_cookie3 207 | f = getattr(browser_cookie3, browser.lower()) 208 | self.logger.debug(f"trying to load cookies from {browser}: {self.cookie_domain}, may need auth") 209 | self.client.cookies.update(f(domain_name=self.cookie_domain)) 210 | self.logger.debug(f"load complete, consumed time: {time.time() - a} s") 211 | except AttributeError: 212 | raise AttributeError(f"Invalid Browser {browser}") 213 | 214 | @classmethod 215 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool: 216 | """check if the cls can be handled by this downloader""" 217 | if cls.pattern: 218 | return cls.pattern.match(keys[0]) is not None 219 | else: 220 | return method in cls._cli_map 221 | 222 | @classmethod 223 | @auto_assemble 224 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 225 | if cls._decide_handle(method, keys, options): 226 | try: 227 | method = cls._cli_map[method] 228 | except KeyError: 229 | raise HandleMethodError(cls, method) 230 | return cls, method 231 | -------------------------------------------------------------------------------- /bilix/download/base_downloader_m3u8.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import uuid 3 | from pathlib import Path, PurePath 4 | from typing import Tuple, Union 5 | from urllib.parse import urlparse 6 | import aiofiles 7 | import httpx 8 | import os 9 | import m3u8 10 | from Crypto.Cipher import AES 11 | from m3u8 import Segment 12 | from bilix.download.base_downloader import BaseDownloader 13 | from bilix.download.utils import path_check, merge_files 14 | from bilix import ffmpeg 15 | from .utils import req_retry 16 | 17 | __all__ = ['BaseDownloaderM3u8'] 18 | 19 | 20 | class BaseDownloaderM3u8(BaseDownloader): 21 | """Base Async http m3u8 Downloader""" 22 | 23 | def __init__( 24 | self, 25 | *, 26 | client: httpx.AsyncClient = None, 27 | browser: str = None, 28 | speed_limit: Union[float, int] = None, 29 | stream_retry: int = 5, 30 | progress=None, 31 | logger=None, 32 | # unique params 33 | part_concurrency: int = 10, 34 | video_concurrency: Union[int, 
asyncio.Semaphore] = 3, 35 | ): 36 | super(BaseDownloaderM3u8, self).__init__( 37 | client=client, 38 | browser=browser, 39 | stream_retry=stream_retry, 40 | speed_limit=speed_limit, 41 | progress=progress, 42 | logger=logger 43 | ) 44 | self.v_sema = asyncio.Semaphore(video_concurrency) if isinstance(video_concurrency, int) else video_concurrency 45 | self.part_concurrency = part_concurrency 46 | self.decrypt_cache = {} 47 | 48 | async def _decrypt(self, seg: m3u8.Segment, content: bytearray): 49 | async def get_key(): 50 | key_bytes = (await req_retry(self.client, uri)).content 51 | iv = bytes.fromhex(seg.key.iv.replace('0x', '')) if seg.key.iv is not None else \ 52 | seg.custom_parser_values['iv'] 53 | return AES.new(key_bytes, AES.MODE_CBC, iv) 54 | 55 | uri = seg.key.absolute_uri 56 | if uri not in self.decrypt_cache: 57 | self.decrypt_cache[uri] = asyncio.ensure_future(get_key()) 58 | self.decrypt_cache[uri] = await self.decrypt_cache[uri] 59 | elif asyncio.isfuture(self.decrypt_cache[uri]): 60 | await self.decrypt_cache[uri] 61 | cipher = self.decrypt_cache[uri] 62 | return cipher.decrypt(content) 63 | 64 | async def to_invariant_m3u8(self, m3u8_url: str) -> m3u8.M3U8: 65 | res = await req_retry(self.client, m3u8_url, follow_redirects=True) 66 | m3u8_info = m3u8.loads(res.text) 67 | if not m3u8_info.base_uri: 68 | m3u8_info.base_uri = m3u8_url 69 | if m3u8_info.is_variant: 70 | self.logger.debug(f"m3u8 is variant, use first playlist: {m3u8_info.playlists[0].absolute_uri}") 71 | return await self.to_invariant_m3u8(m3u8_info.playlists[0].absolute_uri) 72 | return m3u8_info 73 | 74 | async def get_m3u8_video(self, m3u8_url: str, path: Union[str, Path], time_range: Tuple[int, int] = None) -> Path: 75 | """ 76 | download video from m3u8 url 77 | :cli: short: m3u8 78 | :param m3u8_url: 79 | :param path: file path or file dir, if dir, filename will be set according to m3u8_url 80 | :param time_range: (start, end) in seconds, if provided, only download the clip and add start-end to filename 81 | :return: downloaded file path 82 | """ 83 | if path.is_dir(): 84 | path = (path / PurePath(urlparse(m3u8_url).path).stem).with_suffix('.mp4') 85 | if time_range: 86 | path = path.with_stem(f"{path.stem}-{time_range[0]}-{time_range[1]}") 87 | exist, path = path_check(path) 88 | if exist: 89 | self.logger.info(f"[green]已存在[/green] {path.name}") 90 | return path 91 | async with self.v_sema: 92 | task_id = await self.progress.add_task(total=None, description=path.name) 93 | m3u8_info = await self.to_invariant_m3u8(m3u8_url) 94 | cors = [] 95 | p_sema = asyncio.Semaphore(self.part_concurrency) 96 | total_time = 0 97 | if time_range: 98 | current_time = 0 99 | start_time, end_time = time_range 100 | inside = False 101 | else: 102 | inside = True 103 | for idx, seg in enumerate(m3u8_info.segments): 104 | if time_range: 105 | current_time += seg.duration 106 | if not inside and current_time > start_time: 107 | inside = True 108 | s = seg.duration - (current_time - start_time) 109 | elif current_time > end_time: 110 | break 111 | if inside: 112 | total_time += seg.duration 113 | # https://stackoverflow.com/questions/50628791/decrypt-m3u8-playlist-encrypted-with-aes-128-without-iv 114 | if seg.key and seg.key.iv is None: 115 | seg.custom_parser_values['iv'] = idx.to_bytes(16, 'big') 116 | cors.append(self._get_seg(seg, path.with_name(f"{path.stem}-{idx}.ts"), task_id, p_sema)) 117 | if len(cors) == 0 and time_range: 118 | raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>") 119 | 
if init_sec := m3u8_info.segments[0].init_section: 120 | async def _get_init(): 121 | r = await req_retry(self.client, init_sec.absolute_uri) 122 | async with aiofiles.open(fn := path.with_name(f"{path.stem}-init"), 'wb') as f: 123 | await f.write(r.content) 124 | return fn 125 | 126 | cors.insert(0, _get_init()) 127 | merge_fn = merge_files 128 | else: 129 | merge_fn = ffmpeg.concat 130 | await self.progress.update(task_id, total_time=total_time) 131 | file_list = await asyncio.gather(*cors) 132 | 133 | await merge_fn(file_list, path) 134 | if time_range: 135 | path_tmp = path.with_stem(str(uuid.uuid4())) 136 | # to save key frame, use 0 as start time instead of s, clip will be a little longer than expected 137 | await ffmpeg.time_range_clip(path, 0, end_time - start_time + s, path_tmp) 138 | os.rename(path_tmp, path) 139 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 140 | await self.progress.update(task_id, visible=False) 141 | return path 142 | 143 | async def _update_task_total(self, task_id, time_part: float, update_size: int): 144 | task = self.progress.tasks[task_id] 145 | if task.total is None: 146 | confirmed_t = time_part 147 | confirmed_b = update_size 148 | else: 149 | confirmed_t = time_part + task.fields['confirmed_t'] 150 | confirmed_b = update_size + task.fields['confirmed_b'] 151 | predicted_total = task.fields['total_time'] * confirmed_b / confirmed_t 152 | await self.progress.update(task_id, total=predicted_total, confirmed_t=confirmed_t, confirmed_b=confirmed_b) 153 | 154 | async def _get_seg(self, seg: Segment, path: Path, task_id, p_sema: asyncio.Semaphore) -> Path: 155 | exists, path = path_check(path) 156 | if exists: 157 | downloaded = os.path.getsize(path) 158 | await self._update_task_total(task_id, time_part=seg.duration, update_size=downloaded) 159 | await self.progress.update(task_id, advance=downloaded) 160 | return path 161 | seg_url = seg.absolute_uri 162 | async with p_sema: 163 | content = None 164 | for times in range(1 + self.stream_retry): 165 | content = bytearray() 166 | try: 167 | async with self.client.stream("GET", seg_url, 168 | follow_redirects=True) as r, self._stream_context(times): 169 | r.raise_for_status() 170 | # pre-update total if content-length is provided and first time to get content 171 | if 'content-length' in r.headers and not content: 172 | await self._update_task_total( 173 | task_id, time_part=seg.duration, update_size=int(r.headers['content-length'])) 174 | async for chunk in r.aiter_bytes(chunk_size=self.chunk_size): 175 | content.extend(chunk) 176 | await self.progress.update(task_id, advance=len(chunk)) 177 | await self._check_speed(len(chunk)) 178 | if 'content-length' not in r.headers: # after-update total if content-length is not provided 179 | await self._update_task_total(task_id, time_part=seg.duration, update_size=len(content)) 180 | break 181 | except (httpx.HTTPStatusError, httpx.TransportError): 182 | continue 183 | else: 184 | raise Exception(f"STREAM 超过重复次数 {seg_url}") 185 | content = self._after_seg(seg, content) 186 | # in case encrypted 187 | if seg.key: 188 | content = await self._decrypt(seg, content) 189 | async with aiofiles.open(path, 'wb') as f: 190 | await f.write(content) 191 | return path 192 | 193 | def _after_seg(self, seg: Segment, content: bytearray) -> bytearray: 194 | """hook for subclass to modify segment content, happened before decrypt""" 195 | return content 196 | -------------------------------------------------------------------------------- /bilix/download/base_downloader_part.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path, PurePath 3 | from typing import Union, List, Iterable, Tuple 4 | from urllib.parse import urlparse 5 | import aiofiles 6 | import httpx 7 | import uuid 8 | import random 9 | import os 10 | from email.message import Message 11 | from pymp4.parser import Box 12 | from bilix.download.base_downloader import BaseDownloader 13 | from bilix.download.utils import path_check, merge_files 14 | from bilix import ffmpeg 15 | from .utils import req_retry 16 | 17 | __all__ = ['BaseDownloaderPart'] 18 | 19 | 20 | class BaseDownloaderPart(BaseDownloader): 21 | """Base Async http Content-Range Downloader""" 22 | 23 | def __init__( 24 | self, 25 | *, 26 | client: httpx.AsyncClient = None, 27 | browser: str = None, 28 | speed_limit: Union[float, int, None] = None, 29 | stream_retry: int = 5, 30 | progress=None, 31 | logger=None, 32 | # unique params 33 | part_concurrency: int = 10, 34 | ): 35 | super(BaseDownloaderPart, self).__init__( 36 | client=client, 37 | browser=browser, 38 | stream_retry=stream_retry, 39 | speed_limit=speed_limit, 40 | progress=progress, 41 | logger=logger 42 | ) 43 | self.part_concurrency = part_concurrency 44 | 45 | async def _pre_req(self, urls: List[str]) -> Tuple[int, str]: 46 | # use GET instead of HEAD due to 404 bug https://github.com/HFrost0/bilix/issues/16 47 | res = await req_retry(self.client, urls[0], follow_redirects=True, headers={'Range': 'bytes=0-1'}) 48 | total = int(res.headers['Content-Range'].split('/')[-1]) 49 | # get filename 50 | if content_disposition := res.headers.get('Content-Disposition', None): 51 | m = Message() 52 | m['content-type'] = content_disposition 53 | filename = m.get_param('filename', '') 54 | else: 55 | filename = '' 56 | # change origin url to redirected position to avoid twice redirect 57 | if res.history: 58 | urls[0] = str(res.url) 59 | return total, filename 60 | 61 | async def get_media_clip( 62 | self, 63 | url_or_urls: Union[str, Iterable[str]], 64 | path: Union[Path, str], 65 | time_range: Tuple[int, int], 66 | init_range: str, 67 | seg_range: str, 68 | get_s: asyncio.Future = None, 69 | set_s: asyncio.Future = None, 70 | task_id=None, 71 | ): 72 | """ 73 | 74 | :param url_or_urls: 75 | :param path: 76 | :param time_range: (start_time, end_time) 77 | :param init_range: xxx-xxx 78 | :param seg_range: xxx-xxx 79 | :param get_s: 80 | :param set_s: 81 | :param task_id: 82 | :return: 83 | """ 84 | upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None) 85 | exist, path = path_check(path) 86 | if exist: 87 | if not upper: 88 | self.logger.info(f'[green]已存在[/green] {path.name}') 89 | return path 90 | 91 | urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls] 92 | init_start, init_end = map(int, init_range.split('-')) 93 | seg_start, seg_end = map(int, seg_range.split('-')) 94 | res = await req_retry(self.client, urls[0], follow_redirects=True, 95 | headers={'Range': f'bytes={seg_start}-{seg_end}'}) 96 | container = Box.parse(res.content) 97 | assert container.type == b'sidx' 98 | if get_s: 99 | start_time = await get_s 100 | end_time = time_range[1] 101 | else: 102 | start_time, end_time = time_range 103 | pre_time, pre_byte = 0, seg_end + 1 104 | inside = False 105 | parts = [(init_start, init_end)] 106 | total = init_end - init_start + 1 107 | s = 0 108 | for idx, ref in enumerate(container.references): 109 | if ref.reference_type != "MEDIA": 110 | 
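# non-MEDIA sidx references point to nested index boxes rather than media segments,
# so they carry no downloadable byte range and are skipped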
self.logger.debug("not a media", ref) 111 | continue 112 | seg_duration = ref.segment_duration / container.timescale 113 | if not inside and start_time < pre_time + seg_duration: 114 | s = start_time - pre_time 115 | inside = True 116 | if inside and end_time < pre_time: 117 | break 118 | if inside: 119 | total += ref.referenced_size 120 | parts.append((pre_byte, pre_byte + ref.referenced_size - 1)) 121 | pre_time += seg_duration 122 | pre_byte += ref.referenced_size 123 | if len(parts) == 1: 124 | raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>") 125 | if set_s: 126 | set_s.set_result(start_time - s) 127 | if task_id is not None: 128 | await self.progress.update( 129 | task_id, 130 | total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total) 131 | else: 132 | task_id = await self.progress.add_task(description=path.name, total=total) 133 | p_sema = asyncio.Semaphore(self.part_concurrency) 134 | 135 | async def get_seg(part_range: Tuple[int, int]): 136 | async with p_sema: 137 | return await self._get_file_part(urls, path=path, part_range=part_range, task_id=task_id) 138 | 139 | file_list = await asyncio.gather(*[get_seg(part_range) for part_range in parts]) 140 | path_tmp = path.with_name(str(uuid.uuid4())) 141 | await merge_files(file_list, path_tmp) 142 | if set_s: 143 | await ffmpeg.time_range_clip(path_tmp, start=0, t=end_time - start_time + s, output_path=path) 144 | else: 145 | await ffmpeg.time_range_clip(path_tmp, start=s, t=end_time - start_time, output_path=path) 146 | if not upper: # no upstream task 147 | await self.progress.update(task_id, visible=False) 148 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 149 | return path 150 | 151 | async def get_file(self, url_or_urls: Union[str, Iterable[str]], path: Union[Path, str], task_id=None) -> Path: 152 | """ 153 | download file by http content-range 154 | :cli: short: f 155 | :param url_or_urls: file url or urls with backups 156 | :param path: file path or dir path, if dir path, filename will be extracted from url 157 | :param task_id: if not provided, a new progress task will be created 158 | :return: downloaded file path 159 | """ 160 | urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls] 161 | upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None) 162 | 163 | if not path.is_dir(): 164 | exist, path = path_check(path) 165 | if exist: 166 | if not upper: 167 | self.logger.info(f'[green]已存在[/green] {path.name}') 168 | return path 169 | 170 | total, req_filename = await self._pre_req(urls) 171 | 172 | if path.is_dir(): 173 | file_name = req_filename if req_filename else PurePath(urlparse(urls[0]).path).name 174 | path /= file_name 175 | exist, path = path_check(path) 176 | if exist: 177 | if not upper: 178 | self.logger.info(f'[green]已存在[/green] {path.name}') 179 | return path 180 | 181 | if task_id is not None: 182 | await self.progress.update( 183 | task_id, 184 | total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total) 185 | else: 186 | task_id = await self.progress.add_task(description=path.name, total=total) 187 | part_length = total // self.part_concurrency 188 | cors = [] 189 | for i in range(self.part_concurrency): 190 | start = i * part_length 191 | end = (i + 1) * part_length - 1 if i < self.part_concurrency - 1 else total - 1 192 | cors.append(self._get_file_part(urls, path=path, part_range=(start, end), task_id=task_id)) 193 | file_list = 
await asyncio.gather(*cors) 194 | await merge_files(file_list, new_path=path) 195 | if not upper: 196 | await self.progress.update(task_id, visible=False) 197 | self.logger.info(f"[cyan]已完成[/cyan] {path.name}") 198 | return path 199 | 200 | async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[int, int], 201 | task_id) -> Path: 202 | start, end = part_range 203 | part_path = path.with_name(f'{path.name}.{part_range[0]}-{part_range[1]}') 204 | exist, part_path = path_check(part_path) 205 | if exist: 206 | downloaded = os.path.getsize(part_path) 207 | start += downloaded 208 | await self.progress.update(task_id, advance=downloaded) 209 | if start > end: 210 | return part_path # skip already finished 211 | url_idx = random.randint(0, len(urls) - 1) 212 | 213 | for times in range(1 + self.stream_retry): 214 | try: 215 | async with \ 216 | self.client.stream("GET", urls[url_idx], follow_redirects=True, 217 | headers={'Range': f'bytes={start}-{end}'}) as r, \ 218 | self._stream_context(times), \ 219 | aiofiles.open(part_path, 'ab') as f: 220 | r.raise_for_status() 221 | if r.history: # avoid twice redirect 222 | urls[url_idx] = r.url 223 | async for chunk in r.aiter_bytes(chunk_size=self.chunk_size): 224 | await f.write(chunk) 225 | start += len(chunk) 226 | await self.progress.update(task_id, advance=len(chunk)) 227 | await self._check_speed(len(chunk)) 228 | break 229 | except (httpx.HTTPStatusError, httpx.TransportError): 230 | continue 231 | else: 232 | raise Exception(f"STREAM 超过重复次数 {part_path.name}") 233 | return part_path 234 | -------------------------------------------------------------------------------- /bilix/download/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import errno 3 | import os 4 | import random 5 | from functools import wraps 6 | from pathlib import Path 7 | 8 | import aiofiles 9 | import httpx 10 | from typing import Union, Sequence, Tuple, List 11 | from bilix.exception import APIError, APIParseError 12 | from bilix.log import logger 13 | 14 | 15 | async def merge_files(file_list: List[Path], new_path: Path): 16 | first_file = file_list[0] 17 | async with aiofiles.open(first_file, 'ab') as f: 18 | for idx in range(1, len(file_list)): 19 | async with aiofiles.open(file_list[idx], 'rb') as fa: 20 | await f.write(await fa.read()) 21 | os.remove(file_list[idx]) 22 | os.rename(first_file, new_path) 23 | 24 | 25 | async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, Sequence[str]], method='GET', 26 | follow_redirects=False, retry=5, **kwargs) -> httpx.Response: 27 | """Client request with multiple backup urls and retry""" 28 | pre_exc = None # predefine to avoid warning 29 | for times in range(1 + retry): 30 | url = url_or_urls if type(url_or_urls) is str else random.choice(url_or_urls) 31 | try: 32 | res = await client.request(method, url, follow_redirects=follow_redirects, **kwargs) 33 | res.raise_for_status() 34 | except httpx.TransportError as e: 35 | msg = f'{method} {e.__class__.__name__} url: {url}' 36 | logger.warning(msg) if times > 0 else logger.debug(msg) 37 | pre_exc = e 38 | await asyncio.sleep(.1 * (times + 1)) 39 | except httpx.HTTPStatusError as e: 40 | logger.warning(f'{method} {e.response.status_code} {url}') 41 | pre_exc = e 42 | await asyncio.sleep(1. 
* (times + 1)) 43 | except Exception as e: 44 | logger.warning(f'{method} {e.__class__.__name__} 未知异常 url: {url}') 45 | raise e 46 | else: 47 | return res 48 | logger.error(f"{method} 超过重复次数 {url_or_urls}") 49 | raise pre_exc 50 | 51 | 52 | def eclipse_str(s: str, max_len: int = 100): 53 | if len(s) <= max_len: 54 | return s 55 | else: 56 | half_len = (max_len - 1) // 2 57 | return f"{s[:half_len]}…{s[-half_len:]}" 58 | 59 | 60 | def path_check(path: Path, retry: int = 100) -> Tuple[bool, Path]: 61 | """ 62 | check whether path exist, if filename too long, truncate and return valid path 63 | 64 | :param path: path to check 65 | :param retry: max retry times 66 | :return: exist, path 67 | """ 68 | for times in range(retry): 69 | try: 70 | exist = path.exists() 71 | return exist, path 72 | except OSError as e: 73 | if e.errno == errno.ENAMETOOLONG: # filename too long for os 74 | if times == 0: 75 | logger.warning(f"filename too long for os, truncate will be applied. filename: {path.name}") 76 | else: 77 | logger.debug(f"filename too long for os {path.name}") 78 | path = path.with_stem(eclipse_str(path.stem, int(len(path.stem) * .8))) 79 | else: 80 | raise e 81 | raise OSError(f"filename too long for os {path.name}") 82 | 83 | 84 | def raise_api_error(func): 85 | """Decorator to catch exceptions except APIError and HTTPError and raise APIParseError""" 86 | 87 | @wraps(func) 88 | async def wrapped(client: httpx.AsyncClient, *args, **kwargs): 89 | try: 90 | return await func(client, *args, **kwargs) 91 | except (APIError, httpx.HTTPError): 92 | raise 93 | except Exception as e: 94 | raise APIParseError(e, func) from e 95 | 96 | return wrapped 97 | -------------------------------------------------------------------------------- /bilix/exception.py: -------------------------------------------------------------------------------- 1 | class APIError(Exception): 2 | """API Error during request to website""" 3 | 4 | def __init__(self, msg: str, resource): 5 | self.msg = msg 6 | self.resource = resource 7 | 8 | def __str__(self): 9 | return f"{self.msg} resource: {self.resource}" 10 | 11 | 12 | class APIParseError(APIError): 13 | """API Parse Error, maybe cased by website interface change, raise by decorator""" 14 | 15 | def __init__(self, e, func): 16 | self.e = e 17 | self.func = func 18 | 19 | def __str__(self): 20 | return f"APIParseError Caused by {self.e.__class__.__name__} in <{self.func.__module__}:{self.func.__name__}>" 21 | 22 | 23 | class APIResourceError(APIError): 24 | """API Error that resource is not available (like deleted by uploader)""" 25 | 26 | 27 | class APIUnsupportedError(APIError): 28 | """The resource parse is not supported yet""" 29 | 30 | 31 | class APIInvalidError(APIError): 32 | """API request is invalid""" 33 | 34 | 35 | class HandleError(Exception): 36 | """the error related to bilix cli handle""" 37 | 38 | 39 | class HandleMethodError(HandleError): 40 | """the error that handler can not recognize the method""" 41 | 42 | def __init__(self, executor_cls, method): 43 | self.executor_cls = executor_cls 44 | self.method = method 45 | 46 | def __str__(self): 47 | return f"For {self.executor_cls.__name__} method '{self.method}' is not available" 48 | -------------------------------------------------------------------------------- /bilix/ffmpeg.py: -------------------------------------------------------------------------------- 1 | """ 2 | just some useful ffmpeg commands wrapped in python 3 | """ 4 | import os 5 | from anyio import run_process 6 | from typing import List 7 | 
from pathlib import Path 8 | import tempfile 9 | 10 | 11 | async def concat(path_lst: List[Path], output_path: Path, remove=True): 12 | with tempfile.NamedTemporaryFile('w', dir=output_path.parent, delete=False) as fp: 13 | for path in path_lst: 14 | fp.write(f"file '{path.name}'\n") 15 | cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', fp.name, '-c', 'copy', '-loglevel', 'quiet', 16 | str(output_path)] 17 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 18 | await run_process(cmd) 19 | os.remove(fp.name) 20 | if remove: 21 | for path in path_lst: 22 | os.remove(path) 23 | 24 | 25 | async def combine(path_lst: List[Path], output_path: Path, remove=True): 26 | cmd = ['ffmpeg'] 27 | for path in path_lst: 28 | cmd.extend(['-i', str(path)]) 29 | # for flac, use -strict -2 30 | cmd.extend(['-c', 'copy', '-strict', '-2', '-loglevel', 'quiet', str(output_path)]) 31 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 32 | await run_process(cmd) 33 | if remove: 34 | for path in path_lst: 35 | os.remove(path) 36 | 37 | 38 | async def time_range_clip(input_path: Path, start: int, t: int, output_path: Path, remove=True): 39 | # for flac, use -strict -2 40 | cmd = ['ffmpeg', '-ss', f'{start:.1f}', '-t', f'{t:.1f}', '-i', str(input_path), '-codec', 'copy', '-strict', '-2', 41 | '-loglevel', 'quiet', '-f', 'mp4', str(output_path)] 42 | # print(' '.join(map(lambda x: f'"{x}"', cmd))) 43 | await run_process(cmd) 44 | if remove: 45 | os.remove(input_path) 46 | -------------------------------------------------------------------------------- /bilix/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rich.logging import RichHandler 3 | 4 | 5 | def get_logger(): 6 | bilix_logger = logging.getLogger("bilix") 7 | # 如果logger已经配置过handler,直接返回logger实例 8 | if bilix_logger.hasHandlers(): 9 | return bilix_logger 10 | bilix_logger.setLevel(logging.INFO) 11 | # 创建自定义的RichHandler 12 | custom_rich_handler = RichHandler( 13 | show_time=False, 14 | show_path=False, 15 | markup=True, 16 | keywords=RichHandler.KEYWORDS + ['STREAM'], 17 | rich_tracebacks=True 18 | ) 19 | # 设置日志格式 20 | formatter = logging.Formatter("{message}", style="{", datefmt="[%X]") 21 | custom_rich_handler.setFormatter(formatter) 22 | # 为logger添加自定义的RichHandler 23 | bilix_logger.addHandler(custom_rich_handler) 24 | return bilix_logger 25 | 26 | 27 | logger = get_logger() 28 | -------------------------------------------------------------------------------- /bilix/progress/abc.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Optional, Any 3 | 4 | 5 | class Progress(ABC): 6 | """Abstract Class for bilix download progress, checkout to design your own progress""" 7 | 8 | @classmethod 9 | @abstractmethod 10 | def start(cls): 11 | """start to show the progress""" 12 | 13 | @classmethod 14 | @abstractmethod 15 | def stop(cls): 16 | """stop to show the progress""" 17 | 18 | @abstractmethod 19 | def tasks(self): 20 | """return the tasks""" 21 | 22 | @abstractmethod 23 | def active_speed(self) -> Optional[float]: 24 | """return current active speed (bit/s)""" 25 | 26 | @abstractmethod 27 | async def add_task( 28 | self, 29 | description: str, 30 | start: bool = True, 31 | total: Optional[float] = None, 32 | completed: int = 0, 33 | visible: bool = True, 34 | **fields, 35 | ): 36 | """async add a task to progress""" 37 | 38 | @abstractmethod 39 | async def update( 40 | self, 41 | task_id, 42 | *, 43 | 
total: Optional[float] = None, 44 | completed: Optional[float] = None, 45 | advance: Optional[float] = None, 46 | description: Optional[str] = None, 47 | visible: Optional[bool] = None, 48 | refresh: bool = False, 49 | **fields: Any 50 | ): 51 | """async update a task status""" 52 | -------------------------------------------------------------------------------- /bilix/progress/cli_progress.py: -------------------------------------------------------------------------------- 1 | from bilix.progress.abc import Progress 2 | from typing import Optional, Any, Set 3 | from rich.theme import Theme 4 | from rich.style import Style 5 | from rich.spinner import Spinner 6 | from rich.progress import Progress as RichProgress, TaskID, \ 7 | TextColumn, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn, ProgressColumn 8 | 9 | 10 | class SpinnerColumn(ProgressColumn): 11 | def __init__(self, style="progress.spinner", speed: float = 1.0): 12 | self.waiting = Spinner("dqpb", style=style) 13 | self.downloading = Spinner("dots", style=style, speed=speed) 14 | self.merging = Spinner("line", style=style, speed=speed) 15 | super().__init__() 16 | 17 | def render(self, task): 18 | t = task.get_time() 19 | if task.total is None: 20 | return self.waiting.render(t) 21 | elif task.finished: 22 | return self.merging.render(t) 23 | else: 24 | return self.downloading.render(t) 25 | 26 | 27 | class CLIProgress(Progress): 28 | # Only one live display may be active at once 29 | _progress = RichProgress( 30 | SpinnerColumn(speed=2.), 31 | TextColumn("[progress.description]{task.description}"), 32 | TextColumn("[progress.percentage]{task.percentage:>4.1f}%"), 33 | BarColumn(), 34 | DownloadColumn(), 35 | TransferSpeedColumn(), 36 | TextColumn('ETA'), 37 | TimeRemainingColumn(), 38 | transient=True, 39 | ) 40 | 41 | def __init__(self): 42 | self._active_ids: Set[TaskID] = set() 43 | 44 | @classmethod 45 | def start(cls): 46 | cls._progress.start() 47 | 48 | @classmethod 49 | def stop(cls): 50 | cls._progress.stop() 51 | 52 | @property 53 | def tasks(self): 54 | return self._progress.tasks 55 | 56 | @staticmethod 57 | def _cat_description(description, max_length=33): 58 | mid = (max_length - 3) // 2 59 | return description if len(description) < max_length else f'{description[:mid]}...{description[-mid:]}' 60 | 61 | async def add_task( 62 | self, 63 | description: str, 64 | start: bool = True, 65 | total: Optional[float] = None, 66 | completed: int = 0, 67 | visible: bool = True, 68 | **fields: Any, 69 | ) -> TaskID: 70 | task_id = self._progress.add_task(description=self._cat_description(description), 71 | start=start, total=total, completed=completed, visible=visible, **fields) 72 | self._active_ids.add(task_id) 73 | return task_id 74 | 75 | @property 76 | def active_speed(self): 77 | return sum(self._progress.tasks[task_id].speed for task_id in self._active_ids 78 | if self._progress.tasks[task_id].speed) 79 | 80 | async def update( 81 | self, 82 | task_id: TaskID, 83 | *, 84 | total: Optional[float] = None, 85 | completed: Optional[float] = None, 86 | advance: Optional[float] = None, 87 | description: Optional[str] = None, 88 | visible: Optional[bool] = None, 89 | refresh: bool = False, 90 | **fields: Any, 91 | ) -> None: 92 | if description: 93 | description = self._cat_description(description) 94 | self._progress.update(task_id, total=total, completed=completed, advance=advance, 95 | description=description, visible=visible, refresh=refresh, **fields) 96 | if self._progress.tasks[task_id].finished and 
task_id in self._active_ids: 97 | self._active_ids.remove(task_id) 98 | 99 | @classmethod 100 | def switch_theme(cls, bs="rgb(95,138,239)", gs="rgb(65,165,189)"): 101 | cls._progress.console.push_theme(Theme({ 102 | # "progress.data.speed": Style(color=bs), 103 | "progress.download": Style(color=gs), 104 | "progress.percentage": Style(color=gs), 105 | "progress.spinner": Style(color=bs), 106 | "progress.remaining": Style(color=gs), 107 | # "bar.back": Style(color="grey23"), 108 | "bar.complete": Style(color=bs), 109 | "bar.finished": Style(color=gs), 110 | "bar.pulse": Style(color=bs), 111 | })) 112 | -------------------------------------------------------------------------------- /bilix/progress/ws_progress.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from bilix.progress.cli_progress import CLIProgress 5 | 6 | 7 | class WebSocketProgress(CLIProgress): 8 | def __init__(self, sockets): 9 | super().__init__() 10 | self._sockets = sockets 11 | 12 | async def broadcast(self, msg: str): 13 | cors = [s.send_text(msg) for s in self._sockets] 14 | await asyncio.gather(*cors) 15 | 16 | async def add_task(self, **kwargs): 17 | task_id = await super().add_task(**kwargs) 18 | asyncio.create_task( 19 | self.broadcast(json.dumps({'method': 'add_task', 'task_id': task_id, **kwargs})) 20 | ) 21 | return task_id 22 | 23 | async def update(self, task_id, **kwargs) -> None: 24 | await super().update(task_id, **kwargs) 25 | asyncio.create_task( 26 | self.broadcast(json.dumps({'method': 'update', "task_id": task_id, **kwargs})) 27 | ) 28 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderBilibili 2 | from .informer import InformerBilibili 3 | 4 | __all__ = ['DownloaderBilibili', 'InformerBilibili'] 5 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | from urllib.parse import quote 5 | import httpx 6 | from pydantic import field_validator, BaseModel, Field 7 | from typing import Union, List, Tuple, Dict, Optional 8 | import json5 9 | from danmakuC.bilibili import parse_view 10 | from bilix.download.utils import req_retry, raise_api_error 11 | from bilix.sites.bilibili.utils import parse_ids_from_url 12 | from bilix.utils import legal_title 13 | from bilix.exception import APIInvalidError, APIError, APIResourceError, APIUnsupportedError 14 | import hashlib 15 | import time 16 | 17 | dft_client_settings = { 18 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', 'referer': 'https://www.bilibili.com'}, 19 | 'cookies': {'CURRENT_FNVAL': '4048'}, 20 | 'http2': True 21 | } 22 | 23 | 24 | @raise_api_error 25 | async def get_cate_meta(client: httpx.AsyncClient) -> dict: 26 | """ 27 | 获取b站分区元数据 28 | 29 | :param client: 30 | :return: 31 | """ 32 | cate_info = {} 33 | res = await req_retry(client, 'https://s1.hdslb.com/bfs/static/laputa-channel/client/assets/index.c0ea30e6.js') 34 | cate_data = re.search('Za=([^;]*);', res.text).groups()[0] 35 | cate_data = json5.loads(cate_data)['channelList'] 36 | for i in cate_data: 37 | if 'sub' in i: 38 | for j in i['sub']: 39 | cate_info[j['name']] = j 40 | cate_info[i['name']] = i 41 | return cate_info 42 | 43 | 44 | 
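# usage sketch (illustrative only): the api helpers in this module all take a
# pre-configured httpx.AsyncClient, e.g.
#   client = httpx.AsyncClient(**dft_client_settings)
#   cate_meta = await get_cate_meta(client)  # inside an async function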
@raise_api_error 45 | async def get_list_info(client: httpx.AsyncClient, url_or_sid: str, ): 46 | """ 47 | 获取视频列表信息 48 | 49 | :param url_or_sid: 50 | :param client: 51 | :return: 52 | """ 53 | if url_or_sid.startswith('http'): 54 | sid = re.search(r'sid=(\d+)', url_or_sid).groups()[0] 55 | else: 56 | sid = url_or_sid 57 | res = await req_retry(client, f'https://api.bilibili.com/x/series/series?series_id={sid}') # meta api 58 | meta = json.loads(res.text) 59 | mid = meta['data']['meta']['mid'] 60 | params = {'mid': mid, 'series_id': sid, 'ps': meta['data']['meta']['total']} 61 | list_res, up_info = await asyncio.gather( 62 | req_retry(client, 'https://api.bilibili.com/x/series/archives', params=params), 63 | get_up_info(client, str(mid)), 64 | ) 65 | list_info = json.loads(list_res.text) 66 | list_name = meta['data']['meta']['name'] 67 | up_name = up_info.get('name', '') 68 | bvids = [i['bvid'] for i in list_info['data']['archives']] 69 | return list_name, up_name, bvids 70 | 71 | 72 | @raise_api_error 73 | async def get_collect_info(client: httpx.AsyncClient, url_or_sid: str): 74 | """ 75 | 获取合集信息 76 | 77 | :param url_or_sid: 78 | :param client: 79 | :return: 80 | """ 81 | sid = re.search(r'sid=(\d+)', url_or_sid).groups()[0] if url_or_sid.startswith('http') else url_or_sid 82 | params = {'season_id': sid} 83 | res = await req_retry(client, 'https://api.bilibili.com/x/space/fav/season/list', params=params) 84 | data = json.loads(res.text) 85 | medias = data['data']['medias'] 86 | info = data['data']['info'] 87 | col_name, up_name = info['title'], medias[0]['upper']['name'] 88 | bvids = [i['bvid'] for i in data['data']['medias']] 89 | return col_name, up_name, bvids 90 | 91 | 92 | @raise_api_error 93 | async def get_favour_page_info(client: httpx.AsyncClient, url_or_fid: str, pn=1, ps=20, keyword=''): 94 | """ 95 | 获取收藏夹信息(分页) 96 | 97 | :param url_or_fid: 98 | :param pn: 99 | :param ps: 100 | :param keyword: 101 | :param client: 102 | :return: 103 | """ 104 | if url_or_fid.startswith('http'): 105 | fid = re.findall(r'fid=(\d+)', url_or_fid)[0] 106 | else: 107 | fid = url_or_fid 108 | params = {'media_id': fid, 'pn': pn, 'ps': ps, 'keyword': keyword, 'order': 'mtime'} 109 | res = await req_retry(client, 'https://api.bilibili.com/x/v3/fav/resource/list', params=params) 110 | data = json.loads(res.text)['data'] 111 | fav_name, up_name = data['info']['title'], data['info']['upper']['name'] 112 | bvids = [i['bvid'] for i in data['medias'] if i['title'] != '已失效视频'] 113 | total_size = data['info']['media_count'] 114 | return fav_name, up_name, total_size, bvids 115 | 116 | 117 | @raise_api_error 118 | async def get_cate_page_info(client: httpx.AsyncClient, cate_id, time_from, time_to, pn=1, ps=30, 119 | order='click', keyword=''): 120 | """ 121 | 获取分区视频信息(分页) 122 | 123 | :param cate_id: 124 | :param pn: 125 | :param ps: 126 | :param order: 127 | :param keyword: 128 | :param time_from: 129 | :param time_to: 130 | :param client: 131 | :return: 132 | """ 133 | params = {'search_type': 'video', 'view_type': 'hot_rank', 'cate_id': cate_id, 'pagesize': ps, 134 | 'keyword': keyword, 'page': pn, 'order': order, 'time_from': time_from, 'time_to': time_to} 135 | res = await req_retry(client, 'https://s.search.bilibili.com/cate/search', params=params) 136 | info = json.loads(res.text) 137 | bvids = [i['bvid'] for i in info['result']] 138 | return bvids 139 | 140 | 141 | async def _add_sign(client: httpx.AsyncClient, params: dict): 142 | """添加b站api签名到params中 143 | :param params: 144 | :return: 145 | """ 146 | 
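# WBI signing as implemented below: img_key and sub_key are read from the nav api response,
# concatenated, reordered by the OE index table and truncated to 32 chars to form the mixin
# key; the params (plus a wts timestamp) are sorted, joined as key=value pairs, suffixed with
# the mixin key and md5-hashed to produce w_rid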
OE = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 147 | 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 148 | 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 149 | 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 150 | 20, 34, 44, 52] 151 | res = await req_retry( 152 | client, "https://api.bilibili.com/x/web-interface/nav" 153 | ) 154 | info = json.loads(res.text) 155 | img_val = info['data']['wbi_img']['img_url'].split('/')[-1].split('.')[0] 156 | sub_val = info['data']['wbi_img']['sub_url'].split('/')[-1].split('.')[0] 157 | val = img_val + sub_val 158 | request_token = ''.join([val[v] for v in OE])[:32] 159 | 160 | wts = int(time.time()) 161 | params["wts"] = wts 162 | data = dict(sorted(params.items())) 163 | data_str = "&".join([f"{k}={v}" for k, v in data.items()]) + request_token 164 | md5 = hashlib.md5(data_str.encode("utf-8")).hexdigest() 165 | params["w_rid"] = md5 166 | return params 167 | 168 | 169 | def _find_mid(space_url: str): 170 | return re.search(r'^https://space.bilibili.com/(\d+)/?', space_url).group(1) 171 | 172 | 173 | @raise_api_error 174 | async def get_up_video_info(client: httpx.AsyncClient, url_or_mid: str, pn=1, ps=30, order="pubdate", keyword=""): 175 | """ 176 | 获取up主信息 177 | 178 | :param url_or_mid: 179 | :param pn: 180 | :param ps: 181 | :param order: 182 | :param keyword: 183 | :param client: 184 | :return: 185 | """ 186 | if url_or_mid.startswith("http"): 187 | mid = re.findall(r"/(\d+)", url_or_mid)[0] 188 | else: 189 | mid = url_or_mid 190 | 191 | params = {"mid": mid, "order": order, "ps": ps, "pn": pn, "keyword": quote(keyword or "")} 192 | await _add_sign(client, params) 193 | 194 | res = await req_retry(client, "https://api.bilibili.com/x/space/wbi/arc/search", params=params) 195 | info = json.loads(res.text) 196 | up_name = info["data"]["list"]["vlist"][0]["author"] 197 | total_size = info["data"]["page"]["count"] 198 | bv_ids = [i["bvid"] for i in info["data"]["list"]["vlist"]] 199 | return up_name, total_size, bv_ids 200 | 201 | 202 | async def get_up_info(client: httpx.AsyncClient, url_or_mid: str): 203 | if url_or_mid.startswith("http"): 204 | mid = _find_mid(url_or_mid) 205 | else: 206 | mid = url_or_mid 207 | params = {"mid": mid} 208 | await _add_sign(client, params) 209 | res = await req_retry(client, "https://api.bilibili.com/x/space/wbi/acc/info", params=params) 210 | data = json.loads(res.text)['data'] 211 | return data 212 | 213 | 214 | class Media(BaseModel): 215 | base_url: str 216 | backup_url: Optional[List[str]] = None 217 | size: Optional[int] = None 218 | width: Optional[int] = None 219 | height: Optional[int] = None 220 | suffix: Optional[str] = None 221 | quality: Optional[str] = None 222 | codec: Optional[str] = None 223 | segment_base: Optional[dict] = None 224 | 225 | @property 226 | def urls(self): 227 | """the copy of all url including backup""" 228 | return [self.base_url, *self.backup_url] if self.backup_url else [self.base_url] 229 | 230 | 231 | class Dash(BaseModel): 232 | duration: int 233 | videos: List[Media] 234 | audios: List[Media] 235 | video_formats: Dict[str, Dict[str, Media]] 236 | audio_formats: Dict[str, Optional[Media]] 237 | 238 | @classmethod 239 | def from_dict(cls, play_info: dict): 240 | dash = play_info['dash'] # may raise KeyError 241 | video_formats = {} 242 | quality_map = {} 243 | for d in play_info['support_formats']: 244 | quality_map[d['quality']] = d['new_description'] 245 | video_formats[d['new_description']] = {} 246 | videos = [] 247 | for d in 
dash['video']: 248 | if d['id'] not in quality_map: 249 | continue # https://github.com/HFrost0/bilix/issues/93 250 | quality = quality_map[d['id']] 251 | m = Media(quality=quality, codec=d['codecs'], **d) 252 | video_formats[quality][m.codec] = m 253 | videos.append(m) 254 | 255 | audios = [] 256 | audio_formats = {} 257 | if dash.get('audio', None): # some video have NO audio 258 | d = dash['audio'][0] 259 | m = Media(quality="default", suffix='.aac', codec=d['codecs'], **d) 260 | audios.append(m) 261 | audio_formats[m.quality] = m 262 | if dash['dolby']['type'] != 0: 263 | quality = "dolby" 264 | audio_formats[quality] = None 265 | if dash['dolby'].get('audio', None): 266 | d = dash['dolby']['audio'][0] 267 | m = Media(quality=quality, suffix='.eac3', codec=d['codecs'], **d) 268 | audios.append(m) 269 | audio_formats[m.quality] = m 270 | if dash.get('flac', None): 271 | quality = "flac" 272 | audio_formats[quality] = None 273 | if d := dash['flac']['audio']: 274 | m = Media(quality=quality, suffix='.flac', codec=d['codecs'], **d) 275 | audios.append(m) 276 | audio_formats[m.quality] = m 277 | return cls(duration=dash['duration'], videos=videos, audios=audios, 278 | video_formats=video_formats, audio_formats=audio_formats) 279 | 280 | def choose_video(self, quality: Union[int, str], video_codec: str) -> Media: 281 | # 1. absolute choice with quality name like 4k 1080p '1080p 60帧' 282 | if isinstance(quality, str): 283 | for k in self.video_formats: 284 | if k.upper().startswith(quality.upper()): # incase 1080P->1080p 285 | for c in self.video_formats[k]: 286 | if c.startswith(video_codec): 287 | return self.video_formats[k][c] 288 | # 2. relative choice 289 | else: 290 | keys = [k for k in self.video_formats.keys() if self.video_formats[k]] 291 | quality = min(quality, len(keys) - 1) 292 | k = keys[quality] 293 | for c in self.video_formats[k]: 294 | if c.startswith(video_codec): 295 | return self.video_formats[k][c] 296 | raise KeyError(f"no match for video quality: {quality} codec: {video_codec}") 297 | 298 | def choose_audio(self, audio_codec: str) -> Optional[Media]: 299 | if len(self.audios) == 0: # some video has no audio 300 | return 301 | for k in self.audio_formats: 302 | if self.audio_formats[k] and self.audio_formats[k].codec.startswith(audio_codec): 303 | return self.audio_formats[k] 304 | raise KeyError(f'no match for audio codec: {audio_codec}') 305 | 306 | def choose_quality(self, quality: Union[str, int], codec: str = '') -> Tuple[Media, Optional[Media]]: 307 | v_codec, a_codec, *_ = codec.split(':') + [""] 308 | video, audio = self.choose_video(quality, v_codec), self.choose_audio(a_codec) 309 | return video, audio 310 | 311 | 312 | class Status(BaseModel): 313 | view: int = Field(description="播放量") 314 | danmaku: int = Field(description="弹幕数") 315 | coin: int = Field(description="硬币数") 316 | like: int = Field(description="点赞数") 317 | reply: int = Field(description="回复数") 318 | favorite: int = Field(description="收藏数") 319 | share: int = Field(description="分享数") 320 | follow: Optional[int] = Field(default=None, description="追剧数/追番数") 321 | 322 | @field_validator('view', mode="before") 323 | @classmethod 324 | def no_view(cls, v): 325 | return 0 if v == '--' else v 326 | 327 | 328 | class Page(BaseModel): 329 | p_name: str 330 | p_url: str 331 | 332 | 333 | class VideoInfo(BaseModel): 334 | title: str 335 | aid: int 336 | cid: int 337 | ep_id: Optional[int] = None 338 | p: int 339 | pages: List[Page] # [[p_name, p_url], ...] 
340 | img_url: str 341 | status: Status 342 | bvid: Optional[str] = None 343 | dash: Optional[Dash] = None 344 | other: Optional[List[Media]] = None # durl resource: flv, mp4. 345 | desc: Optional[str] = None 346 | tags: Optional[List[str]] = None 347 | 348 | 349 | def _parse_bv_html(url, html: str) -> VideoInfo: 350 | init_info = re.search(r'', html).groups()[0] 397 | data = json.loads(data) 398 | queries = data['props']['pageProps']['dehydratedState']['queries'] 399 | season_info = queries[0]['state']['data']['seasonInfo'] 400 | media_info = season_info['mediaInfo'] 401 | stat = media_info['stat'] 402 | status = Status(coin=stat['coins'], view=stat['views'], danmaku=stat['danmakus'], share=stat['share'], 403 | like=stat['likes'], reply=stat['reply'], favorite=stat['favorite'], follow=stat['favorites']) 404 | title = legal_title(media_info['title']) 405 | desc = media_info['evaluate'] 406 | episodes = media_info['episodes'] 407 | path: str = url.split('?')[0].split('/')[-1] 408 | ep_id = path[2:] if path.startswith('ep') else str(episodes[0]["ep_id"]) 409 | p = 0 410 | aid, cid, bvid = 0, 0, "" 411 | pages = [] 412 | img_url = '' 413 | for i, ep in enumerate(episodes): 414 | if str(ep["ep_id"]) == ep_id: 415 | p = i 416 | aid, cid, bvid = ep["aid"], ep["cid"], ep["bvid"] 417 | img_url = ep["cover"] 418 | pages.append(Page(p_name=legal_title(ep["playerEpTitle"]), p_url=ep["link"])) 419 | video_info = VideoInfo( 420 | title=title, status=status, desc=desc, 421 | aid=aid, cid=cid, bvid=bvid, p=p, pages=pages, 422 | img_url=img_url, ep_id=ep_id, 423 | ) 424 | return video_info 425 | 426 | 427 | @raise_api_error 428 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 429 | try: 430 | # try to get video info from web front-end first 431 | return await _get_video_info_from_html(client, url) 432 | except APIInvalidError: 433 | # try to get video info from api if web front-end is banned 434 | return await _get_video_info_from_api(client, url) 435 | 436 | 437 | async def _get_video_info_from_html(client: httpx.AsyncClient, url: str) -> VideoInfo: 438 | res = await req_retry(client, url, follow_redirects=True) 439 | if str(res.url).startswith("https://www.bilibili.com/festival"): 440 | raise APIInvalidError("特殊节日页面", url) 441 | html = res.text 442 | if "window._riskdata_" in html: 443 | raise APIInvalidError("web 前端访问被风控", url) 444 | if "window.__INITIAL_STATE__" in html: 445 | return _parse_bv_html(url, html) 446 | elif "__NEXT_DATA__" in html: 447 | video_info = _parse_ep_html(url, html) 448 | await _attach_ep_dash(client, video_info) 449 | return video_info 450 | else: 451 | raise APIUnsupportedError("未知页面类型", url) 452 | 453 | 454 | async def _get_video_info_from_api(client: httpx.AsyncClient, url: str) -> VideoInfo: 455 | assert '/av' in url or '/BV' in url # TODO: only support BV or av url 456 | video_info = await _get_video_basic_info_from_api(client, url) 457 | # can not be parallelized since we need to get cid first 458 | await _attach_dash_and_durl_from_api(client, video_info) 459 | return video_info 460 | 461 | 462 | async def _attach_ep_dash(client: httpx.AsyncClient, video_info: VideoInfo): 463 | params = { 464 | 'support_multi_audio': True, 465 | 'avid': video_info.aid, 466 | 'cid': video_info.cid, 467 | 'fnver': 0, 468 | 'fnval': 4048, 469 | 'fourk': 1, 470 | 'ep_id': video_info.ep_id, 471 | } 472 | res = await req_retry(client, 'https://api.bilibili.com/pgc/player/web/v2/playurl', params=params) 473 | res = json.loads(res.text) 474 | data = 
res['result']['video_info'] 475 | if "dash" in data: 476 | video_info.dash = Dash.from_dict(data) 477 | if "durl" in data: 478 | other = [] 479 | for i in data['durl']: 480 | suffix = re.search(r'\.([a-zA-Z0-9]+)\?', i['url']).group(1) 481 | other.append(Media(base_url=i['url'], backup_url=i['backup_url'], size=i['size'], suffix=suffix)) 482 | video_info.other = other 483 | 484 | 485 | async def _attach_dash_and_durl_from_api(client: httpx.AsyncClient, video_info: VideoInfo): 486 | params = {'cid': video_info.cid, 'bvid': video_info.bvid, 487 | 'qn': 120, # 如无 dash 资源(少数老视频),fallback 到 4K 超清 durl 488 | 'fnval': 4048, # 如 dash 资源可用,请求 dash 格式的全部可用流 489 | 'fourk': 1, # 请求 4k 资源 490 | 'fnver': 0, 'platform': 'pc', 'otype': 'json'} 491 | dash_response = await req_retry(client, 'https://api.bilibili.com/x/player/playurl', 492 | params=params, follow_redirects=True) 493 | dash_json = json.loads(dash_response.text) 494 | if dash_json['code'] != 0: 495 | raise APIResourceError(dash_json['message'], video_info.bvid) 496 | dash, other = None, [] 497 | if 'dash' in dash_json['data']: 498 | dash = Dash.from_dict(dash_json['data']) 499 | if 'durl' in dash_json['data']: 500 | for i in dash_json['data']['durl']: 501 | suffix = re.search(r'\.([a-zA-Z0-9]+)\?', i['url']).group(1) 502 | other.append(Media(base_url=i['url'], backup_url=i['backup_url'], size=i['size'], suffix=suffix)) 503 | video_info.dash, video_info.other = dash, other 504 | 505 | 506 | async def _get_video_basic_info_from_api(client: httpx.AsyncClient, url) -> VideoInfo: 507 | """通过 view api 获取视频的基本信息,不包括 dash 或 durl(other) 视频流资源""" 508 | aid, bvid, selected_page_num = parse_ids_from_url(url) 509 | params = {'bvid': bvid} if bvid else {'aid': aid} 510 | r = await req_retry(client, 'https://api.bilibili.com/x/web-interface/view', 511 | params=params, follow_redirects=True) 512 | raw_json = json.loads(r.text) 513 | if raw_json['code'] != 0: 514 | raise APIResourceError(raw_json['message'], raw_json['message']) 515 | title = legal_title(raw_json['data']['title']) 516 | h1_title = title # TODO: 根据视频类型,使 h1_title 与实际网页标题的格式一致 517 | aid = raw_json['data']['aid'] 518 | bvid = raw_json['data']['bvid'] 519 | base_url = f"https://www.bilibili.com/video/{bvid}/" 520 | status = Status(**raw_json['data']['stat']) 521 | pages = [] 522 | p = None 523 | cid = None 524 | for idx, i in enumerate(raw_json['data']['pages']): 525 | page_num = int(i['page']) 526 | if page_num == selected_page_num: 527 | p = idx # selected_page_num 的分p 在 pages 列表中的 index 位置 528 | cid = int(i['cid']) # selected_page_num 的分p 的 cid 529 | p_url = f"{base_url}?p={page_num}" 530 | p_name = f"P{page_num}-{i['part']}" 531 | pages.append(Page(p_name=p_name, p_url=p_url)) 532 | assert p is not None, f"没有找到分P: p{selected_page_num},请检查输入" # cid 也会是 None 533 | img_url = raw_json['data']['pic'] 534 | basic_video_info = VideoInfo(title=title, h1_title=h1_title, aid=aid, cid=cid, status=status, 535 | p=p, pages=pages, img_url=img_url, bvid=bvid, dash=None, other=None) 536 | return basic_video_info 537 | 538 | 539 | @raise_api_error 540 | async def get_subtitle_info(client: httpx.AsyncClient, bvid, cid): 541 | params = {'bvid': bvid, 'cid': cid} 542 | res = await req_retry(client, 'https://api.bilibili.com/x/player/v2', params=params) 543 | info = json.loads(res.text) 544 | if info['code'] == -400: 545 | raise APIError(f'未找到字幕信息', params) 546 | return [[f'http:{i["subtitle_url"]}', i['lan_doc']] for i in info['data']['subtitle']['subtitles']] 547 | 548 | 549 | @raise_api_error 550 | async def 
get_dm_urls(client: httpx.AsyncClient, aid, cid) -> List[str]: 551 | params = {'oid': cid, 'pid': aid, 'type': 1} 552 | res = await req_retry(client, f'https://api.bilibili.com/x/v2/dm/web/view', params=params) 553 | view = parse_view(res.content) 554 | total = int(view['dmSge']['total']) 555 | return [f'https://api.bilibili.com/x/v2/dm/web/seg.so?oid={cid}&type=1&segment_index={i + 1}' for i in range(total)] 556 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | import asyncio 4 | from datetime import datetime, timedelta 5 | from bilix.sites.bilibili import api 6 | 7 | client = httpx.AsyncClient(**api.dft_client_settings) 8 | 9 | 10 | # https://stackoverflow.com/questions/61022713/pytest-asyncio-has-a-closed-event-loop-but-only-when-running-all-tests 11 | @pytest.fixture(scope="session") 12 | def event_loop(): 13 | try: 14 | loop = asyncio.get_running_loop() 15 | except RuntimeError: 16 | loop = asyncio.new_event_loop() 17 | yield loop 18 | loop.close() 19 | 20 | 21 | @pytest.mark.asyncio 22 | async def test_get_cate_meta(): 23 | data = await api.get_cate_meta(client) 24 | assert '舞蹈' in data and "sub" in data["舞蹈"] 25 | assert "宅舞" in data and 'tid' in data['宅舞'] 26 | 27 | 28 | @pytest.mark.asyncio 29 | async def test_get_list_info(): 30 | list_name, up_name, bvids = await api.get_list_info( 31 | client, 32 | "https://space.bilibili.com/369750017/channel/seriesdetail?sid=2458228") 33 | assert list_name == '瘦腰腹跟练' 34 | assert len(bvids) > 0 and bvids[0].startswith('BV') 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_get_collect_info(): 39 | list_name, up_name, bvids = await api.get_collect_info( 40 | client, 41 | "https://space.bilibili.com/54296062/channel/collectiondetail?sid=412818&ctype=0") 42 | assert list_name == 'asyncio协程' 43 | assert len(bvids) > 0 and bvids[0].startswith('BV') 44 | 45 | 46 | @pytest.mark.asyncio 47 | async def test_get_favour_page_info(): 48 | fav_name, up_name, total_size, bvids = await api.get_favour_page_info(client, "69072721") 49 | assert fav_name == '默认收藏夹' 50 | assert len(bvids) > 0 and bvids[0].startswith('BV') 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_get_cate_page_info(): 55 | time_to = datetime.now() 56 | time_from = time_to - timedelta(days=7) 57 | time_from, time_to = time_from.strftime('%Y%m%d'), time_to.strftime('%Y%m%d') 58 | meta = await api.get_cate_meta(client) 59 | bvids = await api.get_cate_page_info(client, cate_id=meta['宅舞']['tid'], time_from=time_from, time_to=time_to) 60 | assert len(bvids) > 0 and bvids[0].startswith('BV') 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_get_up_video_info(): 65 | up_name, total_size, bvids = await api.get_up_video_info(client, "316568752", keyword="什么") 66 | assert len(bvids) > 0 and bvids[0].startswith('BV') 67 | 68 | 69 | # GitHub actions problem... 
70 | # @pytest.mark.asyncio 71 | # async.md def test_get_special_audio(): 72 | # # Dolby 73 | # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV13L4y1K7th') 74 | # assert data.dash['dolby']['type'] != 0 75 | # # Hi-Res 76 | # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV16K411S7sk') 77 | # assert data.dash['flac']['display'] 78 | 79 | 80 | @pytest.mark.asyncio 81 | async def test_get_video_info(): 82 | methods = (api._get_video_info_from_html, api._get_video_info_from_api) 83 | for method in methods: 84 | # 单个bv视频 85 | data = await method(client, "https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0") 86 | assert len(data.pages) == 1 87 | assert data.p == 0 88 | assert data.bvid 89 | assert data.img_url.startswith('http://') or data.img_url.startswith('https://') 90 | assert data.dash 91 | # 多个bv视频 92 | data = await method(client, "https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 93 | assert len(data.pages) > 1 94 | assert data.p == 4 95 | assert data.bvid 96 | if method is api._get_video_info_from_api: 97 | continue 98 | # 电视剧 99 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss24053?spm_id_from=333.337.0.0") 100 | assert len(data.pages) > 1 101 | assert data.status.follow 102 | # 动漫 103 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss5043?spm_id_from=333.337.0.0") 104 | assert len(data.pages) > 1 105 | assert data.status.follow 106 | # 电影 107 | data = await method(client, 108 | "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") 109 | assert data.title == '天气之子' 110 | assert data.status.follow 111 | # 纪录片 112 | data = await method(client, "https://www.bilibili.com/bangumi/play/ss40509?from_spmid=666.9.hotlist.3") 113 | assert len(data.pages) > 1 114 | assert data.status.follow 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_get_subtitle_info(): 119 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV1hS4y1m7Ma") 120 | data = await api.get_subtitle_info(client, data.bvid, data.cid) 121 | assert data[0][0].startswith('http') 122 | assert data[0][1] 123 | 124 | 125 | @pytest.mark.asyncio 126 | async def test_get_dm_info(): 127 | data = await api.get_video_info(client, 128 | "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") 129 | data = await api.get_dm_urls(client, data.aid, data.cid) 130 | assert len(data) > 0 131 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/downloader_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.bilibili import DownloaderBilibili 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_get_collect_or_list(): 8 | d = DownloaderBilibili() 9 | await d.get_collect_or_list('https://space.bilibili.com/54296062/channel/collectiondetail?sid=412818&ctype=0', 10 | quality=999) 11 | await d.get_collect_or_list('https://space.bilibili.com/8251621/channel/seriesdetail?sid=2323334&ctype=0', 12 | quality=999) 13 | await d.aclose() 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_get_favour(): 18 | d = DownloaderBilibili() 19 | await d.get_favour("69072721", num=1, quality=999) 20 | await d.aclose() 21 | 22 | 23 | @pytest.mark.asyncio 24 | async def test_get_cate(): 25 | d = DownloaderBilibili() 26 | await d.get_cate("宅舞", num=1, order="click", keyword="jk", quality=1) 27 | await d.aclose() 28 | 29 | 30 | 
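# note: an integer quality is a relative index (see Dash.choose_video), clamped to the number
# of available formats, so a large value such as 999 simply selects the last listed stream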
@pytest.mark.asyncio 31 | async def test_get_up(): 32 | d = DownloaderBilibili() 33 | await d.get_up("455511061", num=1, order="pubdate", quality=1) 34 | await d.aclose() 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_get_series(): 39 | d = DownloaderBilibili() 40 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), quality=999) 41 | # only audio 42 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), only_audio=True) 43 | # image 44 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(1, 1), image=True, quality=999) 45 | # 单个视频 46 | await d.get_series("https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0", quality=999) 47 | await d.aclose() 48 | 49 | 50 | @pytest.mark.asyncio 51 | async def test_get_dm(): 52 | d = DownloaderBilibili() 53 | await d.get_dm('https://www.bilibili.com/video/BV11Z4y1z7s8?spm_id_from=333.337.search-card.all.click') 54 | await d.aclose() 55 | 56 | 57 | @pytest.mark.asyncio 58 | async def test_get_subtitle(): 59 | d = DownloaderBilibili() 60 | await d.get_subtitle("https://www.bilibili.com/video/BV1hS4y1m7Ma") 61 | await d.aclose() 62 | 63 | 64 | @pytest.mark.asyncio 65 | async def test_choose_quality(): 66 | import os 67 | from bilix.sites.bilibili import api 68 | 69 | client = httpx.AsyncClient() 70 | client.cookies.set('SESSDATA', os.getenv('BILI_TOKEN')) 71 | # dolby 72 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV13L4y1K7th") 73 | try: 74 | video, audio = data.dash.choose_quality(quality=999, codec=":ec-3") 75 | except KeyError: 76 | assert not os.getenv("BILI_TOKEN") 77 | # normal 78 | data.dash.choose_quality(quality="360P", codec="hev") 79 | # hi-res 80 | data = await api.get_video_info(client, "https://www.bilibili.com/video/BV16K411S7sk") 81 | try: 82 | video, audio = data.dash.choose_quality(quality='1080P', codec="hev:fLaC") 83 | except KeyError: 84 | assert not os.getenv("BILI_TOKEN") 85 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/informer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Tuple 3 | from rich.tree import Tree 4 | from .downloader import DownloaderBilibili 5 | from . 
import api 6 | from bilix.log import logger 7 | from rich import print as rprint 8 | from bilix.utils import convert_size 9 | from bilix.download.utils import req_retry 10 | from bilix.cli.assign import kwargs_filter 11 | 12 | 13 | class InformerBilibili(DownloaderBilibili): 14 | """A special downloader with functionality to log info of bilibili resources""" 15 | 16 | @classmethod 17 | def parse_url(cls, url: str): 18 | res = super().parse_url(url) 19 | func_name = res.__name__.replace("get_", "info_") 20 | return getattr(cls, func_name) 21 | 22 | async def info_key(self, key): 23 | await self.parse_url(key)(self, key) 24 | 25 | async def info_up(self, url: str): 26 | up_name, total_size, bvids = await api.get_up_video_info(self.client, url) 27 | rprint(up_name) 28 | 29 | async def info_favour(self, url: str): 30 | pass 31 | 32 | async def info_collect_or_list(self, url: str): 33 | pass 34 | 35 | async def info_video(self, url: str): 36 | video_info = await api.get_video_info(self.client, url) 37 | if video_info.dash is None and video_info.other is None: 38 | return logger.warning(f'{video_info.title} 需要大会员或该地区不支持') 39 | elif video_info.other and video_info.dash is None: 40 | return rprint(video_info.other) # todo: beautify durl info 41 | 42 | async def ensure_size(m: api.Media): 43 | if m.size is None: 44 | res = await req_retry(self.client, m.base_url, method='GET', headers={'Range': 'bytes=0-1'}) 45 | m.size = int(res.headers['Content-Range'].split('/')[-1]) 46 | 47 | dash = video_info.dash 48 | cors = [ensure_size(m) for m in dash.videos] + [ensure_size(m) for m in dash.audios] 49 | await asyncio.gather(*cors) 50 | 51 | tree = Tree( 52 | f"[bold reverse] {video_info.title}-{video_info.pages[video_info.p].p_name} [/]" 53 | f" {video_info.status.view:,}👀 {video_info.status.like:,}👍 {video_info.status.coin:,}🪙", 54 | guide_style="bold cyan") 55 | video_tree = tree.add("[bold]画面 Video") 56 | audio_tree = tree.add("[bold]声音 Audio") 57 | leaf_fmt = "codec: {codec:32} size: {size}" 58 | # for video 59 | for quality in dash.video_formats: 60 | p_tree = video_tree.add(quality) 61 | for c in dash.video_formats[quality]: 62 | m = dash.video_formats[quality][c] 63 | p_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) 64 | if len(p_tree.children) == 0: 65 | p_tree.style = "rgb(242,93,142)" 66 | p_tree.add("需要登录或大会员") 67 | # for audio 68 | name_map = {"default": "默认音质", "dolby": "杜比全景声 Dolby", "flac": "Hi-Res无损"} 69 | for k in dash.audio_formats: 70 | sub_tree = audio_tree.add(name_map[k]) 71 | if m := dash.audio_formats[k]: 72 | sub_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) 73 | else: 74 | sub_tree.style = "rgb(242,93,142)" 75 | sub_tree.add("需要登录或大会员") 76 | rprint(tree) 77 | 78 | @classmethod 79 | def handle(cls, method: str, keys: Tuple[str, ...], options: dict): 80 | if cls.pattern.match(keys[0]) and 'info' == method: 81 | informer = InformerBilibili(sess_data=options['cookie'], **kwargs_filter(cls, options)) 82 | 83 | # in order to maintain order 84 | async def temp(): 85 | for key in keys: 86 | if len(keys) > 1: 87 | logger.info(f"For {key}") 88 | await informer.info_key(key) 89 | 90 | return informer, temp() 91 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/informer_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.bilibili import InformerBilibili 3 | 4 | informer = InformerBilibili() 5 | 6 | 7 | 
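# these tests make live requests and render a rich Tree of the available streams; they carry
# no assertions and mainly verify that parsing and output do not raise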
@pytest.mark.asyncio 8 | async def test_bilibili_informer(): 9 | await informer.info_video('https://www.bilibili.com/video/BV1sG411A7r3') 10 | await informer.info_video('https://www.bilibili.com/video/BV1oG4y1Z7fx') 11 | await informer.info_video('https://www.bilibili.com/video/BV1eV411W7tt') 12 | await informer.info_video("https://www.bilibili.com/bangumi/play/ep508404/") 13 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def parse_ids_from_url(url_or_string: str): 5 | bvid, aid, page_num = None, None, 1 6 | if re.match(r'https?://www.bilibili.com/video/BV\w+', url_or_string) or re.match(r'BV\w+', url_or_string): 7 | bvid = re.search(r'(BV\w+)', url_or_string).groups()[0] 8 | assert bvid.isalnum() 9 | elif re.match(r'https?://www.bilibili.com/video/av\d+', url_or_string) or re.match(r'av\d+', url_or_string): 10 | aid = re.search(r'av(\d+)', url_or_string).groups()[0] 11 | assert aid.isdigit() 12 | aid = int(aid) 13 | else: 14 | raise ValueError(f"{url_or_string} is not a valid bilibili video url") 15 | # ?p=123 or &p=123 16 | if m := re.match(r'.*[?&]p=(\d+)', url_or_string): 17 | page_num = int(m.groups()[0]) 18 | assert page_num >= 1 19 | return aid, bvid, page_num 20 | -------------------------------------------------------------------------------- /bilix/sites/bilibili/utils_test.py: -------------------------------------------------------------------------------- 1 | from bilix.sites.bilibili.utils import parse_ids_from_url 2 | 3 | 4 | def test_parse_ids_from_url(): 5 | strings = [ 6 | "https://www.bilibili.com/video/av170001", 7 | "http://www.bilibili.com/video/BV1Xx41117Tz/?ba=labala&p=3#time=1234", 8 | "av170001", 9 | "BV1sE411w7tQ?p=2&from=search", 10 | "https://www.bilibili.com/video/BV1xx411c7HW?p=1" 11 | ] 12 | results = [ 13 | (170001, None, 1), 14 | (None, 'BV1Xx41117Tz', 3), 15 | (170001, None, 1), 16 | (None, 'BV1sE411w7tQ', 2), 17 | (None, 'BV1xx411c7HW', 1) 18 | ] 19 | for index, string in enumerate(strings): 20 | assert parse_ids_from_url(string) == results[index] 21 | -------------------------------------------------------------------------------- /bilix/sites/cctv/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderCctv 2 | 3 | __all__ = ['DownloaderCctv'] 4 | -------------------------------------------------------------------------------- /bilix/sites/cctv/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | import json 4 | from typing import Sequence, Tuple 5 | 6 | import httpx 7 | import m3u8 8 | 9 | from bilix.download.utils import req_retry, raise_api_error 10 | from bilix.utils import legal_title 11 | 12 | dft_client_settings = { 13 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 14 | 'http2': True 15 | } 16 | 17 | 18 | @raise_api_error 19 | async def get_id(client: httpx.AsyncClient, url: str) -> Tuple[str, str, str]: 20 | res_web = await req_retry(client, url) 21 | pid = re.findall(r'guid ?= ?"(\w+)"', res_web.text)[0] 22 | vide = re.findall(r'/(VIDE\w+)\.', url)[0] 23 | try: 24 | vida = re.findall(r'videotvCodes ?= ?"(\w+)"', res_web.text)[0] 25 | except IndexError: 26 | vida = None 27 | return pid, vide, vida 28 | 29 | 30 | @raise_api_error 31 | async def get_media_info(client: httpx.AsyncClient, pid: str) -> Tuple[str, 
Sequence[str]]: 32 | """ 33 | 34 | :param pid: 35 | :param client: 36 | :return: title and m3u8 urls sorted by quality 37 | """ 38 | res = await req_retry(client, f'https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={pid}') 39 | info_data = json.loads(res.text) 40 | # extract 41 | title = legal_title(info_data['title']) 42 | m3u8_main_url = info_data['hls_url'] 43 | res = await req_retry(client, m3u8_main_url) 44 | m3u8_info = m3u8.loads(res.text) 45 | if m3u8_info.base_uri is None: 46 | m3u8_info.base_uri = re.match(r'(https?://[^/]*)/', m3u8_main_url).groups()[0] 47 | m3u8_urls = list(sorted((i.absolute_uri for i in m3u8_info.playlists), reverse=True, 48 | key=lambda s: int(re.findall(r'/(\d+).m3u8', s)[0]))) 49 | return title, m3u8_urls 50 | 51 | 52 | @raise_api_error 53 | async def get_series_info(client: httpx.AsyncClient, vide: str, vida: str) -> Tuple[str, Sequence[str]]: 54 | """ 55 | 56 | :param vide: 57 | :param vida: 58 | :param client: 59 | :return: title and list of guid(pid) 60 | """ 61 | params = {'mode': 0, 'id': vida, 'serviceId': 'tvcctv', 'p': 1, 'n': 999} 62 | res_meta, res_list = await asyncio.gather( 63 | req_retry(client, f"https://api.cntv.cn/NewVideoset/getVideoAlbumInfoByVideoId?id={vide}&serviceId=tvcctv"), 64 | req_retry(client, f'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew', params=params) 65 | ) 66 | meta_data = json.loads(res_meta.text) 67 | list_data = json.loads(res_list.text) 68 | # extract 69 | title = legal_title(meta_data['data']['title']) 70 | pids = [i['guid'] for i in list_data['data']['list']] 71 | return title, pids 72 | -------------------------------------------------------------------------------- /bilix/sites/cctv/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.cctv import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | pid, vide, vida = await api.get_id(client, "https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml") 11 | data = await api.get_media_info(client, pid) 12 | data = await api.get_series_info(client, vide, vida) 13 | pass 14 | -------------------------------------------------------------------------------- /bilix/sites/cctv/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | 7 | from . 
import api 8 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 9 | 10 | 11 | class DownloaderCctv(BaseDownloaderM3u8): 12 | pattern = re.compile(r'https?://(?:tv\.cctv\.com|tv\.cctv\.cn)/?[?/](?:pid=)?(\d+)(?:&vid=(\d+))?(?:&v=(\d+))?') 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 25 | # unique params 26 | hierarchy: bool = True, 27 | ): 28 | client = client or httpx.AsyncClient(**api.dft_client_settings) 29 | super(DownloaderCctv, self).__init__( 30 | client=client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.hierarchy = hierarchy 40 | 41 | async def get_series(self, url: str, path=Path('.'), quality: int = 0): 42 | """ 43 | :cli: short: s 44 | :param url: 45 | :param path: 46 | :param quality: 47 | :return: 48 | """ 49 | pid, vide, vida = await api.get_id(self.client, url) 50 | if vida is None: # 单个视频 51 | await self.get_video(pid, quality=quality) 52 | else: # 剧集 53 | title, pids = await api.get_series_info(self.client, vide, vida) 54 | if self.hierarchy: 55 | path /= title 56 | path.mkdir(parents=True, exist_ok=True) 57 | await asyncio.gather(*[self.get_video(pid, path, quality) for pid in pids]) 58 | 59 | async def get_video(self, url_or_pid: str, path=Path('.'), quality: int = 0, time_range: Tuple[int, int] = None): 60 | """ 61 | :cli: short: v 62 | :param url_or_pid: 63 | :param path: 64 | :param quality: 65 | :param time_range: 66 | :return: 67 | """ 68 | if url_or_pid.startswith('http'): 69 | pid, _, _ = await api.get_id(self.client, url_or_pid) 70 | else: 71 | pid = url_or_pid 72 | title, m3u8_urls = await api.get_media_info(self.client, pid) 73 | m3u8_url = m3u8_urls[min(quality, len(m3u8_urls) - 1)] 74 | file_path = await self.get_m3u8_video(m3u8_url, path / f"{title}.mp4", time_range=time_range) 75 | return file_path 76 | -------------------------------------------------------------------------------- /bilix/sites/douyin/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderDouyin 2 | 3 | __all__ = ['DownloaderDouyin'] 4 | -------------------------------------------------------------------------------- /bilix/sites/douyin/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Originally From 3 | @Author: https://github.com/Evil0ctal/ 4 | https://github.com/Evil0ctal/Douyin_TikTok_Download_API 5 | 6 | Modified by 7 | @Author: https://github.com/HFrost0/ 8 | """ 9 | import asyncio 10 | import re 11 | import json 12 | from typing import List 13 | import httpx 14 | from pydantic import BaseModel 15 | from bilix.utils import legal_title 16 | from bilix.download.utils import req_retry, raise_api_error 17 | 18 | dft_client_settings = { 19 | 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' 20 | ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' 21 | ' Safari/537.36 Edg/87.0.664.66'}, 22 | 'http2': True 23 | } 24 | 25 | 26 | class VideoInfo(BaseModel): 27 | title: str 28 | author_name: str 29 | wm_urls: List[str] 30 | nwm_urls: List[str] 31 | 
cover: str 32 | dynamic_cover: str 33 | origin_cover: str 34 | 35 | 36 | @raise_api_error 37 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 38 | if short_url := re.findall(r'https://v.douyin.com/\w+/', url): 39 | res = await req_retry(client, short_url[0], follow_redirects=True) 40 | url = str(res.url) 41 | if key := re.search(r'/video/(\d+)', url): 42 | key = key.groups()[0] 43 | else: 44 | key = re.search(r"modal_id=(\d+)", url).groups()[0] 45 | res = await req_retry(client, f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={key}') 46 | data = json.loads(res.text) 47 | data = data['item_list'][0] 48 | # 视频标题 49 | title = legal_title(data['desc']) 50 | # 视频作者昵称 51 | author_name = data['author']['nickname'] 52 | # 有水印视频链接 53 | wm_urls = data['video']['play_addr']['url_list'] 54 | # 无水印视频链接 (在回执JSON中将关键字'playwm'替换为'play'即可获得无水印地址) 55 | nwm_urls = list(map(lambda x: x.replace('playwm', 'play'), wm_urls)) 56 | # 视频封面 57 | cover = data['video']['cover']['url_list'][0] 58 | # 视频动态封面 59 | dynamic_cover = data['video']['dynamic_cover']['url_list'][0] 60 | # 视频原始封面 61 | origin_cover = data['video']['origin_cover']['url_list'][0] 62 | video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, 63 | dynamic_cover=dynamic_cover, origin_cover=origin_cover) 64 | return video_info 65 | 66 | 67 | if __name__ == '__main__': 68 | async def main(): 69 | client = httpx.AsyncClient(**dft_client_settings) 70 | data = await get_video_info(client, 'https://www.douyin.com/video/7132430286415252773') 71 | print(data) 72 | 73 | 74 | asyncio.run(main()) 75 | -------------------------------------------------------------------------------- /bilix/sites/douyin/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.douyin import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.douyin.com/video/7132430286415252773") 11 | pass 12 | -------------------------------------------------------------------------------- /bilix/sites/douyin/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.utils import legal_title 9 | 10 | 11 | class DownloaderDouyin(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(douyin\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int, None] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | ): 25 | client = client or httpx.AsyncClient(**api.dft_client_settings) 26 | super(DownloaderDouyin, self).__init__( 27 | client=client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | ) 35 | 36 | async def get_video(self, url: str, path=Path('.'), image=False): 37 | """ 38 | :cli: short: v 39 | :param url: 40 | :param path: 41 | :param image: 42 | :return: 43 | """ 44 | video_info = await api.get_video_info(self.client, url) 45 | title = legal_title(video_info.author_name, video_info.title) 46 | cors = [self.get_file(video_info.nwm_urls, path=path / f"{title}.mp4")] 47 | if image: 48 | cors.append(self.get_static(video_info.cover, path / title)) 49 | await asyncio.gather(*cors) 50 | -------------------------------------------------------------------------------- /bilix/sites/douyin/downloader_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.douyin import DownloaderDouyin 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_get_video(): 7 | async with DownloaderDouyin() as d: 8 | await d.get_video('https://v.douyin.com/r4tm4Pe/') 9 | 10 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderHanime1 2 | 3 | __all__ = ['DownloaderHanime1'] 4 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/api.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import httpx 3 | from bilix.utils import legal_title 4 | from bilix.download.utils import req_retry, raise_api_error 5 | from bs4 import BeautifulSoup 6 | 7 | BASE_URL = "https://hanime1.me" 8 | dft_client_settings = { 9 | 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' 10 | ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' 11 | ' Safari/537.36 Edg/87.0.664.66', "Referer": BASE_URL}, 12 | 'http2': False 13 | } 14 | 15 | 16 | class VideoInfo(BaseModel): 17 | url: str 18 | avid: str 19 | title: str 20 | video_url: str 21 | img_url: str 22 | 23 | 24 | @raise_api_error 25 | async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: 26 | if url_or_avid.startswith('http'): 27 | url = url_or_avid 28 | avid = url.split('=')[-1] 29 | else: 30 | url = f'{BASE_URL}/watch?v={url_or_avid}' 31 | avid = url_or_avid 32 | res = await req_retry(client, url) 33 | soup = BeautifulSoup(res.text, "html.parser") 34 | title = soup.find('meta', property="og:title")['content'] 35 | title = legal_title(title) 36 | img_url = soup.find('meta', property="og:image")['content'] 37 | video_url = soup.find('input', {'id': 'video-sd'})['value'] 38 | video_info = VideoInfo(url=url, avid=avid, 
title=title, img_url=img_url, video_url=video_url) 39 | return video_info 40 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.hanime1 import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://hanime1.me/watch?v=39123") 11 | assert data.title 12 | data = await api.get_video_info(client, "https://hanime1.me/watch?v=13658") 13 | assert data.title 14 | -------------------------------------------------------------------------------- /bilix/sites/hanime1/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | from . import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 9 | 10 | 11 | class DownloaderHanime1(BaseDownloaderM3u8, BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(hanime1\.me)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 25 | ): 26 | self.client = client or httpx.AsyncClient(**api.dft_client_settings) 27 | super().__init__( 28 | client=self.client, 29 | browser=browser, 30 | speed_limit=speed_limit, 31 | stream_retry=stream_retry, 32 | progress=progress, 33 | logger=logger, 34 | part_concurrency=part_concurrency, 35 | video_concurrency=video_concurrency, 36 | ) 37 | 38 | async def get_video(self, url: str, path=Path('.'), image=False, time_range: Tuple[int, int] = None): 39 | """ 40 | :cli: short: v 41 | :param url: 42 | :param path: 43 | :param image: 44 | :param time_range: 45 | :return: 46 | """ 47 | video_info = await api.get_video_info(self.client, url) 48 | video_url = video_info.video_url 49 | cors = [ 50 | self.get_m3u8_video( 51 | video_url, path=path / f'{video_info.title}.mp4', time_range=time_range) if '.m3u8' in video_url else 52 | self.get_file(video_url, path=path / f'{video_info.title}.mp4')] 53 | if image: 54 | cors.append(self.get_static(video_info.img_url, path=path / video_info.title)) 55 | await asyncio.gather(*cors) 56 | -------------------------------------------------------------------------------- /bilix/sites/jable/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderJable 2 | 3 | __all__ = ['DownloaderJable'] 4 | -------------------------------------------------------------------------------- /bilix/sites/jable/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pydantic import BaseModel 3 | import httpx 4 | from bs4 import BeautifulSoup 5 | from bilix.utils import legal_title 6 | from bilix.download.utils import raise_api_error, req_retry 7 | 8 | BASE_URL = "https://jable.tv" 9 | dft_client_settings = { 10 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 11 | 'http2': False 12 | } 13 | 14 | 15 | class 
VideoInfo(BaseModel): 16 | url: str 17 | avid: str 18 | title: str 19 | actor_name: str 20 | m3u8_url: str 21 | img_url: str 22 | 23 | 24 | @raise_api_error 25 | async def get_actor_info(client: httpx.AsyncClient, url: str): 26 | res = await req_retry(client, url) 27 | soup = BeautifulSoup(res.text, "html.parser") 28 | actor_name = soup.find('h2', class_='h3-md mb-1').text 29 | urls = [h6.a['href'] for h6 in soup.find('section', class_='pb-3 pb-e-lg-40').find_all('h6')] 30 | return {'actor_name': actor_name, 'urls': urls} 31 | 32 | 33 | @raise_api_error 34 | async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: 35 | if url_or_avid.startswith('http'): 36 | url = url_or_avid 37 | avid = url.split('/')[-2] 38 | else: 39 | url = f'{BASE_URL}/videos/{url_or_avid}/' 40 | avid = url_or_avid 41 | avid = avid.upper() 42 | res = await req_retry(client, url) # proxies default global in httpx 43 | soup = BeautifulSoup(res.text, "html.parser") 44 | title = soup.find('meta', property="og:title")['content'] 45 | title = legal_title(title) 46 | if span := soup.find("span", class_="placeholder rounded-circle"): 47 | actor_name = span['title'] 48 | else: # https://github.com/HFrost0/bilix/issues/45 for some video actor name in different place 49 | actor_name = soup.find("img", class_="avatar rounded-circle")['title'] 50 | img_url = soup.find('meta', property="og:image")['content'] 51 | m3u8_url = re.findall(r'http.*m3u8', res.text)[0] 52 | video_info = VideoInfo(url=url, avid=avid, title=title, img_url=img_url, m3u8_url=m3u8_url, actor_name=actor_name) 53 | return video_info 54 | -------------------------------------------------------------------------------- /bilix/sites/jable/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.jable import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://jable.tv/videos/ssis-533/") 11 | assert data.actor_name 12 | data = await api.get_video_info(client, "https://jable.tv/videos/ssis-448/") 13 | assert data.actor_name 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_get_actor_info(): 18 | data = await api.get_actor_info(client, 'https://jable.tv/models/393ec3548aecc34004d54e03becd2ea9/') 19 | assert data['actor_name'].encode('utf8') == b'\xe4\xbd\x90\xe4\xb9\x85\xe8\x89\xaf\xe5\x92\xb2\xe5\xb8\x8c' 20 | assert data['urls'] 21 | -------------------------------------------------------------------------------- /bilix/sites/jable/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union, Tuple 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 8 | 9 | 10 | class DownloaderJable(BaseDownloaderM3u8): 11 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(jable\.tv)") 12 | 13 | def __init__( 14 | self, 15 | *, 16 | client: httpx.AsyncClient = None, 17 | browser: str = None, 18 | speed_limit: Union[float, int] = None, 19 | stream_retry: int = 5, 20 | progress=None, 21 | logger=None, 22 | part_concurrency: int = 10, 23 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 24 | # unique params 25 | hierarchy: bool = True, 26 | 27 | ): 28 | client = client or httpx.AsyncClient(**api.dft_client_settings) 29 | super(DownloaderJable, self).__init__( 30 | client=client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.hierarchy = hierarchy 40 | 41 | async def get_actor(self, url: str, path=Path("."), image=True): 42 | """ 43 | download videos of a actor 44 | :cli: short: a 45 | :param url: actor page url 46 | :param path: save path 47 | :param image: download cover 48 | :return: 49 | """ 50 | data = await api.get_actor_info(self.client, url) 51 | if self.hierarchy: 52 | path /= data['actor_name'] 53 | path.mkdir(parents=True, exist_ok=True) 54 | await asyncio.gather(*[self.get_video(url, path, image) for url in data['urls']]) 55 | 56 | async def get_video(self, url: str, path=Path("."), image=True, time_range: Tuple[int, int] = None): 57 | """ 58 | :cli: short: v 59 | :param url: 60 | :param path: 61 | :param image: 62 | :param time_range: 63 | :return: 64 | """ 65 | video_info = await api.get_video_info(self.client, url) 66 | if self.hierarchy: 67 | path /= f"{video_info.avid} {video_info.actor_name}" 68 | path.mkdir(parents=True, exist_ok=True) 69 | cors = [self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f"{video_info.title}.mp4", 70 | time_range=time_range)] 71 | if image: 72 | cors.append(self.get_static(video_info.img_url, path=path / video_info.title, )) 73 | await asyncio.gather(*cors) 74 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderTiktok 2 | 3 | __all__ = ['DownloaderTiktok'] 4 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Originally From 3 | @Author: https://github.com/Evil0ctal/ 4 | https://github.com/Evil0ctal/Douyin_TikTok_Download_API 5 | """ 6 | 7 | import re 8 | import json 9 | import random 10 | from typing import List 11 | import httpx 12 | from pydantic import BaseModel 13 | from bilix.utils import legal_title 14 | from bilix.download.utils import req_retry, raise_api_error 15 | 16 | dft_client_settings = { 17 | 'headers': {'user-agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;' 18 | '+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+' 19 | '(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'}, 20 | 'http2': True 21 | } 22 | 23 | 24 | class VideoInfo(BaseModel): 25 | title: str 26 | author_name: str 27 | wm_urls: List[str] 28 | nwm_urls: List[str] 29 | cover: str 30 | dynamic_cover: str 31 | origin_cover: str 32 | 33 | 34 | @raise_api_error 35 | async def 
get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 36 | if short_url := re.findall(r'https://www.tiktok.com/t/\w+/', url): 37 | res = await req_retry(client, short_url[0], follow_redirects=True) 38 | url = str(res.url) 39 | if key := re.search(r'/video/(\d+)', url): 40 | key = key.groups()[0] 41 | else: 42 | key = re.search(r"/v/(\d+)", url).groups()[0] 43 | params = {'aweme_id': key, 'aid': 1180, 'iid': 6165993682518218889, 44 | 'device_id': random.randint(10 * 10 * 10, 9 * 10 ** 10)} 45 | res = await req_retry(client, 'https://api16-normal-c-useast1a.tiktokv.com/aweme/v1/feed/', params=params) 46 | data = json.loads(res.text) 47 | data = data['aweme_list'][0] 48 | # 视频标题 (如果为空则使用分享标题) 49 | title = legal_title(data['desc'] if data['desc'] != '' else data['share_info']['share_title']) 50 | # 视频作者昵称 51 | author_name = data['author']['nickname'] 52 | # 有水印视频链接 53 | wm_urls = data['video']['download_addr']['url_list'] 54 | # 无水印视频链接 55 | nwm_urls = data['video']['bit_rate'][0]['play_addr']['url_list'] 56 | # 视频封面 57 | cover = data['video']['cover']['url_list'][0] 58 | # 视频动态封面 59 | dynamic_cover = data['video']['dynamic_cover']['url_list'][0] 60 | # 视频原始封面 61 | origin_cover = data['video']['origin_cover']['url_list'][0] 62 | video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, 63 | dynamic_cover=dynamic_cover, origin_cover=origin_cover) 64 | return video_info 65 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.tiktok import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.tiktok.com/@lindaselection/video/7171715528124271877") 11 | assert data.nwm_urls 12 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix.utils import legal_title 9 | 10 | 11 | class DownloaderTiktok(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(tiktok\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int, None] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | ): 25 | client = client or httpx.AsyncClient(**api.dft_client_settings) 26 | super(DownloaderTiktok, self).__init__( 27 | client=client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | ) 35 | 36 | async def get_video(self, url: str, path=Path('.'), image=False): 37 | """ 38 | :cli: short: v 39 | :param url: 40 | :param path: 41 | :param image: 42 | :return: 43 | """ 44 | video_info = await api.get_video_info(self.client, url) 45 | title = legal_title(video_info.author_name, video_info.title) 46 | # since the TikTok backup urls are sometimes slow, use the first one 47 | cors = [self.get_file(video_info.nwm_urls[0], path / f'{title}.mp4')] 48 | if image: 49 | cors.append(self.get_static(video_info.cover, path=path / title, )) 50 | await asyncio.gather(*cors) 51 | -------------------------------------------------------------------------------- /bilix/sites/tiktok/downloader_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bilix.sites.tiktok import DownloaderTiktok 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_get_video(): 7 | async with DownloaderTiktok() as d: 8 | await d.get_video('https://www.tiktok.com/@evil0ctal/video/7168978761973550378') 9 | 10 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYhdmp 2 | 3 | __all__ = ['DownloaderYhdmp'] 4 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import random 4 | import re 5 | from pathlib import Path 6 | from pydantic import BaseModel 7 | from typing import Union, List 8 | import httpx 9 | import execjs 10 | from bs4 import BeautifulSoup 11 | from bilix.utils import legal_title 12 | from bilix.download.utils import req_retry as rr, raise_api_error 13 | 14 | BASE_URL = "https://www.yhdmp.cc" 15 | dft_client_settings = { 16 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 17 | 'http2': False 18 | } 19 | _js = None 20 | 21 | 22 | def _get_js(): 23 | global _js 24 | if _js is None: 25 | with open(Path(__file__).parent / 'yhdmp.js', 'r') as f: 26 | _js = execjs.compile(f.read()) 27 | return _js 28 | 29 | 30 | def _get_t2_k2(t1: str, k1: str) -> dict: 31 | new_cookies = _get_js().call("get_t2_k2", t1, k1) 32 | return new_cookies 33 | 34 | 35 | def _decode(data: str) -> str: 36 | return _get_js().call('__getplay_rev_data', data) 37 | 38 | 39 | async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, List[str]], 40 | method: str = 'GET', 41 | follow_redirects: bool = False, 42 | **kwargs): 43 | if 't1' in client.cookies and 'k1' in client.cookies: 44 |
new_cookies = _get_t2_k2(client.cookies['t1'], client.cookies['k1']) 45 | if 't2' in client.cookies: 46 | client.cookies.delete('t2') 47 | if 'k2' in client.cookies: 48 | client.cookies.delete('k2') 49 | client.cookies.update(new_cookies) 50 | 51 | res = await rr(client, url_or_urls, method, follow_redirects, **kwargs) 52 | return res 53 | 54 | 55 | class VideoInfo(BaseModel): 56 | aid: Union[str, int] 57 | play_idx: int 58 | ep_idx: int 59 | title: str 60 | sub_title: str 61 | play_info: List[Union[List[str], List]] # may be empty 62 | m3u8_url: str 63 | 64 | 65 | @raise_api_error 66 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 67 | aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') 68 | play_idx, ep_idx = int(play_idx), int(ep_idx) 69 | # request 70 | res_web = req_retry(client, url) 71 | m3u8_url = get_m3u8_url(url=url, client=client) 72 | if 't1' in client.cookies and 'k1' in client.cookies: 73 | res_web, m3u8_url = await asyncio.gather(res_web, m3u8_url) 74 | else: 75 | res_web, m3u8_url = await res_web, await m3u8_url 76 | # extract 77 | title, sub_title = map(legal_title, 78 | re.search(r'target="_self">([^<]+):([^<]+)', res_web.text).groups()) 79 | soup = BeautifulSoup(res_web.text, 'html.parser') 80 | divs = soup.find_all('div', class_="movurl") 81 | play_info = [] 82 | for div in divs: 83 | play_info.append([[legal_title(a["title"]), f"{BASE_URL}/{a['href']}"] for a in div.find_all("a")]) 84 | video_info = VideoInfo(aid=aid, play_idx=play_idx, ep_idx=ep_idx, title=title, sub_title=sub_title, 85 | play_info=play_info, m3u8_url=m3u8_url) 86 | return video_info 87 | 88 | 89 | @raise_api_error 90 | async def get_m3u8_url(client: httpx.AsyncClient, url): 91 | aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') 92 | params = {"aid": aid, "playindex": play_idx, "epindex": ep_idx, "r": random.random()} 93 | res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) 94 | if res_play.text.startswith("err"): # maybe first time 95 | res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) 96 | data = json.loads(res_play.text) 97 | purl, vurl = _decode(data['purl']), _decode(data['vurl']) 98 | m3u8_url = purl.split("url=")[-1] + vurl 99 | return m3u8_url 100 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.yhdmp import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.yhdmp.cc/vp/22224-1-0.html") 11 | data = await api.get_m3u8_url(client, "https://www.yhdmp.cc/vp/22224-1-0.html") 12 | pass 13 | -------------------------------------------------------------------------------- /bilix/sites/yhdmp/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import httpx 4 | from typing import Sequence, Union, Tuple 5 | from . 
import api 6 | from bilix.utils import legal_title, cors_slice 7 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 8 | 9 | 10 | class DownloaderYhdmp(BaseDownloaderM3u8): 11 | def __init__( 12 | self, 13 | *, 14 | api_client: httpx.AsyncClient = None, 15 | stream_client: httpx.AsyncClient = None, 16 | browser: str = None, 17 | speed_limit: Union[float, int] = None, 18 | stream_retry: int = 5, 19 | progress=None, 20 | logger=None, 21 | part_concurrency: int = 10, 22 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 23 | hierarchy: bool = True, 24 | ): 25 | stream_client = stream_client or httpx.AsyncClient() 26 | super(DownloaderYhdmp, self).__init__( 27 | client=stream_client, 28 | browser=browser, 29 | speed_limit=speed_limit, 30 | stream_retry=stream_retry, 31 | progress=progress, 32 | logger=logger, 33 | part_concurrency=part_concurrency, 34 | video_concurrency=video_concurrency, 35 | ) 36 | self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) 37 | self.hierarchy = hierarchy 38 | 39 | async def get_series(self, url: str, path=Path('.'), p_range: Sequence[int] = None): 40 | """ 41 | :cli: short: s 42 | :param url: 43 | :param path: 44 | :param p_range: 45 | :return: 46 | """ 47 | video_info = await api.get_video_info(self.api_client, url) 48 | ep_idx = video_info.ep_idx 49 | play_idx = video_info.play_idx 50 | title = video_info.title 51 | if self.hierarchy: 52 | path = path / title 53 | path.mkdir(parents=True, exist_ok=True) 54 | 55 | # no need to reuse get_video since we only need m3u8_url 56 | async def get_video(page_url, name): 57 | m3u8_url = await api.get_m3u8_url(self.api_client, page_url) 58 | await self.get_m3u8_video(m3u8_url=m3u8_url, path=path / name) 59 | 60 | cors = [] 61 | for idx, (sub_title, url) in enumerate(video_info.play_info[play_idx]): 62 | if ep_idx == idx: 63 | cors.append(self.get_m3u8_video(m3u8_url=video_info.m3u8_url, 64 | path=path / f'{legal_title(title, sub_title)}.mp4')) 65 | else: 66 | cors.append(get_video(url, legal_title(title, sub_title))) 67 | if p_range: 68 | cors = cors_slice(cors, p_range) 69 | await asyncio.gather(*cors) 70 | 71 | async def get_video(self, url: str, path=Path('.'), time_range=None): 72 | """ 73 | :cli: short: v 74 | :param url: 75 | :param path: 76 | :param time_range: 77 | :return: 78 | """ 79 | video_info = await api.get_video_info(self.api_client, url) 80 | name = legal_title(video_info.title, video_info.sub_title) 81 | await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) 82 | 83 | @classmethod 84 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool: 85 | return 'yhdmp' in keys[0] 86 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYinghuacd 2 | 3 | __all__ = ['DownloaderYinghuacd'] 4 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pydantic import BaseModel 3 | from typing import Union, List 4 | import httpx 5 | from bs4 import BeautifulSoup 6 | from bilix.download.utils import req_retry, raise_api_error 7 | 8 | BASE_URL = "http://www.yinghuacd.com" 9 | dft_client_settings = { 10 | 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 11 | 
'http2': False 12 | } 13 | 14 | 15 | class VideoInfo(BaseModel): 16 | title: str 17 | sub_title: str 18 | play_info: List[Union[List[str], List]] # may be empty 19 | m3u8_url: str 20 | 21 | 22 | @raise_api_error 23 | async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: 24 | # request 25 | res = await req_retry(client, url) 26 | m3u8_url = re.search(r'http.*m3u8', res.text)[0] 27 | soup = BeautifulSoup(res.text, 'html.parser') 28 | h1 = soup.find('h1') 29 | title, sub_title = h1.a.text, h1.span.text[1:] 30 | 31 | # extract 32 | play_info = [[a.text, f"{BASE_URL}{a['href']}"] for a in soup.find('div', class_="movurls").find_all('a')] 33 | video_info = VideoInfo(title=title, sub_title=sub_title, play_info=play_info, m3u8_url=m3u8_url) 34 | return video_info 35 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.yinghuacd import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "http://www.yinghuacd.com/v/5606-7.html") 11 | pass 12 | -------------------------------------------------------------------------------- /bilix/sites/yinghuacd/downloader.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import httpx 4 | import re 5 | from m3u8 import Segment 6 | from typing import Sequence, Union, Tuple 7 | from . import api 8 | from bilix.utils import legal_title, cors_slice 9 | from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 10 | from bilix.exception import APIError 11 | 12 | 13 | class DownloaderYinghuacd(BaseDownloaderM3u8): 14 | def __init__( 15 | self, 16 | *, 17 | stream_client: httpx.AsyncClient = None, 18 | api_client: httpx.AsyncClient = None, 19 | browser: str = None, 20 | speed_limit: Union[float, int] = None, 21 | stream_retry: int = 5, 22 | progress=None, 23 | logger=None, 24 | part_concurrency: int = 10, 25 | video_concurrency: Union[int, asyncio.Semaphore] = 3, 26 | hierarchy: bool = True, 27 | ): 28 | stream_client = stream_client or httpx.AsyncClient() 29 | super(DownloaderYinghuacd, self).__init__( 30 | client=stream_client, 31 | browser=browser, 32 | speed_limit=speed_limit, 33 | stream_retry=stream_retry, 34 | progress=progress, 35 | logger=logger, 36 | part_concurrency=part_concurrency, 37 | video_concurrency=video_concurrency, 38 | ) 39 | self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) 40 | self.hierarchy = hierarchy 41 | 42 | def _after_seg(self, seg: Segment, content: bytearray) -> bytearray: 43 | # in case .png 44 | if re.fullmatch(r'.*\.png', seg.absolute_uri): 45 | _, _, content = content.partition(b'\x47\x40') 46 | return content 47 | 48 | async def get_series(self, url: str, path=Path("."), p_range: Sequence[int] = None): 49 | """ 50 | :cli: short: s 51 | :param url: 52 | :param path: 53 | :param p_range: 54 | :return: 55 | """ 56 | video_info = await api.get_video_info(self.api_client, url) 57 | if self.hierarchy: 58 | path /= video_info.title 59 | path.mkdir(parents=True, exist_ok=True) 60 | cors = [self.get_video(u, path=path, video_info=video_info if u == url else None) 61 | for _, u in video_info.play_info] 62 | if p_range: 63 | cors = cors_slice(cors, p_range) 64 | await 
asyncio.gather(*cors) 65 | 66 | async def get_video(self, url: str, path=Path('.'), time_range=None, video_info=None): 67 | """ 68 | :cli: short: v 69 | :param url: 70 | :param path: 71 | :param time_range: 72 | :param video_info: 73 | :return: 74 | """ 75 | if video_info is None: 76 | try: 77 | video_info = await api.get_video_info(self.api_client, url) 78 | except APIError as e: 79 | return self.logger.error(e) 80 | else: 81 | video_info = video_info 82 | name = legal_title(video_info.title, video_info.sub_title) 83 | await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) 84 | 85 | @classmethod 86 | def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict): 87 | return 'yinghuacd' in keys[0] 88 | -------------------------------------------------------------------------------- /bilix/sites/youtube/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloader import DownloaderYoutube 2 | 3 | __all__ = ['DownloaderYoutube'] 4 | -------------------------------------------------------------------------------- /bilix/sites/youtube/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from pydantic import BaseModel 4 | import httpx 5 | from bilix.download.utils import req_retry 6 | from bilix.utils import legal_title 7 | 8 | dft_client_settings = { 9 | 'headers': { 10 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' 11 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36', 12 | 'referer': 'https://www.youtube.com/' 13 | }, 14 | } 15 | 16 | 17 | class VideoInfo(BaseModel): 18 | # url: str 19 | title: str 20 | video_url: str 21 | audio_url: str 22 | # img_url: str 23 | 24 | 25 | async def get_video_info(client: httpx.AsyncClient, url: str): 26 | response = await req_retry(client=client, url_or_urls=url) 27 | # 解析 28 | json_str = re.findall('var ytInitialPlayerResponse = (.*?);var', response.text)[0] 29 | json_data = json.loads(json_str) 30 | video_url = json_data['streamingData']['adaptiveFormats'][0]['url'] 31 | audio_url = json_data['streamingData']['adaptiveFormats'][-2]['url'] 32 | title = legal_title(json_data['videoDetails']['title']) 33 | video_info = VideoInfo(video_url=video_url, audio_url=audio_url, title=title) 34 | return video_info 35 | -------------------------------------------------------------------------------- /bilix/sites/youtube/api_test.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import pytest 3 | from bilix.sites.youtube import api 4 | 5 | client = httpx.AsyncClient(**api.dft_client_settings) 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_get_video_info(): 10 | data = await api.get_video_info(client, "https://www.youtube.com/watch?v=26lanyBFXw8") 11 | assert data.video_url and data.audio_url and data.title 12 | -------------------------------------------------------------------------------- /bilix/sites/youtube/downloader.py: -------------------------------------------------------------------------------- 1 | import re 2 | import asyncio 3 | from pathlib import Path 4 | from typing import Union 5 | import httpx 6 | from . 
import api 7 | from bilix.download.base_downloader_part import BaseDownloaderPart 8 | from bilix import ffmpeg 9 | 10 | 11 | class DownloaderYoutube(BaseDownloaderPart): 12 | pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(youtube\.com)") 13 | 14 | def __init__( 15 | self, 16 | *, 17 | client: httpx.AsyncClient = None, 18 | browser: str = None, 19 | speed_limit: Union[float, int] = None, 20 | stream_retry: int = 5, 21 | progress=None, 22 | logger=None, 23 | part_concurrency: int = 10, 24 | # unique params 25 | video_concurrency: Union[int, asyncio.Semaphore] = 3 26 | ): 27 | client = client or httpx.AsyncClient(**api.dft_client_settings) 28 | super(DownloaderYoutube, self).__init__( 29 | client=client, 30 | browser=browser, 31 | speed_limit=speed_limit, 32 | stream_retry=stream_retry, 33 | progress=progress, 34 | logger=logger, 35 | part_concurrency=part_concurrency 36 | ) 37 | self.video_sema = asyncio.Semaphore(video_concurrency) if type(video_concurrency) is int else video_concurrency 38 | 39 | async def get_video(self, url: str, path=Path('.')): 40 | """ 41 | :cli: short: v 42 | :param url: 43 | :param path: 44 | :return: 45 | """ 46 | async with self.video_sema: 47 | video_info = await api.get_video_info(self.client, url) 48 | video_path = path / (video_info.title + '.mp4') 49 | if video_path.exists(): 50 | return self.logger.info(f'[green]已存在[/green] {video_path.name}') 51 | task_id = await self.progress.add_task(description=video_info.title, upper=True) 52 | path_lst = await asyncio.gather( 53 | self.get_file(url_or_urls=video_info.video_url, path=path / (video_info.title + '-v'), task_id=task_id), 54 | self.get_file(url_or_urls=video_info.audio_url, path=path / (video_info.title + '-a'), task_id=task_id) 55 | ) 56 | await ffmpeg.combine(path_lst, output_path=path / (video_info.title + '.mp4')) 57 | self.logger.info(f'[cyan]已完成[/cyan] {video_path.name}') 58 | await self.progress.update(task_id=task_id, visible=False) 59 | -------------------------------------------------------------------------------- /bilix/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | some useful functions 3 | """ 4 | import html 5 | import json 6 | import re 7 | import time 8 | from functools import wraps 9 | from urllib.parse import quote_plus 10 | from typing import Union, Sequence, Coroutine, List, Tuple, Optional 11 | from bilix.log import logger 12 | 13 | 14 | def cors_slice(cors: Sequence[Coroutine], p_range: Sequence[int]): 15 | h, t = p_range[0] - 1, p_range[1] 16 | assert 0 <= h <= t 17 | [cor.close() for idx, cor in enumerate(cors) if idx < h or idx >= t] # avoid runtime warning 18 | cors = cors[h:t] 19 | return cors 20 | 21 | 22 | def legal_title(*parts: str, join_str: str = '-'): 23 | """ 24 | join several string parts into an os-legal file/dir name (no illegal character and not too long). 25 | auto skip empty. 26 | 27 | :param parts: 28 | :param join_str: the string to join each part 29 | :return: 30 | """ 31 | return join_str.join(filter(lambda x: len(x) > 0, map(replace_illegal, parts))) 32 | 33 | 34 | def replace_illegal(s: str): 35 | """strip, unescape html and replace os illegal character in s""" 36 | s = s.strip() 37 | s = html.unescape(s) # handle & "...
38 | s = re.sub(r"[/\\:*?\"<>|\n\t]", '', s) # replace illegal filename character 39 | return s 40 | 41 | 42 | def convert_size(total_bytes: int) -> str: 43 | unit, suffix = pick_unit_and_suffix( 44 | total_bytes, ["bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"], 1000 45 | ) 46 | return f"{total_bytes / unit:,.2f}{suffix}" 47 | 48 | 49 | def pick_unit_and_suffix(size: int, suffixes: List[str], base: int) -> Tuple[int, str]: 50 | """Borrowed from rich.filesize. Pick a suffix and base for the given size.""" 51 | for i, suffix in enumerate(suffixes): 52 | unit = base ** i 53 | if size < unit * base: 54 | break 55 | else: 56 | raise ValueError('Invalid input') 57 | return unit, suffix 58 | 59 | 60 | def parse_bytes_str(s: str) -> float: 61 | """Parse a string byte quantity (e.g. 1.5MB) into a number of bytes""" 62 | units_map = {unit: i for i, unit in enumerate(['', *'KMGTPEZY'])} 63 | units_re = '|'.join(units_map.keys()) 64 | m = re.fullmatch(rf'(?P<num>\d+(?:\.\d+)?)\s*(?P<unit>{units_re})B?', s) 65 | if not m: 66 | raise ValueError(f"Invalid bytes str {s} to parse to number") 67 | num = float(m.group('num')) 68 | mult = 1000 ** units_map[m.group('unit')] 69 | return num * mult 70 | 71 | 72 | def valid_sess_data(sess_data: Optional[str]) -> str: 73 | """check and encode sess_data""" 74 | # url-encoding sess_data if it's not encoded 75 | # https://github.com/HFrost0/bilix/pull/114 76 | if sess_data and not re.search(r'(%[0-9A-Fa-f]{2})|(\+)', sess_data): 77 | sess_data = quote_plus(sess_data) 78 | logger.debug(f"sess_data encoded: {sess_data}") 79 | return sess_data 80 | 81 | 82 | def t2s(t: int) -> str: 83 | return str(t) 84 | 85 | 86 | def s2t(s: str) -> int: 87 | """ 88 | :param s: hour:minute:second or xx(s) format input 89 | :return: 90 | """ 91 | if ':' not in s: 92 | return int(s) 93 | h, m, s = map(int, s.split(':')) 94 | return h * 60 * 60 + m * 60 + s 95 | 96 | 97 | def json2srt(data: Union[bytes, str, dict]): 98 | b = False 99 | if type(data) is bytes: 100 | data = data.decode('utf-8') 101 | b = True 102 | if type(data) is str: 103 | data = json.loads(data) 104 | 105 | def t2str(t): 106 | ms = int(round(t % 1, 3) * 1000) 107 | s = int(t) 108 | m = s // 60 109 | h = m // 60 110 | m, s = m % 60, s % 60 111 | t_str = f'{h:0>2}:{m:0>2}:{s:0>2},{ms:0>3}' 112 | return t_str 113 | 114 | res = '' 115 | for idx, i in enumerate(data['body']): 116 | from_time, to_time = t2str(i['from']), t2str(i['to']) 117 | content = i['content'] 118 | res += f"{idx + 1}\n{from_time} --> {to_time}\n{content}\n\n" 119 | return res.encode('utf-8') if b else res 120 | 121 | 122 | def timer(func): 123 | @wraps(func) 124 | def wrapper(*args, **kwargs): 125 | start = time.monotonic_ns() 126 | res = func(*args, **kwargs) 127 | logger.debug( 128 | f"{func.__name__} cost {time.monotonic_ns() - start} ns with args: {args}, kwargs: {kwargs} result: {res}") 129 | return res 130 | 131 | return wrapper 132 | -------------------------------------------------------------------------------- /docs/.vitepress/config.ts: -------------------------------------------------------------------------------- 1 | import {defineConfig} from 'vitepress' 2 | 3 | // https://vitepress.dev/reference/site-config 4 | export default defineConfig({ 5 | title: "bilix", 6 | description: "bilix download", 7 | base: '/bilix/', 8 | lastUpdated: true, 9 | themeConfig: { 10 | // https://vitepress.dev/reference/default-theme-config 11 | editLink: { 12 | pattern: 'https://github.com/HFrost0/bilix/edit/master/docs/:path' 13 | }, 14 |
algolia: { 15 | appId: 'F4ZDY9KUXU', 16 | apiKey: '30aaace8ddea0d6f25ac39ea70ce8bd8', 17 | indexName: 'bilix' 18 | }, 19 | footer: { 20 | message: 'Released under the Apache 2.0 License.', 21 | copyright: 'Copyright © 2022-present HFrost0' 22 | }, 23 | socialLinks: [ 24 | {icon: 'github', link: 'https://github.com/HFrost0/bilix'} 25 | ] 26 | }, 27 | 28 | locales: { 29 | root: { 30 | label: '中文', 31 | lang: 'zh', 32 | themeConfig: { 33 | nav: [ 34 | {text: 'Home', link: '/'}, 35 | {text: '安装', link: '/install'}, 36 | {text: '快速上手', link: '/quickstart'} 37 | ], 38 | sidebar: [ 39 | {text: '安装', link: '/install'}, 40 | {text: '快速上手', link: '/quickstart'}, 41 | {text: '进阶使用', link: '/advance_guide'}, 42 | { 43 | text: 'Python调用', 44 | items: [ 45 | {text: '异步基础', link: '/async'}, 46 | {text: '下载案例', link: '/download_examples'}, 47 | {text: 'API案例', link: '/api_examples'} 48 | ] 49 | }, 50 | {text: '更多', link: '/more'}, 51 | ], 52 | } 53 | }, 54 | 55 | en: { 56 | label: 'English', 57 | lang: 'en', // optional, will be added as `lang` attribute on `html` tag 58 | themeConfig: { 59 | nav: [ 60 | {text: 'Home', link: '/en/'}, 61 | {text: 'Install', link: '/en/install'}, 62 | {text: 'Quickstart', link: '/en/quickstart'} 63 | ], 64 | sidebar: [ 65 | {text: 'Install', link: '/en/install'}, 66 | {text: 'Quickstart', link: '/en/quickstart'}, 67 | {text: 'Advance Guide', link: '/en/advance_guide'}, 68 | { 69 | text: 'Python API', 70 | items: [ 71 | {text: 'Async basic', link: '/en/async'}, 72 | {text: 'Download Examples', link: '/en/download_examples'}, 73 | {text: 'API Examples', link: '/en/api_examples'} 74 | ] 75 | }, 76 | {text: 'More', link: '/en/more'}, 77 | ], 78 | }, 79 | } 80 | }, 81 | }) 82 | -------------------------------------------------------------------------------- /docs/.vitepress/theme/index.ts: -------------------------------------------------------------------------------- 1 | import Theme from 'vitepress/theme' 2 | import './style/var.css' 3 | 4 | export default { 5 | extends: Theme, 6 | } 7 | -------------------------------------------------------------------------------- /docs/.vitepress/theme/style/var.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --vp-home-hero-name-color: transparent; 3 | --vp-home-hero-name-background: linear-gradient( 135deg, #79F1A4 10%, #0E5CAD 100%);; 4 | } 5 | -------------------------------------------------------------------------------- /docs/advance_guide.md: -------------------------------------------------------------------------------- 1 | # 进阶使用 2 | 请使用`bilix -h`查看更多参数提示,包括方法名简写,视频画面质量选择,并发量控制,下载速度限制,下载目录等。 3 | 4 | ## 方法名简写 5 | 6 | 觉得`get_series`,`get_video`这些方法名写起来太麻烦了?同感!你可以使用他们的简写,这样快多了: 7 | 8 | ```shell 9 | bilix s 'url' 10 | bilix v 'url' 11 | ... 
12 | ``` 13 | 更多简写请查看`bilix -h` 14 | 15 | ## 登录 16 | 17 | 你是大会员?🥸,两种方式登录 18 | 19 | * 直接填写cookie 20 | 21 | 在`--cookie`参数中填写浏览器缓存的`SESSDATA`cookie,填写后可以下载需要大会员的视频 22 | 23 | * 从浏览器载入cookie 24 | 25 | 在浏览器中登录之后,使用`-fb --from-browser`参数从浏览器中读取cookie,例如`-fb chrome`,使用这种方法可能需要授权,bilix读取浏览器cookie的 26 | 方式为开源项目[browser_cookie3](https://github.com/borisbabic/browser_cookie3)。 27 | :::tip 28 | 如果你总是需要保持登录,在linux和mac系统中你可以使用`alias bilix=bilix --cookie xxxxxx`或`alias bilix=bilix -fb chrome`来为`bilix`命令创建别名 29 | ::: 30 | 31 | ## 画质,音质和编码选择 32 | 33 | 你可以使用`--quality`即`-q`参数选择画面分辨率,bilix支持两种不同的选择方式: 34 | 35 | * 相对选择(默认) 36 | 37 | bilix在默认情况下会为你选择可选的最高画质进行下载(即`-q 0`),如果你想下载第二清晰的可使用`-q 1`进行指定,以此类推,指定序号越大画质越低, 38 | 当超过可选择范围时,默认选择到最低画质,例如你总是可以通过`-q 999`来选择到最低画质。 39 | * 绝对选择 40 | 41 | 在某些时候,你只希望下载720P的视频,但是720P在相对选择中并不总是处于固定的位置,这在下载收藏夹,合集等等场景中经常出现。 42 | 另外有可能你就是喜欢通过`-q 1080P`这样的方式来指定画质。 43 | 没问题,bilix同时也支持通过`-q 4K` `-q '1080P 高码率'`等字符串的形式来直接指定画质,字符串为b站显示的画质名称的子串即可。 44 | 45 | 在更加专业用户的需求中,可能需要指定特定的视频编码进行下载,而b站支持的编码在网页或app中是不可见的,bilix为此设计了方法`info` 46 | , 通过它你可以完全了解该视频的所有信息: 47 | 48 | ```text 49 | bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 50 | 51 | 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 52 | ┣━━ 画面 Video 53 | ┃ ┣━━ HDR 真彩 54 | ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB 55 | ┃ ┣━━ 4K 超清 56 | ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB 57 | ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB 58 | ┃ ┣━━ 1080P 60帧 59 | ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB 60 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB 61 | ┃ ┣━━ 1080P 高清 62 | ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB 63 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB 64 | ┃ ┣━━ 720P 高清 65 | ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB 66 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB 67 | ┃ ┣━━ 480P 清晰 68 | ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB 69 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB 70 | ┃ ┗━━ 360P 流畅 71 | ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB 72 | ┃ ┗━━ codec: avc1.64001E total: 11.59MB 73 | ┗━━ 声音 Audio 74 | ┣━━ 默认音质 75 | ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB 76 | ┗━━ Hi-Res无损 77 | ┗━━ codec: fLaC total: 94.55MB 78 | ``` 79 | 80 | 看上去不错😇,那么我要怎么才能下到指定编码的视频呢? 
81 | 82 | bilix提供了另一个参数`--codec`来指定编码格式,例如你可以通过组合`-q 480P --codec hev1.1.6.L120.90`来指定下载7.61MB的那个。 83 | `--codec`参数与`-q`参数类似,也支持子串指定,例如你可以通过`--codec hev`来使得所有视频都选择`hev`开头的编码。 84 | 85 | 对于音质,部分视频会含有大会员专享的杜比全景声和Hi-Res无损音质,利用`--codec`参数可以指定这些音频,例如 86 | 87 | ```shell 88 | bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC 89 | ``` 90 | 91 | `--codec hev:fLaC`中使用`:`将画质编码和音频编码隔开,如只指定音频编码,可使用`--codec :fLaC` 92 | 93 | ## 关于断点重连 94 | 95 | 用户可以通过Ctrl+C中断任务,对于未完成的文件,重新执行命令会在之前的进度基础上下载,已完成的文件会进行跳过。 96 | 但是对于未完成的文件,以下情况建议清除未完成任务的临时文件再执行命令,否则可能残留部分临时文件。 97 | 98 | - 中断后改变画面质量`-q`或编码`--codec` 99 | - 中断后改变分段并发数`--part-con` 100 | - 中断后改变时间范围`--time-range` 101 | 102 | ## 一次提供多个url 103 | bilix的所有方法都支持提供多个`url` 104 | ```shell 105 | bilix v 'url1' 'url2' 'url3' 106 | bilix up 'up_url1' 'up_url2' 107 | ``` 108 | 当你提供多个`url`时,并发控制当然也正常工作 109 | 110 | 111 | ## 更多站点支持 112 | bilix除了b站以外也支持了一些别的站点,但作者精力有限,所以失效也不奇怪。具体可见[discussion](https://github.com/HFrost0/bilix/discussions/39) 113 | 114 | ## 基本下载方法 115 | 对于一些基本的下载场景 116 | * 你可以直接通过文件链接下载 117 | ```shell 118 | bilix f 'https://xxxx.com/xxxx.mp4' 119 | ``` 120 | * 你可以通过m3u8 url直接下载m3u8视频 121 | ```shell 122 | bilix m3u8 'https://xxxx.com/xxxx.m3u8' 123 | ``` 124 | 125 | ## 代理 126 | bilix默认使用系统代理 127 | -------------------------------------------------------------------------------- /docs/api_examples.md: -------------------------------------------------------------------------------- 1 | # API案例 2 | bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 3 | ```python 4 | import asyncio 5 | from bilix.sites.bilibili import api 6 | from httpx import AsyncClient 7 | 8 | 9 | async def main(): 10 | # 需要先实例化一个用来进行http请求的client 11 | client = AsyncClient(**api.dft_client_settings) 12 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 13 | print(data) 14 | 15 | 16 | asyncio.run(main()) 17 | 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/async.md: -------------------------------------------------------------------------------- 1 | # 异步基础 2 | 异步无疑是python中处理网络请求的最佳技术,因为它可以承载极高的并发量。 3 | 在python中使用bilix之前,你需要先对python中的异步编程有一些了解。python官方使用[asyncio](https://docs.python.org/3/library/asyncio.html) 4 | 提供异步I/O的支持。 5 | 6 | ```python 7 | async def hello(): 8 | print("hello world") 9 | ``` 10 | 11 | 对于一个async函数(`def`变为`async def`)来说,调用不会直接执行函数,而是返回一个协程(coroutine)对象 12 | 13 | ```python 14 | c = hello() 15 | >>> c 16 | <coroutine object hello at 0x...> 17 | 18 | ``` 19 | 20 | 我们可以将这个coroutine提交到asyncio的事件循环中执行它 21 | 22 | ```python 23 | import asyncio 24 | 25 | >>> asyncio.run(c) 26 | "hello world" 27 | ``` 28 | 29 | bilix的所有下载方法都是异步的,所以你也可以这样执行他们 30 | ```python 31 | import asyncio 32 | from bilix.sites.bilibili import DownloaderBilibili 33 | 34 | d = DownloaderBilibili() 35 | asyncio.run(d.get_video('url')) 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/download_examples.md: -------------------------------------------------------------------------------- 1 | # 下载案例 2 | 3 | 觉得命令行太麻烦,不够强大?bilix可作为python的库调用,并且接口设计易用,功能更强大,这给了你很大的扩展空间 4 | 5 | ## 从最简单的开始 6 | 7 | ```python 8 | import asyncio 9 | # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | 12 | 13 | async def main(): 14 | # 你可以使用async with上下文管理器来开启和关闭一个下载器 15 | async with DownloaderBilibili() as d: 16 | # 然后用await异步等待下载完成 17 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 18 | 19 | 20 | if __name__ == '__main__': 21
| asyncio.run(main()) 22 | 23 | ``` 24 | 25 | ## 组合多种任务 / 控制并发量 26 | 27 | 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 28 | 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | 34 | async def main(): 35 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 36 | cor1 = d.get_series( 37 | 'https://www.bilibili.com/bangumi/play/ss28277' 38 | , quality=999) 39 | cor2 = d.get_up(url_or_mid='436482484', quality=999) 40 | cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122', quality=999) 41 | await asyncio.gather(cor1, cor2, cor3) 42 | await d.aclose() 43 | 44 | 45 | if __name__ == '__main__': 46 | asyncio.run(main()) 47 | 48 | 49 | ``` 50 | 51 | ## 下载切片 52 | 53 | 你可以只下视频的一小段 54 | 55 | ```python 56 | import asyncio 57 | from bilix.sites.bilibili import DownloaderBilibili 58 | 59 | 60 | async def main(): 61 | """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" 62 | async with DownloaderBilibili() as d: 63 | # time_range (start_time, end_time) 64 | await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) 65 | 66 | 67 | if __name__ == '__main__': 68 | asyncio.run(main()) 69 | 70 | ``` 71 | 72 | ## 同时下载多个站点 73 | 74 | 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 75 | 76 | ```python 77 | import asyncio 78 | from bilix.sites.bilibili import DownloaderBilibili 79 | from bilix.sites.cctv import DownloaderCctv 80 | 81 | 82 | async def main(): 83 | async with DownloaderBilibili() as d_bl, DownloaderCctv() as d_tv: 84 | await asyncio.gather( 85 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 86 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 87 | ) 88 | 89 | 90 | if __name__ == '__main__': 91 | asyncio.run(main()) 92 | 93 | ``` 94 | 95 | ## 限制下载速度 96 | 97 | 限制下载速度很简单,下面的例子限制了b站点总下载速度在1MB/s以下 98 | 99 | ```python 100 | import asyncio 101 | from bilix.sites.bilibili import DownloaderBilibili 102 | from bilix.sites.cctv import DownloaderCctv 103 | 104 | 105 | async def main(): 106 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 107 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 108 | 109 | 110 | if __name__ == '__main__': 111 | asyncio.run(main()) 112 | 113 | ``` 114 | 115 | 另外,多个下载器之间的速度设置也是独立的 116 | 117 | ```python 118 | async def main(): 119 | # 就像并发控制一样,每个downloader的速度设置也是独立的 120 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 121 | await asyncio.gather( 122 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 123 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 124 | ) 125 | ``` 126 | 127 | ## 显示进度条 128 | 129 | 使用python模块时,进度条默认不显示,如需显示,可以 130 | 131 | ```python 132 | from bilix.progress.cli_progress import CLIProgress 133 | 134 | CLIProgress.start() 135 | ``` 136 | 137 | 或者通过任意下载器内部的`progress`对象打开 138 | 139 | ```python 140 | d.progress.start() 141 | ``` 142 | -------------------------------------------------------------------------------- /docs/en/advance_guide.md: -------------------------------------------------------------------------------- 1 | # Advance Guide 2 | Please use `bilix -h` for more help,including method short alias,video quality selection,concurrency control, 3 | download speed control,download directory... 4 | 5 | ## Method short alias 6 | 7 | Method names like `get_series` and `get_video` are too cumbersome to write? 
Agreed! You can use their 8 | short aliases for faster access: 9 | 10 | ```shell 11 | bilix s 'url' 12 | bilix v 'url' 13 | ... 14 | ``` 15 | Please check `bilix -h` for all short aliases 16 | 17 | ## Login 18 | 19 | There are two ways to log in 20 | 21 | * cookie option 22 | 23 | By adding the `SESSDATA` cookie from your browser's cache in the `--cookie` option, you can download videos that require a premium membership. 24 | 25 | * load cookies from browser 26 | 27 | After logging in through the browser, use the `-fb --from-browser` option to read cookies from the browser, 28 | such as `-fb chrome`. Using this method may require authorization. The method that `bilix` uses to read browser 29 | cookies is the open-source project [browser_cookie3](https://github.com/borisbabic/browser_cookie3). 30 | 31 | :::tip 32 | If you want to stay logged in, you can use `alias bilix=bilix --cookie xxxxxx` or `alias bilix=bilix -fb chrome` 33 | to create an alias for the `bilix` command 34 | ::: 35 | 36 | ## Video and audio quality, codec selection 37 | 38 | You can use the `--quality -q` option to choose the video resolution. bilix supports two different selection modes: 39 | 40 | * relative selection (default) 41 | 42 | By default, bilix selects the highest accessible quality for you (that is, `-q 0`). For the second highest, use `-q 1`, and so on: the larger the number, the lower the resolution. 43 | When the number is out of range, the lowest quality is selected, so you can always pick the lowest quality with `-q 999`. 44 | * absolute selection 45 | 46 | You can use `-q 1080P` to specify a resolution; the string just needs to be a substring of the resolution name shown on bilibili. 47 | 48 | More advanced users may need to specify a particular video codec for download, but the codecs supported by Bilibili are not visible on the website or in the app. For this purpose, bilix has designed the `info` method. By using it, you can see all the information about the video: 49 | 50 | ```text 51 | bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 52 | 53 | 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 54 | ┣━━ 画面 Video 55 | ┃ ┣━━ HDR 真彩 56 | ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB 57 | ┃ ┣━━ 4K 超清 58 | ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB 59 | ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB 60 | ┃ ┣━━ 1080P 60帧 61 | ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB 62 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB 63 | ┃ ┣━━ 1080P 高清 64 | ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB 65 | ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB 66 | ┃ ┣━━ 720P 高清 67 | ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB 68 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB 69 | ┃ ┣━━ 480P 清晰 70 | ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB 71 | ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB 72 | ┃ ┗━━ 360P 流畅 73 | ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB 74 | ┃ ┗━━ codec: avc1.64001E total: 11.59MB 75 | ┗━━ 声音 Audio 76 | ┣━━ 默认音质 77 | ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB 78 | ┗━━ Hi-Res无损 79 | ┗━━ codec: fLaC total: 94.55MB 80 | ``` 81 | 82 | Looks good😇, so how can I download the video with a specific codec? 83 | 84 | bilix provides another option, `--codec`. For example, you can use a combination like `-q 480P --codec hev1.1.6.L120.90` 85 | to specify downloading the 7.61MB one. The `--codec` option is similar to the `-q` option in that it also supports substring specification; 86 | for example, using `--codec hev` makes all videos choose codecs that start with `hev`.
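For instance, putting both options together for the video inspected above, the full command would look something like this (with the same placeholder cookie as before):

```shell
bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' -q 480P --codec hev1.1.6.L120.90
```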
87 | 88 | For audio quality, some videos may contain Dolby and Hi-Res audio. You can use the `--codec` option to specify these 89 | audio formats, for example: 90 | 91 | ```shell 92 | bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC 93 | ``` 94 | 95 | in `--codec hev:fLaC`, use`:` to split video and audio codec, if you just want to specify audio codec,you can use`--codec :fLaC` 96 | 97 | ## Resuming Interrupted Downloads 98 | 99 | Users can interrupt tasks by pressing `Ctrl+C`. For unfinished files, re-executing the command will resume the download 100 | based on the previous progress, and completed files will be skipped. However, for unfinished files, it is recommended 101 | to clear the temporary files of the unfinished tasks before executing the command again in the following situations, 102 | otherwise some temporary files may remain: 103 | 104 | * Changing the video quality `-q` or `--codec` after interruption 105 | * Changing the `--part-con` after interruption 106 | * Changing the `--time-range` after interruption 107 | 108 | ## Provide multiple urls at once 109 | All methods of bilix support providing multiple `url` 110 | ```shell 111 | bilix v 'url1' 'url2' 'url3' 112 | bilix up 'up_url1' 'up_url2' 113 | ``` 114 | Concurrency, speed control also works fine when you provide multiple `url` of course 115 | 116 | 117 | ## Support for More Sites 118 | 119 | bilix also supports some other websites, but their availability may vary as the author is currently busy. 120 | For further information, please refer to the following [discussion](https://github.com/HFrost0/bilix/discussions/39). 121 | 122 | ## Basic Download method 123 | For some basic download scenarios 124 | * You can directly download a file through the file url 125 | ```shell 126 | bilix f 'https://xxxx.com/xxxx.mp4' 127 | ``` 128 | * you can directly download m3u8 video by url 129 | ```shell 130 | bilix m3u8 'https:/xxxx.com/xxxx.m3u8' 131 | ``` 132 | 133 | ## Proxy 134 | bilix will use system proxy by default 135 | -------------------------------------------------------------------------------- /docs/en/api_examples.md: -------------------------------------------------------------------------------- 1 | # API Examples 2 | bilix provides the APIs of various websites, and they are all asynchronous 3 | ```python 4 | import asyncio 5 | from bilix.sites.bilibili import api 6 | from httpx import AsyncClient 7 | 8 | 9 | async def main(): 10 | # instantiate a httpx client for making http requests 11 | client = AsyncClient(**api.dft_client_settings) 12 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 13 | print(data) 14 | 15 | 16 | asyncio.run(main()) 17 | 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/en/async.md: -------------------------------------------------------------------------------- 1 | # Async basic 2 | Asynchronous programming in Python excels at handling network requests with high concurrency. 3 | Before using bilix in Python, you need to have some understanding of asynchronous programming in Python. 4 | The official Python [asyncio](https://docs.python.org/3/library/asyncio.html) library provides support for asynchronous I/O. 5 | 6 | ```python 7 | async def hello(): 8 | print("hello world") 9 | ``` 10 | 11 | For an async function (async def), calling it will not directly execute the function but instead return a coroutine object. 
12 | ```python 13 | c = hello() 14 | >>> c 15 | <coroutine object hello at 0x...> 16 | 17 | ``` 18 | 19 | We can submit the coroutine object to asyncio's event loop to execute it: 20 | 21 | ```python 22 | import asyncio 23 | 24 | >>> asyncio.run(c) 25 | "hello world" 26 | ``` 27 | 28 | All download methods of bilix are asynchronous, so you can execute them like this: 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | d = DownloaderBilibili() 34 | asyncio.run(d.get_video('url')) 35 | ``` 36 | -------------------------------------------------------------------------------- /docs/en/download_examples.md: -------------------------------------------------------------------------------- 1 | # Download Examples 2 | 3 | Is the command line too cumbersome and not powerful enough for you? bilix can be used as a Python library 4 | with user-friendly interfaces and enhanced functionality for greater flexibility. 5 | 6 | ## Start with the simplest 7 | 8 | ```python 9 | import asyncio 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | 12 | 13 | async def main(): 14 | # you can use the async with context manager to open and close a downloader 15 | async with DownloaderBilibili() as d: 16 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 17 | 18 | 19 | if __name__ == '__main__': 20 | asyncio.run(main()) 21 | 22 | ``` 23 | 24 | ## Combine multiple tasks and control concurrency 25 | 26 | You can combine the coroutine objects returned by the downloader and use gather to execute them concurrently. 27 | The concurrency is strictly restricted by the downloader object, ensuring no unexpected burden on the server. 28 | 29 | ```python 30 | import asyncio 31 | from bilix.sites.bilibili import DownloaderBilibili 32 | 33 | 34 | async def main(): 35 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 36 | cor1 = d.get_series( 37
77 | 78 | ```python 79 | import asyncio 80 | from bilix.sites.bilibili import DownloaderBilibili 81 | from bilix.sites.cctv import DownloaderCctv 82 | 83 | 84 | async def main(): 85 | async with DownloaderBilibili() as d_bl, DownloaderCctv() as d_tv: 86 | await asyncio.gather( 87 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 88 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 89 | ) 90 | 91 | 92 | if __name__ == '__main__': 93 | asyncio.run(main()) 94 | 95 | ``` 96 | 97 | ## Limit download speed 98 | 99 | Limiting the download speed is very simple. 100 | The following example limits the total download speed below 1MB/s 101 | 102 | ```python 103 | import asyncio 104 | from bilix.sites.bilibili import DownloaderBilibili 105 | from bilix.sites.cctv import DownloaderCctv 106 | 107 | 108 | async def main(): 109 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 110 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 111 | 112 | 113 | if __name__ == '__main__': 114 | asyncio.run(main()) 115 | 116 | ``` 117 | 118 | In addition, the speed settings between downloaders are also independent 119 | 120 | ```python 121 | async def main(): 122 | # 就像并发控制一样,每个downloader的速度设置也是独立的 123 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 124 | await asyncio.gather( 125 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 126 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 127 | ) 128 | ``` 129 | 130 | ## Show progress bar 131 | 132 | When using the python module, the progress bar is not displayed by default. If you want to display it, you can 133 | 134 | ```python 135 | from bilix.progress.cli_progress import CLIProgress 136 | 137 | CLIProgress.start() 138 | ``` 139 | 140 | or open via the `progress` object inside any downloader 141 | 142 | ```python 143 | d.progress.start() 144 | ``` 145 | 146 | 147 | -------------------------------------------------------------------------------- /docs/en/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | # https://vitepress.dev/reference/default-theme-home-page 3 | layout: home 4 | 5 | hero: 6 | name: "bilix" 7 | tagline: Lightning-fast asynchronous download tool for bilibili and more 8 | actions: 9 | - theme: brand 10 | text: Quickstart 11 | link: /en/quickstart 12 | - theme: alt 13 | text: Python API 14 | link: /en/async 15 | 16 | features: 17 | - icon: ⚡️ 18 | title: Fast & Async 19 | details: Asynchronous high concurrency support, controllable concurrency and speed settings 20 | - icon: 😉 21 | title: Lightweight & User-friendly 22 | details: Lightweight user-friendly CLI with progress notification, focusing on core functionality 23 | - icon: 📝 24 | title: Fully-featured 25 | details: Submissions, anime, TV Series, video clip, audio, favourite, danmaku ,cover... 26 | - icon: 🔨 27 | title: Extensible 28 | details: Extensible Python module suitable for more download scenarios 29 | --- 30 | -------------------------------------------------------------------------------- /docs/en/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | bilix is a powerful Python asynchronous video download tool that requires two steps to install: 3 | 4 | 1. 
pip install (requires Python >= 3.8) 5 | ```shell 6 | pip install bilix 7 | ``` 8 | If you are a macOS user, you can also use `brew` to install: 9 | ```shell 10 | brew install bilix 11 | ``` 12 | 13 | 2. [FFmpeg](https://ffmpeg.org): a command-line video tool used to merge the downloaded audio and video 14 | 15 | * For macOS, it can be installed via `brew install ffmpeg` 16 | * For Windows, please download it from the official website https://ffmpeg.org/download.html#build-windows ; you need to configure the environment variables after installation 17 | 18 | ::: info 19 | Just make sure that you can call the `ffmpeg` command from the command line in the end. 20 | ::: 21 | -------------------------------------------------------------------------------- /docs/en/more.md: -------------------------------------------------------------------------------- 1 | # More 2 | 3 | ## Community 4 | 5 | If you find any bugs or other issues, feel free to raise an [Issue](https://github.com/HFrost0/bilix/issues). 6 | 7 | If you have new ideas or feature requests, you are welcome to participate in 8 | the [Discussion](https://github.com/HFrost0/bilix/discussions) 9 | 10 | If you find this project helpful, you can support the author with a [Star](https://github.com/HFrost0/bilix/stargazers)🌟 11 | 12 | ## Contribute 13 | 14 | ❤️ Welcome~ Details can be found in [Contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING_EN.md) 15 | 16 | ## Known Bugs 🤡 17 | 18 | When two video names are exactly the same, task conflicts occur but no error is reported. 19 | -------------------------------------------------------------------------------- /docs/en/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | 3 | bilix offers a simple command line interface, so open the terminal and start downloading now! 4 | 5 | ## Batch download 6 | 7 | Batch download entire anime series, TV shows, movies, and UP submissions... just replace the `url` in the 8 | command with the web link of any video in the series you want to download. 9 | 10 | Head over to bilibili and find one to try (like [this](https://www.bilibili.com/video/BV1JE411g7XF)); 11 | `bilix` will download the files to the `videos` folder in the current directory of the command line, which is automatically created by default. 12 | 13 | ```shell 14 | bilix get_series 'url' 15 | ``` 16 | 17 | `get_series` is powerful, as it automatically recognizes and downloads all videos in a series. 18 | 19 | ::: info 20 | * What is a series: For example, all parts of a multi-part submission, or all episodes of an anime or TV show. 21 | * Some URLs containing parameters need to be wrapped in `''` when used in the terminal. 22 | The Windows cmd does not support `''`, but you can use PowerShell or Windows Terminal as an alternative. 23 | ::: 24 | 25 | ## Single download 26 | 27 | User😨: I don't want to download that many, just a single video. No problem, try this, just provide the web link of that video: 28 | 29 | ```shell 30 | bilix get_video 'url' 31 | ``` 32 | :::info 33 | Did you know?
Methods like `get_series` and `get_video` all have a [short alias](/en/advance_guide) 34 | ::: 35 | 36 | 37 | ## Audio download 38 | 39 | If you like the music and only want to download the audio, you can use the optional parameter `--only-audio` 40 | 41 | ```shell 42 | bilix get_series 'url' --only-audio 43 | ``` 44 | 45 | ## Clip download 46 | 47 | The video or live recording is too long and you only need the clip you are interested in✂️? Then you can use the 48 | `--time-range -tr` parameter to specify the time range 49 | 50 | ```shell 51 | bilix get_video 'url' -tr 0:16:53-0:17:49 52 | ``` 53 | 54 | In this example, a time range from 16 minutes 53 seconds to 17 minutes 49 seconds is specified. 55 | The format can be `h:m:s-h:m:s` or `s-s` 56 | 57 | This option is only available in `get_video`; you can combine `-tr` with `--only-audio` to download an audio clip 58 | 59 | ## Uploader download 60 | 61 | If you want to download the latest 100 submissions from an uploader 62 | 63 | ```shell 64 | bilix get_up 'https://space.bilibili.com/672328094' --num 100 65 | ``` 66 | 67 | `https://space.bilibili.com/672328094` is the uploader's space URL; you can also use the uploader id `672328094` in place of the `url` 68 | 69 | 70 | ## Download Videos by Category 71 | 72 | Suppose you enjoy watching the dance category👍 and want to download the top 20 超级敏感 宅舞 videos with 73 | the highest play count in the last 30 days, then you can use: 74 | 75 | ```shell 76 | bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 77 | ``` 78 | 79 | `get_cate` supports every sub-category on bilibili and offers options for sorting and keyword searching. 80 | For more details, please refer to `bilix -h` or the code comments. 81 | 82 | ## Download Videos from Favorites 83 | 84 | If you need to download videos from your own or someone else's favorites, you can use the `get_favour` method 85 | 86 | ```shell 87 | bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 88 | ``` 89 | 90 | `https://space.bilibili.com/11499954/favlist?fid=1445680654` is the URL of the favorites list. If you want to find 91 | the URL of a favorites list, the easiest way is to click on it in the left-side menu of the Bilibili web page, and the URL 92 | will appear in the browser's address bar. Alternatively, you can directly replace the URL with the fid `1445680654` 93 | 94 | ## Download collection or video list 95 | 96 | If you want to download a collection or video list released by an uploader, you can use the `get_collect` method 97 | 98 | ```shell 99 | bilix get_collect 'url' 100 | ``` 101 | 102 | Replace `url` with the URL of a collection or video list details page ([for example](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630)) 103 | 104 | 105 | ## Download subtitle, danmaku, cover...
106 | 107 | Add options `--subtitle` `--dm` `--image` according to your need to download these additional files 108 | 109 | ```shell 110 | bilix get_series 'url' --subtitle --dm --image 111 | ``` 112 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | # https://vitepress.dev/reference/default-theme-home-page 3 | layout: home 4 | 5 | hero: 6 | name: "bilix" 7 | tagline: 快如闪电的异步下载工具,支持bilibili及更多 8 | actions: 9 | - theme: brand 10 | text: 快速上手 11 | link: /quickstart 12 | - theme: alt 13 | text: Python调用 14 | link: /async 15 | 16 | features: 17 | - icon: ⚡️ 18 | title: 高速异步 19 | details: 异步高并发支持,可控的并发量和速度设置 20 | - icon: 😉 21 | title: 轻量易用 22 | details: 友好的CLI及进度提示,专注核心功能 23 | - icon: 📝 24 | title: 功能齐全 25 | details: 投稿,弹幕,收藏夹,分区,动漫,电视剧,切片,封面,音频... 26 | - icon: 🔨 27 | title: 可拓展 28 | details: 可扩展的Python模块适应更多下载场景 29 | --- 30 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # 安装 2 | bilix是一个强大的Python异步视频下载工具,安装它需要完成两个步骤: 3 | 4 | 1. pip安装(需要python3.8及以上) 5 | ```shell 6 | pip install bilix 7 | ``` 8 | 9 | 如果你是macOS用户,也可以使用`brew`安装: 10 | ```shell 11 | brew install bilix 12 | ``` 13 | 14 | 2. [FFmpeg](https://ffmpeg.org) :一个命令行视频工具,用于合成下载的音频和视频 15 | 16 | * macOS 下可以通过`brew install ffmpeg`进行安装。 17 | * Windows 下载请至官网 https://ffmpeg.org/download.html#build-windows ,安装好后需要配置环境变量。 18 | 19 | ::: info 20 | 最终确保在命令行中可以调用`ffmpeg`命令即可。 21 | ::: 22 | -------------------------------------------------------------------------------- /docs/more.md: -------------------------------------------------------------------------------- 1 | # 更多 2 | 3 | ## 欢迎提问 4 | 5 | 如果你发现任何bug或者其他问题,欢迎提[Issue](https://github.com/HFrost0/bilix/issues)。 6 | 7 | 如果你有新想法或新的功能请求,欢迎在[Discussion](https://github.com/HFrost0/bilix/discussions)中参与讨论 8 | 9 | 如果觉得该项目对你有所帮助,可以给作者一个小小的[Star](https://github.com/HFrost0/bilix/stargazers)🌟 10 | 11 | 12 | ## 参与贡献 13 | 14 | ❤️ 非常欢迎~详情可见[contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING.md) 15 | 16 | ## 已知的bug 🤡 17 | 18 | 当两个视频名字完全一样时,任务冲突但不会报错 19 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "docs:dev": "vitepress dev", 4 | "docs:build": "vitepress build", 5 | "docs:preview": "vitepress preview" 6 | }, 7 | "devDependencies": { 8 | "vitepress": "^1.0.0-alpha.63" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # 快速上手 2 | 3 | bilix提供了简单的命令行使用方式,打开终端开始下载吧~ 4 | 5 | ## 批量下载 6 | 7 | 批量下载整部动漫,电视剧,纪录片,电影,up投稿.....只需要把命令中的`url`替换成你要下载的系列中任意一个视频的网页链接。\ 8 | 到 bilibili 上找一个来试试吧~,比如这个李宏毅老师的机器学习视频:[链接](https://www.bilibili.com/video/BV1JE411g7XF), 9 | `bilix`会下载文件至命令行当前目录的`videos`文件夹中,默认自动创建。 10 | 11 | ```shell 12 | bilix get_series 'url' 13 | ``` 14 | 15 | `get_series`很强大,会自动识别系列所有视频并下载,当然,如果该系列只有一个视频(比如单p投稿)也是可以正常下载的。 16 | 17 | ::: info 18 | * 什么是一个系列(series):比如一个多p投稿的所有p,一部动漫,电视剧的所有集。 19 | 20 | * 某些含有参数的url在终端中要用`''`包住,而windows的命令提示符不支持`''`,可用powershell或windows terminal代替。 21 | ::: 22 | 23 | ## 单个下载 24 | 25 | 用户😨:我不想下载那么多,只想下载单个视频。没问题,试试这个,只需要提供那个视频的网页链接: 26 | 27 | ```shell 28 | bilix get_video 'url' 29 | 
``` 30 | :::info 31 | 你知道吗?`get_series` `get_video`方法名都有[简写](/advance_guide) 32 | ::: 33 | 34 | 35 | ## 下载音频 36 | 37 | 假设你喜欢音乐区,只想下载音频,那么可以使用可选参数`--only-audio`,例如下面是下载[A叔](https://space.bilibili.com/6075139) 38 | 一个钢琴曲合集音频的例子 39 | 40 | ```shell 41 | bilix get_series 'https://www.bilibili.com/video/BV1ts411D7mf' --only-audio 42 | ``` 43 | 44 | ## 切片下载 45 | 46 | 视频,直播录像太长,我需要下载我感兴趣的片段✂️,那么可以使用`--time-range -tr`参数指定时间段下载切片 47 | 48 | ```shell 49 | bilix get_video 'url' -tr 0:16:53-0:17:49 50 | ``` 51 | 52 | 这个例子中指定了16分53秒至17分49秒的片段。 `-tr`参数的格式为`h:m:s-h:m:s`,起始时间和结束时间以`-`分割,时分秒以`:` 53 | 分割。或者`s-s`格式,例如1013秒至1069秒`1013-1069` 54 | 55 | 该参数仅在`get_video`中生效,仅下载音频也支持该参数 56 | 57 | ## 下载特定up主的投稿 58 | 59 | 假设你是一个嘉心糖,想要下载嘉然小姐最新投稿的100个视频,那么你可以使用命令: 60 | 61 | ```shell 62 | bilix get_up 'https://space.bilibili.com/672328094' --num 100 63 | ``` 64 | 65 | `https://space.bilibili.com/672328094` 是up空间页url,另外用up主id`672328094`替换url同样也是可以的 66 | 67 | ## 下载分区视频 68 | 69 | 假设你喜欢看舞蹈区👍,想要下载最近30天播放量最高的20个超级敏感宅舞视频,那么你可以使用 70 | 71 | ```shell 72 | bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 73 | ``` 74 | 75 | `get_cate`支持b站的每个子分区,可以使用排序,关键词搜索等,详细请参考`bilix -h`或代码注释 76 | 77 | ## 下载收藏夹视频 78 | 79 | 如果你需要下载自己或者其他人收藏夹中的视频,你可以使用`get_favour`方法 80 | 81 | ```shell 82 | bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 83 | ``` 84 | 85 | `https://space.bilibili.com/11499954/favlist?fid=1445680654` 是收藏夹url,如果要知道一个收藏夹的url是什么, 86 | 最简单的办法是在b站网页左侧列表中点击切换到该收藏夹,url就会出现在浏览器的地址栏中。另外直接使用url中的fid`1445680654` 87 | 替换url也是可以的。 88 | 89 | ## 下载合集或视频列表 90 | 91 | 如果你需要下载up主发布的合集或视频列表,你可以使用`get_collect`方法 92 | 93 | ```shell 94 | bilix get_collect 'url' 95 | ``` 96 | 97 | 将`url`替换为某个合集或视频列表详情页的url(例如[这个](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630))即可下载合集或列表内所有视频 98 | 99 | :::info 100 | 合集和视频列表有什么区别?b站的合集可以订阅,列表则没有这个功能,但是他们都在up主空间页面的合集和列表菜单中,例如[这个](https://space.bilibili.com/369750017/channel/series) 101 | ,`get_collect`会根据详情页url中的信息判断这个链接是合集还是列表 102 | ::: 103 | 104 | ## 下载字幕,弹幕,封面... 105 | 106 | 在命令中加入可选参数`--subtitle`(字幕) `--dm`(弹幕) `--image`(封面),即可下载这些附属文件 107 | 108 | ```shell 109 | bilix get_series 'url' --subtitle --dm --image 110 | ``` 111 | -------------------------------------------------------------------------------- /examples/a_very_simple_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用bilix在python中最简单的实践🤖 3 | The simplest practice of using bilix in python 4 | """ 5 | import asyncio 6 | # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等,总能找到符合你需求的 7 | # downloader with many methods like get_series, get_video...
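# 注意:这些方法都是异步协程函数,直接调用只会得到协程对象,必须 await 才会真正执行(见下方 main)
# note: these methods are coroutine functions; calling them only creates a coroutine object that must be awaited (see main below)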
8 | from bilix.sites.bilibili import DownloaderBilibili 9 | 10 | 11 | async def main(): 12 | # 你可以使用with上下文管理器来开启和关闭一个下载器 13 | # you can use with to open and close a downloader 14 | async with DownloaderBilibili() as d: 15 | # 然后用await等待下载完成 16 | # and use await to download 17 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 18 | 19 | 20 | async def main2(): 21 | d = DownloaderBilibili() 22 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 23 | # 或者,手动关闭,一样很简单 24 | # or you can call aclose() manually 25 | await d.aclose() 26 | 27 | 28 | if __name__ == '__main__': 29 | asyncio.run(main()) 30 | -------------------------------------------------------------------------------- /examples/download_by_timerange.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以只下视频的一小段 3 | You can download just a small clip of the video 4 | """ 5 | import asyncio 6 | 7 | from bilix.sites.bilibili import DownloaderBilibili 8 | 9 | 10 | async def main(): 11 | """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" 12 | async with DownloaderBilibili() as d: 13 | # time_range (start_time, end_time) 14 | await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) 15 | 16 | 17 | if __name__ == '__main__': 18 | asyncio.run(main()) 19 | -------------------------------------------------------------------------------- /examples/limit_download_rate.py: -------------------------------------------------------------------------------- 1 | """ 2 | 限制下载速度很简单 3 | limit download rate is simple 4 | """ 5 | import asyncio 6 | from bilix.sites.bilibili import DownloaderBilibili 7 | from bilix.sites.cctv import DownloaderCctv 8 | 9 | 10 | async def main(): 11 | async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s 12 | await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") 13 | 14 | 15 | async def main2(): 16 | # 就像并发控制一样,每个downloader的速度设置也是独立的 17 | # Like concurrency control, the speed settings of each downloader are independent 18 | async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: 19 | await asyncio.gather( 20 | bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), 21 | cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') 22 | ) 23 | 24 | 25 | if __name__ == '__main__': 26 | asyncio.run(main()) 27 | -------------------------------------------------------------------------------- /examples/multi_site_download_same_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。 3 | 各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 4 | 5 | You can initialize the downloaders of different websites at the same time, and use the coroutine objects returned by 6 | their methods to download concurrently. The concurrency control between each downloader is independent, so you can 7 | maximize the use of your network resources. 
8 | """ 9 | import asyncio 10 | from bilix.sites.bilibili import DownloaderBilibili 11 | from bilix.sites.douyin import DownloaderDouyin 12 | from bilix.sites.cctv import DownloaderCctv 13 | 14 | 15 | async def main(): 16 | async with DownloaderBilibili() as d_bl, DownloaderDouyin() as d_dy, DownloaderCctv() as d_tv: 17 | await asyncio.gather( 18 | d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), 19 | d_dy.get_video('https://www.douyin.com/video/7132430286415252773'), 20 | d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) 21 | ) 22 | 23 | 24 | if __name__ == '__main__': 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /examples/multi_type_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 3 | 4 | You can combine coroutine objects returned by the downloader and use gather to execute them concurrently. 5 | The concurrency is strictly constrained by the downloader object, so it will not cause unexpected burden on 6 | the site server. 7 | """ 8 | import asyncio 9 | from bilix.sites.bilibili import DownloaderBilibili 10 | 11 | 12 | async def main(): 13 | d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) 14 | cor1 = d.get_series( 15 | 'https://www.bilibili.com/bangumi/play/ss28277?spm_id_from=333.337.0.0', 16 | quality=999) 17 | cor2 = d.get_up(url_or_mid='436482484', quality=999) 18 | cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122?from_spmid=666.4.0.0', quality=999) 19 | await asyncio.gather(cor1, cor2, cor3) 20 | await d.aclose() 21 | 22 | 23 | if __name__ == '__main__': 24 | asyncio.run(main()) 25 | -------------------------------------------------------------------------------- /examples/use_of_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 3 | 4 | bilix provides api for various websites. 
You can use them if you need, and they are asynchronous 5 | """ 6 | import asyncio 7 | 8 | from bilix.sites.bilibili import api 9 | from httpx import AsyncClient 10 | 11 | 12 | async def main(): 13 | # 需要先实例化一个用来进行http请求的client 14 | # first we should initialize a http client 15 | client = AsyncClient(**api.dft_client_settings) 16 | data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') 17 | print(data) 18 | 19 | 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "bilix" 7 | dynamic = ["version"] 8 | description = "⚡️Lightning-fast asynchronous download tool for bilibili and more" 9 | readme = "README.md" 10 | license = "Apache-2.0" 11 | requires-python = ">=3.8" 12 | authors = [ 13 | { name = "HFrost0", email = "hhlfrost@gmail.com" }, 14 | ] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3 :: Only", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | dependencies = [ 25 | "aiofiles>=0.8.0", 26 | "anyio", 27 | "danmakuC>=0.3.5", 28 | "bs4", 29 | "click>=8.0.3", 30 | "httpx[http2]>=0.23.3", 31 | "json5", 32 | "m3u8>=3.5.0", 33 | "pycryptodome", 34 | "pydantic>=2.5.3", 35 | "rich", 36 | "browser_cookie3>=0.17.1", 37 | "pymp4>=1.2.0", 38 | ] 39 | 40 | [project.scripts] 41 | bilix = "bilix.cli.main:main" 42 | 43 | [project.urls] 44 | Homepage = "https://github.com/HFrost0/bilix" 45 | 46 | [tool.hatch.version] 47 | path = "bilix/__init__.py" 48 | 49 | [tool.hatch.build.targets.sdist] 50 | include = [ 51 | "/bilix", 52 | ] 53 | --------------------------------------------------------------------------------