├── LICENSE ├── README.md ├── README.rst ├── nbdler ├── __init__.py ├── api.py ├── block.py ├── client │ ├── __init__.py │ ├── abstract.py │ ├── aiohttp.py │ ├── base_http.py │ └── requests.py ├── download.py ├── error.py ├── file.py ├── handler.py ├── progress.py ├── request.py ├── rpc.py ├── session.py ├── uri.py ├── utils.py └── version.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nbdler 2 | =============== 3 | [![Build Status](https://img.shields.io/badge/build-passing-green.svg)](https://github.com/ZSAIM/Nbdler) 4 | [![Build Status](https://img.shields.io/badge/pypi-v3.0.3-blue.svg)](https://pypi.org/project/Nbdler/) 5 | 6 | Nbdler 是由Python3 编写的异步多客户端、多来源下载工具。 7 | 8 | 9 | # 支持协议 10 | 11 | - HTTP 12 | - HTTPS 13 | 14 | # 支持客户端 15 | 16 | - [**aiohttp**](https://github.com/aio-libs/aiohttp): Async http client/server framework. 17 | - [**requests**](https://github.com/psf/requests): A simple, yet elegant HTTP library. 
18 | 19 | # 特征 20 | 21 | - 支持断点续传 22 | - 支持多来源多客户端下载 23 | - 支持速度限速 24 | - 支持下载缓冲设置 25 | - 支持代理(取决于客户端) 26 | 27 | # 入门使用 28 | 29 | ### 简单下载示例 30 | ```python 31 | import asyncio 32 | import nbdler 33 | 34 | async def main(): 35 | request = nbdler.Request('http://a/file', file_path='file') 36 | async with nbdler.dlopen(request) as dl: 37 | dl.start() 38 | while not dl.is_finished(): 39 | print((f'filename={dl.file.name}, ' 40 | f'transfer rate={round(dl.transfer_rate() / 1024)} kb/s, ' 41 | f'{round(dl.percent_complete(), 2)} % percent complete')) 42 | await asyncio.sleep(1) 43 | await dl.ajoin() 44 | 45 | asyncio.run(main()) 46 | ``` 47 | ### 多客户端,多来源,指定处理客户端,指定最大并发数 48 | ```python 49 | import asyncio 50 | import nbdler 51 | 52 | async def main(): 53 | request = nbdler.Request('http://a/file', 54 | client_policy=nbdler.get_policy(http='aiohttp', https='requests'), 55 | max_concurrent=16, file_path='file') 56 | request.put('https://b/file') 57 | async with nbdler.dlopen(request) as dl: 58 | await dl.astart() 59 | await dl.ajoin() 60 | 61 | asyncio.run(main()) 62 | ``` 63 | 64 | ### 关于方法 65 | 66 | 67 | # Installation 68 | 69 | $ pip install Nbdler 70 | 71 | # Requirements 72 | 73 | - Python >= 3.5.3 74 | - aiohttp 75 | - requests 76 | 77 | 78 | # 许可证 79 | 80 | Apache-2.0 81 | 82 | # TODO 83 | 84 | - [ ] 完善使用文档。 85 | - [ ] 实现Handler处理器(SampleValidate 保证多来源下载时的资源匹配)。 86 | - [ ] 实现DownloadSession(以便实现下载器的进程隔离,同时实现RPC进程通信)。 87 | - [ ] 支持FTP协议。 88 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Nbdler 2 | ====== 3 | 4 | 5 | 6 | Installation 7 | ============= 8 | 9 | :: 10 | 11 | $ pip install Nbdler 12 | 13 | 14 | License 15 | ======= 16 | 17 | Apache-2.0 18 | 19 | -------------------------------------------------------------------------------- /nbdler/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | from .api import dlopen 4 | from .download import Downloader 5 | from .request import Request 6 | from .client import get_policy 7 | 8 | from .error import MaxRetriesExceeded, ClientError, HandlerError 9 | -------------------------------------------------------------------------------- /nbdler/api.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import mimetypes 4 | import os 5 | import asyncio 6 | from concurrent.futures.thread import ThreadPoolExecutor 7 | from .utils import forever_loop_in_executor 8 | 9 | from .download import Downloader 10 | from .client import get_policy 11 | from .uri import URIs 12 | from .progress import Progress 13 | from .error import MaxRetriesExceeded 14 | from .block import BlockGroup 15 | from .file import File 16 | from .request import Request 17 | 18 | 19 | __all__ = [ 20 | 'dlopen', 21 | ] 22 | 23 | 24 | def dlopen(request, handlers=None, *, do_async=True, executors=None): 25 | """ 打开下载请求Request对象,并构造返回Downloader。 26 | Args: 27 | request: 下载请求对象或下载配置文件路径。 28 | handlers: 添加的Handler对象列表,仅适用于打开下载配置文件 29 | do_async: 是否使用异步打开 30 | executors: 使用指定的concurrent.futures.thread打开,默认新创线程执行。 31 | """ 32 | async def open_request(): 33 | # 打开请求Request对象 34 | client_policy = request.client_policy 35 | if client_policy is None: 36 | client_policy = get_policy() 37 | 38 | uris = URIs.load_from_source_uris(request.uris) 39 | progress = Progress((0, None)) 40 | source_uri = None 41 | resp = None 42 | exceptions = [] 43 | 44 | max_retries = request.max_retries 45 | if request.max_retries is None: 46 | max_retries = float('inf') 47 | 48 | while True: 49 | for source_uri in uris: 50 | try: 51 | client_cls = client_policy.get_solution(source_uri.protocol) 52 | resp = await client_cls.dlopen( 53 | source_uri, progress, **source_uri.kwargs) 54 | except BaseException as err: 55 | exceptions.append(err) 56 | 
max_retries -= 1 57 | if max_retries < 0: 58 | raise MaxRetriesExceeded(f'max_retries: {request.max_retries}', exceptions) 59 | else: 60 | break 61 | else: 62 | continue 63 | break 64 | 65 | source_uri.set_response(resp) 66 | path, name = os.path.split(request.file_path) 67 | if not name: 68 | name = source_uri.path.rsplit('/', 1)[-1] 69 | if not name: 70 | ext = mimetypes.guess_extension(resp.content_type) 71 | name = f'{source_uri.hostname}{ext or ""}' 72 | size = resp.length 73 | 74 | block_grp = BlockGroup(request.chunk_size, size) 75 | block_grp.insert((0, size)) 76 | opts = request.opts 77 | opts.update(dict( 78 | client_policy=client_policy, 79 | resume_capability=resp.resume_capability, 80 | )) 81 | return Downloader( 82 | File(path, name, size), 83 | uris, 84 | block_grp, 85 | **opts 86 | ) 87 | 88 | async def open_cfg(): 89 | # 打开下载配置文件 90 | file = request 91 | if not os.path.isfile(file): 92 | raise FileNotFoundError(f'下载数据配置文件{file}未找到。') 93 | with open(file, mode='r') as fd: 94 | dumpy_json = fd.read() 95 | dumpy = json.loads(dumpy_json) 96 | return Downloader.loads(dumpy, handlers) 97 | 98 | async def do_open(): 99 | if isinstance(request, Request): 100 | return await open_request() 101 | else: 102 | return await open_cfg() 103 | 104 | def callback(fut): 105 | executors.shutdown(False) 106 | 107 | new_executor = False 108 | if executors is None: 109 | executors = ThreadPoolExecutor( 110 | max_workers=1, thread_name_prefix='Nbdler.dlopen() Worker') 111 | new_executor = True 112 | 113 | exec_fut = forever_loop_in_executor(executors) 114 | if new_executor: 115 | exec_fut.add_done_callback(callback) 116 | 117 | loop = exec_fut.get_loop() 118 | if do_async: 119 | def done_stop_loop(fut): 120 | nonlocal exec_fut 121 | exec_fut.close() 122 | 123 | future = asyncio.wrap_future( 124 | asyncio.run_coroutine_threadsafe(do_open(), loop=loop)) 125 | future.add_done_callback(done_stop_loop) 126 | result = _AsyncDownloadOpenContextManager(future) 127 | else: 128 | 
future = asyncio.run_coroutine_threadsafe(do_open(), loop=loop) 129 | result = future.result() 130 | exec_fut.close() 131 | return result 132 | 133 | 134 | class _AsyncDownloadOpenContextManager: 135 | __slots__ = '_future', '_result' 136 | 137 | def __init__(self, future): 138 | self._future = future 139 | self._result = None 140 | 141 | def __await__(self): 142 | return self._future.__await__() 143 | 144 | def __iter__(self): 145 | return self.__await__() 146 | 147 | async def __aenter__(self): 148 | self._result = await self._future 149 | return await self._result.__aenter__() 150 | 151 | async def __aexit__(self, exc_type, exc_val, exc_tb): 152 | return await self._result.__aexit__(exc_type, exc_val, exc_tb) 153 | -------------------------------------------------------------------------------- /nbdler/block.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 下载块 3 | 4 | 下载块Chunk结构: 5 | 6 | +---------------------------------+ 7 | | Chunk | 8 | | +--------+----------+ | 9 | | | client | progress | | 10 | | +--------+----------+ | 11 | +---------------------------------+ 12 | | Block [0:25] | 13 | | >>>>>-------------------- 05/25 | 14 | +---------------------------------+ 15 | |+|+|+|+|+|+|+|+|+|-|-|-|-|-|-|-|-| 16 | |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-| <--------- block 17 | +---------------------------------+ 18 | 19 | 其中: '>' 表示已下载块; '-' 表示未下载块 20 | 21 | 下载块管理器: 22 | 23 | +-------------------------------------------------------------+ 24 | | ChunkManager | 25 | | +--------C1-------+ +---C3----+ +---------C2--------+ | 26 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 27 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 28 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 29 | | +-----------------+ +---------+ +-------------------+ | 30 | +-------------------------------------------------------------+ 31 | 32 | 文件下载块切片原理: 33 | 34 | 表述: 35 | 
搜索从剩余下载量最多的下载块中切片生成新的下载块。 36 | 37 | 1. 下载块C1 (总下载块=C1) 38 | +--------------------------------------------------------------------------------------------------------------------+ 39 | | C1 [0:100] | 40 | | >>>>>>>--------------------------------------------------------------------------------------------- | 41 | +--------------------------------------------------------------------------------------------------------------------+ 42 | 43 | 2. 从下载块C1中切片分出下载块C2 (总下载块=C1+C2) 44 | +--------------------------------------------------------------+-----------------------------------------------------+ 45 | | C1 [0:54] | C2 [54:100] | 46 | | >>>>>>>>>>>>>>---------------------------------------- | >>>------------------------------------------- | 47 | +--------------------------------------------------------------+-----------------------------------------------------+ 48 | 49 | 3. 从下载块C2中切片分出下载块C3 (总下载块=C1+C2+C3) 50 | +----------------------------------------------------------+-----------------------------+---------------------------+ 51 | | C1 [0:54] | C2 [54:79] | C3 [79:100] | 52 | | >>>>>>>>>>>>>>>>>>>>>--------------------------------- | >>>>>>>>----------------- | >>>>----------------- | 53 | +----------------------------------------------------------+-----------------------------+---------------------------+ 54 | 55 | 4. 从下载块C1中切片分出下载块C4 (总下载块=C1+C2+C3+C4) 56 | +--------------------------------------+----------------------+----------------------------+-------------------------+ 57 | | C1 [0:36] | C4 [36:54] | C2 [54:79] | C3 [79:100] | 58 | | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>----- | >----------------- | >>>>>>>>>>>>>>>>>-------- | >>>>>>>>>>>>>>>>----- | 59 | +--------------------------------------+----------------------+----------------------------+-------------------------+ 60 | 61 | 5. 
从下载块C4中切片分出下载块C5 (总下载块=C1+C2+C3+C4+C5) 62 | +--------------------------------------+------------+------------+---------------------------+-----------------------+ 63 | | C1 [0:36] | C4 [36:46] | C5 [46:54] | C2 [54:79] | C3 [79:100] | 64 | | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>-- | >>>>>----- | >>------ | >>>>>>>>>>>>>>>>>>>>>>--- | >>>>>>>>>>>>>>>>>---- | 65 | +--------------------------------------+------------+------------+---------------------------+-----------------------+ 66 | 67 | ... 68 | 69 | 70 | """ 71 | from .utils import UsageInfo 72 | from math import ceil 73 | from threading import RLock 74 | from time import time 75 | from .progress import Progress 76 | import bisect 77 | 78 | 79 | class Chunk: 80 | __slots__ = 'uri_id', 'begin', 'end' 81 | 82 | def __init__(self, uri_id, begin, end): 83 | """ 84 | :param 85 | uri_id : 源URI的ID 86 | begin : 单元的开始索引 87 | end : 单元的结束索引 88 | """ 89 | self.uri_id = uri_id 90 | self.begin = begin 91 | self.end = end 92 | 93 | @property 94 | def length(self): 95 | return self.end - self.begin 96 | 97 | def __repr__(self): 98 | return f'' 99 | 100 | def __iter__(self): 101 | return iter([self.uri_id, self.begin, self.end]) 102 | 103 | 104 | class Block: 105 | """ 下载块是对某一刻下载进度的快照,是一个状态量。不具备实时性。 106 | 既然不作为实时数据,那么不需要对其进行强制与实时数据对应。 107 | 那么对于未确定大小的进度对象Progress,即大小为inf时,不必关注其大小,关注的是其下载的量的块记录。 108 | """ 109 | __slots__ = 'client', 'progress', '_chunk_size', '_range', '_chunks', '_lock' 110 | 111 | def __init__(self, progress, chunk_size, init_chunks=None): 112 | self.client = None 113 | self.progress = progress 114 | self._chunk_size = chunk_size 115 | 116 | begin = progress.begin // chunk_size 117 | 118 | end = 1 119 | if progress.end not in (float('inf'), None): 120 | end = int(ceil(progress.end / chunk_size)) 121 | 122 | self._range = [begin, end] 123 | self._chunks = [] 124 | 125 | if init_chunks is not None: 126 | self._chunks = [Chunk(*chunk) for chunk in init_chunks] 127 | 128 | self._lock = RLock() 129 | self.refresh() 130
| 131 | @property 132 | def begin(self): 133 | return self._range[0] 134 | 135 | @property 136 | def end(self): 137 | return self._range[1] 138 | 139 | @property 140 | def length(self): 141 | try: 142 | return self._range[1] - self._range[0] 143 | except TypeError: 144 | return float('inf') 145 | 146 | @property 147 | def chunks(self): 148 | self.refresh() 149 | return self._chunks 150 | 151 | def current_uri(self): 152 | return self.client and self.client.source_uri 153 | 154 | def slice(self, request_range): 155 | resp_range = self.progress.slice(request_range) 156 | self.refresh() 157 | return resp_range 158 | 159 | def unused_length(self): 160 | """ 返回下载块中未处理的块chunk长度。""" 161 | if self.progress.end in (None, float('inf')): 162 | return float('inf') if not self.progress.is_walk_finished() else 0 163 | self.refresh() 164 | 165 | return (not self._chunks and self.length) or self.length - self._chunks[-1].end 166 | 167 | def __getitem__(self, index): 168 | assert type(index) is int 169 | for c in self.chunks: 170 | if c.begin <= index < c.end: 171 | return c 172 | 173 | assert False 174 | 175 | def refresh(self): 176 | """ 刷新下载块进度信息。 177 | 178 | 由于下载块只是作为快照的形式监控,并不直接由下载客户端接管,所以得到的信息并不是实时的, 179 | 这就需要在获取信息的时候进行刷新信息。 180 | 对外的接口都已进行了刷新数据来保证数据的实时性。 181 | """ 182 | with self._lock: 183 | progress = self.progress 184 | 185 | block_begin = self.begin 186 | 187 | cur_uri = self.current_uri() 188 | 189 | cur_walk = progress.walk_length / self._chunk_size 190 | if not progress.walk_left: 191 | cur_walk = int(ceil(cur_walk)) 192 | else: 193 | cur_walk = int(cur_walk) 194 | 195 | cur_done = progress.done_length / self._chunk_size 196 | if not progress.done_left: 197 | cur_done = int(ceil(cur_done)) 198 | else: 199 | cur_done = int(cur_done) 200 | 201 | # 更新块范围 202 | block_end = progress.end 203 | if block_end is None: 204 | block_end = cur_walk + block_begin 205 | block_end = int(ceil(block_end / self._chunk_size)) 206 | 207 | self._range[1] = block_end 208 | 209 | last_chunk 
= (self._chunks and self._chunks[-1]) or None 210 | if last_chunk is None: 211 | if cur_uri is not None: 212 | self._chunks.append(Chunk(cur_uri.id, 0, cur_walk)) 213 | else: 214 | pass 215 | else: 216 | if cur_uri is not None: 217 | if last_chunk.uri_id != cur_uri.id: 218 | self._chunks.append(Chunk(cur_uri.id, cur_walk, cur_walk)) 219 | else: 220 | last_chunk.end = cur_walk 221 | 222 | def half_unused(self): 223 | unused_len = self.unused_length() 224 | put_begin = self.begin + (self.length - unused_len) + int(ceil(unused_len / 2)) 225 | put_end = self.end 226 | if put_begin == put_end: 227 | return None 228 | return put_begin * self._chunk_size, put_end * self._chunk_size 229 | 230 | def request(self, client): 231 | self.client = client 232 | return self 233 | 234 | async def __aenter__(self): 235 | from nbdler.handler import block_context 236 | 237 | assert self.client 238 | block_context.set(self) 239 | return await self.client.__aenter__() 240 | 241 | async def __aexit__(self, exc_type, exc_val, exc_tb): 242 | from nbdler.handler import block_context 243 | 244 | self.refresh() 245 | client = self.client 246 | await client.__aexit__(exc_type, exc_val, exc_tb) 247 | self.client = None 248 | block_context.set(None) 249 | 250 | def dumps(self): 251 | return { 252 | 'progress': list(self.progress), 253 | 'range': [self.begin, self.end], 254 | 'chunks': [list(c) for c in self._chunks] 255 | } 256 | 257 | def __repr__(self): 258 | return f'' 259 | 260 | def __iter__(self): 261 | return iter([list(self.progress), self._chunk_size, [list(block) for block in self._chunks]]) 262 | 263 | def __lt__(self, other): 264 | return self.begin < other.begin 265 | 266 | 267 | class BlockGroup: 268 | """ 下载块管理器。 """ 269 | def __init__(self, chunk_size, total_size, duration=0): 270 | self._blocks = [] 271 | self.chunk_size = chunk_size 272 | try: 273 | self.total_chunk = int(ceil(total_size / chunk_size)) 274 | except (TypeError, OverflowError): 275 | self.total_chunk = 1 276 | 277 
| if total_size is None: 278 | total_size = float('inf') 279 | self.total_size = total_size 280 | 281 | self.usage_info = UsageInfo(self.walk_length) 282 | 283 | self._start_time = None 284 | self._duration = duration 285 | 286 | def transfer_rate(self): 287 | """ 实时数据传输速率。 """ 288 | return self.usage_info.rate 289 | 290 | def average_speed(self): 291 | """ 平均数据传输速率。 """ 292 | total_time = self._duration + time() - (self._start_time or time()) 293 | return self.walk_length() / (total_time or float('inf')) 294 | 295 | def walk_length(self): 296 | """ 已下载字节数。 """ 297 | return sum((v.progress.walk_length for v in self._blocks)) 298 | 299 | def done_length(self): 300 | """ 已缓冲的字节数。""" 301 | return sum((v.progress.done_length for v in self._blocks)) 302 | 303 | def remaining_length(self): 304 | """ 还剩余字节数。 """ 305 | return self.total_size - self.walk_length() 306 | 307 | def remaining_time(self): 308 | """ 估计剩余时间。 """ 309 | realtime_rate = self.transfer_rate() 310 | if not realtime_rate: 311 | return float('inf') 312 | return self.remaining_length() / realtime_rate 313 | 314 | def percent_complete(self): 315 | return self.walk_length() * 100 / self.total_size 316 | 317 | def is_walk_finished(self): 318 | for b in self._blocks: 319 | if not b.progress.is_walk_finished(): 320 | return False 321 | return not self.integrity_check() 322 | 323 | def is_done_finished(self): 324 | for b in self._blocks: 325 | if not b.progress.is_done_finished(): 326 | return False 327 | return not self.integrity_check() 328 | 329 | is_finished = is_done_finished 330 | 331 | def insert(self, put_range): 332 | """ 插入下载块。 333 | Args: 334 | put_range: 插入的快进度范围range 335 | """ 336 | block = Block(Progress(put_range), self.chunk_size) 337 | bisect.insort(self._blocks, block) 338 | 339 | return block 340 | 341 | def unfinished_blocks(self): 342 | return [b for b in self._blocks if not b.progress.is_walk_finished()] 343 | 344 | def activate(self): 345 | """ 激活下载块映射图。 """ 346 | self._start_time = time() 
347 | 348 | def deactivate(self): 349 | """ 关闭下载块映射图。 """ 350 | self._duration += time() - (self._start_time or time()) 351 | self._start_time = None 352 | self.usage_info.reset() 353 | if self.is_walk_finished(): 354 | if self.total_size in (None, float('inf')): 355 | self.total_size = self.walk_length() 356 | 357 | def integrity_check(self): 358 | """ 下载块映射图完整性检测。 359 | 如果下载块缺失返回缺失的块,否则返回[]。 360 | """ 361 | if not self._blocks: 362 | return [(0, self.total_chunk)] 363 | missing = [] 364 | prv_end = self._blocks[0].end 365 | prv_b = None 366 | for v in self._blocks[1:]: 367 | v.refresh() 368 | if v.begin - prv_end > 0: 369 | # 如果下一个下载块的起点索引比上一个下载块的结束索引要大,说明了这其中缺少了一块。 370 | missing.append((prv_end, v.begin)) 371 | elif v.begin - prv_end < 0: 372 | # 如果下一个下载块的起点索引比上一个下载块的结束索引要小,说明这出现了下载块范围交叉。 373 | raise ValueError(f'完整性校验不通过。冲突:{prv_b} <-> {v}') 374 | 375 | prv_end = v.end 376 | prv_b = v 377 | 378 | return missing 379 | 380 | def dumps(self): 381 | return { 382 | 'chunk_size': self.chunk_size, 383 | 'total_size': self.total_size, 384 | 'duration': self._duration, 385 | 'blocks': [b.dumps() for b in self._blocks], 386 | } 387 | 388 | @classmethod 389 | def loads(cls, dumpy): 390 | chunk_size = dumpy['chunk_size'] 391 | block_grp = cls(chunk_size, dumpy['total_size'], dumpy['duration']) 392 | for block in dumpy['blocks']: 393 | progress = Progress(*block['progress']) 394 | block = Block(progress, chunk_size, block['chunks']) 395 | bisect.insort(block_grp._blocks, block) 396 | return block_grp 397 | 398 | def __iter__(self): 399 | """ 迭代返回下载块对象。""" 400 | return iter([self.chunk_size, self.total_size, [list(block) for block in self._blocks]]) 401 | 402 | def __repr__(self): 403 | return f'' 405 | -------------------------------------------------------------------------------- /nbdler/client/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from . 
import aiohttp, requests 4 | from .abstract import AbstractClient 5 | from collections import defaultdict 6 | 7 | __all__ = ['get_policy', 'ClientPolicy'] 8 | 9 | _solutions = defaultdict(list) 10 | _name_solution = {} 11 | 12 | 13 | class ClientPolicy: 14 | def __init__(self, **specified_mapping): 15 | self._specified = {k.lower(): v.lower() for k, v in specified_mapping.items()} 16 | 17 | def get_solution(self, protocol): 18 | """ 返回根据策略决定的客户端处理模块。 19 | Args: 20 | protocol: 要处理的协议 21 | 22 | Returns: 23 | 返回客户端处理方案 24 | """ 25 | sol_name = self._specified.get(protocol, None) 26 | if sol_name is None: 27 | # 使用该协议最新注册的客户端处理器作为默认的处理策略 28 | sol_name = _solutions.get(protocol, [None])[-1] 29 | if sol_name is None: 30 | raise NotImplementedError(f'没有找到协议{protocol}的处理策略。') 31 | solution = _name_solution.get(sol_name, None) 32 | if solution is None: 33 | raise NotImplementedError(f'没有找到名称为{sol_name}的客户端处理器。') 34 | return solution 35 | 36 | def __iter__(self): 37 | return iter(self._specified.items()) 38 | 39 | 40 | class ProtocolSolution: 41 | def __init__(self, module): 42 | self._module = module 43 | 44 | @property 45 | def name(self): 46 | return self._module.NAME 47 | 48 | @property 49 | def supported_protocols(self): 50 | return self._module.PROTOCOL_SUPPORT 51 | 52 | def is_async(self): 53 | return self._module.ASYNC_EXECUTE 54 | 55 | @property 56 | def dlopen(self): 57 | return self._module.ClientHandler.dlopen 58 | 59 | def get_client(self, *args, **kwargs): 60 | return self._module.ClientHandler(*args, **kwargs) 61 | 62 | def get_session(self, *args, **kwargs): 63 | return self._module.ClientSession(*args, **kwargs) 64 | 65 | 66 | def get_policy(**kwargs): 67 | return ClientPolicy(**kwargs) 68 | 69 | 70 | def register(module): 71 | """ 注册下载客户端处理模块。 72 | 73 | 客户端模块规范: 74 | 1. 客户端处理程序要求继承abstract_base.py中的AbstractClient类 75 | 2. 使用类变量NAME作为客户端的唯一标识名称,尽量避免与其他客户端重名, 76 | 重名的处理策略是后注册覆盖前注册。 77 | 3. 使用ClientHandler作为客户端的类名,或通过赋值该模块变量名实现 78 | 4. 
使用ClientSession作为客户端会话,必须存在该变量,若不需要会话则赋值noop函数, 79 | 客户端会话创建不提供参数,若需要提供使用functions.partial传递定义 80 | 81 | Args: 82 | module: 协议处理解决方案 83 | 84 | """ 85 | global _solutions, _name_solution 86 | solution = ProtocolSolution(module) 87 | for protocol in solution.supported_protocols: 88 | _solutions[protocol].append(solution.name) 89 | _name_solution[solution.name] = solution 90 | 91 | 92 | def main(): 93 | # 多线程HTTP/HTTPS,使用requests库 94 | register(requests) 95 | # 异步HTTP/HTTPS,使用aiohttp库 96 | register(aiohttp) 97 | 98 | 99 | # 注册下载客户端 100 | main() 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /nbdler/client/abstract.py: -------------------------------------------------------------------------------- 1 | 2 | import asyncio 3 | 4 | 5 | class AbstractClient: 6 | """ 抽象客户端 7 | 8 | Class Variable: 9 | NAME: 客户端名称,作为客户端的唯一标识 10 | PROTOCOL_SUPPORT: 客户端支持处理的协议, protocol/scheme 11 | ASYNC_EXECUTE: 指定客户端的是否为异步实现 12 | TIMEOUT: 客户端默认的连接connect,读取read超时参数 13 | """ 14 | 15 | TIMEOUT = 10 16 | 17 | def __init__(self, session, 18 | source_uri, 19 | progress, 20 | resume_capability, 21 | **kwargs): 22 | """ 23 | Args: 24 | session: 客户端会话 25 | source_uri: 下载源SourceUri对象 26 | progress: 请求进度对象Progress 27 | resume_capability: 是否支持断点续传,若为None则代表不确定,连接后将根据实际情况赋值 28 | """ 29 | self.source_uri = source_uri 30 | self.progress = progress 31 | self.resume_capability = resume_capability 32 | self.kwargs = kwargs 33 | 34 | self._closed = False 35 | self.session = session 36 | self.resp = None 37 | 38 | async def connect(self): 39 | """ (可定义非异步方法)客户端连接 40 | 41 | Returns: 42 | UriResponse对象,该对象指定了资源的基本信息。 43 | """ 44 | raise NotImplementedError 45 | 46 | async def fetch(self): 47 | """ (可定义非异步方法)客户端循环获取数据 """ 48 | raise NotImplementedError 49 | 50 | async def pause(self): 51 | """ 客户端暂停 """ 52 | self._closed = True 53 | raise NotImplementedError 54 | 55 | async def close(self): 56 | raise NotImplementedError 57 | 58 | async def run(self): 59 | 
raise NotImplementedError 60 | 61 | async def __aenter__(self): 62 | """ 异步with enter. 63 | 64 | 进入客户端,准备开始客户端。 65 | 该方法不应该执行非异步的长耗时任务。 66 | 67 | Returns: 68 | 返回自身对象self 69 | """ 70 | self._closed = False 71 | return self 72 | 73 | async def __aexit__(self, exc_type, exc_val, exc_tb): 74 | """ 异步with exit. 75 | 76 | 退出客户端,做必要的链接关闭操作,设置实例变量_closed=True。 77 | """ 78 | coro_or_result = self.close() 79 | # 兼容异步关闭链接方法 80 | if asyncio.iscoroutine(coro_or_result): 81 | await coro_or_result 82 | self.session = None 83 | self._closed = True 84 | 85 | def __enter__(self): 86 | """ 同步with enter. 87 | 88 | 进入客户端,准备开始客户端。 89 | 该方法不应该执行非异步的长耗时任务。 90 | 91 | Returns: 92 | 返回自身对象self 93 | """ 94 | self._closed = False 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_val, exc_tb): 98 | self.close() 99 | self.session = None 100 | self._closed = True 101 | 102 | @classmethod 103 | def dlopen(cls, source, progress, **kwargs): 104 | raise NotImplementedError 105 | 106 | def __repr__(self): 107 | status = 'running' 108 | if self._closed: 109 | status = 'closed' 110 | name = self.run.__globals__['NAME'] 111 | support = self.run.__globals__['PROTOCOL_SUPPORT'] 112 | is_async = self.run.__globals__['ASYNC_EXECUTE'] 113 | return f'' 116 | 117 | 118 | def noop(): 119 | """ ignore function. 
""" 120 | return None 121 | 122 | 123 | NAME = 'abstract' 124 | PROTOCOL_SUPPORT = ('http', 'https') 125 | ASYNC_EXECUTE = True 126 | 127 | ClientSession = noop 128 | ClientHandler = AbstractClient 129 | -------------------------------------------------------------------------------- /nbdler/client/aiohttp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import aiohttp 4 | import asyncio 5 | from urllib.parse import urlunparse, urlparse 6 | from nbdler.uri import URIResponse 7 | from .base_http import BaseHTTPClient, content_range_fullsize, content_type_mimetype 8 | from traceback import format_exc 9 | from nbdler.handler import h 10 | import logging 11 | import nbdler 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | class AIOHTTPClient(BaseHTTPClient): 17 | TIMEOUT = 10 18 | 19 | async def connect(self): 20 | session = self.session 21 | source_uri = self.source_uri 22 | proxies = source_uri.proxies or {} 23 | proxy = None 24 | if not proxies.get(source_uri.scheme): 25 | if source_uri.kwargs.get('trust_env', False): 26 | for scheme, proxy_info in aiohttp.helpers.proxies_from_env().items(): 27 | if scheme == source_uri.scheme: 28 | proxy = str(proxy_info.proxy) 29 | proxy_auth = proxy_info.proxy_auth 30 | if proxy_auth is not None: 31 | # 将代理验证添加入代理链接 32 | username = proxy_auth.login 33 | password = proxy_auth.password 34 | proxy_parse = urlparse(str(proxies)) 35 | scheme, netloc, path, params, query, fragment = list(proxy_parse) 36 | netloc = f'{username}:{password}@{netloc}' 37 | proxy = urlunparse([scheme, netloc, path, params, query, fragment]) 38 | break 39 | 40 | cookies = source_uri.cookies 41 | uri, headers = self._build_uri_headers() 42 | 43 | timeout = self.kwargs.get('timeout', None) or AIOHTTPClient.TIMEOUT 44 | timeout = aiohttp.ClientTimeout(sock_connect=timeout, sock_read=timeout) 45 | 46 | try: 47 | resp = await session.get( 48 | uri, 49 | headers=headers, 50 | cookies=cookies, 51 | 
proxy=proxy, 52 | timeout=timeout, 53 | ) 54 | except (aiohttp.ClientError, asyncio.TimeoutError) as error: 55 | raise nbdler.error.TimeoutError(f"{uri}") from error 56 | except BaseException as error: 57 | log.debug(f'{error}', format_exc()) 58 | raise nbdler.error.FatalError() from error 59 | else: 60 | total_length = content_range_fullsize(resp.headers.get('content-range')) 61 | response = URIResponse(str(resp.url), list(resp.headers.items()), resp.status, resp.reason, 62 | total_length, content_type_mimetype(resp.headers.get('content-type')), 63 | self.progress.range, resp.status == 206) 64 | 65 | if self.resume_capability is None: 66 | if resp.status not in (206, 200): 67 | raise nbdler.error.FatalError(f"[{resp.status} {resp.reason}] '{resp.url}'") 68 | self.resume_capability = resp.status == 206 69 | 70 | elif self.resume_capability is True: 71 | if not resp.status == 206: 72 | raise nbdler.error.FatalError(f"[{resp.status} {resp.reason}] '{resp.url}'") 73 | 74 | self.session = session 75 | self.resp = resp 76 | return response 77 | 78 | async def fetch(self): 79 | session, resp = self.session, self.resp 80 | pg = self.progress 81 | 82 | speed_adjuster = h.speed_adjuster 83 | slicer = h.slicer 84 | uri_mgr = h.uri_mgr 85 | file_data = h.file_data 86 | 87 | pg.start() 88 | 89 | uri_mgr.success(resp) 90 | 91 | receive_data = resp.content.read 92 | data = b'' 93 | while True: 94 | if self._closed: 95 | break 96 | 97 | await speed_adjuster.acquire() 98 | await slicer.response() 99 | 100 | previous_len = len(data) 101 | remain_len = pg.total_length - pg.walk_length 102 | try: 103 | if remain_len >= 8192: 104 | data += await receive_data(8192) 105 | elif remain_len > 0: 106 | data += await receive_data(remain_len) 107 | else: 108 | break 109 | except asyncio.TimeoutError as err: 110 | uri_mgr.timeout(err) 111 | break 112 | except BaseException as err: 113 | uri_mgr.fatal(err) 114 | break 115 | 116 | walk_len = len(data) - previous_len 117 | if not walk_len: 118 | 
if resp.content_length is None: 119 | pg.set_walk_finish() 120 | break 121 | 122 | pg.walk(walk_len) 123 | 124 | if pg.walk_length >= pg.total_length: 125 | break 126 | elif len(data) >= 65536: # 64 KB 127 | await file_data.store(data) 128 | data = b'' 129 | if data: 130 | await file_data.store(data) 131 | 132 | pg.stop() 133 | 134 | async def run(self): 135 | if self.resp: 136 | self.close() 137 | 138 | await h.slicer.response() 139 | try: 140 | resp = await self.connect() 141 | except nbdler.error.UriError as err: 142 | h.uri_mgr.fatal(err) 143 | raise 144 | else: 145 | h.uri_mgr.success(resp) 146 | 147 | # self.validate_token(resp) 148 | if not self._closed: 149 | await self.fetch() 150 | 151 | def close(self): 152 | session = self.session 153 | resp = self.resp 154 | self.session = None 155 | self.resp = None 156 | if resp: 157 | resp.release() 158 | resp.close() 159 | 160 | @classmethod 161 | async def dlopen(cls, source, progress, **kwargs): 162 | async with ClientSession() as session: 163 | async with cls(session, source, progress, None, **kwargs) as cli: 164 | resp = await cli.connect() 165 | size = resp.length 166 | progress._range = (0, size) 167 | 168 | return resp 169 | 170 | 171 | NAME = 'aiohttp' 172 | PROTOCOL_SUPPORT = ('http', 'https') 173 | ASYNC_EXECUTE = True 174 | 175 | ClientHandler = AIOHTTPClient 176 | 177 | 178 | class ClientSession(aiohttp.ClientSession): 179 | async def close(self) -> None: 180 | await super().close() 181 | 182 | # doc: https://docs.aiohttp.org/en/latest/client_advanced.html#graceful-shutdown 183 | # 会话关闭强制等待避免异常 184 | await asyncio.sleep(0.25) 185 | 186 | -------------------------------------------------------------------------------- /nbdler/client/base_http.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from wsgiref.headers import Headers 4 | from urllib.parse import urlunparse 5 | from .abstract import AbstractClient 6 | from ..utils import update_range_field 7 | 
import logging 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class BaseHTTPClient(AbstractClient): 13 | NAME = 'base_http' 14 | PROTOCOL_SUPPORT = ('http', 'https') 15 | ASYNC_EXECUTE = None 16 | TIMEOUT = 10 17 | 18 | def _build_uri_headers(self): 19 | source_uri = self.source_uri 20 | pg = self.progress 21 | uri = source_uri.uri 22 | headers = Headers(source_uri.headers.items()) 23 | 24 | if self.resume_capability is not False: 25 | range_field = source_uri.range_field 26 | if range_field is None: 27 | range_field = { 28 | 'Range': 'bytes={begin}-{end_with}' 29 | } 30 | 31 | scheme, netloc, path, params, query, fragment = list(source_uri.urlparse) 32 | req_range = (pg.begin + pg.walk_length, pg.end) 33 | query = (query + ''.join( 34 | [f'{k}={update_range_field(v, req_range)}' 35 | for k, v in range_field.items() if k.startswith('&')] 36 | )).lstrip('&') 37 | 38 | for k, v in range_field.items(): 39 | if not k.startswith('&'): 40 | headers.add_header(k, update_range_field(v, req_range)) 41 | 42 | # 由于一些浏览器地址栏会直接把空格显示出来而不进行编码,所以这里单独对空格编码。 43 | uri = urlunparse((scheme, netloc, path, params, query, fragment)).replace(' ', '%20') 44 | 45 | return uri, headers 46 | 47 | def close(self): 48 | session = self.session 49 | resp = self.resp 50 | self.session = None 51 | self.resp = None 52 | if resp: 53 | resp.close() 54 | 55 | async def pause(self): 56 | self._closed = True 57 | 58 | def validate_token(self, current_resp): 59 | resp = self.resp 60 | if resp is None: 61 | raise ValueError('cannot validate on a unconnected client.') 62 | 63 | source_resp = self.source_uri.getresponse() 64 | 65 | # TODO: 在多下载源的情况下对下载源之间经过资源数据采样校验,通过后作为响应基准 66 | if source_resp is None: 67 | raise ValueError('下载源没有经过校验的资源响应基准。') 68 | 69 | validate_name = ['length', 'etag', 'content_md5', 'content_type', 'last_modified'] 70 | 71 | if not all([getattr(current_resp, name) == getattr(source_resp, name) 72 | for name in validate_name]): 73 | log.warning([f'{name}: 
({getattr(current_resp, name)}) ?= ({getattr(source_resp, name)})' 74 | for name in validate_name]) 75 | raise ValueError('connection resource token not match.') 76 | return True 77 | 78 | 79 | def content_range_fullsize(content_range): 80 | """ 从HTTP响应头中的Content-Range中获取文件总长。""" 81 | if content_range is None: 82 | return None 83 | return int(content_range.rsplit('/', 1)[-1]) 84 | 85 | 86 | def content_type_mimetype(content_type_header): 87 | """ 从HTTP响应头中的Content-Type中获取文件mimetype类型。""" 88 | if content_type_header is None: 89 | return None 90 | return content_type_header.split(';', 1)[0] or None 91 | -------------------------------------------------------------------------------- /nbdler/client/requests.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import requests 4 | from nbdler.uri import URIResponse 5 | from traceback import format_exc 6 | from .base_http import BaseHTTPClient, content_range_fullsize, content_type_mimetype 7 | from nbdler.handler import h 8 | import logging 9 | import nbdler 10 | from requests.utils import get_environ_proxies 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class HTTPClient(BaseHTTPClient): 16 | TIMEOUT = 10 17 | 18 | def connect(self): 19 | session = self.session 20 | source_uri = self.source_uri 21 | proxies = source_uri.proxies or {} 22 | if not proxies.get(source_uri.scheme): 23 | if source_uri.kwargs.get('trust_env', False): 24 | # Set environment's proxies. 
25 | no_proxy = proxies.get('no_proxy') if proxies is not None else None 26 | env_proxies = get_environ_proxies(source_uri.uri, no_proxy=no_proxy) 27 | for (k, v) in env_proxies.items(): 28 | proxies.setdefault(k, v) 29 | 30 | cookies = source_uri.cookies 31 | verify = source_uri.kwargs.get('verify', True) 32 | uri, headers = self._build_uri_headers() 33 | timeout = self.kwargs.get('timeout', None) or HTTPClient.TIMEOUT 34 | try: 35 | resp = requests.get( 36 | source_uri.uri, 37 | headers=headers, 38 | proxies=proxies, 39 | cookies=cookies, 40 | timeout=timeout, 41 | stream=True, 42 | verify=verify 43 | ) 44 | except requests.exceptions.Timeout as error: 45 | raise nbdler.error.TimeoutError(f"{uri}") from error 46 | except BaseException as error: 47 | log.debug(f'{error}', format_exc()) 48 | raise nbdler.error.FatalError() from error 49 | else: 50 | total_length = content_range_fullsize(resp.headers.get('content-range')) 51 | response = URIResponse(str(resp.url), list(resp.headers.items()), resp.status_code, resp.reason, 52 | total_length, content_type_mimetype(resp.headers.get('content-type')), 53 | self.progress.range, resp.status_code == 206) 54 | 55 | if self.resume_capability is None: 56 | if resp.status_code not in (206, 200): 57 | raise nbdler.error.FatalError(f"[{resp.status_code} {resp.reason}] '{resp.url}'") 58 | self.resume_capability = resp.status_code == 206 59 | 60 | elif self.resume_capability is True: 61 | if not resp.status_code == 206: 62 | raise nbdler.error.FatalError(f"[{resp.status_code} {resp.reason}] '{resp.url}'") 63 | 64 | self.session = session 65 | self.resp = resp 66 | return response 67 | 68 | def fetch(self): 69 | session, resp = self.session, self.resp 70 | pg = self.progress 71 | 72 | speed_adjuster = h.speed_adjuster 73 | slicer = h.slicer 74 | uri_mgr = h.uri_mgr 75 | file_data = h.file_data 76 | receive_data = resp.raw.read 77 | 78 | pg.start() 79 | 80 | uri_mgr.success(resp) 81 | 82 | data = b'' 83 | while True: 84 | if 
self._closed: 85 | break 86 | 87 | speed_adjuster.acquire_threadsafe() 88 | slicer.response_threadsafe() 89 | 90 | previous_len = len(data) 91 | remain_len = pg.total_length - pg.walk_length 92 | try: 93 | if remain_len >= 8192: 94 | data += receive_data(8192) 95 | elif remain_len > 0: 96 | data += receive_data(remain_len) 97 | else: 98 | break 99 | except requests.exceptions.Timeout as err: 100 | uri_mgr.timeout(err) 101 | break 102 | except BaseException as err: 103 | uri_mgr.fatal(err) 104 | break 105 | 106 | walk_len = len(data) - previous_len 107 | if not walk_len: 108 | 109 | if resp.headers.get('content-length') is None: 110 | pg.set_walk_finish() 111 | break 112 | 113 | pg.walk(walk_len) 114 | 115 | if pg.walk_length >= pg.total_length: 116 | break 117 | elif len(data) >= 65536: # 64 KB 118 | file_data.store_threadsafe(data) 119 | data = b'' 120 | if data: 121 | file_data.store_threadsafe(data) 122 | 123 | pg.stop() 124 | 125 | def run(self): 126 | h.slicer.response_threadsafe() 127 | try: 128 | resp = self.connect() 129 | except nbdler.error.UriError as err: 130 | h.uri_mgr.fatal(err) 131 | raise 132 | else: 133 | h.uri_mgr.success(resp) 134 | 135 | self.validate_token(resp) 136 | if not self._closed: 137 | self.fetch() 138 | 139 | @classmethod 140 | async def dlopen(cls, source, progress, **kwargs): 141 | with cls(None, source, progress, None, **kwargs) as cli: 142 | resp = cli.connect() 143 | return resp 144 | 145 | 146 | def session_without_trust_env(): 147 | session = requests.Session() 148 | # 默认创建不使用环境中的代理的会话,如要使用设置下载源的trust_env参数。 149 | session.trust_env = False 150 | return session 151 | 152 | 153 | NAME = 'requests' 154 | PROTOCOL_SUPPORT = ('http', 'https') 155 | ASYNC_EXECUTE = False 156 | 157 | ClientHandler = HTTPClient 158 | ClientSession = session_without_trust_env 159 | 160 | -------------------------------------------------------------------------------- /nbdler/download.py: 
-------------------------------------------------------------------------------- 1 | 2 | from concurrent.futures.thread import ThreadPoolExecutor 3 | from nbdler.handler import ( 4 | SpeedAdjuster, 5 | AIOReaderWriter, 6 | BlockSlicer, 7 | FileTempData, 8 | ClientWorker, 9 | URIStatusManager, 10 | GatherException, 11 | h, Handlers) 12 | from .client import get_policy, ClientPolicy 13 | from .version import VERSION 14 | from .utils import forever_loop_in_executor 15 | from traceback import format_exc 16 | import weakref 17 | import warnings 18 | import asyncio 19 | import os 20 | 21 | __all__ = ( 22 | 'Downloader', 23 | ) 24 | 25 | 26 | class DownloadConfigure: 27 | ADJUSTABLE = frozenset( 28 | {'max_concurrent', 'max_speed', 'buffer_size', 'timeout', 'interval', 'client_policy'}) 29 | 30 | def __init__(self, resume_capability, max_concurrent, chunk_size, buffer_size, timeout=10, 31 | max_speed=None, downloading_ext='.downloading', interval=0.5, client_policy=None, **kwargs): 32 | 33 | self.version = VERSION 34 | self.resume_capability = resume_capability 35 | self.max_concurrent = max_concurrent 36 | self.chunk_size = chunk_size 37 | self.buffer_size = buffer_size 38 | self.timeout = timeout 39 | self.interval = interval 40 | self.max_speed = max_speed 41 | self.downloading_ext = downloading_ext 42 | self.client_policy = client_policy 43 | self.kwargs = kwargs 44 | 45 | def set(self, **kwargs): 46 | """ 设置配置。 47 | Args: 48 | **kwargs: 49 | max_concurrent: 最大并发数 50 | max_speed: 最大速度限制 51 | buffer_size: 最大文件缓冲大小 52 | timeout: 客户端连接接收超时时间 53 | interval: 速度调节间隙 54 | client_policy: 客户端处理策略 55 | """ 56 | attrs = set(kwargs).intersection(DownloadConfigure.ADJUSTABLE) 57 | for attr in attrs: 58 | self.__setattr__(attr, kwargs[attr]) 59 | 60 | def dumps(self): 61 | opts = dict(self.__dict__) 62 | client_policy = self.client_policy 63 | opts['client_policy'] = dict(client_policy) 64 | opts.update(opts.pop('kwargs')) 65 | return opts 66 | 67 | @classmethod 68 | def loads(cls, 
dumpy): 69 | config = cls(**dumpy) 70 | if not isinstance(config.client_policy, ClientPolicy): 71 | config.client_policy = get_policy(**config.client_policy) 72 | return config 73 | 74 | def __repr__(self): 75 | return (f'') 77 | 78 | 79 | class Downloader: 80 | def __init__(self, file, uris, block_grp, *, handlers=None, **kwargs): 81 | 82 | self.file = file 83 | self.uris = uris 84 | self.block_grp = block_grp 85 | self.config = DownloadConfigure.loads(kwargs) 86 | 87 | self._executor = None 88 | 89 | self._loop = None 90 | self._future = None 91 | self._closed = False 92 | self._handlers = Handlers() 93 | 94 | if handlers is None: 95 | handlers = [] 96 | 97 | buildin_handlers = [ 98 | ClientWorker, 99 | SpeedAdjuster, 100 | FileTempData, 101 | AIOReaderWriter, 102 | BlockSlicer, 103 | GatherException, 104 | URIStatusManager, 105 | ] 106 | handlers.extend(buildin_handlers) 107 | for handler in handlers: 108 | if handler.name in self._handlers: 109 | continue 110 | if isinstance(handler, type): 111 | handler = handler() 112 | 113 | handler.add_parent(weakref.proxy(self)) 114 | self._handlers[handler.name] = handler 115 | 116 | def exceptions(self, exception_type=None, *, just_new_exception=True): 117 | """ 线程安全获取异常 118 | 119 | 以生成器的形式获取内部发生的异常,当下载任务暂停或者完成后将中断生成器的迭代。 120 | 使用方式: 121 | for exception in dl.exceptions(): 122 | do_some_works(exception) 123 | 124 | Args: 125 | exception_type: 指定异常类型,可选ClientError、HandlerError。默认None则获取所有异常。 126 | just_new_exception: 是否忽略当前时间前的旧异常,仅返回之后的新异常。 127 | 128 | Yields: 129 | 内部出现的client或handler异常对象。 130 | """ 131 | yield from self._handlers.exception.acquire_threadsafe( 132 | exception_type, just_new_exception=just_new_exception) 133 | 134 | def aexceptions(self, exception_type=None, *, just_new_exception=True): 135 | """ 异步返回异常错误。 具体参见exceptions()方法。 136 | 使用方式: 137 | async for exception in dl.aexceptions(): 138 | do_some_works(exception) 139 | """ 140 | return self._handlers.exception.acquire( 141 | exception_type, 
just_new_exception=just_new_exception) 142 | 143 | async def astart(self): 144 | """ 在当前事件循环中运行下载器。""" 145 | if self._closed: 146 | raise RuntimeError('Downloader is already closed.') 147 | loop = asyncio.get_running_loop() 148 | self._loop = loop 149 | if self.block_grp.is_done_finished(): 150 | raise RuntimeError('download is completed.') 151 | 152 | self._future = loop.create_future() 153 | 154 | async def handler_worker(hd): 155 | try: 156 | return await hd.start() 157 | except BaseException as err: 158 | h.exception.handler_error(err) 159 | self.pause(0) 160 | 161 | with h.enter(self._handlers, loop): 162 | self.block_grp.activate() 163 | # prepare() 164 | await self._handlers.prepare() 165 | # start() 166 | result = await asyncio.gather( 167 | *[handler_worker(handler) for handler in h.iter_all()] 168 | ) 169 | # join() 170 | await self._handlers.join() 171 | self.block_grp.deactivate() 172 | 173 | self._future.set_result(result) 174 | 175 | def start(self, *, loop=None): 176 | """ 在指定的循环中运行下载器。 177 | 178 | 若loop=None不指定事件循环,那么将创建新的线程作为下载器的事件循环。 179 | 180 | Args: 181 | loop: 指定事件循环运行下载器 182 | 183 | Returns: 184 | 返回下载器运行的concurrent.future.Future对象 185 | """ 186 | 187 | if self._closed: 188 | raise RuntimeError('Downloader is already closed.') 189 | 190 | if self.block_grp.is_finished(): 191 | raise RuntimeError('download is already finished.') 192 | 193 | if self._loop is not None: 194 | loop = self._loop 195 | 196 | if loop is None: 197 | def cb(f): 198 | nonlocal executor 199 | executor.shutdown(False) 200 | 201 | executor = ThreadPoolExecutor( 202 | max_workers=1, thread_name_prefix=f'Downloader {self.file.name} {self.file.size}') 203 | exec_fut = forever_loop_in_executor(executor) 204 | exec_fut.add_done_callback(cb) 205 | self._executor = executor 206 | loop = exec_fut.get_loop() 207 | 208 | fut = asyncio.run_coroutine_threadsafe(self.astart(), loop=loop) 209 | self._loop = loop 210 | return fut 211 | 212 | async def apause(self): 213 | """ 异步暂停等待。""" 
214 | if self._closed: 215 | raise RuntimeError('Downloader is already closed.') 216 | result = await self._await_loopsafe(self._handlers.pause()) 217 | await self.ajoin() 218 | return result 219 | 220 | async def aclose(self): 221 | """ 异步关闭下载器。""" 222 | if self._closed: 223 | raise RuntimeError('Downloader is already closed.') 224 | 225 | if not self._future.done(): 226 | raise RuntimeError('cannot close a running Downloader.') 227 | result = await self._await_loopsafe(self._handlers.close()) 228 | await self.ajoin() 229 | self._closed = True 230 | 231 | if self._executor: 232 | self._loop.call_soon_threadsafe(self._loop.stop) 233 | 234 | # 若文件已完毕,去除.downloading后缀 235 | if self.block_grp.is_done_finished(): 236 | file = self.file 237 | filepath = f'{file.pathname}{self.config.downloading_ext}' 238 | start_filepath = file.pathname 239 | target_filepath = start_filepath 240 | postfix = 0 241 | while True: 242 | try: 243 | os.rename(filepath, target_filepath) 244 | except FileExistsError: 245 | postfix += 1 246 | target_filepath = os.path.join(file.path, file.number_name(postfix)) 247 | else: 248 | if postfix != 0: 249 | file.name = file.number_name(postfix) 250 | break 251 | 252 | # 删除下载配置文件 253 | os.unlink(f'{start_filepath}{self.config.downloading_ext}.cfg') 254 | return result 255 | 256 | async def ajoin(self): 257 | """ 异步等待下载器结束。""" 258 | if self._closed: 259 | raise RuntimeError('Downloader is already closed.') 260 | return await self._await_loopsafe(self._future) 261 | 262 | async def _await_loopsafe(self, *coros_or_futures): 263 | """ 事件循环安全的异步等待。 264 | 265 | Args: 266 | *coros_or_futures: coroutine或future对象列表。 267 | 268 | Returns: 269 | 返回coros_or_futures的返回结果列表。 270 | """ 271 | current_loop = asyncio.get_running_loop() 272 | loop = self._loop 273 | if loop is None: 274 | loop = current_loop 275 | 276 | async def _execute_loop(): 277 | with h.enter(self._handlers): 278 | r = await asyncio.gather(*coros_or_futures) 279 | return r 280 | fut = 
asyncio.run_coroutine_threadsafe(_execute_loop(), loop) 281 | result = await asyncio.wrap_future(fut) 282 | 283 | return result 284 | 285 | def _call_threadsafe(self, coroutine, timeout=None): 286 | """ 下载器的异步操作线程安全化。 287 | Args: 288 | coroutine: 异步操作协程 289 | timeout: 超时等待事件 290 | 291 | Returns: 292 | 当timeout=0时,返回concurrent.future.Future对象, 293 | 否则,协程coroutine的执行结果或抛出超时异常。 294 | """ 295 | loop = self._loop 296 | assert loop 297 | future = asyncio.run_coroutine_threadsafe(coroutine, loop) 298 | if timeout == 0: 299 | return future 300 | return future.result(timeout) 301 | 302 | def pause(self, timeout=None): 303 | """ 线程安全暂停下载器。具体参见apause方法""" 304 | if self._closed: 305 | raise RuntimeError('Downloader is already closed.') 306 | return self._call_threadsafe(self.apause(), timeout=timeout) 307 | 308 | def close(self, timeout=None): 309 | """ 线程安全关闭下载器。具体参见aclose方法""" 310 | if self._closed: 311 | raise RuntimeError('Downloader is already closed.') 312 | return self._call_threadsafe(self.aclose(), timeout=timeout) 313 | 314 | def join(self, timeout=None): 315 | """ 线程安全等待下载器。具体参见ajoin方法""" 316 | if self._closed: 317 | raise RuntimeError('Downloader is already closed.') 318 | return self._call_threadsafe(self.ajoin(), timeout=timeout) 319 | 320 | def dumps(self): 321 | dumpy = { 322 | 'config': self.config.dumps(), 323 | 'file': self.file.dumps(), 324 | 'uris': self.uris.dumps(), 325 | 'block_grp': self.block_grp.dumps(), 326 | } 327 | return dumpy 328 | 329 | @classmethod 330 | def loads(cls, dumpy, handlers=None): 331 | from nbdler.uri import URIs 332 | from nbdler.file import File 333 | from nbdler.block import BlockGroup 334 | 335 | uris = URIs.loads(dumpy['uris']) 336 | file = File(**dumpy['file']) 337 | block_grp = BlockGroup.loads(dumpy['block_grp']) 338 | return cls(file, uris, block_grp, handlers=handlers, **dumpy['config']) 339 | 340 | transfer_rate = property(lambda self: self.block_grp.transfer_rate) 341 | 342 | average_speed = property(lambda self: 
self.block_grp.average_speed) 343 | 344 | walk_length = property(lambda self: self.block_grp.walk_length) 345 | 346 | done_length = property(lambda self: self.block_grp.done_length) 347 | 348 | remaining_length = property(lambda self: self.block_grp.remaining_length) 349 | 350 | remaining_time = property(lambda self: self.block_grp.remaining_time) 351 | 352 | percent_complete = property(lambda self: self.block_grp.percent_complete) 353 | 354 | is_walk_finished = property(lambda self: self.block_grp.is_walk_finished) 355 | 356 | is_done_finished = property(lambda self: self.block_grp.is_done_finished) 357 | 358 | def is_finished(self): 359 | """ 返回文件是否下载完毕。""" 360 | return self.block_grp.is_finished() and (not self._future or self._future.done()) 361 | 362 | def set_config(self, **kwargs): 363 | """ 配置下载器。参见DownloadConfigure.set()方法。""" 364 | self.config.set(**kwargs) 365 | 366 | def __repr__(self): 367 | running = False 368 | if self._future is not None and not self._future.done(): 369 | running = True 370 | return f'' 371 | 372 | async def __aenter__(self): 373 | return self 374 | 375 | async def __aexit__(self, exc_type, exc_val, exc_tb): 376 | return await self.aclose() 377 | 378 | def __del__(self, _warnings=warnings): 379 | if not self._closed: 380 | self.close() 381 | 382 | 383 | -------------------------------------------------------------------------------- /nbdler/error.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Error(Exception): 4 | def __init__(self, *args, **kwargs): 5 | self.args = args 6 | self.kwargs = kwargs 7 | 8 | def __repr__(self): 9 | return f'<{self.__class__.__name__} args={self.args} kwargs={self.kwargs}>' 10 | 11 | 12 | class GatherableError(Error): 13 | def __init__(self, exception, exc_info, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | self.exception = exception 16 | self.exc_info = exc_info 17 | 18 | def __repr__(self): 19 | return f'<{self.__class__.__name__} 
exc="{self.exception}">' 20 | 21 | 22 | class ClientError(GatherableError): 23 | pass 24 | 25 | 26 | class HandlerError(GatherableError): 27 | pass 28 | 29 | 30 | class UriError(Error): 31 | pass 32 | 33 | 34 | class TimeoutError(UriError): 35 | pass 36 | 37 | 38 | class FatalError(UriError): 39 | pass 40 | 41 | 42 | class MaxRetriesExceeded(ClientError): 43 | pass 44 | -------------------------------------------------------------------------------- /nbdler/file.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class File: 5 | __slots__ = 'name', 'path', 'size' 6 | 7 | def __init__(self, path, name, size): 8 | """ 9 | :param 10 | path : 文件路径(不包括文件名) 11 | name : 文件名称 12 | size : 文件大小 13 | """ 14 | self.name = name 15 | self.path = path 16 | self.size = size 17 | 18 | @property 19 | def extension(self): 20 | return os.path.splitext(self.name)[-1] 21 | 22 | @property 23 | def pathname(self): 24 | return os.path.join(self.path, self.name) 25 | 26 | def number_name(self, number): 27 | just_name, ext = os.path.splitext(self.name) 28 | return f'{just_name}({number}){ext}' 29 | 30 | def __repr__(self): 31 | return f'' 32 | 33 | def dumps(self): 34 | return { 35 | 'path': self.path, 36 | 'name': self.name, 37 | 'size': self.size, 38 | } 39 | -------------------------------------------------------------------------------- /nbdler/handler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import defaultdict 3 | from contextvars import ContextVar 4 | from contextlib import asynccontextmanager, contextmanager 5 | from concurrent.futures.thread import ThreadPoolExecutor 6 | from nbdler.error import HandlerError, ClientError 7 | from functools import partial 8 | from copy import copy 9 | from operator import attrgetter 10 | import threading 11 | from nbdler.utils import UsageInfo 12 | from traceback import format_exc 13 | import logging 14 | 
import weakref 15 | import json 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | block_context = ContextVar('block context') 20 | 21 | 22 | def _lookup_block(): 23 | """ 查找上下文的下载块。""" 24 | return block_context.get() 25 | 26 | 27 | def await_coroutine_threadsafe(coro, timeout=None): 28 | """ 线程安全等待协程结束。 29 | Args: 30 | coro: 协程 31 | timeout: 等待超时事件 32 | 33 | Returns: 34 | 返回协程的执行结果,或抛出异常。 35 | """ 36 | fut = asyncio.run_coroutine_threadsafe(coro, h.loop) 37 | return fut.result(timeout) 38 | 39 | 40 | class Handlers(dict): 41 | def __init__(self, *args, **kwargs): 42 | super().__init__(*args, **kwargs) 43 | self._ready = None 44 | 45 | def __getattr__(self, item): 46 | return self[item] 47 | 48 | async def prepare(self): 49 | """ Handler启动预处理,通常预启动,做初始化工作。启动标志在该方法设置。""" 50 | self._ready = asyncio.Event(loop=asyncio.get_running_loop()) 51 | result = await asyncio.gather(*[handler.prepare() for handler in self.values()]) 52 | self._ready.set() 53 | return result 54 | 55 | async def start(self): 56 | """ 此方法用于启动Handler的异步工作Handler.run()方法。""" 57 | result = await asyncio.gather(*[handler.start() for handler in self.values()]) 58 | self._ready = None 59 | return result 60 | 61 | async def close(self): 62 | await self._wait_for_ready() 63 | return await asyncio.gather(*[handler.close() for handler in self.values()]) 64 | 65 | async def _wait_for_ready(self): 66 | """ 等待Handler准备就绪。""" 67 | ready = self._ready 68 | if ready is not None: 69 | await ready.wait() 70 | 71 | async def pause(self): 72 | await self._wait_for_ready() 73 | return await asyncio.gather(*[handler.pause() for handler in self.values()]) 74 | 75 | async def join(self): 76 | await self._wait_for_ready() 77 | return await asyncio.gather(*[handler.join() for handler in self.values()]) 78 | 79 | 80 | class _HandlerReference(threading.local): 81 | # handlers 异步上下文。 82 | __context__ = ContextVar('handlers dict') 83 | 84 | def __init__(self): 85 | # handlers 线程上下文事件循环 86 | self._loop = None 87 | 88 | 
    @property
    def loop(self):
        return self._loop

    @property
    def owner(self):
        """ The Handlers mapping bound to the current context. """
        return self.__context__.get()

    @contextmanager
    def enter(self, handlers, loop=None):
        """ Bind `handlers` (and optionally `loop`) to the current context.

        Weak proxies are stored so the reference does not keep the download
        object graph alive.
        """
        assert self._loop or loop
        if loop:
            if not isinstance(loop, weakref.ProxyType):
                loop = weakref.proxy(loop)
            self._loop = loop

        if not isinstance(handlers, weakref.ProxyType):
            handlers = weakref.proxy(handlers)

        token = self.__context__.set(handlers)

        # NOTE(review): no try/finally here — the token is not reset if the
        # with-body raises; confirm whether that is intended.
        yield self
        self.__context__.reset(token)

    def __getattr__(self, item):
        return self.__context__.get()[item]

    def __iter__(self):
        return iter(self.__context__.get().values())

    iter_all = __iter__


h = _HandlerReference()


class Handler:
    """ Base class for download handlers (orchestrated via Handlers). """
    name = None

    parent = None
    _future = None

    def add_parent(self, parent):
        self.parent = parent

    async def prepare(self, *args, **kwargs):
        pass

    async def start(self):
        """ Create the completion future and run the handler workload. """
        assert not self._future or self._future.done()
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        self._future = future
        try:
            result = await self.run()
        finally:
            # NOTE(review): the future always resolves to None and run()'s
            # return value is discarded, so join() always returns None.
            future.set_result(None)

    async def run(self, *args, **kwargs):
        raise NotImplementedError

    async def pause(self, *args, **kwargs):
        raise NotImplementedError

    async def close(self, *args, **kwargs):
        raise NotImplementedError

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    async def join(self):
        """ Wait until start() has finished. """
        return await self._future

    def info_getter(self):
        return None


# TODO: with multiple download sources, sample-check resource data between the
# sources and use the agreed response as the baseline.
class SampleValidate(Handler):
    name = 'uri_validate'


class GatherException(Handler):
    """ Collected download exception state.

    Responsibilities:
        1. collect exceptions raised while downloading
        2. push the collected exceptions to external consumers
    """
    name = 'exception'

    def __init__(self):
        # thread-safe exception storage, keyed by exception class
        self._exceptions = defaultdict(list)
        self._cond = threading.Condition(threading.Lock())

        # callbacks that release async waiters created by acquire()
        self._waiter_callbacks = set()

        self._stopped = False

    def handler_error(self, exception):
        """ Push a handler exception.

        Args:
            exception: the exception object that occurred.
        """
        with self._cond:
            self._exceptions[HandlerError].append(
                HandlerError(exception, format_exc()))
            # release thread waiters
            self._cond.notify_all()

            # release async waiters
            for waiter in self._waiter_callbacks:
                waiter()

    def client_error(self, exception):
        """ Push a client exception.

        Args:
            exception: the exception object that occurred.
        """
        with self._cond:
            self._exceptions[ClientError].append(
                ClientError(exception, format_exc()))
            # release thread waiters
            self._cond.notify_all()
            # release async waiters
            for waiter in self._waiter_callbacks:
                waiter()

    def _fetch_exceptions(self, exception_type=None):
        # Snapshot of stored exceptions, optionally filtered by type.
        if exception_type is None:
            exceptions = []
            for v in self._exceptions.values():
                exceptions.extend(v)
        else:
            exceptions = list(self._exceptions[exception_type])

        return exceptions

    def acquire_threadsafe(self, exception_type=None, *, just_new_exception=True):
        """ Thread-safely acquire exceptions.

        Yields internal exceptions as a generator; iteration stops once the
        download task is paused or finished.

        Args:
            exception_type: ClientError or HandlerError; None yields all types.
            just_new_exception: ignore exceptions raised before this call and
                only yield ones raised afterwards.

        Yields:
            client or handler exception objects raised internally.
        """
        old_exc_list = []
        if just_new_exception:
            old_exc_list = self._fetch_exceptions()
        while True:
            with self._cond:
                if self._stopped:
                    break

                # Were new exceptions pushed while the previous batch was being
                # consumed?  If so, handle those first instead of waiting.
                before_new_exc = self._fetch_exceptions()
                before_new_diff = sorted(
                    set(before_new_exc).difference(old_exc_list),
                    key=before_new_exc.index)

                if not before_new_diff:
                    self._cond.wait()

            if not before_new_diff:
                new_exc_list = self._fetch_exceptions(exception_type)
                new_exc_set = set(new_exc_list).difference(old_exc_list)
            else:
                new_exc_list = before_new_exc
                new_exc_set = before_new_diff

            if not new_exc_set:
                continue

            # yield in original push order
            for exc in sorted(new_exc_set, key=new_exc_list.index):
                yield exc
            old_exc_list = new_exc_list

    async def acquire(self, exception_type=None, *, just_new_exception=True):
        """ Asynchronous counterpart of acquire_threadsafe(); see that method.

        Args:
            exception_type: ClientError or HandlerError; None yields all types.
            just_new_exception: ignore exceptions raised before this call and
                only yield ones raised afterwards.

        Yields:
            client or handler exception objects raised internally.
        """
        def release_waiter():
            nonlocal cond, loop

            async def _release():
                async with cond:
                    cond.notify_all()
            asyncio.run_coroutine_threadsafe(_release(), loop=loop)

        loop = asyncio.get_running_loop()
        cond = asyncio.Condition(asyncio.Lock())
        # NOTE(review): the callback is never removed from _waiter_callbacks
        # when this generator is abandoned — confirm whether that leaks.
        self._waiter_callbacks.add(release_waiter)

        old_exc_list = []
        # NOTE(review): this condition is inverted relative to
        # acquire_threadsafe(), which prefetches when just_new_exception is
        # True — one of the two looks like a bug; confirm against upstream.
        if not just_new_exception:
            old_exc_list = self._fetch_exceptions(exception_type)
        while True:
            with self._cond:
                if self._stopped:
                    break
                before_new_exc = self._fetch_exceptions()
                before_new_diff = sorted(
                    set(before_new_exc).difference(old_exc_list),
                    key=before_new_exc.index)

            if not before_new_diff:
                async with cond:
                    await cond.wait()

            if not before_new_diff:
                new_exc_list = self._fetch_exceptions(exception_type)
                new_exc_set = set(new_exc_list).difference(old_exc_list)
            else:
                new_exc_list = before_new_exc
                new_exc_set = before_new_diff

            if not new_exc_set:
                continue

            for exc in sorted(new_exc_set, key=new_exc_list.index):
                yield exc
            old_exc_list = new_exc_list

    async def run(self):
        self._stopped = False
        self._exceptions.clear()

    async def close(self):
        pass

    async def pause(self):
        # Stop flag first, then wake every waiter so generators can exit.
        self._stopped = True
        with self._cond:
            self._cond.notify_all()

        for waiter in self._waiter_callbacks:
            waiter()

    def __repr__(self):
        count = {k: len(v) for k, v in self._exceptions.items()}
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


class URIStatus:
    """ Usage and health statistics of a single download source URI. """

    def __init__(self, uri):
        self.source_uri = uri
        self._used = 0
        self._success = 0
        self._timeout = 0
        self._fatal = 0

        self._logs = []
        # block -> UsageInfo sampling the block's walk_length
        self._users = {}

        # moving average window (8 samples) of connection delays
        self._conn_delay_moving_avg = [0 for _ in range(8)]
        self._conn_delay = float('inf')

    def log(self, resp):
        self._logs.append(resp)

    def _response_delay(self, time_s):
        """ Fold one delay sample into the moving average; 5+ failures in the
        window mark the source as infinitely slow. """
        moving_avg = self._conn_delay_moving_avg
        moving_avg.append(time_s)
        moving_avg.pop(0)
        failure_count = moving_avg.count(float('inf'))
        if failure_count >= 5:
            self._conn_delay = float('inf')
        else:
            self._conn_delay = sum([delay for delay in moving_avg
                                    if delay != float('inf')]) / (8 - failure_count)

    def use(self, block):
        """ Register `block` as a consumer of this source. """
        self._used += 1
        self._users[block] = UsageInfo(lambda: block.progress.walk_length)

    def timeout(self, block, resp):
        self._timeout += 1
        self.log(f'{block} {resp}')

    def success(self, block, resp):
        self._success += 1
        self.log(f'{block} {resp}')
        # TODO: with multiple download sources, sample-check resource data
        # between the sources and use the agreed response as the baseline.
        if self.source_uri.getresponse() is None:
            self.source_uri.set_response(resp)
        self._response_delay(self._users[block].timelength())

    def fatal(self, block, resp):
        self._fatal += 1
        self.log(f'{block} {resp}')

    def disuse(self, block):
        self._used -= 1
        del self._users[block]

    def is_available(self):
        """ Return whether this source is still below its connection limit. """
        return self.source_uri.max_conn is None or self.source_uri.max_conn > self._used

    @property
    def users(self):
        return self._users

    def get_copy(self):
        """ Return a copy of the source URI object. """
        return copy(self.source_uri)

    def transfer_rate(self):
        """ Return the aggregate transfer rate of this source. """
        return sum([user.rate for user in self._users.values()])

    def average_speed(self):
        """ Return the average per-connection transfer rate of this source.

        NOTE(review): raises ZeroDivisionError when there are no users —
        confirm callers only invoke this when the source is in use.
        """
        users = [user.rate for user in self._users.values()]
        return sum(users) / len(users)

    def refresh(self):
        """ Refresh the usage samples of every consumer of this source. """
        for user in self._users.values():
            user.refresh()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return (f'')

    def info(self):
        return {
            'transfer_rate': self.transfer_rate(),
            'used': self._used,
            'success': self._success,
            'timeout': self._timeout,
            'fatal': self._fatal,
            'connection_delay': self._conn_delay
        }


class URIStatusManager(Handler):
    """ Manages download sources through URIStatus objects.

    Responsibilities:
        1. manage and allocate the source URIs
        2. monitor the working state of each source URI
    """

    name = 'uri_mgr'

    def __init__(self):
        self._uri_status = {}
        self._cond = None
        self._stopped = False

    async def prepare(self):
        self._cond = asyncio.Condition()
        for uri in self.parent.uris:
            self._uri_status.setdefault(uri.id, URIStatus(uri))

    async def get_uri(self):
        """ Return a URIStatus object for a client to use.

        Allocation favours covering all sources by usage count first, then
        picks the source with the fastest average per-connection speed.

        Returns:
            The allocated URIStatus object.
        """
        avl_uris = self._find_avl_uris()
        while not avl_uris:
            # NOTE(review): awaiting Condition.wait() without `async with
            # self._cond` raises RuntimeError, and nothing ever notifies this
            # condition — confirm this branch is reachable/intended.
            await self._cond.wait()
            avl_uris = self._find_avl_uris()

        uri = avl_uris[0]
        if uri._used > 0:
            uri = sorted(avl_uris, key=lambda u: u.average_speed(), reverse=True)[0]
        return uri

    def _find_avl_uris(self):
        # Least-used first, filtered down to sources below their limit.
        more_used = sorted(self._uri_status.values(), key=attrgetter('_used'))
        return list(filter(lambda u: u.is_available(), more_used))

    def success(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].success(block, resp)

    def timeout(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].timeout(block, resp)

    def fatal(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].fatal(block, resp)

    async def run(self):
        """ Heartbeat loop: refresh every source's usage samples each second. """
        self._stopped = False
        async_sleep = asyncio.sleep

        uri_status = self._uri_status
        while True:
            await async_sleep(1)
            if self._stopped:
                break

            for status in uri_status.values():
                status.refresh()

        self._cond = None

    async def pause(self):
        self._stopped = True

    async def close(self):
        pass

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {k: v.info() for k, v in self._uri_status.items()}


class ClientWorker(Handler):
    """ (Main handler) asynchronous client dispatch worker.

    Responsibilities:
        1. client session management
        2. dispatching work to download blocks
        3. monitoring work progress
    """
    name = 'client_worker'

    def __init__(self):
        self._block_queue = None
        self._working_blocks = set()
        # solution -> shared client session
        self._client_session = {}
        self._stopped = False
        self._executors = None
        self._tasks = set()

    async def prepare(self):
        self._stopped = False

        self._block_queue = asyncio.Queue()
        self._executors = ThreadPoolExecutor(
            max_workers=self.parent.config.max_concurrent,
            thread_name_prefix=self.parent.file.name
        )

    async def run(self):
        def goto_work(blo):
            """ Execute a download block in the background. """
            def cb(fut):
                # On completion, requeue the block for inspection and remove
                # it from the working set.
                self._block_queue.put_nowait(blo)
                self._working_blocks.remove(blo)

            task = asyncio.run_coroutine_threadsafe(
                self._worker(blo), loop)
            self._working_blocks.add(blo)
            task.add_done_callback(cb)
            return task

        loop = asyncio.get_running_loop()
        config = self.parent.config
        block_group = self.parent.block_grp

        # Prepare the unfinished download blocks
        unfinished_blocks = block_group.unfinished_blocks()
        # Submit them to the work queue
        while unfinished_blocks:
            block = unfinished_blocks.pop(0)
            await self.submit(block)

        # Decision loop: decide what to do with each block popped off the queue
        work_queue = self._block_queue
        resume_capability = config.resume_capability
        while True:
            block = await work_queue.get()
            work_queue.task_done()
            if block is None:
                break
            if block.unused_length():
                # the block still has an unused range: retry it
                goto_work(block)
            else:
                # check whether the whole task is finished
                if block_group.is_walk_finished():
                    missing = block_group.integrity_check()
                    if missing:
                        h.exception.handler_error(RuntimeError(f'Missing Blocks: {missing}'))
                        if unfinished_blocks:
                            await work_queue.put(unfinished_blocks.pop(0))
                        continue
                    break
                # After one block finishes, allow slicing of unfinished blocks
                # to keep concurrency saturated.
                if resume_capability:
                    if len(block_group.unfinished_blocks()) < config.max_concurrent:
                        if unfinished_blocks:
                            goto_work(unfinished_blocks.pop(0))
                        else:
                            h.slicer.request()

        # Task finished or paused: drain leftover queue entries
        while not work_queue.empty():
            await work_queue.get()
            work_queue.task_done()

        # Wait for all working blocks to exit
        while self._working_blocks:
            await work_queue.get()
            work_queue.task_done()

        self._executors.shutdown(False)
        # Close all handlers without blocking.
        # NOTE(review): Downloader.pause() result is not awaited here — confirm
        # the fire-and-forget call is intended.
        self.parent.pause(0)

    async def submit(self, block):
        """ Submit a download block to the work queue.

        Args:
            block: the Block object to download.
        """
        if self._stopped:
            return False
        await self._block_queue.put(block)

    async def _worker(self, block):
        """ Per-block client worker.

        Args:
            block: the Block object to download.
        """
        def run_client_threadsafe():
            # Runs a synchronous client on the executor thread, with the
            # block/handler contexts re-established on that thread.
            nonlocal cli, loop, handlers_ref, block
            token = block_context.set(block)
            with h.enter(handlers_ref, loop):
                try:
                    return cli.run()
                except BaseException as e:
                    h.exception.client_error(e)
                finally:
                    block_context.reset(token)

        if self._stopped:
            return
        handlers_ref = h.owner
        loop = asyncio.get_running_loop()
        config = self.parent.config

        # Allocate a download source
        uri = await h.uri_mgr.get_uri()

        source_uri = uri.get_copy()
        resume_capability = config.resume_capability
        client_policy = config.client_policy

        solution = client_policy.get_solution(source_uri.protocol)

        # Prepare (or reuse) the client session for this solution
        session = self._client_session.get(solution, None)
        if session is None:
            session = solution.get_session()
            self._client_session[solution] = session

        # Prepare the client handler
        client = solution.get_client(
            session, source_uri, block.progress, resume_capability)

        # Run the client for this block
        async with block.request(client) as cli:
            uri.use(block)
            try:
                if solution.is_async():
                    fut = cli.run()
                else:
                    fut = loop.run_in_executor(self._executors, run_client_threadsafe)
                result = await fut
            except BaseException as err:
                h.exception.client_error(err)
            # NOTE(review): `result` is unbound when the except branch runs, so
            # the return below would raise UnboundLocalError — confirm upstream.
            uri.disuse(block)
        return result

    async def close(self):
        async def close_sess(sess):
            """ Close a client session (supports sync and async close()). """
            coro_or_result = sess.close()
            if asyncio.iscoroutine(coro_or_result):
                await coro_or_result
        await asyncio.gather(*[close_sess(session) for session in self._client_session.values()])
        self._client_session.clear()

    async def pause(self):
        async def pause_cli(blo):
            """ Safely pause/close one client. """
            while True:
                if blo not in self._working_blocks:
                    # the block already exited; skip pausing its client
                    return
                if blo.client is None:
                    # wait for the client to come up, then close it
                    await asyncio.sleep(0)
                else:
                    break
            await blo.client.pause()

        if not self._stopped:
            self._stopped = True
            await asyncio.gather(*[pause_cli(block) for block in self._working_blocks])
            await self._block_queue.put(None)

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'actives': set(self._working_blocks)
        }


class BlockSlicer(Handler):
    """ 下载块切片器。

    负责工作:
        1. 
        下载块切片请求和响应
    """
    name = 'slicer'

    def __init__(self):
        # blocks waiting to be sliced, guarded by _lock
        self._waiters = set()
        self._lock = threading.Lock()

    async def divide_into(self, n):
        """ Split the download into n slices.

        This method should not be called while blocks are actively working,
        otherwise redundant transferred data may result.

        Args:
            n: number of slices to create.
        """
        for i in range(n):
            self.request()
        while self._waiters:
            block = self._waiters.pop()
            self._slice(block)

    def _slice(self, source_block):
        """ Slice half of the source block's unused range into a new block;
        returns the inserted block or None when nothing could be ceded. """
        req_range = source_block.half_unused()
        if req_range:
            result = source_block.slice(req_range)
            if result:
                block = self.parent.block_grp.insert(result)
                return block

        return None

    async def response(self):
        """ Called by a client to answer a pending slice request.

        If the slicer wants to slice the current block, this performs the
        slice safely; "safe" means it is called at a point that does not
        affect the range being downloaded.  Blocks not in the waiting set
        are skipped.
        """
        if self._waiters:
            with self._lock:
                source_block = _lookup_block()
                if source_block not in self._waiters:
                    return
                self._waiters.remove(source_block)
                resp = self._slice(source_block)
                if resp is not None:
                    await h.client_worker.submit(resp)

    def response_threadsafe(self):
        with self._lock:
            # NOTE(review): with a non-empty waiting set the first operand is
            # False and the whole test short-circuits, so blocks not in the
            # waiting set fall through to response(); `or` looks intended —
            # confirm against upstream.
            if not self._waiters and _lookup_block() not in self._waiters:
                return False
        await_coroutine_threadsafe(self.response())

    def request(self):
        """ Request one block slice.

        Picks the victim with the largest remaining range; the slice itself
        happens later, when the client answers via response().
        """
        len_waiting = len(self._waiters)
        blocks = sorted(self.parent.block_grp.unfinished_blocks(), key=lambda i: i.unused_length(), reverse=True)
        self._waiters = set(blocks[:len_waiting + 1])
        return len(self._waiters) == len_waiting + 1

    async def prepare(self):
        # Pre-slice so the number of unfinished blocks reaches max concurrency.
        config = self.parent.config
        if config.resume_capability:
            blocks_len = len(self.parent.block_grp.unfinished_blocks())
            if blocks_len < config.max_concurrent:
                await self.divide_into(config.max_concurrent - blocks_len)

    async def run(self):
        pass

    async def close(self):
        pass

    async def pause(self):
        self._waiters.clear()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'waiters': set(self._waiters)
        }


class SpeedAdjuster(Handler):
    """ Speed adjuster.

    Responsibilities:
        1. enforce the maximum download speed limit
        2. refresh real-time speed information
    """
    name = 'speed_adjuster'

    def __init__(self):
        # whether speed limiting is active
        self._opened = False
        self._stopped = True
        self._thread_cond = threading.Condition(threading.RLock())
        # semaphore-like budget of chunk reads per interval
        self._sema_value = 0
        self._async_cond = None

    async def _release_all(self):
        """ Lift the limit: give waiters an unbounded budget and wake them. """
        with self._thread_cond:
            async with self._async_cond:
                self._sema_value = float('inf')
                self._thread_cond.notify_all()
                self._async_cond.notify_all()

    def acquire_threadsafe(self):
        """ Blocking acquire of one read-budget unit (thread clients). """
        if self._opened:
            while True:
                with self._thread_cond:
                    value = self._sema_value
                    if value > 0:
                        self._sema_value -= 1
                        break
                    self._thread_cond.wait()
        return False

    async def acquire(self):
        """ Async acquire of one read-budget unit (async clients). """
        if self._opened:
            while True:
                with self._thread_cond:
                    async with self._async_cond:
                        value = self._sema_value
                        if value > 0:
                            self._sema_value -= 1
                            break
                        await self._async_cond.wait()
        return False

    async def prepare(self):
        assert self._stopped
        self._async_cond = asyncio.Condition()
        self._stopped = False

    async def run(self):

        async_sleep = asyncio.sleep
        block_grp = self.parent.block_grp
        config = self.parent.config
        max_speed = config.max_speed
        fraction = 0
        if max_speed is not None:
            self._opened = True
        while True:
            if self._stopped:
                break
            await async_sleep(config.interval)

            # refresh the aggregate real-time transfer rate
            block_grp.usage_info.refresh()

            # react to runtime changes of the max-speed configuration
            if config.max_speed != max_speed:
                max_speed = config.max_speed
                if max_speed is None:
                    self._opened = False
                    await self._release_all()
                else:
                    self._opened = True
                    fraction = 0

            # when limited, replenish the read budget for this interval
            if max_speed is not None:
                # NOTE(review): 8196 looks like a typo for 8192 (bytes per
                # chunk read) — confirm against the client chunk size.
                value = config.max_speed * config.interval / 8196

                # Clients limit speed at single-read granularity, so the
                # fractional part is carried over to the next interval for a
                # finer-grained limit.
                fraction += value % 1
                value = int(value)
                if fraction >= 1:
                    value += 1
                    fraction -= 1
                with self._thread_cond:
                    async with self._async_cond:
                        self._sema_value = value
                        self._thread_cond.notify_all()
                        self._async_cond.notify_all()

    async def close(self):
        pass

    async def pause(self):
        if not self._stopped:
            self._stopped = True
            self._opened = False
            await self._release_all()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'value': self._sema_value
        }


class FileTempData(Handler):
    """ Buffered IO reader/writer for the downloaded file.

    Responsibilities:
        1. buffer transferred data and flush it to the file
        2. persist the download state
    """

    name = 'file_data'

    def __init__(self):
        # progress -> list of pending data chunks
        self._buffers = defaultdict(list)
        # total buffered bytes
        self._counter = 0
        self._unreleased = None
        self._lock = threading.RLock()
        self._stopped = True

    async def saving_state(self):
        """ Save the current download state.

        Persists the download configuration as a .cfg file so the download
        can later be resumed.
        """
        dumpy = self.parent.dumps()
        async with h.aio.open(f'{self.parent.file.pathname}{self.parent.config.downloading_ext}.cfg', mode='w') as f:
            await f.write(json.dumps(dumpy))

    async def _release(self):
        """ Hand the current buffers over to the writer queue and reset. """
        buffers = self._buffers
        counter = self._counter
        self._counter = 0
        self._buffers = defaultdict(list)
        return await self._unreleased.put((counter, buffers))

    def store_threadsafe(self, data):
        """ Thread-safe variant of store() for synchronous clients. """
        with self._lock:
            block = _lookup_block()
            self._buffers[block.progress].append(data)
            self._counter += len(data)
            if self.parent.config.buffer_size <= self._counter:
                # NOTE(review): blocks on the event loop while holding _lock —
                # confirm this cannot deadlock with other lock holders.
                await_coroutine_threadsafe(self._release())

    async def store(self, data):
        """ Buffer transferred data.

        When the buffered amount exceeds buffer_size, the buffer is released
        and written to the file.

        Args:
            data: the transferred data to buffer.
        """
        block = _lookup_block()
        self._buffers[block.progress].append(data)
        self._counter += len(data)
        if self.parent.config.buffer_size <= self._counter:
            await self._release()

    async def prepare(self):
        assert self._stopped
        self._unreleased = asyncio.Queue()
        self._stopped = False

    async def run(self):
        unreleased = self._unreleased
        file = self.parent.file
        filepath = f'{file.pathname}{self.parent.config.downloading_ext}'

        # A zero done_length means nothing was written yet, so the sparse
        # target file must be (re)created.
        if not self.parent.block_grp.done_length():
            async with h.aio.open(f'{file.pathname}{self.parent.config.downloading_ext}', mode='wb') as fd:
                if file.size is not None:
                    await 
fd.seek(file.size - 1)
                    await fd.write(b'\x00')

        async with h.aio.open(filepath, mode='rb+') as fd:
            while True:
                result = await unreleased.get()
                if result is None:
                    # sentinel from pause(): stop the writer loop
                    unreleased.task_done()
                    break
                counter, buffers = result
                for pg, lines in buffers.items():
                    # write each progress's chunks at its current file offset
                    await fd.seek(pg.begin + pg.done_length)
                    await fd.writelines(lines)
                    pg.done(sum([len(line) for line in lines]))

                # drop references early so the buffers can be collected
                del lines
                del result
                del buffers
                await self.saving_state()
                unreleased.task_done()

    async def pause(self):
        if not self._stopped:
            self._stopped = True
            # wait for clients to finish, flush what is left, then stop run()
            await h.client_worker.join()
            await self._release()
            await self._unreleased.put(None)

    async def close(self):
        pass

    def info_getter(self):
        return {
            'size': self._counter,
            'ready': 1
        }

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


class AIOReaderWriter(Handler):
    """ AIO read/write worker thread.

    Implements asynchronous file IO so that blocking file reads/writes do
    not stall the download worker threads.

    Responsibilities:
        1. manage the IO read/write thread
    """
    name = 'aio'

    def __init__(self):
        self._executor = None
        self._writers = set()

    async def prepare(self):
        self._executor = ThreadPoolExecutor(
            max_workers=1, thread_name_prefix=f'BufferWriter {self.parent.file.name}')

    @asynccontextmanager
    async def open(self, file, mode='r', *args, **kwargs):
        """ Asynchronously open a file.

        Args:
            file: see the `file` parameter of io.open()
            mode: see the `mode` parameter of io.open()
            args: positional arguments forwarded to io.open()
            kwargs: keyword arguments forwarded to io.open()

        Returns:
            An AIOFile object whose blocking IO operations are executed
            asynchronously on the worker thread.
        """
        def async_open():
            # calls the builtin open() (only shadowed by this method's name)
            return open(file, mode, *args, **kwargs)

        executor = self._executor
        assert executor
        loop = asyncio.get_running_loop()
        fd = await loop.run_in_executor(executor, async_open)
        aiofile = AIOFile(executor, fd, loop=loop)
        self._writers.add(aiofile)
        yield aiofile
        # Close the file.
        # NOTE(review): no try/finally — if the with-body raises, the fd is
        # never closed and stays registered in _writers; confirm intent.
        await loop.run_in_executor(executor, fd.close)
        self._writers.remove(aiofile)

    async def run(self):
        pass

    async def close(self):
        # Wait for every other handler to finish before stopping the IO thread.
        for handler in h.iter_all():
            if handler != self:
                await handler.join()
        self._executor.shutdown(False)

    async def pause(self):
        pass


class AIOFile:
    """ Asynchronous file read/write object.

    Wraps a file object so that the blocking operations listed in
    _async_attr are executed on the AIOReaderWriter worker thread.
    """
    _async_attr = frozenset(
        {'read', 'readline', 'readlines', 'write', 'writeline',
         'writelines', 'seek', 'flush', 'truncate'})

    def __init__(self, executor, fd, loop=None):
        self._executor = executor
        self._fd = fd
        self._loop = loop

    def __getattr__(self, item):
        func = getattr(self._fd, item)
        if item in self._async_attr:
            def ready(*args, loop=None, **kwargs):
                if loop is None:
                    loop = asyncio.get_running_loop()

                if kwargs:
                    # run_in_executor takes no kwargs; bind them via partial
                    handler = partial(getattr(self._fd, item), **kwargs)
                else:
                    handler = getattr(self._fd, item)
                fut = loop.run_in_executor(self._executor, handler, *args)
                return fut
            func = ready

        return func

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): 'close' is not in _async_attr, so self.close resolves
        # to the raw file's close() returning None; awaiting None raises
        # TypeError — confirm whether __aexit__ is actually exercised.
        await self.close()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


--------------------------------------------------------------------------------
/nbdler/progress.py:
--------------------------------------------------------------------------------

# -*- coding: UTF-8 -*-

# from ..utils.misc import Component
# # from .misc import Timer


class Progress:
    """ Byte-range progress of one download block.

    walk_length counts bytes received; done_length counts bytes already
    written to the file.
    """
    __slots__ = '_range', 'walk_length', 'done_length', '_timer'

    def __init__(self, range, walk_length=0, done_length=0, increment_time=0):
        # NOTE(review): `range` shadows the builtin; `increment_time` is
        # unused and the `_timer` slot is never assigned — confirm upstream.
        begin, end = range
        self._range = range

        if done_length != walk_length:
            # Buffered data may not have reached the file (the config file was
            # probably closed uncleanly); try to roll the progress back.
            # NOTE(review): this sets done_length forward to walk_length rather
            # than walk_length back to done_length — confirm the intent.
            done_length = walk_length

        self.walk_length = walk_length
        self.done_length = done_length

    def is_walk_finished(self):
        return self.walk_length >= self.total_length

    def is_done_finished(self):
        return self.done_length >= self.total_length

    def is_finished(self):
        return self.is_walk_finished() and self.is_done_finished()

    @property
    def range(self):
        return self._range

    @property
    def begin(self):
        return self._range[0]

    @property
    def end(self):
        return self._range[1]

    @property
    def total_length(self):
        try:
            return self._range[1] - self._range[0]
        except TypeError:
            # open-ended range (end is None): length unknown
            return float('inf')

    @property
    def walk_left(self):
        return self.total_length - self.walk_length

    @property
    def done_left(self):
        return self.total_length - self.done_length

    @property
    def differ(self):
        # bytes received but not yet written to the file
        return self.walk_length - self.done_length

    @property
    def time_length(self):
        return 0

    @property
    def average_speed(self):
        # NOTE(review): returns None — looks like a stub; confirm upstream.
        return

    @property
    def percent_complete(self):
        return self.walk_length * 100 / self.total_length

    def walk(self, byte_len):
        self.walk_length += byte_len

    def done(self, byte_len):
        self.done_length += byte_len

    def start(self):
        pass

    def stop(self):
        pass

    def set_walk_finish(self):
        """ Some downloads have no declared end, i.e. unknown size.  When such
        a download completes, force the current received length to become
        the end of the range.
        """
        assert self.end is None
        self._range = (self.begin, self.begin + self.walk_length)

    def slice(self, request_range):
        """ Slice the download progress.

        Cedes (put_begin, put_end) from this progress's tail; returns the
        ceded range or None when the request cannot be satisfied.
        """
        put_begin, put_end = request_range
        if put_begin > self.begin + self.walk_length:
            if put_end != self.end:
                put_end = self.end
            if put_begin >= put_end:
                return None
        else:
            return None

        self._range = (self._range[0], put_begin)
        return put_begin, put_end

    def __repr__(self):
        # NOTE(review): the format string appears garbled by the export (three
        # arguments, one placeholder) — confirm against upstream.
        return ' {:.2%}'.format(self.begin, self.end, self.percent_complete / 100)

    def __iter__(self):
        return iter([self._range, self.walk_length, self.done_length])


--------------------------------------------------------------------------------
/nbdler/request.py:
--------------------------------------------------------------------------------

# -*- coding: UTF-8 -*-

from nbdler.uri import SourceURI
from typing import Sequence, Mapping, AnyStr, Union, Optional
from nbdler.client import ClientPolicy
import bisect


class RequestConfig:
    """ Download configuration shared by all request types. """

    def __init__(self, *, file_path: AnyStr,
                 max_concurrent: int=5,
                 buffer_size: int=20*1024*1024,
                 chunk_size: int=64*1024,
max_retries: Optional[int]=3, 15 | timeout: Optional[int]=10, 16 | client_policy: Optional[ClientPolicy]=None, 17 | **kwargs): 18 | """ 19 | Args: 20 | file_path: 目标文件的路径名称 21 | max_concurrent: 最大并发数 22 | buffer_size: 最大请求次数仅用于dlopen的时候尝试的次数。 23 | chunk_size: 下载客户端的最大线程数量。 24 | max_retries: 内存缓冲的最大值。 25 | timeout: 下载客户端的超时参数。None一般代表着时无限阻塞链接。 26 | client_policy: 指定客户端处理策略,默认策略由 nbdler.client.__init__._DEFAULT_POLICY 指定 27 | **kwargs: 保存提供的额外参数,以在后续提供给下载客户端来让客户端进行选择调整。 28 | - downloading_ext: 下载中文件扩展名 29 | - interval: 心跳刷新间隔 30 | """ 31 | self.file_path = file_path 32 | self.max_concurrent = max_concurrent 33 | self.chunk_size = chunk_size 34 | self.max_retries = max_retries 35 | self.buffer_size = buffer_size 36 | self.timeout = timeout 37 | self.client_policy = client_policy 38 | self.handlers = [] 39 | self.kwargs = kwargs 40 | 41 | def add_handler(self, *handlers): 42 | """ 添加或更新下载处理器。 43 | Args: 44 | *handlers: 继承nbdler.handler.Handler类的处理器列表参数, 45 | 通过名称标识处理器,类变量name作为处理器唯一标识。 46 | 与内置处理器重名则替换内置处理器。 47 | 内置处理器有: 48 | - client_worker: 客户端调配控制器 49 | - slicer: 下载块切片器 50 | - speed_adjuster: 速度调节器,用于限速和实时速率刷新 51 | - uri_mgr: 下载源状态管理器 52 | - exception: 异常收集处理器 53 | - file_data: 文件缓冲区 54 | - aio: 异步文件读写工作线程 55 | """ 56 | for handler in handlers: 57 | bisect.insort(self.handlers, handler) 58 | 59 | def __repr__(self): 60 | return f'' 61 | 62 | 63 | class Request(RequestConfig): 64 | 65 | def __init__(self, uri=None, headers=None, cookies=None, proxies=None, 66 | max_conn=None, range_field=None, name='main', *, file_path, **kwargs): 67 | """ 68 | Args: 69 | uri,headers,cookies,proxies,max_conn,range_field,name: 参考put()方法。 70 | **kwargs: 参考RequestConfig的__init__()方法 71 | """ 72 | super(Request, self).__init__(file_path=file_path, **kwargs) 73 | self._uris = [] 74 | if uri: 75 | kwargs = {k: kwargs[k] for k in set(kwargs).difference(self.__dict__)} 76 | self.put(uri, headers, cookies, proxies=proxies, 77 | max_conn=max_conn, range_field=range_field, name=name, **kwargs) 
78 | 79 | def put(self, uri: AnyStr, 80 | headers: Optional[Union[Sequence, Mapping[str, str]]]=None, 81 | cookies: Optional[Mapping[str, str]]=None, 82 | proxies: Optional[Mapping]=None, 83 | max_conn: Optional[int]=None, 84 | range_field: Optional[Mapping[str, str]]=None, 85 | name: Optional[str]=None, 86 | **kwargs): 87 | """ 添加下载源。 88 | Args: 89 | uri: URI链接 90 | headers: 请求头 91 | cookies: 下载源请求传递的Cookie,要求传递字典dict类型 92 | proxies: 代理服务器,带auth例子:{'http': 'http://user:pass@some.proxy.com'} 93 | max_conn: 最大连接数 94 | range_field: 范围请求定义,要求提供字典类型,如 {'Range': 'bytes={begin}-{end_with}'} 95 | name: 下载源名称,仅用于标记,默认不提供,系统自动编号 96 | **kwargs: 允许根据下载源参数指定客户端的特定操作。 97 | - trust_env: 使用系统代理 98 | 99 | Returns: 100 | 返回未经编号的下载源。 101 | """ 102 | uri = uri.strip() 103 | src_url = SourceURI(None, uri, headers, cookies, proxies, max_conn, range_field, name, **kwargs) 104 | self._uris.append(src_url) 105 | return src_url 106 | 107 | @property 108 | def opts(self): 109 | """ 返回请求中的配置字典信息。 110 | 111 | Returns: 112 | 返回配置信息字典,具体键值参考RequestConfig。 113 | """ 114 | opts = {k: v for k, v in self.__dict__.items() if not k.startswith('_')} 115 | opts.update(opts.pop('kwargs')) 116 | return opts 117 | 118 | @property 119 | def uris(self): 120 | return self._uris 121 | 122 | def __repr__(self): 123 | return f'' 124 | 125 | def dumps(self): 126 | return { 127 | 'config': self.opts, 128 | 'uris': [uri.dumps() for uri in self._uris] 129 | } 130 | 131 | @classmethod 132 | def loads(cls, dumpy): 133 | request = cls(**dumpy['config']) 134 | for uri in dumpy['uris']: 135 | request.put(**uri) 136 | -------------------------------------------------------------------------------- /nbdler/rpc.py: -------------------------------------------------------------------------------- 1 | 2 | # TODO: DownloadRPCClient, DownloadRPCServer 3 | -------------------------------------------------------------------------------- /nbdler/session.py: 
from wsgiref.headers import Headers as _Headers
from urllib.parse import urlparse
from base64 import b64decode
from typing import AnyStr, Sequence, Optional, Union, Mapping


class Headers(_Headers):
    """wsgiref.headers.Headers with friendlier construction and values.

    Accepts None, any mapping, or a list/tuple of (name, value) pairs
    (wsgiref itself only takes a list), and coerces non-str values to str
    on assignment instead of raising.
    """

    def __init__(self, headers: Optional[Union[Sequence, Mapping]] = None):
        if headers is None:
            headers = []
        elif isinstance(headers, Mapping):
            # Generalized from ``dict`` to any mapping (backward compatible).
            headers = list(headers.items())
        elif isinstance(headers, (list, tuple)):
            # Copy so the caller's sequence is never mutated by wsgiref.
            headers = list(headers)
        else:
            raise TypeError(f'unsupported headers type: {type(headers)!r}')

        super().__init__(headers)

    def _convert_string_type(self, value):
        """Convert/check value type: coerce to str instead of raising."""
        if type(value) is str:
            return value

        return str(value)


class BaseURI:
    """Wraps a URI plus headers and exposes the parsed URL components."""

    def __init__(self, uri: AnyStr, headers):
        self._uri = None
        self._urlparse = None

        self.headers = Headers(headers)
        # Assign through the property so ``_urlparse`` stays in sync.
        self.uri = uri

    @property
    def uri(self):
        return self._uri

    @uri.setter
    def uri(self, value):
        self._uri = value
        self._urlparse = urlparse(value)

    @property
    def urlparse(self):
        return self._urlparse

    @property
    def hostname(self):
        return self._urlparse.hostname if self._urlparse else None

    @property
    def port(self):
        return self._urlparse.port if self._urlparse else None

    @property
    def path(self):
        return self._urlparse.path if self._urlparse else None

    @property
    def scheme(self):
        return self._urlparse.scheme if self._urlparse else None

    # Legacy alias for ``scheme``.
    protocol = scheme

    @property
    def query(self):
        return self._urlparse.query if self._urlparse else None

    @property
    def netloc(self):
        return self._urlparse.netloc if self._urlparse else None


class SourceURI(BaseURI):
    """A single download source together with its request options."""

    def __init__(self, id, uri, headers, cookies=None, proxies=None,
                 max_conn=None, range_field=None, name=None, response=None, **kwargs):
        """
        Args:
            id: numeric id assigned by the URIs registry (None before then).
            uri: the source URI.
            headers: request headers.
            cookies: cookies sent to this source.
            proxies: proxy configuration.
            max_conn: maximum connections for this source.
            range_field: range-request template, e.g.
                {'Range': 'bytes={begin}-{end_with}'}.
            name: display label; uniqueness is handled by ``id``.
            response: serialized URIResponse dict, as produced by dumps().
            **kwargs: extra client options, preserved across dumps()/loads().
        """
        super(SourceURI, self).__init__(uri, headers)
        self.id = id
        self.cookies = cookies
        self.proxies = proxies
        self.max_conn = max_conn

        self.range_field = range_field

        self.name = name
        # URIResponse is defined later in this module; only referenced when
        # a serialized response is supplied.
        self._response = URIResponse.loads(response) if response else None
        self.kwargs = kwargs

    def getresponse(self):
        """Return the last recorded URIResponse, or None."""
        return self._response

    def set_response(self, resp):
        self._response = resp

    def dumps(self):
        """Serialize this source (including any response) to plain data."""
        kwargs = {
            'id': self.id,
            'uri': self.uri,
            'headers': self.headers.items(),
            'cookies': self.cookies,
            'proxies': self.proxies,
            'max_conn': self.max_conn,
            'range_field': self.range_field,
            'name': self.name,
            'response': self._response and self._response.dumps()
        }
        kwargs.update(self.kwargs)
        return kwargs

    @classmethod
    def loads(cls, dumpy):
        """Rebuild a SourceURI from dumps() output."""
        return cls(**dumpy)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return f'<SourceURI {self.name}: {self.uri}>'
class URIResponse(BaseURI):
    """Normalized response metadata recorded for a download source."""

    def __init__(self, uri,
                 headers,
                 code,
                 msg,
                 length,
                 content_type,
                 range,
                 resume_capability,
                 **kwargs):
        """
        Args:
            uri: response URI.
            headers: response headers.
            code: response status code.
            msg: response status message.
            length: total resource length.
            content_type: resource MIME type.
            range: byte range covered by this response.
            resume_capability: whether the source supports resuming
                (range requests).
            **kwargs: extra fields, preserved across dumps()/loads().
        """
        super().__init__(uri, headers)
        self.code = code
        self.length = length
        self.range = range
        self.msg = msg
        self.content_type = content_type
        self.resume_capability = resume_capability
        self.kwargs = kwargs

        # Extract commonly useful HTTP validators/metadata from the headers.
        self.etag = self.headers.get('etag')
        self.date = self.headers.get('date')
        self.last_modified = self.headers.get('last-modified')
        self.content_range = self.headers.get('content-range')
        self.content_md5 = self.headers.get('content-md5')
        self.expires = self.headers.get('expires')
        self.md5 = None
        if self.content_md5:
            # RFC 1864: Content-MD5 carries the base64-encoded MD5 digest.
            # BUG FIX: narrowed from a bare ``except`` — only decode errors
            # are expected here (binascii.Error subclasses ValueError); a
            # malformed header leaves ``md5`` unset instead of failing.
            try:
                self.md5 = b64decode(self.content_md5).hex()
            except (ValueError, TypeError):
                pass

    def dumps(self):
        """Serialize the response to plain data for persistence."""
        kwargs = {
            'uri': self.uri,
            'headers': list(self.headers.items()),
            'code': self.code,
            'length': self.length,
            'range': self.range,
            'content_type': self.content_type,
            'msg': self.msg,
            'resume_capability': self.resume_capability
        }
        kwargs.update(self.kwargs)
        return kwargs

    @classmethod
    def loads(cls, dumpy):
        """Rebuild a URIResponse from dumps() output."""
        return cls(**dumpy)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return (f'<URIResponse [{self.code} {self.msg}] '
                f'length={self.length} {self.uri}>')


class URIs:
    """Registry of download sources with stable integer ids."""

    def __init__(self):
        self._uris = []

    def __len__(self):
        return len(self._uris)

    def __getitem__(self, item):
        return self._uris.__getitem__(item)

    def __iter__(self):
        return iter(self._uris)

    def put(self, uri,
            headers=None,
            cookies=None,
            proxies=None,
            max_conn=None,
            range_field=None,
            name=None,
            **kwargs):
        """Register a new download source and return it."""
        put_id = self.__newid()
        if name is None:
            # Default the label to the assigned id.
            name = str(put_id)
        src_url = SourceURI(put_id, uri, headers, cookies, proxies, max_conn,
                            range_field, name, **kwargs)

        # __newid() reserved the slot with None; fill it in.
        self._uris[put_id] = src_url
        return self._uris[put_id]

    def __newid(self):
        """Reserve and return the next free id (reuses empty slots)."""
        try:
            index = self._uris.index(None)
        except ValueError:
            index = len(self._uris)
            self._uris.append(None)
        return index

    def dumps(self):
        """Serialize all sources. Assumes every slot is filled."""
        return [uri.dumps() for uri in self._uris]

    @classmethod
    def loads(cls, dumpy):
        """Rebuild the registry from dumps() output.

        Ids stored in the dump are kept as-is; the dump is presumed to be
        in id order — TODO confirm at the call sites.
        """
        uris = cls()
        for uri in dumpy:
            uris._uris.append(SourceURI(**uri))
        return uris

    @classmethod
    def load_from_source_uris(cls, source_uris: Sequence[SourceURI]):
        """Build a registry from existing SourceURI objects (re-numbered)."""
        uris = cls()
        uris.import_uris(source_uris)
        return uris

    def import_uris(self, source_uris: Sequence[SourceURI]):
        """Copy sources into this registry, assigning fresh ids."""
        for uri in source_uris:
            self.put(uri.uri, uri.headers.items(), uri.cookies, uri.proxies,
                     uri.max_conn, uri.range_field, uri.name, **uri.kwargs)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return f'<URIs total={len(self._uris)}>'


# ---- nbdler/utils.py module header (preserved from the original file) ----
import time
from collections import deque
import asyncio
from contextlib import contextmanager
import threading
from concurrent import futures
class UsageInfo:
    """Records usage statistics: elapsed time and a smoothed transfer rate."""

    __slots__ = '_fetch_length', '_previous_length', '_previous_time', '_start_time', '_moving_avg', 'rate'

    def __init__(self, fetch_length):
        # ``fetch_length`` is a zero-argument callable returning the current
        # cumulative byte count.
        self._fetch_length = fetch_length

        self._previous_length = fetch_length()
        self._start_time = time.time()
        self._previous_time = self._start_time
        # 8-sample moving-average window for the rate.
        self._moving_avg = deque([0] * 8)
        self.rate = 0

    def reset(self):
        """Restart timing and clear the rate history."""
        self._start_time = time.time()
        self._previous_time = self._start_time
        self._moving_avg = deque([0] * 8)
        self.rate = 0

    def timelength(self):
        """Seconds elapsed since construction or the last reset()."""
        return time.time() - self._start_time

    def refresh(self):
        """Sample the counter and update the moving-average rate."""
        cur_time = time.time()
        cur_length = self._fetch_length()
        diff_time = cur_time - self._previous_time
        diff_length = cur_length - self._previous_length

        self._previous_length = cur_length
        self._previous_time = cur_time
        # A zero time delta yields a zero speed sample (x / inf == 0.0).
        speed = diff_length / (diff_time or float('inf'))

        self._moving_avg.pop()
        self._moving_avg.appendleft(speed)

        self.rate = sum(self._moving_avg) / 8


def update_range_field(range_filed, target_range):
    """Fill in a range template string.

    Fields are written as ``{name}``, e.g. ``{begin}``. Available fields:
        begin: start of the range.
        end: end of the range, exclusive.
        end_with: end of the range, inclusive.
        length: length of the range.

    Example: target_range=(2, 99) gives
        begin=2, end=99, end_with=98, length=97.

    Args:
        range_filed: template containing begin/end/end_with/length fields.
            (Parameter name keeps the original spelling for compatibility.)
        target_range: (begin, end) pair; ``end`` may be None/inf for an
            open-ended range, in which case end/end_with/length are blank.

    Returns:
        The template with the fields substituted.
    """
    target_begin, target_end = target_range

    begin = target_begin
    if target_end is None or target_end == float('inf'):
        end = ''
        end_with = ''
        length = ''
    else:
        end = target_end
        if target_end > 0:
            end_with = target_end - 1
        else:
            end_with = ''
        length = end - begin
    return range_filed.format(
        begin=begin,
        end=end,
        end_with=end_with,
        length=length)


class _ExecutorEventLoopFuture:
    """Future for an event loop safely running run_forever() in an Executor."""

    def __init__(self, task_fut, loop_fut):
        # ``loop_fut`` resolves to the loop once the worker thread starts;
        # ``task_fut`` resolves when run_forever() has fully shut down.
        self._loop = loop_fut
        self._task = task_fut

    def __await__(self):
        yield from asyncio.wrap_future(self._task)

    def __iter__(self):
        yield from asyncio.wrap_future(self._task)

    def get_loop(self):
        """Block until the loop exists, then return it."""
        return self._loop.result()

    async def aget_loop(self):
        return await asyncio.wrap_future(self._loop)

    def join(self):
        """Block until the loop thread has finished shutting down."""
        return self._task.result()

    async def ajoin(self):
        return await asyncio.wrap_future(self._task)

    result = join

    aresult = ajoin

    def close(self):
        """Schedule loop.stop() from any thread."""
        loop = self.get_loop()
        return loop.call_soon_threadsafe(loop.stop)

    async def aclose(self):
        loop = await self.aget_loop()
        # BUG FIX: call_soon_threadsafe returns a Handle, which is not a
        # concurrent.futures.Future and cannot be passed to
        # asyncio.wrap_future (the old code raised here).  Scheduling the
        # stop is all that is needed.
        loop.call_soon_threadsafe(loop.stop)

    def add_done_callback(self, __fn):
        return self._task.add_done_callback(__fn)


def forever_loop_in_executor(executor, loop=None):
    """Run an asyncio event loop with run_forever() inside an executor thread.

    The loop thread can only be stopped via loop.stop() (see
    _ExecutorEventLoopFuture.close()/aclose()).
    """
    def _run():
        nonlocal loop
        if loop is None:
            try:
                loop = asyncio.get_event_loop()
                if loop.is_closed():
                    raise RuntimeError('new loop')
            except RuntimeError:
                loop = asyncio.new_event_loop()
        # Bind the loop to this worker thread unconditionally so loop-less
        # asyncio calls made from it (e.g. gather during shutdown) resolve
        # to the right loop.  (Previously only done for newly created loops.)
        asyncio.set_event_loop(loop)

        future_loop.set_result(loop)
        try:
            loop.run_forever()
        finally:
            try:
                cancel_all_tasks(loop)
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                loop.close()

    future_loop = futures.Future()
    task_fut = executor.submit(_run)
    return _ExecutorEventLoopFuture(task_fut, future_loop)


def cancel_all_tasks(loop):
    """Cancel and drain all tasks still pending on *loop*.

    Adapted from asyncio.runners._cancel_all_tasks.
    """
    to_cancel = asyncio.all_tasks(loop)
    if not to_cancel:
        return

    for task in to_cancel:
        task.cancel()

    # BUG FIX: the explicit ``loop=`` argument to gather() was removed in
    # Python 3.10; _run() binds *loop* as the current thread's event loop,
    # so gather() resolves it correctly without the argument.
    loop.run_until_complete(
        asyncio.gather(*to_cancel, return_exceptions=True))

    for task in to_cancel:
        if task.cancelled():
            continue
        if task.exception() is not None:
            loop.call_exception_handler({
                'message': 'unhandled exception during asyncio.run() shutdown',
                'exception': task.exception(),
                'task': task,
            })
'Nbdler' 3 | DESCRIPTION = 'Python multi-client Downloader.' 4 | URL = 'https://github.com/ZSAIM/Nbdler' 5 | VERSION = '3.0.3' 6 | AUTHOR = 'ZSAIM' 7 | AUTHOR_EMAIL = 'zzsaim@163.com' 8 | LICENSE = 'Apache 2.0' 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from setuptools import setup, find_packages 4 | import io 5 | import os 6 | 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | 9 | about = {} 10 | with open(os.path.join(here, 'nbdler', 'version.py'), 'r', encoding='utf-8') as f: 11 | exec(f.read(), about) 12 | 13 | with io.open('README.rst', 'r', encoding='utf-8') as readme: 14 | long_description = readme.read() 15 | 16 | install_requires = [ 17 | 'aiohttp', 18 | 'requests' 19 | ] 20 | 21 | 22 | setup( 23 | name=about['TITLE'], 24 | version=about['VERSION'], 25 | description=about['DESCRIPTION'], 26 | long_description=long_description, 27 | author=about['AUTHOR'], 28 | author_email=about['AUTHOR_EMAIL'], 29 | url=about['URL'], 30 | license=about['LICENSE'], 31 | classifiers=[ 32 | 'Development Status :: 5 - Production/Stable', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Programming Language :: Python', 36 | 'Programming Language :: Python :: 3', 37 | ], 38 | packages=find_packages(), 39 | install_requires=install_requires, 40 | ) --------------------------------------------------------------------------------