├── LICENSE ├── README.md ├── README.rst ├── nbdler ├── __init__.py ├── api.py ├── block.py ├── client │ ├── __init__.py │ ├── abstract.py │ ├── aiohttp.py │ ├── base_http.py │ └── requests.py ├── download.py ├── error.py ├── file.py ├── handler.py ├── progress.py ├── request.py ├── rpc.py ├── session.py ├── uri.py ├── utils.py └── version.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nbdler 2 | =============== 3 | [![Build Status](https://img.shields.io/badge/build-passing-green.svg)](https://github.com/ZSAIM/Nbdler) 4 | [![Build Status](https://img.shields.io/badge/pypi-v3.0.3-blue.svg)](https://pypi.org/project/Nbdler/) 5 | 6 | Nbdler 是由Python3 编写的异步多客户端、多来源下载工具。 7 | 8 | 9 | # 支持协议 10 | 11 | - HTTP 12 | - HTTPS 13 | 14 | # 支持客户端 15 | 16 | - [**aiohttp**](https://github.com/aio-libs/aiohttp): Async http client/server framework. 17 | - [**requests**](https://github.com/psf/requests): A simple, yet elegant HTTP library. 
18 | 19 | # 特征 20 | 21 | - 支持断点续传 22 | - 支持多来源多客户端下载 23 | - 支持速度限速 24 | - 支持下载缓冲设置 25 | - 支持代理(取决于客户端) 26 | 27 | # 入门使用 28 | 29 | ### 简单下载示例 30 | ```python 31 | import asyncio 32 | import nbdler 33 | 34 | async def main(): 35 | request = nbdler.Request('http://a/file', file_path='file') 36 | async with nbdler.dlopen(request) as dl: 37 | dl.start() 38 | while not dl.is_finished(): 39 | print((f'filename={dl.file.name}, ' 40 | f'transfer rate={round(dl.transfer_rate() / 1024)} kb/s, ' 41 | f'{round(dl.percent_complete(), 2)} % percent complete')) 42 | await asyncio.sleep(1) 43 | await dl.ajoin() 44 | 45 | asyncio.run(main()) 46 | ``` 47 | ### 多客户端,多来源,指定处理客户端,指定最大并发数 48 | ```python 49 | import asyncio 50 | import nbdler 51 | 52 | async def main(): 53 | request = nbdler.Request('http://a/file', 54 | client_policy=nbdler.get_policy(http='aiohttp', https='requests'), 55 | max_concurrent=16, file_path='file') 56 | request.put('https://b/file') 57 | async with nbdler.dlopen(request) as dl: 58 | await dl.astart() 59 | await dl.ajoin() 60 | 61 | asyncio.run(main()) 62 | ``` 63 | 64 | ### 关于方法 65 | 66 | 67 | # Installation 68 | 69 | $ pip install Nbdler 70 | 71 | # Requirements 72 | 73 | - Python >= 3.5.3 74 | - aiohttp 75 | - requests 76 | 77 | 78 | # 许可证 79 | 80 | Apache-2.0 81 | 82 | # TODO 83 | 84 | - [ ] 完善使用文档。 85 | - [ ] 实现Handler处理器(SampleValidate 保证多来源下载时的资源匹配)。 86 | - [ ] 实现DownloadSession(以便实现下载器的进程隔离,同时实现RPC进程通信)。 87 | - [ ] 支持FTP协议。 88 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Nbdler 2 | ====== 3 | 4 | 5 | 6 | Installation 7 | ============= 8 | 9 | :: 10 | 11 | $ pip install Nbdler 12 | 13 | 14 | License 15 | ======= 16 | 17 | Apache-2.0 18 | 19 | -------------------------------------------------------------------------------- /nbdler/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | from .api import dlopen 4 | from .download import Downloader 5 | from .request import Request 6 | from .client import get_policy 7 | 8 | from .error import MaxRetriesExceeded, ClientError, HandlerError 9 | -------------------------------------------------------------------------------- /nbdler/api.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import mimetypes 4 | import os 5 | import asyncio 6 | from concurrent.futures.thread import ThreadPoolExecutor 7 | from .utils import forever_loop_in_executor 8 | 9 | from .download import Downloader 10 | from .client import get_policy 11 | from .uri import URIs 12 | from .progress import Progress 13 | from .error import MaxRetriesExceeded 14 | from .block import BlockGroup 15 | from .file import File 16 | from .request import Request 17 | 18 | 19 | __all__ = [ 20 | 'dlopen', 21 | ] 22 | 23 | 24 | def dlopen(request, handlers=None, *, do_async=True, executors=None): 25 | """ 打开下载请求Request对象,并构造返回Downloader。 26 | Args: 27 | request: 下载请求对象或下载配置文件路径。 28 | handlers: 添加的Handler对象列表,仅适用于打开下载配置文件 29 | do_async: 是否使用异步打开 30 | executors: 使用指定的concurrent.futures.thread打开,默认新创线程执行。 31 | """ 32 | async def open_request(): 33 | # 打开请求Request对象 34 | client_policy = request.client_policy 35 | if client_policy is None: 36 | client_policy = get_policy() 37 | 38 | uris = URIs.load_from_source_uris(request.uris) 39 | progress = Progress((0, None)) 40 | source_uri = None 41 | resp = None 42 | exceptions = [] 43 | 44 | max_retries = request.max_retries 45 | if request.max_retries is None: 46 | max_retries = float('inf') 47 | 48 | while True: 49 | for source_uri in uris: 50 | try: 51 | client_cls = client_policy.get_solution(source_uri.protocol) 52 | resp = await client_cls.dlopen( 53 | source_uri, progress, **source_uri.kwargs) 54 | except BaseException as err: 55 | exceptions.append(err) 56 | 
max_retries -= 1 57 | if max_retries < 0: 58 | raise MaxRetriesExceeded(f'max_retries: {request.max_retries}', exceptions) 59 | else: 60 | break 61 | else: 62 | continue 63 | break 64 | 65 | source_uri.set_response(resp) 66 | path, name = os.path.split(request.file_path) 67 | if not name: 68 | name = source_uri.path.rsplit('/', 1)[-1] 69 | if not name: 70 | ext = mimetypes.guess_extension(resp.content_type) 71 | name = f'{source_uri.hostname}{ext or ""}' 72 | size = resp.length 73 | 74 | block_grp = BlockGroup(request.chunk_size, size) 75 | block_grp.insert((0, size)) 76 | opts = request.opts 77 | opts.update(dict( 78 | client_policy=client_policy, 79 | resume_capability=resp.resume_capability, 80 | )) 81 | return Downloader( 82 | File(path, name, size), 83 | uris, 84 | block_grp, 85 | **opts 86 | ) 87 | 88 | async def open_cfg(): 89 | # 打开下载配置文件 90 | file = request 91 | if not os.path.isfile(file): 92 | raise FileNotFoundError(f'下载数据配置文件{file}未找到。') 93 | with open(file, mode='r') as fd: 94 | dumpy_json = fd.read() 95 | dumpy = json.loads(dumpy_json) 96 | return Downloader.loads(dumpy, handlers) 97 | 98 | async def do_open(): 99 | if isinstance(request, Request): 100 | return await open_request() 101 | else: 102 | return await open_cfg() 103 | 104 | def callback(fut): 105 | executors.shutdown(False) 106 | 107 | new_executor = False 108 | if executors is None: 109 | executors = ThreadPoolExecutor( 110 | max_workers=1, thread_name_prefix='Nbdler.dlopen() Worker') 111 | new_executor = True 112 | 113 | exec_fut = forever_loop_in_executor(executors) 114 | if new_executor: 115 | exec_fut.add_done_callback(callback) 116 | 117 | loop = exec_fut.get_loop() 118 | if do_async: 119 | def done_stop_loop(fut): 120 | nonlocal exec_fut 121 | exec_fut.close() 122 | 123 | future = asyncio.wrap_future( 124 | asyncio.run_coroutine_threadsafe(do_open(), loop=loop)) 125 | future.add_done_callback(done_stop_loop) 126 | result = _AsyncDownloadOpenContextManager(future) 127 | else: 128 | 
future = asyncio.run_coroutine_threadsafe(do_open(), loop=loop) 129 | result = future.result() 130 | exec_fut.close() 131 | return result 132 | 133 | 134 | class _AsyncDownloadOpenContextManager: 135 | __slots__ = '_future', '_result' 136 | 137 | def __init__(self, future): 138 | self._future = future 139 | self._result = None 140 | 141 | def __await__(self): 142 | return self._future.__await__() 143 | 144 | def __iter__(self): 145 | return self.__await__() 146 | 147 | async def __aenter__(self): 148 | self._result = await self._future 149 | return await self._result.__aenter__() 150 | 151 | async def __aexit__(self, exc_type, exc_val, exc_tb): 152 | return await self._result.__aexit__(exc_type, exc_val, exc_tb) 153 | -------------------------------------------------------------------------------- /nbdler/block.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | """ 下载块 3 | 4 | 下载块Chunk结构: 5 | 6 | +---------------------------------+ 7 | | Chunk | 8 | | +--------+----------+ | 9 | | | client | progress | | 10 | | +--------+----------+ | 11 | +---------------------------------+ 12 | | Block [0:25] | 13 | | >>>>>-------------------- 05/25 | 14 | +---------------------------------+ 15 | |+|+|+|+|+|+|+|+|+|-|-|-|-|-|-|-|-| 16 | |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-| <--------- block 17 | +---------------------------------+ 18 | 19 | 其中: '>' 表示已下载块; '-' 表示未下载块 20 | 21 | 下载块管理器: 22 | 23 | +-------------------------------------------------------------+ 24 | | ChunkManager | 25 | | +--------C1-------+ +---C3----+ +---------C2--------+ | 26 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 27 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 28 | | |-|-|-|-|-|-|-|-|-| |-|-|-|-|-| |-|-|-|-|-|-|-|-|-|-| | 29 | | +-----------------+ +---------+ +-------------------+ | 30 | +-------------------------------------------------------------+ 31 | 32 | 文件下载块切片原理: 33 | 34 | 表述: 35 | 
搜索从剩余下载量最多的下载块中切片生成新的下载块。 36 | 37 | 1. 下载块C1 (总下载块=C1) 38 | +--------------------------------------------------------------------------------------------------------------------+ 39 | | C1 [0:100] | 40 | | >>>>>>>--------------------------------------------------------------------------------------------- | 41 | +--------------------------------------------------------------------------------------------------------------------+ 42 | 43 | 2. 从下载块C1中切片分出下载块C2 (总下载块=C1+C2) 44 | +--------------------------------------------------------------+-----------------------------------------------------+ 45 | | C1 [0:54] | C2 [54:100] | 46 | | >>>>>>>>>>>>>>---------------------------------------- | >>>------------------------------------------- | 47 | +--------------------------------------------------------------+-----------------------------------------------------+ 48 | 49 | 3. 从下载块C2中切片分出下载块C3 (总下载块=C1+C2+C3) 50 | +----------------------------------------------------------+-----------------------------+---------------------------+ 51 | | C1 [0:54] | C2 [54:79] | C3 [79:100] | 52 | | >>>>>>>>>>>>>>>>>>>>>--------------------------------- | >>>>>>>>----------------- | >>>>----------------- | 53 | +----------------------------------------------------------+-----------------------------+---------------------------+ 54 | 55 | 4. 从下载块C1中切片分出下载块C4 (总下载块=C1+C2+C3+C4) 56 | +--------------------------------------+----------------------+----------------------------+-------------------------+ 57 | | C1 [0:36] | C4 [36:54] | C2 [54:79] | C3 [79:100] | 58 | | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>----- | >----------------- | >>>>>>>>>>>>>>>>>-------- | >>>>>>>>>>>>>>>>----- | 59 | +--------------------------------------+----------------------+----------------------------+-------------------------+ 60 | 61 | 5. 
从下载块C4中切片分出下载块C5 (总下载块=C1+C2+C3+C4+C5) 62 | +--------------------------------------+------------+------------+---------------------------+-----------------------+ 63 | | C1 [0:36] | C4 [36:46] | C5 [46:54] | C2 [54:79] | C3 [79:100] | 64 | | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>-- | >>>>>----- | >>------ | >>>>>>>>>>>>>>>>>>>>>>--- | >>>>>>>>>>>>>>>>>---- | 65 | +--------------------------------------+------------+------------+---------------------------+-----------------------+ 66 | 67 | ... 68 | 69 | 70 | """ 71 | from .utils import UsageInfo 72 | from math import ceil 73 | from threading import RLock 74 | from time import time 75 | from .progress import Progress 76 | import bisect 77 | 78 | 79 | class Chunk: 80 | __slots__ = 'uri_id', 'begin', 'end' 81 | 82 | def __init__(self, uri_id, begin, end): 83 | """ 84 | :param 85 | uri_id : 源URI的ID 86 | begin : 单元的开始索引 87 | end : 单元的结束索引 88 | """ 89 | self.uri_id = uri_id 90 | self.begin = begin 91 | self.end = end 92 | 93 | @property 94 | def length(self): 95 | return self.end - self.begin 96 | 97 | def __repr__(self): 98 | return f'' 99 | 100 | def __iter__(self): 101 | return iter([self.uri_id, self.begin, self.end]) 102 | 103 | 104 | class Block: 105 | """ 下载块是对某一刻下载进度的快照,是一个状态量。不具备实时性。 106 | 既然不作为实时数据,那么不需要对其进行强制与实时数据对应。 107 | 那么对于未确定大小的进度对象Progress,即大小为inf时,不必关注其大小,关注的是其下载的量的块记录。 108 | """ 109 | __slots__ = 'client', 'progress', '_chunk_size', '_range', '_chunks', '_lock' 110 | 111 | def __init__(self, progress, chunk_size, init_chunks=None): 112 | self.client = None 113 | self.progress = progress 114 | self._chunk_size = chunk_size 115 | 116 | begin = progress.begin // chunk_size 117 | 118 | end = 1 119 | if progress.end not in (float('inf'), None): 120 | end = int(ceil(progress.end / chunk_size)) 121 | 122 | self._range = [begin, end] 123 | self._chunks = [] 124 | 125 | if init_chunks is not None: 126 | self._chunks = [Chunk(*chunk) for chunk in init_chunks] 127 | 128 | self._lock = RLock() 129 | self.refresh() 130
| 131 | @property 132 | def begin(self): 133 | return self._range[0] 134 | 135 | @property 136 | def end(self): 137 | return self._range[1] 138 | 139 | @property 140 | def length(self): 141 | try: 142 | return self._range[1] - self._range[0] 143 | except TypeError: 144 | return float('inf') 145 | 146 | @property 147 | def chunks(self): 148 | self.refresh() 149 | return self._chunks 150 | 151 | def current_uri(self): 152 | return self.client and self.client.source_uri 153 | 154 | def slice(self, request_range): 155 | resp_range = self.progress.slice(request_range) 156 | self.refresh() 157 | return resp_range 158 | 159 | def unused_length(self): 160 | """ 返回下载块中未处理的块chunk长度。""" 161 | if self.progress.end in (None, float('inf')): 162 | return float('inf') if not self.progress.is_walk_finished() else 0 163 | self.refresh() 164 | 165 | return (not self._chunks and self.length) or self.length - self._chunks[-1].end 166 | 167 | def __getitem__(self, index): 168 | assert type(index) is int 169 | for c in self.chunks: 170 | if c.begin <= index < c.end: 171 | return c 172 | 173 | assert False 174 | 175 | def refresh(self): 176 | """ 刷新下载块进度信息。 177 | 178 | 由于下载块只是作为快照的形式监控,并不直接由下载客户端接管,所以得到的信息并不是实时的, 179 | 这就需要在获取信息的时候进行刷新信息。 180 | 对外的接口都已进行了刷新数据来保证数据的实时性。 181 | """ 182 | with self._lock: 183 | progress = self.progress 184 | 185 | block_begin = self.begin 186 | 187 | cur_uri = self.current_uri() 188 | 189 | cur_walk = progress.walk_length / self._chunk_size 190 | if not progress.walk_left: 191 | cur_walk = int(ceil(cur_walk)) 192 | else: 193 | cur_walk = int(cur_walk) 194 | 195 | cur_done = progress.done_length / self._chunk_size 196 | if not progress.done_left: 197 | cur_done = int(ceil(cur_done)) 198 | else: 199 | cur_done = int(cur_done) 200 | 201 | # 更新块范围 202 | block_end = progress.end 203 | if block_end is None: 204 | block_end = cur_walk + block_begin 205 | block_end = int(ceil(block_end / self._chunk_size)) 206 | 207 | self._range[1] = block_end 208 | 209 | last_chunk 
= (self._chunks and self._chunks[-1]) or None 210 | if last_chunk is None: 211 | if cur_uri is not None: 212 | self._chunks.append(Chunk(cur_uri.id, 0, cur_walk)) 213 | else: 214 | pass 215 | else: 216 | if cur_uri is not None: 217 | if last_chunk.uri_id != cur_uri.id: 218 | self._chunks.append(Chunk(cur_uri.id, cur_walk, cur_walk)) 219 | else: 220 | last_chunk.end = cur_walk 221 | 222 | def half_unused(self): 223 | unused_len = self.unused_length() 224 | put_begin = self.begin + (self.length - unused_len) + int(ceil(unused_len / 2)) 225 | put_end = self.end 226 | if put_begin == put_end: 227 | return None 228 | return put_begin * self._chunk_size, put_end * self._chunk_size 229 | 230 | def request(self, client): 231 | self.client = client 232 | return self 233 | 234 | async def __aenter__(self): 235 | from nbdler.handler import block_context 236 | 237 | assert self.client 238 | block_context.set(self) 239 | return await self.client.__aenter__() 240 | 241 | async def __aexit__(self, exc_type, exc_val, exc_tb): 242 | from nbdler.handler import block_context 243 | 244 | self.refresh() 245 | client = self.client 246 | await client.__aexit__(exc_type, exc_val, exc_tb) 247 | self.client = None 248 | block_context.set(None) 249 | 250 | def dumps(self): 251 | return { 252 | 'progress': list(self.progress), 253 | 'range': [self.begin, self.end], 254 | 'chunks': [list(c) for c in self._chunks] 255 | } 256 | 257 | def __repr__(self): 258 | return f'' 259 | 260 | def __iter__(self): 261 | return iter([list(self.progress), self._chunk_size, [list(block) for block in self._chunks]]) 262 | 263 | def __lt__(self, other): 264 | return self.begin < other.begin 265 | 266 | 267 | class BlockGroup: 268 | """ 下载块管理器。 """ 269 | def __init__(self, chunk_size, total_size, duration=0): 270 | self._blocks = [] 271 | self.chunk_size = chunk_size 272 | try: 273 | self.total_chunk = int(ceil(total_size / chunk_size)) 274 | except (TypeError, OverflowError): 275 | self.total_chunk = 1 276 | 277 
| if total_size is None: 278 | total_size = float('inf') 279 | self.total_size = total_size 280 | 281 | self.usage_info = UsageInfo(self.walk_length) 282 | 283 | self._start_time = None 284 | self._duration = duration 285 | 286 | def transfer_rate(self): 287 | """ 实时数据传输速率。 """ 288 | return self.usage_info.rate 289 | 290 | def average_speed(self): 291 | """ 平均数据传输速率。 """ 292 | total_time = self._duration + time() - (self._start_time or time()) 293 | return self.walk_length() / (total_time or float('inf')) 294 | 295 | def walk_length(self): 296 | """ 已下载字节数。 """ 297 | return sum((v.progress.walk_length for v in self._blocks)) 298 | 299 | def done_length(self): 300 | """ 已缓冲的字节数。""" 301 | return sum((v.progress.done_length for v in self._blocks)) 302 | 303 | def remaining_length(self): 304 | """ 还剩余字节数。 """ 305 | return self.total_size - self.walk_length() 306 | 307 | def remaining_time(self): 308 | """ 估计剩余时间。 """ 309 | realtime_rate = self.transfer_rate() 310 | if not realtime_rate: 311 | return float('inf') 312 | return self.remaining_length() / realtime_rate 313 | 314 | def percent_complete(self): 315 | return self.walk_length() * 100 / self.total_size 316 | 317 | def is_walk_finished(self): 318 | for b in self._blocks: 319 | if not b.progress.is_walk_finished(): 320 | return False 321 | return not self.integrity_check() 322 | 323 | def is_done_finished(self): 324 | for b in self._blocks: 325 | if not b.progress.is_done_finished(): 326 | return False 327 | return not self.integrity_check() 328 | 329 | is_finished = is_done_finished 330 | 331 | def insert(self, put_range): 332 | """ 插入下载块。 333 | Args: 334 | put_range: 插入的快进度范围range 335 | """ 336 | block = Block(Progress(put_range), self.chunk_size) 337 | bisect.insort(self._blocks, block) 338 | 339 | return block 340 | 341 | def unfinished_blocks(self): 342 | return [b for b in self._blocks if not b.progress.is_walk_finished()] 343 | 344 | def activate(self): 345 | """ 激活下载块映射图。 """ 346 | self._start_time = time() 
347 | 348 | def deactivate(self): 349 | """ 关闭下载块映射图。 """ 350 | self._duration += time() - (self._start_time or time()) 351 | self._start_time = None 352 | self.usage_info.reset() 353 | if self.is_walk_finished(): 354 | if self.total_size in (None, float('inf')): 355 | self.total_size = self.walk_length() 356 | 357 | def integrity_check(self): 358 | """ 下载块映射图完整性检测。 359 | 如果下载块缺失返回缺失的块,否则返回[]。 360 | """ 361 | if not self._blocks: 362 | return [(0, self.total_chunk)] 363 | missing = [] 364 | prv_end = self._blocks[0].end 365 | prv_b = None 366 | for v in self._blocks[1:]: 367 | v.refresh() 368 | if v.begin - prv_end > 0: 369 | # 如果下一个下载块的起点索引比上一个下载块的结束索引要大,说明了这其中缺少了一块。 370 | missing.append((prv_end, v.begin)) 371 | elif v.begin - prv_end < 0: 372 | # 如果下一个下载块的起点索引比上一个下载块的结束索引要小,说明这出现了下载块范围交叉。 373 | raise ValueError(f'完整性校验不通过。冲突:{prv_b} <-> {v}') 374 | 375 | prv_end = v.end 376 | prv_b = v 377 | 378 | return missing 379 | 380 | def dumps(self): 381 | return { 382 | 'chunk_size': self.chunk_size, 383 | 'total_size': self.total_size, 384 | 'duration': self._duration, 385 | 'blocks': [b.dumps() for b in self._blocks], 386 | } 387 | 388 | @classmethod 389 | def loads(cls, dumpy): 390 | chunk_size = dumpy['chunk_size'] 391 | block_grp = cls(chunk_size, dumpy['total_size'], dumpy['duration']) 392 | for block in dumpy['blocks']: 393 | progress = Progress(*block['progress']) 394 | block = Block(progress, chunk_size, block['chunks']) 395 | bisect.insort(block_grp._blocks, block) 396 | return block_grp 397 | 398 | def __iter__(self): 399 | """ 迭代返回下载块对象。""" 400 | return iter([self.chunk_size, self.total_size, [list(block) for block in self._blocks]]) 401 | 402 | def __repr__(self): 403 | return f'' 405 | -------------------------------------------------------------------------------- /nbdler/client/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from . 
import aiohttp, requests 4 | from .abstract import AbstractClient 5 | from collections import defaultdict 6 | 7 | __all__ = ['get_policy', 'ClientPolicy'] 8 | 9 | _solutions = defaultdict(list) 10 | _name_solution = {} 11 | 12 | 13 | class ClientPolicy: 14 | def __init__(self, **specified_mapping): 15 | self._specified = {k.lower(): v.lower() for k, v in specified_mapping.items()} 16 | 17 | def get_solution(self, protocol): 18 | """ 返回根据策略决定的客户端处理模块。 19 | Args: 20 | protocol: 要处理的协议 21 | 22 | Returns: 23 | 返回客户端处理方案 24 | """ 25 | sol_name = self._specified.get(protocol, None) 26 | if sol_name is None: 27 | # 使用该协议最新注册的客户端处理器作为默认的处理策略 28 | sol_name = _solutions.get(protocol, [None])[-1] 29 | if sol_name is None: 30 | raise NotImplementedError(f'没有找到协议{protocol}的处理策略。') 31 | solution = _name_solution.get(sol_name, None) 32 | if solution is None: 33 | raise NotImplementedError(f'没有找到名称为{sol_name}的客户端处理器。') 34 | return solution 35 | 36 | def __iter__(self): 37 | return iter(self._specified.items()) 38 | 39 | 40 | class ProtocolSolution: 41 | def __init__(self, module): 42 | self._module = module 43 | 44 | @property 45 | def name(self): 46 | return self._module.NAME 47 | 48 | @property 49 | def supported_protocols(self): 50 | return self._module.PROTOCOL_SUPPORT 51 | 52 | def is_async(self): 53 | return self._module.ASYNC_EXECUTE 54 | 55 | @property 56 | def dlopen(self): 57 | return self._module.ClientHandler.dlopen 58 | 59 | def get_client(self, *args, **kwargs): 60 | return self._module.ClientHandler(*args, **kwargs) 61 | 62 | def get_session(self, *args, **kwargs): 63 | return self._module.ClientSession(*args, **kwargs) 64 | 65 | 66 | def get_policy(**kwargs): 67 | return ClientPolicy(**kwargs) 68 | 69 | 70 | def register(module): 71 | """ 注册下载客户端处理模块。 72 | 73 | 客户端模块规范: 74 | 1. 客户端处理程序要求继承abstract_base.py中的AbstractClient类 75 | 2. 使用类变量NAME作为客户端的唯一标识名称,尽量避免与其他客户端重名, 76 | 重名的处理策略是后注册覆盖前注册。 77 | 3. 使用ClientHandler作为客户端的类名,或通过赋值该模块变量名实现 78 | 4. 
使用ClientSession作为客户端会话,必须存在该变量,若不需要会话则赋值noop函数, 79 | 客户端会话创建不提供参数,若需要提供使用functions.partial传递定义 80 | 81 | Args: 82 | module: 协议处理解决方案 83 | 84 | """ 85 | global _solutions, _name_solution 86 | solution = ProtocolSolution(module) 87 | for protocol in solution.supported_protocols: 88 | _solutions[protocol].append(solution.name) 89 | _name_solution[solution.name] = solution 90 | 91 | 92 | def main(): 93 | # 多线程HTTP/HTTPS,使用requests库 94 | register(requests) 95 | # 异步HTTP/HTTPS,使用aiohttp库 96 | register(aiohttp) 97 | 98 | 99 | # 注册下载客户端 100 | main() 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /nbdler/client/abstract.py: -------------------------------------------------------------------------------- 1 | 2 | import asyncio 3 | 4 | 5 | class AbstractClient: 6 | """ 抽象客户端 7 | 8 | Class Variable: 9 | NAME: 客户端名称,作为客户端的唯一标识 10 | PROTOCOL_SUPPORT: 客户端支持处理的协议, protocol/scheme 11 | ASYNC_EXECUTE: 指定客户端的是否为异步实现 12 | TIMEOUT: 客户端默认的连接connect,读取read超时参数 13 | """ 14 | 15 | TIMEOUT = 10 16 | 17 | def __init__(self, session, 18 | source_uri, 19 | progress, 20 | resume_capability, 21 | **kwargs): 22 | """ 23 | Args: 24 | session: 客户端会话 25 | source_uri: 下载源SourceUri对象 26 | progress: 请求进度对象Progress 27 | resume_capability: 是否支持断点续传,若为None则代表不确定,连接后将根据实际情况赋值 28 | """ 29 | self.source_uri = source_uri 30 | self.progress = progress 31 | self.resume_capability = resume_capability 32 | self.kwargs = kwargs 33 | 34 | self._closed = False 35 | self.session = session 36 | self.resp = None 37 | 38 | async def connect(self): 39 | """ (可定义非异步方法)客户端连接 40 | 41 | Returns: 42 | UriResponse对象,该对象指定了资源的基本信息。 43 | """ 44 | raise NotImplementedError 45 | 46 | async def fetch(self): 47 | """ (可定义非异步方法)客户端循环获取数据 """ 48 | raise NotImplementedError 49 | 50 | async def pause(self): 51 | """ 客户端暂停 """ 52 | self._closed = True 53 | raise NotImplementedError 54 | 55 | async def close(self): 56 | raise NotImplementedError 57 | 58 | async def run(self): 59 | 
raise NotImplementedError 60 | 61 | async def __aenter__(self): 62 | """ 异步with enter. 63 | 64 | 进入客户端,准备开始客户端。 65 | 该方法不应该执行非异步的长耗时任务。 66 | 67 | Returns: 68 | 返回自身对象self 69 | """ 70 | self._closed = False 71 | return self 72 | 73 | async def __aexit__(self, exc_type, exc_val, exc_tb): 74 | """ 异步with exit. 75 | 76 | 退出客户端,做必要的链接关闭操作,设置实例变量_closed=True。 77 | """ 78 | coro_or_result = self.close() 79 | # 兼容异步关闭链接方法 80 | if asyncio.iscoroutine(coro_or_result): 81 | await coro_or_result 82 | self.session = None 83 | self._closed = True 84 | 85 | def __enter__(self): 86 | """ 同步with enter. 87 | 88 | 进入客户端,准备开始客户端。 89 | 该方法不应该执行非异步的长耗时任务。 90 | 91 | Returns: 92 | 返回自身对象self 93 | """ 94 | self._closed = False 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_val, exc_tb): 98 | self.close() 99 | self.session = None 100 | self._closed = True 101 | 102 | @classmethod 103 | def dlopen(cls, source, progress, **kwargs): 104 | raise NotImplementedError 105 | 106 | def __repr__(self): 107 | status = 'running' 108 | if self._closed: 109 | status = 'closed' 110 | name = self.run.__globals__['NAME'] 111 | support = self.run.__globals__['PROTOCOL_SUPPORT'] 112 | is_async = self.run.__globals__['ASYNC_EXECUTE'] 113 | return f'' 116 | 117 | 118 | def noop(): 119 | """ ignore function. 
""" 120 | return None 121 | 122 | 123 | NAME = 'abstract' 124 | PROTOCOL_SUPPORT = ('http', 'https') 125 | ASYNC_EXECUTE = True 126 | 127 | ClientSession = noop 128 | ClientHandler = AbstractClient 129 | -------------------------------------------------------------------------------- /nbdler/client/aiohttp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import aiohttp 4 | import asyncio 5 | from urllib.parse import urlunparse, urlparse 6 | from nbdler.uri import URIResponse 7 | from .base_http import BaseHTTPClient, content_range_fullsize, content_type_mimetype 8 | from traceback import format_exc 9 | from nbdler.handler import h 10 | import logging 11 | import nbdler 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | class AIOHTTPClient(BaseHTTPClient): 17 | TIMEOUT = 10 18 | 19 | async def connect(self): 20 | session = self.session 21 | source_uri = self.source_uri 22 | proxies = source_uri.proxies or {} 23 | proxy = None 24 | if not proxies.get(source_uri.scheme): 25 | if source_uri.kwargs.get('trust_env', False): 26 | for scheme, proxy_info in aiohttp.helpers.proxies_from_env().items(): 27 | if scheme == source_uri.scheme: 28 | proxy = str(proxy_info.proxy) 29 | proxy_auth = proxy_info.proxy_auth 30 | if proxy_auth is not None: 31 | # 将代理验证添加入代理链接 32 | username = proxy_auth.login 33 | password = proxy_auth.password 34 | proxy_parse = urlparse(str(proxies)) 35 | scheme, netloc, path, params, query, fragment = list(proxy_parse) 36 | netloc = f'{username}:{password}@{netloc}' 37 | proxy = urlunparse([scheme, netloc, path, params, query, fragment]) 38 | break 39 | 40 | cookies = source_uri.cookies 41 | uri, headers = self._build_uri_headers() 42 | 43 | timeout = self.kwargs.get('timeout', None) or AIOHTTPClient.TIMEOUT 44 | timeout = aiohttp.ClientTimeout(sock_connect=timeout, sock_read=timeout) 45 | 46 | try: 47 | resp = await session.get( 48 | uri, 49 | headers=headers, 50 | cookies=cookies, 51 | 
proxy=proxy, 52 | timeout=timeout, 53 | ) 54 | except (aiohttp.ClientError, asyncio.TimeoutError) as error: 55 | raise nbdler.error.TimeoutError(f"{uri}") from error 56 | except BaseException as error: 57 | log.debug(f'{error}', format_exc()) 58 | raise nbdler.error.FatalError() from error 59 | else: 60 | total_length = content_range_fullsize(resp.headers.get('content-range')) 61 | response = URIResponse(str(resp.url), list(resp.headers.items()), resp.status, resp.reason, 62 | total_length, content_type_mimetype(resp.headers.get('content-type')), 63 | self.progress.range, resp.status == 206) 64 | 65 | if self.resume_capability is None: 66 | if resp.status not in (206, 200): 67 | raise nbdler.error.FatalError(f"[{resp.status} {resp.reason}] '{resp.url}'") 68 | self.resume_capability = resp.status == 206 69 | 70 | elif self.resume_capability is True: 71 | if not resp.status == 206: 72 | raise nbdler.error.FatalError(f"[{resp.status} {resp.reason}] '{resp.url}'") 73 | 74 | self.session = session 75 | self.resp = resp 76 | return response 77 | 78 | async def fetch(self): 79 | session, resp = self.session, self.resp 80 | pg = self.progress 81 | 82 | speed_adjuster = h.speed_adjuster 83 | slicer = h.slicer 84 | uri_mgr = h.uri_mgr 85 | file_data = h.file_data 86 | 87 | pg.start() 88 | 89 | uri_mgr.success(resp) 90 | 91 | receive_data = resp.content.read 92 | data = b'' 93 | while True: 94 | if self._closed: 95 | break 96 | 97 | await speed_adjuster.acquire() 98 | await slicer.response() 99 | 100 | previous_len = len(data) 101 | remain_len = pg.total_length - pg.walk_length 102 | try: 103 | if remain_len >= 8192: 104 | data += await receive_data(8192) 105 | elif remain_len > 0: 106 | data += await receive_data(remain_len) 107 | else: 108 | break 109 | except asyncio.TimeoutError as err: 110 | uri_mgr.timeout(err) 111 | break 112 | except BaseException as err: 113 | uri_mgr.fatal(err) 114 | break 115 | 116 | walk_len = len(data) - previous_len 117 | if not walk_len: 118 | 
if resp.content_length is None: 119 | pg.set_walk_finish() 120 | break 121 | 122 | pg.walk(walk_len) 123 | 124 | if pg.walk_length >= pg.total_length: 125 | break 126 | elif len(data) >= 65536: # 64 KB 127 | await file_data.store(data) 128 | data = b'' 129 | if data: 130 | await file_data.store(data) 131 | 132 | pg.stop() 133 | 134 | async def run(self): 135 | if self.resp: 136 | self.close() 137 | 138 | await h.slicer.response() 139 | try: 140 | resp = await self.connect() 141 | except nbdler.error.UriError as err: 142 | h.uri_mgr.fatal(err) 143 | raise 144 | else: 145 | h.uri_mgr.success(resp) 146 | 147 | # self.validate_token(resp) 148 | if not self._closed: 149 | await self.fetch() 150 | 151 | def close(self): 152 | session = self.session 153 | resp = self.resp 154 | self.session = None 155 | self.resp = None 156 | if resp: 157 | resp.release() 158 | resp.close() 159 | 160 | @classmethod 161 | async def dlopen(cls, source, progress, **kwargs): 162 | async with ClientSession() as session: 163 | async with cls(session, source, progress, None, **kwargs) as cli: 164 | resp = await cli.connect() 165 | size = resp.length 166 | progress._range = (0, size) 167 | 168 | return resp 169 | 170 | 171 | NAME = 'aiohttp' 172 | PROTOCOL_SUPPORT = ('http', 'https') 173 | ASYNC_EXECUTE = True 174 | 175 | ClientHandler = AIOHTTPClient 176 | 177 | 178 | class ClientSession(aiohttp.ClientSession): 179 | async def close(self) -> None: 180 | await super().close() 181 | 182 | # doc: https://docs.aiohttp.org/en/latest/client_advanced.html#graceful-shutdown 183 | # 会话关闭强制等待避免异常 184 | await asyncio.sleep(0.25) 185 | 186 | -------------------------------------------------------------------------------- /nbdler/client/base_http.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from wsgiref.headers import Headers 4 | from urllib.parse import urlunparse 5 | from .abstract import AbstractClient 6 | from ..utils import update_range_field 7 | 
import logging 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class BaseHTTPClient(AbstractClient): 13 | NAME = 'base_http' 14 | PROTOCOL_SUPPORT = ('http', 'https') 15 | ASYNC_EXECUTE = None 16 | TIMEOUT = 10 17 | 18 | def _build_uri_headers(self): 19 | source_uri = self.source_uri 20 | pg = self.progress 21 | uri = source_uri.uri 22 | headers = Headers(source_uri.headers.items()) 23 | 24 | if self.resume_capability is not False: 25 | range_field = source_uri.range_field 26 | if range_field is None: 27 | range_field = { 28 | 'Range': 'bytes={begin}-{end_with}' 29 | } 30 | 31 | scheme, netloc, path, params, query, fragment = list(source_uri.urlparse) 32 | req_range = (pg.begin + pg.walk_length, pg.end) 33 | query = (query + ''.join( 34 | [f'{k}={update_range_field(v, req_range)}' 35 | for k, v in range_field.items() if k.startswith('&')] 36 | )).lstrip('&') 37 | 38 | for k, v in range_field.items(): 39 | if not k.startswith('&'): 40 | headers.add_header(k, update_range_field(v, req_range)) 41 | 42 | # 由于一些浏览器地址栏会直接把空格显示出来而不进行编码,所以这里单独对空格编码。 43 | uri = urlunparse((scheme, netloc, path, params, query, fragment)).replace(' ', '%20') 44 | 45 | return uri, headers 46 | 47 | def close(self): 48 | session = self.session 49 | resp = self.resp 50 | self.session = None 51 | self.resp = None 52 | if resp: 53 | resp.close() 54 | 55 | async def pause(self): 56 | self._closed = True 57 | 58 | def validate_token(self, current_resp): 59 | resp = self.resp 60 | if resp is None: 61 | raise ValueError('cannot validate on a unconnected client.') 62 | 63 | source_resp = self.source_uri.getresponse() 64 | 65 | # TODO: 在多下载源的情况下对下载源之间经过资源数据采样校验,通过后作为响应基准 66 | if source_resp is None: 67 | raise ValueError('下载源没有经过校验的资源响应基准。') 68 | 69 | validate_name = ['length', 'etag', 'content_md5', 'content_type', 'last_modified'] 70 | 71 | if not all([getattr(current_resp, name) == getattr(source_resp, name) 72 | for name in validate_name]): 73 | log.warning([f'{name}: 
({getattr(current_resp, name)}) ?= ({getattr(source_resp, name)})' 74 | for name in validate_name]) 75 | raise ValueError('connection resource token not match.') 76 | return True 77 | 78 | 79 | def content_range_fullsize(content_range): 80 | """ 从HTTP响应头中的Content-Range中获取文件总长。""" 81 | if content_range is None: 82 | return None 83 | return int(content_range.rsplit('/', 1)[-1]) 84 | 85 | 86 | def content_type_mimetype(content_type_header): 87 | """ 从HTTP响应头中的Content-Type中获取文件mimetype类型。""" 88 | if content_type_header is None: 89 | return None 90 | return content_type_header.split(';', 1)[0] or None 91 | -------------------------------------------------------------------------------- /nbdler/client/requests.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import requests 4 | from nbdler.uri import URIResponse 5 | from traceback import format_exc 6 | from .base_http import BaseHTTPClient, content_range_fullsize, content_type_mimetype 7 | from nbdler.handler import h 8 | import logging 9 | import nbdler 10 | from requests.utils import get_environ_proxies 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class HTTPClient(BaseHTTPClient): 16 | TIMEOUT = 10 17 | 18 | def connect(self): 19 | session = self.session 20 | source_uri = self.source_uri 21 | proxies = source_uri.proxies or {} 22 | if not proxies.get(source_uri.scheme): 23 | if source_uri.kwargs.get('trust_env', False): 24 | # Set environment's proxies. 
25 | no_proxy = proxies.get('no_proxy') if proxies is not None else None 26 | env_proxies = get_environ_proxies(source_uri.uri, no_proxy=no_proxy) 27 | for (k, v) in env_proxies.items(): 28 | proxies.setdefault(k, v) 29 | 30 | cookies = source_uri.cookies 31 | verify = source_uri.kwargs.get('verify', True) 32 | uri, headers = self._build_uri_headers() 33 | timeout = self.kwargs.get('timeout', None) or HTTPClient.TIMEOUT 34 | try: 35 | resp = requests.get( 36 | source_uri.uri, 37 | headers=headers, 38 | proxies=proxies, 39 | cookies=cookies, 40 | timeout=timeout, 41 | stream=True, 42 | verify=verify 43 | ) 44 | except requests.exceptions.Timeout as error: 45 | raise nbdler.error.TimeoutError(f"{uri}") from error 46 | except BaseException as error: 47 | log.debug(f'{error}', format_exc()) 48 | raise nbdler.error.FatalError() from error 49 | else: 50 | total_length = content_range_fullsize(resp.headers.get('content-range')) 51 | response = URIResponse(str(resp.url), list(resp.headers.items()), resp.status_code, resp.reason, 52 | total_length, content_type_mimetype(resp.headers.get('content-type')), 53 | self.progress.range, resp.status_code == 206) 54 | 55 | if self.resume_capability is None: 56 | if resp.status_code not in (206, 200): 57 | raise nbdler.error.FatalError(f"[{resp.status_code} {resp.reason}] '{resp.url}'") 58 | self.resume_capability = resp.status_code == 206 59 | 60 | elif self.resume_capability is True: 61 | if not resp.status_code == 206: 62 | raise nbdler.error.FatalError(f"[{resp.status_code} {resp.reason}] '{resp.url}'") 63 | 64 | self.session = session 65 | self.resp = resp 66 | return response 67 | 68 | def fetch(self): 69 | session, resp = self.session, self.resp 70 | pg = self.progress 71 | 72 | speed_adjuster = h.speed_adjuster 73 | slicer = h.slicer 74 | uri_mgr = h.uri_mgr 75 | file_data = h.file_data 76 | receive_data = resp.raw.read 77 | 78 | pg.start() 79 | 80 | uri_mgr.success(resp) 81 | 82 | data = b'' 83 | while True: 84 | if 
self._closed: 85 | break 86 | 87 | speed_adjuster.acquire_threadsafe() 88 | slicer.response_threadsafe() 89 | 90 | previous_len = len(data) 91 | remain_len = pg.total_length - pg.walk_length 92 | try: 93 | if remain_len >= 8192: 94 | data += receive_data(8192) 95 | elif remain_len > 0: 96 | data += receive_data(remain_len) 97 | else: 98 | break 99 | except requests.exceptions.Timeout as err: 100 | uri_mgr.timeout(err) 101 | break 102 | except BaseException as err: 103 | uri_mgr.fatal(err) 104 | break 105 | 106 | walk_len = len(data) - previous_len 107 | if not walk_len: 108 | 109 | if resp.headers.get('content-length') is None: 110 | pg.set_walk_finish() 111 | break 112 | 113 | pg.walk(walk_len) 114 | 115 | if pg.walk_length >= pg.total_length: 116 | break 117 | elif len(data) >= 65536: # 64 KB 118 | file_data.store_threadsafe(data) 119 | data = b'' 120 | if data: 121 | file_data.store_threadsafe(data) 122 | 123 | pg.stop() 124 | 125 | def run(self): 126 | h.slicer.response_threadsafe() 127 | try: 128 | resp = self.connect() 129 | except nbdler.error.UriError as err: 130 | h.uri_mgr.fatal(err) 131 | raise 132 | else: 133 | h.uri_mgr.success(resp) 134 | 135 | self.validate_token(resp) 136 | if not self._closed: 137 | self.fetch() 138 | 139 | @classmethod 140 | async def dlopen(cls, source, progress, **kwargs): 141 | with cls(None, source, progress, None, **kwargs) as cli: 142 | resp = cli.connect() 143 | return resp 144 | 145 | 146 | def session_without_trust_env(): 147 | session = requests.Session() 148 | # 默认创建不使用环境中的代理的会话,如要使用设置下载源的trust_env参数。 149 | session.trust_env = False 150 | return session 151 | 152 | 153 | NAME = 'requests' 154 | PROTOCOL_SUPPORT = ('http', 'https') 155 | ASYNC_EXECUTE = False 156 | 157 | ClientHandler = HTTPClient 158 | ClientSession = session_without_trust_env 159 | 160 | -------------------------------------------------------------------------------- /nbdler/download.py: 
-------------------------------------------------------------------------------- 1 | 2 | from concurrent.futures.thread import ThreadPoolExecutor 3 | from nbdler.handler import ( 4 | SpeedAdjuster, 5 | AIOReaderWriter, 6 | BlockSlicer, 7 | FileTempData, 8 | ClientWorker, 9 | URIStatusManager, 10 | GatherException, 11 | h, Handlers) 12 | from .client import get_policy, ClientPolicy 13 | from .version import VERSION 14 | from .utils import forever_loop_in_executor 15 | from traceback import format_exc 16 | import weakref 17 | import warnings 18 | import asyncio 19 | import os 20 | 21 | __all__ = ( 22 | 'Downloader', 23 | ) 24 | 25 | 26 | class DownloadConfigure: 27 | ADJUSTABLE = frozenset( 28 | {'max_concurrent', 'max_speed', 'buffer_size', 'timeout', 'interval', 'client_policy'}) 29 | 30 | def __init__(self, resume_capability, max_concurrent, chunk_size, buffer_size, timeout=10, 31 | max_speed=None, downloading_ext='.downloading', interval=0.5, client_policy=None, **kwargs): 32 | 33 | self.version = VERSION 34 | self.resume_capability = resume_capability 35 | self.max_concurrent = max_concurrent 36 | self.chunk_size = chunk_size 37 | self.buffer_size = buffer_size 38 | self.timeout = timeout 39 | self.interval = interval 40 | self.max_speed = max_speed 41 | self.downloading_ext = downloading_ext 42 | self.client_policy = client_policy 43 | self.kwargs = kwargs 44 | 45 | def set(self, **kwargs): 46 | """ 设置配置。 47 | Args: 48 | **kwargs: 49 | max_concurrent: 最大并发数 50 | max_speed: 最大速度限制 51 | buffer_size: 最大文件缓冲大小 52 | timeout: 客户端连接接收超时时间 53 | interval: 速度调节间隙 54 | client_policy: 客户端处理策略 55 | """ 56 | attrs = set(kwargs).intersection(DownloadConfigure.ADJUSTABLE) 57 | for attr in attrs: 58 | self.__setattr__(attr, kwargs[attr]) 59 | 60 | def dumps(self): 61 | opts = dict(self.__dict__) 62 | client_policy = self.client_policy 63 | opts['client_policy'] = dict(client_policy) 64 | opts.update(opts.pop('kwargs')) 65 | return opts 66 | 67 | @classmethod 68 | def loads(cls, 
dumpy): 69 | config = cls(**dumpy) 70 | if not isinstance(config.client_policy, ClientPolicy): 71 | config.client_policy = get_policy(**config.client_policy) 72 | return config 73 | 74 | def __repr__(self): 75 | return (f'') 77 | 78 | 79 | class Downloader: 80 | def __init__(self, file, uris, block_grp, *, handlers=None, **kwargs): 81 | 82 | self.file = file 83 | self.uris = uris 84 | self.block_grp = block_grp 85 | self.config = DownloadConfigure.loads(kwargs) 86 | 87 | self._executor = None 88 | 89 | self._loop = None 90 | self._future = None 91 | self._closed = False 92 | self._handlers = Handlers() 93 | 94 | if handlers is None: 95 | handlers = [] 96 | 97 | buildin_handlers = [ 98 | ClientWorker, 99 | SpeedAdjuster, 100 | FileTempData, 101 | AIOReaderWriter, 102 | BlockSlicer, 103 | GatherException, 104 | URIStatusManager, 105 | ] 106 | handlers.extend(buildin_handlers) 107 | for handler in handlers: 108 | if handler.name in self._handlers: 109 | continue 110 | if isinstance(handler, type): 111 | handler = handler() 112 | 113 | handler.add_parent(weakref.proxy(self)) 114 | self._handlers[handler.name] = handler 115 | 116 | def exceptions(self, exception_type=None, *, just_new_exception=True): 117 | """ 线程安全获取异常 118 | 119 | 以生成器的形式获取内部发生的异常,当下载任务暂停或者完成后将中断生成器的迭代。 120 | 使用方式: 121 | for exception in dl.exceptions(): 122 | do_some_works(exception) 123 | 124 | Args: 125 | exception_type: 指定异常类型,可选ClientError、HandlerError。默认None则获取所有异常。 126 | just_new_exception: 是否忽略当前时间前的旧异常,仅返回之后的新异常。 127 | 128 | Yields: 129 | 内部出现的client或handler异常对象。 130 | """ 131 | yield from self._handlers.exception.acquire_threadsafe( 132 | exception_type, just_new_exception=just_new_exception) 133 | 134 | def aexceptions(self, exception_type=None, *, just_new_exception=True): 135 | """ 异步返回异常错误。 具体参见exceptions()方法。 136 | 使用方式: 137 | async for exception in dl.aexceptions(): 138 | do_some_works(exception) 139 | """ 140 | return self._handlers.exception.acquire( 141 | exception_type, 
just_new_exception=just_new_exception) 142 | 143 | async def astart(self): 144 | """ 在当前事件循环中运行下载器。""" 145 | if self._closed: 146 | raise RuntimeError('Downloader is already closed.') 147 | loop = asyncio.get_running_loop() 148 | self._loop = loop 149 | if self.block_grp.is_done_finished(): 150 | raise RuntimeError('download is completed.') 151 | 152 | self._future = loop.create_future() 153 | 154 | async def handler_worker(hd): 155 | try: 156 | return await hd.start() 157 | except BaseException as err: 158 | h.exception.handler_error(err) 159 | self.pause(0) 160 | 161 | with h.enter(self._handlers, loop): 162 | self.block_grp.activate() 163 | # prepare() 164 | await self._handlers.prepare() 165 | # start() 166 | result = await asyncio.gather( 167 | *[handler_worker(handler) for handler in h.iter_all()] 168 | ) 169 | # join() 170 | await self._handlers.join() 171 | self.block_grp.deactivate() 172 | 173 | self._future.set_result(result) 174 | 175 | def start(self, *, loop=None): 176 | """ 在指定的循环中运行下载器。 177 | 178 | 若loop=None不指定事件循环,那么将创建新的线程作为下载器的事件循环。 179 | 180 | Args: 181 | loop: 指定事件循环运行下载器 182 | 183 | Returns: 184 | 返回下载器运行的concurrent.future.Future对象 185 | """ 186 | 187 | if self._closed: 188 | raise RuntimeError('Downloader is already closed.') 189 | 190 | if self.block_grp.is_finished(): 191 | raise RuntimeError('download is already finished.') 192 | 193 | if self._loop is not None: 194 | loop = self._loop 195 | 196 | if loop is None: 197 | def cb(f): 198 | nonlocal executor 199 | executor.shutdown(False) 200 | 201 | executor = ThreadPoolExecutor( 202 | max_workers=1, thread_name_prefix=f'Downloader {self.file.name} {self.file.size}') 203 | exec_fut = forever_loop_in_executor(executor) 204 | exec_fut.add_done_callback(cb) 205 | self._executor = executor 206 | loop = exec_fut.get_loop() 207 | 208 | fut = asyncio.run_coroutine_threadsafe(self.astart(), loop=loop) 209 | self._loop = loop 210 | return fut 211 | 212 | async def apause(self): 213 | """ 异步暂停等待。""" 
214 | if self._closed: 215 | raise RuntimeError('Downloader is already closed.') 216 | result = await self._await_loopsafe(self._handlers.pause()) 217 | await self.ajoin() 218 | return result 219 | 220 | async def aclose(self): 221 | """ 异步关闭下载器。""" 222 | if self._closed: 223 | raise RuntimeError('Downloader is already closed.') 224 | 225 | if not self._future.done(): 226 | raise RuntimeError('cannot close a running Downloader.') 227 | result = await self._await_loopsafe(self._handlers.close()) 228 | await self.ajoin() 229 | self._closed = True 230 | 231 | if self._executor: 232 | self._loop.call_soon_threadsafe(self._loop.stop) 233 | 234 | # 若文件已完毕,去除.downloading后缀 235 | if self.block_grp.is_done_finished(): 236 | file = self.file 237 | filepath = f'{file.pathname}{self.config.downloading_ext}' 238 | start_filepath = file.pathname 239 | target_filepath = start_filepath 240 | postfix = 0 241 | while True: 242 | try: 243 | os.rename(filepath, target_filepath) 244 | except FileExistsError: 245 | postfix += 1 246 | target_filepath = os.path.join(file.path, file.number_name(postfix)) 247 | else: 248 | if postfix != 0: 249 | file.name = file.number_name(postfix) 250 | break 251 | 252 | # 删除下载配置文件 253 | os.unlink(f'{start_filepath}{self.config.downloading_ext}.cfg') 254 | return result 255 | 256 | async def ajoin(self): 257 | """ 异步等待下载器结束。""" 258 | if self._closed: 259 | raise RuntimeError('Downloader is already closed.') 260 | return await self._await_loopsafe(self._future) 261 | 262 | async def _await_loopsafe(self, *coros_or_futures): 263 | """ 事件循环安全的异步等待。 264 | 265 | Args: 266 | *coros_or_futures: coroutine或future对象列表。 267 | 268 | Returns: 269 | 返回coros_or_futures的返回结果列表。 270 | """ 271 | current_loop = asyncio.get_running_loop() 272 | loop = self._loop 273 | if loop is None: 274 | loop = current_loop 275 | 276 | async def _execute_loop(): 277 | with h.enter(self._handlers): 278 | r = await asyncio.gather(*coros_or_futures) 279 | return r 280 | fut = 
asyncio.run_coroutine_threadsafe(_execute_loop(), loop) 281 | result = await asyncio.wrap_future(fut) 282 | 283 | return result 284 | 285 | def _call_threadsafe(self, coroutine, timeout=None): 286 | """ 下载器的异步操作线程安全化。 287 | Args: 288 | coroutine: 异步操作协程 289 | timeout: 超时等待事件 290 | 291 | Returns: 292 | 当timeout=0时,返回concurrent.future.Future对象, 293 | 否则,协程coroutine的执行结果或抛出超时异常。 294 | """ 295 | loop = self._loop 296 | assert loop 297 | future = asyncio.run_coroutine_threadsafe(coroutine, loop) 298 | if timeout == 0: 299 | return future 300 | return future.result(timeout) 301 | 302 | def pause(self, timeout=None): 303 | """ 线程安全暂停下载器。具体参见apause方法""" 304 | if self._closed: 305 | raise RuntimeError('Downloader is already closed.') 306 | return self._call_threadsafe(self.apause(), timeout=timeout) 307 | 308 | def close(self, timeout=None): 309 | """ 线程安全关闭下载器。具体参见aclose方法""" 310 | if self._closed: 311 | raise RuntimeError('Downloader is already closed.') 312 | return self._call_threadsafe(self.aclose(), timeout=timeout) 313 | 314 | def join(self, timeout=None): 315 | """ 线程安全等待下载器。具体参见ajoin方法""" 316 | if self._closed: 317 | raise RuntimeError('Downloader is already closed.') 318 | return self._call_threadsafe(self.ajoin(), timeout=timeout) 319 | 320 | def dumps(self): 321 | dumpy = { 322 | 'config': self.config.dumps(), 323 | 'file': self.file.dumps(), 324 | 'uris': self.uris.dumps(), 325 | 'block_grp': self.block_grp.dumps(), 326 | } 327 | return dumpy 328 | 329 | @classmethod 330 | def loads(cls, dumpy, handlers=None): 331 | from nbdler.uri import URIs 332 | from nbdler.file import File 333 | from nbdler.block import BlockGroup 334 | 335 | uris = URIs.loads(dumpy['uris']) 336 | file = File(**dumpy['file']) 337 | block_grp = BlockGroup.loads(dumpy['block_grp']) 338 | return cls(file, uris, block_grp, handlers=handlers, **dumpy['config']) 339 | 340 | transfer_rate = property(lambda self: self.block_grp.transfer_rate) 341 | 342 | average_speed = property(lambda self: 
self.block_grp.average_speed) 343 | 344 | walk_length = property(lambda self: self.block_grp.walk_length) 345 | 346 | done_length = property(lambda self: self.block_grp.done_length) 347 | 348 | remaining_length = property(lambda self: self.block_grp.remaining_length) 349 | 350 | remaining_time = property(lambda self: self.block_grp.remaining_time) 351 | 352 | percent_complete = property(lambda self: self.block_grp.percent_complete) 353 | 354 | is_walk_finished = property(lambda self: self.block_grp.is_walk_finished) 355 | 356 | is_done_finished = property(lambda self: self.block_grp.is_done_finished) 357 | 358 | def is_finished(self): 359 | """ 返回文件是否下载完毕。""" 360 | return self.block_grp.is_finished() and (not self._future or self._future.done()) 361 | 362 | def set_config(self, **kwargs): 363 | """ 配置下载器。参见DownloadConfigure.set()方法。""" 364 | self.config.set(**kwargs) 365 | 366 | def __repr__(self): 367 | running = False 368 | if self._future is not None and not self._future.done(): 369 | running = True 370 | return f'' 371 | 372 | async def __aenter__(self): 373 | return self 374 | 375 | async def __aexit__(self, exc_type, exc_val, exc_tb): 376 | return await self.aclose() 377 | 378 | def __del__(self, _warnings=warnings): 379 | if not self._closed: 380 | self.close() 381 | 382 | 383 | -------------------------------------------------------------------------------- /nbdler/error.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Error(Exception): 4 | def __init__(self, *args, **kwargs): 5 | self.args = args 6 | self.kwargs = kwargs 7 | 8 | def __repr__(self): 9 | return f'<{self.__class__.__name__} args={self.args} kwargs={self.kwargs}>' 10 | 11 | 12 | class GatherableError(Error): 13 | def __init__(self, exception, exc_info, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | self.exception = exception 16 | self.exc_info = exc_info 17 | 18 | def __repr__(self): 19 | return f'<{self.__class__.__name__} 
exc="{self.exception}">' 20 | 21 | 22 | class ClientError(GatherableError): 23 | pass 24 | 25 | 26 | class HandlerError(GatherableError): 27 | pass 28 | 29 | 30 | class UriError(Error): 31 | pass 32 | 33 | 34 | class TimeoutError(UriError): 35 | pass 36 | 37 | 38 | class FatalError(UriError): 39 | pass 40 | 41 | 42 | class MaxRetriesExceeded(ClientError): 43 | pass 44 | -------------------------------------------------------------------------------- /nbdler/file.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class File: 5 | __slots__ = 'name', 'path', 'size' 6 | 7 | def __init__(self, path, name, size): 8 | """ 9 | :param 10 | path : 文件路径(不包括文件名) 11 | name : 文件名称 12 | size : 文件大小 13 | """ 14 | self.name = name 15 | self.path = path 16 | self.size = size 17 | 18 | @property 19 | def extension(self): 20 | return os.path.splitext(self.name)[-1] 21 | 22 | @property 23 | def pathname(self): 24 | return os.path.join(self.path, self.name) 25 | 26 | def number_name(self, number): 27 | just_name, ext = os.path.splitext(self.name) 28 | return f'{just_name}({number}){ext}' 29 | 30 | def __repr__(self): 31 | return f'' 32 | 33 | def dumps(self): 34 | return { 35 | 'path': self.path, 36 | 'name': self.name, 37 | 'size': self.size, 38 | } 39 | -------------------------------------------------------------------------------- /nbdler/handler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import defaultdict 3 | from contextvars import ContextVar 4 | from contextlib import asynccontextmanager, contextmanager 5 | from concurrent.futures.thread import ThreadPoolExecutor 6 | from nbdler.error import HandlerError, ClientError 7 | from functools import partial 8 | from copy import copy 9 | from operator import attrgetter 10 | import threading 11 | from nbdler.utils import UsageInfo 12 | from traceback import format_exc 13 | import logging 14 | 
import weakref 15 | import json 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | block_context = ContextVar('block context') 20 | 21 | 22 | def _lookup_block(): 23 | """ 查找上下文的下载块。""" 24 | return block_context.get() 25 | 26 | 27 | def await_coroutine_threadsafe(coro, timeout=None): 28 | """ 线程安全等待协程结束。 29 | Args: 30 | coro: 协程 31 | timeout: 等待超时事件 32 | 33 | Returns: 34 | 返回协程的执行结果,或抛出异常。 35 | """ 36 | fut = asyncio.run_coroutine_threadsafe(coro, h.loop) 37 | return fut.result(timeout) 38 | 39 | 40 | class Handlers(dict): 41 | def __init__(self, *args, **kwargs): 42 | super().__init__(*args, **kwargs) 43 | self._ready = None 44 | 45 | def __getattr__(self, item): 46 | return self[item] 47 | 48 | async def prepare(self): 49 | """ Handler启动预处理,通常预启动,做初始化工作。启动标志在该方法设置。""" 50 | self._ready = asyncio.Event(loop=asyncio.get_running_loop()) 51 | result = await asyncio.gather(*[handler.prepare() for handler in self.values()]) 52 | self._ready.set() 53 | return result 54 | 55 | async def start(self): 56 | """ 此方法用于启动Handler的异步工作Handler.run()方法。""" 57 | result = await asyncio.gather(*[handler.start() for handler in self.values()]) 58 | self._ready = None 59 | return result 60 | 61 | async def close(self): 62 | await self._wait_for_ready() 63 | return await asyncio.gather(*[handler.close() for handler in self.values()]) 64 | 65 | async def _wait_for_ready(self): 66 | """ 等待Handler准备就绪。""" 67 | ready = self._ready 68 | if ready is not None: 69 | await ready.wait() 70 | 71 | async def pause(self): 72 | await self._wait_for_ready() 73 | return await asyncio.gather(*[handler.pause() for handler in self.values()]) 74 | 75 | async def join(self): 76 | await self._wait_for_ready() 77 | return await asyncio.gather(*[handler.join() for handler in self.values()]) 78 | 79 | 80 | class _HandlerReference(threading.local): 81 | # handlers 异步上下文。 82 | __context__ = ContextVar('handlers dict') 83 | 84 | def __init__(self): 85 | # handlers 线程上下文事件循环 86 | self._loop = None 87 | 88 | 
    @property
    def loop(self):
        return self._loop

    @property
    def owner(self):
        """ The Handlers mapping bound to the current context. """
        return self.__context__.get()

    @contextmanager
    def enter(self, handlers, loop=None):
        """ Bind `handlers` (and optionally `loop`) to the current context.

        Weak proxies are stored so the reference does not keep the download
        object graph alive.
        """
        assert self._loop or loop
        if loop:
            if not isinstance(loop, weakref.ProxyType):
                loop = weakref.proxy(loop)
            self._loop = loop

        if not isinstance(handlers, weakref.ProxyType):
            handlers = weakref.proxy(handlers)

        token = self.__context__.set(handlers)

        # NOTE(review): no try/finally here — the token is not reset if the
        # with-body raises; confirm whether that is intended.
        yield self
        self.__context__.reset(token)

    def __getattr__(self, item):
        return self.__context__.get()[item]

    def __iter__(self):
        return iter(self.__context__.get().values())

    iter_all = __iter__


h = _HandlerReference()


class Handler:
    """ Base class for download handlers (orchestrated via Handlers). """
    name = None

    parent = None
    _future = None

    def add_parent(self, parent):
        self.parent = parent

    async def prepare(self, *args, **kwargs):
        pass

    async def start(self):
        """ Create the completion future and run the handler workload. """
        assert not self._future or self._future.done()
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        self._future = future
        try:
            result = await self.run()
        finally:
            # NOTE(review): the future always resolves to None and run()'s
            # return value is discarded, so join() always returns None.
            future.set_result(None)

    async def run(self, *args, **kwargs):
        raise NotImplementedError

    async def pause(self, *args, **kwargs):
        raise NotImplementedError

    async def close(self, *args, **kwargs):
        raise NotImplementedError

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    async def join(self):
        """ Wait until start() has finished. """
        return await self._future

    def info_getter(self):
        return None


# TODO: with multiple download sources, sample-check resource data between the
# sources and use the agreed response as the baseline.
class SampleValidate(Handler):
    name = 'uri_validate'


class GatherException(Handler):
    """ Collected download exception state.

    Responsibilities:
        1. collect exceptions raised while downloading
        2. push the collected exceptions to external consumers
    """
    name = 'exception'

    def __init__(self):
        # thread-safe exception storage, keyed by exception class
        self._exceptions = defaultdict(list)
        self._cond = threading.Condition(threading.Lock())

        # callbacks that release async waiters created by acquire()
        self._waiter_callbacks = set()

        self._stopped = False

    def handler_error(self, exception):
        """ Push a handler exception.

        Args:
            exception: the exception object that occurred.
        """
        with self._cond:
            self._exceptions[HandlerError].append(
                HandlerError(exception, format_exc()))
            # release thread waiters
            self._cond.notify_all()

            # release async waiters
            for waiter in self._waiter_callbacks:
                waiter()

    def client_error(self, exception):
        """ Push a client exception.

        Args:
            exception: the exception object that occurred.
        """
        with self._cond:
            self._exceptions[ClientError].append(
                ClientError(exception, format_exc()))
            # release thread waiters
            self._cond.notify_all()
            # release async waiters
            for waiter in self._waiter_callbacks:
                waiter()

    def _fetch_exceptions(self, exception_type=None):
        # Snapshot of stored exceptions, optionally filtered by type.
        if exception_type is None:
            exceptions = []
            for v in self._exceptions.values():
                exceptions.extend(v)
        else:
            exceptions = list(self._exceptions[exception_type])

        return exceptions

    def acquire_threadsafe(self, exception_type=None, *, just_new_exception=True):
        """ Thread-safely acquire exceptions.

        Yields internal exceptions as a generator; iteration stops once the
        download task is paused or finished.

        Args:
            exception_type: ClientError or HandlerError; None yields all types.
            just_new_exception: ignore exceptions raised before this call and
                only yield ones raised afterwards.

        Yields:
            client or handler exception objects raised internally.
        """
        old_exc_list = []
        if just_new_exception:
            old_exc_list = self._fetch_exceptions()
        while True:
            with self._cond:
                if self._stopped:
                    break

                # Were new exceptions pushed while the previous batch was being
                # consumed?  If so, handle those first instead of waiting.
                before_new_exc = self._fetch_exceptions()
                before_new_diff = sorted(
                    set(before_new_exc).difference(old_exc_list),
                    key=before_new_exc.index)

                if not before_new_diff:
                    self._cond.wait()

            if not before_new_diff:
                new_exc_list = self._fetch_exceptions(exception_type)
                new_exc_set = set(new_exc_list).difference(old_exc_list)
            else:
                new_exc_list = before_new_exc
                new_exc_set = before_new_diff

            if not new_exc_set:
                continue

            # yield in original push order
            for exc in sorted(new_exc_set, key=new_exc_list.index):
                yield exc
            old_exc_list = new_exc_list

    async def acquire(self, exception_type=None, *, just_new_exception=True):
        """ Asynchronous counterpart of acquire_threadsafe(); see that method.

        Args:
            exception_type: ClientError or HandlerError; None yields all types.
            just_new_exception: ignore exceptions raised before this call and
                only yield ones raised afterwards.

        Yields:
            client or handler exception objects raised internally.
        """
        def release_waiter():
            nonlocal cond, loop

            async def _release():
                async with cond:
                    cond.notify_all()
            asyncio.run_coroutine_threadsafe(_release(), loop=loop)

        loop = asyncio.get_running_loop()
        cond = asyncio.Condition(asyncio.Lock())
        # NOTE(review): the callback is never removed from _waiter_callbacks
        # when this generator is abandoned — confirm whether that leaks.
        self._waiter_callbacks.add(release_waiter)

        old_exc_list = []
        # NOTE(review): this condition is inverted relative to
        # acquire_threadsafe(), which prefetches when just_new_exception is
        # True — one of the two looks like a bug; confirm against upstream.
        if not just_new_exception:
            old_exc_list = self._fetch_exceptions(exception_type)
        while True:
            with self._cond:
                if self._stopped:
                    break
                before_new_exc = self._fetch_exceptions()
                before_new_diff = sorted(
                    set(before_new_exc).difference(old_exc_list),
                    key=before_new_exc.index)

            if not before_new_diff:
                async with cond:
                    await cond.wait()

            if not before_new_diff:
                new_exc_list = self._fetch_exceptions(exception_type)
                new_exc_set = set(new_exc_list).difference(old_exc_list)
            else:
                new_exc_list = before_new_exc
                new_exc_set = before_new_diff

            if not new_exc_set:
                continue

            for exc in sorted(new_exc_set, key=new_exc_list.index):
                yield exc
            old_exc_list = new_exc_list

    async def run(self):
        self._stopped = False
        self._exceptions.clear()

    async def close(self):
        pass

    async def pause(self):
        # Stop flag first, then wake every waiter so generators can exit.
        self._stopped = True
        with self._cond:
            self._cond.notify_all()

        for waiter in self._waiter_callbacks:
            waiter()

    def __repr__(self):
        count = {k: len(v) for k, v in self._exceptions.items()}
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


class URIStatus:
    """ Usage and health statistics of a single download source URI. """

    def __init__(self, uri):
        self.source_uri = uri
        self._used = 0
        self._success = 0
        self._timeout = 0
        self._fatal = 0

        self._logs = []
        # block -> UsageInfo sampling the block's walk_length
        self._users = {}

        # moving average window (8 samples) of connection delays
        self._conn_delay_moving_avg = [0 for _ in range(8)]
        self._conn_delay = float('inf')

    def log(self, resp):
        self._logs.append(resp)

    def _response_delay(self, time_s):
        """ Fold one delay sample into the moving average; 5+ failures in the
        window mark the source as infinitely slow. """
        moving_avg = self._conn_delay_moving_avg
        moving_avg.append(time_s)
        moving_avg.pop(0)
        failure_count = moving_avg.count(float('inf'))
        if failure_count >= 5:
            self._conn_delay = float('inf')
        else:
            self._conn_delay = sum([delay for delay in moving_avg
                                    if delay != float('inf')]) / (8 - failure_count)

    def use(self, block):
        """ Register `block` as a consumer of this source. """
        self._used += 1
        self._users[block] = UsageInfo(lambda: block.progress.walk_length)

    def timeout(self, block, resp):
        self._timeout += 1
        self.log(f'{block} {resp}')

    def success(self, block, resp):
        self._success += 1
        self.log(f'{block} {resp}')
        # TODO: with multiple download sources, sample-check resource data
        # between the sources and use the agreed response as the baseline.
        if self.source_uri.getresponse() is None:
            self.source_uri.set_response(resp)
        self._response_delay(self._users[block].timelength())

    def fatal(self, block, resp):
        self._fatal += 1
        self.log(f'{block} {resp}')

    def disuse(self, block):
        self._used -= 1
        del self._users[block]

    def is_available(self):
        """ Return whether this source is still below its connection limit. """
        return self.source_uri.max_conn is None or self.source_uri.max_conn > self._used

    @property
    def users(self):
        return self._users

    def get_copy(self):
        """ Return a copy of the source URI object. """
        return copy(self.source_uri)

    def transfer_rate(self):
        """ Return the aggregate transfer rate of this source. """
        return sum([user.rate for user in self._users.values()])

    def average_speed(self):
        """ Return the average per-connection transfer rate of this source.

        NOTE(review): raises ZeroDivisionError when there are no users —
        confirm callers only invoke this when the source is in use.
        """
        users = [user.rate for user in self._users.values()]
        return sum(users) / len(users)

    def refresh(self):
        """ Refresh the usage samples of every consumer of this source. """
        for user in self._users.values():
            user.refresh()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return (f'')

    def info(self):
        return {
            'transfer_rate': self.transfer_rate(),
            'used': self._used,
            'success': self._success,
            'timeout': self._timeout,
            'fatal': self._fatal,
            'connection_delay': self._conn_delay
        }


class URIStatusManager(Handler):
    """ Manages download sources through URIStatus objects.

    Responsibilities:
        1. manage and allocate the source URIs
        2. monitor the working state of each source URI
    """

    name = 'uri_mgr'

    def __init__(self):
        self._uri_status = {}
        self._cond = None
        self._stopped = False

    async def prepare(self):
        self._cond = asyncio.Condition()
        for uri in self.parent.uris:
            self._uri_status.setdefault(uri.id, URIStatus(uri))

    async def get_uri(self):
        """ Return a URIStatus object for a client to use.

        Allocation favours covering all sources by usage count first, then
        picks the source with the fastest average per-connection speed.

        Returns:
            The allocated URIStatus object.
        """
        avl_uris = self._find_avl_uris()
        while not avl_uris:
            # NOTE(review): awaiting Condition.wait() without `async with
            # self._cond` raises RuntimeError, and nothing ever notifies this
            # condition — confirm this branch is reachable/intended.
            await self._cond.wait()
            avl_uris = self._find_avl_uris()

        uri = avl_uris[0]
        if uri._used > 0:
            uri = sorted(avl_uris, key=lambda u: u.average_speed(), reverse=True)[0]
        return uri

    def _find_avl_uris(self):
        # Least-used first, filtered down to sources below their limit.
        more_used = sorted(self._uri_status.values(), key=attrgetter('_used'))
        return list(filter(lambda u: u.is_available(), more_used))

    def success(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].success(block, resp)

    def timeout(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].timeout(block, resp)

    def fatal(self, resp):
        block = _lookup_block()
        self._uri_status[block.current_uri().id].fatal(block, resp)

    async def run(self):
        """ Heartbeat loop: refresh every source's usage samples each second. """
        self._stopped = False
        async_sleep = asyncio.sleep

        uri_status = self._uri_status
        while True:
            await async_sleep(1)
            if self._stopped:
                break

            for status in uri_status.values():
                status.refresh()

        self._cond = None

    async def pause(self):
        self._stopped = True

    async def close(self):
        pass

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {k: v.info() for k, v in self._uri_status.items()}


class ClientWorker(Handler):
    """ (Main handler) asynchronous client dispatch worker.

    Responsibilities:
        1. client session management
        2. dispatching work to download blocks
        3. monitoring work progress
    """
    name = 'client_worker'

    def __init__(self):
        self._block_queue = None
        self._working_blocks = set()
        # solution -> shared client session
        self._client_session = {}
        self._stopped = False
        self._executors = None
        self._tasks = set()

    async def prepare(self):
        self._stopped = False

        self._block_queue = asyncio.Queue()
        self._executors = ThreadPoolExecutor(
            max_workers=self.parent.config.max_concurrent,
            thread_name_prefix=self.parent.file.name
        )

    async def run(self):
        def goto_work(blo):
            """ Execute a download block in the background. """
            def cb(fut):
                # On completion, requeue the block for inspection and remove
                # it from the working set.
                self._block_queue.put_nowait(blo)
                self._working_blocks.remove(blo)

            task = asyncio.run_coroutine_threadsafe(
                self._worker(blo), loop)
            self._working_blocks.add(blo)
            task.add_done_callback(cb)
            return task

        loop = asyncio.get_running_loop()
        config = self.parent.config
        block_group = self.parent.block_grp

        # Prepare the unfinished download blocks
        unfinished_blocks = block_group.unfinished_blocks()
        # Submit them to the work queue
        while unfinished_blocks:
            block = unfinished_blocks.pop(0)
            await self.submit(block)

        # Decision loop: decide what to do with each block popped off the queue
        work_queue = self._block_queue
        resume_capability = config.resume_capability
        while True:
            block = await work_queue.get()
            work_queue.task_done()
            if block is None:
                break
            if block.unused_length():
                # the block still has an unused range: retry it
                goto_work(block)
            else:
                # check whether the whole task is finished
                if block_group.is_walk_finished():
                    missing = block_group.integrity_check()
                    if missing:
                        h.exception.handler_error(RuntimeError(f'Missing Blocks: {missing}'))
                        if unfinished_blocks:
                            await work_queue.put(unfinished_blocks.pop(0))
                        continue
                    break
                # After one block finishes, allow slicing of unfinished blocks
                # to keep concurrency saturated.
                if resume_capability:
                    if len(block_group.unfinished_blocks()) < config.max_concurrent:
                        if unfinished_blocks:
                            goto_work(unfinished_blocks.pop(0))
                        else:
                            h.slicer.request()

        # Task finished or paused: drain leftover queue entries
        while not work_queue.empty():
            await work_queue.get()
            work_queue.task_done()

        # Wait for all working blocks to exit
        while self._working_blocks:
            await work_queue.get()
            work_queue.task_done()

        self._executors.shutdown(False)
        # Close all handlers without blocking.
        # NOTE(review): Downloader.pause() result is not awaited here — confirm
        # the fire-and-forget call is intended.
        self.parent.pause(0)

    async def submit(self, block):
        """ Submit a download block to the work queue.

        Args:
            block: the Block object to download.
        """
        if self._stopped:
            return False
        await self._block_queue.put(block)

    async def _worker(self, block):
        """ Per-block client worker.

        Args:
            block: the Block object to download.
        """
        def run_client_threadsafe():
            # Runs a synchronous client on the executor thread, with the
            # block/handler contexts re-established on that thread.
            nonlocal cli, loop, handlers_ref, block
            token = block_context.set(block)
            with h.enter(handlers_ref, loop):
                try:
                    return cli.run()
                except BaseException as e:
                    h.exception.client_error(e)
                finally:
                    block_context.reset(token)

        if self._stopped:
            return
        handlers_ref = h.owner
        loop = asyncio.get_running_loop()
        config = self.parent.config

        # Allocate a download source
        uri = await h.uri_mgr.get_uri()

        source_uri = uri.get_copy()
        resume_capability = config.resume_capability
        client_policy = config.client_policy

        solution = client_policy.get_solution(source_uri.protocol)

        # Prepare (or reuse) the client session for this solution
        session = self._client_session.get(solution, None)
        if session is None:
            session = solution.get_session()
            self._client_session[solution] = session

        # Prepare the client handler
        client = solution.get_client(
            session, source_uri, block.progress, resume_capability)

        # Run the client for this block
        async with block.request(client) as cli:
            uri.use(block)
            try:
                if solution.is_async():
                    fut = cli.run()
                else:
                    fut = loop.run_in_executor(self._executors, run_client_threadsafe)
                result = await fut
            except BaseException as err:
                h.exception.client_error(err)
            # NOTE(review): `result` is unbound when the except branch runs, so
            # the return below would raise UnboundLocalError — confirm upstream.
            uri.disuse(block)
        return result

    async def close(self):
        async def close_sess(sess):
            """ Close a client session (supports sync and async close()). """
            coro_or_result = sess.close()
            if asyncio.iscoroutine(coro_or_result):
                await coro_or_result
        await asyncio.gather(*[close_sess(session) for session in self._client_session.values()])
        self._client_session.clear()

    async def pause(self):
        async def pause_cli(blo):
            """ Safely pause/close one client. """
            while True:
                if blo not in self._working_blocks:
                    # the block already exited; skip pausing its client
                    return
                if blo.client is None:
                    # wait for the client to come up, then close it
                    await asyncio.sleep(0)
                else:
                    break
            await blo.client.pause()

        if not self._stopped:
            self._stopped = True
            await asyncio.gather(*[pause_cli(block) for block in self._working_blocks])
            await self._block_queue.put(None)

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'actives': set(self._working_blocks)
        }


class BlockSlicer(Handler):
    """ 下载块切片器。

    负责工作:
        1. 
        下载块切片请求和响应
    """
    name = 'slicer'

    def __init__(self):
        # blocks waiting to be sliced, guarded by _lock
        self._waiters = set()
        self._lock = threading.Lock()

    async def divide_into(self, n):
        """ Split the download into n slices.

        This method should not be called while blocks are actively working,
        otherwise redundant transferred data may result.

        Args:
            n: number of slices to create.
        """
        for i in range(n):
            self.request()
        while self._waiters:
            block = self._waiters.pop()
            self._slice(block)

    def _slice(self, source_block):
        """ Slice half of the source block's unused range into a new block;
        returns the inserted block or None when nothing could be ceded. """
        req_range = source_block.half_unused()
        if req_range:
            result = source_block.slice(req_range)
            if result:
                block = self.parent.block_grp.insert(result)
                return block

        return None

    async def response(self):
        """ Called by a client to answer a pending slice request.

        If the slicer wants to slice the current block, this performs the
        slice safely; "safe" means it is called at a point that does not
        affect the range being downloaded.  Blocks not in the waiting set
        are skipped.
        """
        if self._waiters:
            with self._lock:
                source_block = _lookup_block()
                if source_block not in self._waiters:
                    return
                self._waiters.remove(source_block)
                resp = self._slice(source_block)
                if resp is not None:
                    await h.client_worker.submit(resp)

    def response_threadsafe(self):
        with self._lock:
            # NOTE(review): with a non-empty waiting set the first operand is
            # False and the whole test short-circuits, so blocks not in the
            # waiting set fall through to response(); `or` looks intended —
            # confirm against upstream.
            if not self._waiters and _lookup_block() not in self._waiters:
                return False
        await_coroutine_threadsafe(self.response())

    def request(self):
        """ Request one block slice.

        Picks the victim with the largest remaining range; the slice itself
        happens later, when the client answers via response().
        """
        len_waiting = len(self._waiters)
        blocks = sorted(self.parent.block_grp.unfinished_blocks(), key=lambda i: i.unused_length(), reverse=True)
        self._waiters = set(blocks[:len_waiting + 1])
        return len(self._waiters) == len_waiting + 1

    async def prepare(self):
        # Pre-slice so the number of unfinished blocks reaches max concurrency.
        config = self.parent.config
        if config.resume_capability:
            blocks_len = len(self.parent.block_grp.unfinished_blocks())
            if blocks_len < config.max_concurrent:
                await self.divide_into(config.max_concurrent - blocks_len)

    async def run(self):
        pass

    async def close(self):
        pass

    async def pause(self):
        self._waiters.clear()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'waiters': set(self._waiters)
        }


class SpeedAdjuster(Handler):
    """ Speed adjuster.

    Responsibilities:
        1. enforce the maximum download speed limit
        2. refresh real-time speed information
    """
    name = 'speed_adjuster'

    def __init__(self):
        # whether speed limiting is active
        self._opened = False
        self._stopped = True
        self._thread_cond = threading.Condition(threading.RLock())
        # semaphore-like budget of chunk reads per interval
        self._sema_value = 0
        self._async_cond = None

    async def _release_all(self):
        """ Lift the limit: give waiters an unbounded budget and wake them. """
        with self._thread_cond:
            async with self._async_cond:
                self._sema_value = float('inf')
                self._thread_cond.notify_all()
                self._async_cond.notify_all()

    def acquire_threadsafe(self):
        """ Blocking acquire of one read-budget unit (thread clients). """
        if self._opened:
            while True:
                with self._thread_cond:
                    value = self._sema_value
                    if value > 0:
                        self._sema_value -= 1
                        break
                    self._thread_cond.wait()
        return False

    async def acquire(self):
        """ Async acquire of one read-budget unit (async clients). """
        if self._opened:
            while True:
                with self._thread_cond:
                    async with self._async_cond:
                        value = self._sema_value
                        if value > 0:
                            self._sema_value -= 1
                            break
                        await self._async_cond.wait()
        return False

    async def prepare(self):
        assert self._stopped
        self._async_cond = asyncio.Condition()
        self._stopped = False

    async def run(self):

        async_sleep = asyncio.sleep
        block_grp = self.parent.block_grp
        config = self.parent.config
        max_speed = config.max_speed
        fraction = 0
        if max_speed is not None:
            self._opened = True
        while True:
            if self._stopped:
                break
            await async_sleep(config.interval)

            # refresh the aggregate real-time transfer rate
            block_grp.usage_info.refresh()

            # react to runtime changes of the max-speed configuration
            if config.max_speed != max_speed:
                max_speed = config.max_speed
                if max_speed is None:
                    self._opened = False
                    await self._release_all()
                else:
                    self._opened = True
                    fraction = 0

            # when limited, replenish the read budget for this interval
            if max_speed is not None:
                # NOTE(review): 8196 looks like a typo for 8192 (bytes per
                # chunk read) — confirm against the client chunk size.
                value = config.max_speed * config.interval / 8196

                # Clients limit speed at single-read granularity, so the
                # fractional part is carried over to the next interval for a
                # finer-grained limit.
                fraction += value % 1
                value = int(value)
                if fraction >= 1:
                    value += 1
                    fraction -= 1
                with self._thread_cond:
                    async with self._async_cond:
                        self._sema_value = value
                        self._thread_cond.notify_all()
                        self._async_cond.notify_all()

    async def close(self):
        pass

    async def pause(self):
        if not self._stopped:
            self._stopped = True
            self._opened = False
            await self._release_all()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''

    def info_getter(self):
        return {
            'value': self._sema_value
        }


class FileTempData(Handler):
    """ Buffered IO reader/writer for the downloaded file.

    Responsibilities:
        1. buffer transferred data and flush it to the file
        2. persist the download state
    """

    name = 'file_data'

    def __init__(self):
        # progress -> list of pending data chunks
        self._buffers = defaultdict(list)
        # total buffered bytes
        self._counter = 0
        self._unreleased = None
        self._lock = threading.RLock()
        self._stopped = True

    async def saving_state(self):
        """ Save the current download state.

        Persists the download configuration as a .cfg file so the download
        can later be resumed.
        """
        dumpy = self.parent.dumps()
        async with h.aio.open(f'{self.parent.file.pathname}{self.parent.config.downloading_ext}.cfg', mode='w') as f:
            await f.write(json.dumps(dumpy))

    async def _release(self):
        """ Hand the current buffers over to the writer queue and reset. """
        buffers = self._buffers
        counter = self._counter
        self._counter = 0
        self._buffers = defaultdict(list)
        return await self._unreleased.put((counter, buffers))

    def store_threadsafe(self, data):
        """ Thread-safe variant of store() for synchronous clients. """
        with self._lock:
            block = _lookup_block()
            self._buffers[block.progress].append(data)
            self._counter += len(data)
            if self.parent.config.buffer_size <= self._counter:
                # NOTE(review): blocks on the event loop while holding _lock —
                # confirm this cannot deadlock with other lock holders.
                await_coroutine_threadsafe(self._release())

    async def store(self, data):
        """ Buffer transferred data.

        When the buffered amount exceeds buffer_size, the buffer is released
        and written to the file.

        Args:
            data: the transferred data to buffer.
        """
        block = _lookup_block()
        self._buffers[block.progress].append(data)
        self._counter += len(data)
        if self.parent.config.buffer_size <= self._counter:
            await self._release()

    async def prepare(self):
        assert self._stopped
        self._unreleased = asyncio.Queue()
        self._stopped = False

    async def run(self):
        unreleased = self._unreleased
        file = self.parent.file
        filepath = f'{file.pathname}{self.parent.config.downloading_ext}'

        # A zero done_length means nothing was written yet, so the sparse
        # target file must be (re)created.
        if not self.parent.block_grp.done_length():
            async with h.aio.open(f'{file.pathname}{self.parent.config.downloading_ext}', mode='wb') as fd:
                if file.size is not None:
                    await 
fd.seek(file.size - 1)
                    await fd.write(b'\x00')

        async with h.aio.open(filepath, mode='rb+') as fd:
            while True:
                result = await unreleased.get()
                if result is None:
                    # sentinel from pause(): stop the writer loop
                    unreleased.task_done()
                    break
                counter, buffers = result
                for pg, lines in buffers.items():
                    # write each progress's chunks at its current file offset
                    await fd.seek(pg.begin + pg.done_length)
                    await fd.writelines(lines)
                    pg.done(sum([len(line) for line in lines]))

                # drop references early so the buffers can be collected
                del lines
                del result
                del buffers
                await self.saving_state()
                unreleased.task_done()

    async def pause(self):
        if not self._stopped:
            self._stopped = True
            # wait for clients to finish, flush what is left, then stop run()
            await h.client_worker.join()
            await self._release()
            await self._unreleased.put(None)

    async def close(self):
        pass

    def info_getter(self):
        return {
            'size': self._counter,
            'ready': 1
        }

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


class AIOReaderWriter(Handler):
    """ AIO read/write worker thread.

    Implements asynchronous file IO so that blocking file reads/writes do
    not stall the download worker threads.

    Responsibilities:
        1. manage the IO read/write thread
    """
    name = 'aio'

    def __init__(self):
        self._executor = None
        self._writers = set()

    async def prepare(self):
        self._executor = ThreadPoolExecutor(
            max_workers=1, thread_name_prefix=f'BufferWriter {self.parent.file.name}')

    @asynccontextmanager
    async def open(self, file, mode='r', *args, **kwargs):
        """ Asynchronously open a file.

        Args:
            file: see the `file` parameter of io.open()
            mode: see the `mode` parameter of io.open()
            args: positional arguments forwarded to io.open()
            kwargs: keyword arguments forwarded to io.open()

        Returns:
            An AIOFile object whose blocking IO operations are executed
            asynchronously on the worker thread.
        """
        def async_open():
            # calls the builtin open() (only shadowed by this method's name)
            return open(file, mode, *args, **kwargs)

        executor = self._executor
        assert executor
        loop = asyncio.get_running_loop()
        fd = await loop.run_in_executor(executor, async_open)
        aiofile = AIOFile(executor, fd, loop=loop)
        self._writers.add(aiofile)
        yield aiofile
        # Close the file.
        # NOTE(review): no try/finally — if the with-body raises, the fd is
        # never closed and stays registered in _writers; confirm intent.
        await loop.run_in_executor(executor, fd.close)
        self._writers.remove(aiofile)

    async def run(self):
        pass

    async def close(self):
        # Wait for every other handler to finish before stopping the IO thread.
        for handler in h.iter_all():
            if handler != self:
                await handler.join()
        self._executor.shutdown(False)

    async def pause(self):
        pass


class AIOFile:
    """ Asynchronous file read/write object.

    Wraps a file object so that the blocking operations listed in
    _async_attr are executed on the AIOReaderWriter worker thread.
    """
    _async_attr = frozenset(
        {'read', 'readline', 'readlines', 'write', 'writeline',
         'writelines', 'seek', 'flush', 'truncate'})

    def __init__(self, executor, fd, loop=None):
        self._executor = executor
        self._fd = fd
        self._loop = loop

    def __getattr__(self, item):
        func = getattr(self._fd, item)
        if item in self._async_attr:
            def ready(*args, loop=None, **kwargs):
                if loop is None:
                    loop = asyncio.get_running_loop()

                if kwargs:
                    # run_in_executor takes no kwargs; bind them via partial
                    handler = partial(getattr(self._fd, item), **kwargs)
                else:
                    handler = getattr(self._fd, item)
                fut = loop.run_in_executor(self._executor, handler, *args)
                return fut
            func = ready

        return func

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): 'close' is not in _async_attr, so self.close resolves
        # to the raw file's close() returning None; awaiting None raises
        # TypeError — confirm whether __aexit__ is actually exercised.
        await self.close()

    def __repr__(self):
        # NOTE(review): repr text appears lost in the export — confirm upstream.
        return f''


--------------------------------------------------------------------------------
/nbdler/progress.py:
--------------------------------------------------------------------------------

# -*- coding: UTF-8 -*-

# from ..utils.misc import Component
# # from .misc import Timer


class Progress:
    """ Byte-range progress of one download block.

    walk_length counts bytes received; done_length counts bytes already
    written to the file.
    """
    __slots__ = '_range', 'walk_length', 'done_length', '_timer'

    def __init__(self, range, walk_length=0, done_length=0, increment_time=0):
        # NOTE(review): `range` shadows the builtin; `increment_time` is
        # unused and the `_timer` slot is never assigned — confirm upstream.
        begin, end = range
        self._range = range

        if done_length != walk_length:
            # Buffered data may not have reached the file (the config file was
            # probably closed uncleanly); try to roll the progress back.
            # NOTE(review): this sets done_length forward to walk_length rather
            # than walk_length back to done_length — confirm the intent.
            done_length = walk_length

        self.walk_length = walk_length
        self.done_length = done_length

    def is_walk_finished(self):
        return self.walk_length >= self.total_length

    def is_done_finished(self):
        return self.done_length >= self.total_length

    def is_finished(self):
        return self.is_walk_finished() and self.is_done_finished()

    @property
    def range(self):
        return self._range

    @property
    def begin(self):
        return self._range[0]

    @property
    def end(self):
        return self._range[1]

    @property
    def total_length(self):
        try:
            return self._range[1] - self._range[0]
        except TypeError:
            # open-ended range (end is None): length unknown
            return float('inf')

    @property
    def walk_left(self):
        return self.total_length - self.walk_length

    @property
    def done_left(self):
        return self.total_length - self.done_length

    @property
    def differ(self):
        # bytes received but not yet written to the file
        return self.walk_length - self.done_length

    @property
    def time_length(self):
        return 0

    @property
    def average_speed(self):
        # NOTE(review): returns None — looks like a stub; confirm upstream.
        return

    @property
    def percent_complete(self):
        return self.walk_length * 100 / self.total_length

    def walk(self, byte_len):
        self.walk_length += byte_len

    def done(self, byte_len):
        self.done_length += byte_len

    def start(self):
        pass

    def stop(self):
        pass

    def set_walk_finish(self):
        """ Some downloads have no declared end, i.e. unknown size.  When such
        a download completes, force the current received length to become
        the end of the range.
        """
        assert self.end is None
        self._range = (self.begin, self.begin + self.walk_length)

    def slice(self, request_range):
        """ Slice the download progress.

        Cedes (put_begin, put_end) from this progress's tail; returns the
        ceded range or None when the request cannot be satisfied.
        """
        put_begin, put_end = request_range
        if put_begin > self.begin + self.walk_length:
            if put_end != self.end:
                put_end = self.end
            if put_begin >= put_end:
                return None
        else:
            return None

        self._range = (self._range[0], put_begin)
        return put_begin, put_end

    def __repr__(self):
        # NOTE(review): the format string appears garbled by the export (three
        # arguments, one placeholder) — confirm against upstream.
        return ' {:.2%}'.format(self.begin, self.end, self.percent_complete / 100)

    def __iter__(self):
        return iter([self._range, self.walk_length, self.done_length])


--------------------------------------------------------------------------------
/nbdler/request.py:
--------------------------------------------------------------------------------

# -*- coding: UTF-8 -*-

from nbdler.uri import SourceURI
from typing import Sequence, Mapping, AnyStr, Union, Optional
from nbdler.client import ClientPolicy
import bisect


class RequestConfig:
    """ Download configuration shared by all request types. """

    def __init__(self, *, file_path: AnyStr,
                 max_concurrent: int=5,
                 buffer_size: int=20*1024*1024,
                 chunk_size: int=64*1024,
max_retries: Optional[int]=3, 15 | timeout: Optional[int]=10, 16 | client_policy: Optional[ClientPolicy]=None, 17 | **kwargs): 18 | """ 19 | Args: 20 | file_path: 目标文件的路径名称 21 | max_concurrent: 最大并发数 22 | buffer_size: 最大请求次数仅用于dlopen的时候尝试的次数。 23 | chunk_size: 下载客户端的最大线程数量。 24 | max_retries: 内存缓冲的最大值。 25 | timeout: 下载客户端的超时参数。None一般代表着时无限阻塞链接。 26 | client_policy: 指定客户端处理策略,默认策略由 nbdler.client.__init__._DEFAULT_POLICY 指定 27 | **kwargs: 保存提供的额外参数,以在后续提供给下载客户端来让客户端进行选择调整。 28 | - downloading_ext: 下载中文件扩展名 29 | - interval: 心跳刷新间隔 30 | """ 31 | self.file_path = file_path 32 | self.max_concurrent = max_concurrent 33 | self.chunk_size = chunk_size 34 | self.max_retries = max_retries 35 | self.buffer_size = buffer_size 36 | self.timeout = timeout 37 | self.client_policy = client_policy 38 | self.handlers = [] 39 | self.kwargs = kwargs 40 | 41 | def add_handler(self, *handlers): 42 | """ 添加或更新下载处理器。 43 | Args: 44 | *handlers: 继承nbdler.handler.Handler类的处理器列表参数, 45 | 通过名称标识处理器,类变量name作为处理器唯一标识。 46 | 与内置处理器重名则替换内置处理器。 47 | 内置处理器有: 48 | - client_worker: 客户端调配控制器 49 | - slicer: 下载块切片器 50 | - speed_adjuster: 速度调节器,用于限速和实时速率刷新 51 | - uri_mgr: 下载源状态管理器 52 | - exception: 异常收集处理器 53 | - file_data: 文件缓冲区 54 | - aio: 异步文件读写工作线程 55 | """ 56 | for handler in handlers: 57 | bisect.insort(self.handlers, handler) 58 | 59 | def __repr__(self): 60 | return f'' 61 | 62 | 63 | class Request(RequestConfig): 64 | 65 | def __init__(self, uri=None, headers=None, cookies=None, proxies=None, 66 | max_conn=None, range_field=None, name='main', *, file_path, **kwargs): 67 | """ 68 | Args: 69 | uri,headers,cookies,proxies,max_conn,range_field,name: 参考put()方法。 70 | **kwargs: 参考RequestConfig的__init__()方法 71 | """ 72 | super(Request, self).__init__(file_path=file_path, **kwargs) 73 | self._uris = [] 74 | if uri: 75 | kwargs = {k: kwargs[k] for k in set(kwargs).difference(self.__dict__)} 76 | self.put(uri, headers, cookies, proxies=proxies, 77 | max_conn=max_conn, range_field=range_field, name=name, **kwargs) 
78 | 79 | def put(self, uri: AnyStr, 80 | headers: Optional[Union[Sequence, Mapping[str, str]]]=None, 81 | cookies: Optional[Mapping[str, str]]=None, 82 | proxies: Optional[Mapping]=None, 83 | max_conn: Optional[int]=None, 84 | range_field: Optional[Mapping[str, str]]=None, 85 | name: Optional[str]=None, 86 | **kwargs): 87 | """ 添加下载源。 88 | Args: 89 | uri: URI链接 90 | headers: 请求头 91 | cookies: 下载源请求传递的Cookie,要求传递字典dict类型 92 | proxies: 代理服务器,带auth例子:{'http': 'http://user:pass@some.proxy.com'} 93 | max_conn: 最大连接数 94 | range_field: 范围请求定义,要求提供字典类型,如 {'Range': 'bytes={begin}-{end_with}'} 95 | name: 下载源名称,仅用于标记,默认不提供,系统自动编号 96 | **kwargs: 允许根据下载源参数指定客户端的特定操作。 97 | - trust_env: 使用系统代理 98 | 99 | Returns: 100 | 返回未经编号的下载源。 101 | """ 102 | uri = uri.strip() 103 | src_url = SourceURI(None, uri, headers, cookies, proxies, max_conn, range_field, name, **kwargs) 104 | self._uris.append(src_url) 105 | return src_url 106 | 107 | @property 108 | def opts(self): 109 | """ 返回请求中的配置字典信息。 110 | 111 | Returns: 112 | 返回配置信息字典,具体键值参考RequestConfig。 113 | """ 114 | opts = {k: v for k, v in self.__dict__.items() if not k.startswith('_')} 115 | opts.update(opts.pop('kwargs')) 116 | return opts 117 | 118 | @property 119 | def uris(self): 120 | return self._uris 121 | 122 | def __repr__(self): 123 | return f'' 124 | 125 | def dumps(self): 126 | return { 127 | 'config': self.opts, 128 | 'uris': [uri.dumps() for uri in self._uris] 129 | } 130 | 131 | @classmethod 132 | def loads(cls, dumpy): 133 | request = cls(**dumpy['config']) 134 | for uri in dumpy['uris']: 135 | request.put(**uri) 136 | -------------------------------------------------------------------------------- /nbdler/rpc.py: -------------------------------------------------------------------------------- 1 | 2 | # TODO: DownloadRPCClient, DownloadRPCServer 3 | -------------------------------------------------------------------------------- /nbdler/session.py: 
from wsgiref.headers import Headers as _Headers
from urllib.parse import urlparse
from base64 import b64decode
from typing import AnyStr, Sequence, Optional, Union, Mapping


class Headers(_Headers):
    """wsgiref.headers.Headers with friendlier construction and values.

    Accepts None, any mapping, or a list/tuple of (name, value) pairs
    (wsgiref itself only takes a list), and coerces non-str values to str
    on assignment instead of raising.
    """

    def __init__(self, headers: Optional[Union[Sequence, Mapping]] = None):
        if headers is None:
            headers = []
        elif isinstance(headers, Mapping):
            # Generalized from ``dict`` to any mapping (backward compatible).
            headers = list(headers.items())
        elif isinstance(headers, (list, tuple)):
            # Copy so the caller's sequence is never mutated by wsgiref.
            headers = list(headers)
        else:
            raise TypeError(f'unsupported headers type: {type(headers)!r}')

        super().__init__(headers)

    def _convert_string_type(self, value):
        """Convert/check value type: coerce to str instead of raising."""
        if type(value) is str:
            return value

        return str(value)


class BaseURI:
    """Wraps a URI plus headers and exposes the parsed URL components."""

    def __init__(self, uri: AnyStr, headers):
        self._uri = None
        self._urlparse = None

        self.headers = Headers(headers)
        # Assign through the property so ``_urlparse`` stays in sync.
        self.uri = uri

    @property
    def uri(self):
        return self._uri

    @uri.setter
    def uri(self, value):
        self._uri = value
        self._urlparse = urlparse(value)

    @property
    def urlparse(self):
        return self._urlparse

    @property
    def hostname(self):
        return self._urlparse.hostname if self._urlparse else None

    @property
    def port(self):
        return self._urlparse.port if self._urlparse else None

    @property
    def path(self):
        return self._urlparse.path if self._urlparse else None

    @property
    def scheme(self):
        return self._urlparse.scheme if self._urlparse else None

    # Legacy alias for ``scheme``.
    protocol = scheme

    @property
    def query(self):
        return self._urlparse.query if self._urlparse else None

    @property
    def netloc(self):
        return self._urlparse.netloc if self._urlparse else None


class SourceURI(BaseURI):
    """A single download source together with its request options."""

    def __init__(self, id, uri, headers, cookies=None, proxies=None,
                 max_conn=None, range_field=None, name=None, response=None, **kwargs):
        """
        Args:
            id: numeric id assigned by the URIs registry (None before then).
            uri: the source URI.
            headers: request headers.
            cookies: cookies sent to this source.
            proxies: proxy configuration.
            max_conn: maximum connections for this source.
            range_field: range-request template, e.g.
                {'Range': 'bytes={begin}-{end_with}'}.
            name: display label; uniqueness is handled by ``id``.
            response: serialized URIResponse dict, as produced by dumps().
            **kwargs: extra client options, preserved across dumps()/loads().
        """
        super(SourceURI, self).__init__(uri, headers)
        self.id = id
        self.cookies = cookies
        self.proxies = proxies
        self.max_conn = max_conn

        self.range_field = range_field

        self.name = name
        # URIResponse is defined later in this module; only referenced when
        # a serialized response is supplied.
        self._response = URIResponse.loads(response) if response else None
        self.kwargs = kwargs

    def getresponse(self):
        """Return the last recorded URIResponse, or None."""
        return self._response

    def set_response(self, resp):
        self._response = resp

    def dumps(self):
        """Serialize this source (including any response) to plain data."""
        kwargs = {
            'id': self.id,
            'uri': self.uri,
            'headers': self.headers.items(),
            'cookies': self.cookies,
            'proxies': self.proxies,
            'max_conn': self.max_conn,
            'range_field': self.range_field,
            'name': self.name,
            'response': self._response and self._response.dumps()
        }
        kwargs.update(self.kwargs)
        return kwargs

    @classmethod
    def loads(cls, dumpy):
        """Rebuild a SourceURI from dumps() output."""
        return cls(**dumpy)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return f'<SourceURI {self.name}: {self.uri}>'
class URIResponse(BaseURI):
    """Normalized response metadata recorded for a download source."""

    def __init__(self, uri,
                 headers,
                 code,
                 msg,
                 length,
                 content_type,
                 range,
                 resume_capability,
                 **kwargs):
        """
        Args:
            uri: response URI.
            headers: response headers.
            code: response status code.
            msg: response status message.
            length: total resource length.
            content_type: resource MIME type.
            range: byte range covered by this response.
            resume_capability: whether the source supports resuming
                (range requests).
            **kwargs: extra fields, preserved across dumps()/loads().
        """
        super().__init__(uri, headers)
        self.code = code
        self.length = length
        self.range = range
        self.msg = msg
        self.content_type = content_type
        self.resume_capability = resume_capability
        self.kwargs = kwargs

        # Extract commonly useful HTTP validators/metadata from the headers.
        self.etag = self.headers.get('etag')
        self.date = self.headers.get('date')
        self.last_modified = self.headers.get('last-modified')
        self.content_range = self.headers.get('content-range')
        self.content_md5 = self.headers.get('content-md5')
        self.expires = self.headers.get('expires')
        self.md5 = None
        if self.content_md5:
            # RFC 1864: Content-MD5 carries the base64-encoded MD5 digest.
            # BUG FIX: narrowed from a bare ``except`` — only decode errors
            # are expected here (binascii.Error subclasses ValueError); a
            # malformed header leaves ``md5`` unset instead of failing.
            try:
                self.md5 = b64decode(self.content_md5).hex()
            except (ValueError, TypeError):
                pass

    def dumps(self):
        """Serialize the response to plain data for persistence."""
        kwargs = {
            'uri': self.uri,
            'headers': list(self.headers.items()),
            'code': self.code,
            'length': self.length,
            'range': self.range,
            'content_type': self.content_type,
            'msg': self.msg,
            'resume_capability': self.resume_capability
        }
        kwargs.update(self.kwargs)
        return kwargs

    @classmethod
    def loads(cls, dumpy):
        """Rebuild a URIResponse from dumps() output."""
        return cls(**dumpy)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return (f'<URIResponse [{self.code} {self.msg}] '
                f'length={self.length} {self.uri}>')


class URIs:
    """Registry of download sources with stable integer ids."""

    def __init__(self):
        self._uris = []

    def __len__(self):
        return len(self._uris)

    def __getitem__(self, item):
        return self._uris.__getitem__(item)

    def __iter__(self):
        return iter(self._uris)

    def put(self, uri,
            headers=None,
            cookies=None,
            proxies=None,
            max_conn=None,
            range_field=None,
            name=None,
            **kwargs):
        """Register a new download source and return it."""
        put_id = self.__newid()
        if name is None:
            # Default the label to the assigned id.
            name = str(put_id)
        src_url = SourceURI(put_id, uri, headers, cookies, proxies, max_conn,
                            range_field, name, **kwargs)

        # __newid() reserved the slot with None; fill it in.
        self._uris[put_id] = src_url
        return self._uris[put_id]

    def __newid(self):
        """Reserve and return the next free id (reuses empty slots)."""
        try:
            index = self._uris.index(None)
        except ValueError:
            index = len(self._uris)
            self._uris.append(None)
        return index

    def dumps(self):
        """Serialize all sources. Assumes every slot is filled."""
        return [uri.dumps() for uri in self._uris]

    @classmethod
    def loads(cls, dumpy):
        """Rebuild the registry from dumps() output.

        Ids stored in the dump are kept as-is; the dump is presumed to be
        in id order — TODO confirm at the call sites.
        """
        uris = cls()
        for uri in dumpy:
            uris._uris.append(SourceURI(**uri))
        return uris

    @classmethod
    def load_from_source_uris(cls, source_uris: Sequence[SourceURI]):
        """Build a registry from existing SourceURI objects (re-numbered)."""
        uris = cls()
        uris.import_uris(source_uris)
        return uris

    def import_uris(self, source_uris: Sequence[SourceURI]):
        """Copy sources into this registry, assigning fresh ids."""
        for uri in source_uris:
            self.put(uri.uri, uri.headers.items(), uri.cookies, uri.proxies,
                     uri.max_conn, uri.range_field, uri.name, **uri.kwargs)

    def __repr__(self):
        # NOTE(review): original repr text was stripped in transit.
        return f'<URIs total={len(self._uris)}>'


# ---- nbdler/utils.py module header (preserved from the original file) ----
import time
from collections import deque
import asyncio
from contextlib import contextmanager
import threading
from concurrent import futures
class UsageInfo:
    """Records usage statistics: elapsed time and a smoothed transfer rate."""

    __slots__ = '_fetch_length', '_previous_length', '_previous_time', '_start_time', '_moving_avg', 'rate'

    def __init__(self, fetch_length):
        # ``fetch_length`` is a zero-argument callable returning the current
        # cumulative byte count.
        self._fetch_length = fetch_length

        self._previous_length = fetch_length()
        self._start_time = time.time()
        self._previous_time = self._start_time
        # 8-sample moving-average window for the rate.
        self._moving_avg = deque([0] * 8)
        self.rate = 0

    def reset(self):
        """Restart timing and clear the rate history."""
        self._start_time = time.time()
        self._previous_time = self._start_time
        self._moving_avg = deque([0] * 8)
        self.rate = 0

    def timelength(self):
        """Seconds elapsed since construction or the last reset()."""
        return time.time() - self._start_time

    def refresh(self):
        """Sample the counter and update the moving-average rate."""
        cur_time = time.time()
        cur_length = self._fetch_length()
        diff_time = cur_time - self._previous_time
        diff_length = cur_length - self._previous_length

        self._previous_length = cur_length
        self._previous_time = cur_time
        # A zero time delta yields a zero speed sample (x / inf == 0.0).
        speed = diff_length / (diff_time or float('inf'))

        self._moving_avg.pop()
        self._moving_avg.appendleft(speed)

        self.rate = sum(self._moving_avg) / 8


def update_range_field(range_filed, target_range):
    """Fill in a range template string.

    Fields are written as ``{name}``, e.g. ``{begin}``. Available fields:
        begin: start of the range.
        end: end of the range, exclusive.
        end_with: end of the range, inclusive.
        length: length of the range.

    Example: target_range=(2, 99) gives
        begin=2, end=99, end_with=98, length=97.

    Args:
        range_filed: template containing begin/end/end_with/length fields.
            (Parameter name keeps the original spelling for compatibility.)
        target_range: (begin, end) pair; ``end`` may be None/inf for an
            open-ended range, in which case end/end_with/length are blank.

    Returns:
        The template with the fields substituted.
    """
    target_begin, target_end = target_range

    begin = target_begin
    if target_end is None or target_end == float('inf'):
        end = ''
        end_with = ''
        length = ''
    else:
        end = target_end
        if target_end > 0:
            end_with = target_end - 1
        else:
            end_with = ''
        length = end - begin
    return range_filed.format(
        begin=begin,
        end=end,
        end_with=end_with,
        length=length)


class _ExecutorEventLoopFuture:
    """Future for an event loop safely running run_forever() in an Executor."""

    def __init__(self, task_fut, loop_fut):
        # ``loop_fut`` resolves to the loop once the worker thread starts;
        # ``task_fut`` resolves when run_forever() has fully shut down.
        self._loop = loop_fut
        self._task = task_fut

    def __await__(self):
        yield from asyncio.wrap_future(self._task)

    def __iter__(self):
        yield from asyncio.wrap_future(self._task)

    def get_loop(self):
        """Block until the loop exists, then return it."""
        return self._loop.result()

    async def aget_loop(self):
        return await asyncio.wrap_future(self._loop)

    def join(self):
        """Block until the loop thread has finished shutting down."""
        return self._task.result()

    async def ajoin(self):
        return await asyncio.wrap_future(self._task)

    result = join

    aresult = ajoin

    def close(self):
        """Schedule loop.stop() from any thread."""
        loop = self.get_loop()
        return loop.call_soon_threadsafe(loop.stop)

    async def aclose(self):
        loop = await self.aget_loop()
        # BUG FIX: call_soon_threadsafe returns a Handle, which is not a
        # concurrent.futures.Future and cannot be passed to
        # asyncio.wrap_future (the old code raised here).  Scheduling the
        # stop is all that is needed.
        loop.call_soon_threadsafe(loop.stop)

    def add_done_callback(self, __fn):
        return self._task.add_done_callback(__fn)


def forever_loop_in_executor(executor, loop=None):
    """Run an asyncio event loop with run_forever() inside an executor thread.

    The loop thread can only be stopped via loop.stop() (see
    _ExecutorEventLoopFuture.close()/aclose()).
    """
    def _run():
        nonlocal loop
        if loop is None:
            try:
                loop = asyncio.get_event_loop()
                if loop.is_closed():
                    raise RuntimeError('new loop')
            except RuntimeError:
                loop = asyncio.new_event_loop()
        # Bind the loop to this worker thread unconditionally so loop-less
        # asyncio calls made from it (e.g. gather during shutdown) resolve
        # to the right loop.  (Previously only done for newly created loops.)
        asyncio.set_event_loop(loop)

        future_loop.set_result(loop)
        try:
            loop.run_forever()
        finally:
            try:
                cancel_all_tasks(loop)
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                loop.close()

    future_loop = futures.Future()
    task_fut = executor.submit(_run)
    return _ExecutorEventLoopFuture(task_fut, future_loop)


def cancel_all_tasks(loop):
    """Cancel and drain all tasks still pending on *loop*.

    Adapted from asyncio.runners._cancel_all_tasks.
    """
    to_cancel = asyncio.all_tasks(loop)
    if not to_cancel:
        return

    for task in to_cancel:
        task.cancel()

    # BUG FIX: the explicit ``loop=`` argument to gather() was removed in
    # Python 3.10; _run() binds *loop* as the current thread's event loop,
    # so gather() resolves it correctly without the argument.
    loop.run_until_complete(
        asyncio.gather(*to_cancel, return_exceptions=True))

    for task in to_cancel:
        if task.cancelled():
            continue
        if task.exception() is not None:
            loop.call_exception_handler({
                'message': 'unhandled exception during asyncio.run() shutdown',
                'exception': task.exception(),
                'task': task,
            })
'Nbdler' 3 | DESCRIPTION = 'Python multi-client Downloader.' 4 | URL = 'https://github.com/ZSAIM/Nbdler' 5 | VERSION = '3.0.3' 6 | AUTHOR = 'ZSAIM' 7 | AUTHOR_EMAIL = 'zzsaim@163.com' 8 | LICENSE = 'Apache 2.0' 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from setuptools import setup, find_packages 4 | import io 5 | import os 6 | 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | 9 | about = {} 10 | with open(os.path.join(here, 'nbdler', 'version.py'), 'r', encoding='utf-8') as f: 11 | exec(f.read(), about) 12 | 13 | with io.open('README.rst', 'r', encoding='utf-8') as readme: 14 | long_description = readme.read() 15 | 16 | install_requires = [ 17 | 'aiohttp', 18 | 'requests' 19 | ] 20 | 21 | 22 | setup( 23 | name=about['TITLE'], 24 | version=about['VERSION'], 25 | description=about['DESCRIPTION'], 26 | long_description=long_description, 27 | author=about['AUTHOR'], 28 | author_email=about['AUTHOR_EMAIL'], 29 | url=about['URL'], 30 | license=about['LICENSE'], 31 | classifiers=[ 32 | 'Development Status :: 5 - Production/Stable', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Programming Language :: Python', 36 | 'Programming Language :: Python :: 3', 37 | ], 38 | packages=find_packages(), 39 | install_requires=install_requires, 40 | ) --------------------------------------------------------------------------------