├── .gitignore ├── LICENSE.txt ├── README.md ├── setup.py ├── siaslice.py └── tests ├── Makefile ├── mock_status.py ├── test_cases_that_cost_sc.py ├── test_misc.py └── test_siad.py /.gitignore: -------------------------------------------------------------------------------- 1 | tests/*.img 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .dmypy.json 124 | dmypy.json 125 | 126 | # Pyre type checker 127 | .pyre/ 128 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Ryan Young 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sia Slice 2 | 3 | ...is a small program that maintains a mirror of a large file on the 4 | [Sia](https://sia.tech) decentralized storage network. The envisioned use case 5 | is storing a long-term, low-cost disk image, with periodic updates, for backups 6 | and archives on The Cloud™. 7 | 8 | The basic idea behind Sia Slice is to chop up a single block device (or other 9 | large, monolithic file) into thousands of 80MiB chunks, LZMA-compress those 10 | chunks, and then upload them to Sia. That way, the next time you sync, you only 11 | have to upload the chunks that have changed since the last sync. Sync operations 12 | always construct a complete and identical mirror; there is no history, and there 13 | are no "full" or "incremental" snapshots. This minimizes both the complexity of 14 | the program and the storage requirements on Sia. 15 | 16 | Besides, if you need those features, you can simply use any filesystem you 17 | choose on top of the original device or disk image. This ability is what makes 18 | Sia Slice unique: In contrast to other synchronization programs like 19 | [Siasync](https://github.com/tbenz9/siasync) and 20 | [Repertory](https://bitbucket.org/blockstorage/repertory/src/master/) that 21 | operate at the file level, Sia Slice operates at the *block level*. 22 | 23 | ![Curses screenshot](https://raw.githubusercontent.com/wiki/YoRyan/sia-slice/transfer-screen.png) 24 | 25 | ![Sia screenshot](https://raw.githubusercontent.com/wiki/YoRyan/sia-slice/sia-ui-screen.png) 26 | 27 | Sia Slice was written for GNU/Linux systems with Python 3.7 or later. Ports to 28 | other platforms should be possible with minimal effort. 29 | 30 | The author uses Sia Slice weekly to mirror his Btrfs backup drive. 31 | 32 | ### Installation 33 | 34 | ``` 35 | pip install git+https://github.com/YoRyan/sia-slice 36 | ``` 37 | 38 | ### Usage 39 | 40 | You must provide the API password required to communicate with Sia, either 41 | through the command line or by setting `$SIA_API_PASSWORD`. I recommend using 42 | the environment variable: 43 | 44 | ``` 45 | export SIA_API_PASSWORD=xxxxxx 46 | ``` 47 | 48 | To copy the contents of `/dev/sdb1` to a new Sia folder at `/backupdrive`: 49 | 50 | ``` 51 | siaslice --mirror /dev/sdb1 backupdrive 52 | ``` 53 | 54 | (To sync again, just run the same command. Sia Slice will locate the previous 55 | uploads and determine which blocks need to be re-uploaded.) 56 | 57 | To download all Sia Slice data from `/backupdrive` and reassemble it on 58 | `/dev/sdc1`: 59 | 60 | ``` 61 | siaslice --download /dev/sdc1 backupdrive 62 | ``` 63 | 64 | Finally, Sia Slice writes a timestamped state file that can be used to resume 65 | any mirror or download operation in the event of a program crash, network 66 | interruption, or other catastrophic event.
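If you ever need to inspect one of these state files by hand, each one is just a pickled dictionary. A minimal sketch, assuming a mirror state file (the field names below are the ones `siaslice.py` actually writes; the file name is only an example):

```
import pickle

# Example name; real state files carry their own timestamp.
with open('siaslice-mirror-20191024-1522.dat', 'rb') as fp:
    state = pickle.load(fp)

# A mirror state file records the source file, the Sia folder, the block
# size in bytes, and the index of the block that was being synced.
print(state['source_file'], state['siapath'],
      state['block_size'], state['current_index'])
```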
67 | 68 | To resume a stalled mirror operation: 69 | 70 | ``` 71 | siaslice --resume siaslice-mirror-20191024-1522.dat 72 | ``` 73 | 74 | ### Notes 75 | 76 | Sia is an emerging technology, and I jumped through a lot of hoops to write this 77 | software. For the curious, I have written a companion 78 | [blog post](https://youngryan.com/2019/10/introducing-sia-slice-my-absurdly-cheap-block-storage-solution/). 79 | 80 | For contributors and forkers, some tests are located in tests/. 81 | ``` 82 | pip install -e . 83 | pip install asynctest 84 | cd tests/ 85 | python -m unittest ... 86 | ``` 87 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='siaslice', 5 | version='0.0', 6 | packages=find_packages(), 7 | include_package_data=True, 8 | zip_safe=False, 9 | install_requires=[ 10 | 'aiofile', 11 | 'aiohttp' 12 | ], 13 | entry_points={ 14 | 'console_scripts': [ 15 | 'siaslice=siaslice:main' 16 | ] 17 | } 18 | ) 19 | -------------------------------------------------------------------------------- /siaslice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """A program to sync a large file to Sia with incremental updates.""" 5 | 6 | 7 | __author__ = 'Ryan Young' 8 | __contact__ = 'ryan@youngryan.com' 9 | __copyright__ = 'Copyright 2019' 10 | __license__ = 'MIT' 11 | __version__ = '1.0' 12 | 13 | 14 | import asyncio 15 | import curses 16 | import os 17 | import pickle 18 | import re 19 | from argparse import ArgumentParser 20 | from collections import namedtuple 21 | from datetime import datetime, timedelta, timezone 22 | from hashlib import md5 23 | from io import DEFAULT_BUFFER_SIZE 24 | from lzma import LZMACompressor, LZMADecompressor 25 | from sys import stderr 26 | from types import AsyncGeneratorType, GeneratorType 27 | 28 | import aiofile 29 | import aiohttp 30 | 31 | 32 | DEFAULT_BLOCK_MB = 80 33 | TRANSFER_STALLED_MIN = 3*60 34 | 35 | OpStatus = namedtuple('OpStatus', ['transfers', 'current_index', 36 | 'last_index', 'block_size']) 37 | 38 | class SiadError(Exception): 39 | def __init__(self, status, fields): 40 | super().__init__(self) 41 | self.status = status 42 | self.message = fields.get('message', '') 43 | self.fields = {key: value for key, value 44 | in fields.items() if key != 'message'} 45 | def __str__(self): 46 | return f'<[{self.status}] {self.message}>' 47 | def __repr__(self): 48 | return self.__str__() 49 | 50 | 51 | class SiadSession(): 52 | USER_AGENT = 'Sia-Agent' 53 | MAX_CONCURRENT_UPLOADS = 1 54 | MAX_CONCURRENT_DOWNLOADS = 10 55 | 56 | def __init__(self, domain, api_password, debug=False): 57 | self._client = None 58 | self._domain = domain 59 | self._api_password = api_password 60 | self._debug = debug 61 | self._upload_sem = asyncio.BoundedSemaphore( 62 | value=SiadSession.MAX_CONCURRENT_UPLOADS) 63 | self._download_sem = asyncio.BoundedSemaphore( 64 | value=SiadSession.MAX_CONCURRENT_DOWNLOADS) 65 | 66 | async def open(self): 67 | self._client = aiohttp.ClientSession( 68 | auth=aiohttp.BasicAuth('', password=self._api_password), 69 | timeout=aiohttp.ClientTimeout(total=None)) 70 | async def __aenter__(self): 71 | await self.open() 72 | return self 73 | 74 | async def close(self): 75 | await self._client.close() 76 | async def __aexit__(self, exc_type, exc, tb): 77 | await 
self.close() 78 | 79 | async def get(self, *path, **qs): 80 | headers = {'User-Agent': SiadSession.USER_AGENT} 81 | response = await self._client.get(f"{self._domain}/{'/'.join(path)}", 82 | params=qs, headers=headers) 83 | if self._debug: 84 | print(f"[{response.status}] GET /{'/'.join(path)} {qs}", file=stderr) 85 | if response.status >= 400 and response.status < 600: 86 | raise SiadError(response.status, await response.json()) 87 | else: 88 | return response 89 | 90 | async def post(self, data, *path, **qs): 91 | headers = {'User-Agent': SiadSession.USER_AGENT} 92 | response = await self._client.post(f"{self._domain}/{'/'.join(path)}", 93 | data=data, params=qs, headers=headers) 94 | if self._debug: 95 | print(f"[{response.status}] POST /{'/'.join(path)} {qs}", file=stderr) 96 | if response.status >= 400 and response.status < 600: 97 | raise SiadError(response.status, await response.json()) 98 | else: 99 | return response 100 | 101 | async def upload(self, siapath, data): 102 | part_siapath = siapath[:-1] + (f'{siapath[-1]}.part',) 103 | async with self._upload_sem: 104 | await self.post(data, 'renter', 'uploadstream', *part_siapath) 105 | await self.post(b'', 'renter', 'rename', *part_siapath, 106 | newsiapath=format_sp(siapath)) 107 | 108 | async def download(self, siapath, readsize=DEFAULT_BUFFER_SIZE): 109 | async with self._download_sem: 110 | response = await self.get('renter', 'stream', *siapath) 111 | while True: 112 | chunk = await response.content.read(readsize) 113 | if chunk: 114 | yield chunk 115 | else: 116 | break 117 | 118 | async def validate_path(self, sp): 119 | try: 120 | await self.post(b'', 'renter', 'validatesiapath', sp) 121 | except SiadError as err: 122 | if err.status == 400: 123 | return False 124 | else: 125 | raise err 126 | else: 127 | return True 128 | 129 | def parse_time(ts): 130 | match = re.search(r'^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(\.\d+)' 131 | r'(-|\+)(\d\d):(\d\d)$', ts) 132 | if not match: 133 | raise ValueError(f'invalid sia timestamp: {ts}') 134 | tz_s = 1 if match.group(8) == '+' else -1 135 | tz = timezone(timedelta(hours=tz_s*int(match.group(9)), 136 | minutes=tz_s*int(match.group(10)))) 137 | return datetime(int(match.group(1)), # year 138 | int(match.group(2)), # month 139 | int(match.group(3)), # day 140 | hour=int(match.group(4)), 141 | minute=int(match.group(5)), 142 | second=int(match.group(6)), 143 | microsecond=round(float(match.group(7))*1000000), 144 | tzinfo=tz) 145 | 146 | 147 | class SiapathStorage(): 148 | _BlockFile = namedtuple('_BlockFile', ['siapath', 'md5_hash', 'size', 'partial', 149 | 'complete', 'stalled', 'upload_progress']) 150 | 151 | def __init__(self, session, *siapath, 152 | default_block_size=DEFAULT_BLOCK_MB*1000*1000): 153 | self._session = session 154 | self._siapath = siapath 155 | self.block_size = default_block_size 156 | self.block_files = {} 157 | 158 | async def update(self): 159 | try: 160 | response = await self._session.get('renter', 'dir', *self._siapath) 161 | except SiadError: 162 | siafiles = [] 163 | else: 164 | siafiles = (await response.json()).get('files', []) 165 | 166 | block_size = self.block_size 167 | block_files = {} 168 | now = datetime.now(timezone.utc) 169 | for siafile in siafiles: 170 | file_match = re.search( 171 | r'/siaslice\.(\d+)MiB\.(\d+)\.([a-z\d]+)\.lz(\.part)?$', 172 | siafile['siapath']) 173 | if not file_match: 174 | continue 175 | 176 | file_index = int(file_match.group(2)) 177 | if file_index in block_files: 178 | raise ValueError(f'duplicate files found 
for block {file_index}') 179 | 180 | file_block_size = int(file_match.group(1))*1000*1000 181 | if not block_size: 182 | block_size = file_block_size 183 | elif block_size != file_block_size: 184 | raise ValueError(f'inconsistent block sizes at {siafile["siapath"]} - ' 185 | f'found {file_block_size}B, expected {block_size}B') 186 | 187 | file_md5_hash = file_match.group(3) 188 | file_partial = file_match.group(4) is not None 189 | 190 | file_age = now - SiadSession.parse_time(siafile['createtime']) 191 | block_files[file_index] = SiapathStorage._BlockFile( 192 | siapath=tuple(siafile['siapath'].split('/')), 193 | md5_hash=file_md5_hash, 194 | size=siafile['filesize'], 195 | partial=file_partial, 196 | complete=siafile['available'], 197 | stalled=((not siafile['available'] or file_partial) 198 | and file_age/timedelta(minutes=1) >= TRANSFER_STALLED_MIN), 199 | upload_progress=siafile['uploadprogress']/100.0) 200 | self.block_size = block_size 201 | self.block_files = block_files 202 | 203 | async def delete(self, index): 204 | if index in self.block_files: 205 | siapath = self.block_files[index].siapath 206 | await self._session.post(b'', 'renter', 'delete', *siapath) 207 | else: 208 | raise FileNotFoundError 209 | await self.update() 210 | 211 | async def upload(self, index, md5_hash, data, overwrite=False): 212 | filename = f'siaslice.{format_bs(self.block_size)}.{index}.{md5_hash}.lz' 213 | if index in self.block_files: 214 | if overwrite: 215 | try: 216 | await self.delete(index) 217 | except FileNotFoundError: 218 | pass 219 | else: 220 | raise FileExistsError 221 | await self._session.upload(self._siapath + (filename,), data) 222 | await self.update() 223 | 224 | async def download(self, index): 225 | await self.update() 226 | if index not in self.block_files: 227 | raise FileNotFoundError 228 | block_file = self.block_files[index] 229 | if block_file.partial or not block_file.complete: 230 | raise FileNotFoundError 231 | 232 | loop = asyncio.get_running_loop() 233 | lz = LZMADecompressor() 234 | async for chunk in self._session.download(block_file.siapath): 235 | yield await loop.run_in_executor(None, lz.decompress, chunk) 236 | 237 | 238 | def main(): 239 | argp = ArgumentParser( 240 | description='Sync a large file to Sia with incremental updates.') 241 | argp.add_argument( 242 | '-v', '--version', action='version', version=f'%(prog)s {__version__}') 243 | argp_op = argp.add_mutually_exclusive_group(required=True) 244 | argp_op.add_argument('-m', '--mirror', action='store_true', 245 | help='sync a copy to Sia') 246 | argp_op.add_argument('-d', '--download', action='store_true', 247 | help='reconstruct a copy using Sia') 248 | argp_op.add_argument( 249 | '-r', '--resume', action='store_true', 250 | help='resume a stalled operation with the provided state file') 251 | argp.add_argument('-b', '--block', default=DEFAULT_BLOCK_MB, type=int, 252 | help=('set block size in MiB for initial sync (not applicable ' 253 | f'to other operations; default: {DEFAULT_BLOCK_MB})')) 254 | argp.add_argument('-a', '--api', default='http://localhost:9980', 255 | help=('the HTTP endpoint to communicate with Sia ' 256 | "(default: 'http://localhost:9980')")) 257 | argp.add_argument('-p', '--password', 258 | default=os.environ.get('SIA_API_PASSWORD', ''), 259 | help=('the API password to communicate with Sia ' 260 | "(default: read from $SIA_API_PASSWORD)")) 261 | argp.add_argument('-t', '--text', action='store_true', 262 | help='don\'t display the curses interface') 263 | argp.add_argument('--debug',
action='store_true', 264 | help='log API calls to stderr') 265 | argp.add_argument('file', help=('source file for uploads, target for ' 266 | 'downloads, or state file to resume from')) 267 | argp.add_argument( 268 | 'siapath', nargs='?', 269 | help='Sia directory target for uploads or source for downloads') 270 | args = argp.parse_args() 271 | def start(stdscr): 272 | nonlocal args 273 | asyncio.run(amain(args, stdscr=stdscr)) 274 | if args.text: 275 | start(None) 276 | else: 277 | curses.wrapper(start) 278 | 279 | 280 | async def amain(args, stdscr=None): 281 | async with SiadSession(args.api, args.password, debug=args.debug) as session: 282 | async def siapath(): 283 | if not args.siapath: 284 | raise ValueError('no siapath specified') 285 | if not await session.validate_path(args.siapath): 286 | raise ValueError(f'invalid siapath: {args.siapath}') 287 | return tuple(args.siapath.split('/')) 288 | if args.mirror: 289 | await do_mirror(session, args.file, await siapath(), 290 | block_size=args.block*1000*1000, stdscr=stdscr) 291 | elif args.download: 292 | await do_download(session, args.file, await siapath(), stdscr=stdscr) 293 | elif args.resume: 294 | async with aiofile.AIOFile(args.file, 'rb') as state_afp: 295 | state_pickle = pickle.loads(await state_afp.read()) 296 | if 'siaslice-mirror' in args.file: 297 | await do_mirror( 298 | session, state_pickle['source_file'], state_pickle['siapath'], 299 | start_block=state_pickle['current_index'], 300 | block_size=state_pickle['block_size'], stdscr=stdscr) 301 | elif 'siaslice-download' in args.file: 302 | await do_download( 303 | session, state_pickle['target_file'], state_pickle['siapath'], 304 | start_block=state_pickle['current_index'], stdscr=stdscr) 305 | else: 306 | raise ValueError(f'bad state file: {args.file}') 307 | 308 | 309 | async def do_mirror(session, source_file, siapath, start_block=0, 310 | block_size=DEFAULT_BLOCK_MB*1000*1000, stdscr=None): 311 | storage = SiapathStorage(session, *siapath, default_block_size=block_size) 312 | await storage.update() 313 | 314 | state_file = f"siaslice-mirror-{datetime.now().strftime('%Y%m%d-%H%M')}.dat" 315 | async with aiofile.AIOFile(state_file, mode='wb') as state_afp, \ 316 | aiofile.AIOFile(source_file, mode='rb') as source_afp: 317 | async for status in siapath_mirror(storage, source_afp, 318 | start_block=start_block): 319 | await state_afp.write(pickle.dumps({ 320 | 'source_file': source_file, 321 | 'siapath': siapath, 322 | 'block_size': block_size, 323 | 'current_index': status.current_index})) 324 | await state_afp.fsync() 325 | show_status(stdscr, status, 326 | title=f'{source_file} -> {format_sp(siapath)}') 327 | os.remove(state_file) 328 | 329 | 330 | async def siapath_mirror(storage, source_afp, start_block=0): 331 | current_index = 0 332 | transfers = {} 333 | status = asyncio.Condition() 334 | 335 | async def read(): 336 | nonlocal schedule_reads 337 | async for index in schedule_reads(): 338 | pos = index*storage.block_size 339 | eof = await source_afp.read(1, offset=pos) == b'' 340 | if eof: 341 | break 342 | 343 | def region_agen(): return region_read(source_afp, pos, 344 | storage.block_size) 345 | if await is_zeroes(region_agen()): 346 | try: 347 | await storage.delete(index) 348 | except FileNotFoundError: 349 | pass 350 | else: 351 | md5_hash = await md5_hasher(region_agen()) 352 | block_file = storage.block_files.get(index, None) 353 | if (block_file is None 354 | or block_file.md5_hash != md5_hash 355 | or block_file.partial 356 | or block_file.stalled): 357 | await
storage.upload( 358 | index, md5_hash, lzma_compress(region_agen()), 359 | overwrite=True) 360 | 361 | async def schedule_reads(): 362 | nonlocal status, current_index 363 | linear_index = start_block 364 | while True: 365 | reupload = next( 366 | (index for index, bf in storage.block_files.items() 367 | if index < linear_index and (bf.partial or bf.stalled)), None) 368 | if reupload is not None: 369 | index = reupload 370 | else: 371 | index = linear_index 372 | linear_index += 1 373 | 374 | async with status: 375 | current_index = index 376 | status.notify() 377 | yield index 378 | 379 | async def watch_storage(): 380 | nonlocal status, transfers, current_index, read_task 381 | uploads_done = False 382 | while True: 383 | await asyncio.sleep(5) 384 | await storage.update() 385 | async with status: 386 | transfers = {index: bf.upload_progress for index, bf 387 | in storage.block_files.items() 388 | if not bf.complete or bf.partial} 389 | status.notify() 390 | 391 | uploads_done = transfers == {} 392 | if uploads_done and read_task.done(): 393 | async with status: 394 | status.notify() 395 | break 396 | 397 | read_task = asyncio.create_task(read()) 398 | watch_task = asyncio.create_task(watch_storage()) 399 | last_block = int(os.stat(source_afp.fileno()).st_size//storage.block_size) 400 | async with status: 401 | while not read_task.done(): 402 | await status.wait() 403 | yield OpStatus( 404 | transfers=transfers, last_index=last_block, 405 | current_index=current_index, block_size=storage.block_size) 406 | await read_task 407 | await watch_task 408 | 409 | # Trim extraneous blocks in the event of a shrunken source. 410 | # Can be *dangerous* if the user made a mistake, so wait a minute first. 411 | trim_indices = (index for index in storage.block_files.keys() 412 | if index > current_index) 413 | to_trim = next(trim_indices, None) 414 | if to_trim is not None: 415 | await asyncio.sleep(60) 416 | await storage.delete(to_trim) 417 | for to_trim in trim_indices: 418 | await storage.delete(to_trim) 419 | 420 | 421 | async def region_read(afp, start, max_length, readsize=DEFAULT_BUFFER_SIZE): 422 | ptr = start 423 | end = start + max_length 424 | while ptr < end: 425 | chunk = await afp.read(min(readsize, end - ptr), offset=ptr) 426 | if chunk: 427 | yield chunk 428 | ptr += len(chunk) 429 | else: 430 | break 431 | 432 | 433 | async def is_zeroes(abytesgen): 434 | async for chunk in abytesgen: 435 | if chunk.count(0) != len(chunk): 436 | return False 437 | return True 438 | 439 | 440 | async def md5_hasher(adata): 441 | loop = asyncio.get_running_loop() 442 | hasher = md5() 443 | async for chunk in adata: 444 | await loop.run_in_executor(None, hasher.update, chunk) 445 | return await loop.run_in_executor(None, hasher.hexdigest) 446 | 447 | 448 | async def lzma_compress(adata): 449 | loop = asyncio.get_running_loop() 450 | lz = LZMACompressor() 451 | async for chunk in adata: 452 | yield await loop.run_in_executor(None, lz.compress, chunk) 453 | yield await loop.run_in_executor(None, lz.flush) 454 | 455 | 456 | async def do_download(session, target_file, siapath, start_block=0, stdscr=None): 457 | storage = SiapathStorage(session, *siapath) 458 | await storage.update() 459 | 460 | try: 461 | target_afp = aiofile.AIOFile(target_file, mode='r+b') 462 | await target_afp.open() 463 | except FileNotFoundError: 464 | target_afp = aiofile.AIOFile(target_file, mode='wb') 465 | await target_afp.open() 466 | 467 | state_file = f"siaslice-download-{datetime.now().strftime('%Y%m%d-%H%M')}.dat" 468 | async 
with aiofile.AIOFile(state_file, mode='wb') as state_afp: 469 | async for status in siapath_download(storage, target_afp, 470 | start_block=start_block): 471 | await state_afp.write(pickle.dumps({ 472 | 'target_file': target_file, 473 | 'siapath': siapath, 474 | 'current_index': status.current_index})) 475 | await state_afp.fsync() 476 | show_status(stdscr, status, 477 | title=f'{format_sp(siapath)} -> {target_file}') 478 | target_afp.close() 479 | os.remove(state_file) 480 | 481 | 482 | async def siapath_download(storage, target_afp, start_block=0): 483 | current_index = 0 484 | transfers = {} 485 | status = asyncio.Condition() 486 | 487 | async def parallel_download(): 488 | nonlocal status, transfers, download 489 | for index, block_file in storage.block_files.items(): 490 | async with status: 491 | transfers[index] = 0.0 492 | status.notify() 493 | yield download(index, block_file) 494 | 495 | async def download(index, block_file): 496 | nonlocal status, transfers, current_index 497 | written = 0 498 | async for chunk in storage.download(index): 499 | await target_afp.write(chunk, offset=index*storage.block_size + written) 500 | written += len(chunk) 501 | async with status: 502 | transfers[index] = (written/block_file.size 503 | if block_file.size > 0 else 0.0) 504 | status.notify() 505 | async with status: 506 | del transfers[index] 507 | current_index = min(transfers.keys()) if transfers != {} else index 508 | status.notify() 509 | 510 | download_task = asyncio.create_task( 511 | await_all(limit_concurrency((task async for task in parallel_download()), 512 | SiadSession.MAX_CONCURRENT_DOWNLOADS))) 513 | async def wait_for_complete(task): 514 | nonlocal status 515 | await task 516 | async with status: 517 | status.notify() 518 | wait_task = asyncio.create_task(wait_for_complete(download_task)) 519 | async with status: 520 | while not download_task.done(): 521 | await status.wait() 522 | yield OpStatus(transfers=transfers, current_index=current_index, 523 | last_index=len(storage.block_files) - 1, 524 | block_size=storage.block_size) 525 | await wait_task 526 | 527 | 528 | def format_bs(block_size): 529 | n = int(block_size/1e3/1e3) 530 | if n >= 10*1000*1000: 531 | return f'{round(n/1000/1000, 3)}TiB' 532 | if n >= 10*1000: 533 | return f'{round(n/1000, 3)}GiB' 534 | else: 535 | return f'{n}MiB' 536 | 537 | def format_sp(siapath): return '/'.join(siapath) 538 | 539 | 540 | def show_status(stdscr, status, title=''): 541 | if stdscr is None: 542 | show_text_status(status, title=title) 543 | else: 544 | show_curses_status(stdscr, status, title=title) 545 | 546 | def show_text_status(status, title=''): 547 | from json import dump as jdump 548 | from sys import stdout 549 | 550 | jtransfers = [{'block': key, 'progress': value} 551 | for key, value in sorted(status.transfers.items())] 552 | jdump({'title': title, 553 | 'current_index': status.current_index, 554 | 'last_index': status.last_index, 555 | 'transfers': jtransfers}, stdout) 556 | print() 557 | 558 | def show_curses_status(stdscr, status, title=''): 559 | stdscr.refresh() 560 | lines, cols = stdscr.getmaxyx() 561 | curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_WHITE) 562 | 563 | filepos = format_bs(status.current_index*status.block_size) 564 | if status.last_index > 0: 565 | blocks = f'block {status.current_index} / {status.last_index} ({filepos})' 566 | else: 567 | blocks = f'block {status.current_index} ({filepos})' 568 | stdscr.insstr(0, 0, ' '*cols, curses.color_pair(1)) 569 | stdscr.insstr(0, 0, title[:cols], 
curses.color_pair(1)) 570 | stdscr.insstr(0, max(cols - len(blocks) - 1, 0), ' ' + blocks, 571 | curses.color_pair(1)) 572 | 573 | visible_transfers = min(len(status.transfers), lines - 1) 574 | transfers = sorted(status.transfers.items())[-visible_transfers:] 575 | def progress_bar(y, block, pct): 576 | bar_size = max(cols - 11 - 4 - 2, 0) 577 | n_done = max(round(pct*bar_size), 1) 578 | stdscr.insstr(y, 0, f'{block: 10} ') 579 | stdscr.insstr(y, 11, f"[{'='*(n_done - 1)}>{' '*(bar_size - n_done)}]") 580 | if pct >= 1.0: 581 | stdscr.insstr(y, cols - 4, ' '*4) 582 | else: 583 | stdscr.insstr(y, cols - 4, f'{round(pct*100.0): 3}%') 584 | for l in range(1, lines): 585 | try: 586 | progress_bar(l, *transfers[l - 1]) 587 | except IndexError: 588 | stdscr.insstr(l, 0, ' '*cols) 589 | 590 | stdscr.refresh() 591 | 592 | 593 | def limit_concurrency(generator, limit): 594 | sem = asyncio.BoundedSemaphore(value=limit) 595 | async def wrap_sync(the_gen): 596 | nonlocal sem 597 | for cor in the_gen: 598 | await sem.acquire() 599 | yield finish_task(cor) 600 | async def wrap_async(the_gen): 601 | nonlocal sem 602 | async for cor in the_gen: 603 | await sem.acquire() 604 | yield finish_task(cor) 605 | async def finish_task(the_cor): 606 | nonlocal sem 607 | await the_cor 608 | sem.release() 609 | if isinstance(generator, GeneratorType): 610 | return wrap_sync(generator) 611 | elif isinstance(generator, AsyncGeneratorType): 612 | return wrap_async(generator) 613 | 614 | 615 | async def await_all(generator): 616 | if isinstance(generator, GeneratorType): 617 | await asyncio.gather(*generator) 618 | elif isinstance(generator, AsyncGeneratorType): 619 | running = 0 620 | cv = asyncio.Condition() 621 | 622 | async def finish_task(the_cor): 623 | nonlocal running, cv 624 | await the_cor 625 | running -= 1 626 | async with cv: 627 | cv.notify() 628 | async for cor in generator: 629 | running += 1 630 | asyncio.create_task(finish_task(cor)) 631 | async with cv: 632 | await cv.wait_for(lambda: running == 0) 633 | else: 634 | raise ValueError(f'not a generator: {generator}') 635 | 636 | 637 | if __name__ == '__main__': 638 | main() 639 | 640 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | all : empty.img 40MiBempty.img 20Kstripe.img 40Mstripe.img 2 | 3 | empty.img : 4 | touch $@ 5 | 6 | 40MiBempty.img : 7 | dd if=/dev/zero of=$@ bs=1000000 count=40 8 | -------------------------------------------------------------------------------- /tests/mock_status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from random import randint, random 4 | from time import sleep 5 | 6 | from curses import wrapper 7 | 8 | import siaslice as ss 9 | 10 | 11 | if __name__ == '__main__': 12 | def mockup(last): 13 | transfers = { 0: 0.0, 1: 1.0 } 14 | transfers.update(dict((i, random()) for i in range(2, last))) 15 | return ss.OpStatus(transfers=transfers, current_index=randint(0, last), 16 | last_index=last, block_size=100*1000*1000) 17 | mockups = [mockup(5), mockup(10), mockup(50)] 18 | 19 | def curses(stdscr): 20 | for i, status in enumerate(mockups): 21 | ss.show_status(stdscr, status, f'progress screen #{i + 1}') 22 | sleep(2) 23 | wrapper(curses) 24 | 25 | def text(): 26 | for i, status in enumerate(mockups): 27 | ss.show_status(None, status, f'progress screen #{i + 1}') 28 | text() 29 | 30 | 
-------------------------------------------------------------------------------- /tests/test_cases_that_cost_sc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import asynctest 4 | from aiofile import AIOFile 5 | 6 | import siaslice as ss 7 | 8 | 9 | DOMAIN = 'http://localhost:9980' 10 | API_PASS = os.environ['SIA_API_PASSWORD'] 11 | 12 | 13 | class TestSiaOperations(asynctest.TestCase): 14 | TEST_DIR = 'siaslice_test_dir_abcd1234' 15 | 16 | async def setUp(self): 17 | self.session = ss.SiadSession(DOMAIN, API_PASS) 18 | await self.session.open() 19 | 20 | async def tearDown(self): 21 | await self.session.close() 22 | 23 | async def test_delete_block(self): 24 | await self.session.post( 25 | b'', 'renter', 'uploadstream', 26 | TestSiaOperations.TEST_DIR, 'siaslice.40MiB.69.x.lz') 27 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR) 28 | await storage.update() 29 | 30 | await storage.delete(69) 31 | self.assertNotIn(69, storage.block_files) 32 | 33 | async def test_read_hashes(self): 34 | await self.session.post(b'', 'renter', 'uploadstream', 35 | TestSiaOperations.TEST_DIR, 'siaslice.40MiB.0.x.lz') 36 | await self.session.post(b'', 'renter', 'uploadstream', 37 | TestSiaOperations.TEST_DIR, 'siaslice.40MiB.1.y.lz') 38 | await self.session.post(b'', 'renter', 'uploadstream', 39 | TestSiaOperations.TEST_DIR, 'siaslice.40MiB.2.z.lz') 40 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR) 41 | await storage.update() 42 | 43 | storage_hashes = [storage.block_files[i].md5_hash for i in range(3)] 44 | self.assertEqual(storage_hashes, ['x', 'y', 'z']) 45 | 46 | await storage.delete(0) 47 | await storage.delete(1) 48 | await storage.delete(2) 49 | 50 | async def test_sia_mirror_1_block(self): 51 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR, 52 | default_block_size=40*1000*1000) 53 | await storage.update() 54 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 55 | reference = await afp.read() 56 | async for status in ss.siapath_mirror(storage, afp): 57 | pass 58 | 59 | uploaded = b'' 60 | async for chunk in storage.download(0): 61 | uploaded += chunk 62 | self.assertEqual(uploaded, reference) 63 | 64 | await storage.delete(0) 65 | 66 | async def test_sia_mirror_2_blocks(self): 67 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR, 68 | default_block_size=20*1000*1000) 69 | await storage.update() 70 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 71 | reference = await afp.read() 72 | async for status in ss.siapath_mirror(storage, afp): 73 | pass 74 | 75 | uploaded = b'' 76 | async for chunk in storage.download(0): 77 | uploaded += chunk 78 | async for chunk in storage.download(1): 79 | uploaded += chunk 80 | self.assertEqual(uploaded, reference) 81 | 82 | await storage.delete(0) 83 | await storage.delete(1) 84 | 85 | async def test_sia_download_1_block(self): 86 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR, 87 | default_block_size=40*1000*1000) 88 | await storage.update() 89 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 90 | reference = await afp.read() 91 | async for status in ss.siapath_mirror(storage, afp): 92 | pass 93 | 94 | async with AIOFile('test_download.img', 'wb') as afp: 95 | async for status in ss.siapath_download(storage, afp): 96 | pass 97 | with open('test_download.img', 'rb') as fp: 98 | downloaded = fp.read() 99 | os.remove('test_download.img') 100 | self.assertEqual(downloaded, 
reference) 101 | 102 | await storage.delete(0) 103 | 104 | async def test_sia_download_2_blocks(self): 105 | storage = ss.SiapathStorage(self.session, TestSiaOperations.TEST_DIR, 106 | default_block_size=20*1000*1000) 107 | await storage.update() 108 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 109 | reference = await afp.read() 110 | async for status in ss.siapath_mirror(storage, afp): 111 | pass 112 | 113 | async with AIOFile('test_download.img', 'wb') as afp: 114 | async for status in ss.siapath_download(storage, afp): 115 | pass 116 | with open('test_download.img', 'rb') as fp: 117 | downloaded = fp.read() 118 | os.remove('test_download.img') 119 | self.assertEqual(downloaded, reference) 120 | 121 | await storage.delete(0) 122 | await storage.delete(1) 123 | 124 | 125 | if __name__ == '__main__': 126 | asynctest.main() 127 | 128 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | from asyncio import sleep 2 | from hashlib import md5 3 | from lzma import compress 4 | 5 | import asynctest 6 | from aiofile import AIOFile 7 | 8 | import siaslice as ss 9 | 10 | 11 | class TestTaskGenerator(asynctest.TestCase): 12 | 13 | async def test_await_all(self): 14 | mock = asynctest.CoroutineMock() 15 | async def cor(): 16 | await mock(1) 17 | await ss.await_all(cor() for i in range(10)) 18 | mock.assert_has_awaits([asynctest.call(1)]*10) 19 | 20 | async def test_limit_concurrency(self): 21 | mock = asynctest.CoroutineMock() 22 | v = 0 23 | async def cor(): 24 | nonlocal v 25 | new_v = v + 1 26 | await sleep(1) 27 | v = new_v 28 | await mock(v) 29 | await ss.await_all(ss.limit_concurrency((cor() for i in range(10)), 1)) 30 | mock.assert_has_awaits([asynctest.call(i) for i in range(1, 11)]) 31 | 32 | 33 | class TestGenerators(asynctest.TestCase): 34 | 35 | async def test_afp_generator(self): 36 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 37 | reference = await afp.read() 38 | read = b''.join([chunk async for chunk 39 | in ss.region_read(afp, 0, 40*1000*1000)]) 40 | self.assertEqual(read, reference) 41 | 42 | async def test_is_zeroes(self): 43 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 44 | self.assertTrue(await ss.is_zeroes(ss.region_read(afp, 0, 40*1000*1000))) 45 | 46 | async def test_is_not_zeroes(self): 47 | async def agen(gen): 48 | for x in gen: 49 | yield x 50 | chunks = [b'\0\0\0\0', b'\0\0\0\0', b'\0\0\0X', b'\0\0\0\0'] 51 | self.assertFalse(await ss.is_zeroes(agen(iter(chunks)))) 52 | 53 | async def test_md5_hasher(self): 54 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 55 | reference = md5(await afp.read()).hexdigest() 56 | compare = await ss.md5_hasher(ss.region_read(afp, 0, 40*1000*1000)) 57 | self.assertEqual(compare, reference) 58 | 59 | async def test_lzma_compress(self): 60 | async with AIOFile('40MiBempty.img', mode='rb') as afp: 61 | reference = compress(await afp.read()) 62 | agen = ss.region_read(afp, 0, 40*1000*1000) 63 | compare = b''.join([chunk async for chunk in ss.lzma_compress(agen)]) 64 | self.assertEqual(compare, reference) 65 | 66 | 67 | if __name__ == '__main__': 68 | asynctest.main() 69 | 70 | -------------------------------------------------------------------------------- /tests/test_siad.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import asynctest 4 | 5 | import siaslice as ss 6 | 7 | 8 | DOMAIN = 
'http://localhost:9980' 9 | API_PASS = environ['SIA_API_PASSWORD'] 10 | 11 | 12 | class GetRequest(asynctest.TestCase): 13 | 14 | async def setUp(self): 15 | self.session = ss.SiadSession(DOMAIN, API_PASS) 16 | await self.session.open() 17 | 18 | async def tearDown(self): 19 | await self.session.close() 20 | 21 | async def test_version_check(self): 22 | response = await self.session.get('daemon', 'version') 23 | self.assertEqual(response.status, 200) 24 | self.assertIn('version', await response.json()) 25 | 26 | async def test_stream_nonexistent_file(self): 27 | with self.assertRaises(ss.SiadError) as err: 28 | await self.session.get('renter', 'stream', 'siaslice_test_file_abcd1234') 29 | self.assertEqual(err.exception.status, 500) 30 | 31 | 32 | class PostRequest(asynctest.TestCase): 33 | 34 | async def setUp(self): 35 | self.session = ss.SiadSession(DOMAIN, API_PASS) 36 | await self.session.open() 37 | 38 | async def tearDown(self): 39 | await self.session.close() 40 | 41 | async def test_valid_siapath(self): 42 | response = await self.session.post(b'', 'renter', 'validatesiapath', 43 | 'this', 'is', 'a', 'valid', '$iapath') 44 | self.assertEqual(response.status, 204) 45 | self.assertEqual(await response.text(), '') 46 | 47 | async def test_invalid_siapath(self): 48 | with self.assertRaises(ss.SiadError) as err: 49 | await self.session.post(b'', 'renter', 'validatesiapath', '') 50 | self.assertEqual(err.exception.status, 400) 51 | 52 | 53 | if __name__ == '__main__': 54 | asynctest.main() 55 | 56 | --------------------------------------------------------------------------------