├── rq ├── .gitignore ├── COPYING ├── setup.py ├── libraptorq ├── __main__.py └── __init__.py └── README.rst /rq: -------------------------------------------------------------------------------- 1 | libraptorq/__main__.py -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /*.egg-info 2 | /build 3 | /dist 4 | *.pyc 5 | *.pyo 6 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2012 Mike Kazantsev 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 
14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | #-*- coding: utf-8 -*- 3 | 4 | from setuptools import setup, find_packages 5 | import os, sys 6 | 7 | # Error-handling here is to allow package to be built w/o README included 8 | try: 9 | readme = open(os.path.join( 10 | os.path.dirname(__file__), 'README.rst' )).read() 11 | except IOError: readme = '' 12 | 13 | setup( 14 | 15 | name = 'libraptorq', 16 | version = '18.4.0', 17 | author = 'Mike Kazantsev', 18 | author_email = 'mk.fraggod@gmail.com', 19 | license = 'WTFPL', 20 | keywords = [ 21 | 'fec', 'forward', 'error', 'correction', 'fountain', 'code', 22 | 'rateless', 'erasure', 'codes', 'raptor', 'raptorq', 'libraptorq', 23 | 'lossy', 'reliable', 'encoding', 'rate', 'parity', 'redundancy', 'reliability' ], 24 | 25 | url = 'http://github.com/mk-fg/python-libraptorq', 26 | 27 | description = 'Python CFFI bindings for libRaptorQ' 28 | ' (RaptorQ RFC6330 FEC implementation).', 29 | long_description = readme, 30 | 31 | classifiers = [ 32 | 'Development Status :: 4 - Beta', 33 | 'Environment :: Console', 34 | 'Environment :: No Input/Output (Daemon)', 35 | 'Intended Audience :: Developers', 36 | 'Intended Audience :: End Users/Desktop', 37 | 'Intended Audience :: Information Technology', 38 | 'Intended Audience :: Science/Research', 39 | 'Intended Audience :: Telecommunications Industry', 40 | 'License :: Public Domain', 41 | 'Operating System :: POSIX', 42 | 'Operating System :: POSIX :: Linux', 43 | 'Programming Language :: Python', 44 | 'Programming Language :: Python :: 2.7', 45 | 'Programming Language :: Python :: 2 :: Only', 46 | 'Topic :: Communications', 47 | 'Topic :: Internet', 48 | 'Topic :: System :: Archiving', 49 | 'Topic :: Utilities' ], 50 | 51 | install_requires = ['cffi', 'setuptools'], 52 | 53 | packages = find_packages(), 54 | 
include_package_data = True, 55 | 56 | entry_points = { 57 | 'console_scripts': ['rq = libraptorq.__main__:main'] }) 58 | -------------------------------------------------------------------------------- /libraptorq/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | from __future__ import print_function 3 | 4 | import itertools as it, operator as op, functools as ft 5 | from os.path import dirname, basename, exists, isdir, join, abspath 6 | import os, sys, types, math, json, base64, hashlib, logging, time 7 | 8 | 9 | try: import libraptorq 10 | except ImportError: 11 | # Make sure tool works from a checkout 12 | if __name__ != '__main__': raise 13 | pkg_root = abspath(dirname(__file__)) 14 | for pkg_root in pkg_root, dirname(pkg_root): 15 | if isdir(join(pkg_root, 'libraptorq'))\ 16 | and exists(join(pkg_root, 'setup.py')): 17 | sys.path.insert(0, dirname(__file__)) 18 | try: import libraptorq 19 | except ImportError: pass 20 | else: break 21 | else: raise ImportError('Failed to find/import "libraptorq" module') 22 | 23 | from libraptorq import RQEncoder, RQDecoder, RQError 24 | 25 | 26 | sys.stdout, sys.stderr = ( 27 | os.fdopen(s.fileno(), 'wb', 0) for s in [sys.stdout, sys.stderr] ) 28 | p = lambda fmt,*a,**k:\ 29 | print(*( [fmt.format(*a,**k)]\ 30 | if isinstance(fmt, types.StringTypes) and (a or k) 31 | else [[fmt] + list(a), k] ), file=sys.stderr) 32 | 33 | b64_encode = base64.urlsafe_b64encode 34 | b64_decode = lambda s:\ 35 | base64.urlsafe_b64decode(bytes(s))\ 36 | if '-' in s or '_' in s else bytes(s).decode('base64') 37 | 38 | num_fmt = lambda n: '{:,}'.format(n) 39 | 40 | def _timer_iter(): 41 | ts0 = time.time() 42 | while True: 43 | ts = time.time() 44 | ts_diff, ts0 = ts - ts0, ts 45 | yield ts_diff 46 | 47 | def timer_iter(): 48 | timer = _timer_iter() 49 | next(timer) 50 | return timer 51 | 52 | 53 | class EncDecFailure(Exception): pass 54 | 55 | def encode(opts, data): 56 | 
data_len, data_sha256 = len(data), hashlib.sha256(data).digest() 57 | if data_len % 4: data += '\0' * (4 - data_len % 4) 58 | timer = timer_iter() 59 | with RQEncoder( data, 60 | opts.subsymbol_size, opts.symbol_size, opts.max_memory ) as enc: 61 | log.debug('Initialized RQEncoder (%.3fs)...', next(timer)) 62 | oti_scheme, oti_common = enc.oti_scheme, enc.oti_common 63 | if not opts.no_precompute: 64 | enc.precompute(opts.threads, background=False) 65 | log.debug('Precomputed blocks (%.3fs)...', next(timer)) 66 | 67 | symbols, enc_k, n_drop = list(), 0, 0 68 | for block in enc: 69 | enc_k += block.symbols # not including repair ones 70 | block_syms = list(block.encode_iter( 71 | repair_rate=opts.repair_symbols_rate )) 72 | if opts.drop_rate > 0: 73 | import random 74 | n_drop_block = int(round(len(block_syms) * opts.drop_rate, 0)) 75 | for n in xrange(n_drop_block): 76 | block_syms[int(random.random() * len(block_syms))] = None 77 | n_drop += n_drop_block 78 | symbols.extend(block_syms) 79 | log.debug('Finished encoding symbols (%s blocks, %.3fs)...', enc.blocks, next(timer)) 80 | log.debug('Closed RQEncoder (%.3fs)...', next(timer)) 81 | 82 | symbols = filter(None, symbols) 83 | if log.isEnabledFor(logging.DEBUG): 84 | log.debug( 85 | 'Encoded %s B into %s symbols (needed: >%s, repair rate:' 86 | ' %d%%), %s dropped (%d%%), %s left in output (%s B without ids)', 87 | num_fmt(data_len), num_fmt(len(symbols) + n_drop), 88 | num_fmt(enc_k), opts.repair_symbols_rate*100, 89 | num_fmt(n_drop), opts.drop_rate*100, num_fmt(len(symbols)), 90 | num_fmt(sum(len(s[1]) for s in symbols)) ) 91 | 92 | return dict( data_bytes=data_len, 93 | oti_scheme=oti_scheme, oti_common=oti_common, 94 | symbols=list((s[0], b64_encode(s[1])) for s in symbols), 95 | checksums=dict(sha256=b64_encode(data_sha256)) ) 96 | 97 | 98 | def decode(opts, data): 99 | data_dec = _decode(opts, data) 100 | if data['data_bytes'] != len(data_dec): 101 | raise EncDecFailure( 102 | 'Data length mismatch - {} B 
encoded vs {} B decoded' 103 | .format(num_fmt(data['data_bytes']), num_fmt(len(data_dec))) ) 104 | data_chk = data.get('checksums', dict()) 105 | for k, v in data_chk.viewitems(): 106 | v = b64_decode(v) 107 | if getattr(hashlib, k)(data_dec).digest() != v: 108 | raise EncDecFailure('Data checksum ({}) mismatch'.format(k)) 109 | return data_dec 110 | 111 | def _decode(opts, data): 112 | n_syms, n_syms_total, n_sym_bytes = 0, len(data['symbols']), 0 113 | if ( not data['symbols'] # zero-input/zero-output case 114 | and data['oti_common'] == data['oti_scheme'] == 0 ): return '' 115 | timer = timer_iter() 116 | with RQDecoder(data['oti_common'], data['oti_scheme']) as dec: 117 | log.debug('Initialized RQDecoder (%.3fs)...', next(timer)) 118 | err = 'no symbols available' 119 | for sym_id, sym in data['symbols']: 120 | sym_id, sym = int(sym_id), b64_decode(sym) 121 | try: dec.add_symbol(sym, sym_id) 122 | except RQError as err: continue 123 | n_syms, n_sym_bytes = n_syms + 1, n_sym_bytes + len(sym) 124 | try: data = dec.decode()[:data['data_bytes']] # strips \0 padding to rq block size 125 | except RQError as err: pass 126 | else: 127 | log.debug('Decoded enough symbols to recover data (%.3fs)...', next(timer)) 128 | break 129 | else: 130 | raise EncDecFailure(( 'Faled to decode data from {}' 131 | ' total symbols (processed: {}) - {}' ).format(n_syms_total, n_syms, err)) 132 | log.debug('Closed RQDecoder (%.3fs)...', next(timer)) 133 | if log.isEnabledFor(logging.DEBUG): 134 | log.debug( 135 | 'Decoded %s B of data from %s processed' 136 | ' symbols (%s B without ids, symbols total: %s)', 137 | num_fmt(len(data)), num_fmt(n_syms), 138 | num_fmt(n_sym_bytes), num_fmt(n_syms_total) ) 139 | return data 140 | 141 | 142 | def main(args=None, error_func=None): 143 | import argparse 144 | parser = argparse.ArgumentParser( 145 | description='Encode/decode data using RaptorQ rateless' 146 | ' erasure encoding ("fountain code") algorithm, using libRaptorQ through CFFI.') 147 | 
parser.add_argument('--debug', action='store_true', help='Verbose operation mode.') 148 | cmds = parser.add_subparsers( dest='cmd', 149 | title='Supported operations (have their own suboptions as well)' ) 150 | 151 | 152 | cmd = cmds.add_parser('encode', 153 | help='Encode file into chunks and dump these along with OTI parameters as a JSON structure.') 154 | cmd.add_argument('path_src', nargs='?', 155 | help='Path to a file which contents should be encoded. Stdin will be used, if not specified.') 156 | cmd.add_argument('path_dst', nargs='?', 157 | help='Path to write resulting JSON to. Will be dumped to stdout, if not specified.') 158 | 159 | cmd.add_argument('--no-precompute', action='store_true', 160 | help='Do not run precompute() synchronously before encoding symbols.' 161 | ' Should be much slower, so probably only useful for benchmarking or debugging.') 162 | cmd.add_argument('-j', '--threads', 163 | type=int, metavar='n', 164 | help='Number of encoder threads to use. 0 to scale to all cpus (default).') 165 | cmd.add_argument('-k', '--subsymbol-size', 166 | type=int, metavar='bytes', 167 | help='Should almost always be equal to symbol size.' 168 | ' See RFC6330 for details. Set to value of symbols size if not specified.') 169 | cmd.add_argument('-s', '--symbol-size', 170 | required=True, type=int, metavar='bytes', 171 | help='Size of each indivisible (must either be' 172 | ' present intact or lost entirely when decoding) symbol in the output.' 173 | ' Using wrong value here (for data size) can result in undecodable output.' 174 | ' See RFC6330 or libRaptorQ code/docs for more information.' 175 | ' Must be specified manually.') 176 | cmd.add_argument('-m', '--max-memory', 177 | required=True, type=int, metavar='int', 178 | help='Value for working memory of the decoder,' 179 | ' see RFC6330 or libRaptorQ code/docs for more information.' 180 | ' Raise it if encoding fails to produce valid (decodable) data.' 
181 | ' Must be specified manually.') 182 | 183 | cmd.add_argument('-n', '--repair-symbols-rate', 184 | required=True, type=float, metavar='float', 185 | help='Fraction of extra symbols to generate above what is required' 186 | ' to reassemble to file as a fraction of that "required" count.' 187 | ' For example, if 100 symbols are required, "-n 0.5" will generate 150 symbols.' 188 | ' Must be specified manually.') 189 | 190 | cmd.add_argument('-d', '--drop-rate', 191 | default=0, type=float, metavar='0-1.0', 192 | help='Drop specified randomly-picked fraction' 193 | ' of symbols encoded for each block (incl. ones for repair).' 194 | ' I.e. just discard these right after encoding. Mainly useful for testing.') 195 | 196 | 197 | cmd = cmds.add_parser('decode', help='Decode lines of base64 into a file.') 198 | cmd.add_argument('path_src', nargs='?', 199 | help='Path to a file with JSON structure, such as produced by "encode" operation.' 200 | ' Stdin will be used, if not specified.') 201 | cmd.add_argument('path_dst', nargs='?', 202 | help='Path to write assembled file to. 
Will be dumped to stdout, if not specified.') 203 | 204 | 205 | opts = parser.parse_args(sys.argv[1:] if args is None else args) 206 | 207 | global log 208 | logging.basicConfig( 209 | format='%(asctime)s :: %(levelname)s :: %(message)s', 210 | datefmt='%Y-%m-%d %H:%M:%S', 211 | level=logging.DEBUG if opts.debug else logging.WARNING ) 212 | log = logging.getLogger() 213 | 214 | src = sys.stdin if not opts.path_src else open(opts.path_src, 'rb') 215 | try: data = src.read() 216 | finally: src.close() 217 | 218 | try: 219 | if opts.cmd == 'encode': 220 | if not opts.subsymbol_size: opts.subsymbol_size = opts.symbol_size 221 | try: data = encode(opts, data) 222 | except RQError as err: raise EncDecFailure(str(err)) 223 | data = json.dumps(data, sort_keys=True, indent=2, separators=(',', ': ')) 224 | elif opts.cmd == 'decode': 225 | data = decode(opts, json.loads(data)) 226 | else: raise NotImplementedError(opts.cmd) 227 | except EncDecFailure as err: 228 | log.error('Operation failed - %s', err) 229 | return 1 230 | 231 | if data is not None: 232 | dst = sys.stdout if not opts.path_dst else open(opts.path_dst, 'wb') 233 | try: dst.write(data) 234 | finally: dst.close() 235 | 236 | 237 | if __name__ == '__main__': sys.exit(main()) 238 | -------------------------------------------------------------------------------- /libraptorq/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import itertools as it, operator as op, functools as ft 5 | import math 6 | 7 | from cffi import FFI 8 | import ctypes.util 9 | 10 | 11 | def _add_lib_wrappers(funcs=None, props=None): 12 | def make_ctx_func(func_name): 13 | ctx_fn = 'rq_{}'.format(func_name) 14 | def _ctx_func(self, *args): 15 | return getattr(self, ctx_fn)(*args) 16 | return _ctx_func 17 | def _add_wrappers(cls_name, cls_parents, cls_attrs): 18 | ctx_funcs, ctx_props = funcs or list(), props or list() 19 | 
for fn in it.chain(ctx_props, ctx_funcs): 20 | k = fn.lower() 21 | if k in cls_attrs: continue 22 | func = make_ctx_func(fn) 23 | if fn in ctx_props: func = property(func) 24 | cls_attrs[k] = func 25 | return type(cls_name, cls_parents, cls_attrs) 26 | return _add_wrappers 27 | 28 | 29 | class RQError(Exception): pass 30 | 31 | class RQObject(object): 32 | 33 | _cdefs = ''' 34 | typedef uint64_t RaptorQ_OTI_Common_Data; 35 | typedef uint32_t RaptorQ_OTI_Scheme_Specific_Data; 36 | 37 | typedef enum { 38 | NONE = 0, 39 | ENC_8 = 1, ENC_16 = 2, ENC_32 = 3, ENC_64 = 4, 40 | DEC_8 = 5, DEC_16 = 6, DEC_32 = 7, DEC_64 = 8 41 | } RaptorQ_type; 42 | 43 | struct RaptorQ_ptr; 44 | 45 | struct RaptorQ_ptr* RaptorQ_Enc ( 46 | const RaptorQ_type type, 47 | void *data, 48 | const uint64_t size, 49 | const uint16_t min_subsymbol_size, 50 | const uint16_t symbol_size, 51 | const size_t max_memory); 52 | 53 | struct RaptorQ_ptr* RaptorQ_Dec ( 54 | const RaptorQ_type type, 55 | const RaptorQ_OTI_Common_Data common, 56 | const RaptorQ_OTI_Scheme_Specific_Data scheme); 57 | 58 | // Encoding 59 | 60 | RaptorQ_OTI_Common_Data RaptorQ_OTI_Common (struct RaptorQ_ptr *enc); 61 | RaptorQ_OTI_Scheme_Specific_Data RaptorQ_OTI_Scheme (struct RaptorQ_ptr *enc); 62 | 63 | uint16_t RaptorQ_symbol_size (struct RaptorQ_ptr *ptr); 64 | uint8_t RaptorQ_blocks (struct RaptorQ_ptr *ptr); 65 | uint32_t RaptorQ_block_size (struct RaptorQ_ptr *ptr, const uint8_t sbn); 66 | uint16_t RaptorQ_symbols (struct RaptorQ_ptr *ptr, const uint8_t sbn); 67 | uint32_t RaptorQ_max_repair (struct RaptorQ_ptr *enc, const uint8_t sbn); 68 | size_t RaptorQ_precompute_max_memory (struct RaptorQ_ptr *enc); 69 | 70 | void RaptorQ_precompute ( 71 | struct RaptorQ_ptr *enc, 72 | const uint8_t threads, 73 | const bool background); 74 | 75 | uint64_t RaptorQ_encode_id ( 76 | struct RaptorQ_ptr *enc, 77 | void **data, 78 | const uint64_t size, 79 | const uint32_t id); 80 | uint64_t RaptorQ_encode ( 81 | struct RaptorQ_ptr *enc, 82 
| void **data, 83 | const uint64_t size, 84 | const uint32_t esi, 85 | const uint8_t sbn); 86 | uint32_t RaptorQ_id (const uint32_t esi, const uint8_t sbn); 87 | 88 | // Decoding 89 | 90 | uint64_t RaptorQ_bytes (struct RaptorQ_ptr *dec); 91 | 92 | uint64_t RaptorQ_decode ( 93 | struct RaptorQ_ptr *dec, 94 | void **data, 95 | const size_t size); 96 | uint64_t RaptorQ_decode_block ( 97 | struct RaptorQ_ptr *dec, 98 | void **data, 99 | const size_t size, 100 | const uint8_t sbn); 101 | 102 | bool RaptorQ_add_symbol_id ( 103 | struct RaptorQ_ptr *dec, 104 | void **data, 105 | const uint32_t size, 106 | const uint32_t id); 107 | bool RaptorQ_add_symbol ( 108 | struct RaptorQ_ptr *dec, 109 | void **data, 110 | const uint32_t size, 111 | const uint32_t esi, 112 | const uint8_t sbn); 113 | 114 | // General: free memory 115 | 116 | void RaptorQ_free (struct RaptorQ_ptr **ptr); 117 | void RaptorQ_free_block (struct RaptorQ_ptr *ptr, const uint8_t sbn); 118 | ''' 119 | _ctx = None 120 | 121 | data_size_div, _rq_type, _rq_blk = 4, 32, 'uint32_t' 122 | 123 | def __init__(self): 124 | self._ffi = FFI() 125 | self._ffi.cdef(self._cdefs) 126 | # self.ffi.set_source('_rq', '#include ') 127 | lib_name = ctypes.util.find_library('RaptorQ') # newer cffi should not do that automatically 128 | self._lib = self._ffi.dlopen(lib_name) # ABI mode for simplicity 129 | self.rq_types = ( ['NONE', None] 130 | + list('ENC_{}'.format(2**n) for n in xrange(3, 7)) 131 | + list('DEC_{}'.format(2**n) for n in xrange(3, 7)) ) 132 | self._rq_blk_size = self.data_size_div 133 | 134 | def rq_type_val(self, v, pre): 135 | if isinstance(v, int) or v.isdigit(): v = '{}_{}'.format(pre, v).upper() 136 | else: v = bytes(v).upper() 137 | assert v in self.rq_types, [v, self.rq_types] 138 | return getattr(self._lib, v) 139 | 140 | def __getattr__(self, k): 141 | if k.startswith('rq_'): 142 | if not self._ctx: raise RuntimeError('ContextManager not initialized or already freed') 143 | return 
ft.partial(getattr(self._lib, 'RaptorQ_{}'.format(k[3:])), self._ctx) 144 | return self.__getattribute__(k) 145 | 146 | def open(self): 147 | self._ctx = self._ctx_init[0](*self._ctx_init[1]) 148 | return self._ctx 149 | 150 | def close(self): 151 | if self._ctx: 152 | ptr = self._ffi.new('struct RaptorQ_ptr **') 153 | ptr[0] = self._ctx 154 | self._lib.RaptorQ_free(ptr) 155 | self._ctx = None 156 | 157 | def __enter__(self): 158 | self.open() 159 | return self 160 | def __exit__(self, *err): self.close() 161 | def __del__(self): self.close() 162 | 163 | 164 | def sym_id(self, esi, sbn): return self._lib.RaptorQ_id(esi, sbn) 165 | 166 | _sym_n = None 167 | def _sym_buff(self, init=None): 168 | if not self._sym_n: self._sym_n = self.symbol_size / self._rq_blk_size 169 | buff = self._ffi.new('{}[]'.format(self._rq_blk), self._sym_n) 170 | buff_ptr = self._ffi.new('void **', buff) 171 | buff_raw = self._ffi.buffer(buff) 172 | if init: buff_raw[:] = init 173 | return buff_ptr, lambda: bytes(buff_raw) 174 | 175 | 176 | class RQEncoder(RQObject): 177 | 178 | __metaclass__ = _add_lib_wrappers( 179 | props=[ 'symbol_size', 'blocks', 'bytes', 180 | 'precompute_max_memory', 'OTI_Common', 'OTI_Scheme' ], 181 | funcs=['block_size', 'symbols', 'free_block', 'max_repair'] ) 182 | 183 | def __init__(self, data, min_subsymbol_size, symbol_size, max_memory, init_check=True): 184 | super(RQEncoder, self).__init__() 185 | self._sym_n = symbol_size / self._rq_blk_size 186 | assert len(data) % self._rq_blk_size == 0, len(data) 187 | rq_len = len(data) // self._rq_blk_size 188 | self._ctx_init_check = init_check 189 | self._ctx_init = self._lib.RaptorQ_Enc,\ 190 | [ self.rq_type_val(self._rq_type, 'enc'), data, rq_len, 191 | min_subsymbol_size, symbol_size, max_memory ] 192 | 193 | def open(self): 194 | super(RQEncoder, self).open() 195 | if self._ctx_init_check and self.oti_scheme == self.oti_common == 0: 196 | raise RQError('Failed to initialize' 197 | ' encoder for specified data and 
encoding parameters') 198 | return self._ctx 199 | 200 | def precompute(self, n_threads=None, background=False): 201 | return self.rq_precompute(n_threads or 0, background) 202 | 203 | def encode(self, sym_id=None, esi=None, sbn=None): 204 | buff_ptr, buff_get = self._sym_buff() 205 | if sym_id is not None: 206 | n = self.rq_encode_id(buff_ptr, self._sym_n, sym_id) 207 | elif esi is not None and sbn is not None: 208 | n = self.rq_encode(buff_ptr, self._sym_n, esi, sbn) 209 | else: raise ValueError(sym_id, esi, sbn) 210 | if n != self._sym_n: raise RQError('Failure when creating the symbol') 211 | return buff_get() 212 | 213 | def encode_block_iter(self): 214 | for sbn in xrange(self.blocks): 215 | yield RQEncoderBlock(self, sbn) 216 | 217 | def __iter__(self): return self.encode_block_iter() 218 | 219 | 220 | class RQEncoderBlock(object): 221 | 222 | symbol_size = property(lambda s: s.encoder.symbol_size) 223 | symbols = property(lambda s: s.encoder.symbols(s.sbn)) 224 | max_repair = property(lambda s: s.encoder.max_repair(s.sbn)) 225 | 226 | def __init__(self, encoder, sbn): 227 | self.encoder, self.sbn = encoder, sbn 228 | 229 | def encode(self, esi): 230 | sym_id = self.encoder.sym_id(esi, self.sbn) 231 | return sym_id, self.encoder.encode(sym_id) 232 | 233 | def encode_iter(self, repair_rate=None, repair_count_max=None): 234 | n = self.symbols 235 | nr = int(math.ceil(n * repair_rate) if repair_rate is not None else 0) 236 | nr = min(nr, self.max_repair) 237 | if repair_count_max is not None: nr = min(nr, repair_count_max) 238 | for esi in xrange(n + nr): yield self.encode(esi) 239 | 240 | def __iter__(self): return self.encode_iter() 241 | 242 | 243 | class RQDecoder(RQObject): 244 | 245 | __metaclass__ = _add_lib_wrappers( 246 | props=['symbol_size', 'blocks', 'bytes'], 247 | funcs=['block_size', 'symbols', 'max_repair'] ) 248 | 249 | def __init__(self, oti_common, oti_scheme): 250 | super(RQDecoder, self).__init__() 251 | self._ctx_init = 
self._lib.RaptorQ_Dec,\ 252 | [self.rq_type_val(self._rq_type, 'dec'), oti_common, oti_scheme] 253 | 254 | def __enter__(self): 255 | super(RQDecoder, self).__enter__() 256 | self._sym_n = self.symbol_size / self._rq_blk_size 257 | return self 258 | 259 | def add_symbol(self, sym, sym_id=None, esi=None, sbn=None): 260 | buff_ptr, buff_get = self._sym_buff(sym) 261 | assert len(sym) == self._sym_n * self._rq_blk_size 262 | if sym_id is not None: 263 | chk = self.rq_add_symbol_id(buff_ptr, self._sym_n, sym_id) 264 | elif esi is not None and sbn is not None: 265 | chk = self.rq_add_symbol(buff_ptr, self._sym_n, esi, sbn) 266 | else: raise ValueError(sym_id, esi, sbn) 267 | if not chk: 268 | raise RQError( 'Failed to decode symbol' 269 | ' (id: {}/{}/{}, data: {!r})'.format(sym_id, esi, sbn, sym) ) 270 | 271 | def _block_buff(self, bs): 272 | buff_n = int(math.ceil(bs / float(self._rq_blk_size))) 273 | buff = self._ffi.new('{}[]'.format(self._rq_blk), buff_n) 274 | buff_ptr = self._ffi.new('void **', buff) 275 | return buff_n, buff_ptr, lambda n: bytes(self._ffi.buffer(buff, n)) 276 | 277 | def decode(self, partial=False): 278 | buff_n, buff_ptr, buff_get = self._block_buff(self.bytes) 279 | n = self.rq_decode(buff_ptr, buff_n) 280 | if not partial and n != buff_n: 281 | raise RQError('Failed to decode data - not enough symbols received') 282 | return buff_get(n * self._rq_blk_size) 283 | 284 | def decode_block(self, sbn, partial=False): 285 | buff_n, buff_ptr, buff_get = self._block_buff(self.block_size(sbn)) 286 | n = self.rq_decode_block(buff_ptr, buff_n, sbn) 287 | if not partial and n != buff_n: 288 | raise RQError('Failed to decode data - not enough symbols received') 289 | return buff_get(n * self._rq_blk_size) 290 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | python-libraptorq 2 | ================= 3 | 4 | Python 2.X CFFI_ 
bindings for libRaptorQ_ v0.1.x - C++11 implementation of 5 | RaptorQ Forward Error Correction codes, as described in RFC6330_. 6 | 7 | **Warning**: Using libRaptorQ RFC6330 API (which this module wraps around) 8 | properly requires knowledge of some concepts and parameters described in that 9 | RFC, and not using correct ones may result in undecodable data! 10 | See "Usage" section below for more details. 11 | 12 | **Warning**: As far as I know (not a lawyer), there are lots of patents around 13 | the use of this technology, which might be important for any high-profile and 14 | commercial projects, especially in US and Canada. 15 | 16 | | 17 | 18 | .. contents:: 19 | :backlinks: none 20 | 21 | .. _CFFI: http://cffi.readthedocs.org/ 22 | .. _libRaptorQ: https://www.fenrirproject.org/Luker/libRaptorQ/wikis/home 23 | .. _RFC6330: https://tools.ietf.org/html/rfc6330 24 | 25 | 26 | 27 | General info 28 | ------------ 29 | 30 | Quoting `wikipedia on Raptor code`_: 31 | 32 | Raptor codes, as with fountain codes in general, encode a given message 33 | consisting of a number of symbols, k, into a potentially limitless sequence of 34 | encoding symbols such that knowledge of any k or more encoding symbols allows 35 | the message to be recovered with some non-zero probability. 36 | 37 | Raptor ("RAPid TORnado") codes are the first known class of fountain codes 38 | with linear time encoding and decoding. 39 | 40 | And RFC6330_: 41 | 42 | RaptorQ codes are a new family of codes that provide superior flexibility, 43 | support for larger source block sizes, and better coding efficiency than 44 | Raptor codes in RFC 5053. 45 | 46 | ... in most cases, a set of cardinality equal to the number of source symbols 47 | is sufficient; in rare cases, a set of cardinality slightly more than the 48 | number of source symbols is required. 
49 | 50 | In practice this means that source data block of size 1 MiB (for example) can 51 | (with very high probability) be recovered from any 1.002 MiB of the received 52 | symbols for it (from `"Application Layer Forward Error Correction for Mobile 53 | Multimedia Broadcasting Case Study" paper`_). 54 | 55 | Note that being a probabilistic algorithm, RaptorQ can have highly-improbable 56 | pathological cases and be exploited through these e.g. by dropping specific data 57 | blocks (see `"Stopping a Rapid Tornado with a Puff" paper`_ for more details). 58 | 59 | Encoded data will be roughly the same size as the original plus the "repair symbols", 60 | i.e. almost no size overhead, except for what is intentionally generated. 61 | 62 | .. _wikipedia on Raptor code: https://en.wikipedia.org/wiki/Raptor_code 63 | .. _"Application Layer Forward Error Correction for Mobile Multimedia Broadcasting Case Study" paper: 64 | https://www.qualcomm.com/media/documents/files/raptor-codes-for-mobile-multimedia-broadcasting-case-study.pdf 65 | .. _"Stopping a Rapid Tornado with a Puff" paper: http://jmsalopes.com/pubs/sp.pdf 66 | 67 | 68 | 69 | Usage 70 | ----- 71 | 72 | Module includes command-line script ("rq", when installed or as symlink in the 73 | repo), which has example code for both encoding and decoding, and can be used as 74 | a standalone tool, or for basic algorithm testing/showcase. 75 | 76 | Can also be used from command-line via ``python2 -m libraptorq ...`` invocation 77 | (when installed as module), e.g. ``python2 -m libraptorq --help``. 78 | 79 | **Important**: With current 0.1.x libRaptorQ API, specifying unsuitable 80 | parameters for encoding, such as having symbol_size=16 and max_memory=200 for 81 | encoding 200K+ of data WILL result in **silently** producing encoded data that 82 | **cannot be decoded**. 
83 | 84 | 85 | Command-line script 86 | ''''''''''''''''''' 87 | 88 | Note: it's just an example/testing script to run and check if module works with 89 | specific parameters or see how to use it, don't rely on it as a production tool 90 | or anything like that. 91 | 92 | To encode file, with 50% extra symbols (resulting indivisible data chunks to be 93 | stored/transmitted intact or lost entirely) and 30% of total from these (K 94 | required symbols + X repair symbols) dropped (for testing purposes) before 95 | saving them to "setup.py.enc":: 96 | 97 | % ./rq --debug encode -s16 -m200 --repair-symbols-rate 0.5 --drop-rate 0.3 setup.py setup.py.enc 98 | Initialized RQEncoder (0.063s)... 99 | Precomputed blocks (0.002s)... 100 | Finished encoding symbols (9 blocks, 0.008s)... 101 | Closed RQEncoder (0.002s)... 102 | Encoded 1,721 B into 167 symbols (needed: >108, repair rate: 50%), 103 | 45 dropped (30%), 122 left in output (1,952 B without ids) 104 | 105 | Decode original file back from these:: 106 | 107 | % ./rq --debug decode setup.py.enc setup.py.dec 108 | Initialized RQDecoder (0.064s)... 109 | Decoded enough symbols to recover data (0.010s)... 110 | Closed RQDecoder (0.002s)... 111 | Decoded 1,721 B of data from 108 processed symbols (1,728 B without ids, symbols total: 122) 112 | 113 | % sha256sum -b setup.py{,.dec} 114 | 36c50348459b51821a2715b0f5c4ef08647d66f77a29913121af4f0f4dfef454 *setup.py 115 | 36c50348459b51821a2715b0f5c4ef08647d66f77a29913121af4f0f4dfef454 *setup.py.dec 116 | 117 | No matter which chunks are dropped (they are picked at random), file 118 | should be recoverable from output as long as number of chunks left (in each 119 | "block") is slightly (by ~0.2%) above K. 120 | 121 | Output data ("setup.py.enc" in the example) for the script is JSON-encoded list 122 | of base64-encoded symbols, as well as some parameters for lib init 123 | (``oti_scheme``, ``oti_common``). 
124 | 125 | Input data length and sha256 hash of source data are only there to make sure 126 | that decoded data is same as original (or exit with error otherwise). 127 | 128 | See output with --help option for all the other script parameters. 129 | 130 | 131 | Python module 132 | ''''''''''''' 133 | 134 | To use as a python2 module:: 135 | 136 | from libraptorq import RQEncoder 137 | 138 | data = 'some input string' * 500 139 | 140 | # Data size must be divisible by RQEncoder.data_size_div 141 | data_len, n = len(data), RQEncoder.data_size_div 142 | if data_len % n: data += '\0' * (n - data_len % n) 143 | 144 | with RQEncoder(data, min_subsymbol_size=4, symbol_size=16, max_memory=200) as enc: 145 | 146 | symbols = dict() 147 | oti_scheme, oti_common = enc.oti_scheme, enc.oti_common 148 | 149 | for block in enc: 150 | symbols.update(block.encode_iter(repair_rate=0)) 151 | 152 | data_encoded = data_len, oti_scheme, oti_common, symbols 153 | 154 | ``oti_scheme`` and ``oti_common`` are two integers specifying encoder options, 155 | needed to initialize decoder, which can be hard-coded (if constant) on both ends. 156 | 157 | ``block.encode_iter()`` can be used without options to produce max possible 158 | amount of symbols, up to ``block.symbols + block.max_repair``. 159 | Above example only produces K symbols - min amount required. 160 | 161 | For decoding (reverse operation):: 162 | 163 | from libraptorq import RQDecoder 164 | 165 | data_len, oti_scheme, oti_common, symbols = data_encoded 166 | 167 | with RQDecoder(oti_common, oti_scheme) as dec: 168 | for sym_id, sym in symbols.viewitems(): dec.add_symbol(sym, sym_id) 169 | 170 | data = dec.decode()[:data_len] 171 | 172 | Note that in practice, e.g. when transmitting each symbol in a udp packet, one'd 173 | want to send something like ``sym_id || sym_data || checksum``, and keep sending 174 | these from ``block.encode_iter()`` until other side acknowledges that it can 175 | decode a block (i.e. 
enough symbols received, see ``RQDecoder.decode_block()``), 176 | then start streaming the next block in similar fashion. 177 | 178 | See `__main__.py 179 | <https://github.com/mk-fg/python-libraptorq/blob/master/libraptorq/__main__.py>`_ 180 | file (cli script) for an extended example, and libRaptorQ_ docs for info on its 181 | C API, which this module wraps around. 182 | 183 | 184 | 185 | Installation 186 | ------------ 187 | 188 | It's a regular package for Python 2.7 (not 3.X). 189 | 190 | It uses and needs CFFI_ (can/should be installed by pip_) and libRaptorQ_ v0.1.x 191 | installed (as libRaptorQ.so) on the system. 192 | 193 | libRaptorQ v1.x (as opposed to current stable version 0.1.9) has different API 194 | and **will not** work with this module. 195 | 196 | Using pip_ is the best way:: 197 | 198 | % pip install libraptorq 199 | 200 | If you don't have it, use:: 201 | 202 | % easy_install pip 203 | % pip install libraptorq 204 | 205 | Alternatively (see also `pip2014.com`_ and `pip install guide`_):: 206 | 207 | % curl https://raw.github.com/pypa/pip/master/contrib/get-pip.py | python2 208 | % pip install libraptorq 209 | 210 | Or, if you absolutely must:: 211 | 212 | % easy_install libraptorq 213 | 214 | But, you really shouldn't do that. 215 | 216 | Current-git version can be installed like this:: 217 | 218 | % pip install 'git+https://github.com/mk-fg/python-libraptorq.git#egg=libraptorq' 219 | 220 | Note that to install stuff in system-wide PATH and site-packages, elevated 221 | privileges are often required. 222 | Use "install --user", `~/.pydistutils.cfg`_ or virtualenv_ to do unprivileged 223 | installs into custom paths. 224 | 225 | Alternatively, ``./rq`` tool can be run right from the checkout tree without any 226 | installation, if that's the only thing you need there. 227 | 228 | .. _pip: http://pip-installer.org/ 229 | .. _pip2014.com: http://pip2014.com/ 230 | .. _pip install guide: http://www.pip-installer.org/en/latest/installing.html 231 | .. 
_~/.pydistutils.cfg: http://docs.python.org/install/index.html#distutils-configuration-files 232 | .. _virtualenv: http://pypi.python.org/pypi/virtualenv 233 | 234 | 235 | 236 | Random Notes 237 | ------------ 238 | 239 | * See `github-issue-1`_ for more info on what happens when encoding parameters 240 | (such as symbol_size and max_memory) are specified carelessly, and why 241 | command-line interface of this module does not have defaults for these. 242 | 243 | * libRaptorQ is currently used via CFFI in "ABI Mode" to avoid any extra hassle 244 | with compilation and the need for compiler, see `CFFI docs on the subject`_ 245 | for more info on what it means. 246 | 247 | * When decoding, libRaptorQ can raise errors for ``add_symbol()`` calls, when 248 | source block is already decoded and that extra symbol is not needed. 249 | 250 | * libRaptorQ allows specifying an "rq_type" parameter for internal data alignment 251 | size (C++ iterator element), which is hard-coded to ENC_32/DEC_32 252 | in the module, for simplicity. 253 | 254 | * Lack of Python 3.X compatibility is due to me not using it at all (yet?), so 255 | I don't need it; I have nothing against it in principle. 256 | 257 | .. _github-issue-1: https://github.com/mk-fg/python-libraptorq/issues/1 258 | .. _CFFI docs on the subject: https://cffi.readthedocs.org/en/latest/cdef.html 259 | --------------------------------------------------------------------------------