#!/usr/bin/env python
# Repository layout: .gitignore (ignores "*~"), README.md, bcode.py.
# README.md: "bcode: yet another... but mine is fast as hell."

"""
bencode/decode library.

bencoding is used in bittorrent files

use the exposed functions to encode/decode them.
"""

# SEEK_CUR is no longer needed for decoding; the import is kept so that
# anything importing it from this module keeps working.
from io import BytesIO, SEEK_CUR
try:  # py 3.3
    from collections.abc import Iterable, Mapping
except ImportError:
    from collections import Iterable, Mapping

_TYPE_INT = b'i'
_TYPE_LIST = b'l'
_TYPE_DICT = b'd'
_TYPE_END = b'e'
_TYPE_SEP = b':'
_TYPES_STR = b'0123456789'

# Maps the first byte of a bencoded value to the python type it decodes to.
TYPES = {
    _TYPE_INT: int,
    _TYPE_LIST: list,
    _TYPE_DICT: dict,
    _TYPE_END: None,
    # _TYPE_SEP only appears in strings, not here
}
# b'0': str, b'1': str, ... (a comprehension keeps the loop variable from
# leaking into the module namespace)
TYPES.update({bytes([digit]): str for digit in _TYPES_STR})


class BcodeError(ValueError, KeyError):
    """
    Raised when the input is not well-formed bencoded data.

    Derives from ValueError (the natural type for malformed data, and what
    a bad integer literal already produced) and from KeyError (what an
    unknown type byte used to surface as), so ``except`` clauses written
    against either keep matching.
    """

    def __str__(self):
        # KeyError.__str__ would repr()-quote the message; use the plain form.
        return Exception.__str__(self)


def _readuntil(f, end=_TYPE_END):
    """
    Helper function to read bytes until a certain end byte is hit.

    Returns the bytes read (without the end byte) as a bytearray.
    Raises BcodeError if the stream ends before the end byte shows up;
    without that check an exhausted stream would spin forever, since
    read(1) keeps returning b'' at EOF.
    """
    buf = bytearray()
    while True:
        byte = f.read(1)
        if not byte:
            raise BcodeError(
                'unexpected end of data while looking for %r' % (end,))
        if byte == end:
            return buf
        buf += byte


def _parse_int(digits, what):
    """Converts ascii digits to an int, raising BcodeError on garbage."""
    try:
        return int(digits)
    except ValueError:
        raise BcodeError('invalid %s %r' % (what, bytes(digits))) from None


def _expect_type(f, first, expected):
    """
    Makes sure a value starts with the expected type byte.

    first is the type byte if the caller already consumed it, or None if
    it still has to be read from f. The read happens in a plain statement
    (not inside an assert) so it is not skipped under ``python -O``.
    """
    if first is None:
        first = f.read(1)
    if first != expected:
        raise BcodeError('expected %r, found %r' % (expected, first))


def _decode_int(f, first=None):
    """
    Integer types are normal ascii integers
    Delimited at the start with 'i' and the end with 'e'

    first is the already consumed type byte, if any.
    Raises BcodeError on a missing 'i', a missing 'e' or a bad literal.
    """
    _expect_type(f, first, _TYPE_INT)
    return _parse_int(_readuntil(f), 'integer')


def _decode_buffer(f, first=b''):
    """
    String types are normal (byte)strings
    starting with an integer followed by ':'
    which designates the string's length.

    Since there's no way to specify the byte type
    in bencoded files, we have to guess: the payload is returned as str
    if it is valid UTF-8 and as bytes otherwise.

    first holds length digits the caller already consumed, if any.
    Raises BcodeError on a bad length or if the payload is cut short.
    """
    strlen = _parse_int(first + bytes(_readuntil(f, _TYPE_SEP)),
                        'string length')
    if strlen < 0:
        # read(-1) would silently swallow the rest of the stream
        raise BcodeError('negative string length %d' % strlen)
    buf = f.read(strlen)
    if len(buf) != strlen:
        raise BcodeError('string truncated: expected %d bytes, got %d'
                         % (strlen, len(buf)))
    try:
        return buf.decode()
    except UnicodeDecodeError:
        return buf


def _decode_list(f, first=None):
    """
    Lists are an 'l', any number of bencoded values and a closing 'e'.

    first is the already consumed type byte, if any.
    """
    _expect_type(f, first, _TYPE_LIST)
    ret = []
    while True:
        item = bdecode(f)
        if item is None:  # end marker
            return ret
        ret.append(item)


def _decode_dict(f, first=None):
    """
    Dicts are a 'd', alternating string keys and values, and a closing 'e'.

    first is the already consumed type byte, if any.
    Raises BcodeError if a key is not a string or a key has no value.
    """
    _expect_type(f, first, _TYPE_DICT)
    ret = {}
    while True:
        key = bdecode(f)
        if key is None:  # end marker
            return ret
        if not isinstance(key, (str, bytes)):
            raise BcodeError('dictionary key must be a string, got %r'
                             % (key,))
        value = bdecode(f)
        if value is None:
            # the end marker came where a value was due; accepting it
            # would desynchronise the parser from the nesting structure
            raise BcodeError('dictionary key %r has no value' % (key,))
        ret[key] = value


DECODERS = {
    int: _decode_int,
    str: _decode_buffer,
    list: _decode_list,
    dict: _decode_dict,
}


def bdecode(f):
    """
    bdecodes data contained in a file f opened in bytes mode.
    works by looking up the type byte,
    and using it to look up the respective decoding function,
    which in turn is used to return the decoded object

    The type byte is handed on to the decoding function instead of seeking
    back over it, so f only has to support read() and may be a pipe.

    Returns None when the next byte is the end marker 'e'.
    Raises BcodeError (a ValueError and a KeyError) on malformed input.
    """
    first = f.read(1)
    try:
        btype = TYPES[first]
    except KeyError:
        if not first:
            raise BcodeError('unexpected end of data') from None
        raise BcodeError('unknown type byte %r' % (first,)) from None
    if btype is None:  # Used in dicts and lists to designate an end
        return None
    return DECODERS[btype](f, first)


def bdecode_buffer(data):
    """Convenience wrapper around bdecode that accepts strings or bytes"""
    if isinstance(data, str):
        data = data.encode()
    with BytesIO(data) as f:
        return bdecode(f)


################
### Encoding ###
################

def _encode_int(integer, f):
    """
    Writes the bencoded form of an integer: 'i', ascii digits, 'e'.

    int() normalises bools and other int subclasses first, so True is
    written as i1e instead of the invalid iTruee.
    """
    f.write(_TYPE_INT)
    f.write(str(int(integer)).encode())
    f.write(_TYPE_END)


def _encode_buffer(string, f):
    """
    Writes the bencoded form of the input string or bytes

    str input is encoded as UTF-8 first, so the length prefix counts
    bytes rather than characters.
    """
    if isinstance(string, str):
        string = string.encode()
    f.write(str(len(string)).encode())
    f.write(_TYPE_SEP)
    f.write(string)

def _encode_iterable(iterable, f):
    """Writes the items of iterable to f as a bencoded list ('l' ... 'e')"""
    f.write(_TYPE_LIST)
    for item in iterable:
        bencode(item, f)
    f.write(_TYPE_END)


def _encode_mapping(mapping, f):
    """
    Writes mapping to f as a bencoded dict ('d' ... 'e').

    Keys have to be str or bytes and are written sorted by their raw
    byte value, as bencoding requires; str keys are encoded as UTF-8
    before sorting so mixed str/bytes keys order consistently.

    Raises TypeError for any other key type. Keys are checked before
    anything is written, so a bad key leaves no half-written dict in f.
    """
    entries = []
    for key, value in mapping.items():
        if isinstance(key, str):
            raw = key.encode()
        elif isinstance(key, bytes):
            raw = key
        else:
            raise TypeError('bencoded dict keys must be str or bytes, not %s'
                            % type(key).__name__)
        entries.append((raw, value))
    # sort on the key only; values need not be comparable
    entries.sort(key=lambda entry: entry[0])

    f.write(_TYPE_DICT)
    for raw, value in entries:
        _encode_buffer(raw, f)
        bencode(value, f)
    f.write(_TYPE_END)


def bencode(data, f):
    """
    Writes a serializable data piece to f
    The order of tests is nonarbitrary,
    as strings and mappings are iterable.

    Supported are ints (bools are written as 0/1), str, bytes, mappings
    with string keys and any other iterable (written as a list).

    Raises TypeError for anything else. Writing nothing instead would
    silently corrupt the surrounding list or dict.
    """
    if isinstance(data, int):
        # int() turns bools and int subclasses into plain integers,
        # whose str() form is the digits bencoding expects
        _encode_int(int(data), f)
    elif isinstance(data, (str, bytes)):
        _encode_buffer(data, f)
    elif isinstance(data, Mapping):
        _encode_mapping(data, f)
    elif isinstance(data, Iterable):
        _encode_iterable(data, f)
    else:
        raise TypeError('cannot bencode object of type %s'
                        % type(data).__name__)


def bencode_buffer(data):
    """
    Convenience wrapper around bencode that returns a bytes object
    of the serialized data
    """
    with BytesIO() as f:
        bencode(data, f)
        return f.getvalue()


def main():
    """Command line entry point: pretty-prints a bencoded file."""
    import pprint
    import sys
    from argparse import ArgumentParser, FileType
    parser = ArgumentParser(description='Decodes bencoded files to python objects.')
    parser.add_argument('infile', nargs='?', type=FileType('rb'), default=sys.stdin.buffer,
                        help='bencoded file (e.g. torrent) [Default: stdin]')
    parser.add_argument('outfile', nargs='?', type=FileType('w'), default=sys.stdout,
                        help='python-syntax serialization [Default: stdout]')
    args = parser.parse_args()

    # Decode from an in-memory copy so that non-seekable input (e.g. a pipe
    # on stdin) works regardless of how bdecode() consumes the stream.
    data = bdecode_buffer(args.infile.read())
    pprint.pprint(data, stream=args.outfile)


if __name__ == '__main__':
    main()