├── .gitignore
├── README.md
└── bcode.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | bcode
2 | =====
3 | 
4 | Yet another bencode encoder/decoder… but mine is fast as hell.
5 | 


--------------------------------------------------------------------------------
/bcode.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | bencode/decode library.
  5 | 
  6 | bencoding is used in bittorrent files
  7 | 
  8 | use the exposed functions to encode/decode them.
  9 | """
 10 | 
 11 | from io import BytesIO, SEEK_CUR
 12 | try: #py 3.3
 13 | 	from collections.abc import Iterable, Mapping
 14 | except ImportError:
 15 | 	from collections     import Iterable, Mapping
 16 | 
 17 | _TYPE_INT  = b'i'
 18 | _TYPE_LIST = b'l'
 19 | _TYPE_DICT = b'd'
 20 | _TYPE_END  = b'e'
 21 | _TYPE_SEP  = b':'
 22 | _TYPES_STR = b'0123456789'
 23 | 
 24 | TYPES = {
 25 | 	_TYPE_INT:  int,
 26 | 	_TYPE_LIST: list,
 27 | 	_TYPE_DICT: dict,
 28 | 	_TYPE_END:  None,
 29 | 	#_TYPE_SEP only appears in strings, not here
 30 | }
 31 | for byte in _TYPES_STR:
 32 | 	TYPES[bytes([byte])] = str #b'0': str, b'1': str, …
 33 | 
 34 | def _readuntil(f, end=_TYPE_END):
 35 | 	"""Helper function to read bytes until a certain end byte is hit"""
 36 | 	buf = bytearray()
 37 | 	while True:
 38 | 		byte = f.read(1)
 39 | 		if byte != end:
 40 | 			buf += byte
 41 | 		else:
 42 | 			break
 43 | 	return buf
 44 | 
 45 | def _decode_int(f):
 46 | 	"""
 47 | 	Integer types are normal ascii integers
 48 | 	Delimited at the start with 'i' and the end with 'e'
 49 | 	"""
 50 | 	assert f.read(1) == _TYPE_INT
 51 | 	return int(_readuntil(f))
 52 | 
 53 | def _decode_buffer(f):
 54 | 	"""
 55 | 	String types are normal (byte)strings
 56 | 	starting with an integer followed by ':'
 57 | 	which designates the string’s length.
 58 | 	
 59 | 	Since there’s no way to specify the byte type
 60 | 	in bencoded files, we have to guess
 61 | 	"""
 62 | 	strlen = int(_readuntil(f, _TYPE_SEP))
 63 | 	buf = f.read(strlen)
 64 | 	try:
 65 | 		return buf.decode()
 66 | 	except UnicodeDecodeError:
 67 | 		return buf
 68 | 
 69 | def _decode_list(f):
 70 | 	assert f.read(1) == _TYPE_LIST
 71 | 	ret = []
 72 | 	while True:
 73 | 		item = bdecode(f)
 74 | 		if item is None:
 75 | 			break
 76 | 		else:
 77 | 			ret.append(item)
 78 | 	return ret
 79 | 
 80 | def _decode_dict(f):
 81 | 	assert f.read(1) == _TYPE_DICT
 82 | 	ret = {}
 83 | 	while True:
 84 | 		key = bdecode(f)
 85 | 		if key is None:
 86 | 			break
 87 | 		else:
 88 | 			assert isinstance(key, (str, bytes))
 89 | 			ret[key] = bdecode(f)
 90 | 	return ret
 91 | 
# Dispatch table used by bdecode(): maps the python type found in TYPES
# to the function that reads a value of that type from a file.
DECODERS = {
	int:  _decode_int,
	str:  _decode_buffer, #returns bytes instead when the payload does not decode
	list: _decode_list,
	dict: _decode_dict,
}
 98 | 
 99 | def bdecode(f):
100 | 	"""
101 | 	bdecodes data contained in a file f opened in bytes mode.
102 | 	works by looking up the type byte,
103 | 	and using it to look up the respective decoding function,
104 | 	which in turn is used to return the decoded object
105 | 	"""
106 | 	btype = TYPES[f.read(1)]
107 | 	if btype is not None:
108 | 		f.seek(-1, SEEK_CUR)
109 | 		return DECODERS[btype](f)
110 | 	else: #Used in dicts and lists to designate an end
111 | 		return None
112 | 
def bdecode_buffer(data):
	"""
	Decodes bencoded data held in memory instead of in a file.

	data may be bytes or str; a str is encoded with the default codec
	first. The result is whatever bdecode returns for that data.
	"""
	raw = data.encode() if isinstance(data, str) else data
	with BytesIO(raw) as stream:
		return bdecode(stream)
119 | 
120 | ################
121 | ### Encoding ###
122 | ################
123 | 
def _encode_int(integer, f):
	"""
	Writes the bencoded form of an integer to f: 'i', the decimal
	digits, then 'e'.

	The value is passed through int() before being formatted, so that
	int subclasses are written as plain numbers: str(True) is 'True',
	which would otherwise end up in the output as the invalid 'iTruee'.
	"""
	f.write(_TYPE_INT)
	f.write(str(int(integer)).encode())
	f.write(_TYPE_END)
128 | 
def _encode_buffer(string, f):
	"""
	Writes a string to f in bencoded form: the payload's length in
	decimal, a ':' and then the payload itself.

	A str is encoded with the default codec first, so the length
	counts bytes rather than characters. Anything else is written as is.
	"""
	payload = string.encode() if isinstance(string, str) else string
	length_prefix = str(len(payload)).encode()
	for part in (length_prefix, _TYPE_SEP, payload):
		f.write(part)
136 | 
def _encode_iterable(iterable, f):
	"""
	Writes an iterable to f as a bencoded list: 'l', every element
	in iteration order encoded through bencode, then 'e'.
	"""
	f.write(_TYPE_LIST)
	members = iter(iterable)
	for member in members:
		bencode(member, f)
	f.write(_TYPE_END)
142 | 
def _encode_mapping(mapping, f):
	"""
	Writes a mapping to f as a bencoded dictionary: 'd', every key
	followed by its value, then 'e'.

	Entries are written in ascending order of their encoded key bytes.
	The bencode format requires dictionaries to be sorted that way, and
	it makes the output independent of the mapping's insertion order.

	Keys are written with _encode_buffer, so they are expected to be
	str or bytes; values go through bencode.
	"""
	def encoded_key(entry):
		# Compare keys in the form they are written in, so that
		# str and bytes keys can be ordered against each other.
		key = entry[0]
		return key.encode() if isinstance(key, str) else key

	f.write(_TYPE_DICT)
	for key, value in sorted(mapping.items(), key=encoded_key):
		_encode_buffer(key, f)
		bencode(value, f)
	f.write(_TYPE_END)
149 | 
def bencode(data, f):
	"""
	Writes a serializable data piece to f
	The order of tests is nonarbitrary,
	as strings and mappings are iterable.

	int is written as an integer, str and bytes as a string,
	any Mapping as a dictionary and any other Iterable as a list.

	Raises TypeError for everything else (e.g. float or None) instead
	of silently writing nothing, which would drop list items and leave
	dictionary keys without a value.
	"""
	if isinstance(data, int):
		_encode_int(data, f)
	elif isinstance(data, (str, bytes)):
		_encode_buffer(data, f)
	elif isinstance(data, Mapping):
		_encode_mapping(data, f)
	elif isinstance(data, Iterable):
		_encode_iterable(data, f)
	else:
		raise TypeError('cannot bencode object of type %s' % type(data).__name__)
164 | 
def bencode_buffer(data):
	"""
	Convenience wrapper around bencode that serializes data
	into memory and returns the result as bytes
	"""
	sink = BytesIO()
	try:
		bencode(data, sink)
		return sink.getvalue()
	finally:
		sink.close()
173 | 
def main():
	"""
	Command line entry point: decodes a bencoded file and pretty-prints
	the resulting python object.

	Takes two optional positional arguments, the input file (default:
	stdin) and the output file (default: stdout).
	"""
	import sys
	import pprint
	from argparse import ArgumentParser, FileType
	parser = ArgumentParser(description='Decodes bencoded files to python objects.')
	parser.add_argument('infile',  nargs='?', type=FileType('rb'), default=sys.stdin.buffer,
		help='bencoded file (e.g. torrent) [Default: stdin]')
	parser.add_argument('outfile', nargs='?', type=FileType('w'), default=sys.stdout,
		help='python-syntax serialization [Default: stdout]')
	args = parser.parse_args()
	
	infile = args.infile
	if not infile.seekable():
		# bdecode() seeks back one byte after looking at each type marker,
		# which pipes and terminals do not support; buffer such input in
		# memory so that `cat file | bcode.py` works as well.
		infile = BytesIO(infile.read())
	data = bdecode(infile)
	pprint.pprint(data, stream=args.outfile)
186 | 
# Run the command line interface only when executed as a script,
# not when the module is imported as a library.
if __name__ == '__main__':
	main()
189 | 


--------------------------------------------------------------------------------