├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── benchmark ├── benchmark.py ├── requirements.txt └── run.sh ├── better_bencode ├── __init__.py ├── __main__.py ├── _fast.c └── _pure.py ├── setup.py ├── tests ├── __init__.py └── test_bencode.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | **.pyc 2 | **.so 3 | .cache 4 | .directory 5 | __pycache__/ 6 | build/ 7 | tmp/ 8 | dist/ 9 | .tox/ 10 | MANIFEST 11 | *.egg-info/ 12 | benchmark/venv/ 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | jobs: 3 | include: 4 | - python: 2.7 5 | - python: 3.5 6 | - python: 3.6 7 | - python: 3.7 8 | - python: 3.8 9 | - python: "3.9-dev" 10 | - python: pypy 11 | - python: pypy3 12 | - name: "Python 3.7 on macOS" 13 | os: osx 14 | osx_image: xcode11.2 15 | language: shell 16 | before_install: 17 | - python -m pip install --upgrade pip pytest 18 | - name: "Python 3.7 on Windows" 19 | os: windows 20 | language: shell 21 | before_install: 22 | - choco install python --version 3.7.0 23 | - python -m pip install --upgrade pip pytest 24 | env: PATH=/c/Python37:/c/Python37/Scripts:$PATH 25 | install: 26 | - python setup.py install 27 | script: 28 | - pytest 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 by Krzysztof Kosyl 2 | 3 | Some rights reserved. 4 | 5 | Redistribution and use in source and binary forms of the software as well 6 | as documentation, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer.
11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * The names of the contributors may not be used to endorse or 18 | promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND 22 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 23 | NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 32 | DAMAGE. 33 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Better Bencode 3 | ============== 4 | 5 | :author: Krzysztof Kosyl 6 | :version: 0.2.1 7 | :date: 2016-05-12 8 | 9 | 10 | Why use ``better_bencode``? 
11 | --------------------------- 12 | 13 | * standard Python object serialization functions: ``load()``, ``loads()``, ``dump()``, ``dumps()`` 14 | * works with Python 2.6, 2.7, 3.3, 3.4, 3.5 and PyPy 15 | * 4.5 times faster than ``bencode`` module, thanks to C Extension 16 | * well tested 17 | 18 | 19 | Installation 20 | ------------ 21 | 22 | :: 23 | 24 | $ pip install better-bencode 25 | 26 | 27 | Example 28 | ------- 29 | 30 | .. code-block:: pycon 31 | 32 | >>> import better_bencode 33 | >>> dumped = better_bencode.dumps(['spam', 42]) 34 | >>> better_bencode.loads(dumped) 35 | ['spam', 42] 36 | -------------------------------------------------------------------------------- /benchmark/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | import time 6 | from cStringIO import StringIO 7 | 8 | import better_bencode._pure as pure 9 | import better_bencode_fast as fast 10 | 11 | import json 12 | import simplejson 13 | import marshal 14 | import pickle 15 | import cPickle 16 | import msgpack 17 | 18 | 19 | EXAMPLES = [ 20 | range(100), 21 | ["zero", 0, "one", 1, "two", 2], 22 | {"zero": 0, "one": 1, "two": 2}, 23 | "0123456789", 24 | ] 25 | 26 | 27 | class Empty(object): 28 | __name__ = 'Empty' 29 | def dump(self, value, fp): return '' 30 | def dumps(self, value): return '' 31 | def load(self, fp): return None 32 | def loads(self, value): return None 33 | 34 | REPEATS = 1000 35 | 36 | print '%-20s %10s %10s %10s %10s %6s' % ( 37 | 'MODULE', 'dump', 'dumps', 'load', 'loads', 'SIZE' 38 | ) 39 | for module in [Empty(), fast, pure, json, simplejson, marshal, pickle, cPickle, msgpack]: 40 | time_start = time.time() 41 | for example in EXAMPLES: 42 | for i in xrange(REPEATS * (module != marshal)): 43 | fp = StringIO() 44 | module.dump(example, fp) 45 | fp.getvalue() 46 | dump_duration = time.time() - time_start 47 | 48 | time_start = time.time() 49 | for example in 
EXAMPLES: 50 | for i in xrange(REPEATS): 51 | module.dumps(example) 52 | dumps_duration = time.time() - time_start 53 | 54 | time_start = time.time() 55 | for example in EXAMPLES: 56 | example_encoded = module.dumps(example) 57 | for i in xrange(REPEATS * (module != marshal)): 58 | fp = StringIO(example_encoded) 59 | module.load(fp) 60 | load_duration = time.time() - time_start 61 | 62 | time_start = time.time() 63 | for example in EXAMPLES: 64 | example_encoded = module.dumps(example) 65 | for i in xrange(REPEATS): 66 | module.loads(example_encoded) 67 | loads_duration = time.time() - time_start 68 | 69 | size = sum(len(module.dumps(example)) for example in EXAMPLES) 70 | 71 | print '%-20s %8.3fms %8.3fms %8.3fms %8.3fms %5dB' % ( 72 | module.__name__, 73 | dump_duration * 1000.0, dumps_duration * 1000.0, 74 | load_duration * 1000.0, loads_duration * 1000.0, 75 | size, 76 | ) 77 | -------------------------------------------------------------------------------- /benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | -e .. 2 | msgpack-python 3 | simplejson -------------------------------------------------------------------------------- /benchmark/run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | if [ ! 
def main(argv):
    """Validate and pretty-print Bencode.

    Reads Bencode from the file named by ``argv[1]`` when exactly one
    argument follows the program name, otherwise from standard input, and
    pretty-prints the decoded value to standard output.

    Returns 0 on success, or 1 (after writing the message to standard
    error) when decoding raises ``ValueError``.
    """

    if len(argv) == 2:
        fin = open(argv[1], 'rb')
    else:
        # Bencode is a binary format.  On Python 3 ``sys.stdin`` is a text
        # stream whose ``read()`` returns ``str``, which the decoders cannot
        # handle, so use the underlying binary buffer when there is one
        # (Python 2's ``sys.stdin`` has no ``buffer`` and is already bytes).
        fin = getattr(sys.stdin, 'buffer', sys.stdin)

    try:
        data = better_bencode.load(fin)
        pprint.pprint(data)
        return 0
    except ValueError as exc:
        sys.stderr.write('Error: %s\n' % exc)
        return 1
    finally:
        fin.close()
PyBytes_FromStringAndSize 8 | #define PyString_AsStringAndSize PyBytes_AsStringAndSize 9 | #define PyString_Size PyBytes_Size 10 | #define PyInt_CheckExact(obj) 0 11 | #else 12 | #define PY_BUILD_VALUE_BYTES "s#" 13 | #endif 14 | 15 | 16 | struct benc_state { 17 | unsigned int cast : 1; 18 | 19 | Py_ssize_t size; 20 | Py_ssize_t offset; 21 | char* buffer; 22 | PyObject* file; 23 | 24 | PyObject** references_stack; 25 | int references_size; 26 | int references_top; 27 | }; 28 | 29 | 30 | PyObject* BencodeValueError; 31 | PyObject* BencodeTypeError; 32 | 33 | 34 | static void benc_state_init(struct benc_state* bs) { 35 | bs->size = 256; 36 | bs->offset = 0; 37 | bs->buffer = malloc(bs->size); 38 | bs->file = NULL; 39 | 40 | bs->references_size = 8; 41 | bs->references_top = 0; 42 | bs->references_stack = malloc(sizeof(PyObject*) * bs->references_size); 43 | } 44 | 45 | 46 | static void benc_state_free(struct benc_state* bs) { 47 | if (bs->buffer != NULL) { 48 | free(bs->buffer); 49 | } 50 | if (bs->references_stack != NULL) { 51 | free(bs->references_stack); 52 | } 53 | } 54 | 55 | 56 | static void benc_state_flush(struct benc_state* bs) { 57 | if (bs->offset > 0) { 58 | PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); 59 | bs->offset = 0; 60 | } 61 | } 62 | 63 | 64 | static void benc_state_write_char(struct benc_state* bs, char c) { 65 | if (bs->file == NULL) { 66 | if ((bs->offset + 1) >= bs->size) { 67 | bs->buffer = realloc(bs->buffer, bs->size * 2); 68 | } 69 | bs->buffer[bs->offset++] = c; 70 | } else { 71 | if ((bs->offset + 1) >= bs->size) { 72 | PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); 73 | bs->offset = 0; 74 | } 75 | bs->buffer[bs->offset++] = c; 76 | } 77 | } 78 | 79 | 80 | static void benc_state_write_buffer(struct benc_state* bs, char* buff, Py_ssize_t size) { 81 | if (bs->file == NULL) { 82 | Py_ssize_t new_size; 83 | for (new_size = bs->size; new_size <= (bs->offset + 
size); new_size *= 2); 84 | if (new_size > bs->size) { 85 | bs->buffer = realloc(bs->buffer, new_size); 86 | bs->size = new_size; 87 | } 88 | memcpy(bs->buffer + bs->offset, buff, size); 89 | bs->offset += size; 90 | } else { 91 | if (bs->offset + size >= bs->size) { 92 | PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); 93 | bs->offset = 0; 94 | } 95 | if (size >= bs->size) { 96 | PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, buff, size); 97 | } else { 98 | memcpy(bs->buffer + bs->offset, buff, size); 99 | bs->offset += size; 100 | } 101 | } 102 | } 103 | 104 | 105 | static void benc_state_write_format(struct benc_state* bs, const Py_ssize_t limit, const void *format, ...) { 106 | char buffer[limit + 1]; // moze by malloca()? 107 | 108 | va_list ap; 109 | va_start(ap, format); 110 | int size = vsnprintf(buffer, limit, format, ap); 111 | va_end(ap); 112 | 113 | return benc_state_write_buffer(bs, buffer, (size < limit) ? size : (limit - 1)); 114 | } 115 | 116 | 117 | static int benc_state_read_char(struct benc_state* bs) { 118 | if (bs->file == NULL) { 119 | if (bs->offset < bs->size) { 120 | return bs->buffer[bs->offset++]; 121 | } else { 122 | return -1; 123 | } 124 | } else { 125 | char *buffer; 126 | int result; 127 | Py_ssize_t length; 128 | PyObject *data = PyObject_CallMethod(bs->file, "read", "i", 1); 129 | if (-1 == PyString_AsStringAndSize(data, &buffer, &length)) { 130 | return -1; 131 | } 132 | if (length == 1) { 133 | result = buffer[0]; 134 | } else { 135 | result = -1; 136 | } 137 | Py_DECREF(data); 138 | return result; 139 | } 140 | } 141 | 142 | 143 | static PyObject *benc_state_read_pystring(struct benc_state* bs, int size) { 144 | if (bs->file == NULL) { 145 | if (bs->offset + size <= bs->size) { 146 | PyObject *result = PyString_FromStringAndSize(bs->buffer + bs->offset, size); 147 | bs->offset += size; 148 | return result; 149 | } else { 150 | PyErr_Format( 151 | BencodeValueError, 152 | 
"unexpected end of data" 153 | ); 154 | return NULL; 155 | } 156 | } else { 157 | PyObject *result = PyObject_CallMethod(bs->file, "read", "i", size); 158 | if (PyString_Size(result) == size) { 159 | return result; 160 | } else { 161 | Py_DECREF(result); 162 | PyErr_Format( 163 | BencodeValueError, 164 | "unexpected end of data" 165 | ); 166 | return NULL; 167 | } 168 | } 169 | } 170 | 171 | static void benc_state_references_push(struct benc_state* bs, PyObject *obj) { 172 | if ((bs->references_top + 1) == bs->references_size) { 173 | bs->references_size *= 2; 174 | bs->references_stack = realloc( 175 | bs->references_stack, 176 | sizeof(PyObject*) * bs->references_size 177 | ); 178 | } 179 | bs->references_stack[bs->references_top++] = obj; 180 | } 181 | 182 | static void benc_state_references_pop(struct benc_state* bs) { 183 | bs->references_top--; 184 | } 185 | 186 | static int benc_state_references_contains(struct benc_state* bs, PyObject *obj) { 187 | int i; 188 | for (i = 0; i < bs->references_top; i++) { 189 | if (bs->references_stack[i] == obj) { 190 | return 1; 191 | } 192 | } 193 | return 0; 194 | } 195 | 196 | 197 | static int do_dump(struct benc_state *bs, PyObject* obj); 198 | 199 | static int do_dump(struct benc_state *bs, PyObject* obj) { 200 | int i = 0, n = 0; 201 | 202 | if (benc_state_references_contains(bs, obj)) { 203 | PyErr_Format( 204 | BencodeValueError, 205 | "circular reference detected" 206 | ); 207 | return 0; 208 | } 209 | 210 | if (PyBytes_CheckExact(obj)) { 211 | char *buff = PyBytes_AS_STRING(obj); 212 | Py_ssize_t size = PyBytes_GET_SIZE(obj); 213 | 214 | benc_state_write_format(bs, 12, "%d:", size); 215 | benc_state_write_buffer(bs, buff, size); 216 | } else if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) { 217 | long x = PyLong_AsLong(obj); 218 | benc_state_write_format(bs, 23, "i%lde", x); 219 | } else if (bs->cast && PyBool_Check(obj)) { 220 | long x = PyLong_AsLong(obj); 221 | benc_state_write_format(bs, 4, "i%lde", x); 
222 | } else if (PyList_CheckExact(obj) || (bs->cast && PyList_Check(obj))) { 223 | n = PyList_GET_SIZE(obj); 224 | benc_state_references_push(bs, obj); 225 | benc_state_write_char(bs, 'l'); 226 | for (i = 0; i < n; i++) { 227 | do_dump(bs, PyList_GET_ITEM(obj, i)); 228 | } 229 | benc_state_write_char(bs, 'e'); 230 | benc_state_references_pop(bs); 231 | } else if (bs->cast && PyTuple_Check(obj)) { 232 | n = PyTuple_GET_SIZE(obj); 233 | benc_state_references_push(bs, obj); 234 | benc_state_write_char(bs, 'l'); 235 | for (i = 0; i < n; i++) { 236 | do_dump(bs, PyTuple_GET_ITEM(obj, i)); 237 | } 238 | benc_state_write_char(bs, 'e'); 239 | benc_state_references_pop(bs); 240 | } else if (PyDict_CheckExact(obj)) { 241 | Py_ssize_t index = 0; 242 | PyObject *keys, *key, *value; 243 | keys = PyDict_Keys(obj); 244 | PyList_Sort(keys); 245 | 246 | benc_state_references_push(bs, obj); 247 | benc_state_write_char(bs, 'd'); 248 | for (index = 0; index < PyList_Size(keys); index++) { 249 | key = PyList_GetItem(keys, index); 250 | value = PyDict_GetItem(obj, key); 251 | do_dump(bs, key); 252 | do_dump(bs, value); 253 | } 254 | benc_state_write_char(bs, 'e'); 255 | benc_state_references_pop(bs); 256 | 257 | Py_DECREF(keys); 258 | } else { 259 | PyErr_Format( 260 | BencodeTypeError, 261 | "type %s is not Bencode serializable", 262 | Py_TYPE(obj)->tp_name 263 | ); 264 | } 265 | return 0; 266 | } 267 | 268 | static PyObject* dump(PyObject* self, PyObject* args, PyObject* kwargs) { 269 | static char *kwlist[] = {"obj", "write", "cast", NULL}; 270 | 271 | PyObject* obj; 272 | PyObject* write; 273 | int cast = 0; 274 | 275 | struct benc_state bs; 276 | benc_state_init(&bs); 277 | 278 | if (!PyArg_ParseTupleAndKeywords( 279 | args, kwargs, "OO|i", kwlist, 280 | &obj, &write, &cast 281 | )) 282 | { 283 | return NULL; 284 | } 285 | 286 | bs.file = write; 287 | bs.cast = !!cast; 288 | 289 | do_dump(&bs, obj); 290 | 291 | benc_state_flush(&bs); 292 | benc_state_free(&bs); 293 | 294 | if 
(PyErr_Occurred()) { 295 | return NULL; 296 | } else { 297 | return Py_BuildValue(PY_BUILD_VALUE_BYTES, bs.buffer, bs.offset); 298 | } 299 | } 300 | 301 | 302 | static PyObject* dumps(PyObject* self, PyObject* args, PyObject* kwargs) { 303 | static char *kwlist[] = {"obj", "cast", NULL}; 304 | 305 | PyObject* obj; 306 | PyObject* result; 307 | int cast = 0; 308 | 309 | struct benc_state bs; 310 | benc_state_init(&bs); 311 | 312 | if (!PyArg_ParseTupleAndKeywords( 313 | args, kwargs, "O|i", kwlist, 314 | &obj, &cast 315 | )) 316 | { 317 | return NULL; 318 | } 319 | 320 | bs.cast = !!cast; 321 | 322 | do_dump(&bs, obj); 323 | 324 | if (PyErr_Occurred()) { 325 | benc_state_free(&bs); 326 | return NULL; 327 | } else { 328 | result = Py_BuildValue(PY_BUILD_VALUE_BYTES, bs.buffer, bs.offset); 329 | benc_state_free(&bs); 330 | return result; 331 | } 332 | } 333 | 334 | 335 | static PyObject *do_load(struct benc_state *bs) { 336 | PyObject *retval = NULL; 337 | 338 | int first = benc_state_read_char(bs); 339 | 340 | switch (first) { 341 | case 'i': { 342 | int sign = 1; 343 | int read_cnt = 0; 344 | long long value = 0; 345 | int current = benc_state_read_char(bs); 346 | if (current == '-') { 347 | sign = -1; 348 | current = benc_state_read_char(bs); 349 | } 350 | while (('0' <= current) && (current <= '9')) { 351 | value = value * 10 + (current - '0'); 352 | current = benc_state_read_char(bs); 353 | read_cnt++; 354 | } 355 | 356 | if ('e' == current) { 357 | if (read_cnt > 0) { 358 | value *= sign; 359 | retval = PyLong_FromLongLong(value); 360 | } else { 361 | PyErr_Format( 362 | BencodeValueError, 363 | "unexpected end of data" 364 | ); 365 | retval = NULL; 366 | } 367 | } else if (-1 == current) { 368 | PyErr_Format( 369 | BencodeValueError, 370 | "unexpected end of data" 371 | ); 372 | retval = NULL; 373 | } else { 374 | PyErr_Format( 375 | BencodeValueError, 376 | "unexpected byte 0x%.2x", 377 | current 378 | ); 379 | retval = NULL; 380 | } 381 | 382 | } break; 383 | 
384 | case '0': 385 | case '1': 386 | case '2': 387 | case '3': 388 | case '4': 389 | case '5': 390 | case '6': 391 | case '7': 392 | case '8': 393 | case '9': { 394 | int size = first - '0'; 395 | char current = benc_state_read_char(bs); 396 | while (('0' <= current) && (current <= '9')) { 397 | size = size * 10 + (current - '0'); 398 | current = benc_state_read_char(bs); 399 | } 400 | if (':' == current) { 401 | retval = benc_state_read_pystring(bs, size); 402 | } else if (-1 == current) { 403 | PyErr_Format( 404 | BencodeValueError, 405 | "unexpected end of data" 406 | ); 407 | retval = NULL; 408 | } else { 409 | PyErr_Format( 410 | BencodeValueError, 411 | "unexpected byte 0x%.2x", 412 | current 413 | ); 414 | retval = NULL; 415 | } 416 | 417 | } break; 418 | case 'e': 419 | Py_INCREF(PyExc_StopIteration); 420 | retval = PyExc_StopIteration; 421 | break; 422 | case 'l': { 423 | PyObject *v = PyList_New(0); 424 | PyObject *item; 425 | 426 | while (1) { 427 | item = do_load(bs); 428 | 429 | if (item == PyExc_StopIteration) { 430 | Py_DECREF(PyExc_StopIteration); 431 | break; 432 | } 433 | 434 | if (item == NULL) { 435 | if (!PyErr_Occurred()) { 436 | PyErr_SetString( 437 | BencodeTypeError, 438 | "unexpected error in list" 439 | ); 440 | } 441 | Py_DECREF(v); 442 | v = NULL; 443 | break; 444 | } 445 | 446 | PyList_Append(v, item); 447 | Py_DECREF(item); 448 | } 449 | 450 | retval = v; 451 | } break; 452 | case 'd': { 453 | PyObject *v = PyDict_New(); 454 | 455 | while (1) { 456 | PyObject *key, *val; 457 | key = val = NULL; 458 | key = do_load(bs); 459 | 460 | if (key == PyExc_StopIteration) { 461 | Py_DECREF(PyExc_StopIteration); 462 | break; 463 | } 464 | 465 | if (key == NULL) { 466 | if (!PyErr_Occurred()) { 467 | PyErr_SetString(BencodeTypeError, "unexpected error in dict"); 468 | } 469 | break; 470 | } 471 | 472 | val = do_load(bs); 473 | if (val != NULL) { 474 | PyDict_SetItem(v, key, val); 475 | } else { 476 | if (!PyErr_Occurred()) { 477 | 
PyErr_SetString(BencodeTypeError, "unexpected error in dict"); 478 | } 479 | break; 480 | } 481 | Py_DECREF(key); 482 | Py_XDECREF(val); 483 | } 484 | if (PyErr_Occurred()) { 485 | Py_DECREF(v); 486 | v = NULL; 487 | } 488 | retval = v; 489 | } break; 490 | case -1: { 491 | PyErr_Format( 492 | BencodeValueError, 493 | "unexpected end of data" 494 | ); 495 | retval = NULL; 496 | } break; 497 | default: 498 | PyErr_Format( 499 | BencodeValueError, 500 | "unexpected byte 0x%.2x", 501 | first 502 | ); 503 | retval = NULL; 504 | break; 505 | } 506 | return retval; 507 | } 508 | 509 | 510 | static PyObject* load(PyObject* self, PyObject* args) { 511 | struct benc_state bs; 512 | memset(&bs, 0, sizeof(struct benc_state)); 513 | 514 | if (!PyArg_ParseTuple(args, "O", &(bs.file))) 515 | return NULL; 516 | 517 | PyObject* obj = do_load(&bs); 518 | 519 | return obj; 520 | } 521 | 522 | 523 | static PyObject* loads(PyObject* self, PyObject* args) { 524 | struct benc_state bs; 525 | memset(&bs, 0, sizeof(struct benc_state)); 526 | 527 | if (!PyArg_ParseTuple(args, PY_BUILD_VALUE_BYTES, &(bs.buffer), &(bs.size))) 528 | return NULL; 529 | 530 | PyObject* obj = do_load(&bs); 531 | 532 | return obj; 533 | } 534 | 535 | 536 | static PyObject *add_errors(PyObject *module) { 537 | BencodeValueError = PyErr_NewException( 538 | "better_bencode._fast.BencodeValueError", PyExc_ValueError, NULL 539 | ); 540 | Py_INCREF(BencodeValueError); 541 | PyModule_AddObject(module, "BencodeValueError", BencodeValueError); 542 | 543 | BencodeTypeError = PyErr_NewException( 544 | "better_bencode._fast.BencodeTypeError", PyExc_TypeError, NULL 545 | ); 546 | Py_INCREF(BencodeTypeError); 547 | PyModule_AddObject(module, "BencodeTypeError", BencodeTypeError); 548 | 549 | return module; 550 | } 551 | 552 | 553 | static PyMethodDef better_bencode_fastMethods[] = { 554 | {"load", load, METH_VARARGS, "Deserialize ``fp`` to a Python object."}, 555 | {"loads", loads, METH_VARARGS, "Deserialize ``s`` to a Python 
#!/usr/bin/env python
# -*- coding: utf-8 -*-


"""
Pure Python implementation of Bencode serialization format.
To be used when fast C Extension cannot be compiled.
"""


import sys


if sys.version_info[0] == 2:
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
else:
    from io import BytesIO as StringIO


if sys.version_info[0] == 2:
    INTEGER_TYPES = (int, long)
    BINARY_TYPES = (str, )
    int_to_binary = lambda val: str(val)
else:
    INTEGER_TYPES = (int,)
    BINARY_TYPES = (bytes, )
    int_to_binary = lambda val: bytes(str(val), 'ascii')


class BencodeValueError(ValueError):
    """Raised for malformed Bencode input and for circular references."""


class BencodeTypeError(TypeError):
    """Raised when an object of an unsupported type is serialized."""


def _dump_implementation(obj, write, path, cast):
    """ dump()/dumps() implementation

    Writes the encoding of ``obj`` piece by piece through ``write``.

    ``path`` holds the ``id()`` of every container currently being
    serialized and is used to detect circular references.  With ``cast``
    set, ``bool`` is written as an integer and tuples and list subclasses
    are written as lists.

    Raises ``BencodeValueError`` on a circular reference and
    ``BencodeTypeError`` for unsupported types.
    """

    t = type(obj)

    if id(obj) in path:
        raise BencodeValueError('circular reference detected')

    if t in INTEGER_TYPES:
        write(b'i')
        write(int_to_binary(obj))
        write(b'e')
    elif t in BINARY_TYPES:
        write(int_to_binary(len(obj)))
        write(b':')
        write(obj)
    elif t is list or (cast and issubclass(t, (list, tuple))):
        inner_path = path + [id(obj)]
        write(b'l')
        for item in obj:
            _dump_implementation(item, write, inner_path, cast)
        write(b'e')
    elif t is dict:
        inner_path = path + [id(obj)]
        write(b'd')

        # Entries are written in sorted key order.
        for key, val in sorted(obj.items()):
            _dump_implementation(key, write, inner_path, cast)
            _dump_implementation(val, write, inner_path, cast)
        write(b'e')
    elif cast and t is bool:
        write(b'i')
        write(int_to_binary(int(obj)))
        write(b'e')
    else:
        raise BencodeTypeError(
            'type %s is not Bencode serializable' % type(obj).__name__
        )


def dump(obj, fp, cast=False):
    """Serialize ``obj`` as a Bencode formatted stream to ``fp``."""

    _dump_implementation(obj, fp.write, [], cast)


def dumps(obj, cast=False):
    """Serialize ``obj`` to a Bencode formatted ``bytes``."""

    fp = []
    _dump_implementation(obj, fp.append, [], cast)
    return b''.join(fp)


def _read_until(delimiter, read):
    """ Read char by char until ``delimiter`` occurs.

    Returns the bytes read before the delimiter; raises
    ``BencodeValueError`` when the data ends first.  No longer used by the
    decoder itself; kept so existing imports of it keep working.
    """

    result = b''
    ch = read(1)
    if not ch:
        raise BencodeValueError('unexpected end of data')
    while ch != delimiter:
        result += ch
        ch = read(1)
        if not ch:
            raise BencodeValueError('unexpected end of data')
    return result


def _unexpected_end_marker():
    """ Build the error for an ``e`` found where a value is required. """

    return BencodeValueError('unexpected byte 0x%.2x' % ord(b'e'))


def _load_integer(read):
    """ Decode ``<digits>e`` or ``-<digits>e`` (``i`` already consumed). """

    value = b''
    ch = read(1)
    # A minus sign is only valid as the very first character.
    if ch == b'-':
        value = ch
        ch = read(1)
    while b'0' <= ch <= b'9':
        value += ch
        ch = read(1)
    if ch == b'' or (ch == b'e' and value in (b'', b'-')):
        raise BencodeValueError('unexpected end of data')
    if ch != b'e':
        raise BencodeValueError('unexpected byte 0x%.2x' % ord(ch))
    try:
        return int(value)
    except ValueError as exc:
        # ``value`` is syntactically valid here; int() can still refuse it
        # (e.g. interpreter limits on the number of digits).
        raise BencodeValueError(str(exc))


def _load_string(first, read):
    """ Decode a byte string; ``first`` is its first length digit. """

    size = 0
    while b'0' <= first <= b'9':
        size = size * 10 + (ord(first) - ord('0'))
        first = read(1)
    if first == b'':
        raise BencodeValueError('unexpected end of data')
    if first != b':':
        raise BencodeValueError('unexpected byte 0x%.2x' % ord(first))
    data = read(size)
    if len(data) != size:
        raise BencodeValueError('unexpected end of data')
    return data


def _load_implementation(read):
    """ load()/loads() implementation

    Decodes one value using ``read(n)``.  Returns the ``StopIteration``
    class as a sentinel when the next byte is the ``e`` end marker, so that
    the list branch can detect the end of a list; callers that need a real
    value must reject the sentinel.

    Raises ``BencodeValueError`` on malformed or truncated input.
    """

    first = read(1)

    if first == b'e':
        return StopIteration
    elif first == b'i':
        return _load_integer(read)
    elif b'0' <= first <= b'9':
        return _load_string(first, read)
    elif first == b'l':
        result = []
        while True:
            val = _load_implementation(read)
            if val is StopIteration:
                return result
            result.append(val)
    elif first == b'd':
        result = {}
        while True:
            this = read(1)
            if this == b'e':
                return result
            elif this == b'':
                raise BencodeValueError('unexpected end of data')
            elif not (b'0' <= this <= b'9'):
                raise BencodeValueError('unexpected byte 0x%.2x' % ord(this))
            # Keys are byte strings, so they share the string decoder (and
            # its length validation).
            key = _load_string(this, read)
            val = _load_implementation(read)
            if val is StopIteration:
                # The dictionary ended right after a key.
                raise _unexpected_end_marker()
            result[key] = val
    elif first == b'':
        raise BencodeValueError('unexpected end of data')
    else:
        raise BencodeValueError('unexpected byte 0x%.2x' % ord(first))


def _load_toplevel(read):
    """ Decode one complete value, rejecting a bare end marker. """

    value = _load_implementation(read)
    if value is StopIteration:
        raise _unexpected_end_marker()
    return value


def load(fp):
    """Deserialize ``fp`` to a Python object."""

    return _load_toplevel(fp.read)


def loads(data):
    """Deserialize ``s`` to a Python object."""

    fp = StringIO(data)
    return _load_toplevel(fp.read)
license='BSD', 58 | author='Krzysztof Kosyl', 59 | install_requires=[], 60 | author_email='krzysztof.kosyl@gmail.com', 61 | description='Fast, standard compliant Bencode serialization', 62 | long_description=read('README.rst'), 63 | packages=['better_bencode'], 64 | include_package_data=True, 65 | platforms='any', 66 | classifiers=[ 67 | 'Development Status :: 4 - Beta', 68 | 'Intended Audience :: Developers', 69 | 'License :: OSI Approved :: BSD License', 70 | 'Operating System :: OS Independent', 71 | 72 | 'Programming Language :: Python', 73 | 'Programming Language :: Python :: 2', 74 | 'Programming Language :: Python :: 2.6', 75 | 'Programming Language :: Python :: 2.7', 76 | 'Programming Language :: Python :: 3', 77 | 'Programming Language :: Python :: 3.3', 78 | 'Programming Language :: Python :: 3.4', 79 | 'Programming Language :: Python :: 3.5', 80 | 'Programming Language :: Python :: Implementation :: CPython', 81 | 'Programming Language :: Python :: Implementation :: PyPy', 82 | ], 83 | ext_modules=ext_modules, 84 | cmdclass=cmdclass, 85 | ) 86 | 87 | 88 | try: 89 | run_setup(not hasattr(sys, 'pypy_version_info')) 90 | except BuildFailed: 91 | print('*' * 75) 92 | print("WARNING: The C extension could not be compiled.") 93 | print("Module better_bencode will work correctly, but will be slower.") 94 | print('*' * 75) 95 | 96 | run_setup(False) 97 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /tests/test_bencode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | import sys 6 | import os.path 7 | 8 | 9 | # remove top repository dir to avoid importing local code 10 | sys.path = [ 11 | 
directory 12 | for directory in sys.path 13 | if not os.path.exists(os.path.join(directory, 'README.rst')) 14 | ] 15 | 16 | 17 | if sys.version_info[0] == 2: 18 | try: 19 | from cStringIO import StringIO 20 | except ImportError: 21 | from StringIO import StringIO 22 | else: 23 | from io import BytesIO as StringIO 24 | 25 | 26 | import pytest 27 | 28 | 29 | import better_bencode._pure as pure 30 | import better_bencode as auto 31 | try: 32 | import better_bencode._fast as fast 33 | except ImportError as e: 34 | print(e) 35 | fast = None 36 | 37 | 38 | MODULES = [module for module in [auto, fast, pure] if module is not None] 39 | 40 | 41 | @pytest.mark.parametrize('module', MODULES) 42 | def test_error_load(module): 43 | assert hasattr(module, 'BencodeValueError') 44 | assert issubclass(module.BencodeValueError, ValueError) 45 | 46 | 47 | @pytest.mark.parametrize('module', MODULES) 48 | def test_error_dump(module): 49 | assert hasattr(module, 'BencodeTypeError') 50 | assert issubclass(module.BencodeTypeError, TypeError) 51 | 52 | 53 | TEST_DATA = [ 54 | (b'de', {}), 55 | (b'le', []), 56 | (b'i0e', 0), 57 | (b'i42e', 42), 58 | (b'i-42e', -42), 59 | (b'i9223372036854775807e', 2**63-1), 60 | (b'i-9223372036854775808e', -(2**63)), 61 | (b'0:', b''), 62 | (b'4:spam', b'spam'), 63 | (b'l4:spami42ee', [b'spam', 42]), 64 | (b'd3:fooi42ee', {b'foo': 42}), 65 | (b'd3:bar4:spam3:fooi42ee', {b'bar': b'spam', b'foo': 42}), 66 | (b'd1:ai1e1:bi2e1:ci3ee', {b'a': 1, b'b': 2, b'c': 3}), 67 | (b'd1:a1:be', {b'a': b'b'}), 68 | ] 69 | TESTS = [ 70 | (module,) + test 71 | for module in MODULES 72 | for test in TEST_DATA 73 | ] 74 | 75 | 76 | @pytest.mark.parametrize(('module', 'binary', 'struct'), TESTS) 77 | def test_loads(module, binary, struct): 78 | assert module.loads(binary) == struct 79 | 80 | 81 | @pytest.mark.parametrize(('module', 'binary', 'struct'), TESTS) 82 | def test_load(module, binary, struct): 83 | fp = StringIO(binary) 84 | assert module.load(fp) == struct 85 | 86 | 87 
# NOTE(review): the flattened source lost its indentation.  Assertions that
# inspect ``excinfo`` are placed after the ``with pytest.raises(...)`` blocks,
# the only position where they can execute -- confirm against upstream.


@pytest.mark.parametrize(('module', 'binary', 'struct'), TESTS)
def test_dumps(module, binary, struct):
    """``dumps(struct)`` must equal the expected ``binary`` for this case."""
    encoded = module.dumps(struct)
    assert encoded == binary


@pytest.mark.parametrize(('module', 'binary', 'struct'), TESTS)
def test_dump(module, binary, struct):
    """``dump(struct, fp)`` must write the expected ``binary`` into ``fp``."""
    sink = StringIO()
    module.dump(struct, sink)
    written = sink.getvalue()
    assert written == binary


#####################################################################
# dump TypeError tests


# Each module under test paired with each value the dump functions are
# expected to reject.  The value list stays inline, so it is rebuilt (fresh
# ``set()`` / ``frozenset()`` objects) for every module.
TESTS_TYPEERROR = [
    (module, test)
    for module in MODULES
    for test in [
        u'',
        (),
        set(),
        frozenset(),
        len,
        TypeError,
        True,
        False,
        None,
        1.0,
    ]
]


@pytest.mark.parametrize(('module', 'struct'), TESTS_TYPEERROR)
def test_dump_typeerror(module, struct):
    """``dump`` raises ``TypeError`` whose message contains the type name."""
    with pytest.raises(TypeError) as excinfo:
        module.dump(struct, StringIO())
    type_name = type(struct).__name__
    message = str(excinfo.value)
    assert type_name in message


@pytest.mark.parametrize(('module', 'struct'), TESTS_TYPEERROR)
def test_dump_dumperror(module, struct):
    """``dump`` raises the module's own ``BencodeTypeError`` naming the type."""
    with pytest.raises(module.BencodeTypeError) as excinfo:
        module.dump(struct, StringIO())
    type_name = type(struct).__name__
    message = str(excinfo.value)
    assert type_name in message


@pytest.mark.parametrize(('module', 'struct'), TESTS_TYPEERROR)
def test_dumps_typeerror(module, struct):
    """``dumps`` raises ``TypeError`` whose message contains the type name."""
    with pytest.raises(TypeError) as excinfo:
        module.dumps(struct)
    type_name = type(struct).__name__
    message = str(excinfo.value)
    assert type_name in message


@pytest.mark.parametrize(('module', 'struct'), TESTS_TYPEERROR)
def test_dumps_dumperror(module, struct):
    """``dumps`` raises the module's own ``BencodeTypeError`` naming the type."""
    with pytest.raises(module.BencodeTypeError) as excinfo:
        module.dumps(struct)
    type_name = type(struct).__name__
    message = str(excinfo.value)
    assert type_name in message


@pytest.mark.parametrize('module', MODULES)
def test_dumps_reference_list(module):
    """A list whose nested element refers back to it must be rejected."""
    inner = []
    outer = [inner]
    inner.append(outer)  # outer -> inner -> outer

    with pytest.raises(ValueError) as excinfo:
        module.dumps(outer)
    message = str(excinfo.value)
    assert message == 'circular reference detected'


@pytest.mark.parametrize('module', MODULES)
def test_dumps_reference_list_deep(module):
    """Same as the shallow list case, with the cycle closed many levels down."""
    cyclic = [[[[[[[[[[[[[[[]]]]]]]]]]]]]]]
    cyclic[0][0][0][0][0][0][0][0][0][0][0][0][0].append(cyclic)

    with pytest.raises(ValueError) as excinfo:
        module.dumps(cyclic)
    message = str(excinfo.value)
    assert message == 'circular reference detected'


@pytest.mark.parametrize('module', MODULES)
def test_dumps_reference_dict(module):
    """A dict that reaches itself through nested dict values must be rejected."""
    leaf = {}
    root = {b'a': {b'b': leaf}}
    leaf[b'c'] = root  # root -> b'a' -> b'b' -> b'c' -> root

    with pytest.raises(ValueError) as excinfo:
        module.dumps(root)
    message = str(excinfo.value)
    assert message == 'circular reference detected'


#####################################################################
# load ValueError tests


# (input bytes, expected error message) pairs, grouped by the kind of value
# the input starts with.
_MALFORMED_INPUTS = [
    # unknown leading byte / empty input
    (b'<', 'unexpected byte 0x3c'),
    (b' ', 'unexpected byte 0x20'),
    (b'x', 'unexpected byte 0x78'),
    (b'', 'unexpected end of data'),

    # byte strings
    (b'1', 'unexpected end of data'),
    (b'1:', 'unexpected end of data'),
    (b'1x', 'unexpected byte 0x78'),

    # integers
    (b'i', 'unexpected end of data'),
    (b'ie', 'unexpected end of data'),
    (b'i-e', 'unexpected end of data'),
    (b'ixe', 'unexpected byte 0x78'),

    # lists
    (b'l', 'unexpected end of data'),
    (b'lx', 'unexpected byte 0x78'),
    (b'lxe', 'unexpected byte 0x78'),
    (b'l1:a', 'unexpected end of data'),
    (b'l1:ax', 'unexpected byte 0x78'),

    # dictionaries
    (b'd', 'unexpected end of data'),
    (b'dx', 'unexpected byte 0x78'),
    (b'dxe', 'unexpected byte 0x78'),
    (b'd1:a', 'unexpected end of data'),
    (b'd1:ax', 'unexpected byte 0x78'),
    (b'd1:a1:b', 'unexpected end of data'),
    (b'd1:a1:bx', 'unexpected byte 0x78'),
]

# Every module under test combined with every malformed input above.
TESTS_VALUEERROR = [
    (module, binary, msg)
    for module in MODULES
    for binary, msg in _MALFORMED_INPUTS
]
# NOTE(review): the flattened source lost its indentation.  Assertions that
# inspect ``excinfo`` are placed after the ``with pytest.raises(...)`` blocks,
# the only position where they can execute -- confirm against upstream.


@pytest.mark.parametrize(('module', 'binary', 'msg'), TESTS_VALUEERROR)
def test_load_valueerror(module, binary, msg):
    """``load`` on malformed input raises ``ValueError`` with exactly ``msg``."""
    with pytest.raises(ValueError) as excinfo:
        module.load(StringIO(binary))
    message = str(excinfo.value)
    assert message == msg


@pytest.mark.parametrize(('module', 'binary', 'msg'), TESTS_VALUEERROR)
def test_loads_valueerror(module, binary, msg):
    """``loads`` on malformed input raises ``ValueError`` with exactly ``msg``."""
    with pytest.raises(ValueError) as excinfo:
        module.loads(binary)
    message = str(excinfo.value)
    assert message == msg


#####################################################################
# docstrings


@pytest.mark.parametrize('module', MODULES)
def test_docstrings_dump(module):
    """Every module must expose the same docstring on ``dump``."""
    expected = "Serialize ``obj`` as a Bencode formatted stream to ``fp``."
    assert module.dump.__doc__ == expected


@pytest.mark.parametrize('module', MODULES)
def test_docstrings_dumps(module):
    """Every module must expose the same docstring on ``dumps``."""
    expected = "Serialize ``obj`` to a Bencode formatted ``bytes``."
    assert module.dumps.__doc__ == expected


@pytest.mark.parametrize('module', MODULES)
def test_docstrings_load(module):
    """Every module must expose the same docstring on ``load``."""
    expected = "Deserialize ``fp`` to a Python object."
    assert module.load.__doc__ == expected


@pytest.mark.parametrize('module', MODULES)
def test_docstrings_loads(module):
    """Every module must expose the same docstring on ``loads``."""
    expected = "Deserialize ``s`` to a Python object."
    assert module.loads.__doc__ == expected
250 | 251 | 252 | ##################################################################### 253 | # cast 254 | 255 | 256 | from collections import namedtuple 257 | Point = namedtuple('Point', 'x y') 258 | 259 | 260 | class MyList(list): 261 | def __init__(self, *data): 262 | list.__init__(self, data) 263 | 264 | 265 | CAST_TEST_DATA = [ 266 | (False, b'i0e', 0), 267 | (True, b'i1e', 1), 268 | 269 | ((), b'le', []), 270 | ((1,), b'li1ee', [1]), 271 | 272 | (Point(-1, 1), b'li-1ei1ee', [-1, 1]), 273 | (MyList(-1, 1), b'li-1ei1ee', [-1, 1]), 274 | 275 | ] 276 | CAST_TESTS = [ 277 | (module,) + test 278 | for module in MODULES 279 | for test in CAST_TEST_DATA 280 | ] 281 | 282 | @pytest.mark.parametrize(('module', 'indata', 'binary', 'outdata'), CAST_TESTS) 283 | def test_cast_dumps_ok(module, indata, binary, outdata): 284 | dumped = module.dumps(indata, cast=True) 285 | assert dumped == binary 286 | assert module.loads(dumped) == outdata 287 | 288 | 289 | @pytest.mark.parametrize(('module', 'indata', 'binary', 'outdata'), CAST_TESTS) 290 | def test_cast_dumps_error(module, indata, binary, outdata): 291 | with pytest.raises(TypeError) as excinfo: 292 | module.dumps(indata) 293 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py26,py27,pypy,py33,py34,py35,py36,py37,py38,py39 3 | 4 | [testenv] 5 | changedir=tests 6 | deps=pytest 7 | commands=py.test 8 | --------------------------------------------------------------------------------