├── .gitignore ├── ChangeLog.txt ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── setup.py ├── tnetstring ├── __init__.py ├── _tnetstring.c ├── dbg.h ├── tests │ ├── __init__.py │ ├── test_format.py │ └── test_misc.py ├── tns_core.c └── tns_core.h └── tools └── shootout.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *~ 4 | *.swp 5 | build/ 6 | MANIFEST 7 | -------------------------------------------------------------------------------- /ChangeLog.txt: -------------------------------------------------------------------------------- 1 | 2 | v0.2.1: 3 | 4 | * Fix memory leak in tnetstring.pop(); thanks tarvip. 5 | * Fix bug in handling of large integers; thanks gdamjan. 6 | 7 | 8 | v0.2.0: 9 | 10 | * Easy loading of unicode strings. If you pass an optional "encoding" 11 | argument to load/loads/pop then it will return unicode string objects 12 | rather than byte strings. 13 | * Easy dumping of unicode strings. If you pass an optional "encoding" 14 | argument to dump/dumps then it will write unicode strings in that 15 | encoding. 16 | 17 | 18 | v0.1.0: 19 | 20 | * Initial version; you might say *everything* has changed. 
21 | 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Ryan Kelly 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | 2 | include LICENSE.txt 3 | include ChangeLog.txt 4 | include README.rst 5 | recursive-include tnetstring *.c 6 | recursive-include tnetstring *.h 7 | recursive-include tnetstring/tests *.txt 8 | 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | Status: Unmaintained 3 | ==================== 4 | 5 | .. 
image:: http://unmaintained.tech/badge.svg 6 | :target: http://unmaintained.tech/ 7 | :alt: No Maintenance Intended 8 | 9 | I am no longer actively maintaining this project. 10 | 11 | 12 | tnetstring: data serialization using typed netstrings 13 | ====================================================== 14 | 15 | 16 | This is a data serialization library. It's a lot like JSON but it uses a 17 | new syntax called "typed netstrings" that Zed has proposed for use in the 18 | Mongrel2 webserver. It's designed to be simpler and easier to implement 19 | than JSON, with a happy consequence of also being faster in many cases. 20 | 21 | An ordinary netstring is a blob of data prefixed with its length and postfixed 22 | with a sanity-checking comma. The string "hello world" encodes like this:: 23 | 24 | 11:hello world, 25 | 26 | Typed netstrings add other datatypes by replacing the comma with a type tag. 27 | Here's the integer 12345 encoded as a tnetstring:: 28 | 29 | 5:12345# 30 | 31 | And here's the list [12345,True,0] which mixes integers and bools:: 32 | 33 | 19:5:12345#4:true!1:0#] 34 | 35 | Simple enough? This module gives you the following functions: 36 | 37 | :dump: dump an object as a tnetstring to a file 38 | :dumps: dump an object as a tnetstring to a string 39 | :load: load a tnetstring-encoded object from a file 40 | :loads: load a tnetstring-encoded object from a string 41 | :pop: pop a tnetstring-encoded object from the front of a string 42 | 43 | Note that since parsing a tnetstring requires reading all the data into memory 44 | at once, there's no efficiency gain from using the file-based versions of these 45 | functions. They're only here so you can use load() to read precisely one 46 | item from a file or socket without consuming any extra data. 47 | 48 | The tnetstrings specification explicitly states that strings are binary blobs 49 | and forbids the use of unicode at the protocol level. 
As a convenience to 50 | python programmers, this library lets you specify an application-level encoding 51 | to translate python's unicode strings to and from binary blobs: 52 | 53 | >>> print repr(tnetstring.loads("2:\xce\xb1,")) 54 | '\xce\xb1' 55 | >>> 56 | >>> print repr(tnetstring.loads("2:\xce\xb1,", "utf8")) 57 | u'\u03b1' 58 | 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # This is the tnetstring setuptools script. 3 | # Originally developed by Ryan Kelly, 2011. 4 | # 5 | # This script is placed in the public domain. 6 | # If there's no public domain where you come from, 7 | # you can use it under the MIT license. 8 | # 9 | 10 | import sys 11 | setup_kwds = {} 12 | if sys.version_info > (3,): 13 | from setuptools import setup, Extension 14 | setup_kwds["test_suite"] = "tnetstring.test" 15 | setup_kwds["use_2to3"] = True 16 | else: 17 | from distutils.core import setup, Extension 18 | 19 | 20 | try: 21 | next = next 22 | except NameError: 23 | def next(i): 24 | return i.next() 25 | 26 | 27 | info = {} 28 | try: 29 | src = open("tnetstring/__init__.py") 30 | lines = [] 31 | ln = next(src) 32 | while "__version__" not in ln: 33 | lines.append(ln) 34 | ln = next(src) 35 | while "__version__" in ln: 36 | lines.append(ln) 37 | ln = next(src) 38 | exec("".join(lines),info) 39 | except Exception: 40 | pass 41 | 42 | 43 | NAME = "tnetstring" 44 | VERSION = info["__version__"] 45 | DESCRIPTION = "data serialization using typed netstrings" 46 | LONG_DESC = info["__doc__"] 47 | AUTHOR = "Ryan Kelly" 48 | AUTHOR_EMAIL = "ryan@rfk.id.au" 49 | URL="http://github.com/rfk/tnetstring" 50 | LICENSE = "MIT" 51 | KEYWORDS = "netstring serialize" 52 | CLASSIFIERS = [ 53 | "Programming Language :: Python", 54 | "Programming Language :: Python :: 2", 55 | #"Programming Language :: Python :: 3", 56 | "Development Status :: 4 - Beta", 57 | 
"License :: OSI Approved :: MIT License" 58 | ] 59 | 60 | setup(name=NAME, 61 | version=VERSION, 62 | author=AUTHOR, 63 | author_email=AUTHOR_EMAIL, 64 | url=URL, 65 | description=DESCRIPTION, 66 | long_description=LONG_DESC, 67 | license=LICENSE, 68 | keywords=KEYWORDS, 69 | packages=["tnetstring","tnetstring.tests"], 70 | ext_modules = [ 71 | Extension(name="_tnetstring",sources=["tnetstring/_tnetstring.c"]), 72 | ], 73 | classifiers=CLASSIFIERS, 74 | **setup_kwds 75 | ) 76 | 77 | -------------------------------------------------------------------------------- /tnetstring/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | tnetstring: data serialization using typed netstrings 3 | ====================================================== 4 | 5 | 6 | This is a data serialization library. It's a lot like JSON but it uses a 7 | new syntax called "typed netstrings" that Zed has proposed for use in the 8 | Mongrel2 webserver. It's designed to be simpler and easier to implement 9 | than JSON, with a happy consequence of also being faster in many cases. 10 | 11 | An ordinary netstring is a blob of data prefixed with its length and postfixed 12 | with a sanity-checking comma. The string "hello world" encodes like this:: 13 | 14 | 11:hello world, 15 | 16 | Typed netstrings add other datatypes by replacing the comma with a type tag. 17 | Here's the integer 12345 encoded as a tnetstring:: 18 | 19 | 5:12345# 20 | 21 | And here's the list [12345,True,0] which mixes integers and bools:: 22 | 23 | 19:5:12345#4:true!1:0#] 24 | 25 | Simple enough? 
This module gives you the following functions: 26 | 27 | :dump: dump an object as a tnetstring to a file 28 | :dumps: dump an object as a tnetstring to a string 29 | :load: load a tnetstring-encoded object from a file 30 | :loads: load a tnetstring-encoded object from a string 31 | :pop: pop a tnetstring-encoded object from the front of a string 32 | 33 | Note that since parsing a tnetstring requires reading all the data into memory 34 | at once, there's no efficiency gain from using the file-based versions of these 35 | functions. They're only here so you can use load() to read precisely one 36 | item from a file or socket without consuming any extra data. 37 | 38 | The tnetstrings specification explicitly states that strings are binary blobs 39 | and forbids the use of unicode at the protocol level. As a convenience to 40 | python programmers, this library lets you specify an application-level encoding 41 | to translate python's unicode strings to and from binary blobs: 42 | 43 | >>> print repr(tnetstring.loads("2:\\xce\\xb1,")) 44 | '\\xce\\xb1' 45 | >>> 46 | >>> print repr(tnetstring.loads("2:\\xce\\xb1,", "utf8")) 47 | u'\\u03b1' 48 | 49 | """ 50 | 51 | __ver_major__ = 0 52 | __ver_minor__ = 2 53 | __ver_patch__ = 1 54 | __ver_sub__ = "" 55 | __version__ = "%d.%d.%d%s" % (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__) 56 | 57 | 58 | from collections import deque 59 | 60 | 61 | def dumps(value,encoding=None): 62 | """dumps(object,encoding=None) -> string 63 | 64 | This function dumps a python object as a tnetstring. 65 | """ 66 | # This uses a deque to collect output fragments in reverse order, 67 | # then joins them together at the end. It's measurably faster 68 | # than creating all the intermediate strings. 69 | # If you're reading this to get a handle on the tnetstring format, 70 | # consider the _gdumps() function instead; it's a standard top-down 71 | # generator that's simpler to understand but much less efficient. 
72 | q = deque() 73 | _rdumpq(q,0,value,encoding) 74 | return "".join(q) 75 | 76 | 77 | def dump(value,file,encoding=None): 78 | """dump(object,file,encoding=None) 79 | 80 | This function dumps a python object as a tnetstring and writes it to 81 | the given file. 82 | """ 83 | file.write(dumps(value,encoding)) 84 | 85 | 86 | def _rdumpq(q,size,value,encoding=None): 87 | """Dump value as a tnetstring, to a deque instance, last chunks first. 88 | 89 | This function generates the tnetstring representation of the given value, 90 | pushing chunks of the output onto the given deque instance. It pushes 91 | the last chunk first, then recursively generates more chunks. 92 | 93 | When passed in the current size of the string in the queue, it will return 94 | the new size of the string in the queue. 95 | 96 | Operating last-chunk-first makes it easy to calculate the size written 97 | for recursive structures without having to build their representation as 98 | a string. This is measurably faster than generating the intermediate 99 | strings, especially on deeply nested structures. 100 | """ 101 | write = q.appendleft 102 | if value is None: 103 | write("0:~") 104 | return size + 3 105 | if value is True: 106 | write("4:true!") 107 | return size + 7 108 | if value is False: 109 | write("5:false!") 110 | return size + 8 111 | if isinstance(value,(int,long)): 112 | data = str(value) 113 | ldata = len(data) 114 | span = str(ldata) 115 | write("#") 116 | write(data) 117 | write(":") 118 | write(span) 119 | return size + 2 + len(span) + ldata 120 | if isinstance(value,(float,)): 121 | # Use repr() for float rather than str(). 122 | # It round-trips more accurately. 123 | # Probably unnecessary in later python versions that 124 | # use David Gay's ftoa routines. 
125 | data = repr(value) 126 | ldata = len(data) 127 | span = str(ldata) 128 | write("^") 129 | write(data) 130 | write(":") 131 | write(span) 132 | return size + 2 + len(span) + ldata 133 | if isinstance(value,str): 134 | lvalue = len(value) 135 | span = str(lvalue) 136 | write(",") 137 | write(value) 138 | write(":") 139 | write(span) 140 | return size + 2 + len(span) + lvalue 141 | if isinstance(value,(list,tuple,)): 142 | write("]") 143 | init_size = size = size + 1 144 | for item in reversed(value): 145 | size = _rdumpq(q,size,item,encoding) 146 | span = str(size - init_size) 147 | write(":") 148 | write(span) 149 | return size + 1 + len(span) 150 | if isinstance(value,dict): 151 | write("}") 152 | init_size = size = size + 1 153 | for (k,v) in value.iteritems(): 154 | size = _rdumpq(q,size,v,encoding) 155 | size = _rdumpq(q,size,k,encoding) 156 | span = str(size - init_size) 157 | write(":") 158 | write(span) 159 | return size + 1 + len(span) 160 | if isinstance(value,unicode): 161 | if encoding is None: 162 | raise ValueError("must specify encoding to dump unicode strings") 163 | value = value.encode(encoding) 164 | lvalue = len(value) 165 | span = str(lvalue) 166 | write(",") 167 | write(value) 168 | write(":") 169 | write(span) 170 | return size + 2 + len(span) + lvalue 171 | raise ValueError("unserializable object") 172 | 173 | 174 | def _gdumps(value,encoding): 175 | """Generate fragments of value dumped as a tnetstring. 176 | 177 | This is the naive dumping algorithm, implemented as a generator so that 178 | it's easy to pass to "".join() without building a new list. 179 | 180 | This is mainly here for comparison purposes; the _rdumpq version is 181 | measurably faster as it doesn't have to build intermediate strins. 182 | """ 183 | if value is None: 184 | yield "0:~" 185 | elif value is True: 186 | yield "4:true!" 187 | elif value is False: 188 | yield "5:false!" 
189 | elif isinstance(value,(int,long)): 190 | data = str(value) 191 | yield str(len(data)) 192 | yield ":" 193 | yield data 194 | yield "#" 195 | elif isinstance(value,(float,)): 196 | data = repr(value) 197 | yield str(len(data)) 198 | yield ":" 199 | yield data 200 | yield "^" 201 | elif isinstance(value,(str,)): 202 | yield str(len(value)) 203 | yield ":" 204 | yield value 205 | yield "," 206 | elif isinstance(value,(list,tuple,)): 207 | sub = [] 208 | for item in value: 209 | sub.extend(_gdumps(item)) 210 | sub = "".join(sub) 211 | yield str(len(sub)) 212 | yield ":" 213 | yield sub 214 | yield "]" 215 | elif isinstance(value,(dict,)): 216 | sub = [] 217 | for (k,v) in value.iteritems(): 218 | sub.extend(_gdumps(k)) 219 | sub.extend(_gdumps(v)) 220 | sub = "".join(sub) 221 | yield str(len(sub)) 222 | yield ":" 223 | yield sub 224 | yield "}" 225 | elif isinstance(value,(unicode,)): 226 | if encoding is None: 227 | raise ValueError("must specify encoding to dump unicode strings") 228 | value = value.encode(encoding) 229 | yield str(len(value)) 230 | yield ":" 231 | yield value 232 | yield "," 233 | else: 234 | raise ValueError("unserializable object") 235 | 236 | 237 | def loads(string,encoding=None): 238 | """loads(string,encoding=None) -> object 239 | 240 | This function parses a tnetstring into a python object. 241 | """ 242 | # No point duplicating effort here. In the C-extension version, 243 | # loads() is measurably faster then pop() since it can avoid 244 | # the overhead of building a second string. 245 | return pop(string,encoding)[0] 246 | 247 | 248 | def load(file,encoding=None): 249 | """load(file,encoding=None) -> object 250 | 251 | This function reads a tnetstring from a file and parses it into a 252 | python object. The file must support the read() method, and this 253 | function promises not to read more data than necessary. 254 | """ 255 | # Read the length prefix one char at a time. 
256 | # Note that the netstring spec explicitly forbids padding zeros. 257 | c = file.read(1) 258 | if not c.isdigit(): 259 | raise ValueError("not a tnetstring: missing or invalid length prefix") 260 | datalen = ord(c) - ord("0") 261 | c = file.read(1) 262 | if datalen != 0: 263 | while c.isdigit(): 264 | datalen = (10 * datalen) + (ord(c) - ord("0")) 265 | if datalen > 999999999: 266 | errmsg = "not a tnetstring: absurdly large length prefix" 267 | raise ValueError(errmsg) 268 | c = file.read(1) 269 | if c != ":": 270 | raise ValueError("not a tnetstring: missing or invalid length prefix") 271 | # Now we can read and parse the payload. 272 | # This repeats the dispatch logic of pop() so we can avoid 273 | # re-constructing the outermost tnetstring. 274 | data = file.read(datalen) 275 | if len(data) != datalen: 276 | raise ValueError("not a tnetstring: length prefix too big") 277 | type = file.read(1) 278 | if type == ",": 279 | if encoding is not None: 280 | return data.decode(encoding) 281 | return data 282 | if type == "#": 283 | try: 284 | return int(data) 285 | except ValueError: 286 | raise ValueError("not a tnetstring: invalid integer literal") 287 | if type == "^": 288 | try: 289 | return float(data) 290 | except ValueError: 291 | raise ValueError("not a tnetstring: invalid float literal") 292 | if type == "!": 293 | if data == "true": 294 | return True 295 | elif data == "false": 296 | return False 297 | else: 298 | raise ValueError("not a tnetstring: invalid boolean literal") 299 | if type == "~": 300 | if data: 301 | raise ValueError("not a tnetstring: invalid null literal") 302 | return None 303 | if type == "]": 304 | l = [] 305 | while data: 306 | (item,data) = pop(data,encoding) 307 | l.append(item) 308 | return l 309 | if type == "}": 310 | d = {} 311 | while data: 312 | (key,data) = pop(data,encoding) 313 | (val,data) = pop(data,encoding) 314 | d[key] = val 315 | return d 316 | raise ValueError("unknown type tag") 317 | 318 | 319 | 320 | def 
pop(string,encoding=None): 321 | """pop(string,encoding=None) -> (object, remain) 322 | 323 | This function parses a tnetstring into a python object. 324 | It returns a tuple giving the parsed object and a string 325 | containing any unparsed data from the end of the string. 326 | """ 327 | # Parse out data length, type and remaining string. 328 | try: 329 | (dlen,rest) = string.split(":",1) 330 | dlen = int(dlen) 331 | except ValueError: 332 | raise ValueError("not a tnetstring: missing or invalid length prefix") 333 | try: 334 | (data,type,remain) = (rest[:dlen],rest[dlen],rest[dlen+1:]) 335 | except IndexError: 336 | # This fires if len(rest) < dlen, meaning we don't need 337 | # to further validate that data is the right length. 338 | raise ValueError("not a tnetstring: invalid length prefix") 339 | # Parse the data based on the type tag. 340 | if type == ",": 341 | if encoding is not None: 342 | return (data.decode(encoding),remain) 343 | return (data,remain) 344 | if type == "#": 345 | try: 346 | return (int(data),remain) 347 | except ValueError: 348 | raise ValueError("not a tnetstring: invalid integer literal") 349 | if type == "^": 350 | try: 351 | return (float(data),remain) 352 | except ValueError: 353 | raise ValueError("not a tnetstring: invalid float literal") 354 | if type == "!": 355 | if data == "true": 356 | return (True,remain) 357 | elif data == "false": 358 | return (False,remain) 359 | else: 360 | raise ValueError("not a tnetstring: invalid boolean literal") 361 | if type == "~": 362 | if data: 363 | raise ValueError("not a tnetstring: invalid null literal") 364 | return (None,remain) 365 | if type == "]": 366 | l = [] 367 | while data: 368 | (item,data) = pop(data,encoding) 369 | l.append(item) 370 | return (l,remain) 371 | if type == "}": 372 | d = {} 373 | while data: 374 | (key,data) = pop(data,encoding) 375 | (val,data) = pop(data,encoding) 376 | d[key] = val 377 | return (d,remain) 378 | raise ValueError("unknown type tag") 379 | 380 | 
381 | 382 | # Use the c-extension version if available 383 | try: 384 | import _tnetstring 385 | except ImportError: 386 | pass 387 | else: 388 | dumps = _tnetstring.dumps 389 | load = _tnetstring.load 390 | loads = _tnetstring.loads 391 | pop = _tnetstring.pop 392 | 393 | -------------------------------------------------------------------------------- /tnetstring/_tnetstring.c: -------------------------------------------------------------------------------- 1 | // 2 | // _tnetstring.c: python module for fast encode/decode of typed-netstrings 3 | // 4 | // You get the following functions: 5 | // 6 | // dumps: dump a python object to a tnetstring 7 | // loads: parse tnetstring into a python object 8 | // load: parse tnetstring from a file-like object 9 | // pop: parse tnetstring into a python object, 10 | // return it along with unparsed data. 11 | 12 | #include 13 | 14 | 15 | #define TNS_MAX_LENGTH 999999999 16 | #include "tns_core.c" 17 | 18 | 19 | // We have one static tns_ops struct for parsing bytestrings. 20 | static tns_ops _tnetstring_ops_bytes; 21 | 22 | // Unicode parsing ops are created on demand. 23 | // We allocate a struct containing all the function pointers along with 24 | // the encoding string, as a primitive kind of closure. 25 | // Eventually we should cache these. 26 | struct tns_ops_with_encoding_s { 27 | tns_ops ops; 28 | char *encoding; 29 | }; 30 | typedef struct tns_ops_with_encoding_s tns_ops_with_encoding; 31 | 32 | static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding); 33 | 34 | 35 | // _tnetstring_loads: parse tnetstring-format value from a string. 
36 | // 37 | static PyObject* 38 | _tnetstring_loads(PyObject* self, PyObject *args) 39 | { 40 | PyObject *string = NULL; 41 | PyObject *encoding = Py_None; 42 | PyObject *val = NULL; 43 | tns_ops *ops = &_tnetstring_ops_bytes; 44 | char *data; 45 | size_t len; 46 | 47 | if(!PyArg_UnpackTuple(args, "loads", 1, 2, &string, &encoding)) { 48 | return NULL; 49 | } 50 | if(!PyString_Check(string)) { 51 | PyErr_SetString(PyExc_TypeError, "arg must be a string"); 52 | return NULL; 53 | } 54 | Py_INCREF(string); 55 | 56 | if(encoding == Py_None) { 57 | data = PyString_AS_STRING(string); 58 | len = PyString_GET_SIZE(string); 59 | val = tns_parse(ops, data, len, NULL); 60 | } else { 61 | if(!PyString_Check(encoding)) { 62 | PyErr_SetString(PyExc_TypeError, "encoding must be a string"); 63 | goto error; 64 | } 65 | Py_INCREF(encoding); 66 | ops = _tnetstring_get_unicode_ops(encoding); 67 | if(ops == NULL) { 68 | Py_DECREF(encoding); 69 | goto error; 70 | } 71 | data = PyString_AS_STRING(string); 72 | len = PyString_GET_SIZE(string); 73 | val = tns_parse(ops, data, len, NULL); 74 | free(ops); 75 | Py_DECREF(encoding); 76 | } 77 | 78 | Py_DECREF(string); 79 | return val; 80 | 81 | error: 82 | Py_DECREF(string); 83 | return NULL; 84 | } 85 | 86 | 87 | // _tnetstring_load: parse tnetstring-format value from a file. 88 | // 89 | // This takes care to read no more data than is required to get the 90 | // full tnetstring-encoded value. It might read arbitrarily-much 91 | // data if the file doesn't begin with a valid tnetstring. 
92 | // 93 | static PyObject* 94 | _tnetstring_load(PyObject* self, PyObject *args) 95 | { 96 | PyObject *val = NULL; 97 | PyObject *file = NULL; 98 | PyObject *encoding = Py_None; 99 | PyObject *methnm = NULL; 100 | PyObject *metharg = NULL; 101 | PyObject *res = NULL; 102 | tns_ops *ops = &_tnetstring_ops_bytes; 103 | char c, *data; 104 | size_t datalen = 0; 105 | 106 | if(!PyArg_UnpackTuple(args, "load", 1, 2, &file, &encoding)) { 107 | goto error; 108 | } 109 | Py_INCREF(file); 110 | 111 | if(encoding != Py_None) { 112 | if(!PyString_Check(encoding)) { 113 | PyErr_SetString(PyExc_TypeError, "encoding must be a string"); 114 | goto error; 115 | } 116 | Py_INCREF(encoding); 117 | ops = _tnetstring_get_unicode_ops(encoding); 118 | if(ops == NULL) { 119 | goto error; 120 | } 121 | } 122 | 123 | // We're going to read one char at a time 124 | if((methnm = PyString_FromString("read")) == NULL) { 125 | goto error; 126 | } 127 | if((metharg = PyInt_FromLong(1)) == NULL) { 128 | goto error; 129 | } 130 | 131 | // Read the length prefix one char at a time 132 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL); 133 | if(res == NULL) { 134 | goto error; 135 | } 136 | Py_INCREF(res); 137 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) { 138 | PyErr_SetString(PyExc_ValueError, 139 | "Not a tnetstring: invalid or missing length prefix"); 140 | goto error; 141 | } 142 | c = PyString_AS_STRING(res)[0]; 143 | Py_DECREF(res); res = NULL; 144 | // Note that the netstring spec explicitly forbids padding zeroes. 145 | // If the first char is zero, it must be the only char. 
146 | if(c < '0' || c > '9') { 147 | PyErr_SetString(PyExc_ValueError, 148 | "Not a tnetstring: invalid or missing length prefix"); 149 | goto error; 150 | } else if (c == '0') { 151 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL); 152 | if(res == NULL) { 153 | goto error; 154 | } 155 | Py_INCREF(res); 156 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) { 157 | PyErr_SetString(PyExc_ValueError, 158 | "Not a tnetstring: invalid or missing length prefix"); 159 | goto error; 160 | } 161 | c = PyString_AS_STRING(res)[0]; 162 | Py_DECREF(res); res = NULL; 163 | } else { 164 | do { 165 | datalen = (10 * datalen) + (c - '0'); 166 | check(datalen <= TNS_MAX_LENGTH, 167 | "Not a tnetstring: absurdly large length prefix"); 168 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL); 169 | if(res == NULL) { 170 | goto error; 171 | } 172 | Py_INCREF(res); 173 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) { 174 | PyErr_SetString(PyExc_ValueError, 175 | "Not a tnetstring: invalid or missing length prefix"); 176 | goto error; 177 | } 178 | c = PyString_AS_STRING(res)[0]; 179 | Py_DECREF(res); res = NULL; 180 | } while(c >= '0' && c <= '9'); 181 | } 182 | 183 | // Validate end-of-length-prefix marker. 184 | if(c != ':') { 185 | PyErr_SetString(PyExc_ValueError, 186 | "Not a tnetstring: missing length prefix"); 187 | goto error; 188 | } 189 | 190 | // Read the data plus terminating type tag. 
191 | Py_DECREF(metharg); 192 | if((metharg = PyInt_FromSize_t(datalen + 1)) == NULL) { 193 | goto error; 194 | } 195 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL); 196 | if(res == NULL) { 197 | goto error; 198 | } 199 | Py_INCREF(res); 200 | Py_DECREF(file); file = NULL; 201 | Py_DECREF(methnm); methnm = NULL; 202 | Py_DECREF(metharg); metharg = NULL; 203 | if(!PyString_Check(res) || PyString_GET_SIZE(res) != datalen + 1) { 204 | PyErr_SetString(PyExc_ValueError, 205 | "Not a tnetstring: invalid length prefix"); 206 | goto error; 207 | } 208 | 209 | // Parse out the payload object 210 | data = PyString_AS_STRING(res); 211 | val = tns_parse_payload(ops, data[datalen], data, datalen); 212 | Py_DECREF(res); res = NULL; 213 | 214 | if(ops != &_tnetstring_ops_bytes) { 215 | free(ops); 216 | Py_DECREF(encoding); 217 | } 218 | 219 | return val; 220 | 221 | error: 222 | if(file != NULL) { 223 | Py_DECREF(file); 224 | } 225 | if(ops != &_tnetstring_ops_bytes) { 226 | free(ops); 227 | Py_DECREF(encoding); 228 | } 229 | if(methnm != NULL) { 230 | Py_DECREF(methnm); 231 | } 232 | if(metharg != NULL) { 233 | Py_DECREF(metharg); 234 | } 235 | if(res != NULL) { 236 | Py_DECREF(res); 237 | } 238 | if(val != NULL) { 239 | Py_DECREF(val); 240 | } 241 | return NULL; 242 | } 243 | 244 | 245 | static PyObject* 246 | _tnetstring_pop(PyObject* self, PyObject *args) 247 | { 248 | PyObject *string = NULL; 249 | PyObject *val = NULL; 250 | PyObject *rest = NULL; 251 | PyObject *result = NULL; 252 | PyObject *encoding = Py_None; 253 | tns_ops *ops = &_tnetstring_ops_bytes; 254 | char *data, *remain; 255 | size_t len; 256 | 257 | if(!PyArg_UnpackTuple(args, "pop", 1, 2, &string, &encoding)) { 258 | return NULL; 259 | } 260 | if(!PyString_Check(string)) { 261 | PyErr_SetString(PyExc_TypeError, "arg must be a string"); 262 | return NULL; 263 | } 264 | if(encoding != Py_None) { 265 | if(!PyString_Check(encoding)) { 266 | PyErr_SetString(PyExc_TypeError, "encoding must be a 
string"); 267 | return NULL; 268 | } 269 | Py_INCREF(encoding); 270 | ops = _tnetstring_get_unicode_ops(encoding); 271 | if(ops == NULL) { 272 | Py_DECREF(encoding); 273 | return NULL; 274 | } 275 | } 276 | Py_INCREF(string); 277 | 278 | data = PyString_AS_STRING(string); 279 | len = PyString_GET_SIZE(string); 280 | val = tns_parse(ops, data, len, &remain); 281 | Py_DECREF(string); 282 | if(ops != &_tnetstring_ops_bytes) { 283 | free(ops); 284 | Py_DECREF(encoding); 285 | } 286 | if(val == NULL) { 287 | return NULL; 288 | } 289 | 290 | rest = PyString_FromStringAndSize(remain, len-(remain-data)); 291 | if(rest == NULL) { 292 | result = NULL; 293 | } else { 294 | result = PyTuple_Pack(2, val, rest); 295 | Py_DECREF(rest); 296 | } 297 | Py_DECREF(val); 298 | return result; 299 | } 300 | 301 | 302 | static PyObject* 303 | _tnetstring_dumps(PyObject* self, PyObject *args) 304 | { 305 | PyObject *object = NULL; 306 | PyObject *string = NULL; 307 | PyObject *encoding = Py_None; 308 | tns_ops *ops = &_tnetstring_ops_bytes; 309 | tns_outbuf outbuf; 310 | 311 | if(!PyArg_UnpackTuple(args, "dumps", 1, 2, &object, &encoding)) { 312 | return NULL; 313 | } 314 | if(encoding != Py_None) { 315 | if(!PyString_Check(encoding)) { 316 | PyErr_SetString(PyExc_TypeError, "encoding must be a string"); 317 | return NULL; 318 | } 319 | Py_INCREF(encoding); 320 | ops = _tnetstring_get_unicode_ops(encoding); 321 | if(ops == NULL) { 322 | Py_DECREF(encoding); 323 | return NULL; 324 | } 325 | } 326 | Py_INCREF(object); 327 | 328 | if(tns_outbuf_init(&outbuf) == -1) { 329 | goto error; 330 | } 331 | if(tns_render_value(ops, object, &outbuf) == -1) { 332 | goto error; 333 | } 334 | 335 | Py_DECREF(object); 336 | string = PyString_FromStringAndSize(NULL,tns_outbuf_size(&outbuf)); 337 | if(string == NULL) { 338 | goto error; 339 | } 340 | 341 | tns_outbuf_memmove(&outbuf, PyString_AS_STRING(string)); 342 | free(outbuf.buffer); 343 | 344 | if(ops != &_tnetstring_ops_bytes) { 345 | free(ops); 346 | 
Py_DECREF(encoding); 347 | } 348 | 349 | return string; 350 | 351 | error: 352 | if(ops != &_tnetstring_ops_bytes) { 353 | free(ops); 354 | Py_DECREF(encoding); 355 | } 356 | Py_DECREF(object); 357 | return NULL; 358 | } 359 | 360 | 361 | static PyMethodDef _tnetstring_methods[] = { 362 | {"load", 363 | (PyCFunction)_tnetstring_load, 364 | METH_VARARGS, 365 | PyDoc_STR("load(file,encoding=None) -> object\n" 366 | "This function reads a tnetstring from a file and parses it\n" 367 | "into a python object.")}, 368 | 369 | {"loads", 370 | (PyCFunction)_tnetstring_loads, 371 | METH_VARARGS, 372 | PyDoc_STR("loads(string,encoding=None) -> object\n" 373 | "This function parses a tnetstring into a python object.")}, 374 | 375 | {"pop", 376 | (PyCFunction)_tnetstring_pop, 377 | METH_VARARGS, 378 | PyDoc_STR("pop(string,encoding=None) -> (object, remain)\n" 379 | "This function parses a tnetstring into a python object.\n" 380 | "It returns a tuple giving the parsed object and a string\n" 381 | "containing any unparsed data.")}, 382 | 383 | {"dumps", 384 | (PyCFunction)_tnetstring_dumps, 385 | METH_VARARGS, 386 | PyDoc_STR("dumps(object,encoding=None) -> string\n" 387 | "This function dumps a python object as a tnetstring.")}, 388 | 389 | {NULL, NULL} 390 | }; 391 | 392 | 393 | // Functions to hook the parser core up to python. 
394 | 395 | static void* 396 | tns_parse_string(const tns_ops *ops, const char *data, size_t len) 397 | { 398 | return PyString_FromStringAndSize(data, len); 399 | } 400 | 401 | 402 | static void* 403 | tns_parse_unicode(const tns_ops *ops, const char *data, size_t len) 404 | { 405 | char* encoding = ((tns_ops_with_encoding*)ops)->encoding; 406 | return PyUnicode_Decode(data, len, encoding, NULL); 407 | } 408 | 409 | 410 | static void* 411 | tns_parse_integer(const tns_ops *ops, const char *data, size_t len) 412 | { 413 | long l = 0; 414 | long long ll = 0; 415 | int sign = 1; 416 | char c; 417 | char *dataend; 418 | const char *pos, *eod; 419 | PyObject *v = NULL; 420 | 421 | // Anything with less than 10 digits, we can fit into a long. 422 | // Hand-parsing, as we need tighter error-checking than strtol. 423 | if (len < 10) { 424 | pos = data; 425 | eod = data + len; 426 | c = *pos++; 427 | switch(c) { 428 | case '0': 429 | case '1': 430 | case '2': 431 | case '3': 432 | case '4': 433 | case '5': 434 | case '6': 435 | case '7': 436 | case '8': 437 | case '9': 438 | l = c - '0'; 439 | break; 440 | case '+': 441 | break; 442 | case '-': 443 | sign = -1; 444 | break; 445 | default: 446 | sentinel("invalid integer literal"); 447 | } 448 | while(pos < eod) { 449 | c = *pos++; 450 | check(c >= '0' && c <= '9', "invalid integer literal"); 451 | l = (l * 10) + (c - '0'); 452 | } 453 | return PyLong_FromLong(l * sign); 454 | } 455 | // Anything with less than 19 digits fits in a long long. 456 | // Hand-parsing, as we need tighter error-checking than strtoll. 
457 | else if(len < 19) { 458 | pos = data; 459 | eod = data + len; 460 | c = *pos++; 461 | switch(c) { 462 | case '0': 463 | case '1': 464 | case '2': 465 | case '3': 466 | case '4': 467 | case '5': 468 | case '6': 469 | case '7': 470 | case '8': 471 | case '9': 472 | ll = c - '0'; 473 | break; 474 | case '+': 475 | break; 476 | case '-': 477 | sign = -1; 478 | break; 479 | default: 480 | sentinel("invalid integer literal"); 481 | } 482 | while(pos < eod) { 483 | c = *pos++; 484 | check(c >= '0' && c <= '9', "invalid integer literal"); 485 | ll = (ll * 10) + (c - '0'); 486 | } 487 | return PyLong_FromLongLong(ll * sign); 488 | } 489 | // Really big numbers are passed to python's native parser. 490 | else { 491 | // PyLong_FromString allows leading whitespace, so we have to check 492 | // that there is none present in the string. 493 | c = *data; 494 | switch(c) { 495 | case '0': 496 | case '1': 497 | case '2': 498 | case '3': 499 | case '4': 500 | case '5': 501 | case '6': 502 | case '7': 503 | case '8': 504 | case '9': 505 | break; 506 | case '+': 507 | case '-': 508 | c = *(data+1); 509 | check(c >= '0' && c <= '9', "invalid integer literal"); 510 | break; 511 | default: 512 | sentinel("invalid integer literal"); 513 | } 514 | // PyLong_FromString insists that the string end in a NULL byte. 515 | // I am *not* copying all that data. Instead we lie a little bit 516 | // about the const-ness of data, write a NULL over the format terminator 517 | // and restore the original character when we're done. 
518 | c = data[len]; 519 | ((char*)data)[len] = '\0'; 520 | v = PyLong_FromString((char *)data, &dataend, 10); 521 | ((char*)data)[len] = c; 522 | check(dataend == data + len, "invalid integer literal"); 523 | return v; 524 | } 525 | sentinel("invalid code branch, check your compiler..."); 526 | 527 | error: 528 | return NULL; 529 | } 530 | 531 | 532 | static void* 533 | tns_parse_float(const tns_ops *ops, const char *data, size_t len) 534 | { 535 | double d = 0; 536 | char *dataend; 537 | 538 | // Technically this allows whitespace around the float, which 539 | // isn't valid in a tnetstring. But I don't want to waste the 540 | // time checking and I am *not* reimplementing strtod. 541 | d = strtod(data, &dataend); 542 | if(dataend != data + len) { 543 | return NULL; 544 | } 545 | return PyFloat_FromDouble(d); 546 | } 547 | 548 | 549 | static void* 550 | tns_get_null(const tns_ops *ops) 551 | { 552 | Py_INCREF(Py_None); 553 | return Py_None; 554 | } 555 | 556 | 557 | static void* 558 | tns_get_true(const tns_ops *ops) 559 | { 560 | Py_INCREF(Py_True); 561 | return Py_True; 562 | } 563 | 564 | 565 | static void* 566 | tns_get_false(const tns_ops *ops) 567 | { 568 | Py_INCREF(Py_False); 569 | return Py_False; 570 | } 571 | 572 | 573 | static void* 574 | tns_new_dict(const tns_ops *ops) 575 | { 576 | return PyDict_New(); 577 | } 578 | 579 | 580 | static void* 581 | tns_new_list(const tns_ops *ops) 582 | { 583 | return PyList_New(0); 584 | } 585 | 586 | 587 | static void 588 | tns_free_value(const tns_ops *ops, void *value) 589 | { 590 | Py_XDECREF(value); 591 | } 592 | 593 | 594 | static int 595 | tns_add_to_dict(const tns_ops *ops, void *dict, void *key, void *item) 596 | { 597 | int res; 598 | res = PyDict_SetItem(dict, key, item); 599 | Py_DECREF(key); 600 | Py_DECREF(item); 601 | if(res == -1) { 602 | return -1; 603 | } 604 | return 0; 605 | } 606 | 607 | 608 | static int 609 | tns_add_to_list(const tns_ops *ops, void *list, void *item) 610 | { 611 | int res; 612 
| res = PyList_Append(list, item); 613 | Py_DECREF(item); 614 | if(res == -1) { 615 | return -1; 616 | } 617 | return 0; 618 | } 619 | 620 | 621 | static int 622 | tns_render_string(const tns_ops *ops, void *val, tns_outbuf *outbuf) 623 | { 624 | return tns_outbuf_puts(outbuf, PyString_AS_STRING(val), 625 | PyString_GET_SIZE(val)); 626 | } 627 | 628 | 629 | static int 630 | tns_render_unicode(const tns_ops *ops, void *val, tns_outbuf *outbuf) 631 | { 632 | PyObject *bytes; 633 | char* encoding = ((tns_ops_with_encoding*)ops)->encoding; 634 | 635 | if(PyUnicode_Check(val)) { 636 | bytes = PyUnicode_Encode(PyUnicode_AS_UNICODE(val), 637 | PyUnicode_GET_SIZE(val), 638 | encoding, NULL); 639 | if(bytes == NULL) { 640 | return -1; 641 | } 642 | if(tns_render_string(ops, bytes, outbuf) == -1) { 643 | return -1; 644 | } 645 | Py_DECREF(bytes); 646 | return 0; 647 | } 648 | 649 | if(PyString_Check(val)) { 650 | return tns_render_string(ops, val, outbuf); 651 | } 652 | 653 | return -1; 654 | } 655 | 656 | 657 | static int 658 | tns_render_integer(const tns_ops *ops, void *val, tns_outbuf *outbuf) 659 | { 660 | PyObject *string = NULL; 661 | int res = 0; 662 | 663 | string = PyObject_Str(val); 664 | if(string == NULL) { 665 | return -1; 666 | } 667 | 668 | res = tns_render_string(ops, string, outbuf); 669 | Py_DECREF(string); 670 | return res; 671 | } 672 | 673 | 674 | static int 675 | tns_render_float(const tns_ops *ops, void *val, tns_outbuf *outbuf) 676 | { 677 | PyObject *string; 678 | int res = 0; 679 | 680 | string = PyObject_Repr(val); 681 | if(string == NULL) { 682 | return -1; 683 | } 684 | 685 | res = tns_render_string(ops, string, outbuf); 686 | Py_DECREF(string); 687 | return res; 688 | } 689 | 690 | 691 | static int 692 | tns_render_bool(const tns_ops *ops, void *val, tns_outbuf *outbuf) 693 | { 694 | if(val == Py_True) { 695 | return tns_outbuf_puts(outbuf, "true", 4); 696 | } else { 697 | return tns_outbuf_puts(outbuf, "false", 5); 698 | } 699 | } 700 | 701 | 
702 | static int 703 | tns_render_dict(const tns_ops *ops, void *val, tns_outbuf *outbuf) 704 | { 705 | PyObject *key, *item; 706 | Py_ssize_t pos = 0; 707 | 708 | while(PyDict_Next(val, &pos, &key, &item)) { 709 | if(tns_render_value(ops, item, outbuf) == -1) { 710 | return -1; 711 | } 712 | if(tns_render_value(ops, key, outbuf) == -1) { 713 | return -1; 714 | } 715 | } 716 | return 0; 717 | } 718 | 719 | 720 | static int 721 | tns_render_list(const tns_ops *ops, void *val, tns_outbuf *outbuf) 722 | { 723 | PyObject *item; 724 | Py_ssize_t idx; 725 | 726 | // Remember, all output is in reverse. 727 | // So we must write the last element first. 728 | idx = PyList_GET_SIZE(val) - 1; 729 | while(idx >= 0) { 730 | item = PyList_GET_ITEM(val, idx); 731 | if(tns_render_value(ops, item, outbuf) == -1) { 732 | return -1; 733 | } 734 | idx--; 735 | } 736 | return 0; 737 | } 738 | 739 | 740 | static 741 | tns_type_tag tns_get_type(const tns_ops *ops, void *val) 742 | { 743 | if(val == Py_True || val == Py_False) { 744 | return tns_tag_bool; 745 | } 746 | if(val == Py_None) { 747 | return tns_tag_null; 748 | } 749 | if(PyInt_Check((PyObject*)val) || PyLong_Check((PyObject*)val)) { 750 | return tns_tag_integer; 751 | } 752 | if(PyFloat_Check((PyObject*)val)) { 753 | return tns_tag_float; 754 | } 755 | if(PyString_Check((PyObject*)val)) { 756 | return tns_tag_string; 757 | } 758 | if(PyList_Check((PyObject*)val)) { 759 | return tns_tag_list; 760 | } 761 | if(PyDict_Check((PyObject*)val)) { 762 | return tns_tag_dict; 763 | } 764 | return 0; 765 | } 766 | 767 | 768 | static 769 | tns_type_tag tns_get_type_unicode(const tns_ops *ops, void *val) 770 | { 771 | tns_type_tag type = 0; 772 | 773 | type = tns_get_type(ops, val); 774 | if(type == 0) { 775 | if(PyUnicode_Check(val)) { 776 | type = tns_tag_string; 777 | } 778 | } 779 | 780 | return type; 781 | } 782 | 783 | 784 | static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding) 785 | { 786 | tns_ops_with_encoding *opswe = 
NULL; 787 | tns_ops *ops = NULL; 788 | 789 | opswe = malloc(sizeof(tns_ops_with_encoding)); 790 | if(opswe == NULL) { 791 | PyErr_SetString(PyExc_MemoryError, "could not allocate ops struct"); 792 | return NULL; 793 | } 794 | ops = (tns_ops*)opswe; 795 | 796 | opswe->encoding = PyString_AS_STRING(encoding); 797 | 798 | ops->get_type = &tns_get_type_unicode; 799 | ops->free_value = &tns_free_value; 800 | 801 | ops->parse_string = tns_parse_unicode; 802 | ops->parse_integer = tns_parse_integer; 803 | ops->parse_float = tns_parse_float; 804 | ops->get_null = tns_get_null; 805 | ops->get_true = tns_get_true; 806 | ops->get_false = tns_get_false; 807 | 808 | ops->render_string = tns_render_unicode; 809 | ops->render_integer = tns_render_integer; 810 | ops->render_float = tns_render_float; 811 | ops->render_bool = tns_render_bool; 812 | 813 | ops->new_dict = tns_new_dict; 814 | ops->add_to_dict = tns_add_to_dict; 815 | ops->render_dict = tns_render_dict; 816 | 817 | ops->new_list = tns_new_list; 818 | ops->add_to_list = tns_add_to_list; 819 | ops->render_list = tns_render_list; 820 | 821 | return ops; 822 | } 823 | 824 | 825 | PyDoc_STRVAR(module_doc, 826 | "Fast encoding/decoding of typed-netstrings." 827 | ); 828 | 829 | 830 | PyMODINIT_FUNC 831 | init_tnetstring(void) 832 | { 833 | Py_InitModule3("_tnetstring", _tnetstring_methods, module_doc); 834 | 835 | // Initialize function pointers for parsing bytes. 
836 | _tnetstring_ops_bytes.get_type = &tns_get_type; 837 | _tnetstring_ops_bytes.free_value = &tns_free_value; 838 | 839 | _tnetstring_ops_bytes.parse_string = tns_parse_string; 840 | _tnetstring_ops_bytes.parse_integer = tns_parse_integer; 841 | _tnetstring_ops_bytes.parse_float = tns_parse_float; 842 | _tnetstring_ops_bytes.get_null = tns_get_null; 843 | _tnetstring_ops_bytes.get_true = tns_get_true; 844 | _tnetstring_ops_bytes.get_false = tns_get_false; 845 | 846 | _tnetstring_ops_bytes.render_string = tns_render_string; 847 | _tnetstring_ops_bytes.render_integer = tns_render_integer; 848 | _tnetstring_ops_bytes.render_float = tns_render_float; 849 | _tnetstring_ops_bytes.render_bool = tns_render_bool; 850 | 851 | _tnetstring_ops_bytes.new_dict = tns_new_dict; 852 | _tnetstring_ops_bytes.add_to_dict = tns_add_to_dict; 853 | _tnetstring_ops_bytes.render_dict = tns_render_dict; 854 | 855 | _tnetstring_ops_bytes.new_list = tns_new_list; 856 | _tnetstring_ops_bytes.add_to_list = tns_add_to_list; 857 | _tnetstring_ops_bytes.render_list = tns_render_list; 858 | } 859 | 860 | -------------------------------------------------------------------------------- /tnetstring/dbg.h: -------------------------------------------------------------------------------- 1 | // 2 | // dbg.h: minimal checking and debugging functions 3 | // 4 | // This is a small compatibility shim for the Mongrel2 "dbg.h" interface, 5 | // to make it easier to port code back and forth between the tnetstring 6 | // implementation in Mongrel2 and this module. 7 | // 8 | 9 | #ifndef __dbg_h__ 10 | #define __dbg_h__ 11 | 12 | #define check(A, M, ...) if(!(A)) { if(PyErr_Occurred() == NULL) { PyErr_Format(PyExc_ValueError, M, ##__VA_ARGS__); }; goto error; } 13 | 14 | #define sentinel(M, ...) 
check(0, M, ##__VA_ARGS__) 15 | 16 | #define check_mem(A) if(A==NULL) { if(PyErr_Occurred() == NULL) { PyErr_SetString(PyExc_MemoryError, "Out of memory."); }; goto error; } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /tnetstring/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rfk/tnetstring/146381498a07d6053e044375562be08ef16017c2/tnetstring/tests/__init__.py -------------------------------------------------------------------------------- /tnetstring/tests/test_format.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import unittest 4 | import random 5 | import math 6 | import StringIO 7 | 8 | 9 | import tnetstring 10 | 11 | 12 | FORMAT_EXAMPLES = { 13 | '0:}': {}, 14 | '0:]': [], 15 | '51:5:hello,39:11:12345678901#4:this,4:true!0:~4:\x00\x00\x00\x00,]}': 16 | {'hello': [12345678901, 'this', True, None, '\x00\x00\x00\x00']}, 17 | '5:12345#': 12345, 18 | '12:this is cool,': "this is cool", 19 | '0:,': "", 20 | '0:~': None, 21 | '4:true!': True, 22 | '5:false!': False, 23 | '10:\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00,': "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 24 | '24:5:12345#5:67890#5:xxxxx,]': [12345, 67890, 'xxxxx'], 25 | '18:3:0.1^3:0.2^3:0.3^]': [0.1, 0.2, 0.3], 26 | '243:238:233:228:223:218:213:208:203:198:193:188:183:178:173:168:163:158:153:148:143:138:133:128:123:118:113:108:103:99:95:91:87:83:79:75:71:67:63:59:55:51:47:43:39:35:31:27:23:19:15:11:hello-there,]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]': [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["hello-there"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] 27 | } 28 | 29 | 30 | def get_random_object(random=random,depth=0,unicode=False): 31 | """Generate a random serializable object.""" 32 | # The probability of generating a scalar value increases as the depth increase. 
33 | # This ensures that we bottom out eventually. 34 | if random.randint(depth,10) <= 4: 35 | what = random.randint(0,1) 36 | if what == 0: 37 | n = random.randint(0,10) 38 | l = [] 39 | for _ in xrange(n): 40 | l.append(get_random_object(random,depth+1,unicode)) 41 | return l 42 | if what == 1: 43 | n = random.randint(0,10) 44 | d = {} 45 | for _ in xrange(n): 46 | n = random.randint(0,100) 47 | k = "".join(chr(random.randint(32,126)) for _ in xrange(n)) 48 | if unicode: 49 | k = k.decode("ascii") 50 | d[k] = get_random_object(random,depth+1,unicode) 51 | return d 52 | else: 53 | what = random.randint(0,4) 54 | if what == 0: 55 | return None 56 | if what == 1: 57 | return True 58 | if what == 2: 59 | return False 60 | if what == 3: 61 | if random.randint(0,1) == 0: 62 | return random.randint(0,sys.maxint) 63 | else: 64 | return -1 * random.randint(0,sys.maxint) 65 | n = random.randint(0,100) 66 | if unicode: 67 | return u"".join(chr(random.randint(32,126)) for _ in xrange(n)) 68 | 69 | 70 | 71 | class Test_Format(unittest.TestCase): 72 | 73 | def test_roundtrip_format_examples(self): 74 | for data, expect in FORMAT_EXAMPLES.items(): 75 | self.assertEqual(expect,tnetstring.loads(data)) 76 | self.assertEqual(expect,tnetstring.loads(tnetstring.dumps(expect))) 77 | self.assertEqual((expect,""),tnetstring.pop(data)) 78 | 79 | def test_roundtrip_format_random(self): 80 | for _ in xrange(500): 81 | v = get_random_object() 82 | self.assertEqual(v,tnetstring.loads(tnetstring.dumps(v))) 83 | self.assertEqual((v,""),tnetstring.pop(tnetstring.dumps(v))) 84 | 85 | def test_unicode_handling(self): 86 | self.assertRaises(ValueError,tnetstring.dumps,u"hello") 87 | self.assertEquals(tnetstring.dumps(u"hello","utf8"),"5:hello,") 88 | self.assertEquals(type(tnetstring.loads("5:hello,")),str) 89 | self.assertEquals(type(tnetstring.loads("5:hello,","utf8")),unicode) 90 | ALPHA = u"\N{GREEK CAPITAL LETTER ALPHA}lpha" 91 | 
self.assertEquals(tnetstring.dumps(ALPHA,"utf8"),"6:"+ALPHA.encode("utf8")+",") 92 | self.assertEquals(tnetstring.dumps(ALPHA,"utf16"),"12:"+ALPHA.encode("utf16")+",") 93 | self.assertEquals(tnetstring.loads("12:\xff\xfe\x91\x03l\x00p\x00h\x00a\x00,","utf16"),ALPHA) 94 | 95 | def test_roundtrip_format_unicode(self): 96 | for _ in xrange(500): 97 | v = get_random_object(unicode=True) 98 | self.assertEqual(v,tnetstring.loads(tnetstring.dumps(v,"utf8"),"utf8")) 99 | self.assertEqual((v,""),tnetstring.pop(tnetstring.dumps(v,"utf16"),"utf16")) 100 | 101 | def test_roundtrip_big_integer(self): 102 | i1 = math.factorial(30000) 103 | s = tnetstring.dumps(i1) 104 | i2 = tnetstring.loads(s) 105 | self.assertEquals(i1, i2) 106 | 107 | 108 | class Test_FileLoading(unittest.TestCase): 109 | 110 | def test_roundtrip_file_examples(self): 111 | for data, expect in FORMAT_EXAMPLES.items(): 112 | s = StringIO.StringIO() 113 | s.write(data) 114 | s.write("OK") 115 | s.seek(0) 116 | self.assertEqual(expect,tnetstring.load(s)) 117 | self.assertEqual("OK",s.read()) 118 | s = StringIO.StringIO() 119 | tnetstring.dump(expect,s) 120 | s.write("OK") 121 | s.seek(0) 122 | self.assertEqual(expect,tnetstring.load(s)) 123 | self.assertEqual("OK",s.read()) 124 | 125 | def test_roundtrip_file_random(self): 126 | for _ in xrange(500): 127 | v = get_random_object() 128 | s = StringIO.StringIO() 129 | tnetstring.dump(v,s) 130 | s.write("OK") 131 | s.seek(0) 132 | self.assertEqual(v,tnetstring.load(s)) 133 | self.assertEqual("OK",s.read()) 134 | 135 | def test_error_on_absurd_lengths(self): 136 | s = StringIO.StringIO() 137 | s.write("1000000000:pwned!,") 138 | s.seek(0) 139 | self.assertRaises(ValueError,tnetstring.load,s) 140 | self.assertEquals(s.read(1),":") 141 | -------------------------------------------------------------------------------- /tnetstring/tests/test_misc.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | 
import difflib 5 | import unittest 6 | import doctest 7 | 8 | import tnetstring 9 | 10 | 11 | class Test_Misc(unittest.TestCase): 12 | 13 | def test_readme_matches_docstring(self): 14 | """Ensure that the README is in sync with the docstring. 15 | 16 | This test should always pass; if the README is out of sync it just 17 | updates it with the contents of tnetstring.__doc__. 18 | """ 19 | dirname = os.path.dirname 20 | readme = os.path.join(dirname(dirname(dirname(__file__))),"README.rst") 21 | if not os.path.isfile(readme): 22 | f = open(readme,"wb") 23 | f.write(tnetstring.__doc__.encode()) 24 | f.close() 25 | else: 26 | f = open(readme,"rb") 27 | if f.read() != tnetstring.__doc__: 28 | f.close() 29 | f = open(readme,"wb") 30 | f.write(tnetstring.__doc__.encode()) 31 | f.close() 32 | 33 | -------------------------------------------------------------------------------- /tnetstring/tns_core.c: -------------------------------------------------------------------------------- 1 | // 2 | // tns_core.c: core code for a tnetstring parser in C 3 | // 4 | // This is code for parsing and rendering data in the provisional 5 | // typed-netstring format proposed for inclusion in Mongrel2. You can 6 | // think of it like a JSON library that uses a simpler wire format. 7 | // 8 | 9 | #include "dbg.h" 10 | #include "tns_core.h" 11 | 12 | #ifndef TNS_MAX_LENGTH 13 | #define TNS_MAX_LENGTH 999999999 14 | #endif 15 | 16 | // Current outbuf implementation writes data starting at the back of 17 | // the allocated buffer. When finished we simply memmove it to the front. 18 | // Here *buffer points to the allocated buffer, while *head points to the 19 | // last characer written to the buffer (and thus decreases as we write). 20 | struct tns_outbuf_s { 21 | char *buffer; 22 | char *head; 23 | size_t alloc_size; 24 | }; 25 | 26 | 27 | // Helper function for parsing a dict; basically parses items in a loop. 
28 | static int tns_parse_dict(const tns_ops *ops, void *dict, const char *data, size_t len); 29 | 30 | // Helper function for parsing a list; basically parses items in a loop. 31 | static int tns_parse_list(const tns_ops *ops, void *list, const char *data, size_t len); 32 | 33 | // Helper function for writing the length prefix onto a rendered value. 34 | static int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size); 35 | 36 | // Finalize an outbuf, turning the allocated buffer into a standard 37 | // char* array. Can't use the outbuf once it has been finalized. 38 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len); 39 | 40 | // Free the memory allocated in an outbuf. 41 | // Can't use the outbuf once it has been freed. 42 | static void tns_outbuf_free(tns_outbuf *outbuf); 43 | 44 | // Helper function to read a base-ten integer off a string. 45 | // Due to additional constraints, we can do it faster than strtoi. 46 | static size_t tns_strtosz(const char *data, size_t len, size_t *sz, char **end); 47 | 48 | 49 | void* tns_parse(const tns_ops *ops, const char *data, size_t len, char **remain) 50 | { 51 | char *valstr = NULL; 52 | tns_type_tag type = tns_tag_null; 53 | size_t vallen = 0; 54 | 55 | // Read the length of the value, and verify that it ends in a colon. 56 | check(tns_strtosz(data, len, &vallen, &valstr) != -1, 57 | "Not a tnetstring: invalid length prefix."); 58 | check(*valstr == ':', 59 | "Not a tnetstring: invalid length prefix."); 60 | valstr++; 61 | check((valstr+vallen) < (data+len), 62 | "Not a tnetstring: invalid length prefix."); 63 | 64 | // Grab the type tag from the end of the value. 65 | type = valstr[vallen]; 66 | 67 | // Output the remainder of the string if necessary. 68 | if(remain != NULL) { 69 | *remain = valstr + vallen + 1; 70 | } 71 | 72 | // Now dispatch type parsing based on the type tag. 
73 | return tns_parse_payload(ops, type, valstr, vallen); 74 | 75 | error: 76 | return NULL; 77 | } 78 | 79 | 80 | // This appears to be faster than using strncmp to compare 81 | // against a small string constant. Ugly but fast. 82 | #define STR_EQ_TRUE(s) (s[0]=='t' && s[1]=='r' && s[2]=='u' && s[3]=='e') 83 | #define STR_EQ_FALSE(s) (s[0]=='f' && s[1]=='a' && s[2]=='l' \ 84 | && s[3]=='s' && s[4] == 'e') 85 | 86 | void* tns_parse_payload(const tns_ops *ops,tns_type_tag type, const char *data, size_t len) 87 | { 88 | void *val = NULL; 89 | 90 | assert(ops != NULL && "ops struct cannot be NULL"); 91 | 92 | switch(type) { 93 | // Primitive type: a string blob. 94 | case tns_tag_string: 95 | val = ops->parse_string(ops, data, len); 96 | check(val != NULL, "Not a tnetstring: invalid string literal."); 97 | break; 98 | // Primitive type: an integer. 99 | case tns_tag_integer: 100 | val = ops->parse_integer(ops, data, len); 101 | check(val != NULL, "Not a tnetstring: invalid integer literal."); 102 | break; 103 | // Primitive type: a float. 104 | case tns_tag_float: 105 | val = ops->parse_float(ops, data, len); 106 | check(val != NULL, "Not a tnetstring: invalid float literal."); 107 | break; 108 | // Primitive type: a boolean. 109 | // The only acceptable values are "true" and "false". 110 | case tns_tag_bool: 111 | if(len == 4 && STR_EQ_TRUE(data)) { 112 | val = ops->get_true(ops); 113 | } else if(len == 5 && STR_EQ_FALSE(data)) { 114 | val = ops->get_false(ops); 115 | } else { 116 | sentinel("Not a tnetstring: invalid boolean literal."); 117 | val = NULL; 118 | } 119 | break; 120 | // Primitive type: a null. 121 | // This must be a zero-length string. 122 | case tns_tag_null: 123 | check(len == 0, "Not a tnetstring: invalid null literal."); 124 | val = ops->get_null(ops); 125 | break; 126 | // Compound type: a dict. 
127 | // The data is written 128 | case tns_tag_dict: 129 | val = ops->new_dict(ops); 130 | check(val != NULL, "Could not create dict."); 131 | check(tns_parse_dict(ops, val, data, len) != -1, 132 | "Not a tnetstring: broken dict items."); 133 | break; 134 | // Compound type: a list. 135 | // The data is written 136 | case tns_tag_list: 137 | val = ops->new_list(ops); 138 | check(val != NULL, "Could not create list."); 139 | check(tns_parse_list(ops, val, data, len) != -1, 140 | "Not a tnetstring: broken list items."); 141 | break; 142 | // Whoops, that ain't a tnetstring. 143 | default: 144 | sentinel("Not a tnetstring: invalid type tag."); 145 | } 146 | 147 | return val; 148 | 149 | error: 150 | if(val != NULL) { 151 | ops->free_value(ops, val); 152 | } 153 | return NULL; 154 | } 155 | 156 | #undef STR_EQ_TRUE 157 | #undef STR_EQ_FALSE 158 | 159 | 160 | char* tns_render(const tns_ops *ops, void *val, size_t *len) 161 | { 162 | tns_outbuf outbuf; 163 | 164 | check(tns_outbuf_init(&outbuf) != -1, "Failed to initialize outbuf."); 165 | check(tns_render_value(ops, val, &outbuf) != -1, "Failed to render value."); 166 | 167 | return tns_outbuf_finalize(&outbuf, len); 168 | 169 | error: 170 | tns_outbuf_free(&outbuf); 171 | return NULL; 172 | } 173 | 174 | 175 | int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf) 176 | { 177 | tns_type_tag type = tns_tag_null; 178 | int res = -1; 179 | size_t orig_size = 0; 180 | 181 | assert(ops != NULL && "ops struct cannot be NULL"); 182 | 183 | // Find out the type tag for the given value. 184 | type = ops->get_type(ops, val); 185 | check(type != 0, "type not serializable."); 186 | 187 | tns_outbuf_putc(outbuf, type); 188 | orig_size = tns_outbuf_size(outbuf); 189 | 190 | // Render it into the output buffer using callbacks. 
191 | switch(type) { 192 | case tns_tag_string: 193 | res = ops->render_string(ops, val, outbuf); 194 | break; 195 | case tns_tag_integer: 196 | res = ops->render_integer(ops, val, outbuf); 197 | break; 198 | case tns_tag_float: 199 | res = ops->render_float(ops, val, outbuf); 200 | break; 201 | case tns_tag_bool: 202 | res = ops->render_bool(ops, val, outbuf); 203 | break; 204 | case tns_tag_null: 205 | res = 0; 206 | break; 207 | case tns_tag_dict: 208 | res = ops->render_dict(ops, val, outbuf); 209 | break; 210 | case tns_tag_list: 211 | res = ops->render_list(ops, val, outbuf); 212 | break; 213 | default: 214 | sentinel("unknown type tag: '%c'.", type); 215 | } 216 | 217 | check(res == 0, "Failed to render value of type '%c'.", type); 218 | return tns_outbuf_clamp(outbuf, orig_size); 219 | 220 | error: 221 | return -1; 222 | } 223 | 224 | 225 | static int tns_parse_list(const tns_ops *ops, void *val, const char *data, size_t len) 226 | { 227 | void *item = NULL; 228 | char *remain = NULL; 229 | 230 | assert(val != NULL && "value cannot be NULL"); 231 | assert(data != NULL && "data cannot be NULL"); 232 | 233 | while(len > 0) { 234 | item = tns_parse(ops, data, len, &remain); 235 | check(item != NULL, "Failed to parse list."); 236 | len = len - (remain - data); 237 | data = remain; 238 | check(ops->add_to_list(ops, val, item) != -1, 239 | "Failed to add item to list."); 240 | item = NULL; 241 | } 242 | 243 | return 0; 244 | 245 | error: 246 | if(item) { 247 | ops->free_value(ops, item); 248 | } 249 | return -1; 250 | } 251 | 252 | 253 | static int tns_parse_dict(const tns_ops *ops, void *val, const char *data, size_t len) 254 | { 255 | void *key = NULL; 256 | void *item = NULL; 257 | char *remain = NULL; 258 | 259 | assert(val != NULL && "value cannot be NULL"); 260 | assert(data != NULL && "data cannot be NULL"); 261 | 262 | while(len > 0) { 263 | key = tns_parse(ops, data, len, &remain); 264 | check(key != NULL, "Failed to parse dict key from tnetstring."); 
265 | len = len - (remain - data); 266 | data = remain; 267 | 268 | item = tns_parse(ops, data, len, &remain); 269 | check(item != NULL, "Failed to parse dict item from tnetstring."); 270 | len = len - (remain - data); 271 | data = remain; 272 | 273 | check(ops->add_to_dict(ops, val, key, item) != -1, 274 | "Failed to add element to dict."); 275 | 276 | key = NULL; 277 | item = NULL; 278 | } 279 | 280 | return 0; 281 | 282 | error: 283 | if(key) { 284 | ops->free_value(ops, key); 285 | } 286 | if(item) { 287 | ops->free_value(ops, item); 288 | } 289 | return -1; 290 | } 291 | 292 | 293 | 294 | static INLINE size_t 295 | tns_strtosz(const char *data, size_t len, size_t *sz, char **end) 296 | { 297 | char c; 298 | const char *pos, *eod; 299 | size_t value = 0; 300 | 301 | pos = data; 302 | eod = data + len; 303 | 304 | // The first character must be a digit. 305 | // The netstring spec explicitly forbits padding zeros. 306 | // So if it's a zero, it must be the only char in the string. 307 | c = *pos++; 308 | switch(c) { 309 | case '0': 310 | *sz = 0; 311 | *end = (char*) pos; 312 | return 0; 313 | case '1': 314 | case '2': 315 | case '3': 316 | case '4': 317 | case '5': 318 | case '6': 319 | case '7': 320 | case '8': 321 | case '9': 322 | value = c - '0'; 323 | break; 324 | default: 325 | return -1; 326 | } 327 | 328 | // Consume the remaining digits, up to maximum value length. 329 | while(pos < eod) { 330 | c = *pos; 331 | if(c < '0' || c > '9') { 332 | *sz = value; 333 | *end = (char*) pos; 334 | return 0; 335 | } 336 | value = (value * 10) + (c - '0'); 337 | check(value <= TNS_MAX_LENGTH, 338 | "Not a tnetstring: absurdly large length prefix"); 339 | pos++; 340 | } 341 | 342 | // If we consume the entire string, that's an error. 
343 | 344 | error: 345 | return -1; 346 | } 347 | 348 | size_t tns_outbuf_size(tns_outbuf *outbuf) 349 | { 350 | return outbuf->alloc_size - (outbuf->head - outbuf->buffer); 351 | } 352 | 353 | 354 | static INLINE int tns_outbuf_itoa(tns_outbuf *outbuf, size_t n) 355 | { 356 | do { 357 | check(tns_outbuf_putc(outbuf, n%10+'0') != -1, 358 | "Failed to write int to tnetstring buffer."); 359 | n = n / 10; 360 | } while(n > 0); 361 | 362 | return 0; 363 | 364 | error: 365 | return -1; 366 | } 367 | 368 | 369 | int tns_outbuf_init(tns_outbuf *outbuf) 370 | { 371 | outbuf->buffer = malloc(64); 372 | check_mem(outbuf->buffer); 373 | 374 | outbuf->head = outbuf->buffer + 64; 375 | outbuf->alloc_size = 64; 376 | return 0; 377 | 378 | error: 379 | outbuf->head = NULL; 380 | outbuf->alloc_size = 0; 381 | return -1; 382 | } 383 | 384 | 385 | static INLINE void tns_outbuf_free(tns_outbuf *outbuf) 386 | { 387 | if(outbuf) { 388 | free(outbuf->buffer); 389 | outbuf->buffer = NULL; 390 | outbuf->head = 0; 391 | outbuf->alloc_size = 0; 392 | } 393 | } 394 | 395 | 396 | static INLINE int tns_outbuf_extend(tns_outbuf *outbuf, size_t free_size) 397 | { 398 | char *new_buf = NULL; 399 | char *new_head = NULL; 400 | size_t new_size = outbuf->alloc_size * 2; 401 | size_t used_size; 402 | 403 | used_size = tns_outbuf_size(outbuf); 404 | 405 | while(new_size < free_size + used_size) { 406 | new_size = new_size * 2; 407 | } 408 | 409 | new_buf = malloc(new_size); 410 | check_mem(new_buf); 411 | 412 | new_head = new_buf + new_size - used_size; 413 | memmove(new_head, outbuf->head, used_size); 414 | 415 | free(outbuf->buffer); 416 | outbuf->buffer = new_buf; 417 | outbuf->head = new_head; 418 | outbuf->alloc_size = new_size; 419 | 420 | return 0; 421 | 422 | error: 423 | return -1; 424 | } 425 | 426 | 427 | int tns_outbuf_putc(tns_outbuf *outbuf, char c) 428 | { 429 | if(outbuf->buffer == outbuf->head) { 430 | check(tns_outbuf_extend(outbuf, 1) != -1, "Failed to extend buffer"); 431 | } 432 | 
433 | *(--outbuf->head) = c; 434 | 435 | return 0; 436 | 437 | error: 438 | return -1; 439 | } 440 | 441 | 442 | int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len) 443 | { 444 | if(outbuf->head - outbuf->buffer < len) { 445 | check(tns_outbuf_extend(outbuf, len) != -1, "Failed to extend buffer"); 446 | } 447 | 448 | outbuf->head -= len; 449 | memmove(outbuf->head, data, len); 450 | 451 | return 0; 452 | 453 | error: 454 | return -1; 455 | } 456 | 457 | 458 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len) 459 | { 460 | char *new_buf = NULL; 461 | size_t used_size; 462 | 463 | used_size = tns_outbuf_size(outbuf); 464 | 465 | memmove(outbuf->buffer, outbuf->head, used_size); 466 | 467 | if(len != NULL) { 468 | *len = used_size; 469 | } else { 470 | if(outbuf->head == outbuf->buffer) { 471 | new_buf = realloc(outbuf->buffer, outbuf->alloc_size*2); 472 | check_mem(new_buf); 473 | outbuf->buffer = new_buf; 474 | outbuf->alloc_size = outbuf->alloc_size * 2; 475 | } 476 | outbuf->buffer[used_size] = '\0'; 477 | } 478 | 479 | return outbuf->buffer; 480 | 481 | error: 482 | free(outbuf->buffer); 483 | outbuf->buffer = NULL; 484 | outbuf->alloc_size = 0; 485 | return NULL; 486 | } 487 | 488 | 489 | static INLINE int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size) 490 | { 491 | size_t datalen = tns_outbuf_size(outbuf) - orig_size; 492 | 493 | check(tns_outbuf_putc(outbuf, ':') != -1, "Failed to clamp outbuf"); 494 | check(tns_outbuf_itoa(outbuf, datalen) != -1, "Failed to clamp outbuf"); 495 | 496 | return 0; 497 | 498 | error: 499 | return -1; 500 | } 501 | 502 | 503 | void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest) 504 | { 505 | memmove(dest, outbuf->head, tns_outbuf_size(outbuf)); 506 | } 507 | 508 | -------------------------------------------------------------------------------- /tnetstring/tns_core.h: -------------------------------------------------------------------------------- 1 | // 2 | // tns_core.h: core code for 
a tnetstring parser in C 3 | // 4 | // This is code for parsing and rendering data in the provisional 5 | // typed-netstring format proposed for inclusion in Mongrel2. You can 6 | // think of it like a JSON library that uses a simpler wire format. 7 | // 8 | 9 | #ifndef _tns_core_h 10 | #define _tns_core_h 11 | 12 | #include 13 | #include 14 | #include 15 | /* NOTE(review): the header names after the three #include directives above are missing in this copy; restore them before compiling. */ 16 | #ifdef _MSC_VER 17 | #define INLINE __forceinline /* use __forceinline (VC++ specific) */ 18 | #else 19 | #define INLINE inline /* use standard inline */ 20 | #endif 21 | 22 | 23 | // tnetstring rendering is done using an "outbuf" struct, which combines 24 | // a malloced string with its allocation information. Rendering is done 25 | // from back to front; the details are deliberately hidden here since 26 | // I'm experimenting with multiple implementations and it might change. 27 | struct tns_outbuf_s; 28 | typedef struct tns_outbuf_s tns_outbuf; 29 | 30 | // This enumeration gives the type tag for each data type in the 31 | // tnetstring encoding. 32 | typedef enum tns_type_tag_e { 33 | tns_tag_string = ',', 34 | tns_tag_integer = '#', 35 | tns_tag_float = '^', 36 | tns_tag_bool = '!', 37 | tns_tag_null = '~', 38 | tns_tag_dict = '}', 39 | tns_tag_list = ']', 40 | } tns_type_tag; 41 | 42 | 43 | // To convert between tnetstrings and the data structures of your application 44 | // you provide the following struct filled with function pointers. They 45 | // will be called by the core parser/renderer as necessary. 46 | // 47 | // Each callback is called with the containing struct as its first argument, 48 | // to allow a primitive type of closure. 49 | 50 | struct tns_ops_s; 51 | typedef struct tns_ops_s tns_ops; 52 | 53 | struct tns_ops_s { 54 | 55 | // Get the type of a data object. 56 | tns_type_tag (*get_type)(const tns_ops *ops, void *val); 57 | 58 | // Parse various types of object from a string.
59 | void* (*parse_string)(const tns_ops *ops, const char *data, size_t len); 60 | void* (*parse_integer)(const tns_ops *ops, const char *data, size_t len); 61 | void* (*parse_float)(const tns_ops * ops, const char *data, size_t len); 62 | 63 | // Constructors for constant primitive datatypes. 64 | void* (*get_null)(const tns_ops *ops); 65 | void* (*get_true)(const tns_ops *ops); 66 | void* (*get_false)(const tns_ops *ops); 67 | 68 | // Render various types of object into a tns_outbuf. 69 | int (*render_string)(const tns_ops *ops, void *val, tns_outbuf *outbuf); 70 | int (*render_integer)(const tns_ops *ops, void *val, tns_outbuf *outbuf); 71 | int (*render_float)(const tns_ops *ops, void *val, tns_outbuf *outbuf); 72 | int (*render_bool)(const tns_ops *ops, void *val, tns_outbuf *outbuf); 73 | 74 | // Functions for building and rendering list values. 75 | // Remember that rendering is done from back to front, so 76 | // you must write the last list element first. 77 | void* (*new_list)(const tns_ops *ops); 78 | int (*add_to_list)(const tns_ops *ops, void* list, void* item); 79 | int (*render_list)(const tns_ops *ops, void* list, tns_outbuf *outbuf); 80 | 81 | // Functions for building and rendering dict values. 82 | // Remember that rendering is done from back to front, so 83 | // you must write each value first, followed by its key. 84 | void* (*new_dict)(const tns_ops *ops); 85 | int (*add_to_dict)(const tns_ops *ops, void* dict, void* key, void* item); 86 | int (*render_dict)(const tns_ops *ops, void* dict, tns_outbuf *outbuf); 87 | 88 | // Free values that are no longer in use. 89 | void (*free_value)(const tns_ops *ops, void *value); 90 | 91 | }; 92 | 93 | 94 | // Parse an object off the front of a tnetstring. 95 | // Returns a pointer to the parsed object, or NULL if an error occurs. 96 | // The fourth argument 'remain' is an output parameter; if non-NULL it will 97 | // receive the unparsed remainder of the string.
98 | extern void* tns_parse(const tns_ops *ops, const char *data, size_t len, char** remain); 99 | 100 | // If you need to read the length prefix yourself, e.g. because you're 101 | // reading data off a socket, you can use this function to get just 102 | // the payload parsing logic. 103 | extern void* tns_parse_payload(const tns_ops *ops, tns_type_tag type, const char *data, size_t len); 104 | 105 | // Render an object into a string. 106 | // On success this function returns a malloced string containing 107 | // the serialization of the given object. The third argument 108 | // 'len' is an output parameter that will receive the number of bytes in 109 | // the string; if NULL then the string will be null-terminated. 110 | // The caller is responsible for freeing the returned string. 111 | // On failure this function returns NULL and 'len' is unmodified. 112 | extern char* tns_render(const tns_ops *ops, void *val, size_t *len); 113 | 114 | // If you need to copy the final result off somewhere else, you 115 | // might like to build your own rendering function from the following. 116 | // It will avoid some double-copying that tns_render does internally. 117 | // Basic plan: Initialize an outbuf, pass it to tns_render_value, then 118 | // copy the bytes away using tns_outbuf_memmove. 119 | extern int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf); 120 | extern int tns_outbuf_init(tns_outbuf *outbuf); 121 | extern void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest); 122 | 123 | // Use these functions for rendering into an outbuf.
124 | extern size_t tns_outbuf_size(tns_outbuf *outbuf); 125 | extern int tns_outbuf_putc(tns_outbuf *outbuf, char c); 126 | extern int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len); 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /tools/shootout.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import random 4 | 5 | import cjson 6 | import ujson 7 | import tnetstring 8 | import marshal 9 | 10 | from tnetstring.tests.test_format import FORMAT_EXAMPLES, get_random_object 11 | 12 | TESTS = [] 13 | def add_test(v): 14 | # Record v in TESTS as (v, tnetstring encoding, cjson encoding, marshal encoding), but only if both cjson and ujson round-trip it; any value that makes either check raise is silently skipped. 15 | try: 16 | assert cjson.decode(cjson.encode(v)) == v 17 | assert ujson.loads(ujson.dumps(v)) == v 18 | except Exception: 19 | pass 20 | else: 21 | TESTS.append((v,tnetstring.dumps(v),cjson.encode(v),marshal.dumps(v))) 22 | 23 | # Test it on all our format examples. 24 | for (k,v) in FORMAT_EXAMPLES.iteritems(): 25 | add_test(v) 26 | 27 | # And on some randomly-generated objects. 28 | # Use a fixed random seed for consistency.
r = random.Random(7)
for _ in xrange(20):
    v = get_random_object(r)
    add_test(v)


# Benchmark mode, selected by the optional first command-line argument:
# "dumps" exercises encoding only, "loads" exercises decoding only, and
# "roundtrip" (also the default when no argument is given) does both.
TEST_DUMP_ONLY = False
TEST_LOAD_ONLY = False
if len(sys.argv) > 1:
    if sys.argv[1] == "dumps":
        TEST_DUMP_ONLY = True
    elif sys.argv[1] == "loads":
        TEST_LOAD_ONLY = True
    elif sys.argv[1] != "roundtrip":
        raise ValueError("unknown test type: " + sys.argv[1])


# NOTE: in the four thrash_* functions below the call being benchmarked is
# kept outside the ``assert`` statement.  Under ``python -O`` assert
# statements are stripped, which would otherwise strip the timed work too.

def thrash_tnetstring():
    """Run one tnetstring pass (in the selected mode) over every TESTS entry.

    Raises AssertionError if a decoded value differs from the original.
    """
    for obj, tns, _json, _msh in TESTS:
        if TEST_DUMP_ONLY:
            tnetstring.dumps(obj)
            continue
        if TEST_LOAD_ONLY:
            result = tnetstring.loads(tns)
        else:
            result = tnetstring.loads(tnetstring.dumps(obj))
        assert result == obj


def thrash_cjson():
    """Run one cjson pass (in the selected mode) over every TESTS entry.

    Raises AssertionError if a decoded value differs from the original.
    """
    for obj, _tns, json, _msh in TESTS:
        if TEST_DUMP_ONLY:
            cjson.encode(obj)
            continue
        if TEST_LOAD_ONLY:
            result = cjson.decode(json)
        else:
            result = cjson.decode(cjson.encode(obj))
        assert result == obj


def thrash_ujson():
    """Run one ujson pass (in the selected mode) over every TESTS entry.

    Load-only mode decodes the JSON text stored in TESTS, which add_test
    produced with cjson.  Raises AssertionError if a decoded value differs
    from the original.
    """
    for obj, _tns, json, _msh in TESTS:
        if TEST_DUMP_ONLY:
            ujson.dumps(obj)
            continue
        if TEST_LOAD_ONLY:
            result = ujson.loads(json)
        else:
            result = ujson.loads(ujson.dumps(obj))
        assert result == obj


def thrash_marshal():
    """Run one marshal pass (in the selected mode) over every TESTS entry.

    Raises AssertionError if a decoded value differs from the original.
    """
    for obj, _tns, _json, msh in TESTS:
        if TEST_DUMP_ONLY:
            marshal.dumps(obj)
            continue
        if TEST_LOAD_ONLY:
            result = marshal.loads(msh)
        else:
            result = marshal.loads(marshal.dumps(obj))
        assert result == obj


if __name__ == "__main__":
    import timeit

    def _best_time(func_name):
        """Return the fastest of timeit's repeats of 10000 calls to func_name."""
        # Import from __main__ rather than from "shootout": the latter loads
        # this script a second time under another module name (rebuilding
        # TESTS) and fails whenever the file is not importable as "shootout".
        timer = timeit.Timer(func_name + "()",
                             "from __main__ import " + func_name)
        return min(timer.repeat(number=10000))

    # Single-argument print(...) produces the same output as the print
    # statement under the Python 2 this script targets.
    t1 = _best_time("thrash_tnetstring")
    print("tnetstring %s" % (t1,))

    for label, func_name in (("cjson", "thrash_cjson"),
                             ("ujson", "thrash_ujson"),
                             ("marshal", "thrash_marshal")):
        other = _best_time(func_name)
        print("%s: %s" % (label, other))
        # Percentage of the other library's time that tnetstring saves.
        print("speedup:  %s %%" % (round((other - t1) / (other) * 100, 2),))