├── .gitignore
├── ChangeLog.txt
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── setup.py
├── tnetstring
├── __init__.py
├── _tnetstring.c
├── dbg.h
├── tests
│ ├── __init__.py
│ ├── test_format.py
│ └── test_misc.py
├── tns_core.c
└── tns_core.h
└── tools
└── shootout.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.pyo
3 | *~
4 | *.swp
5 | build/
6 | MANIFEST
7 |
--------------------------------------------------------------------------------
/ChangeLog.txt:
--------------------------------------------------------------------------------
1 |
2 | v0.2.1:
3 |
4 | * Fix memory leak in tnetstring.pop(); thanks tarvip.
5 | * Fix bug in handling of large integers; thanks gdamjan.
6 |
7 |
8 | v0.2.0:
9 |
10 | * Easy loading of unicode strings. If you pass an optional "encoding"
11 | argument to load/loads/pop then it will return unicode string objects
12 | rather than byte strings.
13 | * Easy dumping of unicode strings. If you pass an optional "encoding"
14 | argument to dump/dumps then it will write unicode strings in that
15 | encoding.
16 |
17 |
18 | v0.1.0:
19 |
20 | * Initial version; you might say *everything* has changed.
21 |
22 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 Ryan Kelly
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 |
2 | include LICENSE.txt
3 | include ChangeLog.txt
4 | include README.rst
5 | recursive-include tnetstring *.c
6 | recursive-include tnetstring *.h
7 | recursive-include tnetstring/tests *.txt
8 |
9 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 |
2 | Status: Unmaintained
3 | ====================
4 |
5 | .. image:: http://unmaintained.tech/badge.svg
6 | :target: http://unmaintained.tech/
7 | :alt: No Maintenance Intended
8 |
9 | I am no longer actively maintaining this project.
10 |
11 |
12 | tnetstring: data serialization using typed netstrings
13 | ======================================================
14 |
15 |
16 | This is a data serialization library. It's a lot like JSON but it uses a
17 | new syntax called "typed netstrings" that Zed has proposed for use in the
18 | Mongrel2 webserver. It's designed to be simpler and easier to implement
19 | than JSON, with a happy consequence of also being faster in many cases.
20 |
21 | An ordinary netstring is a blob of data prefixed with its length and postfixed
22 | with a sanity-checking comma. The string "hello world" encodes like this::
23 |
24 | 11:hello world,
25 |
26 | Typed netstrings add other datatypes by replacing the comma with a type tag.
27 | Here's the integer 12345 encoded as a tnetstring::
28 |
29 | 5:12345#
30 |
31 | And here's the list [12345,True,0] which mixes integers and bools::
32 |
33 | 19:5:12345#4:true!1:0#]
34 |
35 | Simple enough? This module gives you the following functions:
36 |
37 | :dump: dump an object as a tnetstring to a file
38 | :dumps: dump an object as a tnetstring to a string
39 | :load: load a tnetstring-encoded object from a file
40 | :loads: load a tnetstring-encoded object from a string
41 | :pop: pop a tnetstring-encoded object from the front of a string
42 |
43 | Note that since parsing a tnetstring requires reading all the data into memory
44 | at once, there's no efficiency gain from using the file-based versions of these
45 | functions. They're only here so you can use load() to read precisely one
46 | item from a file or socket without consuming any extra data.
47 |
48 | The tnetstrings specification explicitly states that strings are binary blobs
49 | and forbids the use of unicode at the protocol level. As a convenience to
50 | python programmers, this library lets you specify an application-level encoding
51 | to translate python's unicode strings to and from binary blobs:
52 |
53 | >>> print repr(tnetstring.loads("2:\xce\xb1,"))
54 | '\xce\xb1'
55 | >>>
56 | >>> print repr(tnetstring.loads("2:\xce\xb1,", "utf8"))
57 | u'\u03b1'
58 |
59 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#
#  This is the tnetstring setuptools script.
#  Originally developed by Ryan Kelly, 2011.
#
#  This script is placed in the public domain.
#  If there's no public domain where you come from,
#  you can use it under the MIT license.
#

import sys
setup_kwds = {}
if sys.version_info > (3,):
    from setuptools import setup, Extension
    #  The test package is "tnetstring.tests" (see the packages list below).
    setup_kwds["test_suite"] = "tnetstring.tests"
    setup_kwds["use_2to3"] = True
else:
    from distutils.core import setup, Extension


#  Extract the module docstring and version info without importing the
#  package (which may not be importable before it has been built).
#  We execute everything from the top of tnetstring/__init__.py up to and
#  including the line(s) that mention "__version__".
#  Any failure here is allowed to propagate: silently ignoring it would
#  only surface later as an unhelpful KeyError on info["__version__"].
info = {}
lines = []
with open("tnetstring/__init__.py") as src:
    seen_version = False
    for ln in src:
        if "__version__" in ln:
            seen_version = True
        elif seen_version:
            #  First line past the version definition; we have all we need.
            break
        lines.append(ln)
exec("".join(lines),info)


NAME = "tnetstring"
VERSION = info["__version__"]
DESCRIPTION = "data serialization using typed netstrings"
LONG_DESC = info["__doc__"]
AUTHOR = "Ryan Kelly"
AUTHOR_EMAIL = "ryan@rfk.id.au"
URL="http://github.com/rfk/tnetstring"
LICENSE = "MIT"
KEYWORDS = "netstring serialize"
CLASSIFIERS = [
    "Programming Language :: Python",
    "Programming Language :: Python :: 2",
    #"Programming Language :: Python :: 3",
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: MIT License"
]

setup(name=NAME,
      version=VERSION,
      author=AUTHOR,
      author_email=AUTHOR_EMAIL,
      url=URL,
      description=DESCRIPTION,
      long_description=LONG_DESC,
      license=LICENSE,
      keywords=KEYWORDS,
      packages=["tnetstring","tnetstring.tests"],
      ext_modules = [
          Extension(name="_tnetstring",sources=["tnetstring/_tnetstring.c"]),
      ],
      classifiers=CLASSIFIERS,
      **setup_kwds
     )
76 |
77 |
--------------------------------------------------------------------------------
/tnetstring/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | tnetstring: data serialization using typed netstrings
3 | ======================================================
4 |
5 |
6 | This is a data serialization library. It's a lot like JSON but it uses a
7 | new syntax called "typed netstrings" that Zed has proposed for use in the
8 | Mongrel2 webserver. It's designed to be simpler and easier to implement
9 | than JSON, with a happy consequence of also being faster in many cases.
10 |
11 | An ordinary netstring is a blob of data prefixed with its length and postfixed
12 | with a sanity-checking comma. The string "hello world" encodes like this::
13 |
14 | 11:hello world,
15 |
16 | Typed netstrings add other datatypes by replacing the comma with a type tag.
17 | Here's the integer 12345 encoded as a tnetstring::
18 |
19 | 5:12345#
20 |
21 | And here's the list [12345,True,0] which mixes integers and bools::
22 |
23 | 19:5:12345#4:true!1:0#]
24 |
25 | Simple enough? This module gives you the following functions:
26 |
27 | :dump: dump an object as a tnetstring to a file
28 | :dumps: dump an object as a tnetstring to a string
29 | :load: load a tnetstring-encoded object from a file
30 | :loads: load a tnetstring-encoded object from a string
31 | :pop: pop a tnetstring-encoded object from the front of a string
32 |
33 | Note that since parsing a tnetstring requires reading all the data into memory
34 | at once, there's no efficiency gain from using the file-based versions of these
35 | functions. They're only here so you can use load() to read precisely one
36 | item from a file or socket without consuming any extra data.
37 |
38 | The tnetstrings specification explicitly states that strings are binary blobs
39 | and forbids the use of unicode at the protocol level. As a convenience to
40 | python programmers, this library lets you specify an application-level encoding
41 | to translate python's unicode strings to and from binary blobs:
42 |
43 | >>> print repr(tnetstring.loads("2:\\xce\\xb1,"))
44 | '\\xce\\xb1'
45 | >>>
46 | >>> print repr(tnetstring.loads("2:\\xce\\xb1,", "utf8"))
47 | u'\\u03b1'
48 |
49 | """
50 |
51 | __ver_major__ = 0
52 | __ver_minor__ = 2
53 | __ver_patch__ = 1
54 | __ver_sub__ = ""
55 | __version__ = "%d.%d.%d%s" % (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)
56 |
57 |
58 | from collections import deque
59 |
60 |
def dumps(value,encoding=None):
    """dumps(object,encoding=None) -> string

    Serialize a python object and return the result as a tnetstring.
    """
    #  _rdumpq() pushes output fragments onto the *left* of the deque,
    #  last fragment first, so once it returns the deque reads in output
    #  order and can simply be joined.  This avoids building all the
    #  intermediate strings and is measurably faster.
    #  For a gentler introduction to the format itself, read _gdumps():
    #  it is a plain top-down generator, easier to follow but slower.
    fragments = deque()
    _rdumpq(fragments,0,value,encoding)
    return "".join(fragments)
75 |
76 |
def dump(value,file,encoding=None):
    """dump(object,file,encoding=None)

    Serialize a python object as a tnetstring and write the result to
    the given file with a single write() call.
    """
    serialized = dumps(value,encoding)
    file.write(serialized)
84 |
85 |
def _rdumpq(q,size,value,encoding=None):
    """Dump value as a tnetstring onto a deque, final chunks first.

    Chunks of the tnetstring representation of value are pushed onto the
    left end of the deque q, starting with the last chunk (the type tag)
    and working backwards to the length prefix.  Nested values are
    handled by recursion.

    The size argument is the length of the string already held in the
    queue; the return value is the length after value has been added.

    Working backwards means the size of a nested structure is known by
    the time its length prefix must be written, without ever building
    its representation as a separate string.  This is measurably faster
    than generating intermediate strings, especially when the structure
    is deeply nested.
    """
    emit = q.appendleft
    #  The three singletons have fixed representations.
    if value is None:
        emit("0:~")
        return size + 3
    if value is True:
        emit("4:true!")
        return size + 7
    if value is False:
        emit("5:false!")
        return size + 8
    #  Scalars pick a (tag, payload) pair and fall through to the shared
    #  tail below; containers recurse and return from their own branch.
    if isinstance(value,(int,long)):
        tag, payload = "#", str(value)
    elif isinstance(value,float):
        #  repr() round-trips floats more accurately than str().
        #  Probably unnecessary in later python versions that use
        #  David Gay's ftoa routines.
        tag, payload = "^", repr(value)
    elif isinstance(value,str):
        tag, payload = ",", value
    elif isinstance(value,(list,tuple)):
        emit("]")
        body_start = size = size + 1
        #  Output is built back-to-front, so visit the items in reverse.
        for item in reversed(value):
            size = _rdumpq(q,size,item,encoding)
        prefix = str(size - body_start)
        emit(":")
        emit(prefix)
        return size + 1 + len(prefix)
    elif isinstance(value,dict):
        emit("}")
        body_start = size = size + 1
        #  Value first, then key, so the key precedes it in the output.
        for (key,val) in value.iteritems():
            size = _rdumpq(q,size,val,encoding)
            size = _rdumpq(q,size,key,encoding)
        prefix = str(size - body_start)
        emit(":")
        emit(prefix)
        return size + 1 + len(prefix)
    elif isinstance(value,unicode):
        if encoding is None:
            raise ValueError("must specify encoding to dump unicode strings")
        tag, payload = ",", value.encode(encoding)
    else:
        raise ValueError("unserializable object")
    #  Shared scalar tail: "<length>:<payload><tag>", pushed in reverse.
    payload_len = len(payload)
    prefix = str(payload_len)
    emit(tag)
    emit(payload)
    emit(":")
    emit(prefix)
    return size + 2 + len(prefix) + payload_len
172 |
173 |
def _gdumps(value,encoding=None):
    """Generate fragments of value dumped as a tnetstring.

    This is the naive dumping algorithm, implemented as a generator so that
    it's easy to pass to "".join() without building a new list.

    The encoding argument names the codec used to serialize unicode
    strings; a ValueError is raised if a unicode string is encountered
    and no encoding was given.  A ValueError is also raised for any
    object of an unsupported type.

    This is mainly here for comparison purposes; the _rdumpq version is
    measurably faster as it doesn't have to build intermediate strings.
    """
    if value is None:
        yield "0:~"
    elif value is True:
        yield "4:true!"
    elif value is False:
        yield "5:false!"
    elif isinstance(value,(int,long)):
        data = str(value)
        yield str(len(data))
        yield ":"
        yield data
        yield "#"
    elif isinstance(value,(float,)):
        #  repr() round-trips floats more accurately than str().
        data = repr(value)
        yield str(len(data))
        yield ":"
        yield data
        yield "^"
    elif isinstance(value,(str,)):
        yield str(len(value))
        yield ":"
        yield value
        yield ","
    elif isinstance(value,(list,tuple,)):
        #  The payload must be fully built before its length is known.
        #  The encoding is passed down so nested unicode strings work.
        sub = []
        for item in value:
            sub.extend(_gdumps(item,encoding))
        sub = "".join(sub)
        yield str(len(sub))
        yield ":"
        yield sub
        yield "]"
    elif isinstance(value,(dict,)):
        sub = []
        for (k,v) in value.iteritems():
            sub.extend(_gdumps(k,encoding))
            sub.extend(_gdumps(v,encoding))
        sub = "".join(sub)
        yield str(len(sub))
        yield ":"
        yield sub
        yield "}"
    elif isinstance(value,(unicode,)):
        if encoding is None:
            raise ValueError("must specify encoding to dump unicode strings")
        value = value.encode(encoding)
        yield str(len(value))
        yield ":"
        yield value
        yield ","
    else:
        raise ValueError("unserializable object")
235 |
236 |
def loads(string,encoding=None):
    """loads(string,encoding=None) -> object

    Parse a tnetstring and return the python object it encodes.
    """
    #  Simply delegate to pop() and throw away the unparsed remainder.
    #  In the C-extension version, loads() is measurably faster than
    #  pop() since it can avoid the overhead of building a second string.
    (value,_remain) = pop(string,encoding)
    return value
246 |
247 |
def load(file,encoding=None):
    """load(file,encoding=None) -> object

    Read a single tnetstring from a file and parse it into a python
    object.  The file only needs a read() method, and no more data is
    read from it than the tnetstring requires.

    Raises ValueError if the data is not a valid tnetstring.
    """
    #  The length prefix is consumed one character at a time.
    #  The netstring spec explicitly forbids padding zeros, so a leading
    #  zero must be the only digit.
    zero = ord("0")
    ch = file.read(1)
    if not ch.isdigit():
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    datalen = ord(ch) - zero
    ch = file.read(1)
    while datalen != 0 and ch.isdigit():
        datalen = (10 * datalen) + (ord(ch) - zero)
        if datalen > 999999999:
            raise ValueError("not a tnetstring: absurdly large length prefix")
        ch = file.read(1)
    if ch != ":":
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    #  Read the payload and its type tag, then dispatch on the tag.
    #  This mirrors the dispatch in pop() rather than calling it, which
    #  would mean re-assembling the outermost tnetstring first.
    payload = file.read(datalen)
    if len(payload) != datalen:
        raise ValueError("not a tnetstring: length prefix too big")
    tag = file.read(1)
    if tag == ",":
        if encoding is None:
            return payload
        return payload.decode(encoding)
    if tag == "#":
        try:
            return int(payload)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal")
    if tag == "^":
        try:
            return float(payload)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal")
    if tag == "!":
        if payload == "true":
            return True
        if payload == "false":
            return False
        raise ValueError("not a tnetstring: invalid boolean literal")
    if tag == "~":
        if payload:
            raise ValueError("not a tnetstring: invalid null literal")
        return None
    if tag == "]":
        items = []
        while payload:
            (item,payload) = pop(payload,encoding)
            items.append(item)
        return items
    if tag == "}":
        mapping = {}
        while payload:
            (key,payload) = pop(payload,encoding)
            (val,payload) = pop(payload,encoding)
            mapping[key] = val
        return mapping
    raise ValueError("unknown type tag")
317 |
318 |
319 |
def pop(string,encoding=None):
    """pop(string,encoding=None) -> (object, remain)

    This function parses a tnetstring into a python object.
    It returns a tuple giving the parsed object and a string
    containing any unparsed data from the end of the string.

    If an encoding is given, string payloads are decoded with it.
    Raises ValueError if the data is not a valid tnetstring.
    """
    #  Parse out data length, type and remaining string.
    try:
        (dlen,rest) = string.split(":",1)
    except ValueError:
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    #  The prefix must consist solely of ASCII digits, with no padding
    #  zeros (this matches the checks made by load()).  Relying on int()
    #  alone would accept signs and whitespace, and a negative length
    #  would silently mis-slice the data via negative indexing.
    if not dlen or dlen.strip("0123456789"):
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    if len(dlen) > 1 and dlen[0] == "0":
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    dlen = int(dlen)
    try:
        (data,tag,remain) = (rest[:dlen],rest[dlen],rest[dlen+1:])
    except IndexError:
        #  This fires if len(rest) <= dlen, meaning we don't need
        #  to further validate that data is the right length.
        raise ValueError("not a tnetstring: invalid length prefix")
    #  Parse the data based on the type tag.
    if tag == ",":
        if encoding is not None:
            return (data.decode(encoding),remain)
        return (data,remain)
    if tag == "#":
        try:
            return (int(data),remain)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal")
    if tag == "^":
        try:
            return (float(data),remain)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal")
    if tag == "!":
        if data == "true":
            return (True,remain)
        elif data == "false":
            return (False,remain)
        else:
            raise ValueError("not a tnetstring: invalid boolean literal")
    if tag == "~":
        if data:
            raise ValueError("not a tnetstring: invalid null literal")
        return (None,remain)
    if tag == "]":
        l = []
        while data:
            (item,data) = pop(data,encoding)
            l.append(item)
        return (l,remain)
    if tag == "}":
        d = {}
        while data:
            (key,data) = pop(data,encoding)
            (val,data) = pop(data,encoding)
            d[key] = val
        return (d,remain)
    raise ValueError("unknown type tag")
379 |
380 |
381 |
382 | # Use the c-extension version if available
383 | try:
384 | import _tnetstring
385 | except ImportError:
386 | pass
387 | else:
388 | dumps = _tnetstring.dumps
389 | load = _tnetstring.load
390 | loads = _tnetstring.loads
391 | pop = _tnetstring.pop
392 |
393 |
--------------------------------------------------------------------------------
/tnetstring/_tnetstring.c:
--------------------------------------------------------------------------------
1 | //
2 | // _tnetstring.c: python module for fast encode/decode of typed-netstrings
3 | //
4 | // You get the following functions:
5 | //
6 | // dumps: dump a python object to a tnetstring
7 | // loads: parse tnetstring into a python object
8 | // load: parse tnetstring from a file-like object
9 | // pop: parse tnetstring into a python object,
10 | // return it along with unparsed data.
11 |
12 | #include
13 |
14 |
15 | #define TNS_MAX_LENGTH 999999999
16 | #include "tns_core.c"
17 |
18 |
19 | // We have one static tns_ops struct for parsing bytestrings.
20 | static tns_ops _tnetstring_ops_bytes;
21 |
22 | // Unicode parsing ops are created on demand.
23 | // We allocate a struct containing all the function pointers along with
24 | // the encoding string, as a primitive kind of closure.
25 | // Eventually we should cache these.
26 | struct tns_ops_with_encoding_s {
27 | tns_ops ops;
28 | char *encoding;
29 | };
30 | typedef struct tns_ops_with_encoding_s tns_ops_with_encoding;
31 |
32 | static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding);
33 |
34 |
35 | // _tnetstring_loads: parse tnetstring-format value from a string.
36 | //
37 | static PyObject*
38 | _tnetstring_loads(PyObject* self, PyObject *args)
39 | {
40 | PyObject *string = NULL;
41 | PyObject *encoding = Py_None;
42 | PyObject *val = NULL;
43 | tns_ops *ops = &_tnetstring_ops_bytes;
44 | char *data;
45 | size_t len;
46 |
47 | if(!PyArg_UnpackTuple(args, "loads", 1, 2, &string, &encoding)) {
48 | return NULL;
49 | }
50 | if(!PyString_Check(string)) {
51 | PyErr_SetString(PyExc_TypeError, "arg must be a string");
52 | return NULL;
53 | }
54 | Py_INCREF(string);
55 |
56 | if(encoding == Py_None) {
57 | data = PyString_AS_STRING(string);
58 | len = PyString_GET_SIZE(string);
59 | val = tns_parse(ops, data, len, NULL);
60 | } else {
61 | if(!PyString_Check(encoding)) {
62 | PyErr_SetString(PyExc_TypeError, "encoding must be a string");
63 | goto error;
64 | }
65 | Py_INCREF(encoding);
66 | ops = _tnetstring_get_unicode_ops(encoding);
67 | if(ops == NULL) {
68 | Py_DECREF(encoding);
69 | goto error;
70 | }
71 | data = PyString_AS_STRING(string);
72 | len = PyString_GET_SIZE(string);
73 | val = tns_parse(ops, data, len, NULL);
74 | free(ops);
75 | Py_DECREF(encoding);
76 | }
77 |
78 | Py_DECREF(string);
79 | return val;
80 |
81 | error:
82 | Py_DECREF(string);
83 | return NULL;
84 | }
85 |
86 |
87 | // _tnetstring_load: parse tnetstring-format value from a file.
88 | //
89 | // This takes care to read no more data than is required to get the
90 | // full tnetstring-encoded value. It might read arbitrarily-much
91 | // data if the file doesn't begin with a valid tnetstring.
92 | //
93 | static PyObject*
94 | _tnetstring_load(PyObject* self, PyObject *args)
95 | {
96 | PyObject *val = NULL;
97 | PyObject *file = NULL;
98 | PyObject *encoding = Py_None;
99 | PyObject *methnm = NULL;
100 | PyObject *metharg = NULL;
101 | PyObject *res = NULL;
102 | tns_ops *ops = &_tnetstring_ops_bytes;
103 | char c, *data;
104 | size_t datalen = 0;
105 |
106 | if(!PyArg_UnpackTuple(args, "load", 1, 2, &file, &encoding)) {
107 | goto error;
108 | }
109 | Py_INCREF(file);
110 |
111 | if(encoding != Py_None) {
112 | if(!PyString_Check(encoding)) {
113 | PyErr_SetString(PyExc_TypeError, "encoding must be a string");
114 | goto error;
115 | }
116 | Py_INCREF(encoding);
117 | ops = _tnetstring_get_unicode_ops(encoding);
118 | if(ops == NULL) {
119 | goto error;
120 | }
121 | }
122 |
123 | // We're going to read one char at a time
124 | if((methnm = PyString_FromString("read")) == NULL) {
125 | goto error;
126 | }
127 | if((metharg = PyInt_FromLong(1)) == NULL) {
128 | goto error;
129 | }
130 |
131 | // Read the length prefix one char at a time
132 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
133 | if(res == NULL) {
134 | goto error;
135 | }
136 | Py_INCREF(res);
137 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
138 | PyErr_SetString(PyExc_ValueError,
139 | "Not a tnetstring: invalid or missing length prefix");
140 | goto error;
141 | }
142 | c = PyString_AS_STRING(res)[0];
143 | Py_DECREF(res); res = NULL;
144 | // Note that the netstring spec explicitly forbids padding zeroes.
145 | // If the first char is zero, it must be the only char.
146 | if(c < '0' || c > '9') {
147 | PyErr_SetString(PyExc_ValueError,
148 | "Not a tnetstring: invalid or missing length prefix");
149 | goto error;
150 | } else if (c == '0') {
151 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
152 | if(res == NULL) {
153 | goto error;
154 | }
155 | Py_INCREF(res);
156 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
157 | PyErr_SetString(PyExc_ValueError,
158 | "Not a tnetstring: invalid or missing length prefix");
159 | goto error;
160 | }
161 | c = PyString_AS_STRING(res)[0];
162 | Py_DECREF(res); res = NULL;
163 | } else {
164 | do {
165 | datalen = (10 * datalen) + (c - '0');
166 | check(datalen <= TNS_MAX_LENGTH,
167 | "Not a tnetstring: absurdly large length prefix");
168 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
169 | if(res == NULL) {
170 | goto error;
171 | }
172 | Py_INCREF(res);
173 | if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
174 | PyErr_SetString(PyExc_ValueError,
175 | "Not a tnetstring: invalid or missing length prefix");
176 | goto error;
177 | }
178 | c = PyString_AS_STRING(res)[0];
179 | Py_DECREF(res); res = NULL;
180 | } while(c >= '0' && c <= '9');
181 | }
182 |
183 | // Validate end-of-length-prefix marker.
184 | if(c != ':') {
185 | PyErr_SetString(PyExc_ValueError,
186 | "Not a tnetstring: missing length prefix");
187 | goto error;
188 | }
189 |
190 | // Read the data plus terminating type tag.
191 | Py_DECREF(metharg);
192 | if((metharg = PyInt_FromSize_t(datalen + 1)) == NULL) {
193 | goto error;
194 | }
195 | res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
196 | if(res == NULL) {
197 | goto error;
198 | }
199 | Py_INCREF(res);
200 | Py_DECREF(file); file = NULL;
201 | Py_DECREF(methnm); methnm = NULL;
202 | Py_DECREF(metharg); metharg = NULL;
203 | if(!PyString_Check(res) || PyString_GET_SIZE(res) != datalen + 1) {
204 | PyErr_SetString(PyExc_ValueError,
205 | "Not a tnetstring: invalid length prefix");
206 | goto error;
207 | }
208 |
209 | // Parse out the payload object
210 | data = PyString_AS_STRING(res);
211 | val = tns_parse_payload(ops, data[datalen], data, datalen);
212 | Py_DECREF(res); res = NULL;
213 |
214 | if(ops != &_tnetstring_ops_bytes) {
215 | free(ops);
216 | Py_DECREF(encoding);
217 | }
218 |
219 | return val;
220 |
221 | error:
222 | if(file != NULL) {
223 | Py_DECREF(file);
224 | }
225 | if(ops != &_tnetstring_ops_bytes) {
226 | free(ops);
227 | Py_DECREF(encoding);
228 | }
229 | if(methnm != NULL) {
230 | Py_DECREF(methnm);
231 | }
232 | if(metharg != NULL) {
233 | Py_DECREF(metharg);
234 | }
235 | if(res != NULL) {
236 | Py_DECREF(res);
237 | }
238 | if(val != NULL) {
239 | Py_DECREF(val);
240 | }
241 | return NULL;
242 | }
243 |
244 |
245 | static PyObject*
246 | _tnetstring_pop(PyObject* self, PyObject *args)
247 | {
248 | PyObject *string = NULL;
249 | PyObject *val = NULL;
250 | PyObject *rest = NULL;
251 | PyObject *result = NULL;
252 | PyObject *encoding = Py_None;
253 | tns_ops *ops = &_tnetstring_ops_bytes;
254 | char *data, *remain;
255 | size_t len;
256 |
257 | if(!PyArg_UnpackTuple(args, "pop", 1, 2, &string, &encoding)) {
258 | return NULL;
259 | }
260 | if(!PyString_Check(string)) {
261 | PyErr_SetString(PyExc_TypeError, "arg must be a string");
262 | return NULL;
263 | }
264 | if(encoding != Py_None) {
265 | if(!PyString_Check(encoding)) {
266 | PyErr_SetString(PyExc_TypeError, "encoding must be a string");
267 | return NULL;
268 | }
269 | Py_INCREF(encoding);
270 | ops = _tnetstring_get_unicode_ops(encoding);
271 | if(ops == NULL) {
272 | Py_DECREF(encoding);
273 | return NULL;
274 | }
275 | }
276 | Py_INCREF(string);
277 |
278 | data = PyString_AS_STRING(string);
279 | len = PyString_GET_SIZE(string);
280 | val = tns_parse(ops, data, len, &remain);
281 | Py_DECREF(string);
282 | if(ops != &_tnetstring_ops_bytes) {
283 | free(ops);
284 | Py_DECREF(encoding);
285 | }
286 | if(val == NULL) {
287 | return NULL;
288 | }
289 |
290 | rest = PyString_FromStringAndSize(remain, len-(remain-data));
291 | if(rest == NULL) {
292 | result = NULL;
293 | } else {
294 | result = PyTuple_Pack(2, val, rest);
295 | Py_DECREF(rest);
296 | }
297 | Py_DECREF(val);
298 | return result;
299 | }
300 |
301 |
302 | static PyObject*
303 | _tnetstring_dumps(PyObject* self, PyObject *args)
304 | {
305 | PyObject *object = NULL;
306 | PyObject *string = NULL;
307 | PyObject *encoding = Py_None;
308 | tns_ops *ops = &_tnetstring_ops_bytes;
309 | tns_outbuf outbuf;
310 |
311 | if(!PyArg_UnpackTuple(args, "dumps", 1, 2, &object, &encoding)) {
312 | return NULL;
313 | }
314 | if(encoding != Py_None) {
315 | if(!PyString_Check(encoding)) {
316 | PyErr_SetString(PyExc_TypeError, "encoding must be a string");
317 | return NULL;
318 | }
319 | Py_INCREF(encoding);
320 | ops = _tnetstring_get_unicode_ops(encoding);
321 | if(ops == NULL) {
322 | Py_DECREF(encoding);
323 | return NULL;
324 | }
325 | }
326 | Py_INCREF(object);
327 |
328 | if(tns_outbuf_init(&outbuf) == -1) {
329 | goto error;
330 | }
331 | if(tns_render_value(ops, object, &outbuf) == -1) {
332 | goto error;
333 | }
334 |
335 | Py_DECREF(object);
336 | string = PyString_FromStringAndSize(NULL,tns_outbuf_size(&outbuf));
337 | if(string == NULL) {
338 | goto error;
339 | }
340 |
341 | tns_outbuf_memmove(&outbuf, PyString_AS_STRING(string));
342 | free(outbuf.buffer);
343 |
344 | if(ops != &_tnetstring_ops_bytes) {
345 | free(ops);
346 | Py_DECREF(encoding);
347 | }
348 |
349 | return string;
350 |
351 | error:
352 | if(ops != &_tnetstring_ops_bytes) {
353 | free(ops);
354 | Py_DECREF(encoding);
355 | }
356 | Py_DECREF(object);
357 | return NULL;
358 | }
359 |
360 |
361 | static PyMethodDef _tnetstring_methods[] = {
362 | {"load",
363 | (PyCFunction)_tnetstring_load,
364 | METH_VARARGS,
365 | PyDoc_STR("load(file,encoding=None) -> object\n"
366 | "This function reads a tnetstring from a file and parses it\n"
367 | "into a python object.")},
368 |
369 | {"loads",
370 | (PyCFunction)_tnetstring_loads,
371 | METH_VARARGS,
372 | PyDoc_STR("loads(string,encoding=None) -> object\n"
373 | "This function parses a tnetstring into a python object.")},
374 |
375 | {"pop",
376 | (PyCFunction)_tnetstring_pop,
377 | METH_VARARGS,
378 | PyDoc_STR("pop(string,encoding=None) -> (object, remain)\n"
379 | "This function parses a tnetstring into a python object.\n"
380 | "It returns a tuple giving the parsed object and a string\n"
381 | "containing any unparsed data.")},
382 |
383 | {"dumps",
384 | (PyCFunction)_tnetstring_dumps,
385 | METH_VARARGS,
386 | PyDoc_STR("dumps(object,encoding=None) -> string\n"
387 | "This function dumps a python object as a tnetstring.")},
388 |
389 | {NULL, NULL}
390 | };
391 |
392 |
393 | // Functions to hook the parser core up to python.
394 |
395 | static void*
396 | tns_parse_string(const tns_ops *ops, const char *data, size_t len)
397 | {
398 | return PyString_FromStringAndSize(data, len);
399 | }
400 |
401 |
402 | static void*
403 | tns_parse_unicode(const tns_ops *ops, const char *data, size_t len)
404 | {
405 | char* encoding = ((tns_ops_with_encoding*)ops)->encoding;
406 | return PyUnicode_Decode(data, len, encoding, NULL);
407 | }
408 |
409 |
410 | static void*
411 | tns_parse_integer(const tns_ops *ops, const char *data, size_t len)
412 | {
413 | long l = 0;
414 | long long ll = 0;
415 | int sign = 1;
416 | char c;
417 | char *dataend;
418 | const char *pos, *eod;
419 | PyObject *v = NULL;
420 |
421 | // Anything with less than 10 digits, we can fit into a long.
422 | // Hand-parsing, as we need tighter error-checking than strtol.
423 | if (len < 10) {
424 | pos = data;
425 | eod = data + len;
426 | c = *pos++;
427 | switch(c) {
428 | case '0':
429 | case '1':
430 | case '2':
431 | case '3':
432 | case '4':
433 | case '5':
434 | case '6':
435 | case '7':
436 | case '8':
437 | case '9':
438 | l = c - '0';
439 | break;
440 | case '+':
441 | break;
442 | case '-':
443 | sign = -1;
444 | break;
445 | default:
446 | sentinel("invalid integer literal");
447 | }
448 | while(pos < eod) {
449 | c = *pos++;
450 | check(c >= '0' && c <= '9', "invalid integer literal");
451 | l = (l * 10) + (c - '0');
452 | }
453 | return PyLong_FromLong(l * sign);
454 | }
455 | // Anything with less than 19 digits fits in a long long.
456 | // Hand-parsing, as we need tighter error-checking than strtoll.
457 | else if(len < 19) {
458 | pos = data;
459 | eod = data + len;
460 | c = *pos++;
461 | switch(c) {
462 | case '0':
463 | case '1':
464 | case '2':
465 | case '3':
466 | case '4':
467 | case '5':
468 | case '6':
469 | case '7':
470 | case '8':
471 | case '9':
472 | ll = c - '0';
473 | break;
474 | case '+':
475 | break;
476 | case '-':
477 | sign = -1;
478 | break;
479 | default:
480 | sentinel("invalid integer literal");
481 | }
482 | while(pos < eod) {
483 | c = *pos++;
484 | check(c >= '0' && c <= '9', "invalid integer literal");
485 | ll = (ll * 10) + (c - '0');
486 | }
487 | return PyLong_FromLongLong(ll * sign);
488 | }
489 | // Really big numbers are passed to python's native parser.
490 | else {
491 | // PyLong_FromString allows leading whitespace, so we have to check
492 | // that there is none present in the string.
493 | c = *data;
494 | switch(c) {
495 | case '0':
496 | case '1':
497 | case '2':
498 | case '3':
499 | case '4':
500 | case '5':
501 | case '6':
502 | case '7':
503 | case '8':
504 | case '9':
505 | break;
506 | case '+':
507 | case '-':
508 | c = *(data+1);
509 | check(c >= '0' && c <= '9', "invalid integer literal");
510 | break;
511 | default:
512 | sentinel("invalid integer literal");
513 | }
514 | // PyLong_FromString insists that the string end in a NULL byte.
515 | // I am *not* copying all that data. Instead we lie a little bit
516 | // about the const-ness of data, write a NULL over the format terminator
517 | // and restore the original character when we're done.
518 | c = data[len];
519 | ((char*)data)[len] = '\0';
520 | v = PyLong_FromString((char *)data, &dataend, 10);
521 | ((char*)data)[len] = c;
522 | check(dataend == data + len, "invalid integer literal");
523 | return v;
524 | }
525 | sentinel("invalid code branch, check your compiler...");
526 |
527 | error:
528 | return NULL;
529 | }
530 |
531 |
532 | static void*
533 | tns_parse_float(const tns_ops *ops, const char *data, size_t len)
534 | {
535 | double d = 0;
536 | char *dataend;
537 |
538 | // Technically this allows whitespace around the float, which
539 | // isn't valid in a tnetstring. But I don't want to waste the
540 | // time checking and I am *not* reimplementing strtod.
541 | d = strtod(data, &dataend);
542 | if(dataend != data + len) {
543 | return NULL;
544 | }
545 | return PyFloat_FromDouble(d);
546 | }
547 |
548 |
549 | static void*
550 | tns_get_null(const tns_ops *ops)
551 | {
552 | Py_INCREF(Py_None);
553 | return Py_None;
554 | }
555 |
556 |
557 | static void*
558 | tns_get_true(const tns_ops *ops)
559 | {
560 | Py_INCREF(Py_True);
561 | return Py_True;
562 | }
563 |
564 |
565 | static void*
566 | tns_get_false(const tns_ops *ops)
567 | {
568 | Py_INCREF(Py_False);
569 | return Py_False;
570 | }
571 |
572 |
573 | static void*
574 | tns_new_dict(const tns_ops *ops)
575 | {
576 | return PyDict_New();
577 | }
578 |
579 |
580 | static void*
581 | tns_new_list(const tns_ops *ops)
582 | {
583 | return PyList_New(0);
584 | }
585 |
586 |
587 | static void
588 | tns_free_value(const tns_ops *ops, void *value)
589 | {
590 | Py_XDECREF(value);
591 | }
592 |
593 |
594 | static int
595 | tns_add_to_dict(const tns_ops *ops, void *dict, void *key, void *item)
596 | {
597 | int res;
598 | res = PyDict_SetItem(dict, key, item);
599 | Py_DECREF(key);
600 | Py_DECREF(item);
601 | if(res == -1) {
602 | return -1;
603 | }
604 | return 0;
605 | }
606 |
607 |
608 | static int
609 | tns_add_to_list(const tns_ops *ops, void *list, void *item)
610 | {
611 | int res;
612 | res = PyList_Append(list, item);
613 | Py_DECREF(item);
614 | if(res == -1) {
615 | return -1;
616 | }
617 | return 0;
618 | }
619 |
620 |
621 | static int
622 | tns_render_string(const tns_ops *ops, void *val, tns_outbuf *outbuf)
623 | {
624 | return tns_outbuf_puts(outbuf, PyString_AS_STRING(val),
625 | PyString_GET_SIZE(val));
626 | }
627 |
628 |
629 | static int
630 | tns_render_unicode(const tns_ops *ops, void *val, tns_outbuf *outbuf)
631 | {
632 | PyObject *bytes;
633 | char* encoding = ((tns_ops_with_encoding*)ops)->encoding;
634 |
635 | if(PyUnicode_Check(val)) {
636 | bytes = PyUnicode_Encode(PyUnicode_AS_UNICODE(val),
637 | PyUnicode_GET_SIZE(val),
638 | encoding, NULL);
639 | if(bytes == NULL) {
640 | return -1;
641 | }
642 | if(tns_render_string(ops, bytes, outbuf) == -1) {
643 | return -1;
644 | }
645 | Py_DECREF(bytes);
646 | return 0;
647 | }
648 |
649 | if(PyString_Check(val)) {
650 | return tns_render_string(ops, val, outbuf);
651 | }
652 |
653 | return -1;
654 | }
655 |
656 |
657 | static int
658 | tns_render_integer(const tns_ops *ops, void *val, tns_outbuf *outbuf)
659 | {
660 | PyObject *string = NULL;
661 | int res = 0;
662 |
663 | string = PyObject_Str(val);
664 | if(string == NULL) {
665 | return -1;
666 | }
667 |
668 | res = tns_render_string(ops, string, outbuf);
669 | Py_DECREF(string);
670 | return res;
671 | }
672 |
673 |
674 | static int
675 | tns_render_float(const tns_ops *ops, void *val, tns_outbuf *outbuf)
676 | {
677 | PyObject *string;
678 | int res = 0;
679 |
680 | string = PyObject_Repr(val);
681 | if(string == NULL) {
682 | return -1;
683 | }
684 |
685 | res = tns_render_string(ops, string, outbuf);
686 | Py_DECREF(string);
687 | return res;
688 | }
689 |
690 |
691 | static int
692 | tns_render_bool(const tns_ops *ops, void *val, tns_outbuf *outbuf)
693 | {
694 | if(val == Py_True) {
695 | return tns_outbuf_puts(outbuf, "true", 4);
696 | } else {
697 | return tns_outbuf_puts(outbuf, "false", 5);
698 | }
699 | }
700 |
701 |
702 | static int
703 | tns_render_dict(const tns_ops *ops, void *val, tns_outbuf *outbuf)
704 | {
705 | PyObject *key, *item;
706 | Py_ssize_t pos = 0;
707 |
708 | while(PyDict_Next(val, &pos, &key, &item)) {
709 | if(tns_render_value(ops, item, outbuf) == -1) {
710 | return -1;
711 | }
712 | if(tns_render_value(ops, key, outbuf) == -1) {
713 | return -1;
714 | }
715 | }
716 | return 0;
717 | }
718 |
719 |
720 | static int
721 | tns_render_list(const tns_ops *ops, void *val, tns_outbuf *outbuf)
722 | {
723 | PyObject *item;
724 | Py_ssize_t idx;
725 |
726 | // Remember, all output is in reverse.
727 | // So we must write the last element first.
728 | idx = PyList_GET_SIZE(val) - 1;
729 | while(idx >= 0) {
730 | item = PyList_GET_ITEM(val, idx);
731 | if(tns_render_value(ops, item, outbuf) == -1) {
732 | return -1;
733 | }
734 | idx--;
735 | }
736 | return 0;
737 | }
738 |
739 |
740 | static
741 | tns_type_tag tns_get_type(const tns_ops *ops, void *val)
742 | {
743 | if(val == Py_True || val == Py_False) {
744 | return tns_tag_bool;
745 | }
746 | if(val == Py_None) {
747 | return tns_tag_null;
748 | }
749 | if(PyInt_Check((PyObject*)val) || PyLong_Check((PyObject*)val)) {
750 | return tns_tag_integer;
751 | }
752 | if(PyFloat_Check((PyObject*)val)) {
753 | return tns_tag_float;
754 | }
755 | if(PyString_Check((PyObject*)val)) {
756 | return tns_tag_string;
757 | }
758 | if(PyList_Check((PyObject*)val)) {
759 | return tns_tag_list;
760 | }
761 | if(PyDict_Check((PyObject*)val)) {
762 | return tns_tag_dict;
763 | }
764 | return 0;
765 | }
766 |
767 |
768 | static
769 | tns_type_tag tns_get_type_unicode(const tns_ops *ops, void *val)
770 | {
771 | tns_type_tag type = 0;
772 |
773 | type = tns_get_type(ops, val);
774 | if(type == 0) {
775 | if(PyUnicode_Check(val)) {
776 | type = tns_tag_string;
777 | }
778 | }
779 |
780 | return type;
781 | }
782 |
783 |
784 | static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding)
785 | {
786 | tns_ops_with_encoding *opswe = NULL;
787 | tns_ops *ops = NULL;
788 |
789 | opswe = malloc(sizeof(tns_ops_with_encoding));
790 | if(opswe == NULL) {
791 | PyErr_SetString(PyExc_MemoryError, "could not allocate ops struct");
792 | return NULL;
793 | }
794 | ops = (tns_ops*)opswe;
795 |
796 | opswe->encoding = PyString_AS_STRING(encoding);
797 |
798 | ops->get_type = &tns_get_type_unicode;
799 | ops->free_value = &tns_free_value;
800 |
801 | ops->parse_string = tns_parse_unicode;
802 | ops->parse_integer = tns_parse_integer;
803 | ops->parse_float = tns_parse_float;
804 | ops->get_null = tns_get_null;
805 | ops->get_true = tns_get_true;
806 | ops->get_false = tns_get_false;
807 |
808 | ops->render_string = tns_render_unicode;
809 | ops->render_integer = tns_render_integer;
810 | ops->render_float = tns_render_float;
811 | ops->render_bool = tns_render_bool;
812 |
813 | ops->new_dict = tns_new_dict;
814 | ops->add_to_dict = tns_add_to_dict;
815 | ops->render_dict = tns_render_dict;
816 |
817 | ops->new_list = tns_new_list;
818 | ops->add_to_list = tns_add_to_list;
819 | ops->render_list = tns_render_list;
820 |
821 | return ops;
822 | }
823 |
824 |
// Docstring for the _tnetstring module; passed to Py_InitModule3 in
// init_tnetstring.
PyDoc_STRVAR(module_doc,
"Fast encoding/decoding of typed-netstrings."
);
828 |
829 |
830 | PyMODINIT_FUNC
831 | init_tnetstring(void)
832 | {
833 | Py_InitModule3("_tnetstring", _tnetstring_methods, module_doc);
834 |
835 | // Initialize function pointers for parsing bytes.
836 | _tnetstring_ops_bytes.get_type = &tns_get_type;
837 | _tnetstring_ops_bytes.free_value = &tns_free_value;
838 |
839 | _tnetstring_ops_bytes.parse_string = tns_parse_string;
840 | _tnetstring_ops_bytes.parse_integer = tns_parse_integer;
841 | _tnetstring_ops_bytes.parse_float = tns_parse_float;
842 | _tnetstring_ops_bytes.get_null = tns_get_null;
843 | _tnetstring_ops_bytes.get_true = tns_get_true;
844 | _tnetstring_ops_bytes.get_false = tns_get_false;
845 |
846 | _tnetstring_ops_bytes.render_string = tns_render_string;
847 | _tnetstring_ops_bytes.render_integer = tns_render_integer;
848 | _tnetstring_ops_bytes.render_float = tns_render_float;
849 | _tnetstring_ops_bytes.render_bool = tns_render_bool;
850 |
851 | _tnetstring_ops_bytes.new_dict = tns_new_dict;
852 | _tnetstring_ops_bytes.add_to_dict = tns_add_to_dict;
853 | _tnetstring_ops_bytes.render_dict = tns_render_dict;
854 |
855 | _tnetstring_ops_bytes.new_list = tns_new_list;
856 | _tnetstring_ops_bytes.add_to_list = tns_add_to_list;
857 | _tnetstring_ops_bytes.render_list = tns_render_list;
858 | }
859 |
860 |
--------------------------------------------------------------------------------
/tnetstring/dbg.h:
--------------------------------------------------------------------------------
1 | //
2 | // dbg.h: minimal checking and debugging functions
3 | //
4 | // This is a small compatibility shim for the Mongrel2 "dbg.h" interface,
5 | // to make it easier to port code back and forth between the tnetstring
6 | // implementation in Mongrel2 and this module.
7 | //
8 |
#ifndef __dbg_h__
#define __dbg_h__

// All three macros jump to a label named `error`, which the enclosing
// function must define.  They are wrapped in do { } while(0) so that each
// use behaves as a single statement (safe inside an unbraced if/else);
// every use must be followed by a semicolon.

// check(A, M, ...): if condition A is false, raise ValueError with the
// printf-style message M (unless an exception is already pending) and
// jump to `error`.
#define check(A, M, ...) \
    do { \
        if(!(A)) { \
            if(PyErr_Occurred() == NULL) { \
                PyErr_Format(PyExc_ValueError, M, ##__VA_ARGS__); \
            } \
            goto error; \
        } \
    } while(0)

// sentinel(M, ...): unconditional failure with message M.
#define sentinel(M, ...) check(0, M, ##__VA_ARGS__)

// check_mem(A): if pointer A is NULL, raise MemoryError (unless an
// exception is already pending) and jump to `error`.
#define check_mem(A) \
    do { \
        if((A) == NULL) { \
            if(PyErr_Occurred() == NULL) { \
                PyErr_SetString(PyExc_MemoryError, "Out of memory."); \
            } \
            goto error; \
        } \
    } while(0)

#endif
19 |
--------------------------------------------------------------------------------
/tnetstring/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rfk/tnetstring/146381498a07d6053e044375562be08ef16017c2/tnetstring/tests/__init__.py
--------------------------------------------------------------------------------
/tnetstring/tests/test_format.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | import unittest
4 | import random
5 | import math
6 | import StringIO
7 |
8 |
9 | import tnetstring
10 |
11 |
# Known-good encodings: maps a tnetstring to the python object it is
# expected to parse into.  Covers empty containers, each scalar type,
# embedded NUL bytes, flat lists and a deeply nested list.
FORMAT_EXAMPLES = {
    '0:}': {},
    '0:]': [],
    '51:5:hello,39:11:12345678901#4:this,4:true!0:~4:\x00\x00\x00\x00,]}':
            {'hello': [12345678901, 'this', True, None, '\x00\x00\x00\x00']},
    '5:12345#': 12345,
    '12:this is cool,': "this is cool",
    '0:,': "",
    '0:~': None,
    '4:true!': True,
    '5:false!': False,
    '10:\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00,': "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
    '24:5:12345#5:67890#5:xxxxx,]': [12345, 67890, 'xxxxx'],
    '18:3:0.1^3:0.2^3:0.3^]': [0.1, 0.2, 0.3],
    '243:238:233:228:223:218:213:208:203:198:193:188:183:178:173:168:163:158:153:148:143:138:133:128:123:118:113:108:103:99:95:91:87:83:79:75:71:67:63:59:55:51:47:43:39:35:31:27:23:19:15:11:hello-there,]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]': [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["hello-there"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
}
28 |
29 |
def get_random_object(random=random,depth=0,unicode=False):
    """Generate a random serializable object.

    `random` is any object with a randint(a, b) method (the random module
    by default).  `depth` is the current nesting level.  If `unicode` is
    true, dict keys and string values are unicode strings; otherwise they
    are byte strings.

    Returns a list, dict, None, True, False, an integer or a string.
    """
    # The probability of generating a scalar value increases as the depth
    # increases.  This ensures that we bottom out eventually.
    if random.randint(depth,10) <= 4:
        what = random.randint(0,1)
        if what == 0:
            count = random.randint(0,10)
            l = []
            for _ in range(count):
                l.append(get_random_object(random,depth+1,unicode))
            return l
        if what == 1:
            count = random.randint(0,10)
            d = {}
            for _ in range(count):
                # Keys are random printable-ASCII strings.
                keylen = random.randint(0,100)
                k = "".join(chr(random.randint(32,126)) for _ in range(keylen))
                if unicode:
                    k = k.decode("ascii")
                d[k] = get_random_object(random,depth+1,unicode)
            return d
    else:
        what = random.randint(0,4)
        if what == 0:
            return None
        if what == 1:
            return True
        if what == 2:
            return False
        if what == 3:
            if random.randint(0,1) == 0:
                return random.randint(0,sys.maxint)
            else:
                return -1 * random.randint(0,sys.maxint)
        # Anything else is a random printable-ASCII string.
        n = random.randint(0,100)
        if unicode:
            return u"".join(chr(random.randint(32,126)) for _ in range(n))
        # Without this return the byte-string case fell off the end of
        # the function and produced None instead of a string.
        return "".join(chr(random.randint(32,126)) for _ in range(n))
68 |
69 |
70 |
class Test_Format(unittest.TestCase):
    """Checks of loads/dumps/pop on in-memory strings."""

    def test_roundtrip_format_examples(self):
        """Each known example parses, re-encodes and pops as expected."""
        for encoded, expected in FORMAT_EXAMPLES.items():
            parsed = tnetstring.loads(encoded)
            self.assertEqual(expected, parsed)
            reparsed = tnetstring.loads(tnetstring.dumps(expected))
            self.assertEqual(expected, reparsed)
            popped = tnetstring.pop(encoded)
            self.assertEqual((expected, ""), popped)

    def test_roundtrip_format_random(self):
        """Random objects survive a dumps/loads and a dumps/pop cycle."""
        for _ in xrange(500):
            obj = get_random_object()
            loaded = tnetstring.loads(tnetstring.dumps(obj))
            self.assertEqual(obj, loaded)
            popped = tnetstring.pop(tnetstring.dumps(obj))
            self.assertEqual((obj, ""), popped)

    def test_unicode_handling(self):
        """Unicode needs an explicit encoding in both directions."""
        self.assertRaises(ValueError, tnetstring.dumps, u"hello")
        self.assertEqual(tnetstring.dumps(u"hello", "utf8"), "5:hello,")
        self.assertEqual(type(tnetstring.loads("5:hello,")), str)
        self.assertEqual(type(tnetstring.loads("5:hello,", "utf8")), unicode)
        ALPHA = u"\N{GREEK CAPITAL LETTER ALPHA}lpha"
        as_utf8 = "6:" + ALPHA.encode("utf8") + ","
        self.assertEqual(tnetstring.dumps(ALPHA, "utf8"), as_utf8)
        as_utf16 = "12:" + ALPHA.encode("utf16") + ","
        self.assertEqual(tnetstring.dumps(ALPHA, "utf16"), as_utf16)
        decoded = tnetstring.loads("12:\xff\xfe\x91\x03l\x00p\x00h\x00a\x00,", "utf16")
        self.assertEqual(decoded, ALPHA)

    def test_roundtrip_format_unicode(self):
        """Random unicode-bearing objects round-trip via utf8 and utf16."""
        for _ in xrange(500):
            obj = get_random_object(unicode=True)
            loaded = tnetstring.loads(tnetstring.dumps(obj, "utf8"), "utf8")
            self.assertEqual(obj, loaded)
            popped = tnetstring.pop(tnetstring.dumps(obj, "utf16"), "utf16")
            self.assertEqual((obj, ""), popped)

    def test_roundtrip_big_integer(self):
        """A very large integer survives a dumps/loads cycle."""
        original = math.factorial(30000)
        encoded = tnetstring.dumps(original)
        restored = tnetstring.loads(encoded)
        self.assertEqual(original, restored)
106 |
107 |
class Test_FileLoading(unittest.TestCase):
    """Checks of load/dump on file-like objects."""

    def test_roundtrip_file_examples(self):
        """load() consumes exactly one tnetstring and leaves the rest."""
        for encoded, expected in FORMAT_EXAMPLES.items():
            # Reading a known encoding followed by trailing data.
            stream = StringIO.StringIO()
            stream.write(encoded)
            stream.write("OK")
            stream.seek(0)
            self.assertEqual(expected, tnetstring.load(stream))
            self.assertEqual("OK", stream.read())
            # Writing with dump() and reading it back.
            stream = StringIO.StringIO()
            tnetstring.dump(expected, stream)
            stream.write("OK")
            stream.seek(0)
            self.assertEqual(expected, tnetstring.load(stream))
            self.assertEqual("OK", stream.read())

    def test_roundtrip_file_random(self):
        """Random objects survive a dump/load cycle through a file."""
        for _ in xrange(500):
            obj = get_random_object()
            stream = StringIO.StringIO()
            tnetstring.dump(obj, stream)
            stream.write("OK")
            stream.seek(0)
            self.assertEqual(obj, tnetstring.load(stream))
            self.assertEqual("OK", stream.read())

    def test_error_on_absurd_lengths(self):
        """An over-long length prefix fails once the ten digits are read."""
        stream = StringIO.StringIO()
        stream.write("1000000000:pwned!,")
        stream.seek(0)
        self.assertRaises(ValueError, tnetstring.load, stream)
        next_char = stream.read(1)
        self.assertEqual(next_char, ":")
141 |
--------------------------------------------------------------------------------
/tnetstring/tests/test_misc.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import os.path
4 | import difflib
5 | import unittest
6 | import doctest
7 |
8 | import tnetstring
9 |
10 |
class Test_Misc(unittest.TestCase):

    def test_readme_matches_docstring(self):
        """Ensure that the README is in sync with the docstring.

        This test should always pass; if the README is out of sync it just
        updates it with the contents of tnetstring.__doc__.
        """
        dirname = os.path.dirname
        readme = os.path.join(dirname(dirname(dirname(__file__))),"README.rst")
        # A missing README counts as out of sync.
        in_sync = False
        if os.path.isfile(readme):
            # Use `with` so the handle is closed even when the contents
            # already match and nothing needs rewriting.
            with open(readme,"rb") as f:
                in_sync = (f.read() == tnetstring.__doc__)
        if not in_sync:
            with open(readme,"wb") as f:
                f.write(tnetstring.__doc__.encode())
32 |
33 |
--------------------------------------------------------------------------------
/tnetstring/tns_core.c:
--------------------------------------------------------------------------------
1 | //
2 | // tns_core.c: core code for a tnetstring parser in C
3 | //
4 | // This is code for parsing and rendering data in the provisional
5 | // typed-netstring format proposed for inclusion in Mongrel2. You can
6 | // think of it like a JSON library that uses a simpler wire format.
7 | //
8 |
9 | #include "dbg.h"
10 | #include "tns_core.h"
11 |
12 | #ifndef TNS_MAX_LENGTH
13 | #define TNS_MAX_LENGTH 999999999
14 | #endif
15 |
// Current outbuf implementation writes data starting at the back of
// the allocated buffer.  When finished we simply memmove it to the front.
// Here *buffer points to the allocated buffer, while *head points to the
// last character written to the buffer (and thus decreases as we write).
struct tns_outbuf_s {
    // Start of the malloc'd storage.
    char *buffer;
    // Most recently written byte; equals buffer + alloc_size while empty.
    char *head;
    // Total number of bytes allocated at `buffer`.
    size_t alloc_size;
};
25 |
26 |
27 | // Helper function for parsing a dict; basically parses items in a loop.
28 | static int tns_parse_dict(const tns_ops *ops, void *dict, const char *data, size_t len);
29 |
30 | // Helper function for parsing a list; basically parses items in a loop.
31 | static int tns_parse_list(const tns_ops *ops, void *list, const char *data, size_t len);
32 |
33 | // Helper function for writing the length prefix onto a rendered value.
34 | static int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size);
35 |
36 | // Finalize an outbuf, turning the allocated buffer into a standard
37 | // char* array. Can't use the outbuf once it has been finalized.
38 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len);
39 |
40 | // Free the memory allocated in an outbuf.
41 | // Can't use the outbuf once it has been freed.
42 | static void tns_outbuf_free(tns_outbuf *outbuf);
43 |
44 | // Helper function to read a base-ten integer off a string.
45 | // Due to additional constraints, we can do it faster than strtoi.
46 | static size_t tns_strtosz(const char *data, size_t len, size_t *sz, char **end);
47 |
48 |
49 | void* tns_parse(const tns_ops *ops, const char *data, size_t len, char **remain)
50 | {
51 | char *valstr = NULL;
52 | tns_type_tag type = tns_tag_null;
53 | size_t vallen = 0;
54 |
55 | // Read the length of the value, and verify that it ends in a colon.
56 | check(tns_strtosz(data, len, &vallen, &valstr) != -1,
57 | "Not a tnetstring: invalid length prefix.");
58 | check(*valstr == ':',
59 | "Not a tnetstring: invalid length prefix.");
60 | valstr++;
61 | check((valstr+vallen) < (data+len),
62 | "Not a tnetstring: invalid length prefix.");
63 |
64 | // Grab the type tag from the end of the value.
65 | type = valstr[vallen];
66 |
67 | // Output the remainder of the string if necessary.
68 | if(remain != NULL) {
69 | *remain = valstr + vallen + 1;
70 | }
71 |
72 | // Now dispatch type parsing based on the type tag.
73 | return tns_parse_payload(ops, type, valstr, vallen);
74 |
75 | error:
76 | return NULL;
77 | }
78 |
79 |
80 | // This appears to be faster than using strncmp to compare
81 | // against a small string constant. Ugly but fast.
82 | #define STR_EQ_TRUE(s) (s[0]=='t' && s[1]=='r' && s[2]=='u' && s[3]=='e')
83 | #define STR_EQ_FALSE(s) (s[0]=='f' && s[1]=='a' && s[2]=='l' \
84 | && s[3]=='s' && s[4] == 'e')
85 |
86 | void* tns_parse_payload(const tns_ops *ops,tns_type_tag type, const char *data, size_t len)
87 | {
88 | void *val = NULL;
89 |
90 | assert(ops != NULL && "ops struct cannot be NULL");
91 |
92 | switch(type) {
93 | // Primitive type: a string blob.
94 | case tns_tag_string:
95 | val = ops->parse_string(ops, data, len);
96 | check(val != NULL, "Not a tnetstring: invalid string literal.");
97 | break;
98 | // Primitive type: an integer.
99 | case tns_tag_integer:
100 | val = ops->parse_integer(ops, data, len);
101 | check(val != NULL, "Not a tnetstring: invalid integer literal.");
102 | break;
103 | // Primitive type: a float.
104 | case tns_tag_float:
105 | val = ops->parse_float(ops, data, len);
106 | check(val != NULL, "Not a tnetstring: invalid float literal.");
107 | break;
108 | // Primitive type: a boolean.
109 | // The only acceptable values are "true" and "false".
110 | case tns_tag_bool:
111 | if(len == 4 && STR_EQ_TRUE(data)) {
112 | val = ops->get_true(ops);
113 | } else if(len == 5 && STR_EQ_FALSE(data)) {
114 | val = ops->get_false(ops);
115 | } else {
116 | sentinel("Not a tnetstring: invalid boolean literal.");
117 | val = NULL;
118 | }
119 | break;
120 | // Primitive type: a null.
121 | // This must be a zero-length string.
122 | case tns_tag_null:
123 | check(len == 0, "Not a tnetstring: invalid null literal.");
124 | val = ops->get_null(ops);
125 | break;
126 | // Compound type: a dict.
127 | // The data is written
128 | case tns_tag_dict:
129 | val = ops->new_dict(ops);
130 | check(val != NULL, "Could not create dict.");
131 | check(tns_parse_dict(ops, val, data, len) != -1,
132 | "Not a tnetstring: broken dict items.");
133 | break;
134 | // Compound type: a list.
135 | // The data is written -
136 | case tns_tag_list:
137 | val = ops->new_list(ops);
138 | check(val != NULL, "Could not create list.");
139 | check(tns_parse_list(ops, val, data, len) != -1,
140 | "Not a tnetstring: broken list items.");
141 | break;
142 | // Whoops, that ain't a tnetstring.
143 | default:
144 | sentinel("Not a tnetstring: invalid type tag.");
145 | }
146 |
147 | return val;
148 |
149 | error:
150 | if(val != NULL) {
151 | ops->free_value(ops, val);
152 | }
153 | return NULL;
154 | }
155 |
156 | #undef STR_EQ_TRUE
157 | #undef STR_EQ_FALSE
158 |
159 |
160 | char* tns_render(const tns_ops *ops, void *val, size_t *len)
161 | {
162 | tns_outbuf outbuf;
163 |
164 | check(tns_outbuf_init(&outbuf) != -1, "Failed to initialize outbuf.");
165 | check(tns_render_value(ops, val, &outbuf) != -1, "Failed to render value.");
166 |
167 | return tns_outbuf_finalize(&outbuf, len);
168 |
169 | error:
170 | tns_outbuf_free(&outbuf);
171 | return NULL;
172 | }
173 |
174 |
175 | int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf)
176 | {
177 | tns_type_tag type = tns_tag_null;
178 | int res = -1;
179 | size_t orig_size = 0;
180 |
181 | assert(ops != NULL && "ops struct cannot be NULL");
182 |
183 | // Find out the type tag for the given value.
184 | type = ops->get_type(ops, val);
185 | check(type != 0, "type not serializable.");
186 |
187 | tns_outbuf_putc(outbuf, type);
188 | orig_size = tns_outbuf_size(outbuf);
189 |
190 | // Render it into the output buffer using callbacks.
191 | switch(type) {
192 | case tns_tag_string:
193 | res = ops->render_string(ops, val, outbuf);
194 | break;
195 | case tns_tag_integer:
196 | res = ops->render_integer(ops, val, outbuf);
197 | break;
198 | case tns_tag_float:
199 | res = ops->render_float(ops, val, outbuf);
200 | break;
201 | case tns_tag_bool:
202 | res = ops->render_bool(ops, val, outbuf);
203 | break;
204 | case tns_tag_null:
205 | res = 0;
206 | break;
207 | case tns_tag_dict:
208 | res = ops->render_dict(ops, val, outbuf);
209 | break;
210 | case tns_tag_list:
211 | res = ops->render_list(ops, val, outbuf);
212 | break;
213 | default:
214 | sentinel("unknown type tag: '%c'.", type);
215 | }
216 |
217 | check(res == 0, "Failed to render value of type '%c'.", type);
218 | return tns_outbuf_clamp(outbuf, orig_size);
219 |
220 | error:
221 | return -1;
222 | }
223 |
224 |
225 | static int tns_parse_list(const tns_ops *ops, void *val, const char *data, size_t len)
226 | {
227 | void *item = NULL;
228 | char *remain = NULL;
229 |
230 | assert(val != NULL && "value cannot be NULL");
231 | assert(data != NULL && "data cannot be NULL");
232 |
233 | while(len > 0) {
234 | item = tns_parse(ops, data, len, &remain);
235 | check(item != NULL, "Failed to parse list.");
236 | len = len - (remain - data);
237 | data = remain;
238 | check(ops->add_to_list(ops, val, item) != -1,
239 | "Failed to add item to list.");
240 | item = NULL;
241 | }
242 |
243 | return 0;
244 |
245 | error:
246 | if(item) {
247 | ops->free_value(ops, item);
248 | }
249 | return -1;
250 | }
251 |
252 |
253 | static int tns_parse_dict(const tns_ops *ops, void *val, const char *data, size_t len)
254 | {
255 | void *key = NULL;
256 | void *item = NULL;
257 | char *remain = NULL;
258 |
259 | assert(val != NULL && "value cannot be NULL");
260 | assert(data != NULL && "data cannot be NULL");
261 |
262 | while(len > 0) {
263 | key = tns_parse(ops, data, len, &remain);
264 | check(key != NULL, "Failed to parse dict key from tnetstring.");
265 | len = len - (remain - data);
266 | data = remain;
267 |
268 | item = tns_parse(ops, data, len, &remain);
269 | check(item != NULL, "Failed to parse dict item from tnetstring.");
270 | len = len - (remain - data);
271 | data = remain;
272 |
273 | check(ops->add_to_dict(ops, val, key, item) != -1,
274 | "Failed to add element to dict.");
275 |
276 | key = NULL;
277 | item = NULL;
278 | }
279 |
280 | return 0;
281 |
282 | error:
283 | if(key) {
284 | ops->free_value(ops, key);
285 | }
286 | if(item) {
287 | ops->free_value(ops, item);
288 | }
289 | return -1;
290 | }
291 |
292 |
293 |
294 | static INLINE size_t
295 | tns_strtosz(const char *data, size_t len, size_t *sz, char **end)
296 | {
297 | char c;
298 | const char *pos, *eod;
299 | size_t value = 0;
300 |
301 | pos = data;
302 | eod = data + len;
303 |
304 | // The first character must be a digit.
305 | // The netstring spec explicitly forbits padding zeros.
306 | // So if it's a zero, it must be the only char in the string.
307 | c = *pos++;
308 | switch(c) {
309 | case '0':
310 | *sz = 0;
311 | *end = (char*) pos;
312 | return 0;
313 | case '1':
314 | case '2':
315 | case '3':
316 | case '4':
317 | case '5':
318 | case '6':
319 | case '7':
320 | case '8':
321 | case '9':
322 | value = c - '0';
323 | break;
324 | default:
325 | return -1;
326 | }
327 |
328 | // Consume the remaining digits, up to maximum value length.
329 | while(pos < eod) {
330 | c = *pos;
331 | if(c < '0' || c > '9') {
332 | *sz = value;
333 | *end = (char*) pos;
334 | return 0;
335 | }
336 | value = (value * 10) + (c - '0');
337 | check(value <= TNS_MAX_LENGTH,
338 | "Not a tnetstring: absurdly large length prefix");
339 | pos++;
340 | }
341 |
342 | // If we consume the entire string, that's an error.
343 |
344 | error:
345 | return -1;
346 | }
347 |
348 | size_t tns_outbuf_size(tns_outbuf *outbuf)
349 | {
350 | return outbuf->alloc_size - (outbuf->head - outbuf->buffer);
351 | }
352 |
353 |
354 | static INLINE int tns_outbuf_itoa(tns_outbuf *outbuf, size_t n)
355 | {
356 | do {
357 | check(tns_outbuf_putc(outbuf, n%10+'0') != -1,
358 | "Failed to write int to tnetstring buffer.");
359 | n = n / 10;
360 | } while(n > 0);
361 |
362 | return 0;
363 |
364 | error:
365 | return -1;
366 | }
367 |
368 |
369 | int tns_outbuf_init(tns_outbuf *outbuf)
370 | {
371 | outbuf->buffer = malloc(64);
372 | check_mem(outbuf->buffer);
373 |
374 | outbuf->head = outbuf->buffer + 64;
375 | outbuf->alloc_size = 64;
376 | return 0;
377 |
378 | error:
379 | outbuf->head = NULL;
380 | outbuf->alloc_size = 0;
381 | return -1;
382 | }
383 |
384 |
385 | static INLINE void tns_outbuf_free(tns_outbuf *outbuf)
386 | {
387 | if(outbuf) {
388 | free(outbuf->buffer);
389 | outbuf->buffer = NULL;
390 | outbuf->head = 0;
391 | outbuf->alloc_size = 0;
392 | }
393 | }
394 |
395 |
396 | static INLINE int tns_outbuf_extend(tns_outbuf *outbuf, size_t free_size)
397 | {
398 | char *new_buf = NULL;
399 | char *new_head = NULL;
400 | size_t new_size = outbuf->alloc_size * 2;
401 | size_t used_size;
402 |
403 | used_size = tns_outbuf_size(outbuf);
404 |
405 | while(new_size < free_size + used_size) {
406 | new_size = new_size * 2;
407 | }
408 |
409 | new_buf = malloc(new_size);
410 | check_mem(new_buf);
411 |
412 | new_head = new_buf + new_size - used_size;
413 | memmove(new_head, outbuf->head, used_size);
414 |
415 | free(outbuf->buffer);
416 | outbuf->buffer = new_buf;
417 | outbuf->head = new_head;
418 | outbuf->alloc_size = new_size;
419 |
420 | return 0;
421 |
422 | error:
423 | return -1;
424 | }
425 |
426 |
427 | int tns_outbuf_putc(tns_outbuf *outbuf, char c)
428 | {
429 | if(outbuf->buffer == outbuf->head) {
430 | check(tns_outbuf_extend(outbuf, 1) != -1, "Failed to extend buffer");
431 | }
432 |
433 | *(--outbuf->head) = c;
434 |
435 | return 0;
436 |
437 | error:
438 | return -1;
439 | }
440 |
441 |
442 | int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len)
443 | {
444 | if(outbuf->head - outbuf->buffer < len) {
445 | check(tns_outbuf_extend(outbuf, len) != -1, "Failed to extend buffer");
446 | }
447 |
448 | outbuf->head -= len;
449 | memmove(outbuf->head, data, len);
450 |
451 | return 0;
452 |
453 | error:
454 | return -1;
455 | }
456 |
457 |
458 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len)
459 | {
460 | char *new_buf = NULL;
461 | size_t used_size;
462 |
463 | used_size = tns_outbuf_size(outbuf);
464 |
465 | memmove(outbuf->buffer, outbuf->head, used_size);
466 |
467 | if(len != NULL) {
468 | *len = used_size;
469 | } else {
470 | if(outbuf->head == outbuf->buffer) {
471 | new_buf = realloc(outbuf->buffer, outbuf->alloc_size*2);
472 | check_mem(new_buf);
473 | outbuf->buffer = new_buf;
474 | outbuf->alloc_size = outbuf->alloc_size * 2;
475 | }
476 | outbuf->buffer[used_size] = '\0';
477 | }
478 |
479 | return outbuf->buffer;
480 |
481 | error:
482 | free(outbuf->buffer);
483 | outbuf->buffer = NULL;
484 | outbuf->alloc_size = 0;
485 | return NULL;
486 | }
487 |
488 |
489 | static INLINE int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size)
490 | {
491 | size_t datalen = tns_outbuf_size(outbuf) - orig_size;
492 |
493 | check(tns_outbuf_putc(outbuf, ':') != -1, "Failed to clamp outbuf");
494 | check(tns_outbuf_itoa(outbuf, datalen) != -1, "Failed to clamp outbuf");
495 |
496 | return 0;
497 |
498 | error:
499 | return -1;
500 | }
501 |
502 |
503 | void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest)
504 | {
505 | memmove(dest, outbuf->head, tns_outbuf_size(outbuf));
506 | }
507 |
508 |
--------------------------------------------------------------------------------
/tnetstring/tns_core.h:
--------------------------------------------------------------------------------
1 | //
2 | // tns_core.h: core code for a tnetstring parser in C
3 | //
4 | // This is code for parsing and rendering data in the provisional
5 | // typed-netstring format proposed for inclusion in Mongrel2. You can
6 | // think of it like a JSON library that uses a simpler wire format.
7 | //
8 |
9 | #ifndef _tns_core_h
10 | #define _tns_core_h
11 |
12 | #include
13 | #include
14 | #include
15 |
16 | #ifdef _MSC_VER
17 | #define INLINE __forceinline /* use __forceinline (VC++ specific) */
18 | #else
19 | #define INLINE inline /* use standard inline */
20 | #endif
21 |
22 |
23 | // tnetstring rendering is done using an "outbuf" struct, which combines
24 | // a malloced string with its allocation information. Rendering is done
25 | // from back to front; the details are deliberately hidden here since
26 | // I'm experimenting with multiple implementations and it might change.
27 | struct tns_outbuf_s;
28 | typedef struct tns_outbuf_s tns_outbuf;
29 |
// Type tags of the tnetstring encoding: one enumerator per data type,
// whose value is the character used as that type's tag.
typedef enum tns_type_tag_e {
  tns_tag_string  = ',',
  tns_tag_integer = '#',
  tns_tag_float   = '^',
  tns_tag_bool    = '!',
  tns_tag_null    = '~',
  tns_tag_dict    = '}',
  tns_tag_list    = ']'
} tns_type_tag;
41 |
42 |
43 | // To convert between tnetstrings and the data structures of your application
44 | // you provide the following struct filled with function pointers. They
45 | // will be called by the core parser/renderer as necessary.
46 | //
47 | // Each callback is called with the containing struct as its first argument,
48 | // to allow a primitive type of closure.
49 |
50 | struct tns_ops_s;
51 | typedef struct tns_ops_s tns_ops;
52 |
53 | struct tns_ops_s {
54 |
55 | // Get the type of a data object.
56 | tns_type_tag (*get_type)(const tns_ops *ops, void *val);
57 |
58 | // Parse various types of object from a string.
59 | void* (*parse_string)(const tns_ops *ops, const char *data, size_t len);
60 | void* (*parse_integer)(const tns_ops *ops, const char *data, size_t len);
61 | void* (*parse_float)(const tns_ops * ops, const char *data, size_t len);
62 |
63 | // Constructors for constant primitive datatypes.
64 | void* (*get_null)(const tns_ops *ops);
65 | void* (*get_true)(const tns_ops *ops);
66 | void* (*get_false)(const tns_ops *ops);
67 |
68 | // Render various types of object into a tns_outbuf.
69 | int (*render_string)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
70 | int (*render_integer)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
71 | int (*render_float)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
72 | int (*render_bool)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
73 |
74 | // Functions for building and rendering list values.
75 | // Remember that rendering is done from back to front, so
76 | // you must write the last list element first.
77 | void* (*new_list)(const tns_ops *ops);
78 | int (*add_to_list)(const tns_ops *ops, void* list, void* item);
79 | int (*render_list)(const tns_ops *ops, void* list, tns_outbuf *outbuf);
80 |
81 | // Functions for building and rendering dict values
82 | // Remember that rendering is done from back to front, so
83 | // you must write each value first, follow by its key.
84 | void* (*new_dict)(const tns_ops *ops);
85 | int (*add_to_dict)(const tns_ops *ops, void* dict, void* key, void* item);
86 | int (*render_dict)(const tns_ops *ops, void* dict, tns_outbuf *outbuf);
87 |
88 | // Free values that are no longer in use
89 | void (*free_value)(const tns_ops *ops, void *value);
90 |
91 | };
92 |
93 |
94 | // Parse an object off the front of a tnetstring.
95 | // Returns a pointer to the parsed object, or NULL if an error occurs.
96 | // The fourth argument 'remain' is an output parameter; if non-NULL it will
97 | // receive the unparsed remainder of the string.
98 | extern void* tns_parse(const tns_ops *ops, const char *data, size_t len, char** remain);
99 |
100 | // If you need to read the length prefix yourself, e.g. because you're
101 | // reading data off a socket, you can use this function to get just
102 | // the payload parsing logic.
103 | extern void* tns_parse_payload(const tns_ops *ops, tns_type_tag type, const char *data, size_t len);
104 |
105 | // Render an object into a string.
106 | // On success this function returns a malloced string containing
107 | // the serialization of the given object. The third argument
108 | // 'len' is an output parameter that will receive the number of bytes in
109 | // the string; if NULL then the string will be null-terminated.
110 | // The caller is responsible for freeing the returned string.
111 | // On failure this function returns NULL and 'len' is unmodified.
112 | extern char* tns_render(const tns_ops *ops, void *val, size_t *len);
113 |
114 | // If you need to copy the final result off somewhere else, you
115 | // might like to build your own rendering function from the following.
116 | // It will avoid some double-copying that tns_render does internally.
117 | // Basic plan: Initialize an outbuf, pass it to tns_render_value, then
118 | // copy the bytes away using tns_outbuf_memmove.
119 | extern int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf);
120 | extern int tns_outbuf_init(tns_outbuf *outbuf);
121 | extern void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest);
122 |
123 | // Use these functions for rendering into an outbuf.
124 | extern size_t tns_outbuf_size(tns_outbuf *outbuf);
125 | extern int tns_outbuf_putc(tns_outbuf *outbuf, char c);
126 | extern int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len);
127 |
128 | #endif
129 |
--------------------------------------------------------------------------------
/tools/shootout.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | import random
4 |
5 | import cjson
6 | import ujson
7 | import tnetstring
8 | import marshal
9 |
10 | from tnetstring.tests.test_format import FORMAT_EXAMPLES, get_random_object
11 |
# Benchmark corpus: one (object, tnetstring, json, marshal) tuple per case.
TESTS = []


def add_test(v):
    """Add ``v`` to TESTS unless cjson or ujson fails to round-trip it."""
    # Both JSON modules have a few round-tripping problems, so any value
    # they cannot reproduce exactly is quietly left out of the corpus.
    try:
        assert cjson.decode(cjson.encode(v)) == v
        assert ujson.loads(ujson.dumps(v)) == v
    except Exception:
        return
    # Pre-encode once in every format so load-only runs have their input.
    case = (v, tnetstring.dumps(v), cjson.encode(v), marshal.dumps(v))
    TESTS.append(case)
22 |
# Seed the corpus with every value from the format examples.
for v in FORMAT_EXAMPLES.itervalues():
    add_test(v)

# Then add 20 randomly-generated objects.
# The fixed seed keeps the corpus identical from run to run.
r = random.Random(7)
for _ in xrange(20):
    add_test(get_random_object(r))
33 |
34 |
# Benchmark mode comes from the optional first command-line argument:
# "dumps", "loads" or "roundtrip" (also the behaviour when omitted).
_mode = sys.argv[1] if len(sys.argv) > 1 else "roundtrip"
if _mode not in ("dumps", "loads", "roundtrip"):
    raise ValueError("unknown test type: " + sys.argv[1])

TEST_DUMP_ONLY = _mode == "dumps"
TEST_LOAD_ONLY = _mode == "loads"
46 |
47 |
def thrash_tnetstring():
    """Run tnetstring over every case in TESTS.

    Dumps only, loads only (checking the result), or does a checked
    dump/load round trip, depending on the module-level mode flags.
    """
    for value, tns_data, _json_data, _msh_data in TESTS:
        if TEST_DUMP_ONLY:
            tnetstring.dumps(value)
            continue
        if TEST_LOAD_ONLY:
            assert tnetstring.loads(tns_data) == value
            continue
        assert tnetstring.loads(tnetstring.dumps(value)) == value
56 |
def thrash_cjson():
    """Run cjson over every case in TESTS.

    Encodes only, decodes only (checking the result), or does a checked
    encode/decode round trip, depending on the module-level mode flags.
    """
    for value, _tns_data, json_data, _msh_data in TESTS:
        if TEST_DUMP_ONLY:
            cjson.encode(value)
            continue
        if TEST_LOAD_ONLY:
            assert cjson.decode(json_data) == value
            continue
        assert cjson.decode(cjson.encode(value)) == value
65 |
def thrash_ujson():
    """Run ujson over every case in TESTS.

    Dumps only, loads only (checking the result), or does a checked
    dump/load round trip, depending on the module-level mode flags.
    """
    # The third slot of each case is the JSON text stored in TESTS.
    for value, _tns_data, json_data, _msh_data in TESTS:
        if TEST_DUMP_ONLY:
            ujson.dumps(value)
            continue
        if TEST_LOAD_ONLY:
            assert ujson.loads(json_data) == value
            continue
        assert ujson.loads(ujson.dumps(value)) == value
74 |
def thrash_marshal():
    """Run marshal over every case in TESTS.

    Dumps only, loads only (checking the result), or does a checked
    dump/load round trip, depending on the module-level mode flags.
    """
    for value, _tns_data, _json_data, msh_data in TESTS:
        if TEST_DUMP_ONLY:
            marshal.dumps(value)
            continue
        if TEST_LOAD_ONLY:
            assert marshal.loads(msh_data) == value
            continue
        assert marshal.loads(marshal.dumps(value)) == value
83 |
84 |
85 | if __name__ == "__main__":
86 | import timeit
87 | t1 = timeit.Timer("thrash_tnetstring()",
88 | "from shootout import thrash_tnetstring")
89 | t1 = min(t1.repeat(number=10000))
90 | print "tnetstring", t1
91 |
92 | t2 = timeit.Timer("thrash_cjson()",
93 | "from shootout import thrash_cjson")
94 | t2 = min(t2.repeat(number=10000))
95 | print "cjson:", t2
96 | print "speedup: ", round((t2 - t1) / (t2) * 100,2), "%"
97 |
98 | t3 = timeit.Timer("thrash_ujson()",
99 | "from shootout import thrash_ujson")
100 | t3 = min(t3.repeat(number=10000))
101 | print "ujson:", t3
102 | print "speedup: ", round((t3 - t1) / (t3) * 100,2), "%"
103 |
104 | t4 = timeit.Timer("thrash_marshal()",
105 | "from shootout import thrash_marshal")
106 | t4 = min(t4.repeat(number=10000))
107 | print "marshal:", t4
108 | print "speedup: ", round((t4 - t1) / (t4) * 100,2), "%"
109 |
110 |
111 |
--------------------------------------------------------------------------------