├── .gitignore
├── ChangeLog.txt
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── setup.py
├── tnetstring
    ├── __init__.py
    ├── _tnetstring.c
    ├── dbg.h
    ├── tests
    │   ├── __init__.py
    │   ├── test_format.py
    │   └── test_misc.py
    ├── tns_core.c
    └── tns_core.h
└── tools
    └── shootout.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.pyo
3 | *~
4 | *.swp
5 | build/
6 | MANIFEST
7 | 


--------------------------------------------------------------------------------
/ChangeLog.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | v0.2.1:
 3 | 
 4 |     * Fix memory leak in tnetstring.pop(); thanks tarvip.
 5 |     * Fix bug in handling of large integers; thanks gdamjan.
 6 | 
 7 | 
 8 | v0.2.0:
 9 | 
10 |     * Easy loading of unicode strings.  If you pass an optional "encoding"
11 |       argument to load/loads/pop then it will return unicode string objects
12 |       rather than byte strings.
13 |     * Easy dumping of unicode strings.  If you pass an optional "encoding"
14 |       argument to dump/dumps then it will write unicode strings in that
15 |       encoding.
16 | 
17 | 
18 | v0.1.0:
19 | 
20 |     * Initial version; you might say *everything* has changed.
21 | 
22 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011 Ryan Kelly
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | 
2 | include LICENSE.txt
3 | include ChangeLog.txt
4 | include README.rst
5 | recursive-include tnetstring *.c
6 | recursive-include tnetstring *.h
7 | recursive-include tnetstring/tests *.txt
8 | 
9 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Status: Unmaintained
 3 | ====================
 4 | 
 5 | .. image:: http://unmaintained.tech/badge.svg
 6 |      :target: http://unmaintained.tech/
 7 |      :alt: No Maintenance Intended 
 8 | 
 9 | I am `no longer actively maintaining this project <https://rfk.id.au/blog/entry/archiving-open-source-projects/>`_.
10 | 
11 | 
12 | tnetstring:  data serialization using typed netstrings
13 | ======================================================
14 | 
15 | 
16 | This is a data serialization library. It's a lot like JSON but it uses a
17 | new syntax called "typed netstrings" that Zed has proposed for use in the
18 | Mongrel2 webserver.  It's designed to be simpler and easier to implement
19 | than JSON, with a happy consequence of also being faster in many cases.
20 | 
21 | An ordinary netstring is a blob of data prefixed with its length and postfixed
22 | with a sanity-checking comma.  The string "hello world" encodes like this::
23 | 
24 |     11:hello world,
25 | 
26 | Typed netstrings add other datatypes by replacing the comma with a type tag.
27 | Here's the integer 12345 encoded as a tnetstring::
28 | 
29 |     5:12345#
30 | 
31 | And here's the list [12345,True,0] which mixes integers and bools::
32 | 
33 |     19:5:12345#4:true!1:0#]
34 | 
35 | Simple enough?  This module gives you the following functions:
36 | 
37 |     :dump:    dump an object as a tnetstring to a file
38 |     :dumps:   dump an object as a tnetstring to a string
39 |     :load:    load a tnetstring-encoded object from a file
40 |     :loads:   load a tnetstring-encoded object from a string
41 |     :pop:     pop a tnetstring-encoded object from the front of a string
42 | 
43 | Note that since parsing a tnetstring requires reading all the data into memory
44 | at once, there's no efficiency gain from using the file-based versions of these
45 | functions.  They're only here so you can use load() to read precisely one
46 | item from a file or socket without consuming any extra data.
47 | 
48 | The tnetstrings specification explicitly states that strings are binary blobs
49 | and forbids the use of unicode at the protocol level.  As a convenience to
50 | python programmers, this library lets you specify an application-level encoding
51 | to translate python's unicode strings to and from binary blobs:
52 | 
53 |     >>> print repr(tnetstring.loads("2:\xce\xb1,"))
54 |     '\xce\xb1'
55 |     >>> 
56 |     >>> print repr(tnetstring.loads("2:\xce\xb1,", "utf8"))
57 |     u'\u03b1'
58 | 
59 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #
 2 | #  This is the tnetstring setuptools script.
 3 | #  Originally developed by Ryan Kelly, 2011.
 4 | #
 5 | #  This script is placed in the public domain.
 6 | #  If there's no public domain where you come from,
 7 | #  you can use it under the MIT license.
 8 | #
 9 | 
import sys

#  Extra keyword arguments for setup().  These are only populated when
#  running under python3, where setuptools is used so that the sources
#  can be converted with 2to3.
setup_kwds = {}
if sys.version_info > (3,):
    from setuptools import setup, Extension
    #  The tests live in the "tnetstring.tests" package (the same name that
    #  is listed in the packages argument below).
    setup_kwds["test_suite"] = "tnetstring.tests"
    setup_kwds["use_2to3"] = True
else:
    from distutils.core import setup, Extension


#  Fall back to the iterator's own next() method on pythons that have
#  no next() builtin.
try:
    next = next
except NameError:
    def next(i):
        return i.next()


#  Extract the module docstring and version number from the package source
#  without importing it.  We execute every line up to and including the
#  run of lines that mention "__version__", collecting the resulting names
#  in the info dict.
info = {}
try:
    src = open("tnetstring/__init__.py")
    try:
        lines = []
        ln = next(src)
        while "__version__" not in ln:
            lines.append(ln)
            ln = next(src)
        while "__version__" in ln:
            lines.append(ln)
            ln = next(src)
    finally:
        #  Always release the file handle, even if the scan fails part-way.
        src.close()
    exec("".join(lines),info)
except Exception:
    #  Best-effort only; if anything went wrong then the lookups of
    #  info["__version__"] and info["__doc__"] below will raise KeyError.
    pass


NAME = "tnetstring"
VERSION = info["__version__"]
DESCRIPTION = "data serialization using typed netstrings"
LONG_DESC = info["__doc__"]
AUTHOR = "Ryan Kelly"
AUTHOR_EMAIL = "ryan@rfk.id.au"
URL="http://github.com/rfk/tnetstring"
LICENSE = "MIT"
KEYWORDS = "netstring serialize"
CLASSIFIERS = [
    "Programming Language :: Python",
    "Programming Language :: Python :: 2",
    #"Programming Language :: Python :: 3",
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: MIT License"
]

setup(name=NAME,
      version=VERSION,
      author=AUTHOR,
      author_email=AUTHOR_EMAIL,
      url=URL,
      description=DESCRIPTION,
      long_description=LONG_DESC,
      license=LICENSE,
      keywords=KEYWORDS,
      packages=["tnetstring","tnetstring.tests"],
      ext_modules = [
          Extension(name="_tnetstring",sources=["tnetstring/_tnetstring.c"]),
      ],
      classifiers=CLASSIFIERS,
      **setup_kwds
     )
76 | 
77 | 


--------------------------------------------------------------------------------
/tnetstring/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | tnetstring:  data serialization using typed netstrings
  3 | ======================================================
  4 | 
  5 | 
  6 | This is a data serialization library. It's a lot like JSON but it uses a
  7 | new syntax called "typed netstrings" that Zed has proposed for use in the
  8 | Mongrel2 webserver.  It's designed to be simpler and easier to implement
  9 | than JSON, with a happy consequence of also being faster in many cases.
 10 | 
 11 | An ordinary netstring is a blob of data prefixed with its length and postfixed
 12 | with a sanity-checking comma.  The string "hello world" encodes like this::
 13 | 
 14 |     11:hello world,
 15 | 
 16 | Typed netstrings add other datatypes by replacing the comma with a type tag.
 17 | Here's the integer 12345 encoded as a tnetstring::
 18 | 
 19 |     5:12345#
 20 | 
 21 | And here's the list [12345,True,0] which mixes integers and bools::
 22 | 
 23 |     19:5:12345#4:true!1:0#]
 24 | 
 25 | Simple enough?  This module gives you the following functions:
 26 | 
 27 |     :dump:    dump an object as a tnetstring to a file
 28 |     :dumps:   dump an object as a tnetstring to a string
 29 |     :load:    load a tnetstring-encoded object from a file
 30 |     :loads:   load a tnetstring-encoded object from a string
 31 |     :pop:     pop a tnetstring-encoded object from the front of a string
 32 | 
 33 | Note that since parsing a tnetstring requires reading all the data into memory
 34 | at once, there's no efficiency gain from using the file-based versions of these
 35 | functions.  They're only here so you can use load() to read precisely one
 36 | item from a file or socket without consuming any extra data.
 37 | 
 38 | The tnetstrings specification explicitly states that strings are binary blobs
 39 | and forbids the use of unicode at the protocol level.  As a convenience to
 40 | python programmers, this library lets you specify an application-level encoding
 41 | to translate python's unicode strings to and from binary blobs:
 42 | 
 43 |     >>> print repr(tnetstring.loads("2:\\xce\\xb1,"))
 44 |     '\\xce\\xb1'
 45 |     >>> 
 46 |     >>> print repr(tnetstring.loads("2:\\xce\\xb1,", "utf8"))
 47 |     u'\\u03b1'
 48 | 
 49 | """
 50 | 
#  Individual version components; they are combined into the dotted
#  version string on the last line of this group.
#  NOTE: setup.py executes the top of this file, up to and including the
#  version string assignment, to obtain the package metadata.  Keep these
#  lines free of anything that needs the rest of the module.
__ver_major__ = 0
__ver_minor__ = 2
__ver_patch__ = 1
__ver_sub__ = ""
__version__ = "%d.%d.%d%s" % (__ver_major__,__ver_minor__,__ver_patch__,__ver_sub__)
 56 | 
 57 | 
 58 | from collections import deque
 59 | 
 60 | 
 61 | def dumps(value,encoding=None):
 62 |     """dumps(object,encoding=None) -> string
 63 | 
 64 |     This function dumps a python object as a tnetstring.
 65 |     """
 66 |     #  This uses a deque to collect output fragments in reverse order,
 67 |     #  then joins them together at the end.  It's measurably faster
 68 |     #  than creating all the intermediate strings.
 69 |     #  If you're reading this to get a handle on the tnetstring format,
 70 |     #  consider the _gdumps() function instead; it's a standard top-down
 71 |     #  generator that's simpler to understand but much less efficient.
 72 |     q = deque()
 73 |     _rdumpq(q,0,value,encoding)
 74 |     return "".join(q)
 75 | 
 76 | 
 77 | def dump(value,file,encoding=None):
 78 |     """dump(object,file,encoding=None)
 79 | 
 80 |     This function dumps a python object as a tnetstring and writes it to
 81 |     the given file.
 82 |     """
 83 |     file.write(dumps(value,encoding))
 84 | 
 85 | 
 86 | def _rdumpq(q,size,value,encoding=None):
 87 |     """Dump value as a tnetstring, to a deque instance, last chunks first.
 88 | 
 89 |     This function generates the tnetstring representation of the given value,
 90 |     pushing chunks of the output onto the given deque instance.  It pushes
 91 |     the last chunk first, then recursively generates more chunks.
 92 | 
 93 |     When passed in the current size of the string in the queue, it will return
 94 |     the new size of the string in the queue.
 95 | 
 96 |     Operating last-chunk-first makes it easy to calculate the size written
 97 |     for recursive structures without having to build their representation as
 98 |     a string.  This is measurably faster than generating the intermediate
 99 |     strings, especially on deeply nested structures.
100 |     """
101 |     write = q.appendleft
102 |     if value is None:
103 |         write("0:~")
104 |         return size + 3
105 |     if value is True:
106 |         write("4:true!")
107 |         return size + 7
108 |     if value is False:
109 |         write("5:false!")
110 |         return size + 8
111 |     if isinstance(value,(int,long)):
112 |         data = str(value) 
113 |         ldata = len(data)
114 |         span = str(ldata)
115 |         write("#")
116 |         write(data)
117 |         write(":")
118 |         write(span)
119 |         return size + 2 + len(span) + ldata
120 |     if isinstance(value,(float,)):
121 |         #  Use repr() for float rather than str().
122 |         #  It round-trips more accurately.
123 |         #  Probably unnecessary in later python versions that
124 |         #  use David Gay's ftoa routines.
125 |         data = repr(value) 
126 |         ldata = len(data)
127 |         span = str(ldata)
128 |         write("^")
129 |         write(data)
130 |         write(":")
131 |         write(span)
132 |         return size + 2 + len(span) + ldata
133 |     if isinstance(value,str):
134 |         lvalue = len(value)
135 |         span = str(lvalue)
136 |         write(",")
137 |         write(value)
138 |         write(":")
139 |         write(span)
140 |         return size + 2 + len(span) + lvalue
141 |     if isinstance(value,(list,tuple,)):
142 |         write("]")
143 |         init_size = size = size + 1
144 |         for item in reversed(value):
145 |             size = _rdumpq(q,size,item,encoding)
146 |         span = str(size - init_size)
147 |         write(":")
148 |         write(span)
149 |         return size + 1 + len(span)
150 |     if isinstance(value,dict):
151 |         write("}")
152 |         init_size = size = size + 1
153 |         for (k,v) in value.iteritems():
154 |             size = _rdumpq(q,size,v,encoding)
155 |             size = _rdumpq(q,size,k,encoding)
156 |         span = str(size - init_size)
157 |         write(":")
158 |         write(span)
159 |         return size + 1 + len(span)
160 |     if isinstance(value,unicode):
161 |         if encoding is None:
162 |             raise ValueError("must specify encoding to dump unicode strings")
163 |         value = value.encode(encoding)
164 |         lvalue = len(value)
165 |         span = str(lvalue)
166 |         write(",")
167 |         write(value)
168 |         write(":")
169 |         write(span)
170 |         return size + 2 + len(span) + lvalue
171 |     raise ValueError("unserializable object")
172 | 
173 | 
174 | def _gdumps(value,encoding):
175 |     """Generate fragments of value dumped as a tnetstring.
176 | 
177 |     This is the naive dumping algorithm, implemented as a generator so that
178 |     it's easy to pass to "".join() without building a new list.
179 | 
180 |     This is mainly here for comparison purposes; the _rdumpq version is
181 |     measurably faster as it doesn't have to build intermediate strins.
182 |     """
183 |     if value is None:
184 |         yield "0:~"
185 |     elif value is True:
186 |         yield "4:true!"
187 |     elif value is False:
188 |         yield "5:false!"
189 |     elif isinstance(value,(int,long)):
190 |         data = str(value) 
191 |         yield str(len(data))
192 |         yield ":"
193 |         yield data
194 |         yield "#"
195 |     elif isinstance(value,(float,)):
196 |         data = repr(value) 
197 |         yield str(len(data))
198 |         yield ":"
199 |         yield data
200 |         yield "^"
201 |     elif isinstance(value,(str,)):
202 |         yield str(len(value))
203 |         yield ":"
204 |         yield value
205 |         yield ","
206 |     elif isinstance(value,(list,tuple,)):
207 |         sub = []
208 |         for item in value:
209 |             sub.extend(_gdumps(item))
210 |         sub = "".join(sub)
211 |         yield str(len(sub))
212 |         yield ":"
213 |         yield sub
214 |         yield "]"
215 |     elif isinstance(value,(dict,)):
216 |         sub = []
217 |         for (k,v) in value.iteritems():
218 |             sub.extend(_gdumps(k))
219 |             sub.extend(_gdumps(v))
220 |         sub = "".join(sub)
221 |         yield str(len(sub))
222 |         yield ":"
223 |         yield sub
224 |         yield "}"
225 |     elif isinstance(value,(unicode,)):
226 |         if encoding is None:
227 |             raise ValueError("must specify encoding to dump unicode strings")
228 |         value = value.encode(encoding)
229 |         yield str(len(value))
230 |         yield ":"
231 |         yield value
232 |         yield ","
233 |     else:
234 |         raise ValueError("unserializable object")
235 | 
236 | 
def loads(string,encoding=None):
    """loads(string,encoding=None) -> object

    Parse a tnetstring and return the python object that it encodes.
    """
    #  Simply delegate to pop() and throw away the unparsed remainder.
    #  The C-extension version has a dedicated loads(), which is measurably
    #  faster than pop() since it can avoid the overhead of building a
    #  second string.
    (value,_remain) = pop(string,encoding)
    return value
246 | 
247 | 
def load(file,encoding=None):
    """load(file,encoding=None) -> object

    Read a single tnetstring from a file and return the python object that
    it encodes.  The file only needs to provide a read() method, and no
    more data is consumed from it than the tnetstring requires.

    If an encoding is given, string values are decoded with it.  A
    ValueError is raised if the data is not a valid tnetstring.
    """
    read = file.read
    zero = ord("0")
    #  Consume the length prefix a single character at a time.
    ch = read(1)
    if not ch.isdigit():
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    size = ord(ch) - zero
    ch = read(1)
    #  The netstring spec explicitly forbids padding zeros, so a leading
    #  zero must stand alone; only a non-zero first digit may be followed
    #  by more digits.
    while size != 0 and ch.isdigit():
        size = (10 * size) + (ord(ch) - zero)
        if size > 999999999:
            raise ValueError("not a tnetstring: absurdly large length prefix")
        ch = read(1)
    if ch != ":":
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    #  With the length known, read the payload and then its type tag.
    #  The dispatch below mirrors pop(); doing it here saves us from
    #  re-constructing the outermost tnetstring just to parse it.
    payload = read(size)
    if len(payload) != size:
        raise ValueError("not a tnetstring: length prefix too big")
    tag = read(1)
    if tag == ",":
        if encoding is None:
            return payload
        return payload.decode(encoding)
    if tag == "#":
        try:
            return int(payload)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal")
    if tag == "^":
        try:
            return float(payload)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal")
    if tag == "!":
        if payload == "true":
            return True
        if payload == "false":
            return False
        raise ValueError("not a tnetstring: invalid boolean literal")
    if tag == "~":
        if payload:
            raise ValueError("not a tnetstring: invalid null literal")
        return None
    if tag == "]":
        #  The payload is a sequence of tnetstrings, one per item.
        items = []
        while payload:
            (item,payload) = pop(payload,encoding)
            items.append(item)
        return items
    if tag == "}":
        #  The payload is a sequence of alternating keys and values.
        mapping = {}
        while payload:
            (key,payload) = pop(payload,encoding)
            (val,payload) = pop(payload,encoding)
            mapping[key] = val
        return mapping
    raise ValueError("unknown type tag")
317 |     
318 | 
319 | 
def pop(string,encoding=None):
    """pop(string,encoding=None) -> (object, remain)

    This function parses a tnetstring into a python object.
    It returns a tuple giving the parsed object and a string
    containing any unparsed data from the end of the string.

    If an encoding is given, string values are decoded with it.  A
    ValueError is raised if the string does not begin with a valid
    tnetstring.
    """
    #  Parse out data length, type and remaining string.
    #  The length prefix must be a plain run of decimal digits with no
    #  padding zeros, which is the same rule that load() enforces.  Relying
    #  on int() alone would let through prefixes such as "-1", "+1" or " 1",
    #  and a negative length silently mis-slices the data below.
    errmsg = "not a tnetstring: missing or invalid length prefix"
    try:
        (prefix,rest) = string.split(":",1)
    except ValueError:
        raise ValueError(errmsg)
    if not prefix.isdigit() or (prefix[0] == "0" and len(prefix) > 1):
        raise ValueError(errmsg)
    try:
        dlen = int(prefix)
    except ValueError:
        raise ValueError(errmsg)
    try:
        (data,tag,remain) = (rest[:dlen],rest[dlen],rest[dlen+1:])
    except IndexError:
        #  This fires if len(rest) < dlen, meaning we don't need
        #  to further validate that data is the right length.
        raise ValueError("not a tnetstring: invalid length prefix")
    #  Parse the data based on the type tag.
    if tag == ",":
        if encoding is not None:
            return (data.decode(encoding),remain)
        return (data,remain)
    if tag == "#":
        try:
            return (int(data),remain)
        except ValueError:
            raise ValueError("not a tnetstring: invalid integer literal")
    if tag == "^":
        try:
            return (float(data),remain)
        except ValueError:
            raise ValueError("not a tnetstring: invalid float literal")
    if tag == "!":
        if data == "true":
            return (True,remain)
        elif data == "false":
            return (False,remain)
        else:
            raise ValueError("not a tnetstring: invalid boolean literal")
    if tag == "~":
        if data:
            raise ValueError("not a tnetstring: invalid null literal")
        return (None,remain)
    if tag == "]":
        #  The payload is a sequence of tnetstrings, one per item.
        l = []
        while data:
            (item,data) = pop(data,encoding)
            l.append(item)
        return (l,remain)
    if tag == "}":
        #  The payload is a sequence of alternating keys and values.
        d = {}
        while data:
            (key,data) = pop(data,encoding)
            (val,data) = pop(data,encoding)
            d[key] = val
        return (d,remain)
    raise ValueError("unknown type tag")
379 | 
380 | 
381 | 
#  Use the c-extension version if available
try:
    import _tnetstring
except ImportError:
    #  No compiled extension; keep the pure-python implementations
    #  defined above.
    pass
else:
    #  Rebind the public names to the compiled implementations.
    #  dump() is not rebound: it looks up dumps() in the module namespace
    #  when called, so it picks up the replacement automatically.
    dumps = _tnetstring.dumps
    load = _tnetstring.load
    loads = _tnetstring.loads
    pop = _tnetstring.pop
392 | 
393 | 


--------------------------------------------------------------------------------
/tnetstring/_tnetstring.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  _tnetstring.c:  python module for fast encode/decode of typed-netstrings
  3 | //
  4 | //  You get the following functions:
  5 | //
  6 | //    dumps:  dump a python object to a tnetstring
  7 | //    loads:  parse tnetstring into a python object
  8 | //    load:   parse tnetstring from a file-like object
  9 | //    pop:    parse tnetstring into a python object,
 10 | //            return it along with unparsed data.
 11 | 
 12 | #include <Python.h>
 13 | 
 14 | 
 15 | #define TNS_MAX_LENGTH 999999999
 16 | #include "tns_core.c"
 17 | 
 18 | 
//  We have one static tns_ops struct for parsing bytestrings.
//  It is the default ops table used whenever no encoding is given.
static tns_ops _tnetstring_ops_bytes;

//  Unicode parsing ops are created on demand.
//  We allocate a struct containing all the function pointers along with
//  the encoding string, as a primitive kind of closure.
//  Eventually we should cache these.
struct tns_ops_with_encoding_s {
  //  The function pointers.  NOTE(review): presumably this must stay the
  //  first member so the struct can be passed around as a tns_ops pointer;
  //  confirm against _tnetstring_get_unicode_ops.
  tns_ops ops;
  //  The encoding string that goes along with the ops.
  char *encoding;
};
typedef struct tns_ops_with_encoding_s tns_ops_with_encoding;

//  Get the parsing ops for the given encoding.  Callers in this file treat
//  a NULL result as failure, and release a non-NULL result with free()
//  once they are done parsing.
static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding);
 33 | 
 34 | 
 35 | //  _tnetstring_loads:  parse tnetstring-format value from a string.
 36 | //
 37 | static PyObject*
 38 | _tnetstring_loads(PyObject* self, PyObject *args) 
 39 | {
 40 |   PyObject *string = NULL;
 41 |   PyObject *encoding = Py_None;
 42 |   PyObject *val = NULL;
 43 |   tns_ops *ops = &_tnetstring_ops_bytes;
 44 |   char *data;
 45 |   size_t len;
 46 | 
 47 |   if(!PyArg_UnpackTuple(args, "loads", 1, 2, &string, &encoding)) {
 48 |       return NULL;
 49 |   }
 50 |   if(!PyString_Check(string)) {
 51 |       PyErr_SetString(PyExc_TypeError, "arg must be a string");
 52 |       return NULL;
 53 |   }
 54 |   Py_INCREF(string);
 55 | 
 56 |   if(encoding == Py_None) {
 57 |       data = PyString_AS_STRING(string);
 58 |       len = PyString_GET_SIZE(string);
 59 |       val = tns_parse(ops, data, len, NULL);
 60 |   } else {
 61 |       if(!PyString_Check(encoding)) {
 62 |           PyErr_SetString(PyExc_TypeError, "encoding must be a string");
 63 |           goto error;
 64 |       }
 65 |       Py_INCREF(encoding);
 66 |       ops = _tnetstring_get_unicode_ops(encoding);
 67 |       if(ops == NULL) {
 68 |           Py_DECREF(encoding);
 69 |           goto error;
 70 |       }
 71 |       data = PyString_AS_STRING(string);
 72 |       len = PyString_GET_SIZE(string);
 73 |       val = tns_parse(ops, data, len, NULL);
 74 |       free(ops);
 75 |       Py_DECREF(encoding);
 76 |   }
 77 | 
 78 |   Py_DECREF(string);
 79 |   return val;
 80 | 
 81 | error:
 82 |   Py_DECREF(string);
 83 |   return NULL;
 84 | }
 85 | 
 86 | 
 87 | //  _tnetstring_load:  parse tnetstring-format value from a file.
 88 | //
 89 | //  This takes care to read no more data than is required to get the
 90 | //  full tnetstring-encoded value.  It might read arbitrarily-much
 91 | //  data if the file doesn't begin with a valid tnetstring.
 92 | //
 93 | static PyObject*
 94 | _tnetstring_load(PyObject* self, PyObject *args) 
 95 | {
 96 |   PyObject *val = NULL;
 97 |   PyObject *file = NULL;
 98 |   PyObject *encoding = Py_None;
 99 |   PyObject *methnm = NULL;
100 |   PyObject *metharg = NULL;
101 |   PyObject *res = NULL;
102 |   tns_ops *ops = &_tnetstring_ops_bytes;
103 |   char c, *data;
104 |   size_t datalen = 0;
105 | 
106 |   if(!PyArg_UnpackTuple(args, "load", 1, 2, &file, &encoding)) {
107 |       goto error;
108 |   }
109 |   Py_INCREF(file);
110 | 
111 |   if(encoding != Py_None) {
112 |       if(!PyString_Check(encoding)) {
113 |           PyErr_SetString(PyExc_TypeError, "encoding must be a string");
114 |           goto error;
115 |       }
116 |       Py_INCREF(encoding);
117 |       ops = _tnetstring_get_unicode_ops(encoding);
118 |       if(ops == NULL) {
119 |           goto error;
120 |       }
121 |   }
122 | 
123 |   //  We're going to read one char at a time
124 |   if((methnm = PyString_FromString("read")) == NULL) {
125 |       goto error;
126 |   }
127 |   if((metharg = PyInt_FromLong(1)) == NULL) {
128 |       goto error;
129 |   }
130 | 
131 |   //  Read the length prefix one char at a time
132 |   res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
133 |   if(res == NULL) {
134 |       goto error;
135 |   }
136 |   Py_INCREF(res);
137 |   if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
138 |       PyErr_SetString(PyExc_ValueError,
139 |                       "Not a tnetstring: invalid or missing length prefix");
140 |       goto error;
141 |   }
142 |   c = PyString_AS_STRING(res)[0];
143 |   Py_DECREF(res); res = NULL;
144 |   //  Note that the netstring spec explicitly forbids padding zeroes.
145 |   //  If the first char is zero, it must be the only char.
146 |   if(c < '0' || c > '9') {
147 |       PyErr_SetString(PyExc_ValueError,
148 |                       "Not a tnetstring: invalid or missing length prefix");
149 |       goto error;
150 |   } else if (c == '0') {
151 |       res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
152 |       if(res == NULL) {
153 |           goto error;
154 |       }
155 |       Py_INCREF(res);
156 |       if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
157 |           PyErr_SetString(PyExc_ValueError,
158 |                       "Not a tnetstring: invalid or missing length prefix");
159 |           goto error;
160 |       }
161 |       c = PyString_AS_STRING(res)[0];
162 |       Py_DECREF(res); res = NULL;
163 |   } else {
164 |       do {
165 |           datalen = (10 * datalen) + (c - '0');
166 |           check(datalen <= TNS_MAX_LENGTH,
167 |                 "Not a tnetstring: absurdly large length prefix"); 
168 |           res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
169 |           if(res == NULL) {
170 |               goto error;
171 |           }
172 |           Py_INCREF(res);
173 |           if(!PyString_Check(res) || !PyString_GET_SIZE(res)) {
174 |               PyErr_SetString(PyExc_ValueError,
175 |                         "Not a tnetstring: invalid or missing length prefix");
176 |               goto error;
177 |           }
178 |           c = PyString_AS_STRING(res)[0];
179 |           Py_DECREF(res); res = NULL;
180 |       } while(c >= '0' && c <= '9');
181 |   }
182 | 
183 |   //  Validate end-of-length-prefix marker.
184 |   if(c != ':') {
185 |       PyErr_SetString(PyExc_ValueError,
186 |                       "Not a tnetstring: missing length prefix");
187 |       goto error;
188 |   }
189 |   
190 |   //  Read the data plus terminating type tag.
191 |   Py_DECREF(metharg);
192 |   if((metharg = PyInt_FromSize_t(datalen + 1)) == NULL) {
193 |       goto error;
194 |   } 
195 |   res = PyObject_CallMethodObjArgs(file, methnm, metharg, NULL);
196 |   if(res == NULL) {
197 |       goto error;
198 |   }
199 |   Py_INCREF(res);
200 |   Py_DECREF(file); file = NULL;
201 |   Py_DECREF(methnm); methnm = NULL;
202 |   Py_DECREF(metharg); metharg = NULL;
203 |   if(!PyString_Check(res) || PyString_GET_SIZE(res) != datalen + 1) {
204 |       PyErr_SetString(PyExc_ValueError,
205 |                       "Not a tnetstring: invalid length prefix");
206 |       goto error;
207 |   }
208 | 
209 |   //  Parse out the payload object
210 |   data = PyString_AS_STRING(res);
211 |   val = tns_parse_payload(ops, data[datalen], data, datalen);
212 |   Py_DECREF(res); res = NULL;
213 | 
214 |   if(ops != &_tnetstring_ops_bytes) {
215 |       free(ops);
216 |       Py_DECREF(encoding);
217 |   }
218 | 
219 |   return val;
220 | 
221 | error:
222 |   if(file != NULL) {
223 |       Py_DECREF(file);
224 |   }
225 |   if(ops != &_tnetstring_ops_bytes) {
226 |       free(ops);
227 |       Py_DECREF(encoding);
228 |   }
229 |   if(methnm != NULL) {
230 |       Py_DECREF(methnm);
231 |   }
232 |   if(metharg != NULL) {
233 |       Py_DECREF(metharg);
234 |   }
235 |   if(res != NULL) {
236 |       Py_DECREF(res);
237 |   }
238 |   if(val != NULL) {
239 |       Py_DECREF(val);
240 |   }
241 |   return NULL;
242 | }
243 | 
244 | 
245 | static PyObject*
246 | _tnetstring_pop(PyObject* self, PyObject *args) 
247 | {
248 |   PyObject *string = NULL;
249 |   PyObject *val = NULL;
250 |   PyObject *rest = NULL;
251 |   PyObject *result = NULL;
252 |   PyObject *encoding = Py_None;
253 |   tns_ops *ops = &_tnetstring_ops_bytes;
254 |   char *data, *remain;
255 |   size_t len;
256 | 
257 |   if(!PyArg_UnpackTuple(args, "pop", 1, 2, &string, &encoding)) {
258 |       return NULL;
259 |   }
260 |   if(!PyString_Check(string)) {
261 |       PyErr_SetString(PyExc_TypeError, "arg must be a string");
262 |       return NULL;
263 |   }
264 |   if(encoding != Py_None) {
265 |       if(!PyString_Check(encoding)) {
266 |           PyErr_SetString(PyExc_TypeError, "encoding must be a string");
267 |           return NULL;
268 |       }
269 |       Py_INCREF(encoding);
270 |       ops = _tnetstring_get_unicode_ops(encoding);
271 |       if(ops == NULL) {
272 |           Py_DECREF(encoding);
273 |           return NULL;
274 |       }
275 |   }
276 |   Py_INCREF(string);
277 | 
278 |   data = PyString_AS_STRING(string);
279 |   len = PyString_GET_SIZE(string);
280 |   val = tns_parse(ops, data, len, &remain);
281 |   Py_DECREF(string);
282 |   if(ops != &_tnetstring_ops_bytes) {
283 |       free(ops);
284 |       Py_DECREF(encoding);
285 |   }
286 |   if(val == NULL) {
287 |       return NULL;
288 |   }
289 | 
290 |   rest = PyString_FromStringAndSize(remain, len-(remain-data));
291 |   if(rest == NULL) {
292 |       result = NULL;
293 |   } else {
294 |       result = PyTuple_Pack(2, val, rest);
295 |       Py_DECREF(rest);
296 |   }
297 |   Py_DECREF(val);
298 |   return result;
299 | }
300 | 
301 | 
302 | static PyObject*
303 | _tnetstring_dumps(PyObject* self, PyObject *args)
304 | {
305 |   PyObject *object = NULL;
306 |   PyObject *string = NULL;
307 |   PyObject *encoding = Py_None;
308 |   tns_ops *ops = &_tnetstring_ops_bytes;
309 |   tns_outbuf outbuf;
310 | 
311 |   if(!PyArg_UnpackTuple(args, "dumps", 1, 2, &object, &encoding)) {
312 |       return NULL;
313 |   }
314 |   if(encoding != Py_None) {
315 |       if(!PyString_Check(encoding)) {
316 |           PyErr_SetString(PyExc_TypeError, "encoding must be a string");
317 |           return NULL;
318 |       }
319 |       Py_INCREF(encoding);
320 |       ops = _tnetstring_get_unicode_ops(encoding);
321 |       if(ops == NULL) {
322 |           Py_DECREF(encoding);
323 |           return NULL;
324 |       }
325 |   }
326 |   Py_INCREF(object);
327 | 
328 |   if(tns_outbuf_init(&outbuf) == -1) {
329 |       goto error;
330 |   }
331 |   if(tns_render_value(ops, object, &outbuf) == -1) {
332 |       goto error;
333 |   }
334 | 
335 |   Py_DECREF(object);
336 |   string = PyString_FromStringAndSize(NULL,tns_outbuf_size(&outbuf));
337 |   if(string == NULL) {
338 |       goto error;
339 |   }
340 | 
341 |   tns_outbuf_memmove(&outbuf, PyString_AS_STRING(string));
342 |   free(outbuf.buffer);
343 | 
344 |   if(ops != &_tnetstring_ops_bytes) {
345 |       free(ops);
346 |       Py_DECREF(encoding);
347 |   }
348 | 
349 |   return string;
350 | 
351 | error:
352 |   if(ops != &_tnetstring_ops_bytes) {
353 |       free(ops);
354 |       Py_DECREF(encoding);
355 |   }
356 |   Py_DECREF(object);
357 |   return NULL;
358 | }
359 | 
360 | 
//  Method table for the extension module: maps each Python-visible
//  function name to its C implementation.  Every entry takes positional
//  arguments only (METH_VARARGS).  The all-NULL entry terminates the table.
static PyMethodDef _tnetstring_methods[] = {
    {"load",
     (PyCFunction)_tnetstring_load,
     METH_VARARGS,
     PyDoc_STR("load(file,encoding=None) -> object\n"
               "This function reads a tnetstring from a file and parses it\n"
               "into a python object.")},

    {"loads",
     (PyCFunction)_tnetstring_loads,
     METH_VARARGS,
     PyDoc_STR("loads(string,encoding=None) -> object\n"
               "This function parses a tnetstring into a python object.")},

    {"pop",
     (PyCFunction)_tnetstring_pop,
     METH_VARARGS,
     PyDoc_STR("pop(string,encoding=None) -> (object, remain)\n"
               "This function parses a tnetstring into a python object.\n"
               "It returns a tuple giving the parsed object and a string\n"
               "containing any unparsed data.")},

    {"dumps",
     (PyCFunction)_tnetstring_dumps,
     METH_VARARGS,
     PyDoc_STR("dumps(object,encoding=None) -> string\n"
               "This function dumps a python object as a tnetstring.")},

    {NULL, NULL}
};
391 | 
392 | 
393 | //  Functions to hook the parser core up to python.
394 | 
395 | static void*
396 | tns_parse_string(const tns_ops *ops, const char *data, size_t len)
397 | {
398 |   return PyString_FromStringAndSize(data, len);
399 | }
400 | 
401 | 
402 | static void*
403 | tns_parse_unicode(const tns_ops *ops, const char *data, size_t len)
404 | {
405 |   char* encoding = ((tns_ops_with_encoding*)ops)->encoding;
406 |   return PyUnicode_Decode(data, len, encoding, NULL);
407 | }
408 | 
409 | 
410 | static void*
411 | tns_parse_integer(const tns_ops *ops, const char *data, size_t len)
412 | {
413 |   long l = 0;
414 |   long long ll = 0;
415 |   int sign = 1;
416 |   char c;
417 |   char *dataend;
418 |   const char *pos, *eod;
419 |   PyObject *v = NULL;
420 | 
421 |   //  Anything with less than 10 digits, we can fit into a long.
422 |   //  Hand-parsing, as we need tighter error-checking than strtol.
423 |   if (len < 10) {
424 |       pos = data;
425 |       eod = data + len;
426 |       c = *pos++;
427 |       switch(c) {
428 |         case '0':
429 |         case '1':
430 |         case '2':
431 |         case '3':
432 |         case '4':
433 |         case '5':
434 |         case '6':
435 |         case '7':
436 |         case '8':
437 |         case '9':
438 |           l = c - '0';
439 |           break;
440 |         case '+':
441 |           break;
442 |         case '-':
443 |           sign = -1;
444 |           break;
445 |         default:
446 |           sentinel("invalid integer literal");
447 |       }
448 |       while(pos < eod) {
449 |           c = *pos++;
450 |           check(c >= '0' && c <= '9', "invalid integer literal");
451 |           l = (l * 10) + (c - '0');
452 |       }
453 |       return PyLong_FromLong(l * sign);
454 |   }
455 |   //  Anything with less than 19 digits fits in a long long.
456 |   //  Hand-parsing, as we need tighter error-checking than strtoll.
457 |   else if(len < 19) {
458 |       pos = data;
459 |       eod = data + len;
460 |       c = *pos++;
461 |       switch(c) {
462 |         case '0':
463 |         case '1':
464 |         case '2':
465 |         case '3':
466 |         case '4':
467 |         case '5':
468 |         case '6':
469 |         case '7':
470 |         case '8':
471 |         case '9':
472 |           ll = c - '0';
473 |           break;
474 |         case '+':
475 |           break;
476 |         case '-':
477 |           sign = -1;
478 |           break;
479 |         default:
480 |           sentinel("invalid integer literal");
481 |       }
482 |       while(pos < eod) {
483 |           c = *pos++;
484 |           check(c >= '0' && c <= '9', "invalid integer literal");
485 |           ll = (ll * 10) + (c - '0');
486 |       }
487 |       return PyLong_FromLongLong(ll * sign);
488 |   }
489 |   //  Really big numbers are passed to python's native parser.
490 |   else { 
491 |       // PyLong_FromString allows leading whitespace, so we have to check
492 |       // that there is none present in the string.
493 |       c = *data;
494 |       switch(c) {
495 |         case '0':
496 |         case '1':
497 |         case '2':
498 |         case '3':
499 |         case '4':
500 |         case '5':
501 |         case '6':
502 |         case '7':
503 |         case '8':
504 |         case '9':
505 |           break;
506 |         case '+':
507 |         case '-':
508 |           c = *(data+1);
509 |           check(c >= '0' && c <= '9', "invalid integer literal");
510 |           break;
511 |         default:
512 |           sentinel("invalid integer literal");
513 |       }
514 |       // PyLong_FromString insists that the string end in a NULL byte.
515 |       // I am *not* copying all that data.  Instead we lie a little bit
516 |       // about the const-ness of data, write a NULL over the format terminator
517 |       // and restore the original character when we're done.
518 |       c = data[len];
519 |       ((char*)data)[len] = '\0';
520 |       v = PyLong_FromString((char *)data, &dataend, 10);
521 |       ((char*)data)[len] = c;
522 |       check(dataend == data + len, "invalid integer literal");
523 |       return v;
524 |   }
525 |   sentinel("invalid code branch, check your compiler...");
526 | 
527 | error:
528 |   return NULL;
529 | }
530 | 
531 | 
532 | static void*
533 | tns_parse_float(const tns_ops *ops, const char *data, size_t len)
534 | {
535 |   double d = 0;
536 |   char *dataend;
537 | 
538 |   //  Technically this allows whitespace around the float, which
539 |   //  isn't valid in a tnetstring.  But I don't want to waste the
540 |   //  time checking and I am *not* reimplementing strtod.
541 |   d = strtod(data, &dataend);
542 |   if(dataend != data + len) {
543 |       return NULL;
544 |   }
545 |   return PyFloat_FromDouble(d);
546 | }
547 | 
548 | 
549 | static void*
550 | tns_get_null(const tns_ops *ops)
551 | {
552 |   Py_INCREF(Py_None);
553 |   return Py_None;
554 | }
555 | 
556 | 
557 | static void*
558 | tns_get_true(const tns_ops *ops)
559 | {
560 |   Py_INCREF(Py_True);
561 |   return Py_True;
562 | }
563 | 
564 | 
565 | static void*
566 | tns_get_false(const tns_ops *ops)
567 | {
568 |   Py_INCREF(Py_False);
569 |   return Py_False;
570 | }
571 | 
572 | 
573 | static void*
574 | tns_new_dict(const tns_ops *ops)
575 | {
576 |   return PyDict_New();
577 | }
578 | 
579 | 
580 | static void*
581 | tns_new_list(const tns_ops *ops)
582 | {
583 |   return PyList_New(0);
584 | }
585 | 
586 | 
587 | static void
588 | tns_free_value(const tns_ops *ops, void *value)
589 | {
590 |   Py_XDECREF(value);
591 | }
592 | 
593 | 
594 | static int
595 | tns_add_to_dict(const tns_ops *ops, void *dict, void *key, void *item)
596 | {
597 |   int res;
598 |   res = PyDict_SetItem(dict, key, item);
599 |   Py_DECREF(key);
600 |   Py_DECREF(item);
601 |   if(res == -1) {
602 |       return -1;
603 |   }
604 |   return 0;
605 | }
606 | 
607 | 
608 | static int
609 | tns_add_to_list(const tns_ops *ops, void *list, void *item)
610 | {
611 |   int res;
612 |   res = PyList_Append(list, item);
613 |   Py_DECREF(item);
614 |   if(res == -1) {
615 |       return -1;
616 |   }
617 |   return 0;
618 | }
619 | 
620 | 
621 | static int
622 | tns_render_string(const tns_ops *ops, void *val, tns_outbuf *outbuf)
623 | {
624 |   return tns_outbuf_puts(outbuf, PyString_AS_STRING(val),
625 |                                  PyString_GET_SIZE(val));
626 | }
627 | 
628 | 
629 | static int
630 | tns_render_unicode(const tns_ops *ops, void *val, tns_outbuf *outbuf)
631 | {
632 |   PyObject *bytes;
633 |   char* encoding = ((tns_ops_with_encoding*)ops)->encoding;
634 | 
635 |   if(PyUnicode_Check(val)) {
636 |       bytes = PyUnicode_Encode(PyUnicode_AS_UNICODE(val),
637 |                                 PyUnicode_GET_SIZE(val),
638 |                                 encoding, NULL);
639 |       if(bytes == NULL) {
640 |           return -1;
641 |       }
642 |       if(tns_render_string(ops, bytes, outbuf) == -1) {
643 |           return -1;
644 |       }
645 |       Py_DECREF(bytes);
646 |       return 0;
647 |   }
648 | 
649 |   if(PyString_Check(val)) {
650 |     return tns_render_string(ops, val, outbuf);
651 |   }
652 | 
653 |   return -1;
654 | }
655 | 
656 | 
657 | static int
658 | tns_render_integer(const tns_ops *ops, void *val, tns_outbuf *outbuf)
659 | {
660 |   PyObject *string = NULL;
661 |   int res = 0;
662 | 
663 |   string = PyObject_Str(val);
664 |   if(string == NULL) {
665 |       return -1;
666 |   }
667 | 
668 |   res = tns_render_string(ops, string, outbuf);
669 |   Py_DECREF(string);
670 |   return res;
671 | }
672 | 
673 | 
674 | static int
675 | tns_render_float(const tns_ops *ops, void *val, tns_outbuf *outbuf)
676 | {
677 |   PyObject *string;
678 |   int res = 0;
679 | 
680 |   string = PyObject_Repr(val);
681 |   if(string == NULL) {
682 |       return -1;
683 |   }
684 | 
685 |   res = tns_render_string(ops, string, outbuf);
686 |   Py_DECREF(string);
687 |   return res;
688 | }
689 | 
690 | 
691 | static int
692 | tns_render_bool(const tns_ops *ops, void *val, tns_outbuf *outbuf)
693 | {
694 |   if(val == Py_True) {
695 |       return tns_outbuf_puts(outbuf, "true", 4);
696 |   } else {
697 |       return tns_outbuf_puts(outbuf, "false", 5);
698 |   }
699 | }
700 | 
701 | 
702 | static int
703 | tns_render_dict(const tns_ops *ops, void *val, tns_outbuf *outbuf)
704 | {
705 |   PyObject *key, *item;
706 |   Py_ssize_t pos = 0;
707 | 
708 |   while(PyDict_Next(val, &pos, &key, &item)) {
709 |       if(tns_render_value(ops, item, outbuf) == -1) {
710 |           return -1;
711 |       }
712 |       if(tns_render_value(ops, key, outbuf) == -1) {
713 |           return -1;
714 |       }
715 |   }
716 |   return 0;
717 | }
718 | 
719 | 
720 | static int
721 | tns_render_list(const tns_ops *ops, void *val, tns_outbuf *outbuf)
722 | {
723 |   PyObject *item;
724 |   Py_ssize_t idx;
725 | 
726 |   //  Remember, all output is in reverse.
727 |   //  So we must write the last element first.
728 |   idx = PyList_GET_SIZE(val) - 1;
729 |   while(idx >= 0) {
730 |       item = PyList_GET_ITEM(val, idx);
731 |       if(tns_render_value(ops, item, outbuf) == -1) {
732 |           return -1;
733 |       }
734 |       idx--;
735 |   }
736 |   return 0;
737 | }
738 | 
739 | 
740 | static
741 | tns_type_tag tns_get_type(const tns_ops *ops, void *val)
742 | {
743 |   if(val == Py_True || val == Py_False) {
744 |     return tns_tag_bool;
745 |   }
746 |   if(val == Py_None) {
747 |     return tns_tag_null;
748 |   }
749 |   if(PyInt_Check((PyObject*)val) || PyLong_Check((PyObject*)val)) {
750 |     return tns_tag_integer;
751 |   }
752 |   if(PyFloat_Check((PyObject*)val)) {
753 |     return tns_tag_float;
754 |   }
755 |   if(PyString_Check((PyObject*)val)) {
756 |     return tns_tag_string;
757 |   }
758 |   if(PyList_Check((PyObject*)val)) {
759 |     return tns_tag_list;
760 |   }
761 |   if(PyDict_Check((PyObject*)val)) {
762 |     return tns_tag_dict;
763 |   }
764 |   return 0;
765 | }
766 | 
767 | 
768 | static
769 | tns_type_tag tns_get_type_unicode(const tns_ops *ops, void *val)
770 | {
771 |   tns_type_tag type = 0;
772 | 
773 |   type = tns_get_type(ops, val);
774 |   if(type == 0) {
775 |       if(PyUnicode_Check(val)) {
776 |           type = tns_tag_string;
777 |       }
778 |   }
779 | 
780 |   return type;
781 | }
782 | 
783 | 
784 | static tns_ops *_tnetstring_get_unicode_ops(PyObject *encoding)
785 | {
786 |   tns_ops_with_encoding *opswe = NULL;
787 |   tns_ops *ops = NULL;
788 | 
789 |   opswe = malloc(sizeof(tns_ops_with_encoding));
790 |   if(opswe == NULL) {
791 |       PyErr_SetString(PyExc_MemoryError, "could not allocate ops struct");
792 |       return NULL;
793 |   }
794 |   ops = (tns_ops*)opswe;
795 | 
796 |   opswe->encoding = PyString_AS_STRING(encoding);
797 | 
798 |   ops->get_type = &tns_get_type_unicode;
799 |   ops->free_value = &tns_free_value;
800 | 
801 |   ops->parse_string = tns_parse_unicode;
802 |   ops->parse_integer = tns_parse_integer;
803 |   ops->parse_float = tns_parse_float;
804 |   ops->get_null = tns_get_null;
805 |   ops->get_true = tns_get_true;
806 |   ops->get_false = tns_get_false;
807 | 
808 |   ops->render_string = tns_render_unicode;
809 |   ops->render_integer = tns_render_integer;
810 |   ops->render_float = tns_render_float;
811 |   ops->render_bool = tns_render_bool;
812 | 
813 |   ops->new_dict = tns_new_dict;
814 |   ops->add_to_dict = tns_add_to_dict;
815 |   ops->render_dict = tns_render_dict;
816 | 
817 |   ops->new_list = tns_new_list;
818 |   ops->add_to_list = tns_add_to_list;
819 |   ops->render_list = tns_render_list;
820 | 
821 |   return ops;
822 | }
823 | 
824 | 
//  Module-level docstring; handed to Py_InitModule3 in init_tnetstring.
PyDoc_STRVAR(module_doc,
"Fast encoding/decoding of typed-netstrings."
);
828 | 
829 | 
830 | PyMODINIT_FUNC
831 | init_tnetstring(void)
832 | {
833 |   Py_InitModule3("_tnetstring", _tnetstring_methods, module_doc);
834 | 
835 |   //  Initialize function pointers for parsing bytes.
836 |   _tnetstring_ops_bytes.get_type = &tns_get_type;
837 |   _tnetstring_ops_bytes.free_value = &tns_free_value;
838 | 
839 |   _tnetstring_ops_bytes.parse_string = tns_parse_string;
840 |   _tnetstring_ops_bytes.parse_integer = tns_parse_integer;
841 |   _tnetstring_ops_bytes.parse_float = tns_parse_float;
842 |   _tnetstring_ops_bytes.get_null = tns_get_null;
843 |   _tnetstring_ops_bytes.get_true = tns_get_true;
844 |   _tnetstring_ops_bytes.get_false = tns_get_false;
845 | 
846 |   _tnetstring_ops_bytes.render_string = tns_render_string;
847 |   _tnetstring_ops_bytes.render_integer = tns_render_integer;
848 |   _tnetstring_ops_bytes.render_float = tns_render_float;
849 |   _tnetstring_ops_bytes.render_bool = tns_render_bool;
850 | 
851 |   _tnetstring_ops_bytes.new_dict = tns_new_dict;
852 |   _tnetstring_ops_bytes.add_to_dict = tns_add_to_dict;
853 |   _tnetstring_ops_bytes.render_dict = tns_render_dict;
854 | 
855 |   _tnetstring_ops_bytes.new_list = tns_new_list;
856 |   _tnetstring_ops_bytes.add_to_list = tns_add_to_list;
857 |   _tnetstring_ops_bytes.render_list = tns_render_list;
858 | }
859 | 
860 | 


--------------------------------------------------------------------------------
/tnetstring/dbg.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  dbg.h:  minimal checking and debugging functions
 3 | //
 4 | //  This is a small compatibility shim for the Mongrel2 "dbg.h" interface,
 5 | //  to make it easier to port code back and forth between the tnetstring
 6 | //  implementation in Mongrel2 and this module.
 7 | //
 8 | 
 9 | #ifndef __dbg_h__
10 | #define __dbg_h__
11 | 
//  check(A, M, ...): if condition A is false, raise a Python ValueError
//  formatted from M and the trailing arguments (unless a Python error is
//  already pending) and jump to the enclosing function's `error:` label.
#define check(A, M, ...) if(!(A)) { if(PyErr_Occurred() == NULL) { PyErr_Format(PyExc_ValueError, M, ##__VA_ARGS__); }; goto error; }

//  sentinel(M, ...): unconditional failure; marks code paths that should
//  never be reached.
#define sentinel(M, ...)  check(0, M, ##__VA_ARGS__)

//  check_mem(A): if pointer A is NULL, raise MemoryError (unless a Python
//  error is already pending) and jump to `error:`.  The argument is
//  parenthesised so that compound expressions compare as a whole.
#define check_mem(A) if((A)==NULL) { if(PyErr_Occurred() == NULL) { PyErr_SetString(PyExc_MemoryError, "Out of memory."); }; goto error; }
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/tnetstring/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rfk/tnetstring/146381498a07d6053e044375562be08ef16017c2/tnetstring/tests/__init__.py


--------------------------------------------------------------------------------
/tnetstring/tests/test_format.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import sys
  3 | import unittest
  4 | import random
  5 | import math
  6 | import StringIO
  7 | 
  8 | 
  9 | import tnetstring
 10 | 
 11 | 
# Known-good examples: maps a serialized tnetstring to the Python object
# it is expected to parse into.  Shared by the string and file round-trip
# tests below.
FORMAT_EXAMPLES = {
    '0:}': {},
    '0:]': [],
    '51:5:hello,39:11:12345678901#4:this,4:true!0:~4:\x00\x00\x00\x00,]}':
            {'hello': [12345678901, 'this', True, None, '\x00\x00\x00\x00']},
    '5:12345#': 12345,
    '12:this is cool,': "this is cool",
    '0:,': "",
    '0:~': None,
    '4:true!': True,
    '5:false!': False,
    '10:\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00,': "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
    '24:5:12345#5:67890#5:xxxxx,]': [12345, 67890, 'xxxxx'],
    '18:3:0.1^3:0.2^3:0.3^]': [0.1, 0.2, 0.3],
    '243:238:233:228:223:218:213:208:203:198:193:188:183:178:173:168:163:158:153:148:143:138:133:128:123:118:113:108:103:99:95:91:87:83:79:75:71:67:63:59:55:51:47:43:39:35:31:27:23:19:15:11:hello-there,]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]': [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["hello-there"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
}
 28 | 
 29 | 
 30 | def get_random_object(random=random,depth=0,unicode=False):
 31 |     """Generate a random serializable object."""
 32 |     #  The probability of generating a scalar value increases as the depth increase.
 33 |     #  This ensures that we bottom out eventually.
 34 |     if random.randint(depth,10) <= 4:
 35 |         what = random.randint(0,1)
 36 |         if what == 0:
 37 |             n = random.randint(0,10)
 38 |             l = []
 39 |             for _ in xrange(n):
 40 |                 l.append(get_random_object(random,depth+1,unicode))
 41 |             return l
 42 |         if what == 1:
 43 |             n = random.randint(0,10)
 44 |             d = {}
 45 |             for _ in xrange(n):
 46 |                n = random.randint(0,100)
 47 |                k = "".join(chr(random.randint(32,126)) for _ in xrange(n))
 48 |                if unicode:
 49 |                    k = k.decode("ascii")
 50 |                d[k] = get_random_object(random,depth+1,unicode)
 51 |             return d
 52 |     else:
 53 |         what = random.randint(0,4)
 54 |         if what == 0:
 55 |             return None
 56 |         if what == 1:
 57 |             return True
 58 |         if what == 2:
 59 |             return False
 60 |         if what == 3:
 61 |             if random.randint(0,1) == 0:
 62 |                 return random.randint(0,sys.maxint)
 63 |             else:
 64 |                 return -1 * random.randint(0,sys.maxint)
 65 |         n = random.randint(0,100)
 66 |         if unicode:
 67 |             return u"".join(chr(random.randint(32,126)) for _ in xrange(n))
 68 | 
 69 | 
 70 | 
 71 | class Test_Format(unittest.TestCase):
 72 | 
 73 |     def test_roundtrip_format_examples(self):
 74 |         for data, expect in FORMAT_EXAMPLES.items():
 75 |             self.assertEqual(expect,tnetstring.loads(data))
 76 |             self.assertEqual(expect,tnetstring.loads(tnetstring.dumps(expect)))
 77 |             self.assertEqual((expect,""),tnetstring.pop(data))
 78 | 
 79 |     def test_roundtrip_format_random(self):
 80 |         for _ in xrange(500):
 81 |             v = get_random_object()
 82 |             self.assertEqual(v,tnetstring.loads(tnetstring.dumps(v)))
 83 |             self.assertEqual((v,""),tnetstring.pop(tnetstring.dumps(v)))
 84 | 
 85 |     def test_unicode_handling(self):
 86 |         self.assertRaises(ValueError,tnetstring.dumps,u"hello")
 87 |         self.assertEquals(tnetstring.dumps(u"hello","utf8"),"5:hello,")
 88 |         self.assertEquals(type(tnetstring.loads("5:hello,")),str)
 89 |         self.assertEquals(type(tnetstring.loads("5:hello,","utf8")),unicode)
 90 |         ALPHA = u"\N{GREEK CAPITAL LETTER ALPHA}lpha"
 91 |         self.assertEquals(tnetstring.dumps(ALPHA,"utf8"),"6:"+ALPHA.encode("utf8")+",")
 92 |         self.assertEquals(tnetstring.dumps(ALPHA,"utf16"),"12:"+ALPHA.encode("utf16")+",")
 93 |         self.assertEquals(tnetstring.loads("12:\xff\xfe\x91\x03l\x00p\x00h\x00a\x00,","utf16"),ALPHA)
 94 | 
 95 |     def test_roundtrip_format_unicode(self):
 96 |         for _ in xrange(500):
 97 |             v = get_random_object(unicode=True)
 98 |             self.assertEqual(v,tnetstring.loads(tnetstring.dumps(v,"utf8"),"utf8"))
 99 |             self.assertEqual((v,""),tnetstring.pop(tnetstring.dumps(v,"utf16"),"utf16"))
100 | 
101 |     def test_roundtrip_big_integer(self):
102 |         i1 = math.factorial(30000)
103 |         s = tnetstring.dumps(i1)
104 |         i2 = tnetstring.loads(s)
105 |         self.assertEquals(i1, i2)
106 | 
107 | 
class Test_FileLoading(unittest.TestCase):
    """Round-trip tests for the file-based API: load and dump."""

    def _check_load_leaves_trailer(self, stream, expect):
        """Append "OK" to stream, rewind, and check load() reads exactly one value."""
        stream.write("OK")
        stream.seek(0)
        self.assertEqual(expect, tnetstring.load(stream))
        self.assertEqual("OK", stream.read())

    def test_roundtrip_file_examples(self):
        """Known examples load from a file, both as given and after dump()."""
        for data, expect in FORMAT_EXAMPLES.items():
            raw = StringIO.StringIO()
            raw.write(data)
            self._check_load_leaves_trailer(raw, expect)
            dumped = StringIO.StringIO()
            tnetstring.dump(expect, dumped)
            self._check_load_leaves_trailer(dumped, expect)

    def test_roundtrip_file_random(self):
        """Randomly generated objects survive dump() followed by load()."""
        for _ in xrange(500):
            obj = get_random_object()
            stream = StringIO.StringIO()
            tnetstring.dump(obj, stream)
            self._check_load_leaves_trailer(stream, obj)

    def test_error_on_absurd_lengths(self):
        """An over-long length prefix is rejected before the colon is read."""
        stream = StringIO.StringIO()
        stream.write("1000000000:pwned!,")
        stream.seek(0)
        self.assertRaises(ValueError, tnetstring.load, stream)
        self.assertEqual(stream.read(1), ":")
141 | 


--------------------------------------------------------------------------------
/tnetstring/tests/test_misc.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import os.path
 4 | import difflib
 5 | import unittest
 6 | import doctest
 7 | 
 8 | import tnetstring
 9 | 
10 | 
class Test_Misc(unittest.TestCase):

    def test_readme_matches_docstring(self):
        """Ensure that the README is in sync with the docstring.

        This test should always pass; if the README is out of sync it just
        updates it with the contents of tnetstring.__doc__.
        """
        dirname = os.path.dirname
        readme = os.path.join(dirname(dirname(dirname(__file__))),"README.rst")
        #  Compare against the same encoded bytes that get written, so the
        #  check and the update cannot disagree.
        expected = tnetstring.__doc__.encode()
        current = None
        if os.path.isfile(readme):
            #  The with-block closes the file even when no rewrite is needed.
            with open(readme,"rb") as f:
                current = f.read()
        if current != expected:
            with open(readme,"wb") as f:
                f.write(expected)
32 | 
33 | 


--------------------------------------------------------------------------------
/tnetstring/tns_core.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  tns_core.c:  core code for a tnetstring parser in C
  3 | //
  4 | //  This is code for parsing and rendering data in the provisional
  5 | //  typed-netstring format proposed for inclusion in Mongrel2.  You can
  6 | //  think of it like a JSON library that uses a simpler wire format.
  7 | //
  8 | 
  9 | #include "dbg.h"
 10 | #include "tns_core.h"
 11 | 
 12 | #ifndef TNS_MAX_LENGTH
 13 | #define TNS_MAX_LENGTH 999999999
 14 | #endif
 15 | 
//  Current outbuf implementation writes data starting at the back of
//  the allocated buffer.  When finished we simply memmove it to the front.
//  Here *buffer points to the allocated buffer, while *head points to the
//  last character written to the buffer (and thus decreases as we write).
struct tns_outbuf_s {
  //  Start of the allocated block.
  char *buffer;
  //  Most recently written character; moves toward buffer as data is added.
  char *head;
  //  Presumably the size of the allocated block in bytes -- confirm
  //  against the outbuf helper functions.
  size_t alloc_size;
};
 25 | 
 26 | 
 27 | //  Helper function for parsing a dict; basically parses items in a loop.
 28 | static int tns_parse_dict(const tns_ops *ops, void *dict, const char *data, size_t len);
 29 | 
 30 | //  Helper function for parsing a list; basically parses items in a loop.
 31 | static int tns_parse_list(const tns_ops *ops, void *list, const char *data, size_t len);
 32 | 
 33 | //  Helper function for writing the length prefix onto a rendered value.
 34 | static int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size);
 35 | 
 36 | //  Finalize an outbuf, turning the allocated buffer into a standard
 37 | //  char* array.  Can't use the outbuf once it has been finalized.
 38 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len);
 39 | 
 40 | //  Free the memory allocated in an outbuf.
 41 | //  Can't use the outbuf once it has been freed.
 42 | static void tns_outbuf_free(tns_outbuf *outbuf);
 43 | 
 44 | //  Helper function to read a base-ten integer off a string.
 45 | //  Due to additional constraints, we can do it faster than strtoi.
 46 | static size_t tns_strtosz(const char *data, size_t len, size_t *sz, char **end);
 47 | 
 48 | 
 49 | void* tns_parse(const tns_ops *ops, const char *data, size_t len, char **remain)
 50 | {
 51 |   char *valstr = NULL;
 52 |   tns_type_tag type = tns_tag_null;
 53 |   size_t vallen = 0;
 54 | 
 55 |   //  Read the length of the value, and verify that it ends in a colon.
 56 |   check(tns_strtosz(data, len, &vallen, &valstr) != -1,
 57 |         "Not a tnetstring: invalid length prefix.");
 58 |   check(*valstr == ':',
 59 |         "Not a tnetstring: invalid length prefix.");
 60 |   valstr++;
 61 |   check((valstr+vallen) < (data+len),
 62 |         "Not a tnetstring: invalid length prefix.");
 63 | 
 64 |   //  Grab the type tag from the end of the value.
 65 |   type = valstr[vallen];
 66 | 
 67 |   //  Output the remainder of the string if necessary.
 68 |   if(remain != NULL) {
 69 |       *remain = valstr + vallen + 1;
 70 |   }
 71 | 
 72 |   //  Now dispatch type parsing based on the type tag.
 73 |   return tns_parse_payload(ops, type, valstr, vallen);
 74 | 
 75 | error:
 76 |   return NULL;
 77 | }
 78 | 
 79 | 
 80 | //  This appears to be faster than using strncmp to compare
 81 | //  against a small string constant.  Ugly but fast.
 82 | #define STR_EQ_TRUE(s) (s[0]=='t' && s[1]=='r' && s[2]=='u' && s[3]=='e')
 83 | #define STR_EQ_FALSE(s) (s[0]=='f' && s[1]=='a' && s[2]=='l' \
 84 |                                    && s[3]=='s' && s[4] == 'e')
 85 | 
//  Parse the `len` bytes at `data` as a payload of the given type tag,
//  building the result through the `ops` callbacks.
//
//  Returns the parsed value, or NULL on failure.  Failures are reported
//  through the check()/sentinel() macros, which jump to the error label;
//  any value that was already created is released with ops->free_value.
void* tns_parse_payload(const tns_ops *ops,tns_type_tag type, const char *data, size_t len)
{
  void *val = NULL;

  assert(ops != NULL && "ops struct cannot be NULL");

  switch(type) {
    //  Primitive type: a string blob.
    case tns_tag_string:
        val = ops->parse_string(ops, data, len);
        check(val != NULL, "Not a tnetstring: invalid string literal.");
        break;
    //  Primitive type: an integer.
    case tns_tag_integer:
        val = ops->parse_integer(ops, data, len);
        check(val != NULL, "Not a tnetstring: invalid integer literal.");
        break;
    //  Primitive type: a float.
    case tns_tag_float:
        val = ops->parse_float(ops, data, len);
        check(val != NULL, "Not a tnetstring: invalid float literal.");
        break;
    //  Primitive type: a boolean.
    //  The only acceptable values are "true" and "false".
    case tns_tag_bool:
        if(len == 4 && STR_EQ_TRUE(data)) {
            val = ops->get_true(ops);
        } else if(len == 5 && STR_EQ_FALSE(data)) {
            val = ops->get_false(ops);
        } else {
            //  sentinel() always jumps to the error label, so the
            //  assignment that follows it is never executed.
            sentinel("Not a tnetstring: invalid boolean literal.");
            val = NULL;
        }
        break;
    //  Primitive type: a null.
    //  This must be a zero-length string.
    case tns_tag_null:
        check(len == 0, "Not a tnetstring: invalid null literal.");
        val = ops->get_null(ops);
        break;
    //  Compound type: a dict.
    //  The data is written <key><value><key><value>
    case tns_tag_dict:
        val = ops->new_dict(ops);
        check(val != NULL, "Could not create dict.");
        check(tns_parse_dict(ops, val, data, len) != -1,
              "Not a tnetstring: broken dict items.");
        break;
    //  Compound type: a list.
    //  The data is written <item><item><item>
    case tns_tag_list:
        val = ops->new_list(ops);
        check(val != NULL, "Could not create list.");
        check(tns_parse_list(ops, val, data, len) != -1,
              "Not a tnetstring: broken list items.");
        break;
    //  Whoops, that ain't a tnetstring.
    default:
        sentinel("Not a tnetstring: invalid type tag.");
  }

  return val;

error:
  //  Release a partially-built container (or any other created value).
  if(val != NULL) {
      ops->free_value(ops, val);
  }
  return NULL;
}
155 | 
156 | #undef STR_EQ_TRUE
157 | #undef STR_EQ_FALSE
158 | 
159 | 
160 | char* tns_render(const tns_ops *ops, void *val, size_t *len)
161 | {
162 |   //  Serialize val into a malloced string that the caller must free.
163 |   //  On any failure the working buffer is released and NULL is returned.
164 |   tns_outbuf buf;
165 |   check(tns_outbuf_init(&buf) != -1, "Failed to initialize outbuf.");
166 |   check(tns_render_value(ops, val, &buf) != -1, "Failed to render value.");
167 |   return tns_outbuf_finalize(&buf, len);
168 | 
169 | error:
170 |   tns_outbuf_free(&buf);
171 |   return NULL;
172 | }
173 | 
174 | 
175 | int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf)
176 | {
177 |   //  Render val into outbuf as <length>:<payload><tag>, built back to
178 |   //  front (tag first, length last).  Returns 0 on success, -1 on error.
179 |   tns_type_tag type = tns_tag_null;
180 |   int res = -1;
181 |   size_t orig_size = 0;
182 |   assert(ops != NULL && "ops struct cannot be NULL");
183 |   //  Find out the type tag for the given value.
184 |   type = ops->get_type(ops, val);
185 |   check(type != 0, "type not serializable.");
186 |   //  The tag goes in first; a failed write must not be silently ignored.
187 |   check(tns_outbuf_putc(outbuf, type) != -1, "Failed to write type tag.");
188 |   orig_size = tns_outbuf_size(outbuf);
189 | 
190 |   //  Render it into the output buffer using callbacks.
191 |   switch(type) {
192 |     case tns_tag_string:
193 |       res = ops->render_string(ops, val, outbuf);
194 |       break;
195 |     case tns_tag_integer:
196 |       res = ops->render_integer(ops, val, outbuf);
197 |       break;
198 |     case tns_tag_float:
199 |       res = ops->render_float(ops, val, outbuf);
200 |       break;
201 |     case tns_tag_bool:
202 |       res = ops->render_bool(ops, val, outbuf);
203 |       break;
204 |     case tns_tag_null:
205 |       res = 0;
206 |       break;
207 |     case tns_tag_dict:
208 |       res = ops->render_dict(ops, val, outbuf);
209 |       break;
210 |     case tns_tag_list:
211 |       res = ops->render_list(ops, val, outbuf);
212 |       break;
213 |     default:
214 |       sentinel("unknown type tag: '%c'.", type);
215 |   }
216 | 
217 |   check(res == 0, "Failed to render value of type '%c'.", type);
218 |   return tns_outbuf_clamp(outbuf, orig_size);
219 | 
220 | error:
221 |   return -1;
222 | }
223 | 
224 | 
225 | static int tns_parse_list(const tns_ops *ops, void *val, const char *data, size_t len)
226 | {
227 |   //  Parse consecutive tnetstrings out of data and append each one to
228 |   //  the list val.  Returns 0 once len bytes are consumed, -1 on failure.
229 |   char *rest = NULL;
230 |   void *elem = NULL;
231 |   assert(val != NULL && "value cannot be NULL");
232 |   assert(data != NULL && "data cannot be NULL");
233 | 
234 |   for(; len > 0; elem = NULL) {
235 |       elem = tns_parse(ops, data, len, &rest);
236 |       check(elem != NULL, "Failed to parse list.");
237 |       //  Skip over the bytes that tns_parse just consumed.
238 |       len -= (rest - data);
239 |       data = rest;
240 |       check(ops->add_to_list(ops, val, elem) != -1,
241 |             "Failed to add item to list.");
242 |   }
243 |   return 0;
244 | 
245 | error:
246 |   if(elem != NULL) {
247 |       ops->free_value(ops, elem);
248 |   }
249 |   return -1;
250 | }
251 | 
252 | 
253 | static int tns_parse_dict(const tns_ops *ops, void *val, const char *data, size_t len)
254 | {
255 |   //  Parse <key><value> pairs out of data and add each pair to the dict
256 |   //  val.  Returns 0 once len bytes are consumed, or -1 on failure.
257 |   void *key = NULL;
258 |   void *item = NULL;
259 |   char *remain = NULL;
260 |   assert(val != NULL && "value cannot be NULL");
261 |   assert(data != NULL && "data cannot be NULL");
262 | 
263 |   while(len > 0) {
264 |       key = tns_parse(ops, data, len, &remain);
265 |       check(key != NULL, "Failed to parse dict key from tnetstring.");
266 |       len = len - (remain - data);
267 |       data = remain;
268 |       //  A key with nothing after it is malformed; reject it here rather
269 |       //  than handing zero bytes to tns_parse.
270 |       check(len > 0, "Not a tnetstring: dict key without a value.");
271 |       item = tns_parse(ops, data, len, &remain);
272 |       check(item != NULL, "Failed to parse dict item from tnetstring.");
273 |       len = len - (remain - data);
274 |       data = remain;
275 |       check(ops->add_to_dict(ops, val, key, item) != -1,
276 |             "Failed to add element to dict.");
277 |       key = NULL;
278 |       item = NULL;
279 |   }
280 |   return 0;
281 | 
282 | error:
283 |   if(key) {
284 |       ops->free_value(ops, key);
285 |   }
286 |   if(item) {
287 |       ops->free_value(ops, item);
288 |   }
289 |   return -1;
290 | }
291 | 
292 | 
293 | 
294 | static INLINE size_t
295 | tns_strtosz(const char *data, size_t len, size_t *sz, char **end)
296 | {
297 |   //  Parse the decimal length prefix at the start of data (at most len
298 |   //  bytes).  On success stores the value in *sz, points *end at the
299 |   //  first character after the digits and returns 0; on failure
300 |   //  returns -1 (converted to size_t).
301 |   char c;
302 |   const char *pos, *eod;
303 |   size_t value = 0;
304 |   pos = data;
305 |   eod = data + len;
306 |   //  An empty input has no first character to inspect.
307 |   if(pos >= eod) {
308 |       return -1;
309 |   }
310 | 
311 |   //  The first character must be a digit.
312 |   //  The netstring spec explicitly forbids padding zeros.
313 |   //  So if it's a zero, it must be the only char in the string.
314 |   c = *pos++;
315 |   switch(c) {
316 |     case '0':
317 |       *sz = 0;
318 |       *end = (char*) pos;
319 |       return 0;
320 |     case '1': case '2': case '3':
321 |     case '4': case '5': case '6':
322 |     case '7': case '8': case '9':
323 |       value = c - '0';
324 |       break;
325 |     default:
326 |       return -1;
327 |   }
328 | 
329 |   //  Consume the remaining digits, up to maximum value length.
330 |   while(pos < eod) {
331 |       c = *pos;
332 |       if(c < '0' || c > '9') {
333 |           *sz = value;
334 |           *end = (char*) pos;
335 |           return 0;
336 |       }
337 |       value = (value * 10) + (c - '0');
338 |       check(value <= TNS_MAX_LENGTH,
339 |             "Not a tnetstring: absurdly large length prefix");
340 |       pos++;
341 |   }
342 | 
343 |   // If we consume the entire string, that's an error.
344 | error:
345 |   return -1;
346 | }
347 | //  Number of bytes written so far; they sit between head and the end.
348 | size_t tns_outbuf_size(tns_outbuf *outbuf)
349 | {
350 |   return (outbuf->buffer + outbuf->alloc_size) - outbuf->head;
351 | }
352 | 
353 | 
354 | static INLINE int tns_outbuf_itoa(tns_outbuf *outbuf, size_t n)
355 | {
356 |   //  Emit the decimal digits of n, least significant first; the buffer
357 |   //  fills from back to front, so they end up in reading order.
358 |   for(;;) {
359 |       check(tns_outbuf_putc(outbuf, '0' + n % 10) != -1,
360 |             "Failed to write int to tnetstring buffer.");
361 |       n /= 10;
362 |       if(n == 0) { return 0; }
363 |   }
364 | error:
365 |   return -1;
366 | }
367 | 
368 | 
369 | int tns_outbuf_init(tns_outbuf *outbuf)
370 | {
371 |   //  Allocate the first buffer; head starts at its end (back-to-front fill).
372 |   const size_t initial_size = 64;
373 |   outbuf->buffer = malloc(initial_size);
374 |   check_mem(outbuf->buffer);
375 |   outbuf->alloc_size = initial_size;
376 |   outbuf->head = outbuf->buffer + initial_size;
377 |   return 0;
378 | error:
379 |   outbuf->head = NULL;
380 |   outbuf->alloc_size = 0;
381 |   return -1;
382 | }
383 | 
384 | 
385 | static INLINE void tns_outbuf_free(tns_outbuf *outbuf)
386 | {
387 |   //  Release the storage and reset every field; a NULL outbuf is ignored.
388 |   if(outbuf == NULL) { return; }
389 |   free(outbuf->buffer);
390 |   outbuf->alloc_size = 0;
391 |   outbuf->head = NULL;
392 |   outbuf->buffer = NULL;
393 | }
394 | 
395 | 
396 | static INLINE int tns_outbuf_extend(tns_outbuf *outbuf, size_t free_size)
397 | {
398 |   //  Grow the buffer until it can hold the existing data plus free_size
399 |   //  more bytes.  Returns 0 on success, or -1 if allocation fails (the
400 |   //  outbuf is then left untouched).
401 |   const size_t in_use = tns_outbuf_size(outbuf);
402 |   const size_t wanted = free_size + in_use;
403 |   size_t capacity = outbuf->alloc_size;
404 |   char *grown = NULL;
405 | 
406 |   //  Always double at least once, then keep doubling until it fits.
407 |   do {
408 |       capacity *= 2;
409 |   } while(capacity < wanted);
410 | 
411 |   grown = malloc(capacity);
412 |   check_mem(grown);
413 | 
414 |   //  The data lives at the tail of the buffer, so copy it to the tail
415 |   //  of the new allocation before swapping the buffers over.
416 |   memmove(grown + capacity - in_use, outbuf->head, in_use);
417 |   free(outbuf->buffer);
418 |   outbuf->buffer = grown;
419 |   outbuf->head = grown + capacity - in_use;
420 |   outbuf->alloc_size = capacity;
421 |   return 0;
422 | error:
423 |   return -1;
424 | }
425 | 
426 | 
427 | int tns_outbuf_putc(tns_outbuf *outbuf, char c)
428 | {
429 |   //  Prepend one character, growing the buffer first when it is full.
430 |   //  Returns 0 on success and -1 if the buffer could not be extended.
431 |   if(outbuf->head == outbuf->buffer) {
432 |       check(tns_outbuf_extend(outbuf, 1) != -1, "Failed to extend buffer");
433 |   }
434 |   outbuf->head -= 1;
435 |   *outbuf->head = c;
436 |   return 0;
437 | error:
438 |   return -1;
439 | }
440 | 
441 | 
442 | int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len)
443 | {
444 |   //  Prepend len bytes of data, growing the buffer first if the free
445 |   //  space in front of head is too small.  Returns 0, or -1 on failure.
446 |   const size_t room = outbuf->head - outbuf->buffer;
447 |   if(room < len) {
448 |       check(tns_outbuf_extend(outbuf, len) != -1, "Failed to extend buffer");
449 |   }
450 |   outbuf->head -= len;
451 |   memmove(outbuf->head, data, len);
452 |   return 0;
453 | error:
454 |   return -1;
455 | }
456 | 
457 | 
458 | static char* tns_outbuf_finalize(tns_outbuf *outbuf, size_t *len)
459 | {
460 |   //  Shift the rendered data to the front of the buffer and return it.
461 |   //  A non-NULL len receives the byte count; otherwise the result is
462 |   //  NUL-terminated.  Frees the buffer and returns NULL if that fails.
463 |   const size_t data_len = tns_outbuf_size(outbuf);
464 |   char *bigger = NULL;
465 |   memmove(outbuf->buffer, outbuf->head, data_len);
466 |   if(len != NULL) {
467 |       *len = data_len;
468 |       return outbuf->buffer;
469 |   }
470 | 
471 |   //  head still holds its old position: equal to buffer means the data
472 |   //  fills the whole allocation and there is no spare byte for the NUL.
473 |   if(outbuf->head == outbuf->buffer) {
474 |       bigger = realloc(outbuf->buffer, outbuf->alloc_size*2);
475 |       check_mem(bigger);
476 |       outbuf->buffer = bigger;
477 |       outbuf->alloc_size = outbuf->alloc_size * 2;
478 |   }
479 |   outbuf->buffer[data_len] = '\0';
480 |   return outbuf->buffer;
481 | error:
482 |   free(outbuf->buffer);
483 |   outbuf->buffer = NULL;
484 |   outbuf->alloc_size = 0;
485 |   return NULL;
486 | }
487 | 
488 | 
489 | static INLINE int tns_outbuf_clamp(tns_outbuf *outbuf, size_t orig_size)
490 | {
491 |     //  Prefix everything written since orig_size with "<length>:".
492 |     //  Back-to-front output means the colon goes in before the digits.
493 |     const size_t payload_len = tns_outbuf_size(outbuf) - orig_size;
494 |     check(tns_outbuf_putc(outbuf, ':') != -1, "Failed to clamp outbuf");
495 |     check(tns_outbuf_itoa(outbuf, payload_len) != -1, "Failed to clamp outbuf");
496 |     return 0;
497 | 
498 | error:
499 |     return -1;
500 | }
501 | 
502 | //  Copy the rendered bytes (tns_outbuf_size(outbuf) of them) into dest.
503 | void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest)
504 | {
505 |   memmove(dest, outbuf->head, tns_outbuf_size(outbuf));
506 | }
507 | 
508 | 


--------------------------------------------------------------------------------
/tnetstring/tns_core.h:
--------------------------------------------------------------------------------
  1 | //
  2 | //  tns_core.h:  core code for a tnetstring parser in C
  3 | //
  4 | //  This is code for parsing and rendering data in the provisional
  5 | //  typed-netstring format proposed for inclusion in Mongrel2.  You can
  6 | //  think of it like a JSON library that uses a simpler wire format.
  7 | //
  8 | 
  9 | #ifndef _tns_core_h
 10 | #define _tns_core_h
 11 | 
 12 | #include <stdlib.h>
 13 | #include <stddef.h>
 14 | #include <ctype.h>
 15 | 
 16 | #ifdef _MSC_VER
 17 |   #define INLINE __forceinline /* use __forceinline (VC++ specific) */
 18 | #else
 19 |   #define INLINE inline        /* use standard inline */
 20 | #endif
 21 | 
 22 | 
 23 | //  tnetstring rendering is done using an "outbuf" struct, which combines
 24 | //  a malloced string with its allocation information.  Rendering is done
 25 | //  from back to front; the details are deliberately hidden here since
 26 | //  I'm experimenting with multiple implementations and it might change.
 27 | struct tns_outbuf_s;
 28 | typedef struct tns_outbuf_s tns_outbuf;
 29 | 
 30 | //  This enumeration gives the type tag for each data type in the
 31 | //  tnetstring encoding.
 32 | typedef enum tns_type_tag_e {
 33 |     tns_tag_string = ',',   //  a string blob
 34 |     tns_tag_integer = '#',  //  an integer
 35 |     tns_tag_float = '^',    //  a float
 36 |     tns_tag_bool = '!',     //  a boolean: "true" or "false"
 37 |     tns_tag_null = '~',     //  a null: zero-length payload
 38 |     tns_tag_dict = '}',     //  a dict: <key><value><key><value>
 39 |     tns_tag_list = ']',     //  a list: <item><item><item>
 40 | } tns_type_tag;
 41 | 
 42 | 
 43 | //  To convert between tnetstrings and the data structures of your application
 44 | //  you provide the following struct filled with function pointers.  They
 45 | //  will be called by the core parser/renderer as necessary.
 46 | //
 47 | //  Each callback is called with the containing struct as its first argument,
 48 | //  to allow a primitive type of closure.
 49 | 
 50 | struct tns_ops_s;
 51 | typedef struct tns_ops_s tns_ops;
 52 | 
 53 | struct tns_ops_s {
 54 | 
 55 |   //  Get the type tag of a data object; 0 means "not serializable".
 56 |   tns_type_tag (*get_type)(const tns_ops *ops, void *val);
 57 | 
 58 |   //  Parse various types of object from a string; NULL signals failure.
 59 |   void* (*parse_string)(const tns_ops *ops, const char *data, size_t len);
 60 |   void* (*parse_integer)(const tns_ops *ops, const char *data, size_t len);
 61 |   void* (*parse_float)(const tns_ops * ops, const char *data, size_t len);
 62 | 
 63 |   //  Constructors for constant primitive datatypes.
 64 |   void* (*get_null)(const tns_ops *ops);
 65 |   void* (*get_true)(const tns_ops *ops);
 66 |   void* (*get_false)(const tns_ops *ops);
 67 | 
 68 |   //  Render various types of object into a tns_outbuf; return 0 on success.
 69 |   int (*render_string)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
 70 |   int (*render_integer)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
 71 |   int (*render_float)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
 72 |   int (*render_bool)(const tns_ops *ops, void *val, tns_outbuf *outbuf);
 73 | 
 74 |   //  Functions for building and rendering list values.
 75 |   //  Remember that rendering is done from back to front, so
 76 |   //  you must write the last list element first.
 77 |   void* (*new_list)(const tns_ops *ops);
 78 |   int (*add_to_list)(const tns_ops *ops, void* list, void* item);
 79 |   int (*render_list)(const tns_ops *ops, void* list, tns_outbuf *outbuf);
 80 | 
 81 |   //  Functions for building and rendering dict values.
 82 |   //  Remember that rendering is done from back to front, so
 83 |   //  you must write each value first, followed by its key.
 84 |   void* (*new_dict)(const tns_ops *ops);
 85 |   int (*add_to_dict)(const tns_ops *ops, void* dict, void* key, void* item);
 86 |   int (*render_dict)(const tns_ops *ops, void* dict, tns_outbuf *outbuf);
 87 | 
 88 |   //  Free a value that is no longer in use (e.g. after a failed parse).
 89 |   void (*free_value)(const tns_ops *ops, void *value);
 90 | 
 91 | };
 92 | 
 93 | 
 94 | //  Parse an object off the front of a tnetstring.
 95 | //  Returns a pointer to the parsed object, or NULL if an error occurs.
 96 | //  The fourth argument is an output parameter; if non-NULL it will
 97 | //  receive the unparsed remainder of the string.
 98 | extern void* tns_parse(const tns_ops *ops, const char *data, size_t len, char** remain);
 99 | 
100 | //  If you need to read the length prefix yourself, e.g. because you're
101 | //  reading data off a socket, you can use this function to get just
102 | //  the payload parsing logic.
103 | extern void* tns_parse_payload(const tns_ops *ops, tns_type_tag type, const char *data, size_t len);
104 | 
105 | //  Render an object into a string.
106 | //  On success this function returns a malloced string containing
107 | //  the serialization of the given object.  The third argument
108 | //  'len' is an output parameter that will receive the number of bytes in
109 | //  the string; if NULL then the string will be null-terminated.
110 | //  The caller is responsible for freeing the returned string.
111 | //  On failure this function returns NULL and 'len' is unmodified.
112 | extern char* tns_render(const tns_ops *ops, void *val, size_t *len);
113 | 
114 | //  If you need to copy the final result off somewhere else, you 
115 | //  might like to build your own rendering function from the following.
116 | //  It will avoid some double-copying that tns_render does internally.
117 | //  Basic plan: Initialize an outbuf, pass it to tns_render_value, then
118 | //  copy the bytes away using tns_outbuf_memmove.
119 | extern int tns_render_value(const tns_ops *ops, void *val, tns_outbuf *outbuf);
120 | extern int tns_outbuf_init(tns_outbuf *outbuf);
121 | extern void tns_outbuf_memmove(tns_outbuf *outbuf, char *dest);
122 | 
123 | //  Use these functions for rendering into an outbuf.
124 | extern size_t tns_outbuf_size(tns_outbuf *outbuf);
125 | extern int tns_outbuf_putc(tns_outbuf *outbuf, char c);
126 | extern int tns_outbuf_puts(tns_outbuf *outbuf, const char *data, size_t len);
127 | 
128 | #endif
129 | 


--------------------------------------------------------------------------------
/tools/shootout.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import sys
  3 | import random
  4 | 
  5 | import cjson
  6 | import ujson
  7 | import tnetstring
  8 | import marshal
  9 | 
 10 | from tnetstring.tests.test_format import FORMAT_EXAMPLES, get_random_object
 11 | 
 12 | TESTS = []  #  (object, tnetstring, cjson, marshal) tuples filled by add_test
 13 | def add_test(v):
 14 |     #  These modules have a few round-tripping problems...
 15 |     try:
 16 |         assert cjson.decode(cjson.encode(v)) == v
 17 |         assert ujson.loads(ujson.dumps(v)) == v
 18 |     except Exception:
 19 |         pass
 20 |     else:
 21 |         TESTS.append((v,tnetstring.dumps(v),cjson.encode(v),marshal.dumps(v)))
 22 | 
 23 | #  Offer every value from the format examples to add_test.
 24 | for (k,v) in FORMAT_EXAMPLES.iteritems():
 25 |     add_test(v)
 26 | 
 27 | #  Then offer 20 randomly-generated objects, drawn from a generator
 28 | #  with a fixed seed so that every run works on the same data.
 29 | r = random.Random(7)
 30 | for _ in xrange(20):
 31 |     v = get_random_object(r)
 32 |     add_test(v)
 33 | 
 34 | 
 35 | TEST_DUMP_ONLY = False
 36 | TEST_LOAD_ONLY = False
 37 | if len(sys.argv) >1 :
 38 |     if sys.argv[1] == "dumps":
 39 |         TEST_DUMP_ONLY = True
 40 |     elif sys.argv[1] == "loads":
 41 |         TEST_LOAD_ONLY = True
 42 |     elif sys.argv[1] == "roundtrip":
 43 |         pass
 44 |     else:
 45 |         raise ValueError("unknown test type: " + sys.argv[1])
 46 | 
 47 | 
 48 | def thrash_tnetstring():
 49 |     for obj, tns, json, msh in TESTS:
 50 |         if TEST_DUMP_ONLY:
 51 |             tnetstring.dumps(obj)
 52 |         elif TEST_LOAD_ONLY:
 53 |             assert tnetstring.loads(tns) == obj
 54 |         else:
 55 |             assert tnetstring.loads(tnetstring.dumps(obj)) == obj
 56 | 
 57 | def thrash_cjson():
 58 |     for obj, tns, json, msh in TESTS:
 59 |         if TEST_DUMP_ONLY:
 60 |             cjson.encode(obj)
 61 |         elif TEST_LOAD_ONLY:
 62 |             assert cjson.decode(json) == obj
 63 |         else:
 64 |             assert cjson.decode(cjson.encode(obj)) == obj
 65 | 
 66 | def thrash_ujson():
 67 |     for obj, tns, json, msh in TESTS:
 68 |         if TEST_DUMP_ONLY:
 69 |             ujson.dumps(obj)
 70 |         elif TEST_LOAD_ONLY:
 71 |             assert ujson.loads(json) == obj
 72 |         else:
 73 |             assert ujson.loads(ujson.dumps(obj)) == obj
 74 | 
 75 | def thrash_marshal():
 76 |     for obj, tns, json, msh in TESTS:
 77 |         if TEST_DUMP_ONLY:
 78 |             marshal.dumps(obj)
 79 |         elif TEST_LOAD_ONLY:
 80 |             assert marshal.loads(msh) == obj
 81 |         else:
 82 |             assert marshal.loads(marshal.dumps(obj)) == obj
 83 | 
 84 | 
 85 | if __name__ == "__main__":
 86 |     import timeit
 87 |     t1 = timeit.Timer("thrash_tnetstring()",
 88 |                       "from shootout import thrash_tnetstring")
 89 |     t1 = min(t1.repeat(number=10000))
 90 |     print "tnetstring", t1
 91 | 
 92 |     t2 = timeit.Timer("thrash_cjson()",
 93 |                       "from shootout import thrash_cjson")
 94 |     t2 = min(t2.repeat(number=10000))
 95 |     print "cjson:", t2
 96 |     print "speedup: ", round((t2 - t1) / (t2) * 100,2), "%"
 97 | 
 98 |     t3 = timeit.Timer("thrash_ujson()",
 99 |                       "from shootout import thrash_ujson")
100 |     t3 = min(t3.repeat(number=10000))
101 |     print "ujson:", t3
102 |     print "speedup: ", round((t3 - t1) / (t3) * 100,2), "%"
103 | 
104 |     t4 = timeit.Timer("thrash_marshal()",
105 |                       "from shootout import thrash_marshal")
106 |     t4 = min(t4.repeat(number=10000))
107 |     print "marshal:", t4
108 |     print "speedup: ", round((t4 - t1) / (t4) * 100,2), "%"
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------