├── .gitignore
├── .travis.yml
├── README
├── setup.py
├── tests.py
└── son.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__
 2 | *.pyo
 3 | *.so
 4 | *.egg
 5 | *.egg-info
 6 | build/
 7 | dist/
 8 | venv/
 9 | .idea/
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - 3.3
 4 |   - 3.4
 5 | install:
 6 |   - pip install pytest
 7 |   - python setup.py install
 8 | script:
 9 |   - py.test tests.py
10 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | SON (Simple Object Notation) data interchange format.
 2 | 
 3 | Simple data format similar to JSON, but with some minor changes:
 4 |     - comments start with a # sign and end with a newline (\n)
 5 |     - comma after a key-value pair is optional
 6 |     - comma after an array element is optional
 7 | 
 8 | JSON is compatible with SON in the sense that
 9 | JSON data is also SON data, but not vice versa.
10 | 
11 | SON data example:
12 | 
13 |     {
14 |         # Personal information
15 |     
16 |         "name": "Alexander Grothendieck"
17 |         "fields": "mathematics"
18 |         "main_topics": [
19 |             "Etale cohomology"
20 |             "Motives"
21 |             "Topos theory"
22 |             "Schemes"
23 |         ]
24 |     }
25 | 
26 | License: Public Domain.
27 | NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
28 | 
29 | Original JSON data format was specified by Douglas Crockford.
30 | 
31 | 
32 | Links:
33 | https://github.com/aleksandergurin/simple-object-notation
34 | http://json.org
35 | https://github.com/douglascrockford/JSON-js
36 | https://tools.ietf.org/html/rfc7159
37 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from distutils.core import setup
 4 | 
 5 | 
 6 | def long_description():
 7 |     with open('README') as f:
 8 |         return f.read()
 9 | 
# Register the single-module 'son' distribution.  The metadata is spelled
# out as one mapping and unpacked into the setup() call.
setup(**{
    'name': 'son',
    'version': '0.1',
    'description': '(SON) Simple Object Notation data interchange format.',
    'long_description': long_description(),
    'author': 'Aleksander Gurin',
    'author_email': 'alek.gurin@gmail.com',
    'url': 'https://github.com/aleksandergurin/simple-object-notation',
    'py_modules': ['son'],
    'license': 'Public Domain',
    'keywords': 'simple object notation, data format, serialization, deserialization',
    'classifiers': [
        'Development Status :: 1 - Planning',
        'Intended Audience :: Developers',
        'Natural Language :: English',
        'License :: Public Domain',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: Software Development',
    ],
})
33 | 


--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from son import loads, dumps
  3 | 
  4 | 
  5 | # Serialization tests
  6 | def test_null_serialize():
  7 |     assert dumps(None) == 'null'
  8 | 
  9 | 
 10 | def test_bool_serialize():
 11 |     assert dumps(True) == 'true'
 12 |     assert dumps(False) == 'false'
 13 | 
 14 | 
 15 | def test_number_serialize():
 16 |     assert dumps(123) == '123'
 17 |     assert dumps(-987) == '-987'
 18 |     assert dumps(0.1) == '0.1'
 19 |     assert dumps(0.00005) == '5e-05'
 20 |     assert dumps(-123e3) == '-123000.0'
 21 | 
 22 | 
 23 | def test_string_serialize():
 24 |     s = r'"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t'        \
 25 |         r'\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014'       \
 26 |         r'\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"'
 27 | 
 28 |     assert dumps('\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009'
 29 |                  '\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014'
 30 |                  '\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f') == s
 31 |     assert dumps('abcdefg') == '"abcdefg"'
 32 |     assert dumps('\u0391\u0392\u0393\u0394') == r'"\u0391\u0392\u0393\u0394"'
 33 |     assert dumps('\u0410\u0411\u0412\u0413') == r'"\u0410\u0411\u0412\u0413"'
 34 |     assert dumps('\u05d0\u05d1\u05d2\u05d3') == r'"\u05d0\u05d1\u05d2\u05d3"'
 35 | 
 36 | 
 37 | def test_array_serialize():
 38 |     assert dumps([1, 2, 3]) == '[1 2 3]'
 39 |     assert dumps([1, 2, 3], json_compatibility=True) == '[1, 2, 3]'
 40 |     assert dumps([[[[[[[[[["one", "two", "three"]]]]]]]]]]) == '[[[[[[[[[["one" "two" "three"]]]]]]]]]]'
 41 |     assert dumps([[1, 2], ["one", "two"]]) == '[[1 2] ["one" "two"]]'
 42 |     assert dumps([[1, 2], ["one", "two"]], json_compatibility=True) == '[[1, 2], ["one", "two"]]'
 43 | 
 44 | 
 45 | def test_object_serialize():
 46 |     assert dumps({"a": 1, "b": 2, "c": 3}, sorted_keys=True) == '{"a": 1 "b": 2 "c": 3}'
 47 |     assert dumps({"a": 1, "b": 2, "c": 3}, sorted_keys=True,
 48 |                  json_compatibility=True) == '{"a": 1, "b": 2, "c": 3}'
 49 |     assert dumps({"c": 12, "b": {"e": {}, "d": [{"f": 15}]}, "a": [[1, 2], [3, 4]]},
 50 |                  sorted_keys=True) == '{"a": [[1 2] [3 4]] "b": {"d": [{"f": 15}] "e": {}} "c": 12}'
 51 |     assert dumps({"c": 12, "b": {"e": {}, "d": [{"f": 15}]}, "a": [[1, 2], [3, 4]]}, sorted_keys=True,
 52 |                  json_compatibility=True) == '{"a": [[1, 2], [3, 4]], "b": {"d": [{"f": 15}], "e": {}}, "c": 12}'
 53 | 
 54 | 
 55 | # Deserialization tests
 56 | # TODO: add tests which should fail
 57 | def test_null_deserialize():
 58 |     assert loads('null') is None
 59 |     assert loads('   null   ') is None
 60 |     assert loads(' # comment\n null # comment') is None
 61 | 
 62 | 
 63 | def test_bool_deserialize():
 64 |     assert loads('true')
 65 |     assert loads('   true   ')
 66 |     assert loads(' # comment\n true # comment')
 67 |     assert not loads('false')
 68 |     assert not loads('   false   ')
 69 |     assert not loads(' # comment\n false # comment')
 70 | 
 71 | 
 72 | def test_int_number_deserialize():
 73 |     assert loads('5') == 5
 74 |     assert loads(' 5 ') == 5
 75 |     assert loads('-17') == -17
 76 |     assert loads(' -17 ') == -17
 77 |     assert loads(' # comment\n  -17  # comment') == -17
 78 | 
 79 | 
 80 | def test_float_number_deserialize():
 81 |     assert loads('5.0') == 5.0
 82 |     assert loads('  5.0  ') == 5.0
 83 |     assert loads('5e1') == 50.0
 84 |     assert loads('  -0.5e1  ') == -5.0
 85 |     assert loads(' # comment\n  -0.5e1  # comment') == -5.0
 86 | 
 87 | 
 88 | def test_string_deserialize():
 89 |     assert loads('"Testing"') == 'Testing'
 90 |     assert loads('   "Testing"   ') == 'Testing'
 91 |     assert loads(' # comment\n "Testing" # comment') == 'Testing'
 92 |     # the following assert test escape symbols "\" \\ \/ \b \f \n \r \t"
 93 |     assert loads(' # comment\n "\\" \\\\ \\/ \\b \\f \\n \\r \\t" # comment') == '" \\ / \b \f \n \r \t'
 94 | 
 95 | 
 96 | def test_string_with_unicode_symbols_deserialize():
 97 |     assert loads('"ΑΒΓΔ"') == 'ΑΒΓΔ'    # Greek
 98 |     assert loads('"АБВГ"') == 'АБВГ'    # Cyrillic
 99 |     assert loads('"אבגד"') == 'אבגד'    # Hebrew
100 | 
101 |     assert loads('"\\u0391\\u0392\\u0393\\u0394"') == '\u0391\u0392\u0393\u0394'    # Greek
102 |     assert loads('"\\u0410\\u0411\\u0412\\u0413"') == '\u0410\u0411\u0412\u0413'    # Cyrillic
103 |     # with lowercase hexadecimal symbols
104 |     assert loads('"\\u05d0\\u05d1\\u05d2\\u05d3"') == '\u05d0\u05d1\u05d2\u05d3'    # Hebrew
105 |     # with uppercase hexadecimal symbols
106 |     assert loads('"\\u05D0\\u05D1\\u05D2\\u05D3"') == '\u05D0\u05D1\u05D2\u05D3'
107 | 
108 | 
def test_array_with_comma_separator_deserialize():
    """JSON-style arrays (comma separated) decode to lists."""
    assert loads(" [1,2,3] ") == [1, 2, 3]
    assert loads("[[[[[[[[[[1, 2, 3]]]]]]]]]]") == [[[[[[[[[[1, 2, 3]]]]]]]]]]

    expected = ["string", 17, None, [True, False]]
    compact = '["string", 17, null, [true, false]]'
    spread_out = (' # comment\n [ # comment\n "string" , 17 , # comment\n'
                  'null , \n[\n true,\nfalse\n]\n]')
    for source in (compact, spread_out):
        assert loads(source) == expected
115 | 
116 | 
def test_array_without_comma_separator_deserialize():
    """Arrays whose elements are separated only by whitespace decode to lists."""
    assert loads(" [1 2 3] ") == [1, 2, 3]

    expected = ["string", 17, None, [True, False]]
    compact = '["string" 17 null [true false]]'
    spread_out = (' # comment\n [ # comment\n"string"\n17\n# comment\n'
                  'null\n[\ntrue\nfalse\n]\n ] ')
    for source in (compact, spread_out):
        assert loads(source) == expected
122 | 
123 | 
def test_object_with_comma_separator_deserialize():
    """A JSON-style object (comma separated members) decodes to a dict."""
    source = ' # comment\n{\n  "one": 1, # comment\n  "two": { "inner": [1, 2, 3] } # comment\n} '
    parsed = loads(source)

    inner = parsed.get('two').get("inner")
    assert parsed.get('one') == 1
    assert inner == [1, 2, 3]
    assert len(parsed) == 2
129 | 
130 | 
def test_object_without_comma_separator_deserialize():
    """An object whose members are separated only by whitespace decodes to a dict.

    The ``three`` member is checked with ``is False`` because a plain
    ``not ...`` assertion would also accept None or 0.
    """
    obj = loads(' # comment\n{\n  "one": 1  # comment\n  "two": [1 2 3] "three": false  # comment\n} ')
    assert obj.get('one') == 1
    assert obj.get('two') == [1, 2, 3]
    assert obj.get('three') is False
    assert len(obj.keys()) == 3
137 | 


--------------------------------------------------------------------------------
/son.py:
--------------------------------------------------------------------------------
  1 | # Public Domain.
  2 | # NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
  3 | # Original JSON data format was specified by Douglas Crockford.
  4 | # Links:
  5 | # https://github.com/aleksandergurin/simple-object-notation
  6 | # http://json.org
  7 | # https://github.com/douglascrockford/JSON-js
  8 | # https://tools.ietf.org/html/rfc7159
  9 | 
 10 | 
 11 | """SON (Simple Object Notation) data interchange format.
 12 | 
 13 | Simple data format similar to JSON, but with some minor changes:
  14 |     - comments start with a # sign and end with a newline (\\n)
 15 |     - comma after a key-value pair is optional
 16 |     - comma after an array element is optional
 17 | 
  18 | JSON is compatible with SON in the sense that
  19 | JSON data is also SON data, but not vice versa.
 20 | 
 21 | SON data example:
 22 | 
 23 |     {
 24 |         # Personal information
 25 | 
 26 |         "name": "Alexander Grothendieck"
 27 |         "fields": "mathematics"
 28 |         "main_topics": ["Etale cohomology"  "Motives"  "Topos theory"  "Schemes"]
 29 |     }
 30 | 
 31 | """
 32 | 
 33 | import re
 34 | import io
 35 | import math
 36 | 
 37 | 
 38 | class Consts:
 39 |     number_re = re.compile(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')
 40 | 
 41 |     escape_deserialize = {
 42 |         '"': '"',
 43 |         '\\': '\\',
 44 |         '/': '/',
 45 |         'b': '\b',
 46 |         'f': '\f',
 47 |         'n': '\n',
 48 |         'r': '\r',
 49 |         't': '\t'
 50 |     }
 51 | 
 52 |     escape_serialize = {
 53 |         '"': '\\"',
 54 |         '\\': '\\\\',
 55 |         '/': '\\/',
 56 |         '\b': '\\b',
 57 |         '\f': '\\f',
 58 |         '\n': '\\n',
 59 |         '\r': '\\r',
 60 |         '\t': '\\t'
 61 |     }
 62 | 
 63 | 
 64 | def dumps(obj, sorted_keys=False, json_compatibility=False):
 65 |     """Serialize an input Python object into SON data.
 66 |     Note: object could not have a circular references
 67 |     (in case when circular reference found ValueError
 68 |     will be raised).
 69 | 
 70 |     :param obj: an input Python object
 71 |     :return: string containing serialized SON data
 72 |     """
 73 | 
 74 |     circular_refs = {}
 75 |     buf = io.StringIO()
 76 | 
 77 |     def string_val(s):
 78 |         buf.write('"')
 79 |         for c in s:
 80 |             if Consts.escape_serialize.get(c):
 81 |                 buf.write(Consts.escape_serialize.get(c))
 82 |             elif 0x1f < ord(c) < 0x7f:
 83 |                 buf.write(c)
 84 |             else:
 85 |                 buf.write('\\u{:04x}'.format(ord(c)))
 86 |         buf.write('"')
 87 | 
 88 |     def array_val(a):
 89 |         buf.write('[')
 90 |         if len(a) != 0:
 91 |             value(a[0])
 92 |             for i in range(1, len(a)):
 93 |                 if json_compatibility:
 94 |                     buf.write(',')
 95 |                 buf.write(' ')
 96 |                 value(a[i])
 97 |         buf.write(']')
 98 | 
 99 |     def object_val(o):
100 |         buf.write('{')
101 | 
102 |         if len(o) != 0:
103 |             if sorted_keys:
104 |                 keys = sorted(o)
105 |             else:
106 |                 keys = list(o.keys())
107 |             string_val(str(keys[0]))
108 |             buf.write(': ')
109 |             value(o.get(keys[0]))
110 |             for i in range(1, len(o)):
111 |                 if json_compatibility:
112 |                     buf.write(',')
113 |                 buf.write(' ')
114 |                 string_val(str(keys[i]))
115 |                 buf.write(': ')
116 |                 value(o.get(keys[i]))
117 |         buf.write('}')
118 | 
119 |     def value(x):
120 |         if isinstance(x, bool):
121 |             if x:
122 |                 buf.write('true')
123 |             else:
124 |                 buf.write('false')
125 |         elif x is None:
126 |             buf.write('null')
127 |         elif isinstance(x, int):
128 |             # We have to create an int instance because
129 |             # subclass of int could override __repr__ method
130 |             buf.write(repr(int(x)))
131 |         elif isinstance(x, float):
132 |             if math.isinf(x) or math.isnan(x):
133 |                 raise ValueError("Out of range float value {}".format(x))
134 |             # We have to create an float instance because
135 |             # subclass of float could override __repr__ method
136 |             buf.write(repr(float(x)))
137 |         elif isinstance(x, str):
138 |             string_val(x)
139 |         elif isinstance(x, dict):
140 |             if id(x) in circular_refs:
141 |                 raise ValueError("Circular reference detected")
142 |             else:
143 |                 circular_refs[id(x)] = x
144 |                 object_val(x)
145 |         elif isinstance(x, list) or isinstance(x, tuple):
146 |             if id(x) in circular_refs:
147 |                 raise ValueError("Circular reference detected")
148 |             else:
149 |                 circular_refs[id(x)] = x
150 |                 array_val(x)
151 |         else:
152 |             raise TypeError("{} is not serializable".format(str(x)))
153 | 
154 |     value(obj)
155 | 
156 |     return buf.getvalue()
157 | 
158 | 
def loads(input_str):
    """Deserialize input SON data to a Python object.

    :param input_str: string containing SON data
    :return: python object (dict, list, str, int, float, bool or None)
    :raises TypeError: if input_str is not a string
    :raises ValueError: if the input is not valid SON data, including empty
        or truncated input; the message ends with the 1-based line and
        column of the offending position
    """
    if not isinstance(input_str, str):
        raise TypeError("Input object must be a string")

    it = enumerate(input_str)
    # 'ch' is the current character and 'at' its index.  'ch' starts as a
    # blank so that the first skip_spaces_and_comments() call pulls in the
    # real first character; it becomes None once the input is exhausted.
    ch, at = " ", 0
    hex_digits = frozenset("0123456789abcdefABCDEF")

    def next_ch():
        nonlocal ch, at
        try:
            at, ch = next(it)
        except StopIteration:
            ch = None

    def skip_off(n):
        for _ in range(n):
            next_ch()

    def error(message):
        # Derive the position from the index of the current character
        # (or the end of input); both numbers are reported 1-based.
        pos = len(input_str) if ch is None else at
        line = input_str.count('\n', 0, pos) + 1
        column = pos - input_str.rfind('\n', 0, pos)
        raise ValueError("{}: line {}, column {}".format(message, line, column))

    def hex4(pos):
        # Return the value of the four hex digits at 'pos', or None if
        # there are not exactly four hexadecimal digits there.  int()
        # alone is too lenient: it also accepts "+123", "0x1F" or " 12 ".
        digits = input_str[pos:pos + 4]
        if len(digits) == 4 and hex_digits.issuperset(digits):
            return int(digits, 16)
        return None

    def literal_val():
        if input_str[at:at + 4] == "null":
            skip_off(4)
            return None
        elif input_str[at:at + 4] == "true":
            skip_off(4)
            return True
        elif input_str[at:at + 5] == "false":
            skip_off(5)
            return False
        else:
            error("Unexpected value")

    def number_val():
        m = Consts.number_re.match(input_str, at)
        if m is None:
            error("Bad number")
        integer, fraction, exp = m.groups()
        if fraction or exp:
            text = integer + (fraction or '') + (exp or '')
            res = float(text)
        else:
            text = integer
            res = int(text)
        skip_off(len(text))
        return res

    def string_val():
        if ch != '"':
            error("Expecting '\"'")
        next_ch()

        res = io.StringIO()
        while ch:
            if ch == '"':
                next_ch()
                return res.getvalue()
            if ch == '\\':
                next_ch()
                if not ch:
                    break
                if ch == 'u':
                    next_ch()
                    if not ch:
                        break
                    code = hex4(at)
                    if code is None:
                        error("Invalid \\uXXXX escape")
                    skip_off(4)
                    # A high surrogate directly followed by an escaped low
                    # surrogate encodes one character outside the BMP.
                    if (0xd800 <= code <= 0xdbff and ch == '\\'
                            and input_str[at + 1:at + 2] == 'u'):
                        low = hex4(at + 2)
                        if low is not None and 0xdc00 <= low <= 0xdfff:
                            skip_off(6)
                            code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00)
                    res.write(chr(code))
                else:
                    esc_ch = Consts.escape_deserialize.get(ch)
                    if not esc_ch:
                        error("Invalid escape")
                    res.write(esc_ch)
                    next_ch()
            else:
                res.write(ch)
                next_ch()
        # Input ended before the closing quote.
        error("Bad string")

    def array_val():
        if ch != '[':
            error("Expecting '['")
        next_ch()
        skip_spaces_and_comments()

        res = []

        if ch == ']':
            next_ch()
            return res

        while ch:
            # skip_spaces_and_comments() will be called
            # inside value() before and after actual token
            res.append(value())
            if ch == ']':
                next_ch()
                return res
            if ch == ',':
                next_ch()
                # skip_spaces_and_comments() will be called
                # inside value() during the next loop iteration
        error("Bad array")

    def object_val():
        if ch != '{':
            error("Expecting '{'")
        next_ch()
        skip_spaces_and_comments()

        res = {}

        if ch == '}':
            next_ch()
            return res

        while ch:
            if ch != '"':
                error("Expecting property name enclosed in '\"'")
            key = string_val()
            skip_spaces_and_comments()
            if ch != ':':
                error("Expecting ':' delimiter")
            # skip ':'
            next_ch()
            # skip_spaces_and_comments() will be called
            # inside value() before and after actual token
            res[key] = value()
            if ch == '}':
                next_ch()
                return res
            if ch == ',':
                next_ch()
                skip_spaces_and_comments()
        error("Bad object")

    def value():
        skip_spaces_and_comments()

        # Without this guard the range comparison below would raise a
        # TypeError for empty or truncated input.
        if ch is None:
            error("Unexpected end of input")

        if ch == '{':
            res = object_val()
        elif ch == '[':
            res = array_val()
        elif ch == '"':
            res = string_val()
        elif ('0' <= ch <= '9') or ch == '-':
            res = number_val()
        else:
            res = literal_val()

        skip_spaces_and_comments()

        return res

    def skip_spaces_and_comments():
        while ch:
            if ch.isspace():
                next_ch()
            elif ch == '#':
                # a comment runs up to (not including) the newline
                while ch and ch != '\n':
                    next_ch()
            else:
                break

    result = value()

    if ch:
        error("Extra data")

    return result
348 | 
349 | 
# Alternative names for the two entry points: encode() is dumps() and
# decode() is loads().
encode, decode = dumps, loads
352 | 


--------------------------------------------------------------------------------