├── .gitignore ├── .travis.yml ├── README ├── setup.py ├── tests.py └── son.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyo 3 | *.so 4 | *.egg 5 | *.egg-info 6 | build/ 7 | dist/ 8 | venv/ 9 | .idea/ 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.3 4 | - 3.4 5 | install: 6 | - pip install pytest 7 | - python setup.py install 8 | script: 9 | - py.test tests.py 10 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | SON (Simple Object Notation) data interchange format. 2 | 3 | Simple data format similar to JSON, but with some minor changes: 4 | - comments start with a # sign and end at the next newline (\n) 5 | - comma after a key-value pair is optional 6 | - comma after an array element is optional 7 | 8 | JSON is compatible with SON in the sense that 9 | JSON data is also SON data, but not vice versa. 10 | 11 | SON data example: 12 | 13 | { 14 | # Personal information 15 | 16 | "name": "Alexander Grothendieck" 17 | "fields": "mathematics" 18 | "main_topics": [ 19 | "Etale cohomology" 20 | "Motives" 21 | "Topos theory" 22 | "Schemes" 23 | ] 24 | } 25 | 26 | License: Public Domain. 27 | NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 28 | 29 | The original JSON data format was specified by Douglas Crockford. 
30 | 31 | 32 | Links: 33 | https://github.com/aleksandergurin/simple-object-notation 34 | http://json.org 35 | https://github.com/douglascrockford/JSON-js 36 | https://tools.ietf.org/html/rfc7159 37 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | 6 | def long_description(): 7 | with open('README') as f: 8 | return f.read() 9 | 10 | setup( 11 | name='son', 12 | version='0.1', 13 | description='(SON) Simple Object Notation data interchange format.', 14 | long_description=long_description(), 15 | author='Aleksander Gurin', 16 | author_email='alek.gurin@gmail.com', 17 | url='https://github.com/aleksandergurin/simple-object-notation', 18 | py_modules=['son'], 19 | license='Public Domain', 20 | keywords='simple object notation, data format, serialization, deserialization', 21 | classifiers=[ 22 | 'Development Status :: 1 - Planning', 23 | 'Intended Audience :: Developers', 24 | 'Natural Language :: English', 25 | 'License :: Public Domain', 26 | 'Programming Language :: Python', 27 | 'Programming Language :: Python :: 3.3', 28 | 'Programming Language :: Python :: 3.4', 29 | 'Programming Language :: Python :: Implementation :: CPython', 30 | 'Topic :: Software Development' 31 | ] 32 | ) 33 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | 2 | from son import loads, dumps 3 | 4 | 5 | # Serialization tests 6 | def test_null_serialize(): 7 | assert dumps(None) == 'null' 8 | 9 | 10 | def test_bool_serialize(): 11 | assert dumps(True) == 'true' 12 | assert dumps(False) == 'false' 13 | 14 | 15 | def test_number_serialize(): 16 | assert dumps(123) == '123' 17 | assert dumps(-987) == '-987' 18 | assert dumps(0.1) == '0.1' 19 | assert dumps(0.00005) == 
'5e-05' 20 | assert dumps(-123e3) == '-123000.0' 21 | 22 | 23 | def test_string_serialize(): 24 | s = r'"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t' \ 25 | r'\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014' \ 26 | r'\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"' 27 | 28 | assert dumps('\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009' 29 | '\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014' 30 | '\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f') == s 31 | assert dumps('abcdefg') == '"abcdefg"' 32 | assert dumps('\u0391\u0392\u0393\u0394') == r'"\u0391\u0392\u0393\u0394"' 33 | assert dumps('\u0410\u0411\u0412\u0413') == r'"\u0410\u0411\u0412\u0413"' 34 | assert dumps('\u05d0\u05d1\u05d2\u05d3') == r'"\u05d0\u05d1\u05d2\u05d3"' 35 | 36 | 37 | def test_array_serialize(): 38 | assert dumps([1, 2, 3]) == '[1 2 3]' 39 | assert dumps([1, 2, 3], json_compatibility=True) == '[1, 2, 3]' 40 | assert dumps([[[[[[[[[["one", "two", "three"]]]]]]]]]]) == '[[[[[[[[[["one" "two" "three"]]]]]]]]]]' 41 | assert dumps([[1, 2], ["one", "two"]]) == '[[1 2] ["one" "two"]]' 42 | assert dumps([[1, 2], ["one", "two"]], json_compatibility=True) == '[[1, 2], ["one", "two"]]' 43 | 44 | 45 | def test_object_serialize(): 46 | assert dumps({"a": 1, "b": 2, "c": 3}, sorted_keys=True) == '{"a": 1 "b": 2 "c": 3}' 47 | assert dumps({"a": 1, "b": 2, "c": 3}, sorted_keys=True, 48 | json_compatibility=True) == '{"a": 1, "b": 2, "c": 3}' 49 | assert dumps({"c": 12, "b": {"e": {}, "d": [{"f": 15}]}, "a": [[1, 2], [3, 4]]}, 50 | sorted_keys=True) == '{"a": [[1 2] [3 4]] "b": {"d": [{"f": 15}] "e": {}} "c": 12}' 51 | assert dumps({"c": 12, "b": {"e": {}, "d": [{"f": 15}]}, "a": [[1, 2], [3, 4]]}, sorted_keys=True, 52 | json_compatibility=True) == '{"a": [[1, 2], [3, 4]], "b": {"d": [{"f": 15}], "e": {}}, "c": 12}' 53 | 54 | 55 | # Deserialization tests 56 | # TODO: add tests which should fail 57 | def 
test_null_deserialize(): 58 | assert loads('null') is None 59 | assert loads(' null ') is None 60 | assert loads(' # comment\n null # comment') is None 61 | 62 | 63 | def test_bool_deserialize(): 64 | assert loads('true') 65 | assert loads(' true ') 66 | assert loads(' # comment\n true # comment') 67 | assert not loads('false') 68 | assert not loads(' false ') 69 | assert not loads(' # comment\n false # comment') 70 | 71 | 72 | def test_int_number_deserialize(): 73 | assert loads('5') == 5 74 | assert loads(' 5 ') == 5 75 | assert loads('-17') == -17 76 | assert loads(' -17 ') == -17 77 | assert loads(' # comment\n -17 # comment') == -17 78 | 79 | 80 | def test_float_number_deserialize(): 81 | assert loads('5.0') == 5.0 82 | assert loads(' 5.0 ') == 5.0 83 | assert loads('5e1') == 50.0 84 | assert loads(' -0.5e1 ') == -5.0 85 | assert loads(' # comment\n -0.5e1 # comment') == -5.0 86 | 87 | 88 | def test_string_deserialize(): 89 | assert loads('"Testing"') == 'Testing' 90 | assert loads(' "Testing" ') == 'Testing' 91 | assert loads(' # comment\n "Testing" # comment') == 'Testing' 92 | # the following assert test escape symbols "\" \\ \/ \b \f \n \r \t" 93 | assert loads(' # comment\n "\\" \\\\ \\/ \\b \\f \\n \\r \\t" # comment') == '" \\ / \b \f \n \r \t' 94 | 95 | 96 | def test_string_with_unicode_symbols_deserialize(): 97 | assert loads('"ΑΒΓΔ"') == 'ΑΒΓΔ' # Greek 98 | assert loads('"АБВГ"') == 'АБВГ' # Cyrillic 99 | assert loads('"אבגד"') == 'אבגד' # Hebrew 100 | 101 | assert loads('"\\u0391\\u0392\\u0393\\u0394"') == '\u0391\u0392\u0393\u0394' # Greek 102 | assert loads('"\\u0410\\u0411\\u0412\\u0413"') == '\u0410\u0411\u0412\u0413' # Cyrillic 103 | # with lowercase hexadecimal symbols 104 | assert loads('"\\u05d0\\u05d1\\u05d2\\u05d3"') == '\u05d0\u05d1\u05d2\u05d3' # Hebrew 105 | # with uppercase hexadecimal symbols 106 | assert loads('"\\u05D0\\u05D1\\u05D2\\u05D3"') == '\u05D0\u05D1\u05D2\u05D3' 107 | 108 | 109 | def 
test_array_with_comma_separator_deserialize(): 110 | assert loads(" [1,2,3] ") == [1, 2, 3] 111 | assert loads("[[[[[[[[[[1, 2, 3]]]]]]]]]]") == [[[[[[[[[[1, 2, 3]]]]]]]]]] 112 | assert loads('["string", 17, null, [true, false]]') == ["string", 17, None, [True, False]] 113 | assert loads(' # comment\n [ # comment\n "string" , 17 , # comment\n' 114 | 'null , \n[\n true,\nfalse\n]\n]') == ["string", 17, None, [True, False]] 115 | 116 | 117 | def test_array_without_comma_separator_deserialize(): 118 | assert loads(" [1 2 3] ") == [1, 2, 3] 119 | assert loads('["string" 17 null [true false]]') == ["string", 17, None, [True, False]] 120 | assert loads(' # comment\n [ # comment\n"string"\n17\n# comment\n' 121 | 'null\n[\ntrue\nfalse\n]\n ] ') == ["string", 17, None, [True, False]] 122 | 123 | 124 | def test_object_with_comma_separator_deserialize(): 125 | obj = loads(' # comment\n{\n "one": 1, # comment\n "two": { "inner": [1, 2, 3] } # comment\n} ') 126 | assert obj.get('one') == 1 127 | assert obj.get('two').get("inner") == [1, 2, 3] 128 | assert len(obj.keys()) == 2 129 | 130 | 131 | def test_object_without_comma_separator_deserialize(): 132 | obj = loads(' # comment\n{\n "one": 1 # comment\n "two": [1 2 3] "three": false # comment\n} ') 133 | assert obj.get('one') == 1 134 | assert obj.get('two') == [1, 2, 3] 135 | assert not obj.get('three') 136 | assert len(obj.keys()) == 3 137 | -------------------------------------------------------------------------------- /son.py: -------------------------------------------------------------------------------- 1 | # Public Domain. 2 | # NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 3 | # Original JSON data format was specified by Douglas Crockford. 4 | # Links: 5 | # https://github.com/aleksandergurin/simple-object-notation 6 | # http://json.org 7 | # https://github.com/douglascrockford/JSON-js 8 | # https://tools.ietf.org/html/rfc7159 9 | 10 | 11 | """SON (Simple Object Notation) data interchange format. 
"""SON (Simple Object Notation) data interchange format.

A simple data format similar to JSON, with a few minor changes:
    - comments start with a # sign and end at the next newline (\\n)
    - the comma after a key-value pair is optional
    - the comma after an array element is optional

JSON is compatible with SON in the sense that
JSON data is also SON data, but not vice versa.

SON data example:

{
    # Personal information

    "name": "Alexander Grothendieck"
    "fields": "mathematics"
    "main_topics": ["Etale cohomology" "Motives" "Topos theory" "Schemes"]
}

"""

import re
import io
import math


class Consts:
    """Lookup tables and compiled patterns shared by dumps() and loads()."""

    # Number grammar: integer part, optional fraction, optional exponent.
    # re.ASCII keeps \d from matching non-ASCII decimal digits.
    number_re = re.compile(
        r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', re.ASCII)

    # Exactly four hexadecimal digits, as required after "\u".
    hex4_re = re.compile(r'[0-9a-fA-F]{4}')

    # A "\uXXXX" escape whose value lies in the low-surrogate range
    # (U+DC00..U+DFFF); used to recombine UTF-16 surrogate pairs.
    low_surrogate_re = re.compile(r'\\u([dD][c-fC-F][0-9a-fA-F]{2})')

    # Bare-word literals and the Python values they decode to.
    literals = (
        ('null', None),
        ('true', True),
        ('false', False),
    )

    # Character following a backslash -> decoded character.
    escape_deserialize = {
        '"': '"',
        '\\': '\\',
        '/': '/',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t'
    }

    # Character -> two-character escape sequence written by dumps().
    escape_serialize = {
        '"': '\\"',
        '\\': '\\\\',
        '/': '\\/',
        '\b': '\\b',
        '\f': '\\f',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t'
    }


def dumps(obj, sorted_keys=False, json_compatibility=False):
    """Serialize a Python object into SON data.

    Supported values are None, bool, int, float, str, dict, list and
    tuple (arbitrarily nested). Dictionary keys are converted with str().

    :param obj: an input Python object
    :param sorted_keys: when true, object members are written in sorted
        key order instead of the dictionary's iteration order
    :param json_compatibility: when true, elements are separated by ', '
        (valid JSON) instead of a single space
    :return: string containing serialized SON data
    :raises ValueError: if a container contains itself (circular
        reference) or a float is infinite or NaN
    :raises TypeError: if a value of an unsupported type is found
    """

    separator = ', ' if json_compatibility else ' '
    # ids of the containers that are currently being serialized, i.e. the
    # ancestors of the value being written. An id is dropped again once
    # its container is finished, so the same object may legitimately
    # appear several times as long as it does not contain itself.
    in_progress = set()
    buf = io.StringIO()
    write = buf.write

    def string_val(s):
        write('"')
        for c in s:
            escaped = Consts.escape_serialize.get(c)
            if escaped is not None:
                write(escaped)
                continue
            code = ord(c)
            if 0x1f < code < 0x7f:
                write(c)
            elif code > 0xffff:
                # "\u" takes exactly four hex digits, so characters
                # outside the BMP are written as a UTF-16 surrogate pair.
                code -= 0x10000
                write('\\u{:04x}\\u{:04x}'.format(
                    0xd800 | (code >> 10), 0xdc00 | (code & 0x3ff)))
            else:
                write('\\u{:04x}'.format(code))
        write('"')

    def array_val(a):
        write('[')
        for index, item in enumerate(a):
            if index:
                write(separator)
            value(item)
        write(']')

    def object_val(o):
        keys = sorted(o) if sorted_keys else list(o)
        write('{')
        for index, key in enumerate(keys):
            if index:
                write(separator)
            string_val(str(key))
            write(': ')
            value(o[key])
        write('}')

    def container_val(x, emit):
        marker = id(x)
        if marker in in_progress:
            raise ValueError("Circular reference detected")
        in_progress.add(marker)
        try:
            emit(x)
        finally:
            in_progress.discard(marker)

    def value(x):
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(x, bool):
            write('true' if x else 'false')
        elif x is None:
            write('null')
        elif isinstance(x, int):
            # We have to create an int instance because
            # a subclass of int could override the __repr__ method
            write(repr(int(x)))
        elif isinstance(x, float):
            if math.isinf(x) or math.isnan(x):
                raise ValueError("Out of range float value {}".format(x))
            # We have to create a float instance because
            # a subclass of float could override the __repr__ method
            write(repr(float(x)))
        elif isinstance(x, str):
            string_val(x)
        elif isinstance(x, dict):
            container_val(x, object_val)
        elif isinstance(x, (list, tuple)):
            container_val(x, array_val)
        else:
            raise TypeError("{} is not serializable".format(str(x)))

    value(obj)

    return buf.getvalue()


def loads(input_str):
    """Deserialize input SON data to a Python object.

    :param input_str: string containing SON data
    :return: python object (dict, list, str, int, float, bool or None)
    :raises TypeError: if input_str is not a string
    :raises ValueError: if the data is malformed, empty or truncated; the
        message ends with the 1-based line and column of the offending
        position
    """
    if not isinstance(input_str, str):
        raise TypeError("Input object must be a string")

    end = len(input_str)
    pos = 0
    # Current character, or None once the end of the input is reached.
    ch = input_str[0] if end else None

    def advance(count=1):
        nonlocal pos, ch
        pos = min(pos + count, end)
        ch = input_str[pos] if pos < end else None

    def error(message):
        # Line and column are derived from the absolute position, so they
        # are always 1-based and consistent, including at end of input.
        line = input_str.count('\n', 0, pos) + 1
        column = pos - input_str.rfind('\n', 0, pos)
        raise ValueError(
            "{}: line {}, column {}".format(message, line, column))

    def literal_val():
        for text, result in Consts.literals:
            if input_str.startswith(text, pos):
                advance(len(text))
                return result
        error("Unexpected value")

    def number_val():
        m = Consts.number_re.match(input_str, pos)
        if m is None:
            error("Bad number")
        _, fraction, exp = m.groups()
        text = m.group()
        advance(len(text))
        if fraction or exp:
            return float(text)
        return int(text)

    def unicode_escape():
        # Called with the position just after "\u".
        m = Consts.hex4_re.match(input_str, pos)
        if m is None:
            # int(s, 16) alone would also accept signs, blanks and "0x",
            # so the digits are validated explicitly.
            error("Invalid \\uXXXX escape")
        code = int(m.group(), 16)
        advance(4)
        if 0xd800 <= code <= 0xdbff:
            # A high surrogate immediately followed by an escaped low
            # surrogate encodes one character outside the BMP.
            low = Consts.low_surrogate_re.match(input_str, pos)
            if low is not None:
                code = (0x10000 + ((code - 0xd800) << 10)
                        + (int(low.group(1), 16) - 0xdc00))
                advance(6)
        return chr(code)

    def string_val():
        if ch != '"':
            error("Expecting '\"'")
        advance()

        res = io.StringIO()
        while ch is not None:
            if ch == '"':
                advance()
                return res.getvalue()
            if ch == '\\':
                advance()
                if ch is None:
                    break
                if ch in Consts.escape_deserialize:
                    res.write(Consts.escape_deserialize[ch])
                    advance()
                elif ch == 'u':
                    advance()
                    res.write(unicode_escape())
                else:
                    error("Invalid escape")
            else:
                res.write(ch)
                advance()
        # End of input reached before the closing quote.
        error("Bad string")

    def array_val():
        if ch != '[':
            error("Expecting '['")
        advance()
        skip_spaces_and_comments()

        res = []

        if ch == ']':
            advance()
            return res

        while ch is not None:
            # value() skips spaces and comments
            # before and after the actual token
            res.append(value())
            if ch == ']':
                advance()
                return res
            if ch == ',':
                # The comma is optional; the next value() call skips
                # whatever spaces and comments follow it.
                advance()
        error("Bad array")

    def object_val():
        if ch != '{':
            error("Expecting '{'")
        advance()
        skip_spaces_and_comments()

        res = {}

        if ch == '}':
            advance()
            return res

        while ch is not None:
            if ch != '"':
                error("Expecting property name enclosed in '\"'")
            key = string_val()
            skip_spaces_and_comments()
            if ch != ':':
                error("Expecting ':' delimiter")
            # skip ':'
            advance()
            # value() skips spaces and comments
            # before and after the actual token
            res[key] = value()
            if ch == '}':
                advance()
                return res
            if ch == ',':
                advance()
                skip_spaces_and_comments()
        error("Bad object")

    def value():
        skip_spaces_and_comments()

        if ch is None:
            # Empty or truncated input: report it as malformed data
            # instead of failing on a comparison with None.
            error("Unexpected end of data")

        if ch == '{':
            res = object_val()
        elif ch == '[':
            res = array_val()
        elif ch == '"':
            res = string_val()
        elif ch in '-0123456789':
            res = number_val()
        else:
            res = literal_val()

        skip_spaces_and_comments()

        return res

    def skip_spaces_and_comments():
        while ch is not None:
            if ch.isspace():
                advance()
            elif ch == '#':
                # A comment runs up to (not including) the newline; the
                # newline itself is then skipped as ordinary whitespace.
                while ch is not None and ch != '\n':
                    advance()
            else:
                break

    result = value()

    if ch is not None:
        error("Extra data")

    return result


encode = dumps
decode = loads