├── LICENSE.md ├── README.md ├── build_tables.py ├── json.txt ├── printer.py └── verifier.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Henri Tuhola 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JSON Decoding Algorithm 2 | 3 | Everyone and their dog already has a json parsing and 4 | encoding library. So this module is more of a fun curiosity 5 | rather than a useful tool. 6 | 7 | Now even your pet rock is able to parse JSON. 
8 | 9 | ## Parsing tables for JSON 10 | 11 | The code in the [build_tables.py](build_tables.py) 12 | constructs a parsing table that matches on the railroad 13 | diagrams at http://json.org/ 14 | 15 | There's a recognizer that can push and pop states into a stack. 16 | Every state transition is associated with action code that 17 | is separate from the recognizer and allows you to parse the 18 | contents of the input correctly. 19 | 20 | ## Tutorial for implementing a JSON decoder or parser 21 | 22 | Write the following program in your programming language 23 | of choice: 24 | 25 | def parse(input): 26 | stack = [] 27 | state = 0x00 28 | ds = [] # data stack 29 | ss = [] # string stack 30 | es = [] # escape stack 31 | for ch in input: 32 | cat = catcode[min(ord(ch), 0x7E)] 33 | state = parse_ch(cat, ch, stack, state, ds, ss, es) 34 | state = parse_ch(catcode[32], u'', stack, state, ds, ss, es) 35 | if state != 0x00: 36 | raise Exception("JSON decode error: truncated") 37 | if len(ds) != 1: 38 | raise Exception("JSON decode error: too many objects") 39 | return ds.pop() 40 | 41 | def parse_ch(cat, ch, stack, state, ds, ss, es): 42 | while True: 43 | code = states[state][cat] 44 | action = code >> 8 & 0xFF 45 | code = code & 0xFF 46 | if action == 0xFF and code == 0xFF: 47 | raise Exception("JSON decode error: syntax") 48 | elif action >= 0x80: # shift 49 | stack.append(gotos[state]) 50 | action -= 0x80 51 | if action > 0: 52 | do_action(action, ch, ds, ss, es) 53 | if code == 0xFF: 54 | state = stack.pop() 55 | else: 56 | state = code 57 | return state 58 | 59 | # This action table is unique for every language. 60 | # It also depends on which structures you want to 61 | # generate. 
62 | def do_action(action, ch, ds, ss, es): 63 | if action == 0x1: # push list 64 | ds.append([]) 65 | # Push object to ds 66 | elif action == 0x2: # push object 67 | ds.append({}) 68 | elif action == 0x3: # pop & append 69 | val = ds.pop() 70 | ds[len(ds)-1].append(val) 71 | elif action == 0x4: # pop pop & setitem 72 | val = ds.pop() 73 | key = ds.pop() 74 | ds[len(ds)-1][key] = val 75 | elif action == 0x5: # push null 76 | ds.append(None) 77 | elif action == 0x6: # push true 78 | ds.append(True) 79 | elif action == 0x7: # push false 80 | ds.append(False) 81 | elif action == 0x8: # push string 82 | val = u"".join(ss) 83 | ds.append(val) 84 | ss[:] = [] # clear ss and es stacks. 85 | es[:] = [] 86 | elif action == 0x9: 87 | val = int(u"".join(ss)) # push int 88 | ds.append(val) 89 | ss[:] = [] # clear ss stack. 90 | elif action == 0xA: 91 | val = float(u"".join(ss)) # push float 92 | ds.append(val) 93 | ss[:] = [] 94 | elif action == 0xB: # push ch to ss 95 | ss.append(ch) 96 | elif action == 0xC: # push ch to es 97 | es.append(ch) 98 | elif action == 0xD: # push escape 99 | ss.append(unichr(escape_characters[ch])) 100 | elif action == 0xE: # push unicode point 101 | ss.append(unichr(int(u"".join(es), 16))) 102 | es[:] = [] 103 | else: # This is very unlikely to happen. But make 104 | # a crashpoint here if possible. 105 | # Also if you write it in parts, let this line 106 | # be the first one you write into this routine. 107 | assert False, "JSON decoder bug" 108 | 109 | # Non-trivial escape characters. At worst you can 110 | # 'switch' or 'if/else' them into do_action -function. 111 | escape_characters = {'b': 8, 't': 9, 'n': 10, 'f': 12, 'r': 13} 112 | 113 | Add the lists 'states', 'gotos', 'catcode' from 114 | [json.txt](json.txt) 115 | in this directory/repository. Add them into same file under 116 | your application. Also add the comment in that file so that 117 | your code stays maintainable. 
118 | 119 | If the file is not in the correct format, write a reformatter. To 120 | avoid errors, DO NOT try to reformat it by hand. 121 | 122 | ## Recreational use 123 | 124 | This can probably be used to generate random JSON strings as 125 | well. I haven't tried to do that. :D Could be fun and 126 | pointless. 127 | 128 | ## How is this special? 129 | 130 | This project is unique in the sense that it is probably the 131 | easiest-to-port JSON decoder you can write. 132 | 133 | If you wanted to port this, you would only have to rewrite 134 | the driver and reformat the parsing tables. 135 | 136 | Also the algorithm is incremental. You can suspend it after 137 | any character input. It also builds the JSON as it appears. 138 | 139 | With a small modification it'd be able to parse multiple JSON 140 | objects and pass them as they appear in the stream. 141 | 142 | ## Potential uses 143 | 144 | The driver is divided into a recognizer and an action table. If 145 | the recognizer finds an input not in JSON syntax, it raises 146 | a SYN error. 147 | 148 | The input is interpreted according to how you program the 149 | do_action -procedure. 150 | 151 | If you want to make people with traditional JSON parsers frown, 152 | you could adjust the driver to parse multiple objects and 153 | read JSON objects as they appear in the TCP stream. 154 | 155 | After every JSON object: 156 | 157 | if len(ds) > 0 and state == 0: 158 | return ds.pop(0) 159 | 160 | If you do this, remember to emit a newline or whitespace 161 | character after each JSON message. This lets the recipient's 162 | JSON parser reach the state where it receives the JSON 163 | object. 164 | 165 | But maybe it's better to use the length#json_message 166 | -protocol. :) Very few other JSON parsers are capable of doing 167 | this. 168 | 169 | This is also useful if you don't have a JSON parser you 170 | could trust. For example, if your parser mishandles backslash 171 | characters and doubles them on decode/encode. 
Or if your 172 | parser misrecognizes floats because it tries to parse ',' 173 | rather than '.'. 174 | 175 | The code that you write to use this driver gets easily 176 | tested. And the tables containing the recognizer come from 177 | this project, so you can trust those tables have the same 178 | behavior as here. 179 | 180 | Also can be useful if you want to read the JSON floats into 181 | something else than floating point floats. Just write your 182 | own do_action that does it differently. 183 | 184 | ## What if I want to encode JSON as well? 185 | 186 | To encode JSON you need some routines to tokenize strings 187 | and integers. You may also require a pretty printer. 188 | 189 | [CS-TR-79-770](http://i.stanford.edu/TR/CS-TR-79-770.html) 190 | describes a pretty printing method that should be sufficient 191 | for tokenizing JSON. 192 | 193 | There's a short example of the algorithm described by that 194 | paper in the `printer.py`. Call it with a json file and it 195 | will parse your file with `verifier.py` and then pretty 196 | prints it out. 197 | 198 | The `printer.py` holds everything for stringifying 199 | JSON except the float formatter. 200 | 201 | ## Unicode Gotcha 202 | 203 | This code and algorithms should be neutral about handling 204 | unicode characters. 205 | 206 | With some tuning the pretty printer should even be able to 207 | handle output with non-monospace fonts. Though for 208 | convenience please treat plaintext json output as if it was 209 | monospace. 210 | 211 | Whether this code can handle unicode characters depends on 212 | the code implemented and the programming language you use to 213 | implement it. 214 | 215 | ## Bugfixes 216 | 217 | There is some verification that the state table is correct. 218 | I have made a program that goes through every state 219 | transition. 220 | 221 | If you find a bug in the tables, do not modify them. Modify 222 | the program `build_tables.py` or file an issue in github.com. 
223 | 224 | Verifying that it works was tricky. The coverage test 225 | I did caught quite a few bugs. I'm quite certain it now matches 226 | the railroad diagram on json.org. 227 | 228 | ## License 229 | 230 | [MIT](LICENSE.md) License 231 | 232 | Copyright (c) 2016 Henri Tuhola 233 | 234 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 235 | 236 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 237 | 238 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 239 | -------------------------------------------------------------------------------- /build_tables.py: -------------------------------------------------------------------------------- 1 | # First I started working with something like this. 2 | # Then I decided, that it wouldn't work. 3 | # So I went to write an NFA-based parser. 4 | # Then I realised it'd be as long as if I just 5 | # wrote it directly. So I wrote it directly. 6 | # Then I thought that it would look nice and run fast 7 | # if it was in LR-like tables. 8 | # 9 | # Here we are again... 10 | 11 | # The following program builds the parsing tables. 
# Every entry of the parsing tables packs two hexadecimal codes into one
# number: the high byte is an action, the low byte is a state transition.

# If the action has the 0x80 bit set, the entry shifts: goto[state] of the
# current state is pushed onto the stack.

# If the transition is 0xFF, a state is popped from the stack instead.

# To keep the tables easy to write by hand, they are first written in a
# looser dict format and only later 'packed' into lists.

# A few groups of transitions are so common that the tables would be badly
# cluttered without the helper functions below.
def sskip(table):
    """Add the "skip whitespace" transitions to *table* and return it.

    Space, tab, carriage return and newline all map to 0x00FE: no action,
    and the 0xFE transition code (stay in the current state).
    """
    for blank in ' \t\r\n':
        table[blank] = 0x00FE
    return table


def pval(table):
    """Add the transitions that begin a JSON value to *table* and return it.

    Every entry shifts (0x80 bit set).  Whitespace skipping is added last
    through sskip().
    """
    # Keywords: null / true / false.
    table['n'] = 0x8010
    table['t'] = 0x8020
    table['f'] = 0x8030
    # Containers: action 1 pushes a list, action 2 pushes an object.
    table['['] = 0x8140
    table['{'] = 0x8250
    # Strings.
    table['"'] = 0x8060
    # Numbers: action 0xB pushes the character onto the string stack.
    table['0'] = 0x8B76
    for digit in '123456789':
        table[digit] = 0x8B70
    table['-'] = 0x8B78
    return sskip(table)


def hexv(table, cmd):
    """Map every hexadecimal digit (both cases) to *cmd*; return *table*."""
    table.update(dict.fromkeys('0123456789abcdefABCDEF', cmd))
    return table

# This models the railroad diagram on the json.org website. Exactly.
# There are actions associated with the state transitions, so in a sense
# this is a recognizer that performs parsing actions along the way.
# Encoding of the entries below (as decoded by the packer and the driver):
#   value  = (action << 8) | target
#   action >= 0x80  -> shift: gotos[state] is pushed on the stack and the
#                      remaining bits (action - 0x80) are the action to run.
#   action numbers  -> 1 push list, 2 push object, 3 pop & append,
#                      4 pop pop & setitem, 5 null, 6 true, 7 false,
#                      8 push string, 9 push int, 0xA push float,
#                      0xB push ch to string stack, 0xC push ch to escape
#                      stack, 0xD push escape, 0xE push unicode point.
#   target == 0xFF  -> pop the next state from the stack; the same character
#                      is then examined again in that state.
#   target == 0xFE  -> stay in the current state.
#   key ''          -> fallback used for every character not listed.
#   A character with no entry and no '' fallback becomes 0xFFFF (syntax error).
states = {
    # Start state: expects a single value.
    0x00: pval({ }),
    # null, true, false -- one state per remaining letter of the keyword;
    # the final state pushes the constant and pops.
    0x10: { 'u': 0x11 }, 0x11: { 'l': 0x12 }, 0x12: { 'l': 0x13 }, 0x13: { '': 0x05FF },
    0x20: { 'r': 0x21 }, 0x21: { 'u': 0x22 }, 0x22: { 'e': 0x23 }, 0x23: { '': 0x06FF },
    0x30: { 'a': 0x31 }, 0x31: { 'l': 0x32 }, 0x32: { 's': 0x33 }, 0x33: { 'e': 0x34 }, 0x34: { '': 0x07FF },
    # lists
    0x40: pval({ # first item, or an immediate ']' for the empty list
        ']': 0x4F,
    }),
    0x41: sskip({ # after an item: append it, then continue or close
        ',': 0x0342, ']': 0x034F
    }),
    0x42: pval({ # next item (a value is required after ',')
    }),
    0x4F: { '': 0x00FF }, # list closed: pop back to the enclosing state
    # dicts
    0x50: sskip({ # first.key, or an immediate '}' for the empty object
        '"': 0x8060, '}': 0x005F,
    }),
    0x51: sskip({ # after the first key: expect ':'
        ':': 0x52,
    }),
    0x52: pval({ }), # first.val
    0x53: sskip({ # after a value: store key/value, then continue or close
        ',': 0x0454, '}': 0x045F,
    }),
    0x54: sskip({ # next.key (a key is required after ',')
        '"': 0x8060
    }),
    0x55: sskip({ # after a later key: expect ':'
        ':': 0x56,
    }),
    0x56: pval({ }), # next.val

    0x5F: { '': 0x00FF }, # object closed: pop back to the enclosing state
    # strings
    0x60: { '"': 0x6F, '': 0x0B60, '\\': 0x61 }, # body: collect characters
    0x61: { # character following a backslash
        '"': 0x0B60, '\\': 0x0B60, '/': 0x0B60,
        'b': 0x0D60, 'f': 0x0D60, 'n': 0x0D60, 'r': 0x0D60, 't': 0x0D60,
        'u': 0x8062,
    },
    # \uXXXX: four hex digits are collected on the escape stack ...
    0x62: hexv({}, 0x0C63),
    0x63: hexv({}, 0x0C64),
    0x64: hexv({}, 0x0C65),
    0x65: hexv({}, 0x0C66),
    # ... then the code point is pushed and the state pops.
    0x66: { '': 0x0EFF },
    0x6F: { '': 0x08FF }, # closing quote seen: push the string and pop
    # numbers
    0x70: { # after 1-9
        '0': 0x0BFE, '1': 0x0BFE, '2': 0x0BFE, '3': 0x0BFE, '4': 0x0BFE,
        '5': 0x0BFE, '6': 0x0BFE, '7': 0x0BFE, '8': 0x0BFE, '9': 0x0BFE,
        '.': 0x0B71, 'e': 0x0B73, 'E': 0x0B73,
        '': 0x09FF
    },
    0x71: { # after a dot.
        '0': 0x0B72, '1': 0x0B72, '2': 0x0B72, '3': 0x0B72, '4': 0x0B72,
        '5': 0x0B72, '6': 0x0B72, '7': 0x0B72, '8': 0x0B72, '9': 0x0B72,
    },
    0x72: { # after a dot digit
        '0': 0x0BFE, '1': 0x0BFE, '2': 0x0BFE, '3': 0x0BFE, '4': 0x0BFE,
        '5': 0x0BFE, '6': 0x0BFE, '7': 0x0BFE, '8': 0x0BFE, '9': 0x0BFE,
        'e': 0x0B73, 'E': 0x0B73,
        '': 0x0AFF,
    },
    0x73: { # after eE
        '0': 0x0B75, '1': 0x0B75, '2': 0x0B75, '3': 0x0B75, '4': 0x0B75,
        '5': 0x0B75, '6': 0x0B75, '7': 0x0B75, '8': 0x0B75, '9': 0x0B75,
        '+': 0x0B74, '-': 0x0B74,
    },
    0x74: { # after eE-+
        '0': 0x0B75, '1': 0x0B75, '2': 0x0B75, '3': 0x0B75, '4': 0x0B75,
        '5': 0x0B75, '6': 0x0B75, '7': 0x0B75, '8': 0x0B75, '9': 0x0B75,
    },
    0x75: { # after eE-+ digit
        '0': 0x0BFE, '1': 0x0BFE, '2': 0x0BFE, '3': 0x0BFE, '4': 0x0BFE,
        '5': 0x0BFE, '6': 0x0BFE, '7': 0x0BFE, '8': 0x0BFE, '9': 0x0BFE,
        '': 0x0AFF,
    },
    0x76: { # after 0 (no further integer digits are accepted)
        '.': 0x0B71, 'e': 0x0B73, 'E': 0x0B73,
        '': 0x09FF
    },
    0x78: { # '-' (a digit must follow)
        '0': 0x0B76, '1': 0x0B70, '2': 0x0B70, '3': 0x0B70, '4': 0x0B70,
        '5': 0x0B70, '6': 0x0B70, '7': 0x0B70, '8': 0x0B70, '9': 0x0B70,
    },
}
# This program differs quite radically from LR parsing tables in that it
# does not shift at every transition. There is also no GOTO entry for every
# left-hand-side rule, because it is not needed: every state accepts at most
# one LHS.
#
# gotos[state] is the state pushed on the stack when a shift happens in
# `state`, i.e. where parsing resumes once the shifted construct pops.
gotos = {
    0x00: 0x00, # accept?
    0x40: 0x41, # list.first
    0x42: 0x41, # list.next
    0x50: 0x51, # dict.first.key
    0x52: 0x53, # dict.first.val
    0x54: 0x55, # dict.next.key
    0x56: 0x53, # dict.next.val
    0x61: 0x60, # string.escape
}

# The non-trivial escape characters are directly dumped.
# So this contains the non-trivial ones only.
escape_characters = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t' }

# Packing of the state tables starts here. First, characters are grouped
# into categories: two characters share a category when they select the
# same entry in every state (i.e. they have identical table columns).
# 'anychar' is the column of a character that is never listed explicitly.
anychar = frozenset(
    (state, table.get(u'', 0xFFFF)) for state, table in states.items())
groups = {anychar: ''}
for n in range(0x7F):
    ch = chr(n)
    catset = frozenset(
        (state, table.get(ch, table.get(u'', 0xFFFF)))
        for state, table in states.items())
    groups[catset] = groups.get(catset, '') + ch

# Next the categories are written into the catcode table. The idea is that
# the driver clamps the character ordinal to this range and indexes the
# table with it. The last character '~' happens to be in 'anychar', so a
# simple min(n, 0x7E) works. Column 0 is always the 'anychar' column.
catcode = [0] * 0x7F
groups.pop(anychar)
columns = [dict(anychar)]
for col, string in sorted(groups.items(), key=lambda item: item[1]):
    column_index = len(columns)
    columns.append(dict(col))
    for ch in string:
        catcode[ord(ch)] = column_index

# The states are listed in sorted order and relabeled by their list index.
ordered_labels = sorted(states)
mapper = dict(zip(ordered_labels, range(len(ordered_labels))))

packedtable = []
packedgotos = []
for newlabel, oldlabel in enumerate(ordered_labels):
    row = []
    for col in columns:
        entry = col[oldlabel]
        action = (entry >> 8) & 0xFF
        target = entry & 0xFF
        # Relabel real states; 0xFF (pop) and 0xFE pass through unchanged.
        target = mapper.get(target, target)
        if target == 0xFE:
            # 0xFE means "stay here": resolve it to this row's own label.
            target = newlabel
        row.append(action << 8 | target)
    packedtable.append(row)
    goto = gotos.get(oldlabel, 255) # drop to blank goto is a bug.
    packedgotos.append(mapper.get(goto, goto))

# And then finally, to allow copy/paste.
# Emit the packed tables as copy/paste-ready source text.
# Each print call receives exactly one pre-built string, so the same
# statements run on Python 2 and Python 3 and produce the same output as the
# former Python 2 print statements.
print('# generated by build_tables.py program: http://github.com/cheery/json_algorithm')
print('states = [')
for row in packedtable:
    cells = ["0x{:04x},".format(v) for v in row]
    # Single spaces between the pieces, as the old trailing-comma prints did.
    print(' '.join([' ['] + cells + ['],']))
print(']')
print("gotos = {}".format(packedgotos))
print("catcode = {}".format(catcode))

# # These can be used for debugging.
# print "mapping =", mapper
# # This program does a small consistency check.
# def main():
#     stack = []
#     state = 0x00
#     ds = [] # data stack
#     ss = [] # string stack
#     es = [] # escape stack
#     for ch in read_file("test.json").strip():
#         state = parse_ch(ch, stack, state, ds, ss, es)
#     state = parse_ch(u' ', stack, state, ds, ss, es)
#     if state != 0x00:
#         raise Exception("JSON decode error: truncated")
#     if len(ds) != 1:
#         raise Exception("JSON decode error: too many objects")
#     val = ds.pop()
#     print '#', val
#
# def read_file(filename):
#     with open(filename, "rb") as fd:
#         return fd.read().decode('utf-8')
#
# # There's a tutorial in the README.md how to write yourself
# # an engine for driving the tables.
249 | # def parse_ch(ch, stack, state, ds, ss, es): 250 | # while True: 251 | # table = states[state] 252 | # code = table.get(ch, table.get(u'', 0xFFFF)) 253 | # action = code >> 8 & 0xFF 254 | # code = code & 0xFF 255 | # #print repr(ch), hex(state), hex(action), hex(code) 256 | # if action == 0xFF and code == 0xFF: 257 | # raise Exception("JSON decode error: syntax") 258 | # elif action >= 0x80: # shift 259 | # stack.append(gotos[state]) 260 | # action -= 0x80 261 | # if action > 0: 262 | # do_action(action, ch, ds, ss, es) 263 | # if code == 0xFF: 264 | # state = stack.pop() 265 | # else: 266 | # if code != 0xFE: # nop 267 | # state = code 268 | # return state 269 | # 270 | # def do_action(action, ch, ds, ss, es): 271 | # if action == 0x1: 272 | # ds.append([]) 273 | # elif action == 0x2: 274 | # ds.append({}) 275 | # elif action == 0x3: 276 | # val = ds.pop() 277 | # ds[len(ds)-1].append(val) 278 | # elif action == 0x4: 279 | # val = ds.pop() 280 | # key = ds.pop() 281 | # ds[len(ds)-1][key] = val 282 | # elif action == 0x5: 283 | # ds.append(None) 284 | # elif action == 0x6: 285 | # ds.append(True) 286 | # elif action == 0x7: 287 | # ds.append(False) 288 | # elif action == 0x8: 289 | # val = u"".join(ss) 290 | # ds.append(val) 291 | # ss[:] = [] 292 | # es[:] = [] 293 | # elif action == 0x9: 294 | # val = int(u"".join(ss)) 295 | # ds.append(val) 296 | # ss[:] = [] 297 | # elif action == 0xA: 298 | # val = float(u"".join(ss)) 299 | # ds.append(val) 300 | # ss[:] = [] 301 | # elif action == 0xB: 302 | # ss.append(ch) 303 | # elif action == 0xC: 304 | # es.append(ch) 305 | # elif action == 0xD: 306 | # ss.append(escape_characters[ch]) 307 | # elif action == 0xE: 308 | # ss.append(unichr(int(u"".join(es), 16))) 309 | # es[:] = [] 310 | # else: 311 | # assert False, "JSON decoder bug" 312 | # 313 | # # if __name__=="__main__": 314 | # # main() 315 | -------------------------------------------------------------------------------- /json.txt: 
-------------------------------------------------------------------------------- 1 | # generated by build_tables.py program: http://github.com/cheery/json_algorithm 2 | states = [ 3 | [ 0xffff, 0x0000, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 4 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0002, 0xffff, 0xffff, ], 5 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0003, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 6 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0004, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 7 | [ 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, ], 8 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0006, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 9 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0007, 0xffff, 0xffff, ], 10 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0008, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 11 | [ 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, ], 12 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 13 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 14 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000c, 0xffff, 0xffff, 0xffff, 0xffff, ], 15 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000d, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 16 | [ 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, ], 17 | [ 0xffff, 0x000e, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0x0011, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 18 | [ 0xffff, 0x000f, 0xffff, 0xffff, 0x0310, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0311, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 19 | [ 0xffff, 0x0010, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 
0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 20 | [ 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, ], 21 | [ 0xffff, 0x0012, 0x801a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0019, ], 22 | [ 0xffff, 0x0013, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0014, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 23 | [ 0xffff, 0x0014, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 24 | [ 0xffff, 0x0015, 0xffff, 0xffff, 0x0416, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0419, ], 25 | [ 0xffff, 0x0016, 0x801a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 26 | [ 0xffff, 0x0017, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0018, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 27 | [ 0xffff, 0x0018, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 
0x8005, 0xffff, 0x8212, 0xffff, ], 28 | [ 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, ], 29 | [ 0x0b1a, 0x0b1a, 0x0021, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x001b, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, ], 30 | [ 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0x0d1a, 0xffff, 0x0d1a, 0xffff, 0x0d1a, 0x0d1a, 0xffff, 0x0d1a, 0x801c, 0xffff, 0xffff, ], 31 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1d, 0x0c1d, 0xffff, 0x0c1d, 0x0c1d, 0xffff, 0xffff, 0xffff, 0x0c1d, 0x0c1d, 0x0c1d, 0x0c1d, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 32 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1e, 0x0c1e, 0xffff, 0x0c1e, 0x0c1e, 0xffff, 0xffff, 0xffff, 0x0c1e, 0x0c1e, 0x0c1e, 0x0c1e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 33 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1f, 0x0c1f, 0xffff, 0x0c1f, 0x0c1f, 0xffff, 0xffff, 0xffff, 0x0c1f, 0x0c1f, 0x0c1f, 0x0c1f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 34 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c20, 0x0c20, 0xffff, 0x0c20, 0x0c20, 0xffff, 0xffff, 0xffff, 0x0c20, 0x0c20, 0x0c20, 0x0c20, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 35 | [ 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, ], 36 | [ 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 
0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, ], 37 | [ 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b23, 0x09ff, 0x0b22, 0x0b22, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, ], 38 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b24, 0x0b24, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 39 | [ 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0b24, 0x0b24, 0x0aff, 0x0aff, 0x0b25, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0b25, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, ], 40 | [ 0xffff, 0xffff, 0xffff, 0x0b26, 0xffff, 0x0b26, 0xffff, 0xffff, 0x0b27, 0x0b27, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 41 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b27, 0x0b27, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 42 | [ 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0b27, 0x0b27, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, ], 43 | [ 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b23, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, ], 44 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b28, 0x0b22, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, ], 45 | ] 46 | gotos = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 255, 15, 255, 19, 255, 21, 255, 23, 255, 21, 255, 255, 26, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255] 47 | catcode = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 12, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 0, 0, 0, 16, 17, 11, 11, 18, 19, 0, 0, 0, 0, 0, 20, 0, 21, 0, 0, 0, 22, 23, 24, 25, 0, 0, 0, 0, 0, 26, 0, 27, 0] 48 | -------------------------------------------------------------------------------- /printer.py: -------------------------------------------------------------------------------- 1 | import sys, random, time 2 | import verifier 3 | 4 | # You may either feed this printer a json file, or let it 5 | # create a random json object to print. 6 | def main(): 7 | if len(sys.argv) < 2: 8 | obj = synth_json() 9 | else: 10 | with open(sys.argv[1], "r") as fd: 11 | obj = verifier.parse_string(fd.read().decode('utf-8'), set()) 12 | scan = Scanner() 13 | stringify(scan, obj) 14 | scan.finish() 15 | 16 | # A slightly bad random json object generator. 17 | # It's not a proper test for most things. But it's 18 | # enough to show how the pretty printer works. 
# unichr() only exists on Python 2.  On Python 3 chr() already covers
# the whole Unicode range, so fall back to it there.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def synth_json(depth=0):
    """Return a randomly generated JSON-compatible value.

    Once `depth` exceeds 4 only plain strings are produced, which
    bounds the mutual recursion with synth_dict and synth_list.
    """
    if depth > 4:
        return synth_string(depth)
    return random.choice(synths)(depth)


def synth_dict(depth):
    """Return a dict built from 0 to 10 random key/value draws.

    Keys come from a small fixed pool, so colliding draws overwrite
    each other and the result may hold fewer entries than were drawn.
    """
    out = {}
    for _ in range(random.randint(0, 10)):
        out[synth_string(depth)] = synth_json(depth + 1)
    return out


def synth_list(depth):
    """Return a list of 0 to 10 random values nested one level deeper."""
    return [synth_json(depth + 1) for _ in range(random.randint(0, 10))]


def synth_const(depth):
    """Return a random scalar.

    Roughly 10% true/false/null, 40% integers in [0, 1000], 30%
    strings, and the remaining 20% floats.
    """
    r = random.random()
    if r < 0.1:
        return random.choice([True, False, None])
    elif r < 0.5:
        return random.randint(0, 1000)
    elif r < 0.8:
        return synth_string(depth)
    else:
        return 1000 * (
            random.random() * random.random()
            - random.random() * random.random())


def synth_string(depth):
    """Return one of the canned test strings; `depth` is not used."""
    return random.choice(funny_strings)


# Includes control characters, quotes, a backslash and non-ASCII text
# so that every branch of the string escaper gets exercised.
funny_strings = [
    u"progging along", u"chug", u"gunk", u"hazard", u"code", u"bljarmer", u"xok",
    u"log", u"blog", u"farmer", u"punk", u"zebra", u"radio", u"epsilon", u"gamma",
    u'world "Hello" world',
    u''.join(map(_unichr, range(40))),
    u"\\",
    u''.join(map(_unichr, range(0x2020, 0x203f))),
    u''.join(map(_unichr, range(0x4020, 0x4040))),
]

synths = [synth_dict, synth_list, synth_const]

# The algorithm used here is a generic pretty printer.
# This specifies the layout and printout that is desired.
# Type groups that work on both Python 2 and Python 3.
try:
    _STRING_TYPES = (str, unicode)
    _NUMBER_TYPES = (int, long, float)
except NameError:  # Python 3 has neither `unicode` nor `long`
    _STRING_TYPES = (str,)
    _NUMBER_TYPES = (int, float)


def _write(text):
    """Write `text` to stdout, UTF-8 encoding it first on Python 2."""
    if sys.version_info[0] < 3:
        text = text.encode('utf-8')
    sys.stdout.write(text)


def _format_number(value):
    """Return the JSON text for an int or float.

    Floats go through repr() so that no digits are dropped; ints use
    str() so that a Python 2 long does not get an 'L' suffix.
    NOTE: non-finite floats (nan, inf) still come out as text that is
    not valid JSON; you need your own formatter if those can occur.
    """
    if isinstance(value, float):
        return repr(value)
    return str(value)


def stringify(scan, obj):
    """Feed the JSON text of `obj` into the pretty printer `scan`.

    `scan` must provide left(), right(), blank(text, indent) and be
    callable with a text fragment, each returning the scanner itself
    (see Scanner).  Dict keys are emitted in sorted order and are
    expected to be strings.

    Raises AssertionError for a value of an unsupported type.
    """
    if isinstance(obj, dict):
        scan.left()(u"{").blank(u"", 4)
        more = False
        for key, value in sorted(obj.items(), key=lambda a: a[0]):
            if more:
                scan(u",").blank(u" ", 4)
            scan.left()
            scan(escape_string(key) + u': ')
            stringify(scan, value)
            scan.right()
            more = True
        scan.blank(u"", 0)(u"}").right()
    elif isinstance(obj, list):
        scan.left()(u"[").blank(u"", 4)
        more = False
        for item in obj:
            if more:
                scan(u",").blank(u" ", 4)
            stringify(scan, item)
            more = True
        scan.blank(u"", 0)(u"]").right()
    elif isinstance(obj, _STRING_TYPES):
        scan(escape_string(obj))
    elif obj is None:
        scan(u"null")
    # Identity tests, not equality: 1 == True and 0 == False in Python,
    # so an equality test would print the numbers 1 and 0 as booleans.
    elif obj is True:
        scan(u"true")
    elif obj is False:
        scan(u"false")
    elif isinstance(obj, _NUMBER_TYPES):
        # Would also recognize booleans if they were not handled above.
        scan(_format_number(obj))
    else:
        # Raised explicitly so that the check survives `python -O`.
        raise AssertionError("no handler: " + repr(obj))


# This is the easiest point of failure in your stringifier program.
def escape_string(string):
    """Return `string` as a double-quoted JSON string literal.

    Printable ASCII and every character above U+00FF are copied
    through (with quote and backslash escaped); everything else
    becomes a short escape such as \\n or a \\u00XX sequence.
    """
    out = [u'"']
    for ch in string:
        n = ord(ch)
        # Remove the last part of the condition if you don't want
        # unicode printed out for some reason.
        if 0x20 <= n <= 0x7E or 0xFF < n:
            if ch == u'\\':
                ch = u'\\\\'
            elif ch == u'"':
                ch = u'\\"'
        else:
            # Only code points up to 0xFF reach this branch, so four
            # hex digits are always enough.
            ch = u'\\' + character_escapes.get(n, u'u%04x' % n)
        out.append(ch)
    out.append(u'"')
    return u"".join(out)


# Code points that have a one-letter escape in JSON.
character_escapes = {8: u'b', 9: u't', 10: u'n', 12: u'f', 13: u'r'}


# The scanner runs three line widths before the printer and checks how many
# spaces the blanks and groups take. This allows the printer determine
# whether the line or grouping should be broken into multiple lines.
class Scanner(object):
    """Front half of the pretty printer.

    Tokens are buffered in `stream` and measured before they are
    handed to the Printer.  A Left gets the width of its group as
    `size` once the matching Right arrives; a Blank gets the distance
    to the following Blank.  While that is still unknown the size is
    negative, which the Printer treats as "too long to fit".
    """

    def __init__(self):
        self.printer = Printer()
        self.stream = []        # tokens not yet handed to the printer
        self.stack = []         # Left tokens whose group is still open
        self.lastblank = None   # most recent Blank, still being measured
        self.left_total = 1     # running width of what has been printed
        self.right_total = 1    # makes sure we won't treat the first
                                # item differently than others.

    def left(self):
        """Open a group. Returns the scanner for chaining."""
        return self(Left())

    def right(self):
        """Close the innermost open group. Returns the scanner."""
        return self(Right())

    def blank(self, text, indent=0):
        """Add a possible line break that prints as `text` when not taken."""
        return self(Blank(text, indent))

    def __call__(self, x):
        """Scan one token (Left, Right, Blank or a text fragment)."""
        if isinstance(x, Left):
            # Negative until the matching Right adds the end position.
            x.size = -self.right_total
            self.stack.append(x)
        elif isinstance(x, Right):
            if len(self.stack) > 0:
                self.stack.pop().size += self.right_total
        elif isinstance(x, Blank):
            if self.lastblank is not None:
                self.lastblank.size += self.right_total
            self.lastblank = x
            x.size = -self.right_total
            self.right_total += len(x.text)
        else:
            self.right_total += len(x)
        self.stream.append(x)
        # Keep at most three line widths of lookahead buffered.
        while (len(self.stream) > 0 and
               self.right_total - self.left_total > 3*self.printer.margin):
            self.left_total += self.printer(self.stream.pop(0))
        return self

    def finish(self):
        """Flush every buffered token and end the output with a newline."""
        if self.lastblank is not None:                # Without this the last blank
            self.lastblank.size += self.right_total   # gets very different treatment.
        while len(self.stream) > 0:
            self.printer(self.stream.pop(0))
        sys.stdout.write('\n')


# Printer keeps the track of layout during printing.
class Printer(object):
    """Back half of the pretty printer: writes measured tokens to stdout."""

    def __init__(self):
        self.margin = 80        # line width
        self.layout = Layout(None, 80, False)
        self.spaceleft = 80     # columns still free on the current line
        self.spaces = 80        # columns available after the current indent

    def __call__(self, x):
        """Print one token and return the width it accounts for."""
        if isinstance(x, Left):
            # The group must break if its size is unknown or too wide.
            self.layout = Layout(self.layout,
                                 self.spaces,
                                 x.size < 0 or self.spaceleft < x.size)
            return 0
        elif isinstance(x, Right):
            if self.layout.parent:
                self.layout = self.layout.parent
            return 0
        elif isinstance(x, Blank):
            if x.size < 0 or self.spaceleft < x.size or self.layout.force_break:
                self.spaces = self.layout.spaces - x.indent
                self.spaceleft = self.spaces
                sys.stdout.write('\n' + ' '*(self.margin - self.spaces))
            else:
                _write(x.text)
                self.spaceleft -= len(x.text)
            # The scanner counted the text either way, so report it either way.
            return len(x.text)
        else:
            _write(x)
            self.spaceleft -= len(x)
            return len(x)


# These small objects are scanner and printer internals.
class Layout(object):
    """Layout state of one open group, linked to its parent group."""

    def __init__(self, parent, spaces, force_break):
        self.parent = parent
        self.spaces = spaces
        self.force_break = force_break

# These objects are mutated by the scanner, so they cannot be
# reused. Users of the pretty printer should not create them themselves.
218 | class Left(object): 219 | def __init__(self): 220 | self.size = 0 221 | 222 | class Right(object): 223 | pass 224 | 225 | class Blank(object): 226 | def __init__(self, text, indent=0): 227 | self.text = text 228 | self.indent = indent 229 | self.size = 0 230 | 231 | if __name__ == '__main__': 232 | main() 233 | -------------------------------------------------------------------------------- /verifier.py: -------------------------------------------------------------------------------- 1 | # This program provides very brief verification. 2 | # 3 | 4 | # The idea is that if we trigger every state in 5 | # the parser, we can be certain that the state 6 | # transitions we programmed actually work. 7 | 8 | # So this program is doing a coverage check to 9 | # see that our inputs cause the parser to visit 10 | # every state. 11 | 12 | inputs = [ 13 | r""" 14 | true 15 | """, 16 | r""" 17 | false 18 | """, 19 | r""" 20 | null 21 | """, 22 | r""" 23 | "hello" 24 | """, 25 | r""" 26 | -5 27 | """, 28 | r""" 29 | 5 30 | """, 31 | r""" 32 | 0 33 | """, 34 | r""" 35 | { } 36 | """, 37 | r""" 38 | [ 22.0e+02, "+,-./0:E[]bnstu{}\b\f\t", 1100, 100.01, -0.2e00 ] 39 | """, 40 | r""" 41 | { "":43, "fo\u006Fo" : 41E2, "":"", "":[], 42 | "\uAAAA\uEEEE\uaaaa\ubbbb\ueeee\uffff\u0000\u111101":true, "":false, "":null, "":{} } 43 | """, 44 | r""" 45 | [ -21, -32e+3, 2.3E-1, {"":true}, 46 | {"":false}, {"":null}, 0.20e0 47 | ] 48 | """, 49 | r""" 50 | [ [true], [false], [{"":-1}], {"":[]} ] 51 | """, 52 | r""" 53 | [ 54 | [ "hello\u0020world\r\n" ], 55 | null, true, false, 56 | [ ], 57 | [ "\/\\hello \"world\"" ], 58 | [ null, 123, "hllo", 0, {"":0.00}, [0E-0] ], 59 | { "a" : { 60 | "x": "y", 61 | "z": -1.5, 62 | "w": 12e3, 63 | "q": 0e1 64 | }}, 65 | true 66 | ] 67 | """, 68 | ] 69 | 70 | def main(): 71 | chart = set() 72 | for input in inputs: 73 | print parse_string(input, chart) 74 | v = len(chart) 75 | t = 0 76 | for row in states: 77 | t += sum(0 if code & 255 == 0xFF else 1 for 
code in row) 78 | print "{} states, visited {}, verification {:d}%".format(t, v, int(100.0*v/t)) 79 | 80 | for state, row in enumerate(states): 81 | for cat, code in enumerate(row): 82 | if code & 255 != 0xFF and (state, cat) not in chart: 83 | print "not visited:", hex(state), hex(cat), repr("".join(chr(g) for g, cc in enumerate(catcode) if cc == cat)), hex(code) 84 | 85 | def parse_string(string, chart): 86 | stack = [] 87 | state = 0x00 88 | ds = [] # data stack 89 | ss = [] # string stack 90 | es = [] # escape stack 91 | for ch in string: 92 | cat = catcode[min(ord(ch), 0x7E)] 93 | state = parse_ch(cat, ch, stack, state, ds, ss, es, chart) 94 | state = parse_ch(catcode[32], u'', stack, state, ds, ss, es, chart) 95 | if state != 0x00: 96 | raise Exception("JSON decode error: truncated") 97 | if len(ds) != 1: 98 | raise Exception("JSON decode error: too many objects") 99 | return ds.pop() 100 | 101 | def parse_ch(cat, ch, stack, state, ds, ss, es, chart): 102 | while True: 103 | code = states[state][cat] 104 | action = code >> 8 & 0xFF 105 | code = code & 0xFF 106 | if action == 0xFF and code == 0xFF: 107 | raise Exception("JSON decode error: syntax") 108 | elif action >= 0x80: # shift 109 | stack.append(gotos[state]) 110 | action -= 0x80 111 | if action > 0: 112 | do_action(action, ch, ds, ss, es) 113 | if code == 0xFF: 114 | state = stack.pop() 115 | else: 116 | chart.add((state, cat)) 117 | state = code 118 | return state 119 | return state 120 | 121 | def do_action(action, ch, ds, ss, es): 122 | if action == 0x1: 123 | ds.append([]) 124 | elif action == 0x2: 125 | ds.append({}) 126 | elif action == 0x3: 127 | val = ds.pop() 128 | ds[len(ds)-1].append(val) 129 | elif action == 0x4: 130 | val = ds.pop() 131 | key = ds.pop() 132 | ds[len(ds)-1][key] = val 133 | elif action == 0x5: 134 | ds.append(None) 135 | elif action == 0x6: 136 | ds.append(True) 137 | elif action == 0x7: 138 | ds.append(False) 139 | elif action == 0x8: 140 | val = u"".join(ss) 141 | 
ds.append(val) 142 | ss[:] = [] 143 | es[:] = [] 144 | elif action == 0x9: 145 | val = int(u"".join(ss)) 146 | ds.append(val) 147 | ss[:] = [] 148 | elif action == 0xA: 149 | val = float(u"".join(ss)) 150 | ds.append(val) 151 | ss[:] = [] 152 | elif action == 0xB: 153 | ss.append(ch) 154 | elif action == 0xC: 155 | es.append(ch) 156 | elif action == 0xD: 157 | ss.append(unichr(escape_characters[ch])) 158 | elif action == 0xE: 159 | ss.append(unichr(int(u"".join(es), 16))) 160 | es[:] = [] 161 | else: 162 | assert False, "JSON decoder bug" 163 | 164 | def read_file(filename): 165 | with open(filename, "rb") as fd: 166 | return fd.read().decode('utf-8') 167 | 168 | # generated by build_tables.py program: http://github.com/cheery/json_algorithm 169 | states = [ 170 | [ 0xffff, 0x0000, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 171 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0002, 0xffff, 0xffff, ], 172 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0003, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 173 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0004, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 174 | [ 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff, ], 175 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0006, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 176 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0007, 0xffff, 0xffff, ], 177 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0008, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 178 | [ 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, ], 179 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 180 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 181 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000c, 0xffff, 0xffff, 0xffff, 0xffff, ], 182 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000d, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 183 | [ 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 
0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, ], 184 | [ 0xffff, 0x000e, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0x0011, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 185 | [ 0xffff, 0x000f, 0xffff, 0xffff, 0x0310, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0311, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 186 | [ 0xffff, 0x0010, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 187 | [ 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, ], 188 | [ 0xffff, 0x0012, 0x801a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0019, ], 189 | [ 0xffff, 0x0013, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0014, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 190 | [ 0xffff, 0x0014, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 191 | [ 0xffff, 0x0015, 0xffff, 0xffff, 0x0416, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0419, ], 192 | [ 0xffff, 0x0016, 0x801a, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 193 | [ 0xffff, 0x0017, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0018, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 194 | [ 0xffff, 0x0018, 0x801a, 0xffff, 0xffff, 0x8b29, 0xffff, 0xffff, 0x8b28, 0x8b22, 0xffff, 0xffff, 0xffff, 0x810e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8009, 0xffff, 0x8001, 0xffff, 0xffff, 0x8005, 0xffff, 0x8212, 0xffff, ], 195 | [ 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, ], 196 | [ 0x0b1a, 0x0b1a, 0x0021, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x001b, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, 0x0b1a, ], 197 | [ 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b1a, 0xffff, 0xffff, 0x0d1a, 0xffff, 0x0d1a, 0xffff, 0x0d1a, 0x0d1a, 0xffff, 0x0d1a, 0x801c, 0xffff, 0xffff, ], 198 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1d, 0x0c1d, 0xffff, 0x0c1d, 0x0c1d, 0xffff, 0xffff, 0xffff, 0x0c1d, 0x0c1d, 0x0c1d, 0x0c1d, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 199 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1e, 0x0c1e, 0xffff, 0x0c1e, 0x0c1e, 0xffff, 0xffff, 0xffff, 0x0c1e, 0x0c1e, 0x0c1e, 0x0c1e, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 200 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c1f, 0x0c1f, 0xffff, 0x0c1f, 0x0c1f, 0xffff, 0xffff, 0xffff, 0x0c1f, 0x0c1f, 0x0c1f, 0x0c1f, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 201 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0c20, 0x0c20, 0xffff, 0x0c20, 0x0c20, 0xffff, 0xffff, 0xffff, 0x0c20, 0x0c20, 0x0c20, 0x0c20, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 202 | [ 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, 0x0eff, ], 203 | [ 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, ], 204 | [ 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b23, 0x09ff, 0x0b22, 0x0b22, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, ], 205 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b24, 0x0b24, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 206 | [ 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0b24, 0x0b24, 0x0aff, 0x0aff, 0x0b25, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0b25, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, ], 207 | [ 0xffff, 0xffff, 0xffff, 0x0b26, 0xffff, 0x0b26, 0xffff, 0xffff, 0x0b27, 0x0b27, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 208 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b27, 0x0b27, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 209 | [ 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 
0x0aff, 0x0aff, 0x0b27, 0x0b27, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, 0x0aff, ], 210 | [ 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b23, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x0b25, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, 0x09ff, ], 211 | [ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0b28, 0x0b22, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, ], 212 | ] 213 | gotos = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 255, 15, 255, 19, 255, 21, 255, 23, 255, 21, 255, 255, 26, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255] 214 | catcode = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 12, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 0, 0, 0, 16, 17, 11, 11, 18, 19, 0, 0, 0, 0, 0, 20, 0, 21, 0, 0, 0, 22, 23, 24, 25, 0, 0, 0, 0, 0, 26, 0, 27, 0] 215 | 216 | escape_characters = {'b': 8, 't': 9, 'n': 10, 'f': 12, 'r': 13} 217 | 218 | if __name__=="__main__": 219 | main() 220 | --------------------------------------------------------------------------------