├── .gitignore ├── Parser.py ├── README.md ├── __init__.py ├── exceptions ├── Exceptions.py └── __init__.py ├── models ├── JsonArray.py ├── JsonObject.py └── __init__.py ├── tokenizer ├── Readers.py ├── Token.py ├── TokenEnum.py ├── TokenList.py ├── Tokenizer.py └── __init__.py └── util ├── Stringify.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .DS_Store 3 | .idea 4 | Test.py -------------------------------------------------------------------------------- /Parser.py: -------------------------------------------------------------------------------- 1 | from .models.JsonArray import JSONArray 2 | 3 | from .models.JsonObject import JSONObject 4 | from .tokenizer.Tokenizer import * 5 | 6 | # Signal token 7 | BEGIN_OBJECT = 1 8 | BEGIN_ARRAY = 2 9 | END_OBJECT = 4 10 | END_ARRAY = 8 11 | 12 | # variable token 13 | NULL_TOKEN = 16 14 | NUMBER_TOKEN = 32 15 | STRING_TOKEN = 64 16 | BOOL_TOKEN = 128 17 | 18 | # separator token 19 | COLON_TOKEN = 256 20 | COMMA_TOKEN = 512 21 | 22 | # end signal 23 | END_JSON = 65536 24 | 25 | 26 | class Parser(object): 27 | __slots__ = ('tokens',) 28 | 29 | @classmethod 30 | def parse(cls, data=None, use_built_in=True): 31 | """ 32 | Parse the json data provided 33 | :param data: data can be ``str`` or ``TokenList``, which comes from ``Tokenizer`` 34 | :param get_python_data: return PythonObj if True else JSON data(JSONObject/JSONArray) 35 | :return: JSONObject or JSONArray 36 | """ 37 | cls.data_conf = use_built_in 38 | if type(data) == str: 39 | cls.tokens = Tokenizer(Reader(data)).get_tokens() 40 | elif type(data) == TokenList: 41 | cls.tokens = data 42 | elif not data: 43 | return JSONObject() 44 | return cls._work() 45 | 46 | @classmethod 47 | def _work(cls): 48 | """ 49 | Major parsing function 50 | :return: JSONObject or JSONArray 51 | """ 52 | token = cls.tokens.next() 53 | if not token: 54 | return JSONObject() 55 | elif token.get_type() == TokenEnum.BEGIN_ARRAY: 56 | return cls.parse_json_array() 57 | elif token.get_type() == TokenEnum.BEGIN_OBJECT: 58 | return cls.parse_json_object() 59 | else: 60 | raise ParseException('Illegal token at beginning') 61 | 62 | @classmethod 63 | def check_token(cls, expected, actual): 64 | """ 65 | Check whether set expected and set actual have intersections(Bit mask). 66 | For instance is the `expected` token is `END_OBJECT` or `COMMA_TOKEN`, which is 4(100) and 512(1000000000) 67 | the `expected` will be 1000000100. 68 | :param expected: the expected set of tokens 69 | :param actual: current token got from parser 70 | :return: True if actual token is one element in `expected` otherwise False will be returned 71 | """ 72 | if expected & actual == 0: 73 | raise ParseException('Unexpected Token at position %d' % cls.tokens.get_cursor_position()) 74 | 75 | @classmethod 76 | def get_text(cls, data): 77 | """ 78 | Decode the string in order to process unicode data in order to make convenience while parsing Chinese data 79 | :param data: raw string 80 | :return: decoded string 81 | """ 82 | return data.encode('utf-8').decode('unicode-escape') 83 | 84 | @classmethod 85 | def parse_json_array(cls): 86 | """ Parse a JSONArray""" 87 | expected = BEGIN_ARRAY | END_ARRAY | BEGIN_OBJECT | END_OBJECT | \ 88 | NULL_TOKEN | NUMBER_TOKEN | BOOL_TOKEN | STRING_TOKEN 89 | array = {False: JSONArray(), True: list()}.get(cls.data_conf) 90 | while cls.tokens.has_next(): 91 | token = cls.tokens.next() 92 | # token_type -> TokenEnum 93 | token_type = token.get_type().value 94 | token_value = token.get_value() 95 | cls.check_token(expected, token_type) 96 | 97 | # check through each condition 98 | if token_type == BEGIN_OBJECT: 99 | array.append(cls.parse_json_object()) 100 | expected = COMMA_TOKEN | END_ARRAY 101 | elif token_type == BEGIN_ARRAY: 102 | array.append(cls.parse_json_array()) 103 | expected = COMMA_TOKEN | END_ARRAY 104 | elif token_type == END_ARRAY: 105 | return array 106 | elif token_type == NULL_TOKEN: 107 | array.append(None) 108 | expected = COMMA_TOKEN | END_ARRAY 109 | elif token_type == NUMBER_TOKEN: 110 | if '.' in token_value or 'e' in token_value or 'E' in token_value: 111 | array.append(float(token_value)) 112 | elif 'x' in token_value: 113 | array.append(int(token_value, base=16)) 114 | else: 115 | array.append(int(token_value)) 116 | expected = COMMA_TOKEN | END_ARRAY 117 | elif token_type == STRING_TOKEN: 118 | array.append(token_value) 119 | expected = COMMA_TOKEN | END_ARRAY 120 | elif token_type == BOOL_TOKEN: 121 | token_value = token_value.lower().capitalize() 122 | array.append({'True': True, 'False': False}[token_value]) 123 | expected = COMMA_TOKEN | END_ARRAY 124 | elif COMMA_TOKEN: 125 | expected = BEGIN_ARRAY | BEGIN_OBJECT | STRING_TOKEN | BOOL_TOKEN | NULL_TOKEN | NUMBER_TOKEN 126 | elif END_JSON: 127 | return array 128 | else: 129 | raise ParseException('Unexpected token at position %d' % cls.tokens.get_cursor_position()) 130 | raise ParseException('Illegal token at position %d' % cls.tokens.get_cursor_position()) 131 | 132 | @classmethod 133 | def parse_json_object(cls): 134 | """Parse a JSONObject""" 135 | obj = {False: JSONObject(), True: dict()}.get(cls.data_conf) 136 | expected = STRING_TOKEN | END_OBJECT 137 | key = None 138 | while cls.tokens.has_next(): 139 | token = cls.tokens.next() 140 | token_type = token.get_type().value 141 | token_value = token.get_value() 142 | cls.check_token(expected, token_type) 143 | 144 | if token_type == BEGIN_OBJECT: 145 | obj.update({key: cls.parse_json_object()}) 146 | expected = COMMA_TOKEN | END_OBJECT 147 | elif token_type == END_OBJECT: 148 | return obj 149 | elif token_type == BEGIN_ARRAY: 150 | obj.update({key: cls.parse_json_array()}) 151 | expected = COMMA_TOKEN | END_OBJECT 152 | elif token_type == NULL_TOKEN: 153 | obj.update({key: None}) 154 | expected = COMMA_TOKEN | END_OBJECT 155 | elif token_type == STRING_TOKEN: 156 | pre_token = cls.tokens.prev_token(2) 157 | if pre_token.get_type().value == COLON_TOKEN: 158 | value = token.get_value() 159 | obj.update({key: value}) 160 | expected = COMMA_TOKEN | END_OBJECT 161 | else: 162 | key = token.get_value() 163 | # if obj.__contains__(key): 164 | # raise KeyError('Duplicated key: {}'.format(key)) 165 | expected = COLON_TOKEN 166 | elif token_type == NUMBER_TOKEN: 167 | if '.' in token_value or 'e' in token_value or 'E' in token_value: 168 | obj.update({key: float(token_value)}) 169 | elif 'x' in token_value: 170 | obj.update({key: int(token_value, base=16)}) 171 | else: 172 | obj.update({key: int(token_value)}) 173 | expected = COMMA_TOKEN | END_OBJECT 174 | elif token_type == BOOL_TOKEN: 175 | token_value = token_value.lower().capitalize() 176 | obj.update({key: {'True': True, 'False': False}[token_value]}) 177 | expected = COMMA_TOKEN | END_OBJECT 178 | elif token_type == COLON_TOKEN: 179 | expected = NULL_TOKEN | NUMBER_TOKEN | BOOL_TOKEN | STRING_TOKEN | BEGIN_ARRAY | BEGIN_OBJECT 180 | elif token_type == COMMA_TOKEN: 181 | expected = STRING_TOKEN 182 | elif token_type == END_JSON: 183 | return obj 184 | else: 185 | raise ParseException('Unexpected token at position %d' % cls.tokens.get_cursor_position()) 186 | 187 | raise ParseException('Illegal token at position %d' % cls.tokens.get_cursor_position()) 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JsonParser 2 | A JSON Parser written in Python3 3 | 4 | # Usage 5 | ## Simple Usage 6 | ```python 7 | import JsonParser 8 | data = JsonParser.parse(raw_data) 9 | ``` 10 | 11 | ## Get JSONObject or JSONArray 12 | This lib provides a custom data type(JSONArray and JSONObject). You can make the parser return this data type by: 13 | ```python 14 | data = JsonParser.parse(raw_data, use_python_data=False) 15 | ``` 16 | 17 | ## Get Prettified Str 18 | ```python 19 | ret = JsonParser.prettify(json_string) 20 | ``` 21 | 22 | --- 23 | 24 | # ~~Usage~~(Deprecated) 25 | 26 | ## Simple Usage 27 | - ~~The major function is in `parser/Parser`.~~ 28 | ```python 29 | from parser.Parser import * 30 | result = Parser.parse(raw_data) 31 | ``` 32 | 33 | ## Generate Tokens 34 | Use `tokenizer/Tokenizer` and `tokenizer/Readers` to generate **Tokens** 35 | ```python 36 | from tokenizer.Reader import * 37 | from tokenizer.Tokenizer import * 38 | from parser.Parser import * 39 | 40 | reader r = Reader(raw_data) 41 | tokens = Tokenizer(r).getTokens() 42 | tokenList = tokens.tokenList 43 | 44 | for i in tokenList : 45 | print(i.getType(), i.value) 46 | 47 | result = Parser.parse(token_list) 48 | ``` 49 | 50 | ## Access entries 51 | The method of accessing elements in `JSONArray` and `JSONObject` is the same as that of using native module(json). 52 | ```python 53 | result = Parser.parse(raw_data) 54 | result[KEY] # For JSONObject 55 | result[INDEX] # For JSONArray 56 | ``` 57 | Also, `JSONArray` supports foreach loop. 58 | 59 | ## Get Python Data 60 | The `JSONObject` and `JSONArray` support getting a python data `dict` and `list`. 61 | ```python 62 | python_data = json_data.to_python() 63 | ``` 64 | or 65 | ```python 66 | python_data = Parser.parse(raw_data, True) 67 | ``` 68 | 69 | # Issues 70 | - ~~Cannot deal with HTML tags(Tags will cause parser error)~~ =fixed -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'parse', 'prettify', 3 | ] 4 | 5 | __author__ = 'Mike He ' 6 | 7 | from .Parser import Parser 8 | from .models.JsonObject import JSONObject 9 | from .models.JsonArray import JSONArray 10 | 11 | 12 | def parse(raw_data, use_python_data=True): 13 | return Parser.parse(raw_data, use_python_data) 14 | 15 | 16 | def prettify(data): 17 | ret = Parser.parse(data, False) 18 | from .util.Stringify import to_string 19 | from .util.Stringify import array_to_string 20 | if isinstance(ret, JSONObject): 21 | return to_string(ret, 0) 22 | elif isinstance(ret, JSONArray): 23 | return array_to_string(ret, 0) 24 | -------------------------------------------------------------------------------- /exceptions/Exceptions.py: -------------------------------------------------------------------------------- 1 | class JsonTypeErrorException(Exception): 2 | def __init__(self, expected='', actual=''): 3 | Exception.__init__(self, "JsonTypeError: Expected %s, Actual %s" % (expected, actual)) 4 | 5 | 6 | class JSONObjectKeyError(Exception): 7 | def __init__(self, msg='KeyError'): 8 | Exception.__init__(self, msg) 9 | 10 | 11 | class ParseException(Exception): 12 | def __init__(self, msg='E'): 13 | msg_dict = { 14 | 'E': 'ParseError', 15 | 'T': 'TypeError', 16 | 'I': 'Illegal Character', 17 | 'T': 'Illegal Token', 18 | 'U': 'Unexpected Token', 19 | } 20 | Exception.__init__(self, msg_dict.get(msg, msg)) 21 | -------------------------------------------------------------------------------- /exceptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AD1024/JsonParser/bcffc25e930613ea9778fae7d8885cdfd9b31745/exceptions/__init__.py -------------------------------------------------------------------------------- /models/JsonArray.py: -------------------------------------------------------------------------------- 1 | from .JsonObject import JSONObject 2 | from ..exceptions.Exceptions import * 3 | from ..util.Stringify import * 4 | 5 | 6 | class JSONArray(list): 7 | def __init__(self): 8 | self.data = list() 9 | list.__init__(self.data) 10 | 11 | def size(self): 12 | return len(self.data) 13 | 14 | def get(self, i): 15 | return self.data[i] 16 | 17 | def append(self, i): 18 | self.data.append(i) 19 | 20 | def _parse_data(self, data): 21 | if type(data) == JSONObject: 22 | ret = {} 23 | for k, v in data.kvMap.items(): 24 | ret.update({k: self._parse_data(v)}) 25 | return ret 26 | elif type(data) == JSONArray: 27 | ret = [] 28 | for i in data.data: 29 | ret.append(self._parse_data(i)) 30 | return ret 31 | else: 32 | return data 33 | 34 | def to_python(self): 35 | return self._parse_data(self) 36 | 37 | def __getitem__(self, i): 38 | return self.data[i] 39 | 40 | def __setitem__(self, i, value): 41 | self.data[i] = value 42 | 43 | def get_json_object(self, i): 44 | ret = self.data[i] 45 | if isinstance(ret, JSONObject): 46 | return ret 47 | raise JsonTypeErrorException('JSONObject', str(type(ret))) 48 | 49 | def get_json_array(self, i): 50 | ret = self.data[i] 51 | if isinstance(ret, JSONArray): 52 | return ret 53 | raise JsonTypeErrorException('JSONArray', str(type(ret))) 54 | 55 | def set_data(self, data): 56 | if type(data) == list: 57 | self.data = data.copy() 58 | else: 59 | raise TypeError('expected list, actual %s' % str(type(data))) 60 | 61 | def __repr__(self): 62 | return array_to_string(self, 0) 63 | 64 | def __contains__(self, item): 65 | return self.data.__contains__(item) 66 | 67 | def __str__(self): 68 | return array_to_string(self, 0) 69 | 70 | def __iter__(self): 71 | return iter(self.data) 72 | 73 | def __eq__(self, array): 74 | if isinstance(array, JSONArray): 75 | if not self.size() == array.size(): 76 | return False 77 | for i in range(0, self.size()): 78 | if not self.get[i] == array.get(i): 79 | return False 80 | return True 81 | return False 82 | -------------------------------------------------------------------------------- /models/JsonObject.py: -------------------------------------------------------------------------------- 1 | from ..util.Stringify import to_string 2 | 3 | 4 | class JSONObject(dict): 5 | def __init__(self): 6 | self.kvMap = dict() 7 | dict.__init__(self.kvMap) 8 | 9 | def put(self, k, v): 10 | self.kvMap.update({k: v}) 11 | 12 | def get(self, k, default=None): 13 | return self.kvMap.get(k, default) 14 | 15 | def get_all(self): 16 | return list(self.kvMap.items()) 17 | 18 | def _parse_dict(self, data): 19 | from models.JsonArray import JSONArray 20 | if type(data) == JSONObject: 21 | ret = {} 22 | for k, v in data.kvMap.items(): 23 | ret.update({k: self._parse_dict(v)}) 24 | return ret 25 | elif type(data) == JSONArray: 26 | array = list() 27 | for i in data.data: 28 | array.append(self._parse_dict(i)) 29 | return array 30 | else: 31 | return data 32 | 33 | def to_python(self): 34 | return self._parse_dict(self) 35 | 36 | def items(self): 37 | return self.kvMap.items() 38 | 39 | def keys(self): 40 | return self.kvMap.keys() 41 | 42 | def update(self, t): 43 | self.kvMap.update(t) 44 | 45 | def set_data(self, data): 46 | if type(data) == dict: 47 | if len(list(filter(lambda x: type(x) == str, data.keys()))) == len(data.keys()): 48 | self.kvMap = data.copy() 49 | else: 50 | raise TypeError('Unexpected type(s) of key(s)') 51 | else: 52 | raise TypeError('expected dict, actual %s' % str(type(data))) 53 | 54 | def __repr__(self): 55 | return to_string(self, 0) 56 | 57 | def __contains__(self, item): 58 | return self.kvMap.__contains__(item) 59 | 60 | def __str__(self): 61 | return to_string(self, 0) 62 | 63 | def __getitem__(self, key): 64 | return self.kvMap.get(key, None) 65 | 66 | def __setitem__(self, key, value): 67 | self.kvMap[key] = value 68 | 69 | def __iter__(self): 70 | return iter(self.kvMap) 71 | 72 | def __eq__(self, obj): 73 | if isinstance(obj, JSONObject): 74 | return self.kvMap == obj.kvMap 75 | return False 76 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AD1024/JsonParser/bcffc25e930613ea9778fae7d8885cdfd9b31745/models/__init__.py -------------------------------------------------------------------------------- /tokenizer/Readers.py: -------------------------------------------------------------------------------- 1 | class Reader(object): 2 | def __init__(self, string=''): 3 | self.data = string 4 | self.cursor = 0 5 | 6 | def size(self): 7 | return len(self.data) 8 | 9 | def read(self, size): 10 | """ 11 | Read a certain length of data 12 | :param size: the length expected to get 13 | :return: str: if size is greater than 14 | the remaining data, all the data will be returned, otherwise a string of length size will be returned 15 | """ 16 | if self.cursor == self.size(): 17 | return None 18 | cur = self.cursor 19 | if cur + size + 1 < self.size(): 20 | ret = self.data[cur:cur + size + 1] 21 | cur += size + 1 22 | else: 23 | ret = self.data[cur:] 24 | cur = self.size() 25 | self.cursor = cur 26 | return ret 27 | 28 | 29 | class PosReader(object): 30 | """ 31 | Read only one character in each query 32 | """ 33 | 34 | def __init__(self, reader): 35 | self.reader = reader 36 | self.data = '' 37 | self.cursor = 0 38 | self._BUFFER_SIZE = 1024 39 | self.request_data() 40 | 41 | # Request new data 42 | def request_data(self): 43 | """ 44 | Request data from the ``Reader`` 45 | :return: None 46 | """ 47 | tmp = self.reader.read(self._BUFFER_SIZE) 48 | if tmp: 49 | self.data = tmp 50 | self.cursor = 0 51 | 52 | def current_pos(self): 53 | """ 54 | Read the character at current position of the cursor 55 | :return: A single character or None if the cursor exceeds the maximum index 56 | """ 57 | if self.cursor - 1 >= len(self.data): 58 | return None 59 | return self.data[max(0, self.cursor - 1)] 60 | 61 | def next_pos(self): 62 | """ 63 | Move the cursor to the next position and then return the data cursor points to 64 | :return: A single character or None if the cursor exceeds the maximum index 65 | """ 66 | if self.has_next(): 67 | ret = self.data[self.cursor] 68 | self.cursor += 1 69 | return ret 70 | return None 71 | 72 | def prev_pos(self): 73 | """ 74 | Move the cursor to previous position or do nothing if current position is 0 75 | :return: None 76 | """ 77 | self.cursor -= 1 78 | self.cursor = max(self.cursor, 0) 79 | 80 | def has_next(self): 81 | """ 82 | Check whether there is remaining data. 83 | If the cursor has reached the end, it will request new data from Reader. 84 | :return: True if there is remaining data either in ``PosReader`` or ``Reader`` 85 | """ 86 | if self.cursor >= len(self.data): 87 | self.request_data() 88 | if self.cursor > 0: 89 | return False 90 | return True 91 | return True 92 | -------------------------------------------------------------------------------- /tokenizer/Token.py: -------------------------------------------------------------------------------- 1 | class Token(object): 2 | def __init__(self, token_type, value): 3 | self.tokenType = token_type 4 | self.value = value 5 | 6 | def get_type(self): 7 | return self.tokenType 8 | 9 | def get_value(self): 10 | return self.value 11 | 12 | def set_value(self, v): 13 | self.value = v 14 | 15 | def __str__(self): 16 | return 'Type:' + str(self.tokenType) + ' Value: ' + str(self.value) 17 | -------------------------------------------------------------------------------- /tokenizer/TokenEnum.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TokenEnum(Enum): 5 | # Signal token 6 | BEGIN_OBJECT = 1 7 | BEGIN_ARRAY = 2 8 | END_OBJECT = 4 9 | END_ARRAY = 8 10 | 11 | # variable token 12 | NULL = 16 13 | NUMBER = 32 14 | STRING = 64 15 | BOOL = 128 16 | 17 | # separator token 18 | COLON = 256 19 | COMMA = 512 20 | 21 | # end signal 22 | END_JSON = 65536 23 | -------------------------------------------------------------------------------- /tokenizer/TokenList.py: -------------------------------------------------------------------------------- 1 | class TokenList(object): 2 | def __init__(self): 3 | self.tokenList = list() 4 | self.cursor = 0 5 | 6 | def get_cursor_position(self): 7 | return self.cursor 8 | 9 | def append(self, token): 10 | self.tokenList.append(token) 11 | 12 | def next(self): 13 | ret = self.tokenList[self.cursor] 14 | self.cursor += 1 15 | return ret 16 | 17 | def has_next(self): 18 | return self.cursor < len(self.tokenList) 19 | 20 | def current_token(self): 21 | if self.has_next(): 22 | return self.tokenList[self.cursor] 23 | else: 24 | return None 25 | 26 | def prev_token(self, gen): 27 | if self.cursor == 0: 28 | return None 29 | return self.tokenList[self.cursor - gen] 30 | -------------------------------------------------------------------------------- /tokenizer/Tokenizer.py: -------------------------------------------------------------------------------- 1 | from ..exceptions.Exceptions import * 2 | from .Readers import * 3 | from .Token import * 4 | from .TokenEnum import * 5 | from .TokenList import * 6 | 7 | 8 | class Tokenizer(object): 9 | def __init__(self, reader): 10 | self.ch = '' 11 | self.reader = PosReader(reader) 12 | self.tokenList = TokenList() 13 | self.tokenize() 14 | 15 | def get_tokens(self): 16 | return self.tokenList 17 | 18 | def tokenize(self): 19 | tk = self.parse() 20 | self.tokenList.append(tk) 21 | while tk.get_type() != TokenEnum.END_JSON: 22 | tk = self.parse() 23 | self.tokenList.append(tk) 24 | 25 | def parse(self): 26 | """Parse a string sequence""" 27 | self.ch = '' 28 | 29 | def is_space(char): 30 | return char in ('\n', '\t', '\r', ' ', '') 31 | 32 | def is_hex(char): 33 | return char.isdigit() or (ord(char) in range(ord('a'), ord('g'))) \ 34 | or (ord(char) in range(ord('A'), ord('G'))) 35 | 36 | def is_exp(char): 37 | return char in ('e', 'E') 38 | 39 | def read(): 40 | self.ch = self.reader.next_pos() 41 | 42 | def read_null(): 43 | rem = self.reader.next_pos() + self.reader.next_pos() + self.reader.next_pos() 44 | if rem.lower() != 'ull': 45 | raise JsonTypeErrorException('null', 'n%s' % rem) 46 | else: 47 | return Token(TokenEnum.NULL, 'null') 48 | 49 | def read_bool(s): 50 | s = s.lower() 51 | if s == 't': 52 | rem = self.reader.next_pos() + self.reader.next_pos() + self.reader.next_pos() 53 | else: 54 | rem = self.reader.next_pos() + self.reader.next_pos() + self.reader.next_pos() + self.reader.next_pos() 55 | if rem.lower() != {'t': 'rue', 'f': 'alse'}[s]: 56 | raise JsonTypeErrorException({'t': 'true', 'f': 'false'}[s], s + rem) 57 | else: 58 | return Token(TokenEnum.BOOL, s + rem) 59 | 60 | def read_str(): 61 | ret = '' 62 | while 1: 63 | read() 64 | if self.ch == '\\': 65 | read() 66 | ''' 67 | Temporarily remove escape test 68 | if isEscape(self.ch) : 69 | raise ParseException('I') 70 | ''' 71 | ret += '\\' 72 | self.ch = self.reader.current_pos() 73 | ret += self.ch 74 | if self.ch == 'u': 75 | for i in (1, 2, 3, 4): 76 | read() 77 | if is_hex(self.ch): 78 | ret += self.ch 79 | else: 80 | raise ParseException('I') 81 | ret = str(ret) 82 | elif self.ch == '"': 83 | tmp = self.reader.next_pos() 84 | if tmp not in (']', '}', ',', ':', ' '): 85 | self.ch += tmp 86 | self.reader.prev_pos() 87 | else: 88 | self.reader.prev_pos() 89 | return Token(TokenEnum.STRING, str(ret)) 90 | elif self.ch == '\n' or self.ch == '\r': 91 | ret += str(self.ch) 92 | else: 93 | ret += str(self.ch) 94 | 95 | def read_exp(): 96 | """ 97 | Read an exp form of number 98 | :return: 99 | """ 100 | ret = '' 101 | read() 102 | if self.ch == '+' or self.ch == '-': 103 | '''deal with numbers like 1e+3, 1e-10''' 104 | ret += self.ch 105 | read() 106 | if not self.ch.isdigit(): 107 | raise ParseException('E') 108 | ret += self.ch 109 | read() 110 | while self.ch.isdigit(): 111 | ret += self.ch 112 | read() 113 | if self.ch: 114 | ''' 115 | If the next character of the last of the number is not 116 | the end signal, the cursor should be moved one step back 117 | ''' 118 | self.reader.prev_pos() 119 | else: 120 | '''deal with numbers like 1e10''' 121 | while self.ch.isdigit(): 122 | ret += self.ch 123 | read() 124 | if self.ch: 125 | self.reader.prev_pos() 126 | JsonTypeErrorException('e or E', self.ch) 127 | return ret 128 | 129 | def read_others(): 130 | """ 131 | Read other forms of values 132 | :return: 133 | """ 134 | ret = '' 135 | prev_zero = self.ch == '0' 136 | read() 137 | if self.ch == '.': 138 | '''decimals''' 139 | ret += self.ch 140 | read() 141 | if not self.ch.isdigit(): 142 | raise ParseException('I') 143 | ret += self.ch 144 | read() 145 | while self.ch.isdigit(): 146 | ret += self.ch 147 | read() 148 | if is_exp(self.ch): 149 | ret += self.ch 150 | ret += read_exp() 151 | else: 152 | if self.ch: 153 | self.reader.prev_pos() 154 | elif is_exp(self.ch): 155 | '''exp numbers''' 156 | ret += self.ch 157 | ret += read_exp() 158 | elif self.ch.lower() == 'x' and prev_zero: 159 | '''hex numbers''' 160 | ret += self.ch 161 | read() 162 | while is_hex(self.ch): 163 | ret += self.ch 164 | read() 165 | if self.ch: 166 | self.reader.prev_pos() 167 | else: 168 | self.reader.prev_pos() 169 | return ret 170 | 171 | def read_num(): 172 | ret = '' 173 | if self.ch == '-': 174 | ret += self.ch 175 | read() 176 | if self.ch == '0': 177 | ret += self.ch 178 | ret += read_others() 179 | elif ord(self.ch) in range(ord('1'), ord('9')+1): 180 | ret += self.ch 181 | read() 182 | while self.ch and self.ch.isdigit(): 183 | ret += self.ch 184 | read() 185 | if self.ch: 186 | self.reader.prev_pos() 187 | ret += read_others() 188 | else: 189 | raise ParseException('I') 190 | elif self.ch == '0': 191 | ret += '0' 192 | ret += read_others() 193 | else: 194 | ret += self.ch 195 | read() 196 | while self.ch and self.ch.isdigit(): 197 | ret += self.ch 198 | read() 199 | if self.ch: 200 | self.reader.prev_pos() 201 | ret += read_others() 202 | return Token(TokenEnum.NUMBER, ret) 203 | 204 | while True: 205 | if self.reader.has_next(): 206 | self.ch = self.reader.next_pos() 207 | if not is_space(self.ch): 208 | break 209 | else: 210 | return Token(TokenEnum.END_JSON, None) 211 | if self.ch == '{': 212 | return Token(TokenEnum.BEGIN_OBJECT, self.ch) 213 | elif self.ch == '}': 214 | return Token(TokenEnum.END_OBJECT, self.ch) 215 | elif self.ch == '[': 216 | return Token(TokenEnum.BEGIN_ARRAY, self.ch) 217 | elif self.ch == ']': 218 | return Token(TokenEnum.END_ARRAY, self.ch) 219 | elif self.ch == ',': 220 | return Token(TokenEnum.COMMA, self.ch) 221 | elif self.ch == ':': 222 | return Token(TokenEnum.COLON, self.ch) 223 | elif self.ch == 'n': 224 | return read_null() 225 | elif self.ch.lower() == 't' or self.ch.lower() == 'f': 226 | return read_bool(self.ch) 227 | elif self.ch == '"': 228 | return read_str() 229 | elif self.ch == '-': 230 | return read_num() 231 | if self.ch.isdigit(): 232 | return read_num() 233 | -------------------------------------------------------------------------------- /tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AD1024/JsonParser/bcffc25e930613ea9778fae7d8885cdfd9b31745/tokenizer/__init__.py -------------------------------------------------------------------------------- /util/Stringify.py: -------------------------------------------------------------------------------- 1 | def get_indent(dep): 2 | ret = '' 3 | for i in range(0, dep * 2): 4 | ret += ' ' 5 | return ret 6 | 7 | 8 | def array_to_string(array, dep): 9 | from ..models.JsonArray import JSONArray 10 | from ..models.JsonObject import JSONObject 11 | ret = get_indent(dep) 12 | ret += '[' 13 | dep += 1 14 | for i in range(0, array.size()): 15 | ret += '\n' 16 | item = array.get(i) 17 | if isinstance(item, JSONArray): 18 | ret += array_to_string(item, dep + 1) 19 | elif isinstance(item, JSONObject): 20 | ret += to_string(item, dep + 1) 21 | elif isinstance(item, str): 22 | ret += get_indent(dep) 23 | ret += '\"' + item + '\"' 24 | else: 25 | ret += get_indent(dep) 26 | ret += str(item) 27 | if i < array.size() - 1: 28 | ret += ',' 29 | dep -= 1 30 | ret += '\n' 31 | ret += get_indent(dep) 32 | ret += ']' 33 | return ret 34 | 35 | 36 | def to_string(obj, dep): 37 | from ..models.JsonObject import JSONObject 38 | from ..models.JsonArray import JSONArray 39 | ''' 40 | Stringify json data 41 | ''' 42 | ret = get_indent(dep) 43 | ret += '{' 44 | dep += 1 45 | kv_map = obj.get_all() 46 | for i in range(0, len(kv_map)): 47 | k = kv_map[i][0] 48 | v = kv_map[i][1] 49 | ret += '\n' 50 | ret += get_indent(dep) 51 | ret += '\"' 52 | ret += k 53 | ret += '\" : ' 54 | if isinstance(v, JSONObject): 55 | ret += '\n' 56 | ret += to_string(v, dep + 1) 57 | elif isinstance(v, JSONArray): 58 | ret += '\n' 59 | ret += array_to_string(v, dep + 1) 60 | elif isinstance(v, str): 61 | ret += '\"' + v + '\"' 62 | else: 63 | ret += str(v) 64 | 65 | if i < len(kv_map) - 1: 66 | ret += ',' 67 | 68 | dep -= 1 69 | ret += '\n' + get_indent(dep) 70 | ret += '}' 71 | 72 | return ret 73 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AD1024/JsonParser/bcffc25e930613ea9778fae7d8885cdfd9b31745/util/__init__.py --------------------------------------------------------------------------------