├── .gitignore ├── test.v ├── main.py ├── LICENSE ├── README.md ├── v.lark └── vork ├── tokenizer.py ├── parser.py └── ast.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | *.out 4 | *.cache -------------------------------------------------------------------------------- /test.v: -------------------------------------------------------------------------------- 1 | fn C.test() ?int 2 | 3 | fn test() int { 4 | return C.test() or { return 0 } 5 | } 6 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from vork.tokenizer import * 2 | from vork.parser import * 3 | 4 | 5 | def main(): 6 | workspace = Workspace([]) 7 | workspace.load_main('./') 8 | print(workspace.load_module('main')) 9 | 10 | 11 | if __name__ == '__main__': 12 | main() 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Itay Almog 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vork 2 | 3 | Vork will eventually be a fully fledged V implementation. 4 | 5 | Right now it is just a parser, once I get a full parser then I will start working on code gen. 6 | 7 | ## Example 8 | for now all that it does is read in a file and try to parse it. Right now there is no error 9 | recovery on a syntax error, but there is a nice printout so you can hopefully understand what went wrong. 10 | 11 | The parse output will be printed and is formated in a lisp like way 12 | 13 | ```v 14 | fn fib(n int) int { 15 | if n <= 1 { 16 | return n 17 | } 18 | return fib(n - 1) + fib(n - 2) 19 | } 20 | 21 | fn main() { 22 | i := 0 23 | for i = 0; i < 10; ++i { 24 | println(fib(i)) 25 | } 26 | } 27 | ``` 28 | 29 | ```lisp 30 | (func fib ((n int)) int 31 | (block 32 | (if (<= n 1) 33 | (block 34 | (return n))) 35 | (return (+ (call fib ((- n 1))) (call fib ((- n 2))))))) 36 | (func main () 37 | (block 38 | (var (i) 0) 39 | (for (= i 0) (< i 10) (prefix ++ i) 40 | (block 41 | (call println ((call fib (i)))))))) 42 | ``` 43 | 44 | ## Formal grammar 45 | The [lark](v.lark) file has an old formal grammar I defined which I am going to keep as a reference, maybe after finishing the hand written parser I will go back and rewrite the formal grammar to be updated. 46 | 47 | hopefully by the done I am finished with the parser the (official) formal grammar will be out already :shrug: 48 | 49 | ## Implemented 50 | * All binary and unary expressions 51 | * post fix operators are not added to the ast yet 52 | * Function calls can not take `mut` modifier to expression for now 53 | * Almost full type parsing support 54 | * missing function types 55 | * Functions, methods and interop functions 56 | * missing generics 57 | * missing multiple return value 58 | * missing auto wrapping of optional values 59 | * Module and Imports 60 | * Structs with their access modifiers 61 | * missing the base type 62 | * missing generics 63 | * asserts 64 | * if\if else\else 65 | * Most of the for loops 66 | * in c like for loops can not declare a variable at the start... 67 | * missing for with only condition (`for true`) 68 | * Constants 69 | * Variable declarations 70 | * Enums declarations 71 | * Integer, Float and arrays literals 72 | * or statement 73 | * Super basic type checking 74 | * missing mut checks 75 | * missing unsafe checks 76 | * missing access checks (pub) 77 | * missing implicit enum 78 | 79 | ## Missing 80 | * string and map 81 | * interfaces 82 | * match 83 | * go statement 84 | * attributes 85 | * compile time if 86 | 87 | ## Problems 88 | Right now the parser ignores new lines **completely**, that is because from what I could see the official V compiler also does that, but in an inconsistent way... sometimes it ignores it and sometimes not... 89 | 90 | for the most part it is not actually a problem, but specifically for the `*` operator it makes a problem, because it is used both for deref and for multipication 91 | ```v 92 | a := 123 93 | b := &a 94 | *b = 456 95 | ``` 96 | will not give the correct output! 97 | 98 | the simplest way to get around it for now is to simply seround it with a block 99 | ```v 100 | a := 123 101 | b := &a 102 | {*b = 456} 103 | ``` 104 | 105 | but the real solution is to wait for a formal grammar and see how newlines should actually be handled. 106 | 107 | note that this problem happens on any operator which may be used in both unary and binary way, including `-` and alike... -------------------------------------------------------------------------------- /v.lark: -------------------------------------------------------------------------------- 1 | /////////////////////////////////////////////////// 2 | // V Grammar 3 | /////////////////////////////////////////////////// 4 | // This is the grammar used by the parser to parse 5 | // the V code. I am not sure how close it is to 6 | // the official V compiler but that is what made 7 | // the most sense to me 8 | /////////////////////////////////////////////////// 9 | 10 | /////////////////////////////////////////////////// 11 | // module scope stuff 12 | /////////////////////////////////////////////////// 13 | 14 | start: _module_item* 15 | 16 | _module_item: fn_decl 17 | | method_decl 18 | | struct_decl 19 | | module_decl 20 | | import_decl 21 | | interop_fn_decl 22 | | const_decl 23 | 24 | // Misc 25 | module_decl: "module" NAME ("." NAME)* 26 | import_decl: "import" NAME ("." NAME)* 27 | 28 | // Function declaration 29 | fn_decl: maybe_pub "fn" NAME "(" fn_params ")" fn_return stmt_list 30 | interop_fn_decl: "fn" NAME "." NAME "(" fn_params ")" fn_return 31 | fn_params: [fn_param ("," fn_param)*] 32 | fn_param: NAME maybe_mut type_decl | NAME 33 | fn_return: [type_decl | "(" type_decl ("," type_decl)+ ")"] 34 | 35 | // Struct declaration 36 | struct_decl: maybe_pub "struct" NAME "{" embedded_struct_field struct_fields "}" 37 | struct_fields: (struct_field | struct_access_mod ":")* 38 | struct_field: NAME type_decl 39 | !struct_access_mod: "mut" 40 | | "pub" 41 | | "pub" "mut" 42 | | "pub" "mut" "mut" 43 | embedded_struct_field: (maybe_mut NAME)? 44 | 45 | // Method declaration 46 | method_decl: maybe_pub "fn" "(" NAME maybe_mut type_decl ")" NAME "(" fn_params ")" fn_return stmt_list 47 | 48 | // const declaration 49 | const_decl: "const" "(" (const_item)+ ")" 50 | | "const" const_item 51 | const_item: NAME "=" _expr 52 | 53 | 54 | /////////////////////////////////////////////////// 55 | // Statements 56 | /////////////////////////////////////////////////// 57 | 58 | ?stmt: "return" [_expr ("," _expr)*] -> stmt_return 59 | | "assert" _expr -> stmt_assert 60 | | "for" NAME "in" _expr stmt_list -> stmt_foreach 61 | | "for" NAME "," NAME "in" _expr stmt_list -> stmt_foreach_indexed 62 | | "for" stmt_list-> stmt_forever 63 | | "for" (maybe_var_decl|stmt_assignment) ";" _expr ";" (_expr | stmt_assignment) stmt_list -> stmt_for 64 | | "break" -> stmt_break 65 | | "continue" -> stmt_continue 66 | | stmt_var_decl 67 | | stmt_assignment 68 | | stmt_if 69 | | (expr_fn_call) -> stmt_expr 70 | 71 | stmt_var_decl: var_decl_vars ":=" _expr 72 | !maybe_var_decl: stmt_var_decl? 73 | 74 | ?stmt_assignment: (ident|expr_member_access|expr_index) _op_assignment _expr -> stmt_assign 75 | !_op_assignment: ("+="|"-="|"*="|"/="|"%="|"<<="|">>="|"|="|"&="|"^="|"=") 76 | 77 | stmt_if: "if" _expr stmt_list (stmt_else |) 78 | ?stmt_else: "else" stmt_list -> stmt_else 79 | | "else" stmt_if -> stmt_else_if 80 | 81 | var_decl: maybe_mut NAME 82 | var_decl_vars: var_decl ("," var_decl)* 83 | 84 | stmt_list: "{" stmt* "}" 85 | 86 | /////////////////////////////////////////////////// 87 | // Expressions 88 | /////////////////////////////////////////////////// 89 | 90 | //************************************ 91 | // the full expression precendence 92 | //************************************ 93 | 94 | _expr: expr_logical_or 95 | 96 | ?expr_logical_or: expr_logical_and (_op_logical_or expr_logical_and)* -> expr_binary 97 | ?expr_logical_and: expr_bitwise_or (_op_logical_and expr_bitwise_or)* -> expr_binary 98 | ?expr_bitwise_or: expr_bitwise_xor (_op_bitwise_or expr_bitwise_xor)* -> expr_binary 99 | ?expr_bitwise_xor: expr_bitwise_and (_op_bitwise_xor expr_bitwise_and)* -> expr_binary 100 | ?expr_bitwise_and: expr_equality (_op_bitwise_and expr_equality)* -> expr_binary 101 | ?expr_equality: expr_relational (_op_equality expr_relational)* -> expr_binary 102 | ?expr_relational: expr_shift (_op_relational expr_shift)* -> expr_binary 103 | ?expr_shift: expr_additive (_op_shift expr_additive)* -> expr_binary 104 | ?expr_additive: expr_multiplicative (_op_additive expr_multiplicative)* -> expr_binary 105 | ?expr_multiplicative: expr_unary (_op_multiplicative expr_unary)* -> expr_binary 106 | 107 | ?expr_unary: _op_unary expr_fix -> expr_unary 108 | | expr_fix 109 | 110 | ?expr_fix: (expr_member_access|expr_index|ident) _op_postfix -> expr_postfix 111 | | _op_prefix (expr_member_access|expr_index|ident) -> expr_prefix 112 | | expr_others 113 | 114 | // all the operators 115 | !_op_postfix: ("++"|"--") 116 | !_op_prefix: ("++"|"--") 117 | !_op_unary: ("-"|"!"|"~"|"&"|"*") 118 | !_op_multiplicative: ("*"|"/"|"%") 119 | !_op_additive: ("+"|"-") 120 | !_op_shift: ("<<"|">>") 121 | !_op_relational: (">"|">="|"<="|"<") 122 | !_op_equality: ("=="|"!=") 123 | !_op_bitwise_xor: "^" 124 | !_op_bitwise_and: "&" 125 | !_op_bitwise_or: "|" 126 | !_op_logical_and: "&&" 127 | !_op_logical_or: "||" 128 | 129 | ?expr_others: expr_fn_call 130 | | expr_member_access 131 | | expr_index 132 | | expr_literals 133 | 134 | expr_fn_call: _expr "(" (maybe_mut _expr ("," maybe_mut _expr)*)? ")" 135 | expr_member_access: _expr "." NAME 136 | expr_index: _expr "[" _expr "]" 137 | 138 | ?expr_literals: "(" _expr ")" 139 | | "none" -> const_none 140 | | "false" -> const_false 141 | | "true" -> const_true 142 | | string 143 | | number 144 | | float 145 | | ident 146 | | struct_literal 147 | | struct_literal_named 148 | | array_literal 149 | | array_literal_uninit 150 | 151 | /////////////////////////////////////////////////// 152 | // Literals 153 | /////////////////////////////////////////////////// 154 | 155 | // Struct literal types 156 | // MyStruct{1,2,3} 157 | // MyStruct{a: 1, b: 2, c: 3} 158 | struct_literal: maybe_ref module_path_ident "{" (_expr ("," _expr )*)? "}" 159 | struct_literal_named: maybe_ref module_path_ident "{" struct_literal_named_item (struct_literal_named_item)* "}" 160 | struct_literal_named_item: NAME ":" _expr 161 | 162 | // array literal 163 | array_literal: "[" _expr ("," _expr)* "]" 164 | array_literal_uninit: "[" _expr "]" type_decl 165 | 166 | number: ZERO | DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | BIN_NUMBER 167 | float: FLOAT 168 | string: STRING 169 | ident: NAME 170 | module_path_ident: NAME ("." NAME)* 171 | 172 | /////////////////////////////////////////////////// 173 | // type declarations 174 | /////////////////////////////////////////////////// 175 | 176 | ?type_decl: type_ident 177 | | type_array 178 | | type_map 179 | | type_ref 180 | | type_opt 181 | 182 | type_array: "[" "]" type_decl 183 | type_map: "map" "[" type_decl "]" type_decl 184 | type_ref: "&" type_decl 185 | type_opt: "?" type_decl 186 | type_ident: NAME 187 | 188 | !maybe_pub: "pub"? 189 | !maybe_mut: "mut"? 190 | !maybe_ref: "&"? 191 | 192 | /////////////////////////////////////////////////// 193 | // basic stuff 194 | /////////////////////////////////////////////////// 195 | 196 | STRING : /[ubf]?r?("(?!"").*?(? FLOAT 205 | %import common.CNAME -> NAME 206 | 207 | %ignore /([\t \f\n\r]+)/ 208 | -------------------------------------------------------------------------------- /vork/tokenizer.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class UnknownCharacter(Exception): 5 | pass 6 | 7 | 8 | class CodePosition: 9 | 10 | def __init__(self, start_line, end_line, start_column, end_column): 11 | self.start_line = start_line 12 | self.end_line = end_line 13 | self.start_column = start_column 14 | self.end_column = end_column 15 | 16 | 17 | class Token: 18 | 19 | def __init__(self, pos: CodePosition): 20 | self.pos = pos 21 | 22 | def __repr__(self): 23 | raise NotImplementedError 24 | 25 | 26 | class EofToken(Token): 27 | 28 | def __init__(self): 29 | super(EofToken, self).__init__(None) 30 | 31 | def __repr__(self): 32 | return f'' 33 | 34 | 35 | class IntToken(Token): 36 | 37 | def __init__(self, pos: CodePosition, value: int): 38 | super(IntToken, self).__init__(pos) 39 | self.value = value 40 | 41 | def __repr__(self): 42 | return f'' 43 | 44 | 45 | class FloatToken(Token): 46 | 47 | def __init__(self, pos: CodePosition, value: float): 48 | super(FloatToken, self).__init__(pos) 49 | self.value = value 50 | 51 | def __repr__(self): 52 | return f'' 53 | 54 | 55 | class IdentToken(Token): 56 | 57 | def __init__(self, pos: CodePosition, value: str): 58 | super(IdentToken, self).__init__(pos) 59 | self.value = value 60 | 61 | def __repr__(self): 62 | return f'' 63 | 64 | 65 | class KeywordToken(Token): 66 | 67 | def __init__(self, pos: CodePosition, value: str): 68 | super(KeywordToken, self).__init__(pos) 69 | self.value = value 70 | 71 | def __repr__(self): 72 | return f'' 73 | 74 | 75 | class SymbolToken(Token): 76 | 77 | def __init__(self, pos: CodePosition, value: str): 78 | super(SymbolToken, self).__init__(pos) 79 | self.value = value 80 | 81 | def __repr__(self): 82 | return f'' 83 | 84 | 85 | class Tokenizer: 86 | 87 | def __init__(self, stream: str): 88 | self.stream = stream 89 | self.line = 0 90 | self.column = 0 91 | self.token = Token(None) 92 | 93 | self.before = [] 94 | self.pushes = [] 95 | 96 | def _inc_stream(self, times=1): 97 | # Increment the first thing 98 | while times > 0: 99 | was = self.stream[0] 100 | self.stream = self.stream[1:] 101 | if was == '\n': 102 | self.line += 1 103 | self.column = 0 104 | else: 105 | self.column += 1 106 | times -= 1 107 | 108 | def push(self): 109 | self.pushes.append([self.token]) 110 | 111 | def pop(self): 112 | """ 113 | Will take all the parsed elements since the push and 114 | add them back 115 | """ 116 | items = self.pushes.pop() 117 | self.token = items[0] 118 | self.before = items[1:] + self.before 119 | 120 | def discard(self): 121 | """ 122 | Remove the last push 123 | """ 124 | self.pushes.pop() 125 | 126 | def is_token(self, kind) -> bool: 127 | if isinstance(kind, str): 128 | return isinstance(self.token, SymbolToken) and self.token.value == kind 129 | elif isinstance(self.token, kind): 130 | return True 131 | 132 | def is_keyword(self, ident) -> bool: 133 | if self.is_token(KeywordToken) and self.token.value == ident: 134 | return True 135 | else: 136 | return False 137 | 138 | def match_keyword(self, ident) -> bool: 139 | if self.is_keyword(ident): 140 | self.next_token() 141 | return True 142 | else: 143 | return False 144 | 145 | def match_token(self, kind) -> bool: 146 | if self.is_token(kind): 147 | self.next_token() 148 | return True 149 | else: 150 | return False 151 | 152 | def expect_token(self, kind): 153 | if self.is_token(kind): 154 | self.next_token() 155 | else: 156 | assert False, kind 157 | 158 | def expect_keyword(self, kind): 159 | if self.is_keyword(kind): 160 | self.next_token() 161 | else: 162 | assert False, kind 163 | 164 | def next_token(self): 165 | # We have items that have been saved 166 | if len(self.before) != 0: 167 | self.token = self.before[0] 168 | self.before = self.before[1:] 169 | 170 | # Parse a new item 171 | else: 172 | # Clear unneeded stuff 173 | while True: 174 | # Consume spaces 175 | if len(self.stream) > 0 and self.stream[0].isspace(): 176 | self._inc_stream() 177 | 178 | # Consume multiline comment 179 | elif len(self.stream) > 2 and self.stream[:2] == '/*': 180 | self._inc_stream(2) 181 | nesting = 1 182 | while nesting > 0 and len(self.stream) > 0: 183 | # Nested comment 184 | if len(self.stream) > 2 and self.stream[:2] == '/*': 185 | nesting += 1 186 | self._inc_stream(2) 187 | elif len(self.stream) > 1 and self.stream[:2] == '*/': 188 | nesting -= 1 189 | self._inc_stream(2) 190 | self._inc_stream() 191 | 192 | # Consume one line comments 193 | elif len(self.stream) > 2 and self.stream[:2] == '//': 194 | self._inc_stream(2) 195 | while len(self.stream) > 0: 196 | if self.stream[0] == '\n': 197 | self._inc_stream() 198 | break 199 | self._inc_stream() 200 | 201 | # Nothing left to clear 202 | else: 203 | break 204 | 205 | pos = CodePosition(self.line, self.line, self.column, self.column) 206 | 207 | # End of file 208 | if len(self.stream) == 0: 209 | self.token = EofToken() 210 | 211 | # Integers 212 | elif self.stream[0].isdigit(): 213 | # Figure the base 214 | base = 10 215 | chars = '0123456789' 216 | if self.stream[0] == '0' and len(self.stream) > 3: 217 | if self.stream[1].lower() == 'x': 218 | base = 16 219 | chars = '0123456789abcdefABCDEF' 220 | self._inc_stream(2) 221 | elif self.stream[1].lower() == 'b': 222 | base = 2 223 | chars = '01' 224 | self._inc_stream(2) 225 | 226 | # TODO: octal numbers 227 | 228 | # Get the value and parse it 229 | value = '' 230 | while len(self.stream) > 0 and self.stream[0] in chars: 231 | value += self.stream[0] 232 | self._inc_stream() 233 | 234 | # Check if this will actually be a float 235 | # Only if the base is 10 236 | # TODO: more complete float expressions with e or whatever 237 | if base == 10 and len(self.stream) > 1 and self.stream[0] == '.' and self.stream[1].isdigit(): 238 | self._inc_stream() 239 | after_dot = '' 240 | while len(self.stream) > 0 and self.stream[0].isdigit(): 241 | after_dot += self.stream[0] 242 | self._inc_stream() 243 | self.token = FloatToken(pos, float(value + '.' + after_dot)) 244 | else: 245 | self.token = IntToken(pos, int(value, base)) 246 | 247 | # Identifier token or keywords 248 | elif self.stream[0].isalpha() or self.stream[0] == '_': 249 | value = '' 250 | while len(self.stream) > 0 and (self.stream[0].isalnum() or self.stream[0] == '_'): 251 | value += self.stream[0] 252 | self._inc_stream() 253 | 254 | # Check if a keyword 255 | if value in [ 256 | 'fn', 257 | 'pub', 258 | 'mut', 259 | '__global', 260 | 'if', 261 | 'else', 262 | 'assert', 263 | 'for', 264 | 'in', 265 | 'match', 266 | 'enum', 267 | 'struct', 268 | 'interface', 269 | 'return', 270 | 'const', 271 | 'module', 272 | 'import', 273 | 'defer', 274 | 'go', 275 | 'or', 276 | 'continue', 277 | 'break', 278 | 'goto', 279 | 'type', 280 | 'unsafe', 281 | ]: 282 | self.token = KeywordToken(pos, value) 283 | else: 284 | self.token = IdentToken(pos, value) 285 | 286 | # Special characters 287 | elif self.stream[0] in '()[]{};\'",.:/*-+!%&<>=~^|?': 288 | 289 | # Two character symbols 290 | if len(self.stream) > 1 and self.stream[:2] in [ 291 | '<<', 292 | '>>', 293 | '&&', 294 | '||', 295 | '!=', 296 | '==', 297 | '<=', 298 | '>=', 299 | '+=', 300 | '-=', 301 | '*=', 302 | '/=', 303 | '%=', 304 | '&=', 305 | '|=', 306 | '^=', 307 | '++', 308 | '--', 309 | ':=', 310 | '..', 311 | ]: 312 | self.token = SymbolToken(pos, self.stream[:2]) 313 | self._inc_stream(2) 314 | 315 | # Simple symbols 316 | else: 317 | self.token = SymbolToken(pos, self.stream[0]) 318 | self._inc_stream() 319 | 320 | # Unknown 321 | else: 322 | assert False, f'Unknown character {self.stream[0]}' 323 | 324 | pos.end_column = self.column 325 | pos.end_line = self.line 326 | 327 | # Check if need to add to current save 328 | if len(self.pushes) != 0: 329 | self.pushes[-1].append(self.token) 330 | 331 | return self.token 332 | 333 | -------------------------------------------------------------------------------- /vork/parser.py: -------------------------------------------------------------------------------- 1 | from vork.tokenizer import * 2 | from vork.ast import * 3 | 4 | 5 | class Parser: 6 | 7 | def __init__(self, tokenizer: Tokenizer): 8 | self.t = tokenizer 9 | self.t.next_token() 10 | 11 | self.frame = [] 12 | 13 | ################################################################################################################### 14 | # Expression parsing 15 | # 16 | # See https://www.tutorialspoint.com/go/go_operators_precedence.htm for the table that I used as reference 17 | ################################################################################################################### 18 | 19 | def _parse_literal(self, check_range=True): 20 | 21 | # Integer literal 22 | if self.t.is_token(IntToken): 23 | val = self.t.token.value 24 | self.t.next_token() 25 | return ExprIntegerLiteral(val) 26 | 27 | # Float literal 28 | elif self.t.is_token(FloatToken): 29 | val = self.t.token.value 30 | self.t.next_token() 31 | return ExprFloatLiteral(val) 32 | 33 | # Identifier 34 | elif self.t.is_token(IdentToken): 35 | val = self.t.token.value 36 | self.t.next_token() 37 | return ExprIdentifierLiteral(val) 38 | 39 | # Array literal 40 | elif self.t.match_token('['): 41 | exprs = [] 42 | while not self.t.match_token(']'): 43 | exprs.append(self.parse_expr()) 44 | # TODO: I remember there were some array attributes, will need to look it up 45 | return ExprArrayLiteral(exprs) 46 | 47 | # Parens 48 | elif self.t.match_token('('): 49 | expr = self.parse_expr() 50 | self.t.expect_token(')') 51 | return expr 52 | 53 | else: 54 | assert False, f'Unexpected token {self.t.token}' 55 | 56 | def _parse_postfix(self): 57 | expr = self._parse_literal() 58 | 59 | # Postfix operators 60 | if self.t.is_token('++') or self.t.is_token('--'): 61 | pass 62 | 63 | else: 64 | # Top level expressions 65 | while True: 66 | # Member access 67 | if self.t.match_token('.'): 68 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 69 | expr = ExprMemberAccess(expr, self.t.token.value) 70 | self.t.next_token() 71 | 72 | # Function call 73 | elif self.t.match_token('('): 74 | args = [] 75 | if not self.t.is_token(')'): 76 | args = [self.parse_expr()] 77 | while self.t.match_token(','): 78 | args.append(self.parse_expr()) 79 | self.t.expect_token(')') 80 | expr = ExprCall(expr, args) 81 | 82 | # Array access 83 | elif self.t.match_token('['): 84 | expr = ExprIndexAccess(expr, self.parse_expr()) 85 | self.t.expect_token(']') 86 | 87 | # In expression 88 | elif self.t.match_keyword('in'): 89 | expr = ExprIn(expr, self.parse_expr()) 90 | 91 | # Nothing more, so we probably done 92 | else: 93 | break 94 | 95 | return expr 96 | 97 | # TODO: deref (*), need to figure how to handle the ambiguity with multiplications 98 | def _parse_unary(self): 99 | 100 | # this can be done only one time 101 | if self.t.is_token('-') or self.t.is_token('--') or self.t.is_token('++') or self.t.is_token('&'): 102 | op = self.t.token.value 103 | self.t.next_token() 104 | expr = ExprUnary(op, self._parse_postfix()) 105 | 106 | # These can be done multiple times 107 | elif self.t.is_token('!') or self.t.is_token('~') or self.t.is_token('*'): 108 | op = self.t.token.value 109 | self.t.next_token() 110 | expr = ExprUnary(op, self._parse_unary()) 111 | 112 | # Implicit enum member access 113 | elif self.t.match_token('.'): 114 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 115 | name = self.t.token.value 116 | self.t.next_token() 117 | return ExprImplicitEnum(name) 118 | 119 | else: 120 | self.t.push() 121 | self.t.next_token() 122 | 123 | # Check for ranged array literal 124 | # TODO: for now we only allow for literals 125 | # to be used in the ranged array, is 126 | # that what we really want? 127 | if self.t.is_token('..'): 128 | self.t.pop() 129 | expr_from = self._parse_literal() 130 | self.t.expect_token('..') 131 | expr_to = self._parse_literal() 132 | return ExprRange(expr_from, expr_to) 133 | 134 | self.t.pop() 135 | 136 | expr = self._parse_postfix() 137 | 138 | return expr 139 | 140 | def _parse_multiplicative(self): 141 | expr = self._parse_unary() 142 | 143 | while self.t.is_token('*') or self.t.is_token('/') or self.t.is_token('%'): 144 | op = self.t.token.value 145 | self.t.next_token() 146 | expr = ExprBinary(expr, op, self._parse_unary()) 147 | 148 | return expr 149 | 150 | def _parse_additive(self): 151 | expr = self._parse_multiplicative() 152 | 153 | while self.t.is_token('+') or self.t.is_token('-'): 154 | op = self.t.token.value 155 | self.t.next_token() 156 | expr = ExprBinary(expr, op, self._parse_multiplicative()) 157 | 158 | return expr 159 | 160 | def _parse_shift(self): 161 | expr = self._parse_additive() 162 | 163 | while self.t.is_token('<<') or self.t.is_token('>>'): 164 | op = self.t.token.value 165 | self.t.next_token() 166 | expr = ExprBinary(expr, op, self._parse_additive()) 167 | 168 | return expr 169 | 170 | def _parse_relational(self): 171 | expr = self._parse_shift() 172 | 173 | while self.t.is_token('<') or self.t.is_token('>') or self.t.is_token('<=') or self.t.is_token('>='): 174 | op = self.t.token.value 175 | self.t.next_token() 176 | expr = ExprBinary(expr, op, self._parse_shift()) 177 | 178 | return expr 179 | 180 | def _parse_equality(self): 181 | expr = self._parse_relational() 182 | 183 | while self.t.is_token('==') or self.t.is_token('!='): 184 | op = self.t.token.value 185 | self.t.next_token() 186 | expr = ExprBinary(expr, op, self._parse_relational()) 187 | 188 | return expr 189 | 190 | def _parse_bitwise_and(self): 191 | expr = self._parse_equality() 192 | 193 | while self.t.match_token('&'): 194 | expr = ExprBinary(expr, '&', self._parse_equality()) 195 | 196 | return expr 197 | 198 | def _parse_bitwise_xor(self): 199 | expr = self._parse_bitwise_and() 200 | 201 | while self.t.match_token('^'): 202 | expr = ExprBinary(expr, '^', self._parse_bitwise_and()) 203 | 204 | return expr 205 | 206 | def _parse_bitwise_or(self): 207 | expr = self._parse_bitwise_xor() 208 | 209 | while self.t.match_token('|'): 210 | expr = ExprBinary(expr, '|', self._parse_bitwise_xor()) 211 | 212 | return expr 213 | 214 | def _parse_logical_and(self): 215 | expr = self._parse_bitwise_or() 216 | 217 | while self.t.match_token('&&'): 218 | expr = ExprBinary(expr, '&&', self._parse_bitwise_or()) 219 | 220 | return expr 221 | 222 | def _parse_logical_or(self): 223 | expr = self._parse_logical_and() 224 | 225 | while self.t.match_token('||'): 226 | expr = ExprBinary(expr, '||', self._parse_logical_and()) 227 | 228 | return expr 229 | 230 | def _parse_conditional(self): 231 | 232 | # If expression 233 | if self.t.match_keyword('if'): 234 | condition = self.parse_expr() 235 | block_true = self.parse_stmt_block() 236 | 237 | assert self.t.match_keyword('else') 238 | block_false = self.parse_stmt_block() 239 | 240 | return ExprIf(condition, block_true, block_false) 241 | else: 242 | expr = self._parse_logical_or() 243 | 244 | # Or expression 245 | if self.t.match_keyword('or'): 246 | block = self.parse_stmt_block() 247 | return ExprOr(expr, block) 248 | 249 | return expr 250 | 251 | def _parse_assignment(self): 252 | expr = self._parse_conditional() 253 | 254 | while self.t.is_token('=') or self.t.is_token('+=') or self.t.is_token('-=') or self.t.is_token('*=') or \ 255 | self.t.is_token('/=') or self.t.is_token('%=') or self.t.is_token('>>=') or self.t.is_token('<<=') or \ 256 | self.t.is_token('&=') or self.t.is_token('^=') or self.t.is_token('|='): 257 | op = self.t.token.value 258 | self.t.next_token() 259 | 260 | if isinstance(expr, ExprBinary): 261 | expr = ExprBinary(expr.left, expr.op, ExprBinary(expr.right, op, self._parse_conditional())) 262 | else: 263 | expr = ExprBinary(expr, op, self._parse_conditional()) 264 | 265 | return expr 266 | 267 | def parse_expr(self): 268 | return self._parse_assignment() 269 | 270 | # def parse_mut_expr(self): 271 | # mut = False 272 | # if self.t.match_keyword('mut'): 273 | # mut = True 274 | # return self.parse_expr(), mut 275 | 276 | ################################################################################################################### 277 | # Statement parsing 278 | ################################################################################################################### 279 | 280 | def _parse_var_decl(self): 281 | # Mutable optional 282 | mut = False 283 | if self.t.match_keyword('mut'): 284 | mut = True 285 | 286 | # Get the names 287 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 288 | names = [self.t.token.value] 289 | self.t.next_token() 290 | 291 | while self.t.match_token(','): 292 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 293 | names.append(self.t.token.value) 294 | self.t.next_token() 295 | 296 | # The := 297 | self.t.expect_token(':=') 298 | 299 | # The assigned expression 300 | expr = self.parse_expr() 301 | 302 | return StmtVarDecl(mut, names, expr) 303 | 304 | def parse_stmt(self): 305 | # Return statement 306 | if self.t.match_keyword('return'): 307 | exprs = [] 308 | 309 | # Return should always be before an end of block so that tells us we have no arguments 310 | if not self.t.is_token('}'): 311 | exprs.append(self.parse_expr()) 312 | while self.t.match_token(','): 313 | exprs.append(self.parse_expr()) 314 | 315 | return StmtReturn(exprs) 316 | 317 | # Assert statement 318 | elif self.t.match_keyword('assert'): 319 | return StmtAssert(self.parse_expr()) 320 | 321 | # Parse if 322 | elif self.t.match_keyword('if'): 323 | condition = self.parse_expr() 324 | block_true = self.parse_stmt_block() 325 | block_false = None 326 | 327 | # Else part 328 | if self.t.match_keyword('else'): 329 | # We support `else if` without block before 330 | if self.t.is_keyword('if'): 331 | block_false = StmtBlock(self.frame[-1], [self.parse_stmt()]) 332 | 333 | # The block 334 | else: 335 | block_false = self.parse_stmt_block() 336 | 337 | return StmtIf(condition, block_true, block_false) 338 | 339 | # Block 340 | if self.t.is_token('{'): 341 | return self.parse_stmt_block() 342 | 343 | # For statement 344 | if self.t.match_keyword('for'): 345 | # Check if a foreach 346 | # will match (for name, name in test) and (for name in test) 347 | self.t.push() 348 | self.t.next_token() 349 | if self.t.is_token(','): 350 | self.t.pop() 351 | 352 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 353 | index = self.t.token.value 354 | self.t.next_token() 355 | 356 | self.t.expect_token(',') 357 | 358 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 359 | name = self.t.token.value 360 | self.t.next_token() 361 | 362 | self.t.expect_keyword('in') 363 | 364 | expr = self.parse_expr() 365 | block = self.parse_stmt_block() 366 | return StmtForeach(index, name, expr, block) 367 | 368 | elif self.t.is_keyword('in'): 369 | self.t.pop() 370 | 371 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 372 | name = self.t.token.value 373 | self.t.next_token() 374 | 375 | self.t.expect_keyword('in') 376 | 377 | expr = self.parse_expr() 378 | block = self.parse_stmt_block() 379 | return StmtForeach(None, name, expr, block) 380 | 381 | self.t.pop() 382 | 383 | # Check a forever loop 384 | if self.t.is_token('{'): 385 | block = self.parse_stmt_block() 386 | return StmtFor(None, None, None, block) 387 | 388 | # This is probably a normal c like loop 389 | else: 390 | val = None 391 | cond = None 392 | next = None 393 | 394 | # TODO: support `for condition` loops 395 | 396 | if not self.t.match_token(';'): 397 | # TODO: variable declaration inside this argument 398 | val = self.parse_expr() 399 | self.t.expect_token(';') 400 | 401 | if not self.t.match_token(';'): 402 | cond = self.parse_expr() 403 | self.t.expect_token(';') 404 | 405 | if not self.t.is_token('{'): 406 | next = self.parse_expr() 407 | 408 | block = self.parse_stmt_block() 409 | return StmtFor(val, cond, next, block) 410 | 411 | # Unsafe block 412 | if self.t.match_keyword('unsafe'): 413 | return StmtUnsafe(self.parse_stmt_block()) 414 | 415 | # Defer block 416 | if self.t.match_keyword('defer'): 417 | return StmtDefer(self.parse_stmt_block()) 418 | 419 | # Variable declaration 420 | if self.t.is_keyword('mut'): 421 | return self._parse_var_decl() 422 | 423 | # Might be variable declaration 424 | if self.t.is_token(IdentToken): 425 | self.t.push() 426 | self.t.next_token() 427 | 428 | # This verifies we got a variable declaration (a := ) or (a, b, c := ) 429 | if self.t.is_token(':=') or self.t.is_token(','): 430 | self.t.pop() 431 | return self._parse_var_decl() 432 | else: 433 | self.t.pop() 434 | 435 | # Fallback on expression parsing 436 | return StmtExpr(self.parse_expr()) 437 | 438 | def parse_stmt_block(self): 439 | self.t.expect_token('{') 440 | stmts = [] 441 | block = StmtBlock(self.frame[-1], stmts) 442 | 443 | self.frame.append(block) 444 | while not self.t.match_token('}'): 445 | stmts.append(self.parse_stmt()) 446 | self.frame.pop() 447 | 448 | return block 449 | 450 | ################################################################################################################### 451 | # Declaration parsing 452 | ################################################################################################################### 453 | 454 | def parse_type(self): 455 | # Map 456 | if self.t.match_keyword('map'): 457 | self.t.expect_token('[') 458 | key_type = self.parse_type() 459 | self.t.expect_token(']') 460 | value_type = self.parse_type() 461 | return VMapType(key_type, value_type) 462 | 463 | # Array 464 | if self.t.match_token('['): 465 | self.t.expect_token(']') 466 | value_type = self.parse_type() 467 | return VArrayType(value_type) 468 | 469 | # Optional type 470 | if self.t.match_token('?'): 471 | return VOptionalType(self.parse_type()) 472 | 473 | # Pointer type 474 | if self.t.match_token('&'): 475 | return VPointerType(self.parse_type()) 476 | 477 | # Basic type 478 | # TODO: support types from other modules 479 | elif self.t.is_token(IdentToken): 480 | t = VUnknownType(self.t.token.value) 481 | self.t.next_token() 482 | return t 483 | 484 | else: 485 | assert False, "Invalid type" 486 | 487 | def _parse_func_param(self): 488 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 489 | name = self.t.token.value 490 | self.t.next_token() 491 | 492 | mut = False 493 | if self.t.match_keyword('mut'): 494 | mut = True 495 | 496 | xtype = self.parse_type() 497 | 498 | return FuncParam(mut, name, xtype) 499 | 500 | def _parse_func(self, pub): 501 | 502 | # Method (optional) 503 | method = None 504 | if self.t.match_token('('): 505 | method = self._parse_func_param() 506 | self.t.expect_token(')') 507 | 508 | # Name 509 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 510 | name = self.t.token.value 511 | self.t.next_token() 512 | 513 | interop = False 514 | if self.t.match_token('.'): 515 | assert name == 'C' 516 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 517 | assert not pub, f'Interop functions can not be public!' 518 | interop = True 519 | name = self.t.token.value 520 | self.t.next_token() 521 | 522 | # Parameters 523 | self.t.expect_token('(') 524 | 525 | args = [] 526 | 527 | # Parse arguments if any 528 | if not self.t.is_token(')'): 529 | args.append(self._parse_func_param()) 530 | while self.t.match_token(','): 531 | args.append(self._parse_func_param()) 532 | 533 | self.t.expect_token(')') 534 | 535 | # the return value 536 | ret_type = None 537 | if not self.t.is_token('{') and not self.t.is_token(KeywordToken): 538 | ret_type = self.parse_type() 539 | 540 | func = FuncDecl(pub, interop, name, method, args, ret_type) 541 | 542 | # The code 543 | if not interop: 544 | self.frame.append(func) 545 | func.block = self.parse_stmt_block() 546 | self.frame.pop() 547 | else: 548 | func.block = None 549 | 550 | return func 551 | 552 | def _parse_struct_element(self, access: StructMemberAccess): 553 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 554 | name = self.t.token.value 555 | self.t.next_token() 556 | xtype = self.parse_type() 557 | return StructElement(access, name, xtype) 558 | 559 | def _parse_struct(self, pub): 560 | # Name 561 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 562 | name = self.t.token.value 563 | self.t.next_token() 564 | 565 | self.t.expect_token('{') 566 | 567 | access = StructMemberAccess.PRIVATE 568 | elements = [] 569 | while not self.t.match_token('}'): 570 | # This is an access type 571 | if self.t.match_keyword('pub'): 572 | 573 | if self.t.match_keyword('mut'): 574 | access = StructMemberAccess.PUBLIC_PRIV_MUT 575 | else: 576 | access = StructMemberAccess.PUBLIC 577 | self.t.expect_token(':') 578 | 579 | elif self.t.match_keyword('mut'): 580 | access = StructMemberAccess.PRIVATE_MUT 581 | self.t.expect_token(':') 582 | 583 | elif self.t.match_keyword('__global'): 584 | access = StructMemberAccess.PUBLIC_MUT 585 | self.t.expect_token(':') 586 | 587 | # Probably just a member 588 | else: 589 | elements.append(self._parse_struct_element(access)) 590 | 591 | return StructDecl(pub, name, None, elements) 592 | 593 | def _parse_import_name(self): 594 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 595 | name = self.t.token.value 596 | self.t.next_token() 597 | 598 | while self.t.match_token('.'): 599 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 600 | name += '.' + self.t.token.value 601 | self.t.next_token() 602 | 603 | return name 604 | 605 | def _parse_const(self, pub): 606 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 607 | name = self.t.token.value 608 | self.t.next_token() 609 | self.t.expect_token('=') 610 | expr = self.parse_expr() 611 | return ConstDecl(pub, name, expr) 612 | 613 | def _parse_enum(self, pub): 614 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 615 | name = self.t.token.value 616 | self.t.next_token() 617 | 618 | elements = [] 619 | 620 | self.t.expect_token('{') 621 | while not self.t.match_token('}'): 622 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 623 | elements.append(self.t.token.value) 624 | self.t.next_token() 625 | 626 | return EnumDecl(pub, name, elements) 627 | 628 | def parse_decl(self, pub): 629 | # Anything which may be public 630 | if self.t.match_keyword('pub'): 631 | return self.parse_decl(True) 632 | 633 | # Parse function 634 | elif self.t.match_keyword('fn'): 635 | return self._parse_func(pub) 636 | 637 | # Struct declaration 638 | elif self.t.match_keyword('struct'): 639 | return self._parse_struct(pub) 640 | 641 | elif self.t.match_keyword('enum'): 642 | return self._parse_enum(pub) 643 | 644 | # Module declaration 645 | elif self.t.is_keyword('module'): 646 | assert not pub, "pub may not be used on module" 647 | self.t.next_token() 648 | 649 | assert self.t.is_token(IdentToken), f"Expected name, got {self.t.token}" 650 | mod = ModuleDecl(self.t.token.value) 651 | self.t.next_token() 652 | return mod 653 | 654 | # Import 655 | elif self.t.is_keyword('import'): 656 | assert not pub, "pub may not be used on import" 657 | self.t.next_token() 658 | 659 | # Multi import 660 | if self.t.match_token('('): 661 | imports = [] 662 | while not self.t.match_token(')'): 663 | imports.append(ImportDecl(self._parse_import_name())) 664 | return imports 665 | 666 | # Single import 667 | else: 668 | return ImportDecl(self._parse_import_name()) 669 | 670 | # Constants 671 | elif self.t.match_keyword('const'): 672 | # Multi const decl 673 | if self.t.match_token('('): 674 | constants = [] 675 | while not self.t.match_token(')'): 676 | constants.append(self._parse_const(pub)) 677 | return constants 678 | 679 | # Single const decl 680 | else: 681 | return self._parse_const(pub) 682 | 683 | else: 684 | assert False 685 | 686 | def parse(self): 687 | decls = [] 688 | 689 | while not self.t.is_token(EofToken): 690 | res = self.parse_decl(False) 691 | if isinstance(res, list): 692 | for r in res: 693 | decls.append(r) 694 | else: 695 | decls.append(res) 696 | 697 | return decls -------------------------------------------------------------------------------- /vork/ast.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import * 3 | from enum import Enum 4 | 5 | 6 | ################################################################################################################### 7 | # Forward declare stmts 8 | ################################################################################################################### 9 | 10 | 11 | class VType: 12 | 13 | def __ne__(self, other): 14 | return not (self == other) 15 | 16 | 17 | class Stmt: 18 | 19 | def type_checking(self, function): 20 | raise NotImplementedError 21 | 22 | 23 | class Expr: 24 | 25 | def __init__(self): 26 | self.type = None # type: VType 27 | 28 | def resolve_type(self, function): 29 | """ 30 | :type function: FuncDecl 31 | """ 32 | if self.type is None: 33 | self.type = self._internal_resolve_type(function) 34 | self.type = function.get_module().resolve_type(self.type) 35 | return self.type 36 | 37 | def _internal_resolve_type(self, function): 38 | """ 39 | :type function: FuncDecl 40 | """ 41 | raise NotImplementedError 42 | 43 | ################################################################################################################### 44 | # Statements 45 | ################################################################################################################### 46 | 47 | 48 | class StmtBlock(Stmt): 49 | 50 | def __init__(self, parent, stmts: List[Stmt]): 51 | """ 52 | :type parent: StmtBlock or FuncDecl 53 | """ 54 | self.vars = {} # type: Dict[str, Tuple(VType, bool)] 55 | self.parent = parent 56 | self.stmts = stmts 57 | 58 | def __str__(self, indent=''): 59 | s = '(block\n' 60 | indent += ' ' 61 | for stmt in self.stmts: 62 | s += indent + str(stmt).replace('\n', '\n' + indent) + '\n' 63 | s = s[:-1] 64 | s += ')' 65 | return s 66 | 67 | def get_var(self, name, search_parent: bool = True) -> Tuple[VType, bool] or None: 68 | if name not in self.vars: 69 | if search_parent: 70 | return self.parent.get_var(name) 71 | else: 72 | return None 73 | return self.vars[name] 74 | 75 | def add_var(self, name: str, type: VType, mut: bool): 76 | assert self.get_var(name) is None, f"variable {name} already exists in scope" 77 | self.vars[name] = type, mut 78 | 79 | def type_checking(self, function): 80 | function.push_frame(self) 81 | for stmt in self.stmts: 82 | stmt.type_checking(function) 83 | function.pop_frame() 84 | 85 | 86 | class StmtExpr(Stmt): 87 | 88 | def __init__(self, expr): 89 | self.expr = expr 90 | 91 | def __str__(self): 92 | return str(self.expr) 93 | 94 | def type_checking(self, function): 95 | self.expr.resolve_type(function) 96 | 97 | 98 | class StmtReturn(Stmt): 99 | 100 | def __init__(self, exprs: List[Expr]): 101 | self.exprs = exprs 102 | 103 | def __str__(self): 104 | return f'(return {" ".join(map(str, self.exprs))})' 105 | 106 | def type_checking(self, function): 107 | assert len(self.exprs) <= 1, f'Multiple return values are not supported yet' 108 | 109 | for expr in self.exprs: 110 | expr.resolve_type(function) 111 | assert expr.type == function.ret_type, f'Type mismatch, expected `{function.ret_type}`, got `{expr.type}`' 112 | 113 | 114 | class StmtAssert(Stmt): 115 | 116 | def __init__(self, expr: Expr): 117 | self.expr = expr 118 | 119 | def __str__(self): 120 | return f'(assert {self.expr})' 121 | 122 | def type_checking(self, function): 123 | assert isinstance(self.expr.resolve_type(function), VBool), f'assert requires a boolean expression' 124 | 125 | 126 | class StmtIf(Stmt): 127 | 128 | def __init__(self, condition: Expr, block_true: StmtBlock, block_false: StmtBlock or None): 129 | self.condition = condition 130 | self.block_true = block_true 131 | self.block_false = block_false 132 | 133 | def __str__(self): 134 | s = f'(if {self.condition}' 135 | s += '\n ' + str(self.block_true).replace('\n', '\n ') 136 | if self.block_false is not None: 137 | s += '\n else\n' 138 | s += ' ' + str(self.block_false).replace('\n', '\n ') 139 | s += ')' 140 | return s 141 | 142 | def type_checking(self, function): 143 | assert isinstance(self.condition.resolve_type(function), VBool), f'if condition must be a boolean expression' 144 | self.block_true.type_checking(function) 145 | if self.block_false is not None: 146 | self.block_false.type_checking(function) 147 | 148 | 149 | class StmtVarDecl(Stmt): 150 | 151 | def __init__(self, mut: bool, names: List[str], expr: Expr): 152 | self.mut = mut 153 | self.names = names 154 | self.expr = expr 155 | 156 | def __str__(self): 157 | mut = 'mut ' if self.mut else '' 158 | return f'(var {mut}({" ".join(self.names)}) {self.expr})' 159 | 160 | def type_checking(self, function): 161 | # TODO: support multiple return 162 | xtype = self.expr.resolve_type(function) 163 | function.frame[-1].add_var(self.names[0], xtype, self.mut) 164 | 165 | 166 | class StmtForeach(Stmt): 167 | 168 | def __init__(self, index: str or None, name: str, list: Expr, block: StmtBlock): 169 | self.index = index 170 | self.name = name 171 | self.list = list 172 | self.block = block 173 | 174 | def __str__(self): 175 | name = '' 176 | if self.index is not None: 177 | name += self.index + ' ' 178 | name += self.name 179 | s = f'(foreach {name} {self.list}\n' 180 | s += ' ' + str(self.block).replace('\n', '\n ') 181 | s += ')' 182 | return s 183 | 184 | def type_checking(self, function): 185 | list_type = self.list.resolve_type(function) 186 | 187 | if isinstance(list_type, VArrayType): 188 | self.block.add_var(self.name, list_type.type, False) 189 | if self.index is not None: 190 | self.block.add_var(self.index, VIntegerType(32, True), False) 191 | 192 | elif isinstance(list_type, VMapType): 193 | self.block.add_var(self.name, list_type.value_type, False) 194 | if self.index is not None: 195 | self.block.add_var(self.index, list_type.key_type, False) 196 | 197 | else: 198 | assert False, f'Can not iterate over type `{list_type}`' 199 | 200 | self.block.type_checking(function) 201 | 202 | 203 | class StmtFor(Stmt): 204 | 205 | def __init__(self, value: Expr or StmtVarDecl or None, condition: Expr or None, next: Expr or None, block: StmtBlock): 206 | self.value = value 207 | self.condition = condition 208 | self.next = next 209 | self.block = block 210 | 211 | def __str__(self): 212 | val = str(self.value) if self.value is not None else '()' 213 | cond = str(self.condition) if self.condition is not None else '()' 214 | next = str(self.next) if self.next is not None else '()' 215 | s = f'(for {val} {cond} {next}\n' 216 | s += ' ' + str(self.block).replace('\n', '\n ') 217 | s += ')' 218 | return s 219 | 220 | def type_checking(self, function): 221 | if self.value is not None: 222 | 223 | if isinstance(self.value, Expr): 224 | self.value.resolve_type(function) 225 | 226 | elif isinstance(self.value, StmtVarDecl): 227 | self.value.type_checking(function) 228 | 229 | else: 230 | assert False 231 | 232 | if self.condition is not None: 233 | assert isinstance(self.condition.resolve_type(function), VBool), f'Condition of a for loop must be a boolean expression (got `{self.condition.resolve_type(function)}`)' 234 | 235 | if self.next is not None: 236 | self.next.resolve_type(function) 237 | 238 | self.block.type_checking(function) 239 | 240 | 241 | class StmtUnsafe(Stmt): 242 | 243 | def __init__(self, block: StmtBlock): 244 | self.block = block 245 | 246 | def __str__(self): 247 | return '(unsafe\n ' + str(self.block).replace('\n', '\n ') + ')' 248 | 249 | def type_checking(self, function): 250 | self.block.type_checking(function) 251 | 252 | 253 | class StmtDefer(Stmt): 254 | 255 | def __init__(self, block: StmtBlock): 256 | self.block = block 257 | 258 | def __str__(self): 259 | return '(defer\n ' + str(self.block).replace('\n', '\n ') + ')' 260 | 261 | def type_checking(self, function): 262 | self.block.type_checking(function) 263 | 264 | ################################################################################################################### 265 | # Types 266 | ################################################################################################################### 267 | 268 | 269 | class VUnknownType(VType): 270 | 271 | def __init__(self, name: str): 272 | self.name = name 273 | 274 | def __str__(self): 275 | return f'UnknownType<{self.name}>' 276 | 277 | 278 | class VIntegerType(VType): 279 | 280 | def __init__(self, bits: int, signed: bool): 281 | self.bits = bits 282 | self.signed = signed 283 | 284 | def __str__(self): 285 | # Special cases 286 | if self.signed and self.bits == 32: 287 | return 'int' 288 | elif not self.signed and self.bits == 8: 289 | return 'byte' 290 | elif self.signed: 291 | return f'i{self.bits}' 292 | else: 293 | return f'u{self.bits}' 294 | 295 | def __eq__(self, other): 296 | if isinstance(other, VIntegerType): 297 | return self.signed == other.signed and self.bits == other.bits 298 | return False 299 | 300 | 301 | class VFloatType(VType): 302 | 303 | def __init__(self, bits: int): 304 | self.bits = bits 305 | 306 | def __str__(self): 307 | return f'f{self.bits}' 308 | 309 | def __eq__(self, other): 310 | if isinstance(other, VFloatType): 311 | return self.bits == other.bits 312 | return False 313 | 314 | 315 | class VBool(VType): 316 | 317 | def __init__(self): 318 | pass 319 | 320 | def __str__(self): 321 | return 'bool' 322 | 323 | def __eq__(self, other): 324 | return isinstance(other, VBool) 325 | 326 | # 327 | # class VFuncType(VType): 328 | # 329 | # def __init__(self, args: List[Tuple[VType, bool]], ret: VType or None): 330 | # self.args = args 331 | # self.ret = ret 332 | # 333 | # def __str__(self): 334 | # args = [] 335 | # for arg in self.args: 336 | # a = '' 337 | # if arg[1]: 338 | # a += 'mut ' 339 | # a += arg[0] 340 | # args.append(a) 341 | # 342 | # s = f'fn ({args})' 343 | # 344 | # if self.ret is not None: 345 | # s += ' ' + self.ret 346 | # 347 | # return s 348 | # 349 | # def __eq__(self, other): 350 | # if isinstance(other, VFuncType): 351 | # return self.args == other.args and self.ret == other.ret 352 | # return False 353 | # 354 | 355 | class VArrayType(VType): 356 | 357 | def __init__(self, xtype: VType): 358 | self.type = xtype 359 | 360 | def __str__(self): 361 | return f'[]{self.type}' 362 | 363 | def __eq__(self, other): 364 | if isinstance(other, VArrayType): 365 | return self.type == other.type 366 | return False 367 | 368 | 369 | class VMapType(VType): 370 | 371 | def __init__(self, key_type: VType, value_type: VType): 372 | self.key_type = key_type 373 | self.value_type = value_type 374 | 375 | def __str__(self): 376 | return f'map[{self.key_type}]{self.value_type}' 377 | 378 | def __eq__(self, other): 379 | if isinstance(other, VMapType): 380 | return self.key_type == other.key_type and self.value_type == other.value_type 381 | return False 382 | 383 | 384 | class VOptionalType(VType): 385 | 386 | def __init__(self, xtype: VType): 387 | self.type = xtype 388 | 389 | def __str__(self): 390 | return f'?{self.type}' 391 | 392 | def __eq__(self, other): 393 | if isinstance(other, VOptionalType): 394 | return self.type == other.type 395 | return False 396 | 397 | 398 | class VPointerType(VType): 399 | 400 | def __init__(self, xtype: VType): 401 | self.type = xtype 402 | 403 | def __str__(self): 404 | return f'&{self.type}' 405 | 406 | def __eq__(self, other): 407 | if isinstance(other, VPointerType): 408 | return self.type == other.type 409 | return False 410 | 411 | 412 | ################################################################################################################### 413 | # Expressions 414 | ################################################################################################################### 415 | 416 | 417 | class ExprIntegerLiteral(Expr): 418 | 419 | def __init__(self, value: int): 420 | super(ExprIntegerLiteral, self).__init__() 421 | self.value = value 422 | 423 | def __str__(self): 424 | return str(self.value) 425 | 426 | def _internal_resolve_type(self, function): 427 | # Always an int 428 | return VIntegerType(32, True) 429 | 430 | 431 | class ExprArrayLiteral(Expr): 432 | 433 | def __init__(self, values: List[Expr]): 434 | super(ExprArrayLiteral, self).__init__() 435 | self.values = values 436 | 437 | def __str__(self): 438 | s = '(array\n' 439 | s += ' ' + str(self.values).replace('\n', '\n ') + ')' 440 | return s 441 | 442 | def _internal_resolve_type(self, function): 443 | array_type = None 444 | for element in self.values: 445 | type = element.resolve_type(function) 446 | if array_type is None: 447 | array_type = type 448 | else: 449 | assert type == array_type, f"Type mismatch in array literal, expected `{array_type}`, got `{type}`" 450 | return VArrayType(array_type) 451 | 452 | 453 | class ExprRange(Expr): 454 | 455 | def __init__(self, expr_from: Expr, expr_to: Expr): 456 | super(ExprRange, self).__init__() 457 | self.expr_from = expr_from 458 | self.expr_to = expr_to 459 | 460 | def __str__(self): 461 | return f'(range {self.expr_from} {self.expr_to})' 462 | 463 | def _internal_resolve_type(self, function): 464 | from_type = self.expr_from.resolve_type(function) 465 | to_type = self.expr_to.resolve_type(function) 466 | assert from_type == to_type, f"Type mismatch ({from_type} and {to_type})" 467 | return VArrayType(from_type) 468 | 469 | 470 | class ExprFloatLiteral(Expr): 471 | 472 | def __init__(self, value: int): 473 | super(ExprFloatLiteral, self).__init__() 474 | self.value = value 475 | 476 | def __str__(self): 477 | return str(self.value) 478 | 479 | def _internal_resolve_type(self, function): 480 | return VFloatType(32) 481 | 482 | 483 | class ExprIdentifierLiteral(Expr): 484 | 485 | def __init__(self, name: str): 486 | super(ExprIdentifierLiteral, self).__init__() 487 | self.name = name 488 | 489 | def __str__(self): 490 | return self.name 491 | 492 | def _internal_resolve_type(self, function): 493 | res = function.get_var(self.name) 494 | assert res is not None, f"Unknown identifier `{self.name}`" 495 | 496 | # This is how we store a variable 497 | if isinstance(res, tuple): 498 | return res[0] 499 | 500 | # This is how we store interop functions 501 | elif isinstance(res, dict): 502 | return res 503 | 504 | # Handle constants 505 | elif isinstance(res, ConstDecl): 506 | return res.get_type(function) 507 | 508 | assert False, f'unknown identifier type {res}' 509 | 510 | 511 | class ExprBinary(Expr): 512 | 513 | TYPE_TABLE = { 514 | '+': [VIntegerType, VFloatType], 515 | '-': [VIntegerType, VFloatType], 516 | '*': [VIntegerType, VFloatType], 517 | '/': [VIntegerType, VFloatType], 518 | '%': [VIntegerType], 519 | 520 | '&': [VIntegerType], 521 | '|': [VIntegerType], 522 | '^': [VIntegerType], 523 | 524 | # TODO: restrict with unsigned numbers on the right 525 | '<<': [VIntegerType], 526 | '>>': [VIntegerType], 527 | 528 | '&&': [VBool], 529 | '||': [VBool], 530 | } 531 | 532 | def __init__(self, left: Expr, op: str, right: Expr): 533 | super(ExprBinary, self).__init__() 534 | self.left = left 535 | self.right = right 536 | self.op = op 537 | 538 | def __str__(self): 539 | return f'({self.op} {self.left} {self.right})' 540 | 541 | def _internal_resolve_type(self, function): 542 | left_type = self.left.resolve_type(function) 543 | right_type = self.right.resolve_type(function) 544 | assert left_type == right_type, f"Mismatching types (`{left_type}` and `{right_type}`)" 545 | 546 | # This is part of assignment? 547 | if self.op.endswith('='): 548 | 549 | # relational and equality 550 | if len(self.op) == 2 and self.op[0] in ['>', '<', '=', '!']: 551 | return VBool() 552 | 553 | # Assignment expression 554 | else: 555 | assert left_type.__class__ in ExprBinary.TYPE_TABLE[self.op[-1]], f'Invalid type `{left_type}` for operator `{self.op[-1]}`' 556 | return left_type 557 | 558 | # Normal operators 559 | else: 560 | assert left_type.__class__ in ExprBinary.TYPE_TABLE[ 561 | self.op[-1]], f'Invalid type `{left_type}` for operator `{self.op}`' 562 | return left_type 563 | 564 | 565 | class ExprUnary(Expr): 566 | 567 | def __init__(self, op: str, right: Expr): 568 | super(ExprUnary, self).__init__() 569 | self.right = right 570 | self.op = op 571 | 572 | def __str__(self): 573 | if self.op == '&': 574 | return f'(ref {self.right})' 575 | elif self.op == '*': 576 | return f'(deref {self.right})' 577 | else: 578 | return f'(prefix {self.op} {self.right})' 579 | 580 | def _internal_resolve_type(self, function): 581 | xtype = self.right.resolve_type(function) 582 | 583 | if self.op == '*': 584 | assert isinstance(xtype, VPointerType), f"Tried to dereference a none pointer (`{xtype}`)" 585 | return xtype.type 586 | 587 | elif self.op == '&': 588 | # TODO: check that it is possible to ref the expression 589 | return VPointerType(xtype) 590 | 591 | elif self.op == '!': 592 | assert isinstance(xtype, VBool), f'Invalid type `{xtype}` for operator `{self.op}`' 593 | return xtype 594 | 595 | elif self.op == '~': 596 | assert isinstance(xtype, VIntegerType), f'Invalid type `{xtype}` for operator `{self.op}`' 597 | return xtype 598 | 599 | else: 600 | assert isinstance(xtype, VIntegerType) or isinstance(xtype, VFloatType), f'Invalid type `{xtype}` for operator `{self.op}`' 601 | return xtype 602 | 603 | 604 | class ExprImplicitEnum(Expr): 605 | 606 | def __init__(self, name: str): 607 | super(ExprImplicitEnum, self).__init__() 608 | self.name = name 609 | 610 | def __str__(self): 611 | return f'(implicit {self.name})' 612 | 613 | def _internal_resolve_type(self, function): 614 | assert False, "Implicit enums are not supported yet" 615 | 616 | 617 | class ExprIn(Expr): 618 | 619 | def __init__(self, left: Expr, right: Expr): 620 | super(ExprIn, self).__init__() 621 | self.left = left 622 | self.right = right 623 | 624 | def __str__(self): 625 | return f'(in {self.left} {self.right})' 626 | 627 | def _internal_resolve_type(self, function): 628 | left_type = self.left.resolve_type(function) 629 | right_type = self.left.resolve_type(function) 630 | 631 | if isinstance(right_type, VMapType): 632 | assert left_type == right_type.key_type, f"Type mismatch, expected {right_type.key_type}, got {left_type}" 633 | 634 | elif isinstance(right_type, VArrayType): 635 | assert left_type == VIntegerType(32, True), f"Type mismatch, expected {right_type.type}, got {left_type}" 636 | 637 | return VBool() 638 | 639 | 640 | class ExprPostfix(Expr): 641 | 642 | def __init__(self, left: Expr, op: str): 643 | super(ExprPostfix, self).__init__() 644 | self.op = op 645 | self.left = left 646 | 647 | def __str__(self): 648 | return f'(postfix {self.left} {self.op})' 649 | 650 | def _internal_resolve_type(self, function): 651 | xtype = self.left.resolve_type(function) 652 | assert isinstance(xtype, VIntegerType), f'Invalid type `{xtype}` for operator `{self.op}`' 653 | return xtype 654 | 655 | 656 | class ExprIf(Expr): 657 | 658 | def __init__(self, condition: Expr, block_true: StmtBlock, block_false: StmtBlock): 659 | super(ExprIf, self).__init__() 660 | self.condition = condition 661 | self.block_true = block_true 662 | self.block_false = block_false 663 | 664 | def __str__(self): 665 | s = f'(if {self.condition}\n' 666 | s += ' ' + str(self.block_true).replace('\n', '\n ') + '\n' 667 | s += ' else\n' 668 | s += ' ' + str(self.block_false).replace('\n', '\n ') 669 | s += ')' 670 | return s 671 | 672 | def _internal_resolve_type(self, function): 673 | self.block_true.type_checking(function) 674 | self.block_false.type_checking(function) 675 | 676 | assert len(self.block_true.stmts) != 0 and isinstance(self.block_true.stmts[-1], StmtExpr), f'Last statement of an if expression must be an expression!' 677 | assert len(self.block_false.stmts) != 0 and isinstance(self.block_false.stmts[-1], StmtExpr), f'Last statement of an if expression must be an expression!' 678 | 679 | true_type = self.block_true[-1].expr.resolve_type(function) 680 | false_type = self.block_true[-1].expr.resolve_type(function) 681 | assert true_type == false_type, f'Type mismatch between blocks (got {true_type} and {false_type})' 682 | 683 | return true_type 684 | 685 | 686 | class ExprOr(Expr): 687 | 688 | def __init__(self, expr: Expr, block_error: StmtBlock): 689 | super(ExprOr, self).__init__() 690 | self.expr = expr 691 | self.block_error = block_error 692 | 693 | def __str__(self): 694 | s = f'(or {self.expr}\n' 695 | s += ' ' + str(self.block_error).replace('\n', '\n ') + ')' 696 | return s 697 | 698 | def _internal_resolve_type(self, function): 699 | xtype = self.expr.resolve_type(function) 700 | assert isinstance(xtype, VOptionalType), f'expected an optional type, got `{xtype}`' 701 | 702 | # Make sure the block never exits the block 703 | # TODO: add check for the panic function (or just add a noreturn attribute?) 704 | self.block_error.type_checking(function) 705 | assert len(self.block_error.stmts) != 0, f'or block must return!' 706 | stmt = self.block_error.stmts[-1] 707 | assert isinstance(stmt, StmtReturn), f'or block must return!' 708 | 709 | # Return the underlying type 710 | return xtype.type 711 | 712 | 713 | class ExprMemberAccess(Expr): 714 | 715 | def __init__(self, value: Expr, member: str): 716 | super(ExprMemberAccess, self).__init__() 717 | self.value = value 718 | self.member = member 719 | 720 | def __str__(self): 721 | return f'(member {self.value} {self.member})' 722 | 723 | def _internal_resolve_type(self, function): 724 | value_type = self.value.resolve_type(function) 725 | 726 | # Enum members 727 | if isinstance(value_type, EnumDecl): 728 | assert self.member in value_type.elements, f'Unknown enum field `{self.member}`' 729 | 730 | # TODO: check pub access 731 | 732 | return value_type 733 | 734 | # Struct members 735 | elif isinstance(value_type, StructDecl): 736 | for elem in value_type.elements: 737 | # TODO: access checks 738 | if elem.name == self.member: 739 | return elem.type 740 | 741 | assert False, f"Unknown struct field `{self.member}`" 742 | 743 | # Array type, these are hardcoded 744 | elif isinstance(value_type, VArrayType): 745 | if self.member == 'data': 746 | # TODO: voidptr 747 | assert False 748 | 749 | elif self.member == 'len': 750 | return VIntegerType(32, True) 751 | 752 | elif self.member == 'cap': 753 | return VIntegerType(32, True) 754 | 755 | elif self.member == 'element_size': 756 | return VIntegerType(32, True) 757 | 758 | else: 759 | assert False, f'Unknown array field `{self.member}`' 760 | 761 | # Map type, these are hardcoded 762 | elif isinstance(value_type, VMapType): 763 | if self.member == 'size': 764 | return VIntegerType(32, True) 765 | 766 | else: 767 | assert False, f'Unknown map field `{self.member}`' 768 | 769 | # TODO: string type 770 | 771 | # TODO: Search for methods 772 | 773 | # Interops (should be) 774 | elif isinstance(value_type, dict): 775 | if self.member in value_type: 776 | return value_type[self.member] 777 | else: 778 | assert False, f'Unknown interop function `{self.member}`' 779 | 780 | # Did not find anything 781 | assert False, f'Type `{value_type}` has no members!' 782 | 783 | 784 | class ExprIndexAccess(Expr): 785 | 786 | def __init__(self, value: Expr, index: Expr): 787 | super(ExprIndexAccess, self).__init__() 788 | self.value = value 789 | self.index = index 790 | 791 | def __str__(self): 792 | return f'(index {self.value} {self.index})' 793 | 794 | def _internal_resolve_type(self, function): 795 | value_type = self.value.resolve_type(function) 796 | index_type = self.index.resolve_type(function) 797 | 798 | if isinstance(value_type, VArrayType): 799 | assert index_type == VIntegerType(32, True), f'Type mismatch, expected `int`, got `{index_type}`' 800 | return value_type.type 801 | 802 | elif isinstance(value_type, VMapType): 803 | assert index_type == value_type.key_type, f"Type mistmatch, expected `{value_type.key_type}`, got `{index_type}`" 804 | return value_type.value_type 805 | 806 | else: 807 | assert False, f"type `{value_type}` is not index-able" 808 | 809 | 810 | class ExprCall(Expr): 811 | 812 | def __init__(self, func: Expr, args: List[Expr]): 813 | super(ExprCall, self).__init__() 814 | self.func = func 815 | self.args = args 816 | 817 | def __str__(self): 818 | return f'(call {self.func} ({" ".join(map(str, self.args))}))' 819 | 820 | def _internal_resolve_type(self, function): 821 | func_type = self.func.resolve_type(function) 822 | 823 | assert isinstance(func_type, FuncDecl), f'Not a function!' 824 | assert len(func_type.args) == len(self.args), f'Function expected {len(func_type.args)} arguments, got {len(self.args)}' 825 | 826 | for i in range(len(func_type.args)): 827 | expect_arg_type = func_type.args[i].type 828 | arg_type = self.args[i].resolve_type(function) 829 | assert arg_type == arg_type, f'Type mismatch, expected `{func_type.args[i]}`, got `{arg_type}`' 830 | 831 | return func_type.ret_type 832 | 833 | ################################################################################################################### 834 | # Declarations 835 | ################################################################################################################### 836 | 837 | 838 | class FuncParam: 839 | 840 | def __init__(self, mut: bool, name: str, xtype: VType): 841 | self.mut = mut 842 | self.name = name 843 | self.type = xtype 844 | 845 | def __str__(self): 846 | mut = 'mut ' if self.mut else '' 847 | return f'({mut}{self.name} {self.type})' 848 | 849 | 850 | class FuncDecl: 851 | 852 | def __init__(self, pub: bool, interop: bool, name: str, method: FuncParam, args: List[FuncParam], ret_value: VType): 853 | self.module = None # type: Module 854 | self.pub = pub 855 | self.interop = interop 856 | self.name = name 857 | self.method = method 858 | self.args = args 859 | self.ret_type = ret_value 860 | self.block = None # type: StmtBlock or None 861 | self.frame = [] # type: List[StmtBlock] 862 | 863 | def __str__(self): 864 | pub = 'pub ' if self.pub else '' 865 | block = str(self.block).replace("\n", "\n ") 866 | ret_val = '' if self.ret_type is None else str(self.ret_type) 867 | name = ('C.' if self.interop else '') + self.name 868 | if self.block is not None: 869 | block = f'\n {block}' 870 | else: 871 | block = '' 872 | method = str(self.method) + ' ' if self.method is not None else '' 873 | return f'(func {pub}{name} {method}({" ".join(map(str, self.args))}) {ret_val}{block})' 874 | 875 | def type_checking(self): 876 | if self.block is not None: 877 | self.block.type_checking(self) 878 | 879 | def get_module(self): 880 | assert self.module is not None 881 | return self.module 882 | 883 | def push_frame(self, block): 884 | self.frame.append(block) 885 | 886 | def pop_frame(self): 887 | self.frame.pop() 888 | 889 | def get_var(self, name): 890 | # Check for the stack frames first 891 | for frame in self.frame: 892 | f = frame.get_var(name, False) 893 | if f is not None: 894 | return f 895 | 896 | # Then check for the args 897 | for arg in self.args: 898 | if arg.name == name: 899 | return arg.type, arg.mut 900 | 901 | # Lastly check from the module 902 | return self.module.get_var(name) 903 | 904 | 905 | class StructMemberAccess(Enum): 906 | PRIVATE = 'private' 907 | PRIVATE_MUT = 'private mut' 908 | PUBLIC = 'public' 909 | PUBLIC_PRIV_MUT = 'public, private mut' 910 | PUBLIC_MUT = 'public mut' 911 | 912 | 913 | class StructElement: 914 | 915 | def __init__(self, access: StructMemberAccess, name: str, xtype: VType): 916 | self.access = access 917 | self.name = name 918 | self.type = xtype 919 | 920 | def __str__(self): 921 | return f'({self.access.value} {self.name} {self.type})' 922 | 923 | 924 | class StructDecl: 925 | 926 | def __init__(self, pub: bool, attribute: dict, name: str, base: StructElement or None, elements: List[StructElement]): 927 | self.module = None # type: Module 928 | self.pub = pub 929 | self.attribute = attribute 930 | self.name = name 931 | self.base = base 932 | self.elements = elements 933 | 934 | def __str__(self): 935 | pub = 'pub ' if self.pub else '' 936 | 937 | s = f'(struct {pub}{self.name}\n' 938 | 939 | if self.base is not None: 940 | s += ' ' + str(self.base) + '\n' 941 | 942 | for elem in self.elements: 943 | s += ' ' + str(elem) + '\n' 944 | 945 | s = s[:-1] 946 | s += ')' 947 | return s 948 | 949 | 950 | class ModuleDecl: 951 | 952 | def __init__(self, name: str): 953 | self.name = name 954 | 955 | def __str__(self): 956 | return f'(module {self.name})' 957 | 958 | 959 | class ImportDecl: 960 | 961 | def __init__(self, name: str): 962 | self.name = name 963 | 964 | def __str__(self): 965 | return f'(import {self.name})' 966 | 967 | 968 | class EnumDecl: 969 | 970 | def __init__(self, pub: bool, name: str, elements: List[str]): 971 | self.module = None # type: Module 972 | self.pub = pub 973 | self.name = name 974 | self.elements = elements 975 | 976 | def __str__(self): 977 | pub = 'pub ' if self.pub else '' 978 | s = f'(enum {pub}{self.name}\n' 979 | i = 0 980 | for elem in self.elements: 981 | s += f' ({i} {elem})\n' 982 | i += 1 983 | s = s[:-1] 984 | s += ')' 985 | return s 986 | 987 | 988 | class ConstDecl: 989 | 990 | def __init__(self, pub: bool, name: str, value: Expr): 991 | self.module = None # type: Module 992 | self.pub = pub 993 | self.name = name 994 | self.value = value 995 | 996 | def __str__(self): 997 | pub = 'pub ' if self.pub else '' 998 | return f'(const {pub}{self.name} {self.value})' 999 | 1000 | def type_checking(self): 1001 | self.value.resolve_type(self.module) 1002 | 1003 | def get_type(self, function): 1004 | return self.value.resolve_type(function) 1005 | 1006 | 1007 | class TypeDecl: 1008 | 1009 | def __init__(self, pub: bool, name: str, xtype: VType): 1010 | self.pub = pub 1011 | self.name = name 1012 | self.type = xtype 1013 | 1014 | def __str__(self): 1015 | pub = 'pub ' if self.pub else '' 1016 | return f'(type {pub}{self.name} {self.type})' 1017 | 1018 | def type_checking(self): 1019 | pass 1020 | 1021 | 1022 | class Module: 1023 | 1024 | def __init__(self): 1025 | self.workspace = None # type: Workspace 1026 | self.name = 'main' 1027 | self.decls = {} 1028 | 1029 | def add(self, val): 1030 | # Make sure not in builtin already 1031 | assert self._resolve_builtin(val.name) is None, f'duplicate name `{val.name}` in module `{self.name}`' 1032 | 1033 | # Add the module as long as this is not an import 1034 | if not isinstance(val, Module): 1035 | val.module = self 1036 | 1037 | # Handle interop functions properly 1038 | if isinstance(val, FuncDecl): 1039 | if val.interop: 1040 | assert val.name not in self.decls['C'], f'duplicate name `{val.name}` in module `{self.name}`' 1041 | self.decls['C'][val.name] = val 1042 | else: 1043 | assert val.name not in self.decls, f'duplicate name `{val.name}` in module `{self.name}`' 1044 | self.decls[val.name] = val 1045 | 1046 | # On import simply load the module from the workspace 1047 | elif isinstance(val, ImportDecl): 1048 | self.add(self.workspace.load_module(val.name)) 1049 | 1050 | # Handle the module declaration 1051 | elif isinstance(val, ModuleDecl): 1052 | assert val.name == self.name, f'module declaration and module path mismatch (`{val.name}` and `{self.name}`)!' 1053 | 1054 | # everything else we just add as is 1055 | else: 1056 | self.decls[val.name] = val 1057 | 1058 | def get_var(self, name): 1059 | bnval = self._resolve_builtin(name) 1060 | 1061 | if bnval is not None: 1062 | return bnval 1063 | 1064 | if name in self.decls: 1065 | return self.decls[name] 1066 | 1067 | return None 1068 | 1069 | def resolve_type(self, xtype): 1070 | # Unknown type 1071 | if isinstance(xtype, VUnknownType): 1072 | xtype = self.get_var(xtype.name) 1073 | 1074 | # Array ty[e 1075 | elif isinstance(xtype, VArrayType): 1076 | xtype.type = self.resolve_type(xtype.type) 1077 | 1078 | # Map type 1079 | elif isinstance(xtype, VMapType): 1080 | xtype.key_type = self.resolve_type(xtype.key_type) 1081 | xtype.value_type = self.resolve_type(xtype.value_type) 1082 | 1083 | # Pointer type 1084 | elif isinstance(xtype, VPointerType): 1085 | xtype.type = self.resolve_type(xtype.type) 1086 | 1087 | # Optional type 1088 | elif isinstance(xtype, VOptionalType): 1089 | xtype.type = self.resolve_type(xtype.type) 1090 | 1091 | # Default types, nothing more to resolve 1092 | elif isinstance(xtype, VIntegerType) or isinstance(xtype, VFloatType) or isinstance(xtype, VBool): 1093 | pass 1094 | 1095 | # Enums and structs are already resovled 1096 | elif isinstance(xtype, EnumDecl) or isinstance(xtype, StructDecl) or isinstance(xtype, FuncDecl): 1097 | pass 1098 | 1099 | # Handle a dictionary 1100 | elif isinstance(xtype, dict): 1101 | for key in xtype: 1102 | xtype[key] = self.resolve_type(xtype[key]) 1103 | 1104 | elif isinstance(xtype, TypeDecl): 1105 | xtype = self.resolve_type(xtype.type) 1106 | 1107 | # No return value 1108 | elif xtype is None: 1109 | return None 1110 | 1111 | else: 1112 | assert False, xtype 1113 | 1114 | return xtype 1115 | 1116 | def _resolve_builtin(self, name): 1117 | if self.name == 'builtin': 1118 | return None 1119 | 1120 | bn = self.decls['builtin'] 1121 | assert isinstance(bn, Module), f'error resolving builtin!' 1122 | return bn.get_var(name) 1123 | 1124 | def get_module(self): 1125 | return self 1126 | 1127 | def type_checking(self): 1128 | structs = [] 1129 | constants = [] 1130 | functions = [] 1131 | 1132 | # add to lists everything we will need to resolve 1133 | for r in self.decls: 1134 | decl = self.decls[r] 1135 | if isinstance(decl, StructDecl): 1136 | structs.append(decl) 1137 | elif isinstance(decl, ConstDecl): 1138 | constants.append(decl) 1139 | elif isinstance(decl, FuncDecl): 1140 | functions.append(decl) 1141 | 1142 | # TODO: Functions should return a func type! 1143 | 1144 | # Resolve all the interop stuff 1145 | for r in self.decls['C']: 1146 | r = self.decls['C'][r] 1147 | if isinstance(r, FuncDecl): 1148 | for arg in r.args: 1149 | arg.type = self.resolve_type(arg.type) 1150 | 1151 | if r.ret_type is not None: 1152 | r.ret_type = self.resolve_type(r.ret_type) 1153 | 1154 | # Resolve all the constants 1155 | for const in constants: 1156 | const.type_checking() 1157 | 1158 | # Resolve all of the types inside of functions 1159 | for func in functions: 1160 | for arg in func.args: 1161 | arg.type = self.resolve_type(arg.type) 1162 | 1163 | if func.ret_type is not None: 1164 | func.ret_type = self.resolve_type(func.ret_type) 1165 | 1166 | # finally do type checking on all functions 1167 | for func in functions: 1168 | func.type_checking() 1169 | 1170 | def __str__(self): 1171 | s = '' 1172 | for k in self.decls: 1173 | decl = self.decls[k] 1174 | s += str(decl) + '\n' 1175 | return s[:-1] 1176 | 1177 | 1178 | BOLD = '\033[01m' 1179 | RESET = '\033[0m' 1180 | GREEN = '\033[32m' 1181 | RED = '\033[31m' 1182 | 1183 | 1184 | def load_from_path(module: Module, path: str): 1185 | if os.path.exists(path): 1186 | for file in os.listdir(path): 1187 | if os.path.isfile(file) and file.endswith('.v'): 1188 | # we do the imports here to avoid recursive imports 1189 | from vork.parser import Parser 1190 | from vork.tokenizer import Tokenizer 1191 | with open(file, 'r') as f: 1192 | text = f.read() 1193 | lines = text.splitlines() 1194 | tokenizer = Tokenizer(text) 1195 | parser = Parser(tokenizer) 1196 | 1197 | try: 1198 | # Parse it and add everything to the module 1199 | ast = parser.parse() 1200 | for a in ast: 1201 | module.add(a) 1202 | except Exception as e: 1203 | # TODO: syntax error recovering? 1204 | pos = tokenizer.token.pos 1205 | 1206 | msg = ", ".join(e.args) 1207 | if msg == '': 1208 | msg = 'Unexpected token' 1209 | 1210 | print( 1211 | f'{BOLD}{file}:{pos.start_line + 1}:{pos.start_column + 1}:{RESET} {RED}{BOLD}syntax error:{RESET} {msg}') 1212 | 1213 | line = lines[pos.start_line] 1214 | line = line[:pos.start_column] + BOLD + line[ 1215 | pos.start_column:pos.end_column] + RESET + line[ 1216 | pos.end_column:] 1217 | print(line) 1218 | 1219 | c = '' 1220 | for i in range(pos.start_column): 1221 | if lines[pos.start_line][i] == '\t': 1222 | c += '\t' 1223 | else: 1224 | c += ' ' 1225 | 1226 | print(c + BOLD + RED + '^' + '~' * (pos.end_column - pos.start_column - 1) + RESET) 1227 | print() 1228 | 1229 | 1230 | class Workspace: 1231 | 1232 | def __init__(self, dirs: List[str]): 1233 | self.modules = {} # type: Dict[str, Module] 1234 | self.dirs = dirs 1235 | 1236 | # Create the builtin module 1237 | self.builtin = Module() 1238 | self.builtin.name = 'builtin' 1239 | 1240 | # Add integer types 1241 | self.builtin.add(TypeDecl(True, 'byte', VIntegerType(8, False))) 1242 | self.builtin.add(TypeDecl(True, 'u16', VIntegerType(16, False))) 1243 | self.builtin.add(TypeDecl(True, 'u32', VIntegerType(32, False))) 1244 | self.builtin.add(TypeDecl(True, 'u64', VIntegerType(64, False))) 1245 | self.builtin.add(TypeDecl(True, 'u128', VIntegerType(128, False))) 1246 | self.builtin.add(TypeDecl(True, 'i8', VIntegerType(8, True))) 1247 | self.builtin.add(TypeDecl(True, 'i16', VIntegerType(16, True))) 1248 | self.builtin.add(TypeDecl(True, 'int', VIntegerType(32, True))) 1249 | self.builtin.add(TypeDecl(True, 'i64', VIntegerType(64, True))) 1250 | self.builtin.add(TypeDecl(True, 'i128', VIntegerType(128, True))) 1251 | 1252 | # Add float types 1253 | self.builtin.add(TypeDecl(True, 'f32', VFloatType(32))) 1254 | self.builtin.add(TypeDecl(True, 'f64', VFloatType(64))) 1255 | 1256 | # Add other types 1257 | self.builtin.add(TypeDecl(True, 'bool', VBool())) 1258 | 1259 | def load_main(self, path): 1260 | if 'main' in self.modules: 1261 | return self.modules['main'] 1262 | module = Module() 1263 | module.workspace = self 1264 | module.decls['builtin'] = self.builtin 1265 | module.decls['C'] = dict() 1266 | module.name = 'main' 1267 | self.modules['main'] = module 1268 | load_from_path(self.modules['main'], path) 1269 | module.type_checking() 1270 | 1271 | def load_module(self, name: str): 1272 | # first make sure we don't have it already 1273 | if name in self.modules: 1274 | return self.modules[name] 1275 | 1276 | # Create it 1277 | module = Module() 1278 | module.workspace = self 1279 | module.decls['builtin'] = self.builtin 1280 | module.decls['C'] = dict() 1281 | module.name = name.split('.')[-1] 1282 | self.modules[name] = module 1283 | 1284 | # search for all the files related to the module 1285 | module_path = name.replace('.', '/') 1286 | for path in self.dirs: 1287 | load_from_path(module, path) 1288 | 1289 | # Do the type checking 1290 | module.type_checking() 1291 | 1292 | return module 1293 | 1294 | 1295 | 1296 | 1297 | --------------------------------------------------------------------------------