├── README.md └── src ├── controlflowgraph.py └── traversers ├── astbasetraverser.py └── astfulltraverser.py /README.md: -------------------------------------------------------------------------------- 1 | python-control-flow-graph 2 | ========================= 3 | 4 | Control flow graph generator built from AST with the intention that no 5 | information is lost. 6 | 7 | Inspiration for how to manage loops/trys was taken from PyPy. 8 | Traversers are taken from: 9 | https://launchpad.net/python-static-type-checking/+index 10 | -------------------------------------------------------------------------------- /src/controlflowgraph.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Creates a control flow graph (cfg) 3 | ''' 4 | 5 | from src.traversers.astfulltraverser import AstFullTraverser 6 | import ast 7 | from pprint import pprint 8 | 9 | class Block(): 10 | ''' A basic control flow block. 11 | 12 | It has one entry point and several possible exit points. 13 | Note that the next_block is not necessarily an exit. 14 | ''' 15 | 16 | # Block tags 17 | NORMAL = 0 18 | LOOP_HEADER = 1 19 | 20 | def __init__(self): 21 | # The next block along the function 22 | self.next_block = None 23 | self.has_return = False 24 | # Holds the statements in this block 25 | self.start_line_no = 0 26 | self.statements = [] 27 | self.exit_blocks = [] 28 | # Use to indicate whether the block has been visited. Used for printing 29 | self.marked = False 30 | # Used to describe special blocks 31 | self.tag = Block.NORMAL 32 | # Block which have been absorbed into this one 33 | self.dependents = [] 34 | 35 | def copy_dict(self, copy_to): 36 | ''' Keep the name bindings but copy the class instances. 37 | Both bindings now point to the same variables. 38 | This function is used to simulate C pointers. 39 | TODO: Find a more elegant way of achieving this. 
def get_source(self, fn):
    ''' Return the entire contents of the file whose name is given.

        fn is the path of the file to read (opened in text mode).
        Returns the file's text, or an empty string when the file cannot
        be opened or read (IOError). The empty-string fallback is a
        deliberate best effort: the caller then parses an empty module.
        Almost entirely copied from stc. '''
    try:
        # The context manager closes the handle even when read() raises,
        # which the explicit open/read/close sequence did not guarantee.
        with open(fn, 'r') as f:
            return f.read()
    except IOError:
        return ''
def add_to_block(self, node):
    ''' Record node as a statement of the current block.

        We want every try statement to be in its own block.

        Nothing happens when there is no current block, or when node
        does not start on a later line than the last recorded statement
        (this filters out the nested parts of a statement that was
        already recorded).

        A While/For node always starts a block of its own so that the
        loop test can be targeted by jumps.

        When the statement is inside a try that has handlers, it is
        given an exit to every handler and (unless it is a loop header)
        a fresh block is started for the statement that follows it. '''
    if not self.current_block:
        return
    # We only want the 'top level' statements
    if self.current_line_num >= node.lineno:
        return
    # Special cases - test must be in its own block
    if isinstance(node, ast.While) or isinstance(node, ast.For):
        if not self.is_empty_block(self.current_block):
            # Close the current block and continue in a new one that
            # will hold only the loop header.
            test_block = self.new_block()
            self.current_block.exit_blocks.append(test_block)
            self.use_next_block(test_block)
    self.current_line_num = node.lineno
    # Search the enclosing frames, innermost first, for a try with handlers.
    for f_block_type, f_block in reversed(self.frame_blocks):
        if f_block_type == F_BLOCK_EXCEPT:
            # Statement is in a try - set exits to next statement and
            # excepts
            self.current_block.statements.append(node)
            # For this frame kind f_block is the list of handler entry blocks.
            for handler in f_block:
                self.current_block.exit_blocks.append(handler)
            # Special case: a loop header keeps its block, the loop
            # visitor wires up its remaining exits.
            if isinstance(node, ast.While) or isinstance(node, ast.For):
                break
            next_statement_block = self.new_block()
            self.current_block.exit_blocks.append(next_statement_block)
            self.use_next_block(next_statement_block)
            break
    else:
        # No enclosing try with handlers: plain straight-line statement.
        self.current_block.statements.append(node)
def check_block_num(self, node):
    ''' Label the current block with the line number of its first statement.

        Used for display purposes only. Does nothing when there is no
        current block or when the block already carries a label (a line
        number, or the special "Exit" label given to the exit block).

        node is the AST node being visited; its lineno becomes the label. '''
    block = self.current_block
    if not block:
        return
    if not block.start_line_no:
        # Only the first node seen in a block sets its label. The label
        # is no longer echoed to stdout: that was a debugging print which
        # interleaved bare numbers with the real CFG output.
        block.start_line_no = node.lineno
def do_If(self, node):
    ''' Build the blocks for an if statement.

        The block that is current on entry gets one exit per branch.
        Every branch ends by being connected to a shared join block; when
        there is no else part the entry block exits to the join block
        directly. If the if statement is the last one in a straight line
        the join block stays empty and unused. '''
    header = self.current_block
    join_block = self.new_block()

    def build_branch(statements):
        # One fresh block per branch, entered from the header.
        branch_block = self.new_block()
        self.add_to_exits(header, branch_block)
        self.use_block(branch_block)
        for statement in statements:
            self.visit(statement)
        # Whatever block the branch ended in must flow to the join block.
        self.check_child_exits(self.current_block, join_block)

    build_branch(node.body)
    if node.orelse:
        build_branch(node.orelse)
    else:
        # A false test falls straight through to the join block.
        self.add_to_exits(header, join_block)
    # Remember the join block on the header (stored as 'next').
    header.next = join_block
    self.use_block(join_block)
def do_Return(self, node):
    ''' Close the current block with a return.

        The returned expression, if any, is visited first. The block then
        exits to the innermost enclosing finally block when there is one,
        otherwise to the function's exit block, and is flagged so that no
        further statements are processed for it. '''
    returned_value = node.value
    if returned_value:
        self.visit(returned_value)
    # Finally blocks of the enclosing try statements, innermost first.
    pending_finallys = [target
                        for kind, target in reversed(self.frame_blocks)
                        if kind == F_BLOCK_FINALLY]
    if pending_finallys:
        destination = pending_finallys[0]
    else:
        destination = self.exit_block
    leaving = self.current_block
    leaving.exit_blocks.append(destination)
    leaving.has_return = True
def do_Try(self, node):
    ''' Build the blocks for a try statement.

        It is a great ordeal to find out which statements can cause which
        exceptions. Assume every statement can cause any exception. So,
        while the try body is visited with handlers on the frame stack,
        each statement gets its own block and a link to each handler.

        The finally body and the handlers are visited before the try body
        so that their entry blocks exist when the body's frame blocks are
        pushed.

        Normal (non-exception) flow is wired as follows:
          * end of the try body  -> orelse, when there is one
          * end of the try body / orelse / each handler
                                 -> the finally body, when there is one,
                                    otherwise the block after the try
          * end of the finally body -> the block after the try

        TODO: nested try-finallys should go to each other during a return.
    '''
    after_try_block = self.new_block()
    final_block = None
    try_body_block = self.new_block()
    self.current_block.next_block = try_body_block

    # The clauses are visited out of source order, so the line counter is
    # rewound to this value after each of them; otherwise the statements
    # of the try body (which come earlier in the file) would be skipped.
    before_line_no = self.current_line_num
    if node.finalbody:
        final_block = self.new_block()
        self.use_block(final_block)
        self.push_frame_block(F_BLOCK_FINALLY_END, node)
        for z in node.finalbody:
            self.visit(z)
        self.pop_frame_block(F_BLOCK_FINALLY_END, node)
        self.check_child_exits(self.current_block, after_try_block)
        self.current_line_num = before_line_no

    # Where normal flow continues once the body, the orelse or a handler
    # has finished.
    normal_exit = final_block if node.finalbody else after_try_block

    exception_handlers = []
    for handler in node.handlers:
        assert isinstance(handler, ast.ExceptHandler)
        initial_handler_block = self.new_block()
        self.use_block(initial_handler_block)
        for z in handler.body:
            self.visit(z)
        self.check_child_exits(self.current_block, normal_exit)
        exception_handlers.append(initial_handler_block)
        self.current_line_num = before_line_no

    # Frame blocks that are active while the try body is visited.
    f_blocks = []
    if node.finalbody:
        f_blocks.append((F_BLOCK_FINALLY, final_block))
    if node.handlers:
        f_blocks.append((F_BLOCK_EXCEPT, exception_handlers))
    for kind, target in f_blocks:
        self.push_frame_block(kind, target)
    self.use_block(try_body_block)
    for z in node.body:
        self.visit(z)
    for kind, target in reversed(f_blocks):
        self.pop_frame_block(kind, target)

    if node.orelse:
        orelse_block = self.new_block()
        # Last block in body can always go to the orelse
        self.check_child_exits(self.current_block, orelse_block)
        self.use_block(orelse_block)
        for z in node.orelse:
            self.visit(z)
    # Either the end of the orelse or, without one, the end of the try
    # body continues with the finally body when it exists. Previously a
    # body without orelse always jumped past the finally body.
    self.check_child_exits(self.current_block, normal_exit)

    self.use_next_block(after_try_block)
def attribute_base(self,node):

    '''Return the node at the root of an attribute/subscript/call chain.

    Attribute and Subscript nodes are followed through their value,
    Call nodes through their func. Any other node ends the chain and is
    returned as is: a 'Name', 'Builtin' or 'Str' node, or a constant or
    nameless dict, list, etc. (which is not an error).
    '''

    kind = self.kind(node)
    if kind in ('Attribute','Subscript'):
        return self.attribute_base(node.value)
    if kind == 'Call':
        return self.attribute_base(node.func)
    # We have found the base.
    return node
49 | return None 50 | 51 | return result 52 | #@+node:ekr.20130315140102.9529: *4* bt.check_visitor_names 53 | def check_visitor_names(self,silent=False): 54 | 55 | '''Check that there is an ast.AST node named x 56 | for all visitor methods do_x.''' 57 | 58 | #@+<< define names >> 59 | #@+node:ekr.20130315140102.9531: *5* << define names >> 60 | names = ( 61 | 'Add','And','Assert','Assign','Attribute','AugAssign','AugLoad','AugStore', 62 | 'BinOp','BitAnd','BitOr','BitXor','BoolOp','Break', 63 | 'Builtin', ### Python 3.x only??? 64 | 'Bytes', # Python 3.x only. 65 | 'Call','ClassDef','Compare','Continue', 66 | 'Del','Delete','Dict','DictComp','Div', 67 | 'Ellipsis','Eq','ExceptHandler','Exec','Expr','Expression','ExtSlice', 68 | 'FloorDiv','For','FunctionDef','GeneratorExp','Global','Gt','GtE', 69 | 'If','IfExp','Import','ImportFrom','In','Index','Interactive', 70 | 'Invert','Is','IsNot','LShift','Lambda', 71 | 'List','ListComp','Load','Lt','LtE', 72 | 'Mod','Module','Mult','Name','Not','NotEq','NotIn','Num', 73 | 'Or','Param','Pass','Pow','Print', 74 | 'RShift','Raise','Repr','Return', 75 | 'Set','SetComp','Slice','Store','Str','Sub','Subscript','Suite', 76 | 'Try', # Python 3.x only. 77 | 'TryExcept','TryFinally','Tuple','UAdd','USub','UnaryOp', 78 | 'While','With','Yield', 79 | # Lower case names... 80 | 'arg', # A valid ast.AST node: Python 3. 81 | 'alias', # A valid ast.AST node. 82 | 'arguments', # A valid ast.AST node. 83 | 'comprehension', # A valid ast.AST node. 84 | 'keyword', # A valid ast.AST node(!) 85 | # 'keywords', # A valid field, but not a valid ast.AST node! 86 | # In ast.Call nodes, node.keywords points to a *list* of ast.keyword objects. 87 | # There is never any need to traverse these: 88 | # 'id','n','name','s','str'. 
89 | ) 90 | #@-<< define names >> 91 | #@+<< Py2K grammar >> 92 | #@+node:ekr.20130315140102.9530: *5* << Py2k grammar >> 93 | #@@nocolor-node 94 | #@+at 95 | # See 96 | # mod: 97 | # Expression(expr body) 98 | # Interactive(stmt* body) 99 | # Module(stmt* body) 100 | # Suite(stmt* body) # not an actual node, 101 | # stmt: 102 | # Assert(expr test, expr? msg) 103 | # Assign(expr* targets, expr value) 104 | # AugAssign(expr target, operator op, expr value) 105 | # Break 106 | # ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list) 107 | # Continue 108 | # Delete(expr* targets) 109 | # Exec(expr body, expr? globals, expr? locals) 110 | # Expr(expr value) 111 | # For(expr target, expr iter, stmt* body, stmt* orelse) 112 | # FunctionDef(identifier name, arguments args,stmt* body, expr* decorator_list) 113 | # Global(identifier* names) 114 | # If(expr test, stmt* body, stmt* orelse) 115 | # Import(alias* names) 116 | # ImportFrom(identifier? module, alias* names, int? level) 117 | # Pass 118 | # Print(expr? dest, expr* values, bool nl) 119 | # Raise(expr? type, expr? inst, expr? tback) 120 | # Return(expr? value) 121 | # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) 122 | # TryFinally(stmt* body, stmt* finalbody) 123 | # While(expr test, stmt* body, stmt* orelse) 124 | # With(expr context_expr, expr? optional_vars, stmt* body) 125 | # expr: 126 | # Attribute(expr value, identifier attr, expr_context ctx) 127 | # BinOp(expr left, operator op, expr right) 128 | # BoolOp(boolop op, expr* values) 129 | # Call(expr func, expr* args, keyword* keywords, expr? starargs, expr? 
kwargs) 130 | # Compare(expr left, cmpop* ops, expr* comparators) 131 | # Dict(expr* keys, expr* values) 132 | # DictComp(expr key, expr value, comprehension* generators) 133 | # GeneratorExp(expr elt, comprehension* generators) 134 | # IfExp(expr test, expr body, expr orelse) 135 | # Lambda(arguments args, expr body) 136 | # List(expr* elts, expr_context ctx) 137 | # ListComp(expr elt, comprehension* generators) 138 | # Name(identifier id, expr_context ctx) 139 | # Num(object n) -- a number as a PyObject. 140 | # Repr(expr value) 141 | # Set(expr* elts) 142 | # SetComp(expr elt, comprehension* generators) 143 | # Str(string s) -- need to specify raw, unicode, etc? 144 | # Subscript(expr value, slice slice, expr_context ctx) 145 | # Tuple(expr* elts, expr_context ctx) 146 | # UnaryOp(unaryop op, expr operand) 147 | # Yield(expr? value) 148 | # expr_context: 149 | # AugLoad 150 | # AugStore 151 | # Del 152 | # Load 153 | # Param 154 | # Store 155 | # slice: 156 | # Ellipsis 157 | # Slice(expr? lower, expr? upper, expr? step) 158 | # ExtSlice(slice* dims) 159 | # Index(expr value) 160 | # boolop: 161 | # And | Or 162 | # operator: 163 | # Add | Sub | Mult | Div | Mod | Pow | LShift | RShift | BitOr | BitXor | BitAnd | FloorDiv 164 | # unaryop: 165 | # Invert | Not | UAdd | USub 166 | # cmpop: 167 | # Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn 168 | # excepthandler: 169 | # ExceptHandler(expr? type, expr? name, stmt* body) 170 | # 171 | # Lower case node names: 172 | # alias (identifier name, identifier? asname) 173 | # arguments (expr* args, identifier? vararg, identifier? 
kwarg, expr* defaults) 174 | # comprehension (expr target, expr iter, expr* ifs) 175 | # keyword (identifier arg, expr value) 176 | #@-<< Py2K grammar >> 177 | #@+<< Py3K grammar >> 178 | #@+node:ekr.20130320161725.9543: *5* << Py3k grammar >> 179 | #@@nocolor-node 180 | #@+at 181 | # 182 | # mod = Module(stmt* body) 183 | # | Interactive(stmt* body) 184 | # | Expression(expr body) 185 | # 186 | # -- not really an actual node but useful in Jython's typesystem. 187 | # | Suite(stmt* body) 188 | # 189 | # stmt = FunctionDef(identifier name, arguments args, 190 | # stmt* body, expr* decorator_list, expr? returns) 191 | # | ClassDef(identifier name, 192 | # expr* bases, 193 | # keyword* keywords, 194 | # expr? starargs, 195 | # expr? kwargs, 196 | # stmt* body, 197 | # expr* decorator_list) 198 | # | Return(expr? value) 199 | # 200 | # | Delete(expr* targets) 201 | # | Assign(expr* targets, expr value) 202 | # | AugAssign(expr target, operator op, expr value) 203 | # 204 | # -- use 'orelse' because else is a keyword in target languages 205 | # | For(expr target, expr iter, stmt* body, stmt* orelse) 206 | # | While(expr test, stmt* body, stmt* orelse) 207 | # | If(expr test, stmt* body, stmt* orelse) 208 | # | With(withitem* items, stmt* body) 209 | # 210 | # | Raise(expr? exc, expr? cause) 211 | # | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) 212 | # | Assert(expr test, expr? msg) 213 | # 214 | # | Import(alias* names) 215 | # | ImportFrom(identifier? module, alias* names, int? level) 216 | # 217 | # | Global(identifier* names) 218 | # | Nonlocal(identifier* names) 219 | # | Expr(expr value) 220 | # | Pass | Break | Continue 221 | # 222 | # -- XXX Jython will be different 223 | # -- col_offset is the byte offset in the utf8 string the parser uses 224 | # attributes (int lineno, int col_offset) 225 | # 226 | # -- BoolOp() can use left & right? 
227 | # expr = BoolOp(boolop op, expr* values) 228 | # | BinOp(expr left, operator op, expr right) 229 | # | UnaryOp(unaryop op, expr operand) 230 | # | Lambda(arguments args, expr body) 231 | # | IfExp(expr test, expr body, expr orelse) 232 | # | Dict(expr* keys, expr* values) 233 | # | Set(expr* elts) 234 | # | ListComp(expr elt, comprehension* generators) 235 | # | SetComp(expr elt, comprehension* generators) 236 | # | DictComp(expr key, expr value, comprehension* generators) 237 | # | GeneratorExp(expr elt, comprehension* generators) 238 | # -- the grammar constrains where yield expressions can occur 239 | # | Yield(expr? value) 240 | # | YieldFrom(expr value) 241 | # -- need sequences for compare to distinguish between 242 | # -- x < 4 < 3 and (x < 4) < 3 243 | # | Compare(expr left, cmpop* ops, expr* comparators) 244 | # | Call(expr func, expr* args, keyword* keywords, 245 | # expr? starargs, expr? kwargs) 246 | # | Num(object n) -- a number as a PyObject. 247 | # | Str(string s) -- need to specify raw, unicode, etc? 248 | # | Bytes(bytes s) 249 | # | Ellipsis 250 | # -- other literals? bools? 251 | # 252 | # -- the following expression can appear in assignment context 253 | # | Attribute(expr value, identifier attr, expr_context ctx) 254 | # | Subscript(expr value, slice slice, expr_context ctx) 255 | # | Starred(expr value, expr_context ctx) 256 | # | Name(identifier id, expr_context ctx) 257 | # | List(expr* elts, expr_context ctx) 258 | # | Tuple(expr* elts, expr_context ctx) 259 | # 260 | # -- col_offset is the byte offset in the utf8 string the parser uses 261 | # attributes (int lineno, int col_offset) 262 | # 263 | # expr_context = Load | Store | Del | AugLoad | AugStore | Param 264 | # 265 | # slice = Slice(expr? lower, expr? upper, expr? 
step) 266 | # | ExtSlice(slice* dims) 267 | # | Index(expr value) 268 | # 269 | # boolop = And | Or 270 | # 271 | # operator = Add | Sub | Mult | Div | Mod | Pow | LShift 272 | # | RShift | BitOr | BitXor | BitAnd | FloorDiv 273 | # 274 | # unaryop = Invert | Not | UAdd | USub 275 | # 276 | # cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn 277 | # 278 | # comprehension = (expr target, expr iter, expr* ifs) 279 | # 280 | # excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) 281 | # attributes (int lineno, int col_offset) 282 | # 283 | # arguments = (arg* args, identifier? vararg, expr? varargannotation, 284 | # arg* kwonlyargs, identifier? kwarg, 285 | # expr? kwargannotation, expr* defaults, 286 | # expr* kw_defaults) 287 | # arg = (identifier arg, expr? annotation) 288 | # 289 | # -- keyword arguments supplied to call 290 | # keyword = (identifier arg, expr value) 291 | # 292 | # -- import name with optional 'as' alias. 293 | # alias = (identifier name, identifier? asname) 294 | # 295 | # withitem = (expr context_expr, expr? optional_vars) 296 | #@-<< Py3K grammar >> 297 | 298 | # Inexpensive, because there are few entries in aList. 299 | aList = [z for z in dir(self) if z.startswith('do_')] 300 | for s in sorted(aList): 301 | name = s[3:] 302 | if name not in names: 303 | if not silent: 304 | assert(False) 305 | assert False,name 306 | # This is useful now that most errors have been caught. 307 | 308 | def find_function_call (self,node): 309 | ''' 310 | Return the static name of the function being called. 311 | 312 | tree is the tree.func part of the Call node.''' 313 | 314 | 315 | kind = self.kind(node) 316 | assert kind not in ('str','Builtin') 317 | if kind == 'Name': 318 | s = node.id 319 | elif kind == 'Attribute': 320 | s = node.attr # node.attr is always a string. 
def op_name (self,node,strict=True):
    '''Return the printable symbol for an operator node.

    The symbol is looked up by the node's kind. A kind that is not in
    the table yields '<ClassName>'. Context nodes (Load, Store, ...) map
    to the empty string, which fails the assertion when strict is true.'''

    symbols = {
    # Binary operators.
    'Add':       '+',
    'BitAnd':    '&',
    'BitOr':     '|',
    'BitXor':    '^',
    'Div':       '/',
    'FloorDiv':  '//',
    'LShift':    '<<',
    'Mod':       '%',
    'Mult':      '*',
    'Pow':       '**',
    'RShift':    '>>',
    'Sub':       '-',
    # Boolean operators.
    'And':   ' and ',
    'Or':    ' or ',
    # Comparison operators
    'Eq':    '==',
    'Gt':    '>',
    'GtE':   '>=',
    'In':    ' in ',
    'Is':    ' is ',
    'IsNot': ' is not ',
    'Lt':    '<',
    'LtE':   '<=',
    'NotEq': '!=',
    'NotIn': ' not in ',
    # Context operators.
    'AugLoad':  '',
    'AugStore': '',
    'Del':      '',
    'Load':     '',
    'Param':    '',
    'Store':    '',
    # Unary operators.
    'Invert':   '~',
    'Not':      ' not ',
    'UAdd':     '+',
    'USub':     '-',
    }
    try:
        name = symbols[self.kind(node)]
    except KeyError:
        # Unknown kinds are shown as a placeholder built from the class name.
        name = '<%s>' % node.__class__.__name__
    if strict:
        assert name,self.kind(node)
    return name
def do_arguments(self,node):
    '''Visit the positional arguments, then the default values.'''
    for field in ('args','defaults'):
        for child in getattr(node,field):
            self.visit(child)
annotation) 57 | 58 | def do_arg(self,node): 59 | if node.annotation: 60 | self.visit(node.annotation) 61 | 62 | def do_Attribute(self,node): 63 | self.visit(node.value) 64 | # self.visit(node.ctx) 65 | 66 | def do_BinOp (self,node): 67 | self.visit(node.left) 68 | # self.op_name(node.op) 69 | self.visit(node.right) 70 | 71 | def do_BoolOp (self,node): 72 | for z in node.values: 73 | self.visit(z) 74 | 75 | def do_Call(self,node): 76 | 77 | self.visit(node.func) 78 | for z in node.args: 79 | self.visit(z) 80 | for z in node.keywords: 81 | self.visit(z) 82 | if getattr(node,'starargs',None): 83 | self.visit(node.starargs) 84 | if getattr(node,'kwargs',None): 85 | self.visit(node.kwargs) 86 | 87 | def do_Compare(self,node): 88 | self.visit(node.left) 89 | for z in node.comparators: 90 | self.visit(z) 91 | 92 | def do_comprehension(self,node): 93 | self.visit(node.target) # A name. 94 | self.visit(node.iter) # An attribute. 95 | for z in node.ifs: 96 | self.visit(z) 97 | 98 | def do_Dict(self,node): 99 | for z in node.keys: 100 | self.visit(z) 101 | for z in node.values: 102 | self.visit(z) 103 | 104 | def do_Expr(self,node): 105 | self.visit(node.value) 106 | 107 | def do_Expression(self,node): 108 | '''An inner expression''' 109 | self.visit(node.body) 110 | 111 | def do_ExtSlice (self,node): 112 | for z in node.dims: 113 | self.visit(z) 114 | 115 | def do_GeneratorExp(self,node): 116 | self.visit(node.elt) 117 | for z in node.generators: 118 | self.visit(z) 119 | 120 | def do_IfExp (self,node): 121 | self.visit(node.body) 122 | self.visit(node.test) 123 | self.visit(node.orelse) 124 | 125 | def do_Index (self,node): 126 | self.visit(node.value) 127 | 128 | def do_keyword(self,node): 129 | self.visit(node.value) 130 | 131 | 132 | def do_List(self,node): 133 | for z in node.elts: 134 | self.visit(z) 135 | # self.visit(node.ctx) 136 | 137 | def do_ListComp(self,node): 138 | elt = self.visit(node.elt) 139 | for z in node.generators: 140 | self.visit(z) 141 | 142 | def 
do_Name(self,node): 143 | # self.visit(node.ctx) 144 | pass 145 | 146 | # Python 2.x only 147 | # Repr(expr value) 148 | def do_Repr(self,node): 149 | self.visit(node.value) 150 | 151 | def do_Slice (self,node): 152 | if getattr(node,'lower',None): 153 | self.visit(node.lower) 154 | if getattr(node,'upper',None): 155 | self.visit(node.upper) 156 | if getattr(node,'step',None): 157 | self.visit(node.step) 158 | 159 | def do_Subscript(self,node): 160 | self.visit(node.value) 161 | self.visit(node.slice) 162 | # self.visit(node.ctx) 163 | 164 | def do_Tuple(self,node): 165 | for z in node.elts: 166 | self.visit(z) 167 | # self.visit(node.ctx) 168 | 169 | def do_UnaryOp (self,node): 170 | # self.op_name(node.op) 171 | self.visit(node.operand) 172 | 173 | def do_alias (self,node): 174 | # self.visit(node.name) 175 | # if getattr(node,'asname') 176 | # self.visit(node.asname) 177 | pass 178 | 179 | def do_Assert(self,node): 180 | self.visit(node.test) 181 | if node.msg: 182 | self.visit(node.msg) 183 | 184 | def do_Assign(self,node): 185 | for z in node.targets: 186 | self.visit(z) 187 | self.visit(node.value) 188 | 189 | def do_AugAssign(self,node): 190 | self.visit(node.target) 191 | self.visit(node.value) 192 | 193 | def do_Break(self,tree): 194 | pass 195 | 196 | def do_ClassDef (self,node): 197 | for z in node.bases: 198 | self.visit(z) 199 | for z in node.body: 200 | self.visit(z) 201 | for z in node.decorator_list: 202 | self.visit(z) 203 | 204 | def do_Continue(self,tree): 205 | pass 206 | 207 | def do_Delete(self,node): 208 | for z in node.targets: 209 | self.visit(z) 210 | 211 | def do_ExceptHandler(self,node): 212 | if node.type: 213 | self.visit(node.type) 214 | if node.name and isinstance(node.name,ast.Name): 215 | self.visit(node.name) 216 | for z in node.body: 217 | self.visit(z) 218 | 219 | def do_Exec(self,node): 220 | self.visit(node.body) 221 | if getattr(node,'globals',None): 222 | self.visit(node.globals) 223 | if getattr(node,'locals',None): 224 | 
self.visit(node.locals) 225 | 226 | def do_For (self,node): 227 | self.visit(node.target) 228 | self.visit(node.iter) 229 | for z in node.body: 230 | self.visit(z) 231 | for z in node.orelse: 232 | self.visit(z) 233 | 234 | def do_FunctionDef (self,node): 235 | self.visit(node.args) 236 | for z in node.body: 237 | self.visit(z) 238 | for z in node.decorator_list: 239 | self.visit(z) 240 | 241 | def do_Global(self,node): 242 | pass 243 | 244 | def do_If(self,node): 245 | self.visit(node.test) 246 | for z in node.body: 247 | self.visit(z) 248 | for z in node.orelse: 249 | self.visit(z) 250 | 251 | def do_Import(self,node): 252 | pass 253 | 254 | 255 | def do_ImportFrom(self,node): 256 | # for z in node.names: 257 | # self.visit(z) 258 | pass 259 | 260 | def do_Lambda(self,node): 261 | 262 | self.visit(node.args) 263 | self.visit(node.body) 264 | 265 | def do_Module (self,node): 266 | for z in node.body: 267 | self.visit(z) 268 | 269 | def do_Pass(self,node): 270 | pass 271 | 272 | def do_Print(self,node): 273 | if getattr(node,'dest',None): 274 | self.visit(node.dest) 275 | for expr in node.values: 276 | self.visit(expr) 277 | 278 | def do_Raise(self,node): 279 | if getattr(node,'type',None): 280 | self.visit(node.type) 281 | if getattr(node,'inst',None): 282 | self.visit(node.inst) 283 | if getattr(node,'tback',None): 284 | self.visit(node.tback) 285 | 286 | def do_Return(self,node): 287 | if node.value: 288 | self.visit(node.value) 289 | 290 | def do_Try(self,node): 291 | for z in node.body: 292 | self.visit(z) 293 | for z in node.handlers: 294 | self.visit(z) 295 | for z in node.orelse: 296 | self.visit(z) 297 | for z in node.finalbody: 298 | self.visit(z) 299 | 300 | def do_TryExcept(self,node): 301 | for z in node.body: 302 | self.visit(z) 303 | for z in node.handlers: 304 | self.visit(z) 305 | for z in node.orelse: 306 | self.visit(z) 307 | 308 | def do_TryFinally(self,node): 309 | for z in node.body: 310 | self.visit(z) 311 | for z in node.finalbody: 312 | 
self.visit(z) 313 | 314 | def do_While (self,node): 315 | self.visit(node.test) 316 | for z in node.body: 317 | self.visit(z) 318 | for z in node.orelse: 319 | self.visit(z) 320 | 321 | def do_With (self,node): 322 | self.visit(node.context_expr) 323 | if node.optional_vars: 324 | self.visit(node.optional_vars) 325 | for z in node.body: 326 | self.visit(z) 327 | 328 | def do_Yield(self,node): 329 | if node.value: 330 | self.visit(node.value) 331 | 332 | def visit(self,node): 333 | '''Visit a *single* ast node. Visitors are responsible for visiting children!''' 334 | assert isinstance(node,ast.AST),node.__class__.__name__ 335 | method_name = 'do_' + node.__class__.__name__ 336 | method = getattr(self,method_name) 337 | return method(node) --------------------------------------------------------------------------------