├── README.md ├── color_log.py ├── dump_python.py ├── judge_injection.py ├── judge_injection.zip └── python_audit.pdf /README.md: -------------------------------------------------------------------------------- 1 | # python 代码审计工具readme 2 | 3 | python audit tool 4 | 5 | --- 6 | # 1,python的语法树 7 | 根据王垠的python静态分析工具[PySonar](https://github.com/yinwang0/pysonar2)得到静态语法树,这是一个庞大的dict结构,递归去除一些不必要的参数得到稍微简单点的一个语法树,以免影响后续分析。 8 | 简单说明一下一个函数的实现,首先是”type”:”FunctionDef”表明这一段代码是函数定义,函数中则会有args,表明函数的参数,lineno是代码所在的行,name是函数名。更详细的接口文档见 9 | https://greentreesnakes.readthedocs.org/en/latest/nodes.html 在这里包含了各个结构的定义,分析整个树就可以依照这个来实现。 10 | # 2,基本原理 11 | 12 | 基本实现原理就是寻找危险函数和可控参数,危险函数有eval,system,popen等系统函数,同时也有咱们自定义的包含这些危险函数的函数,如果这些函数的参数是可控的,就会认为这行代码是有注入风险的,那么这个函数也是有注入风险的. 13 | 14 | 对于可控参数,首先会从函数参数入手,认为函数参数是可控的,分析程序会根据前面的语法树去分析代码结构,发现有将函数参数赋值的操作,并且这个赋值是简单的转换,这些简单的转换包含如下类型: 15 | * (1) 简单的取属性,如get取值,对request单独处理,只认为GET,POST,FILES可控,其他request字段如META,user,session,url等都是不可控的。 16 | * (2) 字符串拼接,被拼接的字符串中包含可控参数,则认为赋值后的值也是可控的 17 | * (3) 列表解析式,如果列表解析式是基于某个可控因子进行迭代的,则认为赋值后的列表也是可控的 18 | * (4) 分片符取值,一般认为分片后的值也是可控的,当然这个也不绝对。 19 | * (5) 一般的函数处理过程:a,函数是常见的字符串操作函数(str,encode,strip等)或者是简单的未过滤函数;b,处理属性;c,如果经过了未知的函数处理则将赋值后的值从可控列表中去掉。 20 | * (6) 如果代码中的if中有exists,isdigit等带可控参数的的return语句,则将该参数从可控参数列表中去掉(if not os.path.isdir(parentPath):return None),或者将可控参数定死在某个范围之内的(if type not in ["R", "B"]:return HttpResponse("2")) 21 | 22 | # 3,使用方法 23 | 使用方法如下: 24 | $ python judge_injection.py -h 25 | Usage: judge_injection.py [options] 26 | 27 | Options: 28 | -h, --help show this help message and exit 29 | -d FILE_PATH, --dir=FILE_PATH 30 | files to be checked 31 | -c, --cmd cmd check 32 | -s, --sql sql check 33 | -a, --all cmd check and sql check 34 | -v, --verbose print all unsafe func 35 | 36 | 默认是对所有情况进行检查,包括代码注入,sql注入,命令注入,xss注入,危险的文件操作等 37 | 38 | # 4,代码结构 39 | judge_injection类负责分析文件,得到一个python语法树,提炼出代码中包含的函数语句,分析每一行代码在碰到函数的时候会调用look_up_arg函数,该函数会得出函数中的可变变量,如果可变变量在危险函数中出现了就认为该外层调用函数是危险的。 40 | 41 | # 5,详细设计文档 42 | 参见https://github.com/shengqi158/pyvulhunter/blob/master/python_audit.pdf 43 | 44 | 45 | -------------------------------------------------------------------------------- /color_log.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2010, 2011 Vinay Sajip. All rights reserved. 3 | # 4 | import logging 5 | import os 6 | 7 | class ColorizingStreamHandler(logging.StreamHandler): 8 | # color names to indices 9 | color_map = { 10 | 'black': 0, 11 | 'red': 1, 12 | 'green': 2, 13 | 'yellow': 3, 14 | 'blue': 4, 15 | 'magenta': 5, 16 | 'cyan': 6, 17 | 'white': 7, 18 | } 19 | 20 | #levels to (background, foreground, bold/intense) 21 | level_map = { 22 | #logging.DEBUG: (None, 'blue', False), 23 | logging.DEBUG: (None, 'white', False), 24 | logging.INFO: (None, 'blue', False), 25 | logging.WARNING: (None, 'yellow', False), 26 | logging.ERROR: (None, 'red', False), 27 | logging.CRITICAL: ('red', 'white', True), 28 | } 29 | csi = '\x1b[' 30 | reset = '\x1b[0m' 31 | 32 | @property 33 | def is_tty(self): 34 | isatty = getattr(self.stream, 'isatty', None) 35 | return isatty and isatty() 36 | 37 | def emit(self, record): 38 | try: 39 | message = self.format(record) 40 | stream = self.stream 41 | stream.write(message) 42 | stream.write(getattr(self, 'terminator', '\n')) 43 | self.flush() 44 | except (KeyboardInterrupt, SystemExit): 45 | raise 46 | except: 47 | self.handleError(record) 48 | 49 | def output_colorized(self, message): 50 | self.stream.write(message) 51 | 52 | def colorize(self, message, record): 53 | if record.levelno in self.level_map: 54 | bg, fg, bold = self.level_map[record.levelno] 55 | params = [] 56 | if bg in self.color_map: 57 | params.append(str(self.color_map[bg] + 40)) 58 | if fg in self.color_map: 59 | params.append(str(self.color_map[fg] + 30)) 60 | if bold: 61 | params.append('1') 62 | if params: 63 | message = ''.join((self.csi, ';'.join(params), 64 | 'm', message, self.reset)) 65 | return message 66 | 67 | def format(self, record): 68 | message = logging.StreamHandler.format(self, record) 69 | if self.is_tty: 70 | # Don't colorize any traceback 71 | parts = message.split('\n', 1) 72 | parts[0] = self.colorize(parts[0], record) 73 | message = '\n'.join(parts) 74 | return message 75 | 76 | def main(): 77 | root = logging.getLogger() 78 | root.setLevel(logging.DEBUG) 79 | root.addHandler(ColorizingStreamHandler()) 80 | logging.debug('DEBUG') 81 | logging.info('INFO') 82 | logging.warning('WARNING') 83 | logging.error('ERROR') 84 | logging.critical('CRITICAL') 85 | 86 | def init_log(log_level): 87 | """log_level = logging.NOTSET logging.DEBUG logging.INFO logging.ERROR logging.CRITICAL""" 88 | root = logging.getLogger() 89 | root.setLevel(log_level) 90 | stream_handler = ColorizingStreamHandler() 91 | formatter = logging.Formatter('[%(funcName)-10s %(lineno)d %(levelname)-8s] %(message)s') 92 | #logging.StreamHandler.setFormatter(formatter) 93 | 94 | stream_handler.setFormatter(formatter) 95 | #root.addHandler(ColorizingStreamHandler()) 96 | root.addHandler(stream_handler) 97 | return root 98 | 99 | if __name__ == '__main__': 100 | # main() 101 | logger = init_log(logging.DEBUG) 102 | logger.debug('DEBUG..........................') 103 | logger.info('INFO----------------------------') 104 | logger.warning('WARNING======================') 105 | logger.error('ERROR**************************') 106 | logger.error('ERROR**************************%r' %({'value':'111'})) 107 | #logger.error('ERROR**************************', {'value':'111'}) 108 | -------------------------------------------------------------------------------- /dump_python.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | import sys 4 | import codecs 5 | 6 | from json import JSONEncoder 7 | from ast import * 8 | 9 | 10 | # Is it Python 3? 11 | is_python3 = hasattr(sys.version_info, 'major') and (sys.version_info.major == 3) 12 | 13 | 14 | class AstEncoder(JSONEncoder): 15 | def default(self, o): 16 | if hasattr(o, '__dict__'): 17 | d = o.__dict__ 18 | # workaround: decode strings if it's not Python3 code 19 | if not is_python3: 20 | for k in d: 21 | if isinstance(d[k], str): 22 | if k == 's': 23 | d[k] = lines[d['start']:d['end']] 24 | else: 25 | d[k] = d[k].decode(enc) 26 | d['type'] = o.__class__.__name__ 27 | return d 28 | else: 29 | return str(o) 30 | 31 | 32 | enc = 'latin1' 33 | lines = '' 34 | 35 | def parse_dump(filename, output, end_mark): 36 | try: 37 | if is_python3: 38 | encoder = AstEncoder() 39 | else: 40 | encoder = AstEncoder(encoding=enc) 41 | 42 | tree = parse_file(filename) 43 | encoded = encoder.encode(tree) 44 | f = open(output, "w") 45 | f.write(encoded) 46 | f.close() 47 | finally: 48 | # write marker file to signal write end 49 | f = open(end_mark, "w") 50 | f.close() 51 | 52 | def parse_json(filename): 53 | #print 'parse_json' 54 | try: 55 | if is_python3: 56 | encoder = AstEncoder() 57 | else: 58 | encoder = AstEncoder(encoding=enc) 59 | 60 | tree = parse_file(filename) 61 | encoded = encoder.encode(tree) 62 | return encoded 63 | except: 64 | return "" 65 | 66 | 67 | def parse_file(filename): 68 | # print 'parse file' 69 | global enc, lines 70 | enc, enc_len = detect_encoding(filename) 71 | f = codecs.open(filename, 'r', enc) 72 | lines = f.read() 73 | 74 | # remove BOM 75 | lines = re.sub(u'\ufeff', ' ', lines) 76 | 77 | # replace the encoding decl by spaces to fool python parser 78 | # otherwise you get 'encoding decl in unicode string' syntax error 79 | # print('enc:', enc, 'enc_len', enc_len) 80 | if enc_len > 0: 81 | lines = re.sub('#.*coding\s*[:=]\s*[\w\d\-]+', '#' + ' ' * (enc_len-1), lines) 82 | 83 | f.close() 84 | return parse_string(lines, filename) 85 | 86 | 87 | def parse_string(string, filename=None): 88 | # print 'parse string' 89 | tree = ast.parse(string) 90 | # print 'before improve' 91 | improve_ast(tree, string) 92 | # print 'after improve' 93 | if filename: 94 | tree.filename = filename 95 | return tree 96 | 97 | 98 | # short function for experiments 99 | def p(filename): 100 | parse_dump(filename, "json1", "end1") 101 | 102 | 103 | def detect_encoding(path): 104 | fin = open(path, 'rb') 105 | prefix = str(fin.read(80)) 106 | encs = re.findall('#.*coding\s*[:=]\s*([\w\d\-]+)', prefix) 107 | decl = re.findall('#.*coding\s*[:=]\s*[\w\d\-]+', prefix) 108 | 109 | if encs: 110 | enc1 = encs[0] 111 | enc_len = len(decl[0]) 112 | try: 113 | info = codecs.lookup(enc1) 114 | # print('lookedup: ', info) 115 | except LookupError: 116 | # print('encoding not exist: ' + enc1) 117 | return 'latin1', enc_len 118 | return enc1, enc_len 119 | else: 120 | return 'latin1', -1 121 | 122 | 123 | #------------------------------------------------------------- 124 | # improvements to the AST 125 | #------------------------------------------------------------- 126 | def improve_ast(node, s): 127 | build_index_map(s) 128 | improve_node(node, s) 129 | 130 | 131 | # build global table 'idxmap' for lineno <-> index oonversion 132 | def build_index_map(s): 133 | global line_starts 134 | idx = 0 135 | line_starts = [0] 136 | while idx < len(s): 137 | if s[idx] == '\n': 138 | line_starts.append(idx + 1) 139 | idx += 1 140 | 141 | 142 | # convert (line, col) to offset index 143 | def map_idx(line, col): 144 | return line_starts[line - 1] + col 145 | 146 | 147 | # convert offset index into (line, col) 148 | def map_line_col(idx): 149 | line = 0 150 | for start in line_starts: 151 | if idx < start: 152 | break 153 | line += 1 154 | col = idx - line_starts[line - 1] 155 | return (line, col) 156 | 157 | 158 | def improve_node(node, s): 159 | if isinstance(node, list): 160 | for n in node: 161 | improve_node(n, s) 162 | 163 | elif isinstance(node, AST): 164 | 165 | find_start(node, s) 166 | find_end(node, s) 167 | add_missing_names(node, s) 168 | 169 | for f in node_fields(node): 170 | improve_node(f, s) 171 | 172 | 173 | def find_start(node, s): 174 | ret = None # default value 175 | 176 | if hasattr(node, 'start'): 177 | ret = node.start 178 | 179 | elif isinstance(node, list): 180 | if node != []: 181 | ret = find_start(node[0], s) 182 | 183 | elif isinstance(node, Module): 184 | if node.body != []: 185 | ret = find_start(node.body[0], s) 186 | 187 | elif isinstance(node, BinOp): 188 | leftstart = find_start(node.left, s) 189 | if leftstart != None: 190 | ret = leftstart 191 | else: 192 | ret = map_idx(node.lineno, node.col_offset) 193 | 194 | elif hasattr(node, 'lineno'): 195 | if node.col_offset >= 0: 196 | ret = map_idx(node.lineno, node.col_offset) 197 | else: # special case for """ strings 198 | i = map_idx(node.lineno, node.col_offset) 199 | while i > 0 and i + 2 < len(s) and s[i:i + 3] != '"""' and s[i:i + 3] != "'''": 200 | i -= 1 201 | ret = i 202 | else: 203 | return None 204 | 205 | if ret == None and hasattr(node, 'lineno'): 206 | raise TypeError("got None for node that has lineno", node) 207 | 208 | if isinstance(node, AST) and ret != None: 209 | node.start = ret 210 | 211 | return ret 212 | 213 | 214 | def find_end(node, s): 215 | the_end = None 216 | 217 | if hasattr(node, 'end'): 218 | return node.end 219 | 220 | elif isinstance(node, list): 221 | if node != []: 222 | the_end = find_end(node[-1], s) 223 | 224 | elif isinstance(node, Module): 225 | if node.body != []: 226 | the_end = find_end(node.body[-1], s) 227 | 228 | elif isinstance(node, Expr): 229 | the_end = find_end(node.value, s) 230 | 231 | elif isinstance(node, Str): 232 | i = find_start(node, s) 233 | while s[i] != '"' and s[i] != "'": 234 | i += 1 235 | 236 | if i + 2 < len(s) and s[i:i + 3] == '"""': 237 | q = '"""' 238 | i += 3 239 | elif i + 2 < len(s) and s[i:i + 3] == "'''": 240 | q = "'''" 241 | i += 3 242 | elif s[i] == '"': 243 | q = '"' 244 | i += 1 245 | elif s[i] == "'": 246 | q = "'" 247 | i += 1 248 | else: 249 | print("illegal quote:", i, s[i]) 250 | q = '' 251 | 252 | if q != '': 253 | the_end = end_seq(s, q, i) 254 | 255 | elif isinstance(node, Name): 256 | the_end = find_start(node, s) + len(node.id) 257 | 258 | elif isinstance(node, Attribute): 259 | the_end = end_seq(s, node.attr, find_end(node.value, s)) 260 | 261 | elif isinstance(node, FunctionDef): 262 | the_end = find_end(node.body, s) 263 | 264 | elif isinstance(node, Lambda): 265 | the_end = find_end(node.body, s) 266 | 267 | elif isinstance(node, ClassDef): 268 | the_end = find_end(node.body, s) 269 | 270 | # print will be a Call in Python 3 271 | elif not is_python3 and isinstance(node, Print): 272 | the_end = start_seq(s, '\n', find_start(node, s)) 273 | 274 | elif isinstance(node, Call): 275 | start = find_end(node.func, s) 276 | if start != None: 277 | the_end = match_paren(s, '(', ')', start) 278 | 279 | elif isinstance(node, Yield): 280 | the_end = find_end(node.value, s) 281 | 282 | elif isinstance(node, Return): 283 | if node.value != None: 284 | the_end = find_end(node.value, s) 285 | else: 286 | the_end = find_start(node, s) + len('return') 287 | 288 | elif (isinstance(node, For) or 289 | isinstance(node, While) or 290 | isinstance(node, If) or 291 | isinstance(node, IfExp)): 292 | if node.orelse != []: 293 | the_end = find_end(node.orelse, s) 294 | else: 295 | the_end = find_end(node.body, s) 296 | 297 | elif isinstance(node, Assign) or isinstance(node, AugAssign): 298 | the_end = find_end(node.value, s) 299 | 300 | elif isinstance(node, BinOp): 301 | the_end = find_end(node.right, s) 302 | 303 | elif isinstance(node, BoolOp): 304 | the_end = find_end(node.values[-1], s) 305 | 306 | elif isinstance(node, Compare): 307 | the_end = find_end(node.comparators[-1], s) 308 | 309 | elif isinstance(node, UnaryOp): 310 | the_end = find_end(node.operand, s) 311 | 312 | elif isinstance(node, Num): 313 | the_end = find_start(node, s) + len(str(node.n)) 314 | 315 | elif isinstance(node, List): 316 | the_end = match_paren(s, '[', ']', find_start(node, s)); 317 | 318 | elif isinstance(node, Subscript): 319 | the_end = match_paren(s, '[', ']', find_start(node, s)); 320 | 321 | elif isinstance(node, Tuple): 322 | if node.elts != []: 323 | the_end = find_end(node.elts[-1], s) 324 | 325 | elif isinstance(node, Dict): 326 | the_end = match_paren(s, '{', '}', find_start(node, s)); 327 | 328 | elif ((not is_python3 and isinstance(node, TryExcept)) or 329 | (is_python3 and isinstance(node, Try))): 330 | if node.orelse != []: 331 | the_end = find_end(node.orelse, s) 332 | elif node.handlers != []: 333 | the_end = find_end(node.handlers, s) 334 | else: 335 | the_end = find_end(node.body, s) 336 | 337 | elif isinstance(node, ExceptHandler): 338 | the_end = find_end(node.body, s) 339 | 340 | elif isinstance(node, Pass): 341 | the_end = find_start(node, s) + len('pass') 342 | 343 | elif isinstance(node, Break): 344 | the_end = find_start(node, s) + len('break') 345 | 346 | elif isinstance(node, Continue): 347 | the_end = find_start(node, s) + len('continue') 348 | 349 | elif isinstance(node, Global): 350 | the_end = start_seq(s, '\n', find_start(node, s)) 351 | 352 | elif isinstance(node, Import): 353 | the_end = find_start(node, s) + len('import') 354 | 355 | elif isinstance(node, ImportFrom): 356 | the_end = find_start(node, s) + len('from') 357 | 358 | else: # can't determine node end, set to 3 chars after start 359 | start = find_start(node, s) 360 | if start != None: 361 | the_end = start + 3 362 | 363 | if isinstance(node, AST) and the_end != None: 364 | node.end = the_end 365 | 366 | return the_end 367 | 368 | 369 | def add_missing_names(node, s): 370 | if hasattr(node, 'extra_attr'): 371 | return 372 | 373 | if isinstance(node, list): 374 | for n in node: 375 | add_missing_names(n, s) 376 | 377 | elif isinstance(node, ClassDef): 378 | head = find_start(node, s) 379 | start = s.find("class", head) + len("class") 380 | if start != None: 381 | node.name_node = str_to_name(s, start) 382 | node._fields += ('name_node',) 383 | 384 | elif isinstance(node, FunctionDef): 385 | # skip to "def" because it may contain decorators like @property 386 | head = find_start(node, s) 387 | start = s.find("def", head) + len("def") 388 | if start != None: 389 | node.name_node = str_to_name(s, start) 390 | node._fields += ('name_node',) 391 | 392 | # keyword_start = find_start(node, s) 393 | # node.keyword_node = str_to_name(s, keyword_start) 394 | # node._fields += ('keyword_node',) 395 | 396 | if node.args.vararg != None: 397 | if len(node.args.args) > 0: 398 | vstart = find_end(node.args.args[-1], s) 399 | else: 400 | vstart = find_end(node.name_node, s) 401 | if vstart != None: 402 | vname = str_to_name(s, vstart) 403 | node.vararg_name = vname 404 | else: 405 | node.vararg_name = None 406 | node._fields += ('vararg_name',) 407 | 408 | if node.args.kwarg != None: 409 | if len(node.args.args) > 0: 410 | kstart = find_end(node.args.args[-1], s) 411 | else: 412 | kstart = find_end(node.vararg_name, s) 413 | if kstart: 414 | kname = str_to_name(s, kstart) 415 | node.kwarg_name = kname 416 | else: 417 | node.kwarg_name = None 418 | node._fields += ('kwarg_name',) 419 | 420 | elif isinstance(node, Attribute): 421 | start = find_end(node.value, s) 422 | if start is not None: 423 | name = str_to_name(s, start) 424 | node.attr_name = name 425 | node._fields = ('value', 'attr_name') # remove attr for node size accuracy 426 | 427 | elif isinstance(node, Compare): 428 | start = find_start(node, s) 429 | if start is not None: 430 | node.opsName = convert_ops(node.ops, s, start) 431 | node._fields += ('opsName',) 432 | 433 | elif (isinstance(node, BoolOp) or 434 | isinstance(node, BinOp) or 435 | isinstance(node, UnaryOp) or 436 | isinstance(node, AugAssign)): 437 | if hasattr(node, 'left'): 438 | start = find_end(node.left, s) 439 | else: 440 | start = find_start(node, s) 441 | if start is not None: 442 | ops = convert_ops([node.op], s, start) 443 | else: 444 | ops = [] 445 | if ops != []: 446 | node.op_node = ops[0] 447 | node._fields += ('op_node',) 448 | 449 | elif isinstance(node, Num): 450 | if isinstance(node.n, int) or (not is_python3 and isinstance(node.n, long)): 451 | type = 'int' 452 | node.n = str(node.n) 453 | elif isinstance(node.n, float): 454 | type = 'float' 455 | node.n = str(node.n) 456 | elif isinstance(node.n, complex): 457 | type = 'complex' 458 | node.real = node.n.real 459 | node.imag = node.n.imag 460 | node._fields += ('real', 'imag') 461 | 462 | node.num_type = type 463 | node._fields += ('num_type',) 464 | 465 | node.extra_attr = True 466 | 467 | 468 | #------------------------------------------------------------- 469 | # utilities used by improve AST functions 470 | #------------------------------------------------------------- 471 | 472 | # find a sequence in a string s, returning the start point 473 | def start_seq(s, pat, start): 474 | try: 475 | return s.index(pat, start) 476 | except ValueError: 477 | return len(s) 478 | 479 | 480 | # find a sequence in a string s, returning the end point 481 | def end_seq(s, pat, start): 482 | try: 483 | return s.index(pat, start) + len(pat) 484 | except ValueError: 485 | return len(s) 486 | 487 | 488 | # find matching close paren from start 489 | def match_paren(s, open, close, start): 490 | while start < len(s) and s[start] != open: 491 | start += 1 492 | if start >= len(s): 493 | return len(s) 494 | 495 | left = 1 496 | i = start + 1 497 | while left > 0 and i < len(s): 498 | if s[i] == open: 499 | left += 1 500 | elif s[i] == close: 501 | left -= 1 502 | i += 1 503 | return i 504 | 505 | 506 | # convert string to Name 507 | def str_to_name(s, start): 508 | i = start; 509 | while i < len(s) and not is_alpha(s[i]): 510 | i += 1 511 | name_start = i 512 | 513 | ret = [] 514 | while i < len(s) and is_alpha(s[i]): 515 | ret.append(s[i]) 516 | i += 1 517 | name_end = i 518 | 519 | id1 = ''.join(ret) 520 | if id1 == '': 521 | return None 522 | else: 523 | name = Name(id1, None) 524 | name.start = name_start 525 | name.end = name_end 526 | name.lineno, name.col_offset = map_line_col(name_start) 527 | return name 528 | 529 | 530 | def convert_ops(ops, s, start): 531 | syms = [] 532 | for op in ops: 533 | if type(op) in ops_map: 534 | syms.append(ops_map[type(op)]) 535 | else: 536 | print("[WARNING] operator %s is missing from ops_map, " 537 | "please report the bug on GitHub" % op) 538 | 539 | i = start 540 | j = 0 541 | ret = [] 542 | while i < len(s) and j < len(syms): 543 | oplen = len(syms[j]) 544 | if s[i:i + oplen] == syms[j]: 545 | op_node = Name(syms[j], None) 546 | op_node.start = i 547 | op_node.end = i + oplen 548 | op_node.lineno, op_node.col_offset = map_line_col(i) 549 | ret.append(op_node) 550 | j += 1 551 | i = op_node.end 552 | else: 553 | i += 1 554 | return ret 555 | 556 | 557 | # lookup table for operators for convert_ops 558 | ops_map = { 559 | # compare: 560 | Eq: '==', 561 | NotEq: '!=', 562 | LtE: '<=', 563 | Lt: '<', 564 | GtE: '>=', 565 | Gt: '>', 566 | NotIn: 'not in', 567 | In: 'in', 568 | IsNot: 'is not', 569 | Is: 'is', 570 | 571 | # BoolOp 572 | Or: 'or', 573 | And: 'and', 574 | Not: 'not', 575 | Invert: '~', 576 | 577 | # bit operators 578 | BitOr: '|', 579 | BitAnd: '&', 580 | BitXor: '^', 581 | RShift: '>>', 582 | LShift: '<<', 583 | 584 | 585 | # BinOp 586 | Add: '+', 587 | Sub: '-', 588 | Mult: '*', 589 | Div: '/', 590 | FloorDiv: '//', 591 | Mod: '%', 592 | Pow: '**', 593 | 594 | # UnaryOp 595 | USub: '-', 596 | UAdd: '+', 597 | } 598 | 599 | 600 | # get list of fields from a node 601 | def node_fields(node): 602 | ret = [] 603 | for field in node._fields: 604 | if field != 'ctx' and hasattr(node, field): 605 | ret.append(getattr(node, field)) 606 | return ret 607 | 608 | 609 | # get full source text where the node is from 610 | def node_source(node): 611 | if hasattr(node, 'node_source'): 612 | return node.node_source 613 | else: 614 | return None 615 | 616 | 617 | # utility for getting exact source code part of the node 618 | def src(node): 619 | return node.node_source[node.start: node.end] 620 | 621 | 622 | def start(node): 623 | if hasattr(node, 'start'): 624 | return node.start 625 | else: 626 | return 0 627 | 628 | 629 | def end(node): 630 | if hasattr(node, 'end'): 631 | return node.end 632 | else: 633 | return None 634 | 635 | 636 | def is_alpha(c): 637 | return (c == '_' 638 | or ('0' <= c <= '9') 639 | or ('a' <= c <= 'z') 640 | or ('A' <= c <= 'Z')) 641 | 642 | 643 | # p('/Users/yinwang/Code/django/tests/invalid_models/invalid_models/models.py') 644 | # p('/Users/yinwang/Dropbox/prog/pysonar2/tests/test-unicode/test1.py') 645 | # p('/Users/yinwang/Code/cpython/Lib/lib2to3/tests/data/bom.py') 646 | # p('/Users/yinwang/Code/cpython/Lib/test/test_decimal.py') 647 | # p('/Users/yinwang/Code/cpython/Lib/test/test_pep3131.py') 648 | # p('/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/tarfile.py') 649 | # p('/Users/yinwang/Code/cpython/Lib/lib2to3/tests/data/false_encoding.py') 650 | # p('/System/Library/Frameworks/Python.framework/Versions/2.5/lib/python2.5/test/test_marshal.py') 651 | # p('/System/Library/Frameworks/Python.framework/Versions/2.5/lib/python2.5/lib-tk/Tix.py') 652 | 653 | #p('libssh2_login_test.py.bak') 654 | #p('arg.py') 655 | -------------------------------------------------------------------------------- /judge_injection.py: -------------------------------------------------------------------------------- 1 | #!env python 2 | #coding=utf-8 3 | # 4 | # 5 | # Created Time: Fri 21 Nov 2014 10:49:03 AM GMT-8 6 | # 7 | # FileName: judge_injection.py 8 | # 9 | # Description: 10 | # 11 | # ChangeLog: 12 | 13 | import dump_python 14 | import logging 15 | import color_log 16 | import json 17 | import os 18 | import re 19 | import traceback 20 | import sys 21 | import subprocess 22 | from optparse import OptionParser 23 | from collections import OrderedDict 24 | 25 | #logger = color_log.init_log(logging.DEBUG) 26 | #logger = color_log.init_log(logging.INFO) 27 | #logger = color_log.init_log(logging.WARNING) 28 | logger = color_log.init_log(logging.ERROR) 29 | DEBUG = False 30 | args_ori = set([]) 31 | is_arg_in = False 32 | is_arg_return_op = False 33 | UNSAFE_FUNCS = ["os.system", "os.popen", "os.spawnl",'os.spawnle','os.spawnlp','os.spawnlpe',\ 34 | 'os.spawnv','os.spawnve','os.spawnvp','os.spawnvpe','os.execv','os.execve','os.execvp',\ 35 | 'os.execvpe','os.open', 'os.popen2','os.popen3', 'os.popen4','os.putenv', 'os.rename',\ 36 | 'os.renames','call','Popen','Popen2','getoutput','getstatusoutput','eval','open','file'] 37 | FILE_UNSAFE_FUNCS = set() 38 | FILE_SQL_UNSAFE_FUNCS = set() 39 | UNTREATED_FUNS = set(['open','readline','read','readlines','next','query2dict']) 40 | STR_FUNCS = ['str','unicode','encode','strip','rstrip','lstrip','lower','upper','split','splitlines', 'replace','join'] 41 | OTHER_UNSAFE_FUNC = ['HttpResponse','os.remove','os.rmdir','os.removedirs','os.rmtree','os.unlink','pickle.loads'] 42 | 43 | SAFE_FUNCS = ['safe_eval'] 44 | SQL_FUNCS = ['execute', 'raw'] 45 | CMD_COUNT = 0 46 | used_import_files = [] 47 | import_func_all = {} 48 | REQUEST_VAR = ['GET', 'POST', 'FILES', 'COOKIES', 'REQUEST'] 49 | 50 | 51 | 52 | class judge_injection(object): 53 | """根据语法树自动判断注入攻击""" 54 | def __init__(self, filename, check_type): 55 | try: 56 | self.tree = dump_python.parse_json(filename) 57 | except Exception,e: 58 | self.tree = "{}" 59 | print e 60 | # print 'self.tree',self.tree 61 | self.tree = json.loads(self.tree) 62 | rec_decrease_tree(self.tree) 63 | if DEBUG: 64 | # rec_decrease_tree(self.tree) 65 | try: 66 | fd = open(filename+".json", 'w') 67 | json.dump(self.tree, fd) 68 | fd.flush() 69 | fd.close() 70 | except: 71 | pass 72 | self.filename = self.tree.get("filename") 73 | self.start = self.tree.get("start") 74 | self.body = self.tree.get("body") 75 | self.func = {} 76 | self.func_lines = {}#获取一个函数的执行代码 77 | self.check_type = check_type 78 | with open(self.filename, 'r') as fd: 79 | self.lines = fd.readlines() 80 | self.unsafe_func = set()#记录本文件中自己的危险函数 81 | self.untreated_func = set()#记录那些函数参数到返回值是可控的函数 82 | self.record_unsafe_func = OrderedDict({}) #用于打印危险函数 83 | self.record_other_unsafe_func = OrderedDict({}) #用于打印危险函数 84 | self.record_param = {} 85 | self.import_module = {} 86 | self.import_func = {} 87 | self.arg = {}#主要用于获取类的参数 88 | logger.debug("filename:%s" %(self.filename)) 89 | 90 | 91 | def get_risk_func(self): 92 | """用于输入系统危险函数, not used any more""" 93 | funcs = ["os.system", "os.popen", "subprocess.call", "subprocess.Popen",\ 94 | "commands.getoutput", "commands.getstatusoutput","pickle.loads"] 95 | funcs = ["system", "popen", "call", "Popen", "getoutput", "getstatusoutput", \ 96 | "eval", "spawnl", 'popen2', 'popen3', 'popen4'] 97 | return funcs 98 | 99 | def get_func_objects(self, body, class_name=None): 100 | """获取语法树中的函数结构们""" 101 | for obj in body:#代码行 102 | if obj.get("type") == "FunctionDef": 103 | if class_name: 104 | key = obj.get('name')+":"+class_name 105 | else: 106 | key = obj.get('name')+":" 107 | self.func.setdefault(key, obj) 108 | logger.debug("func:%r" %(obj)) 109 | elif obj.get('type') == 'ClassDef': 110 | self.get_func_objects(obj.get('body'), obj.get('name')) 111 | 112 | return 113 | 114 | def get_import_modules(self, body): 115 | """获取文件的import模块""" 116 | pythonpaths = [] 117 | if os.environ.get('PYTHONPATH'): 118 | pythonpaths = [path for path in os.environ.get('PYTHONPATH').split(":") if 'python' not in path] 119 | #根据是否包含python是否是系统文件虽不是很科学还是可用 120 | pythonpaths.insert(0, os.path.dirname(self.filename)) 121 | for obj in body: 122 | if obj.get('type') == 'Import': 123 | for module in obj.get('names'): 124 | module_py = module.get('name') + '.py' 125 | for path in pythonpaths: 126 | if os.path.isfile(path + '/' + module_py): 127 | self.import_module.setdefault(path + '/' + module_py,{self.filename:module.get('asname')}) 128 | if obj.get('type') == 'ImportFrom': 129 | 130 | for path in pythonpaths: 131 | if obj.get('module'): 132 | module_path = path+'/'+obj.get('module').replace('.','/') 133 | else: 134 | module_path = '' 135 | if os.path.isfile(module_path + '.py'): 136 | self.import_module.setdefault(module_path + '.py') 137 | for func_name in obj.get('names'): 138 | self.import_func.setdefault(func_name.get('asname'),func_name.get('name')) 139 | elif os.path.isdir(module_path): 140 | for name in obj.get('names'): 141 | module_py = module_path + '/' + name.get('name') + '.py' 142 | if os.path.isfile(module_py): 143 | self.import_module.setdefault(module_py) 144 | if obj.get('type') == 'FunctionDef': 145 | self.get_import_modules(obj.get('body')) 146 | 147 | 148 | 149 | def get_func_lines(self, func, func_name): 150 | """获取函数的执行的行,找到func""" 151 | #if "body" in func: 152 | if isinstance(func, dict) and 'body' in func: 153 | lines = func.get('body') 154 | elif isinstance(func, list): 155 | lines = func 156 | elif isinstance(func, dict) and func.get('type') == 'Call': 157 | lines = [func] 158 | else: 159 | lines = [] 160 | 161 | for line in lines: 162 | ast_body = line.get('body') 163 | ast_orelse = line.get('orelse') 164 | ast_handlers = line.get('handlers') 165 | ast_test = line.get('test') 166 | ast_args = line.get('args') 167 | # print "line:",line 168 | if "value" in line and line.get('value') and "func" in line.get("value"): 169 | self.func_lines[func_name].append(line) 170 | continue 171 | elif line.get('type') == 'Call': 172 | self.func_lines[func_name].append(line) 173 | continue 174 | 175 | if ast_body: 176 | self.get_func_lines(ast_body, func_name) 177 | if ast_orelse: 178 | self.get_func_lines(ast_orelse, func_name) 179 | if ast_handlers: 180 | self.get_func_lines(ast_handlers, func_name) 181 | if ast_test and ast_test.get('type') == 'Compare': 182 | if ast_test.get('comparators'): 183 | self.get_func_lines(ast_test.get('comparators'), func_name) 184 | if ast_test.get('left'): 185 | self.get_func_lines(ast_test.get('left'), func_name) 186 | if ast_test and ast_test.get('type') == 'BoolOp': 187 | for value in ast_test.get('values'): 188 | if value.get('comparators'): 189 | self.get_func_lines(value.get('comparators'), func_name) 190 | if value.get('left'): 191 | self.get_func_lines(value.get('left'), func_name) 192 | 193 | if ast_args: 194 | self.get_func_lines(ast_args, func_name) 195 | 196 | 197 | return 198 | 199 | def parse_func(self, func, class_name, analyse_all): 200 | global leafs 201 | global args_ori 202 | global is_arg_in 203 | global CMD_COUNT 204 | global is_arg_return_op 205 | is_arg_return_op = False 206 | arg_leafs = [] 207 | func_name = func.get("name") 208 | logger.debug("function_name:%s" %(func_name)) 209 | args_ori = set([arg.get("id") for arg in func.get('args').get("args")]) #arg.id 210 | if class_name and self.arg.get(class_name): 211 | arg_tmp = set(self.arg.get(class_name)) 212 | args_ori = args_ori|arg_tmp 213 | logger.debug("args:%s" %str(args_ori)) 214 | self.func_lines.setdefault(func_name, []) 215 | self.get_func_lines(func, func_name) 216 | lines = self.func_lines[func_name] 217 | logger.debug("func_lines:%r" %(lines)) 218 | # if analyse_all: 219 | look_up_arg(func, args_ori, arg_leafs,func_name, self.import_func, self.check_type.get('verbose')) 220 | if func_name == '__init__': 221 | self.arg.setdefault(class_name, args_ori) 222 | # self.record_param.setdefault(func_name, args_ori) 223 | self.record_param[func_name] = args_ori 224 | if not analyse_all: 225 | print 'func,record_param:', func_name,self.record_param.get(func_name) 226 | # is_arg_return(func, args_ori) 227 | # print 'is_arg_return_op:',is_arg_return_op 228 | # if is_arg_in and not is_arg_return_op: 229 | # if func_name not in ("__init__"): 230 | # FILE_UNSAFE_FUNCS.add(func_name) 231 | # print "func_lines:", lines 232 | # print "func_:", func 233 | 234 | #对所有有函数执行的语句做进一步处理 235 | for line in lines: 236 | #print "all:%r" %(line) 237 | # print "*"*20 238 | arg_leafs = [] 239 | is_arg_in = False 240 | value = line.get("value") 241 | lineno = line.get("lineno") 242 | if (value and value.get("type") == "Call") or (line and line.get('type') == 'Call'): 243 | logger.debug("value:%r" %(value)) 244 | line_func = value.get("func") if value else line.get('func') 245 | line_func = value if value and value.get('type')=='Call' else line 246 | value_args = value.get('args') if value else line.get('args') 247 | value = value if value else line 248 | func_ids = [] 249 | rec_get_func_ids(line_func, func_ids) 250 | func_ids = set(func_ids) 251 | rec_find_args(value, arg_leafs) 252 | 253 | logger.info("arg_leafs:%r" %(arg_leafs)) 254 | logger.info("func_ids:%r" %(func_ids)) 255 | logger.info("record_param:%r" %(self.record_param.get(func_name))) 256 | # if analyse_all: 257 | # look_up_arg(func, args_ori, arg_leafs,func_name) 258 | # print "UNTREATED_FUNS", UNTREATED_FUNS 259 | if self.check_type.get('cmd') and func_ids and (func_ids&((set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)))) and arg_leafs: 260 | if self.check_type.get('verbose') and arg_leafs: 261 | print "CMD--FILE:%s,FUNCTION:%s,LINE:%s" %(self.filename, func_name, lineno ) 262 | if set(arg_leafs)&set(self.record_param.get(func_name)): 263 | if not is_arg_return_op and func_name not in ("__init__"): 264 | FILE_UNSAFE_FUNCS.add(func_name) 265 | self.record_unsafe_func.setdefault(lineno, {'func_name':func_name, 'args':args_ori, 'func_ids':func_ids,'arg_leafs':arg_leafs }) 266 | CMD_COUNT = CMD_COUNT + 1 267 | if self.check_type.get('cmd') and func_ids and (func_ids&(set(OTHER_UNSAFE_FUNC))) and arg_leafs: 268 | if set(arg_leafs)&set(self.record_param.get(func_name)): 269 | self.record_other_unsafe_func.setdefault(lineno, {'func_name':func_name, 'args':args_ori, 'func_ids':func_ids,'arg_leafs':arg_leafs }, ) 270 | 271 | 272 | if self.check_type.get('sql') and func_ids and (func_ids&((set(['execute','raw'])|FILE_SQL_UNSAFE_FUNCS))) and arg_leafs: 273 | if self.check_type.get('verbose') and arg_leafs: 274 | print "SQL--FILE:%s,FUNCTION:%s,LINE:%s" %(self.filename, func_name, lineno ) 275 | if len(arg_leafs) != 2 and set(arg_leafs)&set(self.record_param.get(func_name)):#execute,raw在两个参数的情况下django做了过滤 276 | print self.lines[lineno - 1] 277 | FILE_SQL_UNSAFE_FUNCS.add(func_name) 278 | self.record_unsafe_func.setdefault(lineno, {'func_name':func_name, 'args':args_ori, 'func_ids':func_ids,'arg_leafs':arg_leafs }, ) 279 | # print "cmd_count:",CMD_COUNT 280 | 281 | def parse_py(self): 282 | self.get_func_objects(self.body) 283 | 284 | for key, func in self.func.iteritems(): 285 | self.parse_func(func, key.split(":")[1], True) 286 | # print "file_unsafe_func:", FILE_UNSAFE_FUNCS 287 | # print "*****"*50 288 | for key, func in self.func.iteritems(): 289 | self.parse_func(func, key.split(":")[1], False) 290 | for key, func in self.func.iteritems(): 291 | self.parse_func(func, key.split(":")[1], False) 292 | 293 | # print 'COUNT',CMD_COUNT 294 | 295 | 296 | # def chained_relation(record_unsafe_func): 297 | # for key, value in record_unsafe_func.iteritems(): 298 | 299 | 300 | def record_all_func(self): 301 | from copy import deepcopy 302 | record = {} 303 | tmp_record_unsafe_func = deepcopy(self.record_unsafe_func) 304 | for key, value in tmp_record_unsafe_func.iteritems(): 305 | for func_id in value.get('func_ids'): 306 | for func in tmp_record_unsafe_func.values(): 307 | if func_id in func.get('func_name'): 308 | record.setdefault(key, [value.get('func_name'),func_id,str(func.get('func_ids'))]) 309 | 310 | for key, value in record.iteritems(): 311 | logger.error("File:%s,line:%s,function:%s" %(self.filename, key, '--->'.join(value))) 312 | 313 | for key, value in self.record_unsafe_func.iteritems(): 314 | logger.error("maybe injected File:%s,line:%s,function:%s--->%r" %(self.filename, key, value.get('func_name'), value.get('func_ids'))) 315 | print self.lines[key - 1] 316 | if 'request' in value.get('arg_leafs'): 317 | logger.critical("maybe injected File:%s,line:%s,function:%s--->%r" %(self.filename, key, value.get('func_name'), value.get('func_ids'))) 318 | 319 | for key,value in self.record_other_unsafe_func.iteritems(): 320 | logger.error("File:%s,line:%s,function:%s,dangerous_func:%r" %(self.filename, key, value.get('func_name'), value.get('func_ids'))) 321 | print self.lines[key - 1] 322 | 323 | #print "FILE_UNSAFE_FUNCS",FILE_UNSAFE_FUNCS 324 | 325 | 326 | def find_all_leafs(args, leafs): 327 | 328 | for arg in args: 329 | find_arg_leafs(arg, leafs) 330 | 331 | 332 | def find_func_leafs(value, args_ori, target_ids, import_func): 333 | """处理函数情况""" 334 | value_arg_ids = [] 335 | rec_find_args(value, value_arg_ids) 336 | value_func_ids = [] 337 | rec_get_func_ids(value.get('func'), value_func_ids) 338 | value_func_ids = set(value_func_ids) 339 | value_func_type = value.get('func').get('type') 340 | value_func = value.get('func') 341 | (topids, parent) = ([], {}) 342 | rec_get_attr_top_id(value_func, parent, topids) 343 | 344 | if value_arg_ids or topids: 345 | #处理普通方法 346 | if value_func_type == 'Name' and (set(value_arg_ids)&args_ori): 347 | for func_id in set(import_func.keys())&value_func_ids: 348 | value_func_ids.add(import_func.get(func_id)) 349 | value_func_ids.remove(func_id) 350 | 351 | if target_ids and value_func_ids and value_func_ids.issubset((set(STR_FUNCS)|set(UNTREATED_FUNS))): 352 | args_ori.update(target_ids) 353 | logger.info("In Assign,Call:name add(%r) to (%r) where line=(%r)" %(target_ids, args_ori, value.get('lineno'))) 354 | elif target_ids and value_func_ids and value_func_ids&(set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)): 355 | pass 356 | elif target_ids: 357 | args_ori.difference_update(target_ids) 358 | logger.warn("In Assign,Call delete (%r) from (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 359 | 360 | elif value_func_type == 'Attribute':#处理属性方法 361 | 362 | if (set(topids)&set(args_ori)): 363 | if topids[0].lower() == 'request': 364 | if parent and parent.get('type')=='Attribute' and parent.get('attr') in REQUEST_VAR: 365 | if target_ids and not(set(value_arg_ids)&set(target_ids)): 366 | args_ori.update(target_ids) 367 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 368 | elif parent and parent.get('type')=='Attibute': 369 | args_ori.difference_update(set(target_ids)) 370 | logger.info("In Assign,Call:attr del (%r) to (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 371 | elif value_func_ids and value_func_ids.issubset(set(STR_FUNCS)|set(UNTREATED_FUNS)) and set(value_arg_ids)&(args_ori): 372 | if target_ids and not(set(value_arg_ids)&set(target_ids)): 373 | args_ori.update(target_ids) 374 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 375 | else: 376 | if target_ids and not(set(value_arg_ids)&set(target_ids)): 377 | args_ori.update(target_ids) 378 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 379 | elif value_func_ids and value_func_ids.issubset(set(STR_FUNCS)|set(UNTREATED_FUNS)) and (set(value_arg_ids)&set(args_ori)): 380 | if target_ids and not set(value_arg_ids)&set(target_ids): 381 | args_ori.update(target_ids) 382 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r)" %(target_ids,args_ori,value.get('lineno'))) 383 | 384 | elif value_func_ids and value_func_ids&(set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)): 385 | pass 386 | 387 | 388 | 389 | 390 | 391 | def find_arg_leafs(arg, leafs): 392 | """通过递归找到全所有子节点,历史原因复数格式不修正""" 393 | fields = arg.get("_fields") 394 | _type = arg.get('type') 395 | if _type == "Attribute": 396 | parent, topids = {}, [] 397 | rec_get_attr_top_id(arg, parent, topids) 398 | logger.warn("parent:%r,topids:%r" %(parent, topids)) 399 | if topids and 'self' in topids[0].lower() : 400 | leafs.append(topids[0]) 401 | elif topids and topids[0].lower() != 'request' and topids[0].lower() != 'self': 402 | leafs.append(topids[0]) 403 | logger.warn("1parent:%r,topids:%r" %(parent, topids)) 404 | elif topids and parent and parent.get('type')=='Attribute' and parent.get('attr') in REQUEST_VAR: 405 | leafs.append(topids[0]) 406 | logger.warn("2parent:%r,topids:%r" %(parent, topids)) 407 | #find_arg_leafs(arg.get('value'), leafs) 408 | if _type == "Name": 409 | leafs.append(arg.get('id')) 410 | if _type == 'Call': 411 | func_ids = [] 412 | rec_get_func_ids(arg.get('func'), func_ids) 413 | logger.info('func_ids:%r,funcs:%r' %(func_ids,(set(STR_FUNCS)|set(UNTREATED_FUNS)|set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)))) 414 | if set(func_ids)&(set(STR_FUNCS)|set(UNTREATED_FUNS)|set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)): 415 | for value in arg.get('args'): 416 | parent, topids = {}, [] 417 | rec_get_attr_top_id(value, parent, topids) 418 | logger.warn("parent:%r,topids:%r" %(parent, topids)) 419 | logger.warn("value:%r," %(value)) 420 | if topids and 'self' in topids[0].lower() : 421 | leafs.append(topids[0]) 422 | elif topids and topids[0].lower() != 'request' and topids[0].lower() != 'self': 423 | leafs.append(topids[0]) 424 | logger.warn("1parent:%r,topids:%r" %(parent, topids)) 425 | elif topids and parent and parent.get('type')=='Attribute' and parent.get('attr') in REQUEST_VAR: 426 | leafs.append(topids[0]) 427 | logger.warn("2parent:%r,topids:%r" %(parent, topids)) 428 | 429 | for arg_item in arg.get('args'): 430 | find_arg_leafs(arg_item, leafs) 431 | if arg.get('func') and arg.get('func').get('type') != 'Name': 432 | find_arg_leafs(arg.get('func'), leafs) 433 | if _type == 'Subscript': 434 | find_arg_leafs(arg.get('value'), leafs) 435 | if _type == "BinOp" and fields: 436 | if "right" in fields: 437 | if arg.get('right').get('type') == "Name": 438 | right_id = arg.get("right").get("id") 439 | if right_id: 440 | leafs.append(right_id) 441 | elif arg.get('right').get('type') == 'Tuple': 442 | for elt in arg.get('right').get('elts'): 443 | find_arg_leafs(elt, leafs) 444 | elif arg.get('right').get('type') == 'Call': 445 | find_arg_leafs(arg.get('right'), leafs) 446 | 447 | if "left" in fields and not arg.get("left").get("_fields"): 448 | left_id = arg.get('left').get('id') 449 | if left_id: 450 | leafs.append(left_id) 451 | if "left" in fields and arg.get("left").get("_fields"): 452 | find_arg_leafs(arg.get("left"), leafs) 453 | 454 | return 455 | 456 | def is_arg_return(func, args_ori): 457 | """ 458 | 判断是否有对arg参数的可控性判断,比如判读是否数字,是否file等 459 | """ 460 | global is_arg_return_op 461 | 462 | if isinstance(func, dict): 463 | lines = func.get('body') 464 | elif isinstance(func, list): 465 | lines = func 466 | 467 | for line in lines: 468 | is_return = False 469 | is_arg_in = False 470 | is_param = False 471 | ast_body = line.get('body') 472 | ast_orelse = line.get('orelse') 473 | ast_handlers = line.get('handlers') 474 | if line.get('type') == "If": 475 | for body in line.get('body'): 476 | if body.get('type') == "Return": 477 | is_return = True 478 | test = line.get('test') 479 | if line.get('test') and line.get('test').get('type') == "UnaryOp": 480 | operand = line.get('test').get('operand') 481 | if operand: 482 | args = [] 483 | rec_find_args(line.get('test'), args) 484 | if set(args)&set(args_ori): 485 | is_arg_in = True 486 | elif test and test.get('type') == 'Compare': 487 | args = [] 488 | for key,value in test.iteritems(): 489 | if key == 'left': 490 | if test[key].get('type') == 'Name': 491 | args = [test[key].get('id')] 492 | if key == 'comparators': 493 | for comparator in test[key]: 494 | if comparator.get('type') in ("List", 'Tuple'): 495 | for elt in comparator.get('elts'): 496 | if elt.get('type') == 'Name': 497 | is_param = True 498 | 499 | if set(args)&set(args_ori) and not is_param: 500 | is_arg_in = True 501 | 502 | is_arg_return_op = is_return&is_arg_in 503 | if is_arg_return_op:#找到即返回 504 | logger.info("is_arg_return:%r" %(line)) 505 | return 506 | if ast_body: 507 | is_arg_return(ast_body, args_ori) 508 | # if ast_orelse: 509 | # is_arg_return(ast_orelse, args_ori) 510 | # if ast_handlers: 511 | # is_arg_return(ast_handlers, args_ori) 512 | 513 | def rec_find_args(operand, args): 514 | if isinstance(operand, list) or isinstance(operand, tuple): 515 | find_all_leafs(operand, args) 516 | elif isinstance(operand, dict): 517 | if operand.get('type') == 'Call': 518 | if "args" in operand: 519 | find_all_leafs(operand.get('args'), args) 520 | if "value" in operand.get('func'): 521 | rec_find_args(operand.get('func').get('value'), args) 522 | elif operand.get('type') == 'UnaryOp':# not param判断中 523 | rec_find_args(operand.get('operand'), args) 524 | elif operand.get('type') == 'BinOp': 525 | find_arg_leafs(operand, args) 526 | 527 | else: 528 | return 529 | 530 | def rec_get_attr_top_id(func, parent, ids):#获取最顶端的值,eg:request,path[0].split('/') 531 | """ 532 | func = {u'_fields': [u'value', u'attr_name'], u'type': u'Attribute', u'attr': u'get', u'value': {u'_fields': [u'value', u'attr_name'], u'type': u'Attribute', u'attr': u'POST', u'value': {u'type': u'Name', u'lineno': 15, u'id': u'request'}, u'lineno': 15}, u'lineno': 15} 533 | ids: 用于回传结果,只有一个 534 | """ 535 | if func.get('type') == 'Name': 536 | ids.append(func.get('id')) 537 | if func.get('type') == 'Attribute': 538 | parent.update(func) 539 | if func.get('value').get('type') == 'Name' and func.get('value').get('id') == 'self': 540 | ids.append('self.'+func.get('attr')) 541 | return 542 | else: 543 | rec_get_attr_top_id(func.get('value'), parent, ids) 544 | if func.get('type') == 'Call': 545 | parent.update(func) 546 | rec_get_attr_top_id(func.get('func'), parent, ids) 547 | if func.get('type') == 'Subscript': 548 | parent.update(func) 549 | rec_get_attr_top_id(func.get('value'), parent, ids) 550 | return 551 | 552 | 553 | def rec_get_targets(targets, out_targets): 554 | """递归找出target""" 555 | for target in targets: 556 | if target.get('type') == 'Subscript': 557 | rec_get_targets([target.get('value')], out_targets) 558 | elif target.get('type') == 'Name': 559 | out_targets.append(target.get('id')) 560 | elif target.get('type') == 'Attribute': 561 | if target.get('value').get('type') == 'Name' and target.get('value').get('id')=='self': 562 | out_targets.append('self.'+target.get('attr')) 563 | 564 | return 565 | 566 | def look_up_arg(func, args_ori, args, func_name, import_func, verbose): 567 | """递归找出危险函数中的参数是否属于函数参数入口的""" 568 | """ 569 | func 代表测试的函数,args_ori是要被测试的函数的参数,args则是危险函数中的参数 570 | """ 571 | global is_arg_in 572 | if isinstance(func, dict) and 'body' in func: 573 | lines = func.get('body') 574 | elif isinstance(func, list): 575 | lines = func 576 | elif isinstance(func, dict) and func.get('type') == 'Call': 577 | lines = [func] 578 | else: 579 | lines = [] 580 | 581 | for line in lines: 582 | # print 'look_up_arg:line:',line 583 | ast_body = line.get('body') 584 | ast_orelse = line.get('orelse') 585 | ast_handlers = line.get('handlers') 586 | ast_test = line.get('test') 587 | ast_args = line.get('args') 588 | #处理单纯属性 589 | if line.get('type') == 'Assign': 590 | target_ids = [] 591 | rec_get_targets(line.get('targets'), target_ids) 592 | else: 593 | target_ids = [] 594 | 595 | if line.get("type") == "Assign" and "value" in line and line.get("value").get("type")=="Name": 596 | if target_ids and line.get("value").get("id") in args_ori: 597 | args_ori.update(target_ids) 598 | logger.info("In Assign,Name add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 599 | 600 | if line.get("type") == "Assign" and "value" in line and line.get("value").get("type")=="Attribute": 601 | value_func = line.get('value').get('value') 602 | if value_func and value_func.get("type") == 'Name': 603 | if target_ids and value_func.get("id") in args_ori: 604 | args_ori.update(target_ids) 605 | logger.info("In Assign,Attr add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 606 | 607 | else: 608 | topids = [] 609 | parent = {} 610 | rec_get_attr_top_id(value_func, parent, topids) 611 | if (set(topids)&set(args_ori)): 612 | if topids and topids[0].lower() == 'request': 613 | if parent and parent.get('type')=='Attribute' and parent.get('attr') in REQUEST_VAR: 614 | args_ori.update(target_ids) 615 | logger.info("In Assign,Attr add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 616 | elif parent and parent.get('type')=='Attribute': 617 | args_ori.difference_update(set(target_ids)) 618 | logger.warn("In Assign,Attr delete (%r) from (%r) where line=(%r)***************************** line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 619 | 620 | #处理字符串拼接过程 621 | if line.get("type") == "Assign" and "value" in line and line.get("value").get("type")=="BinOp": 622 | # right = line.get('value').get('right') 623 | # if right.get('type') == 'Tuple': 624 | # rec_find_args(right.get('elts')) 625 | leafs = [] 626 | find_arg_leafs(line.get("value"), leafs) 627 | logger.info('----%r----%r' %(args_ori, leafs)) 628 | if (set(args_ori)&set(leafs)): 629 | if target_ids: 630 | args_ori.update(target_ids) 631 | logger.info("In Assign,BinOp add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 632 | #列表解析式 633 | if line.get("type") == "Assign" and "value" in line and line.get("value").get("type") in ("ListComp","SetComp"): 634 | generators = line.get('value').get('generators') 635 | leafs = [] 636 | for generator in generators: 637 | find_arg_leafs(generator.get('iter'), leafs) 638 | if target_ids and (set(args_ori)&set(leafs)): 639 | args_ori.update(target_ids) 640 | logger.info("In Assign,ListComp,SetComp add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 641 | 642 | #处理列表中相加 643 | if line.get('type') == 'Assign' and 'value' in line and line.get('value').get('type') in ('List','Tuple'): 644 | leafs = [] 645 | for elt in line.get('value').get('elts'): 646 | find_arg_leafs(elt, leafs) 647 | if (set(args_ori)&set(leafs)): 648 | if target_ids: 649 | args_ori.update(target_ids) 650 | logger.info("In Assign,List add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 651 | 652 | #处理 tmp= {'bb':a}情况 653 | if line.get('type') == 'Assign' and 'value' in line and line.get('value').get('type') in ('Dict'): 654 | leafs = [] 655 | for value in line.get('value').get('values'): 656 | find_arg_leafs(value, leafs) 657 | if (set(args_ori)&set(leafs)): 658 | if target_ids: 659 | args_ori.update(target_ids) 660 | logger.info("In Assign,Dict add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 661 | 662 | 663 | #处理Subscript分片符情况 664 | if line.get('type') == 'Assign' and 'value' in line and line.get('value').get('type')=='Subscript': 665 | value_type = line.get('value').get('value').get('type') 666 | value_func_ids = [] 667 | rec_get_func_ids(line.get('value').get('value'), value_func_ids) 668 | value_func_ids = set(value_func_ids) 669 | value_arg_ids = [] 670 | find_arg_leafs(line.get('value').get('value'), value_arg_ids) 671 | if value_type == 'Attribute': 672 | if value_func_ids and value_func_ids.issubset((set(REQUEST_VAR)|set(STR_FUNCS))): 673 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 674 | args_ori.update(target_ids) 675 | logger.info("In Assign,Subscript add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 676 | 677 | 678 | #处理调用函数后的赋值,像str,get取值都保留 679 | if line.get("type") == "Assign" and "value" in line and line.get("value").get("type")=="Call": 680 | value_arg_ids = [] 681 | rec_find_args(line.get('value'), value_arg_ids) 682 | value_func_ids = [] 683 | rec_get_func_ids(line.get('value').get('func'), value_func_ids) 684 | value_func_ids = set(value_func_ids) 685 | value_func_type = line.get("value").get('func').get('type') 686 | value_func = line.get('value').get('func') 687 | (topids, parent) = ([], {}) 688 | rec_get_attr_top_id(value_func, parent, topids) 689 | logger.info('In Call:topids:%r,value_arg_ids:%r,value_func_ids:%r,line:%r' %(topids,value_arg_ids, value_func_ids,line)) 690 | 691 | if value_arg_ids or topids: 692 | #处理普通方法 693 | if value_func_type == 'Name' and (set(value_arg_ids)&set(args_ori)): 694 | for func_id in set(import_func.keys())&value_func_ids: 695 | value_func_ids.add(import_func.get(func_id)) 696 | value_func_ids.remove(func_id) 697 | 698 | if target_ids and verbose: #开了verbose模式,函数处理后的则直接加入到变量中 699 | args_ori.update(target_ids) 700 | logger.info("In Assign,Call:Verbose Name add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 701 | else: 702 | if target_ids and value_func_ids and value_func_ids.issubset((set(STR_FUNCS)|set(UNTREATED_FUNS))): 703 | args_ori.update(target_ids) 704 | logger.info("In Assign,Call:Name add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 705 | elif target_ids and value_func_ids and (value_func_ids&((set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS)))): 706 | is_arg_in = True 707 | elif target_ids and value_func_ids and set(value_func_ids)&(set(SAFE_FUNCS)) and not verbose: 708 | args_ori.difference_update(target_ids) 709 | logger.warn("In Assign,Call delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 710 | elif target_ids: 711 | args_ori.difference_update(target_ids) 712 | logger.warn("In Assign,Call delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 713 | # for target in target_ids:#处理cmd=int(cmd) 这种情况 714 | # args_ori.difference_update(target_ids) 715 | # if target in args_ori: 716 | # args_ori.discard(target) 717 | # logger.info("arg_id,assign31:%r,args_ori:%r" %(value_arg_ids, args_ori)) 718 | 719 | elif value_func_type == 'Attribute':#处理属性方法,如从dict取值 720 | 721 | if (set(topids)&set(args_ori)): 722 | if topids[0].lower() == 'request': 723 | if parent and parent.get('type')=='Attribute' and parent.get('attr') in REQUEST_VAR: 724 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 725 | args_ori.update(target_ids) 726 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,parent.get('lineno'), line)) 727 | elif parent and parent.get('type')=='Attribute': 728 | args_ori.difference_update(set(target_ids))#去除target_ids 729 | logger.warn("In Assign,Call:attr delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 730 | 731 | elif value_func_ids and value_func_ids.issubset(set(STR_FUNCS)|set(UNTREATED_FUNS)) and (set(value_arg_ids)&set(args_ori)): 732 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 733 | args_ori.update(target_ids) 734 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 735 | elif value_func_ids and set(value_func_ids)&set(SAFE_FUNCS) and not verbose: 736 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 737 | args_ori.difference_update(target_ids) 738 | logger.warn("In Assign,Call:attr delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 739 | else: 740 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 741 | args_ori.update(target_ids) 742 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 743 | #处理r=unicode(s).encode('utf8') 744 | elif value_func_ids and value_func_ids.issubset(set(STR_FUNCS)|set(UNTREATED_FUNS)) and (set(value_arg_ids)&set(args_ori)): 745 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 746 | args_ori.update(target_ids) 747 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 748 | 749 | elif value_func_ids and value_func_ids.issubset(set(STR_FUNCS)|set(UNTREATED_FUNS)) and (set(topids)&set(args_ori)): 750 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 751 | args_ori.update(target_ids) 752 | logger.info("In Assign,Call:attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 753 | elif value_func_ids and set(value_func_ids)&set(SAFE_FUNCS) and not verbose: 754 | if target_ids: 755 | args_ori.difference_update(target_ids) 756 | logger.warn("In Assign,Call:attr delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 757 | 758 | elif verbose: 759 | if target_ids and not (set(value_arg_ids)&set(target_ids)): 760 | args_ori.update(target_ids) 761 | logger.info("In Assign,Call:Verbose attr add (%r) to (%r) where line=(%r) type=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 762 | 763 | 764 | elif value_func_ids and (value_func_ids&(set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS))):#处理危险函数 765 | leafs = [] 766 | leafs = value_arg_ids 767 | if set(leafs)&set(args_ori): 768 | is_arg_in = True 769 | 770 | if line.get('type') == 'Return' and 'value' in line and line.get('value'): 771 | value_id = line.get('value').get('id') 772 | if value_id and value_id in args_ori : 773 | print 'untrited_func_name',func_name 774 | UNTREATED_FUNS.add(func_name) 775 | 776 | if line.get('type') == 'For': 777 | iter_args = [] 778 | find_arg_leafs(line.get('iter'), iter_args) 779 | if set(iter_args)&set(args_ori): 780 | targets = [] 781 | find_arg_leafs(line.get('target'), targets) 782 | if targets: 783 | args_ori.update(targets) 784 | logger.info("In For Call add (%r) to (%r) where line=(%r) line=(%r)" %(target_ids,args_ori,line.get('lineno'), line)) 785 | 786 | if line.get("type") == "Expr" and "value" in line and line.get("value").get("type")=="Call": 787 | value_arg_ids = [] 788 | rec_find_args(line.get('value'), value_arg_ids) 789 | if set(value_arg_ids)&set(args_ori): 790 | is_arg_in = True 791 | 792 | if line.get('type') == 'Call': #处理if语句中中eval类似函数 793 | func_ids = [] 794 | rec_get_func_ids(line.get('func'), func_ids) 795 | args_tmp = [] 796 | rec_find_args(line, args_tmp) 797 | if (set(args_tmp)&args_ori) and func_ids and (set(func_ids)&(set(UNSAFE_FUNCS)|set(FILE_UNSAFE_FUNCS))): 798 | is_arg_in = True 799 | logger.info('type:call') 800 | # if line.get('type') == 'Ififif': 801 | if line.get('type') == 'If': 802 | is_if_return = False 803 | is_if_param = False 804 | is_in_param = False 805 | 806 | if_judge_func = set(['exists','isfile','isdir','isabs','isdigit']) 807 | for body in line.get('body'): 808 | if body.get('type') == 'Return': 809 | is_if_return = True 810 | test = line.get('test') 811 | if test and test.get('type') == 'UnaryOp': 812 | operand = test.get('operand') 813 | args_tmp = [] 814 | if operand: 815 | rec_find_args(operand, args_tmp) 816 | if set(args_tmp)&set(args_ori): 817 | is_if_param = True 818 | func_ids = [] 819 | rec_get_func_ids(operand, func_ids) 820 | if set(func_ids)&if_judge_func and is_if_return and is_if_param: 821 | args_ori.difference_update(args_tmp) 822 | logger.warn("In If delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(args_tmp,args_ori,test.get('lineno'),test.get('type'))) 823 | 824 | if test and test.get('type') == 'Compare': 825 | args_tmp = [] 826 | for key,value in test.iteritems(): 827 | if key == 'left': 828 | if test[key].get('type') == 'Name': 829 | args_tmp = [test[key].get('id')] 830 | if key == 'comparators': 831 | for comparator in test[key]: 832 | if comparator.get('type') in ('List', 'Tuple'): 833 | for elt in comparator.get('elts'): 834 | if elt.get('type') == 'Name' and elt.get('id') in args_ori: 835 | is_in_param = True 836 | if set(args_tmp)&set(args_ori) and is_if_return and not is_in_param: 837 | args_ori.difference_update(args_tmp) 838 | logger.warn("In If delete (%r) from (%r) where line=(%r)***************************** type=(%r)" %(args_tmp,args_ori,test.get('lineno'),test.get('type'))) 839 | 840 | if ast_body: 841 | look_up_arg(ast_body, args_ori, args, func_name, import_func, verbose) 842 | if ast_orelse: 843 | look_up_arg(ast_orelse, args_ori, args, func_name, import_func, verbose) 844 | if ast_handlers: 845 | look_up_arg(ast_handlers, args_ori, args, func_name, import_func, verbose) 846 | if ast_test and ast_test.get('comparators'): 847 | look_up_arg(ast_test.get('comparators'),args_ori, args, func_name, import_func, verbose) 848 | if ast_test and ast_test.get('left'): 849 | look_up_arg(ast_test.get('left'),args_ori, args, func_name, import_func, verbose) 850 | if ast_args : 851 | look_up_arg(ast_args, args_ori, args, func_name, import_func, verbose) 852 | 853 | return 854 | 855 | def get_func_id(func, func_ids): 856 | """获取被调用函数的名称""" 857 | if func.get("type") == "Name": 858 | func_id = func.get('id') 859 | elif func.get('type') == 'Attribute': 860 | if func.get('value').get('type') == 'Name': 861 | module = func.get('value').get('id') 862 | if module in ['os', 'pickle']: 863 | func_id = module + "." + func.get('attr') 864 | else: 865 | func_id = func.get('attr') 866 | elif func.get('value').get('type') == 'Attribute': 867 | func_id = func.get('attr') 868 | elif func.get('value').get('type') == 'Subscript': 869 | func_id = func.get('attr') 870 | else: 871 | func_id = None 872 | else: 873 | func_id = None 874 | if func_id: 875 | func_ids.append(func_id) 876 | 877 | 878 | def rec_get_func_ids(func, func_ids):#处理连续的unicode.encode等 879 | if func.get('type') in ("Name","Attribute"): 880 | get_func_id(func, func_ids) 881 | if 'value' in func and func.get('value').get('func'): 882 | rec_get_func_ids(func.get('value').get('func'), func_ids) 883 | if func.get('type') == 'Call': 884 | rec_get_func_ids(func.get('func'), func_ids) 885 | for args in func.get('args'): 886 | if args.get('type') != 'Name': 887 | rec_get_func_ids(args, func_ids) 888 | 889 | return 890 | 891 | 892 | def get_pythonpaths(): 893 | """获取系统的python路径,返回一个list列表""" 894 | # cmd = "env|grep PYTHONPATH" 895 | # pythonpath_shell = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) 896 | # pythonpath_shell.wait() 897 | # pythonpath = pythonpath_shell.communicate()[0] 898 | # pythonpaths = [] 899 | # if pythonpath: 900 | # pythonpaths_tmp = pythonpath.split("=")[1] 901 | # for path in pythonpaths_tmp: 902 | # if "python" not in path: #目前是根据python字符串特征来判断是否是系统路径,不是很科学 903 | # pythonpaths.append(path) 904 | pythonpath = os.environ.get('PYTHONPATH') 905 | pythonpaths = [path for path in pythonpath.split(':') if 'python' not in path] 906 | 907 | return pythonpaths 908 | 909 | 910 | def get_custom_import_file(file_name, import_module): 911 | """file_name代表当前要分析的文件,import_module就是其导入的模块""" 912 | import_module = "/" + "/".join(import_module.split('.')); 913 | file_path = os.path.dirname(file_name) 914 | import_file = file_path + import_module + ".py" 915 | if os.path.isfile(import_file): 916 | return import_file 917 | else: 918 | for pythonpath in get_pythonpaths(): 919 | import_file = pythonpath + import_module + ".py" 920 | if os.path.isfile(import_file): 921 | return import_file 922 | 923 | return '' 924 | 925 | """ 926 | def decrease_tree(tree): 927 | tree = {k:v for k, v in tree.iteritems() if k not in ['col_offset', 'start', 'end', 'ctx', 'extra_attr']} 928 | for key, value in tree.iteritems(): 929 | if isinstance(value, dict): 930 | decrease_tree(value) 931 | if isinstance(value, list): 932 | for l in value: 933 | if isinstance(l, dict): 934 | decrease_tree(l) 935 | return tree 936 | """ 937 | 938 | def rec_decrease_tree(tree): 939 | if isinstance(tree, dict): 940 | for key in tree.keys(): 941 | if key in ['col_offset', 'start', 'end', 'ctx', 'extra_attr', 'attr_name']: 942 | del(tree[key]) 943 | else: 944 | 945 | if isinstance(tree[key], dict): 946 | rec_decrease_tree(tree[key]) 947 | if isinstance(tree[key], list): 948 | for l in tree[key]: 949 | rec_decrease_tree(l) 950 | 951 | def walk_dir(file_path, file_type='.py'): 952 | files = [] 953 | if os.path.isfile(file_path): 954 | files = [file_path] 955 | elif os.path.isdir(file_path): 956 | for root, dirs, filenames in os.walk(file_path): 957 | for filename in filenames: 958 | # print 'walk_dir:filename', filename 959 | if re.match(".*\.py$", filename.strip()): 960 | files.append(root+"/"+filename) 961 | 962 | return files 963 | 964 | def print_func(filename, lineno): 965 | with open(filename, 'r') as fd: 966 | lines = fd.readlines() 967 | print lines[lineno-1] 968 | 969 | def usage(): 970 | print """用途:本程序主要用于测试py代码中命令注入和sql注入\n用法:python judge_injection.py -d path 971 | path即为需要测试的目录""" 972 | 973 | def main(): 974 | parser = OptionParser() 975 | parser.add_option("-d", "--dir", dest="file_path",help="files to be checked") 976 | parser.add_option("-c", "--cmd", action="store_true", dest="cmd_check",help="cmd check", default=False) 977 | parser.add_option("-s", "--sql", action="store_true", dest="sql_check",help="sql check", default=False) 978 | parser.add_option("-a", "--all", action="store_true", dest="cmd_sql_check",help="cmd check and sql check", default=True) 979 | parser.add_option("-v", "--verbose", action="store_true", dest="verbose",help="print all unsafe func", default=False) 980 | (options, args) = parser.parse_args() 981 | file_path = options.file_path 982 | cmd_check = options.cmd_check 983 | sql_check = options.sql_check 984 | cmd_sql_check = options.cmd_sql_check 985 | verbose = options.verbose 986 | # print "option:", options 987 | # print file_path 988 | # print cmd_check 989 | # print sql_check 990 | # sys.exit() 991 | if cmd_sql_check: 992 | cmd_check = True 993 | sql_check = True 994 | check_type = (cmd_check,sql_check, verbose) 995 | check_type = {'cmd':cmd_check, 'sql':sql_check, 'verbose':verbose} 996 | if not file_path: 997 | usage() 998 | sys.exit() 999 | else: 1000 | if (os.path.isfile(file_path) or os.path.isdir(file_path)): 1001 | files = walk_dir(file_path) 1002 | else: 1003 | print "您输入的文件或者路径不存在" 1004 | sys.exit() 1005 | for filename in files: 1006 | print "filename",filename 1007 | try: 1008 | # judge = judge_injection(filename, check_type) 1009 | # judge.parse_py() 1010 | # judge.record_all_func() 1011 | judge_all(filename, check_type) 1012 | except Exception, e: 1013 | print filename 1014 | traceback.print_exc() 1015 | 1016 | 1017 | def judge_all(filename, check_type): 1018 | global used_import_files 1019 | try: 1020 | judge = judge_injection(filename, check_type) 1021 | judge.get_import_modules(judge.body) 1022 | print judge.import_module 1023 | for import_file, value in judge.import_module.iteritems(): 1024 | if import_file and import_file not in used_import_files: 1025 | used_import_files.append(import_file) 1026 | judge_all(import_file, check_type) 1027 | 1028 | judge.parse_py() 1029 | # if judge.filename not in used_import_files:#将导致导入的模块不会报出问题,要修复 1030 | judge.record_all_func() 1031 | except: 1032 | traceback.print_exc() 1033 | 1034 | 1035 | 1036 | 1037 | if __name__ == "__main__": 1038 | # filename = "libssh2_login_test.py" 1039 | # filename = "libssh2_login_test.py.bak" 1040 | 1041 | # filename = "arg.py" 1042 | # filename = "test3.py" 1043 | #rec_decrease_tree(line) 1044 | 1045 | # files = walk_dir(file_path) 1046 | files = ["libssh2_login_test.py.bak"] 1047 | # files = ["testsql.py"] 1048 | # files = ["test_cmd2.py"] 1049 | # check_type = (True,False) 1050 | # for filename in files: 1051 | # print "filename",filename 1052 | # try: 1053 | # judge = judge_injection(filename, check_type) 1054 | # judge.parse_py() 1055 | # except Exception, e: 1056 | # traceback.print_exc() 1057 | main() 1058 | 1059 | 1060 | 1061 | 1062 | 1063 | -------------------------------------------------------------------------------- /judge_injection.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shengqi158/pyvulhunter/cf6e4b1601b76f0edad2f0a83372e8adcee161b2/judge_injection.zip -------------------------------------------------------------------------------- /python_audit.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shengqi158/pyvulhunter/cf6e4b1601b76f0edad2f0a83372e8adcee161b2/python_audit.pdf --------------------------------------------------------------------------------