├── pyphp ├── __init__.py ├── phpbuiltins │ ├── primitives.py │ ├── datetime.py │ ├── builtin.py │ ├── __init__.py │ ├── string.py │ ├── constants.py │ ├── regex.py │ └── lang.py ├── test │ ├── argv.php │ ├── err_func_missing_arg.php │ ├── test_error_call_undefined_function.php │ ├── test_die.php │ ├── error_messages.php │ ├── test_isset.php │ ├── test_array_same_key_int_str.php │ ├── test_add_operators.php │ ├── test_function_params.php │ ├── test_array_key_stability.php │ ├── phpbuiltins_preg.php │ └── test_operators.php ├── prepr.py ├── errors.py ├── coerce.py ├── scope.py ├── trace.py ├── pyphp.py ├── phpclass.py ├── phpfunction.py ├── phparray.py ├── varref.py ├── php-grammar.txt ├── parser.py ├── executer.py └── compiler.py ├── setup.py ├── .gitignore ├── CHANGELOG.md ├── LICENSE └── README.md /pyphp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyphp/phpbuiltins/primitives.py: -------------------------------------------------------------------------------- 1 | primitives={ 2 | 'false' : False, 3 | 'true' : True, 4 | 'null' : None 5 | } -------------------------------------------------------------------------------- /pyphp/test/argv.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyphp/phpbuiltins/datetime.py: -------------------------------------------------------------------------------- 1 | from builtin import builtin 2 | 3 | @builtin 4 | def date_default_timezone_set(args, executer, local): 5 | print args 6 | -------------------------------------------------------------------------------- /pyphp/test/err_func_missing_arg.php: -------------------------------------------------------------------------------- 1 | 'e','e'=>'qw')); 10 | 11 | 12 | ?> 
-------------------------------------------------------------------------------- /pyphp/test/test_error_call_undefined_function.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyphp/prepr.py: -------------------------------------------------------------------------------- 1 | def prepr(x, depth=0, dchr=' '): 2 | xt = type(x) 3 | if hasattr(x, 'prepr'): 4 | return x.prepr(depth, dchr) 5 | elif xt in (list, tuple): 6 | dstr = dchr*depth 7 | return '%s%s'%(dstr, ('%s\n'%dstr).join([prepr(ix, depth+1, dchr) for ix in x])) 8 | else: 9 | return repr(x) -------------------------------------------------------------------------------- /pyphp/errors.py: -------------------------------------------------------------------------------- 1 | class ExecuteError(StandardError): 2 | pass 3 | 4 | class StopExecutionError(ExecuteError): 5 | def __init__(self, exitval=0): 6 | self.exitval = exitval 7 | 8 | 9 | class ReturnError(StandardError): 10 | def __init__(self, retval): 11 | self.retval = retval 12 | 13 | -------------------------------------------------------------------------------- /pyphp/test/test_die.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyphp/test/error_messages.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='pyphp', 4 | version='0.1', 5 | description='Php parser, compiler and interpreter, in python', 6 | url='https://github.com/g-i-o-/pyphp', 7 | author='Giovany Vega', 8 | author_email='aleph.omega@gmail.com', 9 | license='MIT', 10 | packages=['pyphp'], 11 
class builtin(object):
    """Callable wrapper that tags a Python function as a PHP built-in.

    Code elsewhere recognises built-ins with ``isinstance(obj, builtin)``,
    so the wrapper only has to remember the wrapped function and forward
    calls to it unchanged.
    """

    def __init__(self, func):
        # The wrapped implementation; kept public because ``__repr__`` and
        # callers may want to inspect it.
        self.func = func

    def __call__(self, *args, **kw):
        """Forward the call to the wrapped function and return its result."""
        return self.func(*args, **kw)

    def __repr__(self):
        # The previous format string was empty, so the ``%`` operator raised
        # "TypeError: not all arguments converted" on every repr() call.
        return "<builtin %s>" % self.func.__name__
10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # virtual environments 39 | venv 40 | .venv 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 0.2.2 4 | - `var_dump` now properly outputs through executer's `stdout`, instead of 5 | using `print`. 6 | - added default `$_GET`, `$_POST` and `$_FILES` (empty) global variables to executer. 7 | - fixed bug where last literal text segment (right after the last `?>`) was not 8 | being parsed properly. 9 | - fixed broken import bug in `phpbuiltins` package. 10 | 11 | ## 0.2.1 12 | - fixed broken import bug in `phpbuiltins` package. 13 | 14 | ## 0.2 15 | - fixed broken import bug in `phpbuiltins` package. 16 | - Added support for redirecting `stdout` in `AbstractPhpExecuter`. 17 | - Added usage section to readme. 18 | - Added changelog. 19 | -------------------------------------------------------------------------------- /pyphp/test/test_function_params.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyphp/test/test_array_key_stability.php: -------------------------------------------------------------------------------- 1 | 'explicit key 5', 'implicit key 6', 'strkey1' => 'strkey #1', 'key 7', 'strkey2' => 'str key #2', 'key 8'); 4 | 5 | function p($a){ 6 | $i=0; 7 | echo "array("; 8 | foreach($a as $k => $v){ 9 | echo ($i++ > 0 ? 
", " : ""); 10 | echo "$k=>$v"; 11 | } 12 | echo ");\n"; 13 | } 14 | 15 | p($a); 16 | $a[] = 'appended_key #1'; 17 | p($a); 18 | array_push($a, 'appended_key #1'); 19 | p($a); 20 | array_pop($a); 21 | p($a); 22 | array_unshift($a, 'unshifted key #1'); 23 | p($a); 24 | array_unshift($a, 'unshifted key #2'); 25 | p($a); 26 | array_push($a, 'pushed key'); 27 | p($a); 28 | $a[] = 'appended_key #2'; 29 | p($a); 30 | 31 | 32 | ?> 33 | -------------------------------------------------------------------------------- /pyphp/phpbuiltins/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Declares a @builtin decorator class for tagging php built-in functions, as well as implements and exports most if not all php built-in functions. 3 | """ 4 | 5 | import pyphp.scope as scope 6 | 7 | import builtin 8 | import string 9 | import regex 10 | import constants 11 | import datetime 12 | import lang 13 | 14 | def gen_builtins(): 15 | modules=[constants, datetime, lang, string, regex] 16 | for module in modules: 17 | for member_name in dir(module): 18 | member = getattr(module, member_name) 19 | if member_name == 'CONSTANTS': 20 | for k,v in member: 21 | yield (k, v) 22 | elif isinstance(member, builtin.builtin): 23 | yield(member_name, member) 24 | 25 | 26 | 27 | 28 | builtins = scope.scope( 29 | dict(x for x in gen_builtins()), 30 | name='phpbuiltins' 31 | ) 32 | -------------------------------------------------------------------------------- /pyphp/test/phpbuiltins_preg.php: -------------------------------------------------------------------------------- 1 | > $retval = '; var_dump($retval); 14 | print ' >> $count = '; var_dump($count); 15 | } 16 | 17 | test1('/fox/', 'badger', 'The lazy fox ran over the brown dog.', 1); 18 | 19 | ?> -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 
| 3 | Copyright (c) 2013 g-i-o- 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /pyphp/coerce.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for implementing PHP's automatic type coercion. 
try:
    _STRING_TYPES = (str, unicode)
    _NUMBER_TYPES = (int, long, float)
except NameError:
    # Python 3: ``unicode`` and ``long`` no longer exist as separate types.
    _STRING_TYPES = (str,)
    _NUMBER_TYPES = (int, float)

# Characters PHP skips before the sign/digits of a numeric string.
_PHP_WHITESPACE = ' \t\n\r\v\f'


def to_string(x):
    """Convert *x* to a string following PHP's string conversion rules.

    ``None`` and ``False`` become ``''``, ``True`` becomes ``'1'``, a
    ``PHPArray`` becomes the literal ``'Array'``; anything else goes through
    ``str()``.
    """
    if x is None or x is False:
        return ''
    if x is True:
        return '1'
    if isinstance(x, phparray.PHPArray):
        return 'Array'
    return str(x)


def to_int(x):
    """Convert *x* to an integer following PHP's integer conversion rules.

    * ``None`` / booleans -> ``0`` or ``1``.
    * numbers -> truncated with ``int()``.
    * strings -> the leading (optionally signed) run of decimal digits, after
      skipping leading whitespace; ``0`` when there is none (this includes the
      empty string, which used to raise ``IndexError``).
    * anything else -> ``0``.
    """
    if x is None or isinstance(x, bool):
        return 1 if x else 0
    if isinstance(x, _NUMBER_TYPES):
        return int(x)
    if isinstance(x, _STRING_TYPES):
        text = x.lstrip(_PHP_WHITESPACE)
        sign = 1
        # Slicing (instead of indexing) keeps the empty string safe.
        if text[:1] in ('-', '+'):
            if text[0] == '-':
                sign = -1
            text = text[1:]
        val = 0
        for c in text:
            # Explicit ASCII range: str.isdigit() would also accept
            # non-ASCII digits that PHP does not parse.
            if not '0' <= c <= '9':
                break
            val = val * 10 + (ord(c) - ord('0'))
        return sign * val
    return 0


def to_bool(x):
    """Convert *x* to a boolean following PHP's boolean conversion rules.

    ``None``, ``False``, zero, ``''``, ``'0'`` and an empty ``PHPArray`` are
    false; every other value is true.
    """
    if x is None:
        return False
    if isinstance(x, bool):
        return x
    if isinstance(x, _STRING_TYPES):
        return len(x) > 0 and x != '0'
    if isinstance(x, _NUMBER_TYPES):
        return x != 0
    # Was ``pyphp.phparrar.PHPArray``: a typo on a name that is not even
    # imported here, so every non-string value raised NameError.
    if isinstance(x, phparray.PHPArray):
        return len(x) > 0
    return True
{'search':s,'replace':repl,'subject':subject} 27 | splits = subject.split(s) 28 | rep_count += len(splits)-1 29 | subject = repl.join(splits) 30 | #print ' ::', repr(subject) 31 | if len(args) >= 4: 32 | executer.set_val(args[3], rep_count) 33 | #print '\n='*90 34 | return subject 35 | 36 | @builtin 37 | def strtolower(args, executer, local): 38 | val = executer.get_val(args[0]) 39 | return coerce.to_string(val).lower() 40 | -------------------------------------------------------------------------------- /pyphp/scope.py: -------------------------------------------------------------------------------- 1 | from prepr import prepr 2 | 3 | class scope(object): 4 | "Represents a scope, a context in which variables and functions are defined and bound to." 5 | def __init__(self, d=None, *parents, **kwargs): 6 | "Creates a scope with a given dictionary and parents. A name argument can be given to name the scope." 7 | self.dict = d if d else {} 8 | self.parents = [p for p in parents if p] 9 | self.name = kwargs['name'] if 'name' in kwargs else None 10 | def __contains__(self, key): 11 | if key in self.dict: 12 | return True 13 | else: 14 | for ps in self.parents: 15 | if key in ps: 16 | return True 17 | return False 18 | def __getitem__(self, key): 19 | if key not in self.dict: 20 | for ps in self.parents: 21 | if key in ps: 22 | return ps[key] 23 | return self.dict[key] 24 | def __setitem__(self, key, val): 25 | self.dict[key] = val 26 | def __iter__(self): 27 | import itertools 28 | return itertools.chain(self.dict, *self.parents) 29 | def __repr__(self): 30 | return '%sscope(%r)%s'%(self.name + '-' if self.name else '', self.dict, ('-[%s]'%(''.join([repr(x) for x in self.parents]))) if self.parents else '') 31 | def prepr(self, depth=0, dchr=' '): 32 | dstr=dchr*depth 33 | return '%s%sscope :\n%s %s%s'%( 34 | dstr, self.name + '-' if self.name else '', 35 | dstr, ('\n%s '%dstr).join(['%s => %r'%(x,y) for x,y in self.dict.items()]), 36 | '\n'+ 37 | '\n'.join([prepr(p, 
depth+1) for p in self.parents] if self.parents else []) 38 | ) -------------------------------------------------------------------------------- /pyphp/trace.py: -------------------------------------------------------------------------------- 1 | import prepr 2 | trace_stack=[] 3 | tcid = 0 4 | trace_temps = { 5 | 'basic' : [ 6 | "%(indent)s%(name)s { [%(depth)d]", 7 | "%(indent)s} [%(depth)d] // %(name)r" 8 | ], 9 | 'args' : [ 10 | "%(indent)s%(name)s (%(args)s,%(kwargs)r){ [%(depth)d]", 11 | "%(indent)s} [%(depth)d] // %(name)r" 12 | ], 13 | 'tids' : { 14 | "%(indent)s%(name)s (%(args)s,%(kwargs)r){ [%(depth)d] : TPID%(parent_tcid)s -> TCID%(tcid)s", 15 | "%(indent)s} [%(depth)d] // %(name)r : TPID%(parent_tcid)s -> TCID%(tcid)s" 16 | } 17 | } 18 | def trace(fn, show_tids = False): 19 | global trace_temps 20 | pre_tt, post_tt = trace_temps[(show_tids if type(show_tids) is str else 'tids') if show_tids else 'basic'] 21 | 22 | def tfn(*a,**b): 23 | global trace_stack, tcid 24 | cur_tcid = tcid 25 | tcid += 1 26 | trace_stack.append(tcid) 27 | trace_depth = len(trace_stack) 28 | tpid = trace_stack[-2] if trace_depth >= 2 else None 29 | fn_name = fn.__name__ 30 | pdata = { 31 | 'indent' : "|"*trace_depth, 32 | 'name' : fn_name, 33 | 'args' : prepr.prepr(a), 34 | 'kwargs' : b, 35 | 'depth' : trace_depth, 36 | 'parent_tcid': tpid, 37 | 'tcid' : cur_tcid 38 | } 39 | print pre_tt % pdata 40 | rv = fn(*a,**b) 41 | print post_tt % pdata 42 | trace_stack.pop() 43 | return rv 44 | return tfn 45 | 46 | 47 | def trace_obj_calls(obj, filter_fn=None, show_tids=False): 48 | if type(filter_fn) in(list, tuple): 49 | flist = filter_fn 50 | def ffn(x): 51 | return x in flist 52 | if flist[0] == '!': 53 | flist.pop(0) 54 | filter_fn = (lambda x: not ffn(x)) 55 | else: 56 | filter_fn = ffn 57 | elif not filter_fn: 58 | filter_fn = lambda x:True 59 | 60 | for x in dir(obj): 61 | y = getattr(obj, x) 62 | if x[0] != '_' and callable(y) and filter_fn(x): 63 | setattr(obj, x, trace(y, 
show_tids)) 64 | -------------------------------------------------------------------------------- /pyphp/pyphp.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | import sys 4 | import parser 5 | import compiler 6 | import executer 7 | import scope 8 | import phparray 9 | 10 | NAME ='pyphp.py' 11 | VERSION='0.1' 12 | 13 | USAGE = """{0} 14 | Php parser, compiler and interpreter, in python 15 | v. {1} 16 | 17 | Usage: 18 | {2} [--parse | --compile | --run] script.php [script-args*] 19 | """.format(NAME, VERSION, sys.argv[0]) 20 | 21 | if __name__ == '__main__': 22 | # print sys.argv 23 | action = 'run' 24 | show_globals = False 25 | phpfile=None 26 | phpargv=[] 27 | if len(sys.argv) > 1: 28 | for arg in sys.argv[1:]: 29 | if phpfile is None and len(arg) > 3 and arg[0:2] == '--': 30 | comps = arg[2:].split('=', 1) 31 | key = comps[0] 32 | val = comps[1] if len(comps) > 1 else 1 33 | if key == 'show_globals' and int(val): 34 | show_globals = True 35 | elif key in ('compile', 'run', 'parse'): 36 | action = key 37 | else: 38 | print "Unknown option %s"%arg 39 | else: 40 | if len(phpargv) == 0: 41 | phpfile = arg 42 | phpargv.append(arg) 43 | 44 | init_scope = { 45 | '$argv' : phparray.PHPArray(*enumerate(phpargv)), 46 | '$argc' : len(phpargv) 47 | } 48 | 49 | if phpfile: 50 | if action == 'run': 51 | php_executer = executer.execute_file(phpfile, init_scope) 52 | if show_globals: 53 | print "[ended]\n-- globals --" 54 | for i in executer.globals.dict: 55 | print "%-14s -> %r"%(i, executer.globals[i]) 56 | elif action == 'compile': 57 | code_tree = compiler.compile_file(phpfile) 58 | print "[ended compilation]\n-- code tree --" 59 | print code_tree.prepr() 60 | elif action == 'parse': 61 | tokens = parser.parse_file(phpfile) 62 | print "[ended parsing]\n-- tokens --" 63 | print '\n'.join([repr(x) for x in tokens]) 64 | 65 | else : 66 | print "# No file to %s!"%action 67 | print USAGE 68 | 69 | 
class PHPClass:
    """Runtime representation of a PHP class definition.

    Static members (functions and variable defaults) are stored by name in
    ``self.body``; non-static members are collected, in declaration order, in
    ``self.instance_body``.  Item access (``cls[name]``) resolves static
    members, falling back to the superclass when the name is not defined
    locally.
    """

    def __init__(self, name, superclass, body, context, filename=None, line_num=0):
        """Build the class from its parsed members.

        name       -- class name.
        superclass -- parent PHPClass, or a false value when there is none.
        body       -- iterable of PHPFunction / VarDef members (may be None).
        context    -- mapping-like scope; receives the 'self' and 'parent'
                      entries so bound members can refer to the class.
        filename, line_num -- where the class was defined.

        Raises errors.ExecuteError when a member is neither a PHPFunction nor
        a VarDef.
        """
        self.name = name
        self.superclass = superclass
        self.body = {}
        self.instance_body = []
        for member in body or ():
            if isinstance(member, phpfunction.PHPFunction):
                if member.is_static():
                    self.body[member.name] = member
                else:
                    self.instance_body.append(member)
            elif isinstance(member, varref.VarDef):
                if member.is_static():
                    # Static variables are stored by their default value.
                    self.body[member.name] = member.default
                else:
                    self.instance_body.append(member)
            else:
                raise errors.ExecuteError(
                    "Invalid class member %r in definition of class %s" % (member, self.name)
                )
        self.context = context
        self.context['self'] = self
        self.context['parent'] = superclass
        self.filename = filename
        self.line_num = line_num

    def __contains__(self, key):
        """Tell whether *key* is a static member defined directly on this class."""
        return key in self.body

    def __getitem__(self, key):
        """Return the static member *key*, searching superclasses as needed.

        Members exposing ``bind`` are bound to this class' context first.
        Returns None when neither this class nor any ancestor defines *key*.
        Raises errors.ExecuteError if the stored member reports itself as
        non-static.
        """
        if key in self.body:
            member = self.body[key]
            if hasattr(member, 'is_static') and not member.is_static():
                raise errors.ExecuteError(
                    "Fetching non-static member %r from %r." % (member, self)
                )
            if hasattr(member, 'bind'):
                return member.bind(self.context)
            return member
        if self.superclass:
            return self.superclass[key]
        # Was ``return null``: an undefined name, so a missing member raised
        # NameError instead of yielding PHP's null.
        return None

    def __setitem__(self, key, value):
        """Overwrite an existing static member; unknown names are rejected.

        Raises errors.ExecuteError when *key* is not already defined on this
        class.
        """
        if key not in self.body:
            raise errors.ExecuteError("Cannot set %s on class %s" % (key, self.name))
        self.body[key] = value

    def __call__(self, *args):
        # Calling a class object is not implemented.  The previous body was a
        # debugging tripwire (a print followed by a reference to an undefined
        # name); fail with the interpreter's own error type instead.
        raise errors.ExecuteError("Called %s with %r" % (self, args))

    def __repr__(self):
        # The previous format string was empty, so repr() always raised
        # "TypeError: not all arguments converted".
        return '<phpclass %s%s>' % (
            self.name,
            ' extends %r' % self.superclass.name if self.superclass else '',
        )
val = None 49 | executer.report_error( 50 | constants.E_WARNING, 51 | "Missing argument %d for %s()%s defined in %s on line %d"%(i+1, self.name, 52 | ', called in %s on line %d and'%(caller_filename, caller_line_num) if caller_filename is not None and caller_line_num is not None else '', 53 | self.filename, self.line_num) 54 | # "Warning: Missing argument 2 for f(), called in /Users/giovanyvega/langdev/php/test/err_func_missing_arg.php on line 7 and defined in /Users/giovanyvega/langdev/php/test/err_func_missing_arg.php on line 3" 55 | ) 56 | # raise errors.ExecuteError("Missing required argument %d for %r"%(i, self)) 57 | 58 | call_context[par[1]] = val 59 | 60 | if self.name == 'library': 61 | print ('='*20 +'\n')*5 62 | print self.body.prepr() 63 | print ('='*20 +'\n')*5 64 | # print executer 65 | # print self.body 66 | # print call_context 67 | 68 | try: 69 | return executer.visit(self.body, call_context) 70 | except errors.ReturnError, rerr: 71 | return rerr.retval 72 | 73 | # raise errors.ExecuteError("Can't execute yet.") 74 | 75 | def is_static(self): 76 | return 'static' in self.modifiers 77 | 78 | def bind(self, context): 79 | self.context = context 80 | return self -------------------------------------------------------------------------------- /pyphp/phpbuiltins/constants.py: -------------------------------------------------------------------------------- 1 | # literals 2 | true = True 3 | false = False 4 | null = None 5 | 6 | # Error constants 7 | E_ERROR = 0x0001 # (integer) Fatal run-time errors. These indicate errors that can not be recovered from, such as a memory allocation problem. Execution of the script is halted. 8 | E_WARNING = 0x0002 # (integer) Run-time warnings (non-fatal errors). Execution of the script is not halted. 9 | E_PARSE = 0x0004 # (integer) Compile-time parse errors. Parse errors should only be generated by the parser. 10 | E_NOTICE = 0x0008 # (integer) Run-time notices. 
Indicate that the script encountered something that could indicate an error, but could also happen in the normal course of running a script. 11 | E_CORE_ERROR = 0x0010 # (integer) Fatal errors that occur during PHPs initial startup. This is like an E_ERROR, except it is generated by the core of PHP. 12 | E_CORE_WARNING = 0x0020 # (integer) Warnings (non-fatal errors) that occur during PHPs initial startup. This is like an E_WARNING, except it is generated by the core of PHP. 13 | E_COMPILE_ERROR = 0x0040 # (integer) Fatal compile-time errors. This is like an E_ERROR, except it is generated by the Zend Scripting Engine. 14 | E_COMPILE_WARNING = 0x0080 # (integer) Compile-time warnings (non-fatal errors). This is like an E_WARNING, except it is generated by the Zend Scripting Engine. 15 | E_USER_ERROR = 0x0100 # (integer) User-generated error message. This is like an E_ERROR, except it is generated in PHP code by using the PHP function trigger_error(). 16 | E_USER_WARNING = 0x0200 # (integer) User-generated warning message. This is like an E_WARNING, except it is generated in PHP code by using the PHP function trigger_error(). 17 | E_USER_NOTICE = 0x0400 # (integer) User-generated notice message. This is like an E_NOTICE, except it is generated in PHP code by using the PHP function trigger_error(). 18 | E_STRICT = 0x0800 # (integer) Enable to have PHP suggest changes to your code which will ensure the best interoperability and forward compatibility of your code. Since PHP 5 but not included in E_ALL until PHP 5.4.0 19 | E_RECOVERABLE_ERROR = 0x1000 # (integer) Catchable fatal error. It indicates that a probably dangerous error occurred, but did not leave the Engine in an unstable state. If the error is not caught by a user defined handle (see also set_error_handler()), the application aborts as it was an E_ERROR. Since PHP 5.2.0 20 | E_DEPRECATED = 0x2000 # (integer) Run-time notices. Enable this to receive warnings about code that will not work in future versions. 
Since PHP 5.3.0 21 | E_USER_DEPRECATED = 0x4000 # (integer) User-generated warning message. This is like an E_DEPRECATED, except it is generated in PHP code by using the PHP function trigger_error(). Since PHP 5.3.0 22 | E_ALL = 0x7fff # (integer) All errors and warnings, as supported, except of level E_STRICT prior to PHP 5.4.0. 23 | -------------------------------------------------------------------------------- /pyphp/phpbuiltins/regex.py: -------------------------------------------------------------------------------- 1 | from builtin import builtin 2 | import constants 3 | import pyphp.phparray as phparray 4 | #import re 5 | 6 | WARN_NOT_PCRE = False 7 | USING_PCRE = False 8 | 9 | 10 | 11 | if USING_PCRE: 12 | try: 13 | import pcre as re 14 | USING_PCRE = True 15 | except ImportError, ie: 16 | USING_PCRE = False 17 | 18 | 19 | 20 | 21 | if USING_PCRE: 22 | def parse_regex(pat): 23 | return pat, 0 24 | else: 25 | import re 26 | WARNING_GIVEN = not WARN_NOT_PCRE 27 | def warn(executer): 28 | global WARNING_GIVEN 29 | if not WARNING_GIVEN: 30 | executer.report_error(constants.E_CORE_WARNING, "Python's re module is not completely compatible to PCRE.") 31 | WARNING_GIVEN=True 32 | 33 | def parse_regex(pat): 34 | delimiter = pat[0] 35 | i, e = 1, len(pat) 36 | while i < e: 37 | c = pat[i] 38 | if c == '\\' and i+1= 4: 92 | executer.set_val(args[4], subs_made) 93 | return subject 94 | raise StandardError() 95 | -------------------------------------------------------------------------------- /pyphp/phparray.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class emulating a php array. 3 | php arrays have semantic properties that differ from normal python objects. 
class _KeyList(list):
    """Ordered list of array keys that may also be called to get a copy.

    ``PHPArray`` stores its key order in the ``keys`` attribute, which used to
    shadow the ``keys()`` method and made ``arr.keys()`` raise TypeError.
    Making the list itself callable keeps both ``arr.keys`` (attribute) and
    ``arr.keys()`` (call) working.
    """

    def __call__(self):
        return list(self)


class PHPArray:
    """Class that emulates a php array.

    Attributes:
      keys  -- keys in insertion order (a callable list, see ``_KeyList``).
      dict  -- key -> value storage.
      max_i -- next free integer index used when appending.

    A key of ``None`` means "append": the value is stored under ``max_i``.
    Integers, floats, booleans and canonical decimal strings ("12", "-3") are
    stored as integer keys; every other key is stored unchanged.
    """

    def __init__(self, *args):
        """Create the array from ``(key, value)`` pairs, inserted in order."""
        self.keys = _KeyList()
        self.dict = {}
        self.max_i = 0

        for k, v in args:
            self[k] = v

    def values(self):
        """Return the values as a list, in key insertion order."""
        return [self.dict[k] for k in self.keys]

    @staticmethod
    def _is_numerable(k):
        """Tell whether key *k* must be stored as an integer key.

        Strings qualify only in canonical decimal form, so '', '-', '08' and
        '-0' stay string keys (the empty string used to raise IndexError and
        '-' made the later ``int()`` conversion fail).
        """
        if isinstance(k, (bool, int, float)):
            return True
        text = str(k)
        digits = text[1:] if text[:1] == '-' else text
        if not digits:
            return False
        for c in digits:
            if c not in '0123456789':
                return False
        if digits[0] == '0':
            # A leading zero is only canonical for the literal "0".
            return text == '0'
        return True

    def _coerce_key_(self, k, for_insertion=True):
        """Normalise key *k*; when inserting, also maintain ``max_i``.

        With ``for_insertion`` false the array is not modified, which makes
        the method safe to use for lookups.
        """
        if k is None:
            if for_insertion:
                k = self.max_i
                self.max_i += 1
        elif self._is_numerable(k):
            k = int(k)
            if for_insertion:
                # ``max_i`` is the *next free* index.  It used to be set to
                # ``k`` itself, so the following append overwrote this entry.
                self.max_i = max(self.max_i, k + 1)
        return k

    def __setitem__(self, k, v):
        k = self._coerce_key_(k, True)
        if k not in self.dict:
            self.keys.append(k)
        self.dict[k] = v

    def __getitem__(self, k):
        # Coerce so that arr["1"] finds the value stored under integer 1.
        return self.dict[self._coerce_key_(k, False)]

    def __contains__(self, k):
        return self._coerce_key_(k, False) in self.dict

    def __len__(self):
        return len(self.keys)

    def __iter__(self):
        return iter(self.keys)

    def __add__(self, other):
        """Array union: keep every entry of self, add the keys only *other* has."""
        pairs = [(k, self.dict[k]) for k in self.keys]
        pairs.extend((k, other.dict[k]) for k in other.keys if k not in self.dict)
        return PHPArray(*pairs)

    def push(self, *items):
        """Append each item under the next free integer key."""
        for item in items:
            self[None] = item

    def pop(self):
        """Remove the last element and return its value (None when empty)."""
        if len(self.keys) == 0:
            return None
        last_key = self.keys.pop(-1)
        value = self.dict.pop(last_key)
        # Next free index is one past the largest remaining integer key, and
        # never negative.
        self.max_i = max([-1] + [x for x in self.keys if isinstance(x, int)]) + 1
        return value

    def _gen_newkeys(self, start_i=0):
        """Yield ``(key, value)`` pairs with integer keys renumbered from *start_i*.

        String keys are kept.  ``max_i`` is updated once the generator has
        been fully consumed.
        """
        i, kl, d = start_i, self.keys, self.dict
        for k in kl:
            v = d[k]
            if self._is_numerable(k):
                yield i, v
                i += 1
            else:
                yield k, v
        self.max_i = i

    def shift(self):
        """Remove the first element, renumber integer keys, return the value."""
        if len(self.keys) == 0:
            return None

        first_key = self.keys.pop(0)
        value = self.dict[first_key]

        new_kv_pair = list(self._gen_newkeys())
        self.dict = dict(new_kv_pair)
        self.keys = _KeyList(k for k, v in new_kv_pair)
        return value

    def unshift(self, *items):
        """Prepend *items* under keys 0..n-1, renumbering existing integer keys."""
        count = len(items)
        new_kv_pair = list(self._gen_newkeys(count))
        self.dict = dict(new_kv_pair)
        for i, item in enumerate(items):
            self.dict[i] = item
        # list(range(...)) because ``range + list`` only works on Python 2.
        self.keys = _KeyList(list(range(count)) + [k for k, v in new_kv_pair])

    def __repr__(self):
        return 'phparray(%s)' % (', '.join([
            '%r=>%r' % (k, self.dict[k])
            for k in self.keys
        ]))
from builtin import builtin
import pyphp.errors
import sys
from pyphp.prepr import prepr


@builtin
def isset(args, executer, local_dict):
    """Return True when every argument refers to a value that is set.

    Error reporting is silenced while probing and always restored, even if
    ``executer.has_val`` raises; a raising probe counts as "not set".
    """
    saved_level = executer.ERROR_REPORTING
    executer.ERROR_REPORTING = 0
    try:
        val_set = all(executer.has_val(a) for a in args)
    except Exception:
        val_set = False
    finally:
        executer.ERROR_REPORTING = saved_level
    return val_set


@builtin
def empty(args, executer, local_dict):
    """Return True when the argument is empty in the PHP sense.

    Falsy values (None, False, 0, "", empty containers) and the string "0"
    are empty; everything else is not.
    """
    var = executer.get_val(args[0])
    if not var or var == "0":
        return True
    try:
        # Objects exposing a length but no truth value of their own.
        return len(var) == 0
    except TypeError:
        return False


@builtin
def die(args, executer, local):
    """Stop execution by raising StopExecutionError.

    A string argument is written to the executer's stdout pipe and the exit
    value becomes 0; any other argument is used as the exit value.
    """
    var = executer.get_val(args[0]) if len(args) > 0 else None
    if type(var) is str:
        executer.pipe_stdout.write(var)
        var = 0

    # ``import pyphp.errors`` binds the name ``pyphp``, not ``errors``.
    raise pyphp.errors.StopExecutionError(var)


@builtin
def defined(args, executer, local_dict):
    """Return True when the named constant exists in ``local_dict``.

    Names starting with ``$`` are variables, not constants.  An entry that is
    a truthy PHPFunction does not count as a constant.
    """
    import pyphp.phpfunction as phpfunction
    name = executer.get_val(args[0])
    if name and name[0] != '$' and name in local_dict:
        val = local_dict[name]
        if not val or not isinstance(val, phpfunction.PHPFunction):
            return True
    return False


def include_impl(args, executer, local_dict, require=False, once=False):
    """Shared implementation of include/require and their *_once variants.

    The set of already loaded paths is kept in ``executer.globals`` under a
    reserved key.  With ``once`` a path that was loaded before is skipped.
    A missing file is ignored unless ``require`` is true, in which case an
    ExecuteError is raised.
    """
    lf_key = '~!loaded_files'
    path = executer.get_val(args[0])
    if lf_key not in executer.globals:
        executer.globals[lf_key] = {}
    if once and path in executer.globals[lf_key]:
        return

    import os.path
    if os.path.exists(path):
        import pyphp.compiler as compiler
        # Mark as loaded before executing so recursive includes see it.
        executer.globals[lf_key][path] = True
        executer.visit(compiler.compile_file(path), local_dict)
    elif require:
        raise pyphp.errors.ExecuteError("required file %s not found!" % path)


@builtin
def require_once(args, executer, local_dict):
    include_impl(args, executer, local_dict, True, True)


@builtin
def require(args, executer, local_dict):
    include_impl(args, executer, local_dict, True, False)


@builtin
def include_once(args, executer, local_dict):
    include_impl(args, executer, local_dict, False, True)


@builtin
def include(args, executer, local_dict):
    include_impl(args, executer, local_dict, False, False)


def var_dump_impl(executer, x, depth=0):
    """Write a PHP ``var_dump``-style description of ``x`` to the executer's
    stdout pipe, indenting nested array elements by two spaces per level.

    Handles None, bools, ints, floats, strings and PHPArray; other types
    produce no output.
    """
    import pyphp.phparray as phparray
    write = executer.pipe_stdout.write
    dstr = "  " * depth
    if x is None:
        write("%sNULL\n" % dstr)
    elif x is True:
        write("%sbool(true)\n" % dstr)
    elif x is False:
        write("%sbool(false)\n" % dstr)
    elif isinstance(x, (int, long)):
        write("%sint(%d)\n" % (dstr, x))
    elif isinstance(x, float):
        write("%sfloat(%r)\n" % (dstr, x))
    elif isinstance(x, str):
        write('%sstring(%d) "%s"\n' % (dstr, len(x), x))
    elif isinstance(x, phparray.PHPArray):
        write("%sarray(%d) {\n" % (dstr, len(x)))
        for y in x:
            # Integer keys are shown bare, string keys in double quotes.
            key = '"%s"' % y if isinstance(y, str) else '%s' % (y,)
            write("%s  [%s]=>\n" % (dstr, key))
            var_dump_impl(executer, x[y], depth + 1)
        write("%s}\n" % dstr)


@builtin
def var_dump(args, executer, local):
    """Dump every argument's value with ``var_dump_impl``."""
    for arg in [executer.get_val(a) for a in args]:
        var_dump_impl(executer, arg)


@builtin
def ini_set(args, executer, local):
    """No-op placeholder for PHP's ini_set."""
    pass


@builtin
def error_reporting(args, executer, local):
    """Optionally set, then return, the executer's error reporting level."""
    if len(args) > 0:
        executer.ERROR_REPORTING = executer.get_val(args[0])
    return executer.ERROR_REPORTING
import phpbuiltins.constants

VERBOSITY_NONE = 0
VERBOSITY_SHOW_DEBUG = 1

VERBOSE = VERBOSITY_NONE


class VarDef(object):
    """Declaration of a variable: its name, modifier list and default value."""

    def __init__(self, name, modifiers, default):
        self.name = name
        self.modifiers = modifiers
        self.default = default

    def __repr__(self):
        modifiers = ' '.join(self.modifiers) if self.modifiers else ''
        default = ' = %s' % (self.default,) if self.default else ''
        return "%s %s%s" % (modifiers, self.name, default)

    def is_static(self):
        """True when declared ``static`` or ``const``."""
        # ``modifiers`` may be empty/None, as __repr__ already allows for.
        modifiers = self.modifiers or ()
        return 'static' in modifiers or 'const' in modifiers


class VarRef(object):
    """Reference to a named slot inside a context.

    ``context`` is either another VarRef (a chained access such as
    ``$a['x']['y']``), a VarDef, or the container that holds ``name``.
    ``particle`` is the access token written between the context and the
    name when rendering the qualified name (``'['`` for indexing).
    """

    def __init__(self, name, executer, context, particle=''):
        self.name = name
        self.particle = particle
        self.executer = executer
        self.context = context

    def get_context_obj(self, auto_create=False):
        """Return ``(context_name, container)`` for this reference.

        For a chained reference the parent is resolved first; when the
        parent's slot is missing it is created as an empty dict if
        ``auto_create`` is true, otherwise IndexError is raised.  The flag is
        forwarded up the chain so that every missing level is created.
        """
        if VERBOSE >= VERBOSITY_SHOW_DEBUG:
            print("Getting context of %r" % self)
        if isinstance(self.context, VarRef):
            parent = self.context
            ctx_name, ctx = parent.get_context_obj(auto_create)
            if parent.name in ctx:
                ctx = ctx[parent.name]
            elif auto_create:
                ctx[parent.name] = {}
                ctx = ctx[parent.name]
            else:
                raise IndexError("Cannot get context of %r." % self)
            return parent.name, ctx
        elif isinstance(self.context, VarDef):
            return self.context.name, self.context.default
        else:
            # The context was given explicitly as the container itself.
            return "", self.context

    def get(self):
        """Return the referenced value, or None (after reporting E_WARNING)
        when the name does not exist.  A VarDef value yields its default."""
        context_name, context = self.get_context_obj(True)
        if self.name not in context:
            self.executer.report_error(
                phpbuiltins.constants.E_WARNING,
                "%s doesn't exist" % (self.qualified_name())
            )
            return None
        value = context[self.name]
        if isinstance(value, VarDef):
            value = value.default
        return value

    def set(self, value):
        """Store ``value`` under this name, creating missing parent slots."""
        context_name, context = self.get_context_obj(True)
        context[self.name] = value

    def isset(self):
        """Return True when the name exists; never creates anything.

        A missing parent (IndexError) or a parent that is not a container
        (TypeError on the membership test) both mean "not set".
        """
        try:
            context_name, context = self.get_context_obj(False)
            return self.name in context
        except (IndexError, TypeError):
            return False

    def __getitem__(self, index):
        return VarRef(index, self.executer, self, '[')

    def getitem(self, index, particle='['):
        return VarRef(index, self.executer, self, particle)

    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__, self.name, self.context)

    def prepr(self, *args):
        return self.qualified_name()

    def _reversed_parts(self):
        """Return this reference's own name fragments in reverse order."""
        parts = []
        if self.particle == '[':
            parts.append(']')
        if isinstance(self.name, VarRef):
            if self.particle != '[':
                parts.append('}')
            parts.append(self.name.qualified_name())
            if self.particle != '[':
                parts.append('{')
        else:
            # Names may be integer indexes; join() below needs strings.
            parts.append('%s' % (self.name,))
        parts.append(self.particle)
        return parts

    def qualified_name(self):
        """Render the full access path, e.g. the chain of contexts followed
        by this name, using each reference's particle as separator."""
        import phpclass
        name_parts = self._reversed_parts()
        ctx = self.context
        while ctx:
            if isinstance(ctx, VarRef):
                name_parts.extend(ctx._reversed_parts())
                ctx = ctx.context
            elif isinstance(ctx, phpclass.PHPClass):
                name_parts.append(ctx.name)
                ctx = None
            else:
                ctx = None
        return ''.join(reversed(name_parts))
direct_output | expression_stmt | if_stmt | stmt_block | classdef_stmt | return_stmt | while_stmt | foreach_stmt | echo_stmt | funcdef_stmt | switch_stmt | throw_stmt 3 | stmt_block => '{' [stmt]* '}' 4 | classdef_stmt => CLASS identifier [EXTENDS identifier] '{' classdef_block '}' 5 | classdef_block => [const_vardef_stmt | methoddef_stmt | constdef_stmt]* 6 | 7 | echo_stmt => ECHO expression_list ';' 8 | 9 | while_stmt => WHILE '(' expression_list ')' stmt_block 10 | 11 | for_stmt => FOR '(' expression_list ';' expression_list ';' expression_list ')' stmt_block 12 | foreach_stmt => FOREACH '(' followed_primitive AS followed_primitive [ '=>' followed_primitive ] ')' stmt_block 13 | 14 | throw_stmt => THROW expression_list ';' 15 | try_stmt => TRY stmt_block ([catch_block]+ [finally_block] | finally_block) 16 | catch_block => CATCH '(' parameter ')' stmt_block 17 | finally_block => FINALLY stmt_block 18 | 19 | switch_stmt => SWITCH '(' expression_list ')' '{' switch_case+ [switch_default]'}' 20 | switch_case => CASE expression ':' [stmt]* 21 | switch_default => DEFAULT ':' [stmt]* 22 | 23 | funcdef_stmt => 'function' IDENTIFIER parameter_list stmt_block 24 | 25 | methoddef_stmt => [vardef_decorator]* 'function' IDENTIFIER parameter_list stmt_block 26 | 27 | vardef_stmt => [vardef_decorator]+ VARIABLE ['=' or_expression] ';' 28 | const_vardef_stmt => const IDENTIFIER ['=' or_expression] ';' 29 | 30 | vardef_decorator => 'var' | 'static' | 'private' | 'public' 31 | 32 | parameter_list => '(' [parameter [ ',' parameter ]] ')' 33 | parameter => [IDENTIFIER] [&] VARIABLE ['=' or_expression] 34 | 35 | direct_output => DIRECT_OUTPUT 36 | define_stmt => DEFINE argument_list ';' 37 | return_stmt => RETURN expression ';' 38 | argument_list => '(' argument [ ',' argument ]* ')' 39 | argument => expression 40 | expression_stmt => expression ';' 41 | expression_list => expression [ ',' expression ]* 42 | 43 | expression => print_expression 44 | print_expression => [PRINT] 
or_expression 45 | or_expression => xor_expression [OR xor_expression]* 46 | xor_expression => and_expression [XOR and_expression]* 47 | and_expression => assignment_expression [AND assignment_expression]* 48 | assignment_expression => conditional_expression [assignment_op conditional_expression] 49 | conditional_expression => sym_or_expression ['?' sym_or_expression ':' sym_or_expression ] 50 | sym_or_expression => sym_and_expression ['||' sym_and_expression]* 51 | sym_and_expression => bit_or_expression ['&&' bit_or_expression ]* 52 | bit_or_expression => bit_xor_expression ['|' bit_xor_expression]* 53 | bit_xor_expression => bit_and_expression ['^' bit_and_expression]* 54 | bit_and_expression => eq_comp_expression ['&' eq_comp_expression]* 55 | eq_comp_expression => order_comp_expression [eq_comp_op order_comp_expression]* 56 | order_comp_expression => bitshift_expression [order_comp_op bitshift_expression]* 57 | bitshift_expression => add_expression [bitshift_op add_expression]* 58 | add_expression => term [add_op term]* 59 | term => negated_typecheck [mult_op negated_typecheck]* 60 | negated_typecheck => ['!'] typecheck 61 | typecheck => factor ['instanceof' typedef] 62 | factor => [unary]* followed_primitive 63 | 64 | 65 | followed_primitive => primitive [follower]* 66 | primitive => STRING | NUMBER | IDENTIFIER | ['&'] VARIABLE | '(' expression ')' | NEW typedef argument_list | array_literal 67 | primitive_follower => static_member_access | member_access | fncall | array_indexing | unary_follower 68 | unary_follower => '++' | '--' 69 | array_literal => ARRAY '(' [array_element [',' array_element]* [',']] ')' 70 | array_element => expression '=>' expression 71 | typedef => member_expression [static_member_access]* 72 | static_member_access => '::' member_expression 73 | member_access => '->' member_expression 74 | member_expression => IDENTIFIER | VARIABLE | '{' expression '}' 75 | fncall => argument_list 76 | array_indexing => '[' expression ']' 77 | if_stmt 
=> IF '(' expression ')' stmt [ELSE stmt] 78 | 79 | or_operator => 'or' | '||' 80 | xor_operator => 'xor' 81 | and_operator => 'and' | '&&' 82 | assignment_operator => '=' | '+=' | '-=' | '*=' | '/=' | '.=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' 83 | comp_operator => '=='| '!=' | '===' | '!==' | '<' | '<=' | '>' | '>=' | 'instanceof' 84 | add_operator => '+' | '-' | '.' 85 | term_operator => '*' | '/' | '%' 86 | unary_operator => '!' | '~' | '++' | '--' | '+' | '-' | '@' 87 | 88 | 89 | 1+1*5 == 3+5 90 | 91 | AssociativityOperators 92 | 93 | left , 94 | left or 95 | left xor 96 | left and 97 | right print 98 | right = += -= *= /= .= %= &= |= ^= <<= >>= 99 | left ? : 100 | left || 101 | left && 102 | left | 103 | left ^ 104 | left & 105 | non-associative == != === !== 106 | non-associative < <= > >= 107 | left << >> 108 | left + - . 109 | left * / % 110 | right ! ~ ++ -- (int) (float) (string) (array) (object) @ 111 | 112 | 113 | right [ 114 | 115 | 116 | new_expression => 'new' quialified_identifier [argument_list] 117 | quialified_identifier => identifier ['::' identifier]* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pyphp 2 | ===== 3 | 4 | Php parser, compiler and interpreter for python 5 | 6 | 7 | Usage 8 | ---- 9 | 10 | ###Executing php code 11 | You can execute php code using the `pyphp.executer` module. The `execute_file` reads a php file and executes it, while `execute_php` executes given php code. 12 | 13 | 14 | - To run a php script: 15 | ```python 16 | import pyphp.executer 17 | pyphp.executer.execute_file('my_php_code.php') 18 | ``` 19 | 20 | my_php_code.php contents: 21 | ```php 22 | Hello 23 | ``` 24 | 25 | - To run php code 26 | ```python 27 | import pyphp.executer 28 | 29 | phpcode = r'Hello '; 30 | 31 | pyphp.executer.execute_php(phpcode) 32 | ``` 33 | output: 34 | ``` 35 | Hello World. 
36 | ``` 37 | - You can redirect the script's output to any file-like object, such as `StringIO` 38 | ```python 39 | import pyphp.executer 40 | import StringIO 41 | 42 | stdout = StringIO.StringIO() 43 | phpcode = r'Hello '; 44 | 45 | pyphp.executer.execute_php(phpcode, stdout=stdout) 46 | 47 | print repr(stdout.getvalue()) 48 | ``` 49 | output: 50 | ``` 51 | 'Hello World.\n' 52 | ``` 53 | 54 | - You can run the php file directly by using the module as a script: 55 | ```sh 56 | python pyphp/executer.py phpscript.php arg1 arg2 ... 57 | ``` 58 | 59 | 60 | 61 | 62 | 63 | ###Using the parser 64 | The parser breaks up the php code into a list of tokens. 65 | 66 | - This will parse a php file in the same directory and return it as a list of tokens. 67 | ``` python 68 | import pyphp.parser 69 | token_list = pyphp.parser.parse_file("my_php_code.php") 70 | ``` 71 | 72 | 73 | - You can also parse the php code directly: 74 | ``` python 75 | import pyphp.parser 76 | 77 | phpcode = r'Hello '; 78 | 79 | token_list = pyphp.parser.parse_php(phpcode) 80 | ``` 81 | 82 | - `token_list` would be a list like this: 83 | ``` 84 | [Token['DIRECT_OUTPUT', 'Hello '], 85 | Token['WS'], 86 | Token['IDENTIFIER', 'echo'], 87 | Token['WS'], 88 | Token['STRING', 'World\n'], 89 | Token[';']] 90 | ``` 91 | this token list can then be fed to the compiler to generate a syntax tree. 92 | 93 | ###Using the compiler 94 | The compiler takes a list of tokens and creates a syntax tree representing the code 95 | 96 | - This will compile a php file in the same directory and return it as a syntax tree. 
97 | ``` python 98 | import pyphp.compiler 99 | syntax_tree = pyphp.compiler.compile_file("my_php_code.php") 100 | ``` 101 | 102 | - You can also compile the php code : 103 | ``` python 104 | import pyphp.compiler 105 | 106 | phpcode = r'Hello '; 107 | 108 | syntax_tree = pyphp.compiler.compile_php(phpcode) 109 | ``` 110 | 111 | - Or compile the list of tokens directly: 112 | ``` python 113 | import pyphp.parser 114 | import pyphp.compiler 115 | 116 | phpcode = r'Hello '; 117 | 118 | token_list = pyphp.parser.parse_php(phpcode) 119 | 120 | syntax_tree = pyphp.compiler.compile_php(token_list) 121 | ``` 122 | 123 | - `syntax_tree` would be a `TreeNode` object like this: 124 | ``` 125 | TreeNode[ 126 | TreeNode['Hello '], 127 | TreeNode[ 128 | TreeNode[ 129 | Token['STRING', 'World\n'] 130 | ] 131 | ] 132 | ] 133 | ``` 134 | This `TreeNode` object can then be fed to `execute_php` to run the code. 135 | 136 | 137 | ###PHP Library support 138 | Currently, only a minimal set of PHP's language features and built in library is supported, but the plan is to support as much of it as possible. 139 | 140 | Language features not supported yet are namespaces, anonymous functions, lots of other stuff. 141 | 142 | Below is the official subset of PHP's built in features currently supported. 
143 | 144 | 145 | | name | description | supported | 146 | |-----|----|-----| 147 | | `true` | true literal | yes | 148 | | `false` | false literal | yes | 149 | | `null` | null literal | yes | 150 | | `E_ERROR` | error constant | yes | 151 | | `E_WARNING` | error constant | yes | 152 | | `E_PARSE` | error constant | yes | 153 | | `E_NOTICE` | error constant | yes | 154 | | `E_CORE_ERROR` | error constant | yes | 155 | | `E_CORE_WARNING` | error constant | yes | 156 | | `E_COMPILE_ERROR` | error constant | yes | 157 | | `E_COMPILE_WARNING` | error constant | yes | 158 | | `E_USER_ERROR` | error constant | yes | 159 | | `E_USER_WARNING` | error constant | yes | 160 | | `E_USER_NOTICE` | error constant | yes | 161 | | `E_STRICT` | error constant | yes | 162 | | `E_RECOVERABLE_ERROR` | error constant | yes | 163 | | `E_DEPRECATED` | error constant | yes | 164 | | `E_USER_DEPRECATED` | error constant | yes | 165 | | `E_ALL` | error constant | yes | 166 | | `isset` | Checks if a variable is set | yes | 167 | | `empty` | checks if a variable is empty | yes | 168 | | `die` | outputs to stdout and halts execution | yes | 169 | | `defined` | returns whether a constant is defined or not | yes | 170 | | `require_once` | requires a php script once | yes | 171 | | `require` | requires a php script | yes | 172 | | `include_once` | includes a php script once | yes | 173 | | `include` | includes a php script | yes | 174 | | `var_dump` | dumps a variable to stdout | yes | 175 | | `ini_set` | modifies settings derived from php.ini | no-op | 176 | | `error_reporting` | modifies error reporting level| yes | 177 | |`date_default_timezone_set`|sets default datetime zone| no-op| 178 | -------------------------------------------------------------------------------- /pyphp/parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | # Verbosity levels for this module 4 | VERBOSITY_NONE = 0 5 | VERBOSITY_LINENUMS = 1 6 | 
VERBOSITY_UNPARSEABLE_CHARS = 2
# current verbosity level
VERBOSE = VERBOSITY_NONE

# Regular expressions for the parsed tokens
RE_line_comment = re.compile(r'(#|//)[^\n]*($|\n)')
#RE_cpp_comment = re.compile(r'/\*([^*]|\*+[^/])*\*/')
RE_cpp_comment = re.compile(r'/\*([^*]|\*)*?\*/')
RE_whitespace = re.compile(r'[\s\n]*')
RE_until_php_tag = re.compile(r'(([^<]|<[^?]|<\?[^p]|<\?p[^h]|<\?ph[^p])*)<\?php')
RE_php_end_tag = re.compile(r'\?>')
RE_float = re.compile(r'(\d*\.\d+|\d+\.)([eE][+-]?\d+)?')
RE_hexnumber = re.compile(r'0x[a-fA-F\d]+')
RE_number = re.compile(r'\d+')
# RE_identifier = re.compile(r'\w[\w\d_]+')
RE_identifier = re.compile(r'[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*')
# RE_variable = re.compile(r'\$\w[\w\d_]+')
RE_variable = re.compile(r'\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*')
RE_sq_string = re.compile(r"'(([^']|\\')*)'")
RE_dq_string = re.compile(r'"(([^"]|\\"|{\s*\$\w[\w\d_]+\["(([^"]|\\")*)"\]\s*})*)"')
RE_misc = re.compile(r'([,();[\]{}])')
#RE_op_1 = re.compile('\|\||\|=|\||^=|\?|>>=|>>|>=|>|=>|===|==|=|<>|<=|<<=|<<|<|::|:|/=|/|.=|.|-=|-|\+=|\+|\*=|\*|&=|&|&&|%=|%|!==|!=|!')
RE_op_1 = re.compile(r'!==|===|instanceof|&&|\|\|')
RE_op_2 = re.compile(r'\.=|!=|==|<=|>=|->|=>|&|\||\^')
RE_op_3 = re.compile(r'!|=|<|>|~|^|::|:|\?|--|\.|\+\+|-|\+|\*|/')
# Numeric character escapes inside double-quoted strings ("\101", "\x41").
# Group 1 is the digit run; both are matched at the backslash.
RE_octal_chr = re.compile(r'\\([0-7]{1,3})')
RE_hex_chr = re.compile(r'\\x([0-9A-Fa-f]{1,2})')

STR_ESCAPE = {
    'n' : '\x0a' , 'r' : '\x0d' , 't' : '\x09' , 'v' : '\x0b' ,
    'e' : '\x1b' , 'f' : '\x0c' , '\\': '\\' , '$' : '$' , '"' : '"'
}

RE_start_whitespace = re.compile(r'[\s\n]')
RE_start_comment = re.compile(r'#|\/(\/|\*)')
RE_start_variable = re.compile(r'\$[a-zA-Z_\x7f-\xff]')
RE_start_identifier = re.compile(r'[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*')
RE_start_number = re.compile(r'\d|\.\d')
RE_start_php_end = re.compile(r'\?>')
RE_start_sq_string = re.compile(r"'")
RE_start_dq_string = re.compile(r'"')
RE_start_op = re.compile(r'!==|===|instanceof|&&|-|\*|/|\+\+|--|\+|\|\||\.=|\.|!=|==|<=|>=|->|=>|&|\||\^|!|=|<|>|~|^|::|:|\?')
RE_start_misc = RE_misc

RE_arrow_op = re.compile(r'->')

# Tokenized operators
TOKEN_OR_OP = ('or', '||')
TOKEN_XOR_OP = ('xor',)
TOKEN_AND_OP = ('and', '&&')
TOKEN_ASSIGNMENT_OP = ('=', '+=', '-=', '*=', '/=', '.=', '%=', '&=', '|=', '^=', '<<=', '>>=', '.=')
TOKEN_BIT1_OP = ('|', '&')
TOKEN_COMP_OP = ('==', '!=', '===', '!==', '<', '<=', '>', '>=', 'instanceof')
TOKEN_ADD_OP = ('+', '-' , '.')
TOKEN_TERM_OP = ('*', '/' , '%')
TOKEN_UNARY_OP = ('!', '~' , '++', '--', '+', '-', '@')

# Token Names
TOKEN_SET_OP = ['!==', '===', '!=', '==', '<=', '>=', '->', '=>', '!', '=', '<', '>', '~', '^', '::', ':', '?', 'instanceof']
TOKEN_SET_MISC = ['[', ',', '.', '(', ')', ';', '[', ']', '{', '}']
TOKEN_STRING = "STRING"
TOKEN_INTERPOLATED_STRING = "INTSTRING"
TOKEN_NUMBER = "NUMBER"
TOKEN_VARIABLE = "VARIABLE"
TOKEN_IDENTIFIER = "IDENTIFIER"
TOKEN_WS = "WS"
TOKEN_COMMENT = "COMMENT"
TOKEN_DIRECT_OUTPUT = "DIRECT_OUTPUT"
TOKEN_EOF = "EOF"


class ParseError(StandardError):
    """Tokenizer error.

    When a ``parser`` is given, the message is extended with the file name,
    the line number of ``parser.i``, the offending source line and a caret
    line pointing at the position.
    """

    def __init__(self, message="", parser=None):
        msg = message
        if parser:
            code, i, filename = parser.code, parser.i, parser.filename
            newlines = [pos for pos, ch in enumerate(code[:i]) if ch == '\n']
            line = len(newlines) + 1
            linestart_idx = newlines[-1] + 1 if newlines else 0
            # End of the current line, or end of input when there is no
            # further newline (``i`` may already be at the end of ``code``).
            lineend_idx = code.find('\n', i)
            if lineend_idx < 0:
                lineend_idx = len(code)
            errline = code[linestart_idx:lineend_idx]
            # Tabs are rendered as four spaces, hence three extra columns each.
            # NOTE(review): width inferred from the ``3*`` factor - confirm.
            arrowline = " " * (i - linestart_idx + 3 * errline.count('\t')) + '^'
            errline = errline.replace('\t', "    ")
            msg = "File \"%s\", line %s : %s\n%s\n%s" % (filename, line, message, errline, arrowline)
        super(ParseError, self).__init__(msg)


class TokenList(list):
    pass


class Token():
    """A token: ``parts`` is ``[kind]`` or ``[kind, value]`` plus its origin."""

    def __init__(self, parts, filename, linenum):
        self.parts = parts
        self.filename = filename
        self.line_num = linenum

    def __len__(self):
        return len(self.parts)

    def __getitem__(self, idx):
        return self.parts[idx]

    def __repr__(self):
        return "Token%r%s" % (self.parts, "[file:%r,line:%s]" % (self.filename, self.line_num) if VERBOSE >= VERBOSITY_LINENUMS else '')


class Parser:
    """Splits PHP source into a TokenList.

    ``i`` is the read position in ``code``; ``in_code`` tells whether the
    position is inside PHP tags (tokenized) or outside them (copied verbatim
    as DIRECT_OUTPUT tokens).
    """

    def __init__(self, code, filename=''):
        self.code = code
        self.i = 0
        self.filename = filename
        self.line_num = 1
        self.in_code = False
        self.tokens = TokenList()

    def parse(self):
        """Tokenize the whole input and return the token list."""
        codelen = len(self.code)
        while self.i < codelen:
            if self.in_code:
                self.parse_all()
                self.parse_phpendtag()
                if VERBOSE >= VERBOSITY_UNPARSEABLE_CHARS:
                    if self.i < len(self.code):
                        print(ParseError("", self))
            else:
                self.read_outcode()
        return self.tokens

    def parse_phpendtag(self):
        """Consume a closing PHP tag at the current position, if any."""
        if RE_php_end_tag.match(self.code, self.i):
            self.i += 2
            self.in_code = False

    def parse_all(self):
        """Tokenize PHP code until the end of input or until no progress is
        made; raises ParseError on an unrecognised character."""
        loop = True
        codelen = len(self.code)
        code = self.code
        while loop and self.i < codelen:
            old_i = self.i
            if RE_start_whitespace.match(code, self.i) is not None:
                self.parse_whitespace()
            elif RE_start_comment.match(code, self.i):
                self.parse_comments()
            elif RE_start_php_end.match(code, self.i):
                self.parse_phpendtag()
                self.read_outcode()
            elif RE_start_sq_string.match(code, self.i):
                self.parse_string()
            elif RE_start_dq_string.match(code, self.i):
                self.parse_interp_string()
            elif RE_start_variable.match(code, self.i):
                self.parse_variable()
            elif RE_start_number.match(code, self.i):
                self.parse_number()
            elif RE_start_op.match(code, self.i):
                self.parse_op()
            elif RE_start_identifier.match(code, self.i):
                self.parse_identifier()
            elif RE_start_misc.match(code, self.i):
                self.parse_misc()
            else:
                raise ParseError("Cannot parse.", self)
            loop = old_i != self.i

    def parse_op(self):
        """Emit an operator token, trying the longest operator tables first."""
        op_m = RE_op_1.match(self.code, self.i)
        if not op_m:
            op_m = RE_op_2.match(self.code, self.i)
        if not op_m:
            op_m = RE_op_3.match(self.code, self.i)
        if not op_m:
            return
        m_text = op_m.group(0)
        if len(m_text) > 0:
            self.tokens.append(Token([m_text], self.filename, self.line_num))
            self.i += len(m_text)

    def parse_misc(self, tokens=None):
        """Emit a punctuation token (brackets, comma, semicolon)."""
        m_m = RE_misc.match(self.code, self.i)
        if not m_m:
            print(ParseError("cant parse misc"))
            return
        m_text = m_m.group(0)
        if len(m_text) > 0:
            if tokens is None:
                tokens = self.tokens
            tokens.append(Token([m_text], self.filename, self.line_num))
            self.i += len(m_text)

    def parse_string(self):
        """Emit a STRING token for a single-quoted string.

        The token text is the raw source between the quotes; ``\\'`` is
        skipped over but not unescaped.  Raises ParseError when the input
        ends before the closing quote.
        """
        code = self.code
        if code[self.i] != "'":
            raise ParseError("Non-Interpolated string must start with a '", self)
        clen = len(code)
        self.i += 1
        start = self.i
        while self.i < clen:
            c = code[self.i]
            if c == '\\' and self.i + 1 < clen and code[self.i + 1] == "'":
                self.i += 2
            elif c == "'":
                break
            else:
                self.i += 1
        else:
            raise ParseError("Expected end of string, not end of file.", self)
        s_text = code[start:self.i]
        self.tokens.append(Token([TOKEN_STRING, s_text], self.filename, self.line_num))
        self.i += 1

    def parse_interp_string(self, tokens=None):
        """Emit a token for a double-quoted string.

        Escape sequences are decoded.  Without interpolations a STRING token
        is produced; otherwise an INTSTRING token whose value alternates text
        pieces and TokenLists for ``$var`` / ``{$var}`` interpolations.
        Raises ParseError when the input ends before the closing quote.
        """
        code = self.code
        if code[self.i] != '"':
            raise ParseError('Interpolated string must start with a "', self)
        self.i += 1
        txt = []
        text_seq = []
        clen = len(code)
        interp_count = 0

        while self.i < clen:
            c = code[self.i]
            if c == '"':
                break
            elif c == "\\":
                if self.i + 1 >= clen:
                    raise ParseError("Expected end of string, not end of file.", self)
                c = code[self.i + 1]
                if c in STR_ESCAPE:
                    txt.append(STR_ESCAPE[c])
                    self.i += 2
                else:
                    m = RE_octal_chr.match(code, self.i)
                    base = 8
                    if not m:
                        m = RE_hex_chr.match(code, self.i)
                        base = 16
                    if m:
                        # Keep the result within one byte ("\400" wraps).
                        txt.append(chr(int(m.group(1), base) & 0xFF))
                        self.i += len(m.group(0))
                    else:
                        # Unknown escape: keep the backslash and the char.
                        txt.append("\\" + c)
                        self.i += 2
            elif c == '$':
                if not RE_start_variable.match(code, self.i):
                    txt.append(c)
                    self.i += 1
                else:
                    sp = self.i
                    interp_tokens = self.parse_interpolation(TokenList())
                    if interp_tokens is None:
                        # Not a valid interpolation: keep what was consumed
                        # as literal text.
                        txt.append(code[sp:self.i])
                    else:
                        text_seq.append(''.join(txt))
                        text_seq.append(interp_tokens)
                        interp_count += 1
                        txt = []
            elif c == '{':
                sp = self.i
                self.i += 1
                interp_tokens = None
                if self.i < clen and code[self.i] == '$':
                    interp_tokens = self.parse_interpolation(TokenList())
                    if interp_tokens is not None:
                        self.parse_whitespace(interp_tokens)
                        if self.i < clen and code[self.i] == '}':
                            # Consume the closing brace so it does not leak
                            # into the following text.
                            self.i += 1
                        else:
                            interp_tokens = None
                if interp_tokens is None:
                    txt.append(code[sp:self.i])
                else:
                    text_seq.append(''.join(txt))
                    text_seq.append(interp_tokens)
                    interp_count += 1
                    txt = []
            else:
                txt.append(c)
                self.i += 1
        else:
            raise ParseError("Expected end of string, not end of file.", self)

        if len(txt) > 0:
            text_seq.append(''.join(txt))
        if tokens is None:
            tokens = self.tokens
        if interp_count == 0:
            tokens.append(Token([TOKEN_STRING, ''.join(text_seq)], self.filename, self.line_num))
        else:
            tokens.append(Token([TOKEN_INTERPOLATED_STRING, text_seq], self.filename, self.line_num))
        self.i += 1  # skip the closing quote

    def parse_interpolation(self, tokens, allow_identifier=False, allow_number=False):
        """Tokenize one interpolated expression into ``tokens``.

        Accepts a variable (or, when allowed, an identifier or number)
        optionally followed by ``->member`` or ``[index]``.  Returns the
        token list, or None when the text is not a valid interpolation.
        """
        code = self.code
        clen = len(code)
        if RE_start_variable.match(code, self.i):
            self.parse_variable(tokens)
        elif allow_identifier and RE_start_identifier.match(code, self.i):
            self.parse_identifier(tokens)
        elif allow_number and RE_start_number.match(code, self.i):
            self.parse_number(tokens)
        else:
            return None  # invalid token, but without any errors, since this function is internal
        if RE_arrow_op.match(code, self.i):
            tokens.append(Token(['->'], self.filename, self.line_num))
            self.i += 2
            self.parse_whitespace(tokens)
            return self.parse_interpolation(tokens, True, False)
        elif self.i < clen and code[self.i] == '[':
            # NOTE(review): no '[' / ']' tokens are emitted for the index;
            # presumably the consumer infers indexing from position - confirm.
            self.i += 1
            self.parse_whitespace(tokens)
            interp = self.parse_interpolation(tokens, True, True)
            self.parse_whitespace(tokens)
            if self.i < clen and code[self.i] == ']':
                self.i += 1  # consume the closing bracket
                return interp
            else:
                return None
        else:
            return tokens

    def parse_number(self, tokens=None):
        """Emit a NUMBER token for a float, hexadecimal, octal (leading 0)
        or decimal literal; raises ParseError for an invalid octal literal."""
        var_m = RE_float.match(self.code, self.i)
        num_t = 1
        if not var_m:
            var_m = RE_hexnumber.match(self.code, self.i)
            num_t = 2
        if not var_m:
            var_m = RE_number.match(self.code, self.i)
            num_t = 3
        if not var_m:
            return
        var_text = var_m.group(0)
        if len(var_text) > 0:
            try:
                if num_t == 1:
                    num = float(var_text)
                else:
                    num = int(var_text, 16 if num_t == 2 else (8 if var_text[0] == '0' else 10))
            except ValueError:
                raise ParseError("Invalid numeric literal.", self)
            if tokens is None:
                tokens = self.tokens
            tokens.append(Token([TOKEN_NUMBER, num], self.filename, self.line_num))
            self.i += len(var_text)

    def parse_variable(self, tokens=None):
        """Emit a VARIABLE token (text includes the leading ``$``)."""
        var_text = self.read_variable()
        if not var_text:
            return
        if tokens is None:
            tokens = self.tokens
        tokens.append(Token([TOKEN_VARIABLE, var_text], self.filename, self.line_num))
        self.i += len(var_text)

    def read_variable(self):
        """Return the variable text at the current position, or None."""
        var_m = RE_variable.match(self.code, self.i)
        if not var_m:
            return
        else:
            return var_m.group(0)

    def read_whitespace(self):
        """Return the (possibly empty) whitespace run at the current position."""
        return RE_whitespace.match(self.code, self.i).group(0)

    def parse_identifier(self, tokens=None):
        """Emit an IDENTIFIER token."""
        id_m = RE_identifier.match(self.code, self.i)
        if not id_m:
            return
        id_text = id_m.group(0)
        if len(id_text) > 0:
            if tokens is None:
                tokens = self.tokens
            tokens.append(Token([TOKEN_IDENTIFIER, id_text], self.filename, self.line_num))
            self.i += len(id_text)

    def parse_whitespace(self, tokens=None):
        """Emit a WS token for a whitespace run and count its newlines."""
        ws_text = self.read_whitespace()
        if tokens is None:
            tokens = self.tokens
        if len(ws_text) > 0:
            tokens.append(Token([TOKEN_WS], self.filename, self.line_num))
            self.i += len(ws_text)
            self.line_num += ws_text.count('\n')

    def parse_comments(self):
        """Emit a COMMENT token for a line or block comment."""
        c_m = RE_line_comment.match(self.code, self.i)
        if c_m is None:
            c_m = RE_cpp_comment.match(self.code, self.i)
        if c_m is not None:
            c_text = c_m.group(0)
            self.tokens.append(Token([TOKEN_COMMENT, c_text], self.filename, self.line_num))
            self.line_num += c_text.count('\n')
            self.i += len(c_text)

    def read_outcode(self):
        """Copy text outside PHP tags into a DIRECT_OUTPUT token.

        Reads up to and including the next opening tag; when there is none,
        the rest of the input is emitted.  Afterwards the parser is in code
        mode.
        """
        outcode_m = RE_until_php_tag.match(self.code, self.i)
        if outcode_m:
            outcode_text = outcode_m.group(1)
            if len(outcode_text) > 0:
                self.tokens.append(Token([TOKEN_DIRECT_OUTPUT, outcode_text], self.filename, self.line_num))
            self.i += len(outcode_text) + 5  # 5 == len of the opening tag
            self.line_num += outcode_m.group(0).count('\n')
        else:
            outcode_text = self.code[self.i:]
            self.i = len(self.code)
            self.tokens.append(Token([TOKEN_DIRECT_OUTPUT, outcode_text], self.filename, self.line_num))
        self.in_code = True


def parse_file(php_file, state=None):
    """Read ``php_file`` and return its token list."""
    from os.path import abspath
    abs_php_file = abspath(php_file)
    with open(abs_php_file) as finp:
        code = finp.read()
    P = Parser(code, abs_php_file)
    return P.parse()


def parse_php(php_code, state=None):
    """Return the token list for the given PHP source text."""
    P = Parser(php_code)
    return P.parse()


# NOTE(review): the sample source below looks truncated in this copy of the
# file (the PHP sections are missing) - restore it from upstream.
TEST_CODE = """
what what!!
"""


def test(*args, **kw):
    """Parse a file (``filename=``) or a source string (``code=`` or first
    positional argument) and print the resulting tokens."""
    if len(args) > 0:
        kw['code'] = args[0]
    if 'filename' in kw:
        filename = kw['filename']
        print("parsing file : %r" % filename)
        from os.path import abspath
        with open(abspath(filename)) as finp:
            print(finp.read())
        print("----")
        parsed_code = parse_file(filename)
    elif 'code' in kw:
        code = kw['code']
        print("parsing php code :\n%s" % code)
        print("----")
        parsed_code = parse_php(code)
    print("")
    print("Parsed Code:\n" + '\n'.join([repr(x) for x in parsed_code]))


if __name__ == '__main__':
    import sys
    if len(sys.argv) >= 2:
        test(filename=sys.argv[1])
    else:
        test(code=TEST_CODE)
import sys
from scope import scope


# Verbosity levels for this module
VERBOSITY_NONE = 0
VERBOSITY_NOTIFY_RUNNING = 1
VERBOSITY_SHOW_DEBUG = 2
VERBOSITY_SHOW_FN_CALLS = 3
VERBOSITY_SHOW_VISITED_NODES = 4
# current verbosity level
VERBOSE = VERBOSITY_NONE


class AbstractPhpExecuter(object):
    """Tree walker that dispatches each node to an ``exec_<node name>`` method."""

    def __init__(self, code_tree, initial_scope=None,
                 stdout=None, stderr=None, stdin=None):
        """Prepare to run ``code_tree``.

        ``initial_scope`` is merged into the global scope; the three stream
        arguments default to the corresponding ``sys`` streams.
        """
        self.code_tree = code_tree
        self.filename = code_tree.filename
        self.globals = self.make_global_scope(initial_scope)
        self.ERROR_REPORTING = constants.E_ALL
        self.last_node = None
        self.last_scope = None
        self.pipe_stdout = stdout if stdout else sys.stdout
        self.pipe_stderr = stderr if stderr else sys.stderr
        self.pipe_stdin = stdin if stdin else sys.stdin

        if VERBOSE >= VERBOSITY_SHOW_VISITED_NODES:
            trace.trace_obj_calls(self, ['!', 'visit', 'get_val', 'report_error'], 'args')

    # Maps an error constant to (message prefix, severity); a severity above
    # zero makes report_error() stop execution.
    error_prefixes = dict([(getattr(constants, k), (v, sev)) for v, ks, sev in (
        ('Fatal error', ['E_CORE_ERROR', 'E_ERROR', 'E_USER_ERROR', 'E_COMPILE_ERROR'], 2),
        ('Warning', ['E_CORE_WARNING', 'E_WARNING', 'E_USER_WARNING', 'E_COMPILE_WARNING'], 0),
        ('Parse error', ['E_PARSE'], 0),
        ('Notice', ['E_NOTICE', 'E_USER_NOTICE'], 0),
        ('Strict Standards', ['E_STRICT'], 0),
        ('Catchable fatal error', ['E_RECOVERABLE_ERROR'], 1),
        ('Deprecated', ['E_DEPRECATED', 'E_USER_DEPRECATED'], 0)
    ) for k in ks])

    def report_error(self, err_type, msg):
        """Print ``msg`` if ``err_type`` is enabled; raise if it is severe.

        Raises StopExecutionError when the error's severity is above zero.
        """
        # The fallback must be a (prefix, severity) pair: the previous bare
        # 'Error' string could not be unpacked into two names.
        prefix, severity = self.error_prefixes.get(err_type, ('Error', 0))
        if (err_type & self.ERROR_REPORTING) != 0:
            print("\n%s: %s" % (prefix, msg))
        if severity > 0:
            raise StopExecutionError("\n%s: %s" % (prefix, msg))

    def make_global_scope(self, initial_scope=None):
        """Build the global scope: superglobals overlaid with ``initial_scope``."""
        if initial_scope is None:
            initial_scope = {}
        global_scope = {
            '$_SERVER': phparray.PHPArray(
                ('SCRIPT_NAME', self.filename)
            ),
            '$_GET': phparray.PHPArray(),
            '$_POST': phparray.PHPArray(),
            '$_FILES': phparray.PHPArray()
        }
        global_scope.update(initial_scope)
        return scope(global_scope, {'%executer': self}, phpbuiltins.builtins, name='global')

    def __call__(self):
        """Shorthand for execute()."""
        return self.execute()

    def execute(self):
        """Run the whole tree and return the value of the root visit.

        A StopExecutionError ends the run quietly (after dumping the last
        visited node and scope); any other StandardError is re-raised after
        the same dump.
        """
        if VERBOSE >= VERBOSITY_NOTIFY_RUNNING:
            print("Running %r\n\n---\n" % self.code_tree)
        try:
            return self.visit(self.code_tree, self.globals)
        except StopExecutionError:
            if self.last_node:
                print(self.last_node.prepr())
                print(self.last_scope.prepr())
        except StandardError:
            if self.last_node:
                print("Error ocurred in %s (line %s)" % (self.last_node.filename, self.last_node.line_num))
                print(self.last_node.prepr())
                print(self.last_scope.prepr())
            raise

    def visit(self, tree_node, local_dict):
        """Execute ``tree_node`` in ``local_dict`` via its ``exec_*`` method."""
        last_context = self.last_node, self.last_scope
        self.last_node, self.last_scope = tree_node, local_dict
        if VERBOSE >= VERBOSITY_SHOW_VISITED_NODES:
            print("Visiting %s (line %s) : %s %s" % (tree_node.filename, tree_node.line_num, tree_node.name, prepr.prepr(local_dict)))
        fn = getattr(self, 'exec_%s' % tree_node.name, None)
        if fn is not None:
            retval = fn(tree_node, local_dict)
        else:
            retval = self.visit_default(tree_node, local_dict)
        # Deliberately not in a ``finally``: when a visit raises, execute()
        # reports the node that was being visited.
        self.last_node, self.last_scope = last_context
        return retval

    def visit_default(self, tree_node, local_dict=None):
        """Fallback for nodes without a visitor method; always raises ExecuteError."""
        if VERBOSE >= VERBOSITY_SHOW_DEBUG:
            print(tree_node.prepr())
        raise ExecuteError("Cannot visit node %r, visitor method 'exec_%s' not found.\n" % (tree_node.name, tree_node.name))

    def has_val(self, var_ref):
        """Return whether ``var_ref`` holds a value (plain values always do)."""
        if isinstance(var_ref, VarRef):
            return var_ref.isset()
        else:
            return True

    def get_val(self, var_ref):
        """Dereference ``var_ref`` if it is a VarRef, else return it unchanged."""
        if isinstance(var_ref, VarRef):
            return var_ref.get()
        else:
            return var_ref

    def set_val(self, var_ref, value):
        """Store ``value`` through ``var_ref``; raise ExecuteError for non-references."""
        if isinstance(var_ref, VarRef):
            return var_ref.set(value)
        else:
            # Arguments were swapped: the message reads "assign <value> to <target>".
            raise ExecuteError("Cannot assign %r to %r" % (value, var_ref))


class PhpExecuter(AbstractPhpExecuter):
    """Visitor methods for the concrete PHP node types."""

    def exec_php_file(self, node, local):
        """Run every top-level statement; return the last statement's value."""
        rv = None
        for child in node.children:
            rv = self.visit(child, local)
        return rv

    def exec_stmt_block(self, node, local):
        """Run every statement of a block; return the last statement's value."""
        rv = None
        for child in node.children:
            rv = self.visit(child, local)
        return rv

    def exec_echo(self, node, local):
        """Write the string form of every argument to the output pipe; return 1."""
        echo_args = [self.get_val(self.visit(subnode, local)) for subnode in node.children]
        # Coerce first: joining raw values fails for numbers, booleans and null.
        self.pipe_stdout.write(''.join([coerce.to_string(arg) for arg in echo_args]))
        return 1

    exec_print_expression = exec_echo

    def exec_define(self, node, local):
        """Bind a constant: first argument is the name, second the value."""
        args = [self.visit(i, local) for i in node.children]
        self.globals[args[0]] = self.get_val(args[1])

    def exec_assignment_expression(self, node, local):
        """Assign the right-most value to every target, right to left.

        NOTE(review): the operator of each target node is not inspected, so
        compound operators such as ``+=`` behave like ``=`` -- confirm.
        """
        child_len = len(node.children)
        rhs_value = self.get_val(self.visit(node.children[child_len - 1], local))
        for op_node in reversed(node.children[:child_len - 1]):
            lhs_var_ref = self.visit(op_node.children[0], local)
            self.set_val(lhs_var_ref, rhs_value)
        return rhs_value

    def exec_bit_and_expression(self, node, local):
        """Bitwise AND of all operands."""
        val = -1
        for subnode in node.children:
            if subnode.name == '&':
                subnode = subnode.children[0]
            val &= coerce.to_int(self.get_val(self.visit(subnode, local)))
        return val

    def exec_bit_or_expression(self, node, local):
        """Bitwise OR of all operands."""
        val = 0
        for subnode in node.children:
            # Unwrap operator nodes the same way exec_bit_and_expression does.
            if subnode.name == '|':
                subnode = subnode.children[0]
            val |= coerce.to_int(self.get_val(self.visit(subnode, local)))
        return val

    # This method used to be a second ``exec_bit_or_expression`` that shadowed
    # the real OR above, so ``|`` silently computed XOR.
    # NOTE(review): presumably the grammar rule is named bit_xor_expression --
    # confirm against the compiler.
    def exec_bit_xor_expression(self, node, local):
        """Bitwise XOR of all operands."""
        val = 0
        for subnode in node.children:
            if subnode.name == '^':
                subnode = subnode.children[0]
            val ^= coerce.to_int(self.get_val(self.visit(subnode, local)))
        return val

    # TODO: no visitor exists yet for bitshift_expression, expression_list,
    # for_stmt, foreach_stmt, term_expression and typecheck nodes.

    def exec_return_stmt(self, node, local):
        """Unwind to the caller by raising ReturnError with the optional value."""
        raise ReturnError(self.visit(node.children[0], local) if len(node.children) > 0 else None)

    def exec_if(self, node, local):
        """Run the first branch when the condition holds, else the optional second."""
        condition = self.visit(node.children[0], local)
        if condition:
            return self.visit(node.children[1], local)
        elif len(node.children) > 2:
            return self.visit(node.children[2], local)

    @staticmethod
    def _identical(a, b):
        """PHP ``===``: same object, or same type and equal value."""
        return a is b or (type(a) is type(b) and a == b)

    def exec_eq_comp_expression(self, node, local):
        """Evaluate a chain of ``==``, ``!=``, ``<>``, ``===`` and ``!==``."""
        val = None
        for i, subnode in enumerate(node.children):
            if not i:
                val = self.get_val(self.visit(subnode, local))
            else:
                op = subnode.name
                val2 = self.get_val(self.visit(subnode.children[0], local))
                if op == '==':
                    val = (val2 == val)
                elif op in ('!=', '<>'):
                    val = (val2 != val)
                elif op == '===':
                    # Python identity (``is``) is wrong for equal numbers or
                    # strings that happen to be distinct objects.
                    val = self._identical(val2, val)
                elif op == '!==':
                    val = not self._identical(val2, val)
        return val
# PhpExecuter methods.  The ``class PhpExecuter`` header lies outside this
# span, so the methods are written at column 0.

def exec_direct_output(self, node, local):
    """Write text found outside PHP tags straight to the output pipe."""
    self.pipe_stdout.write(node.children[0])


def exec_primitive(self, node, local):
    """Evaluate a literal, constant, variable or interpolated string.

    Returns the literal value, a VarRef for variables and unknown
    identifiers, or the result of visiting a nested node.  Raises
    ExecuteError for anything else.
    """
    subnode = node.children[0]
    if isinstance(subnode, parser.Token):
        token = subnode
        if token[0] in (parser.TOKEN_STRING, parser.TOKEN_NUMBER):
            return token[1]
        elif token[0] == parser.TOKEN_IDENTIFIER:
            lcaseid = token[1].lower()
            if lcaseid in primitives:
                return primitives[lcaseid]
            else:
                return VarRef(token[1], self, local)
        elif token[0] == parser.TOKEN_VARIABLE:
            return VarRef(token[1], self, local)
        elif token[0] == parser.TOKEN_INTERPOLATED_STRING:
            # Parts are either compiled nodes (evaluated) or plain text.
            text = ''.join([coerce.to_string(self.get_val(self.visit(x, local)) if isinstance(x, compiler.TreeNode) else x) for x in token[1]])
            return text
    elif isinstance(subnode, compiler.TreeNode):
        return self.visit(subnode, local)
    raise ExecuteError("invalid primitive %r" % subnode)


def exec_array_literal(self, node, local):
    """Build a PHPArray from the (key, value) pairs of the element nodes."""
    elements = [self.visit(subnode, local) for subnode in node.children]
    return phparray.PHPArray(*elements)


def exec_array_element(self, node, local):
    """Return ``(key, value)``; the key is None for a value-only element."""
    if len(node.children) > 1:
        key, value = [self.get_val(self.visit(subnode, local)) for subnode in node.children[:2]]
    else:
        key, value = None, self.get_val(self.visit(node.children[0], local))
    return key, value


def exec_member_expression(self, node, local):
    """Return a ``->`` VarRef for a member token, or visit a nested node."""
    subnode = node.children[0]
    if isinstance(subnode, parser.Token):
        token = subnode
        # Membership test: the previous ``==`` against the tuple never matched.
        if token[0] in (parser.TOKEN_VARIABLE, parser.TOKEN_IDENTIFIER):
            return VarRef(token[1], self, local, '->')
        return None
    else:
        # visit() requires the scope; it used to be called without it.
        return self.visit(subnode, local)


def exec_followed_primitive(self, node, local):
    """Evaluate a primitive, then apply each follower (call, index, ...) in order."""
    fprimitive = self.visit(node.children[0], local)
    for follower in node.children[1:]:
        fprimitive = self.apply_follower(fprimitive, follower, local)
    return fprimitive


def exec_negated(self, node, local):
    """Logical NOT of the operand's value."""
    val = self.get_val(self.visit(node.children[0], local))
    return not val


def exec_factor(self, node, local):
    """Evaluate an operand together with its optional prefix operators.

    When the first child is a ``unary_op`` node its children are the
    operators and the operand is the second child.  ``@`` disables error
    reporting while the operand is evaluated.
    """
    unaries = node.children[0]
    if unaries.name != "unary_op":
        unaries = None

    old_reporting = self.ERROR_REPORTING
    # The checks below used an undefined ``unary_idx`` name (NameError).
    if unaries is not None and '@' in unaries.children:
        self.ERROR_REPORTING = 0

    try:
        factor = self.visit(node.children[0 if unaries is None else 1], local)
        if unaries is not None:
            # NOTE(review): operators are applied in stored order; confirm
            # the compiler stores them innermost-first.
            for unary in unaries.children:
                # Keep the result: it was previously discarded.
                factor = self.apply_unary(factor, unary)
    finally:
        # Restore reporting even when evaluation fails under '@'.
        self.ERROR_REPORTING = old_reporting
    return factor


def exec_classdef(self, node, local):
    """Define a PHP class, register it in the globals and return it.

    Raises ExecuteError when the named superclass is not defined.
    """
    cls_name, scls_name, body = node.children
    if scls_name and not scls_name[1] in local:
        raise ExecuteError("Undefined superclass %s" % scls_name[1])
    class_context = scope({}, local, name=cls_name[1])
    if body:
        body = self.visit(body, class_context)
    defined_class = phpclass.PHPClass(cls_name[1], local[scls_name[1]] if scls_name else None, body, class_context, filename=node.filename, line_num=node.line_num)
    self.globals[defined_class.name] = defined_class
    return defined_class


def exec_classdef_block(self, node, local):
    """Evaluate every member definition of a class body; return them as a list."""
    return [self.visit(member, local) for member in node.children]
# PhpExecuter methods followed by the module-level entry points.  The
# ``class PhpExecuter`` header lies outside this span, so everything is
# written at column 0; the methods are the functions taking ``self``.

def exec_conditional_expression(self, node, local):
    """Evaluate ``cond ? a : b`` and return the result of the chosen branch."""
    condition, true_block, false_block = node.children[:3]
    if self.get_val(self.visit(condition, local)):
        return self.visit(true_block, local)
    else:
        return self.visit(false_block, local)


def exec_const_vardef_stmt(self, node, local):
    """Return a ``const`` VarDef; the value is None when no initializer is given."""
    name = node.children[0][1]
    val = self.get_val(self.visit(node.children[1], local)) if len(node.children) > 1 else None
    return VarDef(name, ['const'], val)


def exec_funcdef(self, node, local):
    """Create a PHPFunction, register it in the globals and return it."""
    name, params, body = node.children
    fn = phpfunction.PHPFunction(name[1], None, self.visit(params, local), body, filename=node.filename, line_num=node.line_num)
    fn.context = self.globals
    self.globals[name[1]] = fn
    return fn


def exec_methoddef(self, node, local):
    """Create (without registering) the PHPFunction for a class method."""
    modifiers, name, params, body = node.children
    return phpfunction.PHPFunction(name[1], modifiers, self.visit(params, local), body, filename=node.filename, line_num=node.line_num)


def exec_parameter_list(self, node, local):
    """Evaluate every parameter node; return the results as a list."""
    return [self.visit(subnode, local) for subnode in node.children]


def exec_parameter(self, node, local):
    """Return ``[hint, name]`` or ``[hint, name, default]`` for one parameter."""
    hint, param, default = node.children
    if hint:
        hint = hint[1]
    spec = [hint, param[1]]
    if default:
        spec.append(self.get_val(self.visit(default, local)))
    return spec


def exec_and_expression(self, node, local):
    """Short-circuit AND: False at the first falsy operand, else the last value."""
    val = True  # result for an operand-less node (previously UnboundLocalError)
    for subnode in node.children:
        if subnode.name in ('&&', 'and'):
            subnode = subnode.children[0]
        val = self.get_val(self.visit(subnode, local))
        if not val:
            return False
    return val


exec_sym_and_expression = exec_and_expression


def exec_or_expression(self, node, local):
    """Short-circuit OR: the first truthy operand value, else False."""
    for i, subnode in enumerate(node.children):
        # Every operand after the first is wrapped in an operator node.
        # (A leftover debug print of each operand was removed here.)
        val = self.get_val(self.visit(subnode.children[0] if i else subnode, local))
        if val:
            return val
    return False


exec_sym_or_expression = exec_or_expression


def exec_xor_expression(self, node, local):
    """Logical XOR folded over all operands.

    NOTE(review): unlike the AND/OR visitors this does not unwrap operator
    nodes -- confirm how the compiler builds xor children.
    """
    lval = False
    for subnode in node.children:
        val = self.get_val(self.visit(subnode, local))
        lval = (val and not lval) or (lval and not val)
    return lval


def apply_follower(self, factor, follower, local):
    """Dispatch to ``apply_<follower name>_follower``; raise if it is unknown."""
    follower_fn = "apply_%s_follower" % follower.name
    if hasattr(self, follower_fn):
        return getattr(self, follower_fn)(factor, follower, local)
    else:
        raise ExecuteError("Unknown follower for %s." % follower.prepr())


def apply_unary(self, factor, unary):
    """Apply one prefix operator of ('!', '~', '++', '--', '+', '-', '@').

    Returns the new value; ``@`` and unknown operators leave ``factor`` as is.
    """
    if unary == '!':
        factor = not self.get_val(factor)
    elif unary == '~':
        factor = ~ self.get_val(factor)
    elif unary == '+':
        factor = self.get_val(factor)
    elif unary == '-':
        # Negate: this branch used to return the value unchanged.
        factor = - self.get_val(factor)
    elif unary == '++':
        fval = self.get_val(factor)
        self.set_val(factor, fval + 1)
        factor = self.get_val(factor)
    elif unary == '--':
        fval = self.get_val(factor)
        self.set_val(factor, fval - 1)
        factor = self.get_val(factor)
    return factor


def apply_array_indexing_follower(self, factor, follower, local):
    """Index a variable reference; raise ExecuteError for anything else."""
    index = self.get_val(self.visit(follower.children[0], local))
    if isinstance(factor, VarRef):
        return factor[index]
    else:
        raise ExecuteError("Cannot index %r " % factor)


def apply_fncall_follower(self, factor, follower, local):
    """Call ``factor`` with the follower's arguments and return the result.

    Builtins receive the unevaluated argument results plus the executer and
    scope; other callables receive dereferenced values and the call site.
    Raises ExecuteError when the target does not exist or is null.
    """
    if VERBOSE >= VERBOSITY_SHOW_FN_CALLS:
        print("Calling %s\n on %s" % (follower.prepr(), factor))

    func_name = factor
    factor = self.get_val(factor)
    if type(factor) is str:
        func_name = factor
        if factor in self.globals:
            factor = self.globals[factor]
        else:
            # The name was passed as a second exception argument and never
            # interpolated into the message.
            raise ExecuteError("Function %s does not exists" % factor)

    if factor is None:
        if isinstance(func_name, VarRef):
            func_name = func_name.qualified_name()
        func_type = 'method' if '::' in func_name or '->' in func_name else 'function'
        self.report_error(constants.E_ERROR, 'Call to undefined %s %s() in %s on line %d' % (func_type, func_name, follower.filename, follower.line_num))
        raise ExecuteError("null is not callable.")
    elif isinstance(factor, phpbuiltins.builtin.builtin):
        args = [self.visit(x, local) for x in follower.children[0].children]
        if VERBOSE >= VERBOSITY_SHOW_FN_CALLS:
            print("calling builtin %r with %r" % (factor, args))
        retval = factor(args, self, local)
    else:
        args = [self.get_val(self.visit(x, local)) for x in follower.children[0].children]
        if VERBOSE >= VERBOSITY_SHOW_FN_CALLS:
            print("calling %r with %r" % (factor, args))
        retval = factor(*args, filename=follower.filename, line_num=follower.line_num)
    return retval


def apply_static_member_access_follower(self, factor, follower, local):
    """Resolve ``factor::member`` for a reference or an already-resolved value."""
    subnode = follower.children[0].children[0]
    if isinstance(subnode, parser.Token):
        member_name = subnode[1]
    else:
        member_name = self.get_val(self.visit(subnode, local))
    if isinstance(factor, VarRef):
        return factor.getitem(member_name, '::')
    else:
        return factor[member_name]


def exec_add_expression(self, node, local):
    """Evaluate a chain of ``+``, ``-`` and ``.`` (concatenation), left to right.

    ``+`` adds two arrays or two numerically coerced values; mixing an array
    with a non-array reports E_ERROR.
    """
    value = None
    for i, child in enumerate(node.children):
        if i > 0:
            op = child.name
            arg = self.get_val(self.visit(child.children[0], local))
            if op == '.':
                value = coerce.to_string(value) + coerce.to_string(arg)
            elif op == '+':
                val_is_array = isinstance(value, phparray.PHPArray)
                arg_is_array = isinstance(arg, phparray.PHPArray)
                if val_is_array or arg_is_array:
                    if val_is_array and arg_is_array:
                        value = value + arg
                    else:
                        self.report_error(constants.E_ERROR, 'Unsupported operand types in %s on line %d' % (child.filename, child.line_num))
                else:
                    value = self.coerce_numeric(value) + self.coerce_numeric(arg)
            elif op == '-':
                value = self.coerce_numeric(value) - self.coerce_numeric(arg)
        else:
            value = self.get_val(self.visit(child, local))
    return value


def coerce_numeric(self, x):
    """Return ``x`` as a number: numbers as is, numeric string prefixes parsed, else 0."""
    if isinstance(x, bool):
        # true/false count as 1/0 in arithmetic; they used to fall through to 0.
        return int(x)
    if type(x) in (int, long, float):
        return x
    elif type(x) in (str, unicode):
        m = parser.RE_float.match(x)
        if m:
            return float(m.group(0))
        m = parser.RE_number.match(x)
        if m:
            return int(m.group(0))
    return 0


def execute_file(phpfile, global_dict=None, **kwargs):
    """Parse, compile and run a PHP file; return the executer that ran it.

    ``phpfile`` may be a path, a token list or a compiled tree.  Extra
    keyword arguments are passed to the executer constructor.
    """
    if type(phpfile) is str:
        phpfile = parser.parse_file(phpfile)
    if isinstance(phpfile, parser.TokenList):
        phpfile = compiler.compile_php(phpfile)
    return execute_php(phpfile, global_dict, **kwargs)


def execute_php(phpcode, global_dict=None, **kwargs):
    """Parse, compile and run PHP code; return the executer that ran it.

    ``phpcode`` may be source text, a token list or a compiled tree.
    Raises TypeError for any other argument.
    """
    if type(phpcode) is str:
        phpcode = parser.parse_php(phpcode)
    if isinstance(phpcode, parser.TokenList):
        phpcode = compiler.compile_php(phpcode)
    if not isinstance(phpcode, compiler.TreeNode):
        # ``ArgumentError`` is not defined anywhere (it raised NameError).
        raise TypeError("Given argument is not php code %r" % phpcode)
    E = PhpExecuter(phpcode, global_dict, **kwargs)
    E.execute()
    return E
570 | phpfile=None 571 | phpargv=[] 572 | if len(sys.argv) > 1: 573 | for arg in sys.argv[1:]: 574 | if phpfile is None and len(arg) > 3 and arg[0:2] == '--': 575 | comps = arg[2:].split('=', 1) 576 | key = comps[0] 577 | val = comps[1] if len(comps) > 1 else 1 578 | if key == 'show_globals' and int(val): 579 | show_globals = True 580 | else: 581 | print "Unknown option %s"%arg 582 | else: 583 | if len(phpargv) == 0: 584 | phpfile = arg 585 | phpargv.append(arg) 586 | 587 | init_scope = { 588 | 'argv' : phpargv, 589 | 'argc' : len(phpargv) 590 | } 591 | 592 | if phpfile: 593 | executer = execute_file(phpfile, init_scope) 594 | else : 595 | executer = execute_php(parser.TEST_CODE) 596 | 597 | if show_globals: 598 | print "[ended]\n-- globals --" 599 | for i in executer.globals.dict: 600 | print "%-14s -> %r"%(i, executer.globals[i]) 601 | -------------------------------------------------------------------------------- /pyphp/compiler.py: -------------------------------------------------------------------------------- 1 | import parser 2 | import weakref 3 | 4 | # Verbosity levels for this module 5 | VERBOSITY_NONE = 0 6 | VERBOSITY_SHOW_COMPILED_STATEMENTS = 1 7 | VERBOSITY_SHOW_SKIPPED_WS = 2 8 | VERBOSITY_SHOW_READ_TOKENS = 3 9 | VERBOSITY_TRACE_GRAMMAR = 4 10 | # current verbosity level 11 | VERBOSE = 0 12 | 13 | # currently understood PHP keywords 14 | KEYWORD_DEFINE = 'define'; 15 | KEYWORD_IF = 'if'; 16 | KEYWORD_ELSE = 'else'; 17 | KEYWORD_CLASS = 'class'; 18 | KEYWORD_VAR = 'var'; 19 | KEYWORD_NEW = 'new'; 20 | KEYWORD_FOR = 'for'; 21 | KEYWORD_ARRAY = 'array'; 22 | KEYWORD_FOREACH= 'foreach'; 23 | KEYWORD_RETURN = 'return'; 24 | KEYWORD_FUNCTION = 'function'; 25 | KEYWORD_PUBLIC = 'public'; 26 | KEYWORD_PROTECTED = 'protected'; 27 | KEYWORD_PRIVATE = 'private'; 28 | KEYWORD_EXTENDS = 'extends'; 29 | KEYWORD_INSTANCEOF= 'instanceof'; 30 | KEYWORD_CONST = 'const'; 31 | KEYWORD_ECHO = 'echo'; 32 | KEYWORD_PRINT = 'print'; 33 | KEYWORD_SWITCH = 'switch'; 34 | 
KEYWORD_CASE = 'case'
KEYWORD_THROW = 'throw'
KEYWORD_TRY = 'try'
KEYWORD_CATCH = 'catch'
KEYWORD_FINALLY = 'finally'
KEYWORD_DEFAULT = 'default'
KEYWORD_WHILE = 'while'

VARDEF_DECORATORS = ('var', 'public', 'private', 'protected', 'static')

# operators
OPERATORS_MULT = ('*', '/', '%')
OPERATORS_ADD = ('+', '-', '.')
OPERATORS_BITSHIFT = ('<<', '>>')
OPERATORS_ORDER_COMP = ('<', '<=', '>', '>=')
OPERATORS_EQ_COMP = ('==', '!=', '===', '!==', '<>')
OPERATORS_ASSIGNMENT = ('=', '+=', '-=', '*=', '/=', '.=', '%=', '&=', '|=', '^=', '<<=', '>>=')  # , '=>')


# associativity constants
RIGHT_ASSOCIATIVE = True
LEFT_ASSOCIATIVE = False


class CompileError(StandardError):
    "An error occurring during the compilation phase"
    pass


class TreeNode(object):
    "A node in the Abstract Syntax Tree generated from the code"

    def __init__(self, node_name, children, filename=None, line_num=0):
        "Create a TreeNode with the given name and each child appended to it"
        self.name = node_name
        self.filename = filename
        self.line_num = line_num
        self.parent = None
        self.children = []
        for child in children:
            self.addChild(child)

    def addChild(self, child):
        "Appends a child to this TreeNode. If the child is itself a TreeNode, its parent is set to this node."
        self.children.append(child)
        if isinstance(child, TreeNode):
            # Weak reference, so a child does not keep its parent alive.
            child.parent = weakref.ref(self)

    def getParent(self):
        "Returns this TreeNode's parent, or None if it is unset or no longer alive."
        if self.parent:
            return self.parent()
        else:
            return None

    def __repr__(self):
        "Returns a string representation of this TreeNode"
        return "TreeNode<%s>%r" % (self.name, self.children)

    def prepr(self, depth=0, dch=' '):
        "Returns an indented, multi-line representation; ``dch`` is repeated once per depth level."
        dstr = dch * depth
        return "%sTreeNode<%s, file:%r, line:%s>[\n%s\n%s]" % (
            dstr, self.name, self.filename, self.line_num,
            # Pass ``dch`` down so nested nodes use the caller's indent string
            # (it used to reset to the default below the first level).
            "\n".join([x.prepr(depth + 1, dch) if hasattr(x, 'prepr') else (dch * (depth + 1) + str(x)) for x in self.children]),
            dstr
        )


class Compiler(object):
    "Builds a TreeNode syntax tree from a list of parser tokens"

    def __init__(self, tokens=None):
        "Create a compiler positioned at the first of ``tokens``"
        self.i = 0
        self.tokens = tokens
        self.tree = None
        self.indent = 0

        if VERBOSE >= VERBOSITY_TRACE_GRAMMAR:
            import trace
            trace.trace_obj_calls(self, ['!', 'cur_filename_line', 'cur_token', 'compile', 'compile_php_file', 'skip_comments_and_ws', 'skip_to_next'])

    # Utility member functions
    def cur_filename_line(self):
        "Returns the (filename, line number) of the current token."
        tok = self.cur_token(False)
        return tok.filename, tok.line_num

    def cur_token(self, can_verbose=True):
        "Returns the current token, or a synthetic TOKEN_EOF token past the end."
        if self.i < len(self.tokens):
            tok = self.tokens[self.i]
        elif len(self.tokens) > 0:
            # Past the end: report EOF at the position of the last real token.
            ltok = self.tokens[-1]
            tok = parser.Token((parser.TOKEN_EOF,), ltok.filename, ltok.line_num)
        else:
            tok = parser.Token((parser.TOKEN_EOF,), None, -1)

        if can_verbose and VERBOSE >= VERBOSITY_SHOW_READ_TOKENS:
            print("%s%s" % (" " * self.indent, tok))
        return tok

    def skip_to_next(self):
        "Advances to the next non-ws, non-comment token."
        self.i += 1
        self.skip_comments_and_ws()
# Compiler methods.  The ``class Compiler`` header lies outside this span,
# so the methods are written at column 0.

def expect_token(self, *parts):
    "Checks the leading fields of the current token against ``parts``; raises CompileError on a mismatch."
    token = self.cur_token()
    for i, p in enumerate(parts):
        if token[i] != p:
            raise CompileError("File:%s, line:%s, Expected %s instead of %s" % (token.filename, token.line_num, ' '.join([str(x) for x in parts]), ' '.join([str(x) for x in token])))


def expect_sequence(self, *expected_tokens):
    """Consume a fixed sequence and return what was read, one entry per item.

    A tuple item is matched against the current token, which is then
    consumed; a callable item is invoked and its result stored.
    """
    sequence = []
    for exp_tok in expected_tokens:
        if callable(exp_tok):
            sequence.append(exp_tok())
        else:
            sequence.append(self.cur_token())
            self.expect_token(*exp_tok)
            self.skip_to_next()
    return sequence


def compile_delimited_var_list(self, list_name, element_compiler, delimiter_set, right_associative=True):
    """Compiles a list following the rule list_name -> element [delimiter element]*

    With ``right_associative`` every element after the first is wrapped in a
    node named after the delimiter before it; otherwise every element but
    the last is wrapped in a node named after the delimiter after it.
    A single element is returned unwrapped.
    """
    def var_list_gen():
        yield (None, element_compiler())
        while self.cur_token()[0] in delimiter_set:
            tok = self.cur_token()
            self.skip_to_next()
            yield (tok, element_compiler())

    exp_list = []
    if right_associative:
        is_first = True
        for token, element in var_list_gen():
            if is_first:
                exp_list.append(element)
                is_first = False
            else:
                exp_list.append(TreeNode(token[0], [element], token.filename, token.line_num))
    else:
        last_element = None
        for token, element in var_list_gen():
            if last_element:
                exp_list.append(TreeNode(token[0], [last_element], token.filename, token.line_num))
            last_element = element
        exp_list.append(last_element)
    return TreeNode(list_name, exp_list, exp_list[0].filename, exp_list[0].line_num) if len(exp_list) > 1 else exp_list[0]


# Grammar Compilation functions
def compile(self, tokens=None):  # => php_file
    "Starting point: resets this compiler's state and compiles ``tokens`` (default: the constructor's tokens)."
    # The argument used to be accepted but ignored.
    if tokens is not None:
        self.tokens = tokens
    self.i = 0
    self.indent = 0
    self.tree = self.compile_php_file()
    return self.tree


def compile_php_file(self):  # php_file => stmt+
    "Compiles statements until the tokens run out; returns a php_file node."
    fn, ln = self.cur_filename_line()
    stmts = []
    while self.i < len(self.tokens):
        stmts.append(self.compile_stmt())
    return TreeNode("php_file", stmts, fn, ln)


def compile_stmt(self):  # stmt => define_stmt | direct_output | expression_stmt | if_stmt | stmt_block | classdef_stmt | return_stmt | foreach_stmt | switch_stmt | throw_stmt | try_stmt
    """Compiles one statement, chosen by its first token.

    Keywords are matched case-insensitively; anything unrecognized is tried
    as an expression statement.  Raises CompileError when nothing fits.
    """
    self.skip_comments_and_ws()
    token = self.cur_token()
    stmt = None
    if VERBOSE >= VERBOSITY_SHOW_READ_TOKENS:
        print("compiling stmt, %s" % token)
    if token[0] == parser.TOKEN_IDENTIFIER:
        ident_lc_name = token[1].lower()
        if ident_lc_name == KEYWORD_DEFINE:
            stmt = self.compile_define_stmt()
        elif ident_lc_name == KEYWORD_IF:
            stmt = self.compile_if_stmt()
        elif ident_lc_name == KEYWORD_FOR:
            stmt = self.compile_for_stmt()
        elif ident_lc_name == KEYWORD_FUNCTION:
            stmt = self.compile_funcdef_stmt()
        elif ident_lc_name == KEYWORD_WHILE:
            stmt = self.compile_while_stmt()
        elif ident_lc_name == KEYWORD_FOREACH:
            stmt = self.compile_foreach_stmt()
        elif ident_lc_name == KEYWORD_RETURN:
            stmt = self.compile_return_stmt()
        elif ident_lc_name == KEYWORD_CLASS:
            stmt = self.compile_classdef_stmt()
        elif ident_lc_name == KEYWORD_SWITCH:
            stmt = self.compile_switch_stmt()
        elif ident_lc_name == KEYWORD_THROW:
            stmt = self.compile_throw_stmt()
        elif ident_lc_name == KEYWORD_TRY:
            stmt = self.compile_try_stmt()
        elif ident_lc_name == KEYWORD_ECHO:
            stmt = self.compile_echo_stmt()
    elif token[0] == parser.TOKEN_DIRECT_OUTPUT:
        # Drop one newline from each end of the text outside the PHP tags.
        out_str = token[1]
        if len(out_str) > 0 and out_str[0] == '\n':
            out_str = out_str[1:]
        if len(out_str) > 0 and out_str[-1] == '\n':
            out_str = out_str[:-1]
        stmt = TreeNode("direct_output", [out_str], token.filename, token.line_num)
        self.skip_to_next()
    elif token[0] == '{':
        stmt = self.compile_stmt_block()

    if stmt is None:
        stmt = self.compile_expression_stmt()

    if stmt is None:
        raise CompileError("File:%s, line:%s, Expected stmt instead of %s" % (token.filename, token.line_num, ' '.join([str(x) for x in token])))

    if VERBOSE >= VERBOSITY_SHOW_COMPILED_STATEMENTS:
        print("@@ %s" % stmt.prepr())
    return stmt


def compile_echo_stmt(self):  # echo_stmt => ECHO expression_list
    "Compiles ``echo [expression_list] ;`` into an echo node."
    fn, ln = self.cur_filename_line()
    self.expect_token(parser.TOKEN_IDENTIFIER, KEYWORD_ECHO)
    self.skip_to_next()
    args = []
    if self.cur_token()[0] != ';':
        args = self.compile_expression_list().children
    self.expect_token(';')
    self.skip_to_next()
    return TreeNode('echo', args, fn, ln)


def compile_stmt_block(self):  # stmt_block => '{' [stmt]* '}'
    "Compiles a brace-delimited block; raises CompileError if the input ends before '}'."
    # cur_filename_line() is safe at end of input, unlike indexing
    # self.tokens directly.
    fn, ln = self.cur_filename_line()
    self.expect_token('{')
    self.skip_to_next()
    tok = self.cur_token()
    stmts = []
    while tok[0] != '}':
        if tok[0] == parser.TOKEN_EOF:
            # Fail clearly instead of compiling statements past the end.
            raise CompileError("File:%s, line:%s, Expected } instead of end of file" % (tok.filename, tok.line_num))
        stmts.append(self.compile_stmt())
        tok = self.cur_token()
    self.skip_to_next()
    return TreeNode('stmt_block', stmts, fn, ln)
self.skip_to_next() 275 | return TreeNode(KEYWORD_DEFINE, argument_list.children, argument_list.filename, argument_list.line_num) 276 | def compile_return_stmt(self): # return_stmt => RETURN expression ';' 277 | fn, ln = self.cur_filename_line() 278 | seq = self.expect_sequence( 279 | (parser.TOKEN_IDENTIFIER, KEYWORD_RETURN), 280 | self.compile_expression, 281 | (';', ) 282 | ) 283 | return TreeNode('return_stmt', [seq[1]], fn, ln) 284 | 285 | def compile_switch_stmt(self): # switch_stmt => SWITCH '(' expression_list ')' '{' switch_case+ [switch_default]'}' 286 | fn, ln = self.cur_filename_line() 287 | seq = self.expect_sequence( 288 | (parser.TOKEN_IDENTIFIER, KEYWORD_SWITCH), 289 | ('(', ), 290 | self.compile_expression_list, 291 | (')', ), 292 | ('{', ) 293 | ) 294 | cases = [seq[2]] 295 | tok = self.cur_token() 296 | while tok[0] != '}': 297 | self.expect_token(parser.TOKEN_IDENTIFIER) 298 | if tok[1] == KEYWORD_CASE: 299 | cases.append(self.compile_switch_case()) 300 | elif tok[1] == KEYWORD_DEFAULT: 301 | cases.append(self.compile_switch_default()) 302 | else: 303 | raise CompileError("expected 'case', 'default' or '}'.") 304 | tok = self.cur_token() 305 | self.skip_to_next() 306 | return TreeNode('switch_stmt', cases, fn, ln) 307 | 308 | def compile_switch_case(self): # switch_case => CASE expression ':' [stmt]* 309 | fn, ln = self.cur_filename_line() 310 | seq = self.expect_sequence( 311 | (parser.TOKEN_IDENTIFIER, KEYWORD_CASE), 312 | self.compile_expression_list, 313 | (':', ) 314 | ) 315 | tok = self.cur_token() 316 | statements = [] 317 | while not (tok[0] == '}' or (tok[0] == parser.TOKEN_IDENTIFIER and tok[1] in (KEYWORD_CASE, KEYWORD_DEFAULT))): 318 | statements.append(self.compile_stmt()) 319 | tok = self.cur_token() 320 | return TreeNode('switch_case', [seq[1], statements], fn, ln) 321 | 322 | def compile_switch_default(self): # switch_default => DEFAULT ':' [stmt]* 323 | fn, ln = self.cur_filename_line() 324 | seq = self.expect_sequence( 325 | 
(parser.TOKEN_IDENTIFIER, KEYWORD_DEFAULT), 326 | (':', ) 327 | ) 328 | tok = self.cur_token() 329 | statements = [] 330 | while not (tok[0] == '}' or (tok[0] == parser.TOKEN_IDENTIFIER and tok[1] in (KEYWORD_CASE, KEYWORD_DEFAULT))): 331 | statements.append(self.compile_stmt()) 332 | tok = self.cur_token() 333 | return TreeNode('switch_default', statements, fn, ln) 334 | 335 | def compile_throw_stmt(self): # throw_stmt => THROW expression_list ';' 336 | fn, ln = self.cur_filename_line() 337 | seq = self.expect_sequence( 338 | (parser.TOKEN_IDENTIFIER, KEYWORD_THROW), 339 | self.compile_expression_list, 340 | (';', ) 341 | ) 342 | return TreeNode('throw_stmt', [seq[1]], fn, ln) 343 | 344 | def compile_try_stmt(self): # try_stmt => TRY stmt_block [CATCH '(' parameter ')' stmt_block]* [FINALLY stmt_block] 345 | fn, ln = self.cur_filename_line() 346 | try_seq = self.expect_sequence( 347 | (parser.TOKEN_IDENTIFIER, KEYWORD_TRY), 348 | self.compile_stmt_block 349 | ) 350 | catch_blocks = [] 351 | tok = self.cur_token() 352 | while tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_CATCH: 353 | catch_blocks.append(self.compile_catch_block()) 354 | tok = self.cur_token() 355 | 356 | finally_block = self.compile_finally_block() if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_FINALLY else None 357 | 358 | if len(catch_blocks) == 0 and finally_block is None: 359 | raise CompileError('expected catch or finally in try statement.'); 360 | 361 | return TreeNode('try_stmt', [try_seq[1], catch_blocks, finally_block], fn, ln) 362 | 363 | def compile_catch_block(self): # catch_block => CATCH '(' parameter ')' stmt_block 364 | fn, ln = self.cur_filename_line() 365 | catch_seq = self.expect_sequence( 366 | (parser.TOKEN_IDENTIFIER, KEYWORD_CATCH), 367 | ('(', ), 368 | self.compile_parameter 369 | (')', ), 370 | self.compile_stmt_block 371 | ) 372 | return TreeNode('catch_block', [seq[2], seq[4]], fn, ln) 373 | 374 | def compile_finally_block(self): # finally_block => 
FINALLY stmt_block 375 | fn, ln = self.cur_filename_line() 376 | catch_seq = self.expect_sequence( 377 | (parser.TOKEN_IDENTIFIER, KEYWORD_FINALLY), 378 | self.compile_stmt_block 379 | ) 380 | return TreeNode('finally_block', [seq[1]], fn, ln) 381 | 382 | 383 | def compile_for_stmt (self): # for_stmt => FOR '(' expression_list ';' expression_list ';' expression_list ')' stmt_block 384 | fn, ln = self.cur_filename_line() 385 | seq = self.expect_sequence( 386 | (parser.TOKEN_IDENTIFIER, KEYWORD_FOR), 387 | ('(', ), 388 | self.compile_expression_list, 389 | (';', ), 390 | self.compile_expression_list, 391 | (';', ), 392 | self.compile_expression_list, 393 | (')', ), 394 | self.compile_stmt_block 395 | ) 396 | return TreeNode('for_stmt', [seq[2], seq[4], seq[6], seq[8]], fn, ln) 397 | 398 | def compile_while_stmt(self): # while_stmt => WHILE '(' expression_list ')' stmt_block 399 | fn, ln = self.cur_filename_line() 400 | seq = self.expect_sequence( 401 | (parser.TOKEN_IDENTIFIER, KEYWORD_WHILE), 402 | ('(', ), 403 | self.compile_expression_list, 404 | (')', ), 405 | self.compile_stmt_block 406 | ) 407 | return TreeNode('while_stmt', [seq[2], seq[4]], fn, ln) 408 | 409 | def compile_foreach_stmt(self): # foreach_stmt => FOREACH '(' VARIABLE AS VARIABLE [ '=>' VARIABLE ] ')' stmt_block 410 | fn, ln = self.cur_filename_line() 411 | seq = self.expect_sequence( 412 | (parser.TOKEN_IDENTIFIER, KEYWORD_FOREACH), 413 | ('(', ), 414 | self.compile_followed_primitive, 415 | (parser.TOKEN_IDENTIFIER, 'as'), 416 | self.compile_followed_primitive 417 | ) 418 | seq2 = None 419 | if self.cur_token()[0] == '=>': 420 | seq2 = self.expect_sequence( 421 | ('=>', ), 422 | self.compile_followed_primitive 423 | ) 424 | seq3 = self.expect_sequence( 425 | (')', ), 426 | self.compile_stmt_block 427 | ) 428 | return TreeNode("foreach_stmt", [seq[2], seq[4], seq2[1] if seq2 is not None else None, seq3[1]], fn, ln) 429 | def compile_argument_list(self): # argument_list => '(' [argument [ ',' 
argument ]*] ')' 430 | fn, ln = self.cur_filename_line() 431 | args_list=[] 432 | tok = self.tokens[self.i] 433 | self.expect_token('(') 434 | self.skip_to_next() 435 | while self.cur_token()[0] != ')' : 436 | args_list.append(self.compile_argument()) 437 | if self.cur_token()[0] == ')': 438 | break 439 | else: 440 | self.expect_token(',') 441 | self.skip_to_next() 442 | self.skip_to_next() 443 | return TreeNode("argument_list", args_list, fn, ln) 444 | 445 | def compile_argument(self): # argument => expression 446 | return self.compile_expression() 447 | 448 | def compile_expression_stmt(self): # expression_stmt => expression ';' 449 | expr = self.compile_expression(); 450 | self.expect_token(';') 451 | self.skip_to_next() 452 | return expr; 453 | 454 | def compile_expression_list(self): # expression_list => expression [ ',' expression ]* 455 | fn, ln = self.cur_filename_line() 456 | l = [self.compile_expression()] 457 | while self.cur_token()[0] == ',': 458 | self.skip_to_next() 459 | l.append(self.compile_expression()) 460 | return TreeNode('expression_list', l, fn, ln) 461 | def compile_expression (self): # expression => print_expression 462 | return self.compile_print_expression() 463 | def compile_print_expression (self): # expression => [PRINT] or_expression 464 | tok = self.cur_token() 465 | if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_PRINT: 466 | self.skip_to_next() 467 | return TreeNode('print_expression', [self.compile_or_expression()], tok.filename, tok.line_num) 468 | else: 469 | return self.compile_or_expression() 470 | def compile_or_expression (self): # or_expression => xor_expression [OR xor_expression]* 471 | return self.compile_delimited_var_list("or_expression", self.compile_xor_expression, [parser.TOKEN_IDENTIFIER, 'or']) 472 | def compile_xor_expression (self): # xor_expression => and_expression [XOR and_expression]* 473 | return self.compile_delimited_var_list("xor_expression", self.compile_and_expression, 
[parser.TOKEN_IDENTIFIER, 'xor']) 474 | def compile_and_expression (self): # and_expression => assignment_expression [AND assignment_expression]* 475 | return self.compile_delimited_var_list("and_expression", self.compile_assignment_expression, [parser.TOKEN_IDENTIFIER, 'and']) 476 | def compile_assignment_expression (self): # assignment_expression => conditional_expression [assignment_op conditional_expression] 477 | return self.compile_delimited_var_list("assignment_expression", self.compile_conditional_expression, OPERATORS_ASSIGNMENT, LEFT_ASSOCIATIVE) 478 | def compile_conditional_expression(self): # conditional_expression => sym_or_expression ['?' sym_or_expression ':' sym_or_expression ] 479 | fn, ln = self.cur_filename_line() 480 | exp1 = self.compile_sym_or_expression() 481 | if self.cur_token()[0] == '?': 482 | seq = self.expect_sequence( 483 | ['?'], 484 | self.compile_sym_or_expression, 485 | [':'], 486 | self.compile_sym_or_expression 487 | ) 488 | return TreeNode('conditional_expression', [exp1, seq[1], seq[3]], fn, ln) 489 | return exp1 490 | def compile_sym_or_expression (self): # sym_or_expression => sym_and_expression ['||' sym_and_expression]* 491 | return self.compile_delimited_var_list("sym_or_expression", self.compile_sym_and_expression, ['||']) 492 | def compile_sym_and_expression (self): # sym_and_expression => bit_or_expression ['&&' bit_or_expression ]* 493 | return self.compile_delimited_var_list("sym_and_expression", self.compile_bit_or_expression, ['&&']) 494 | def compile_bit_or_expression (self): # bit_or_expression => bit_xor_expression ['|' bit_xor_expression]* 495 | return self.compile_delimited_var_list("bit_or_expression", self.compile_bit_xor_expression, ['|']) 496 | def compile_bit_xor_expression (self): # bit_or_expression => bit_and_expression ['|' bit_and_expression]* 497 | return self.compile_delimited_var_list("bit_xor_expression", self.compile_bit_and_expression, ['^']) 498 | def compile_bit_and_expression (self): # 
bit_and_expression => eq_comp_expression ['&' eq_comp_expression]* 499 | return self.compile_delimited_var_list("bit_and_expression", self.compile_eq_comp_expression, ['&']) 500 | def compile_eq_comp_expression (self): # eq_comp_expression => order_comp_expression [eq_comp_op order_comp_expression]* 501 | return self.compile_delimited_var_list("eq_comp_expression", self.compile_order_comp_expression, OPERATORS_EQ_COMP) 502 | def compile_order_comp_expression (self): # order_comp_expression => bitshift_expression [order_comp_op bitshift_expression]* 503 | return self.compile_delimited_var_list("order_comp_expression", self.compile_bitshift_expression, OPERATORS_ORDER_COMP) 504 | def compile_bitshift_expression (self): # bitshift_expression => add_expression [bitshift_op add_expression]* 505 | return self.compile_delimited_var_list("bitshift_expression", self.compile_add_expression, OPERATORS_BITSHIFT) 506 | def compile_add_expression (self): # add_expression => term [add_op term]* 507 | return self.compile_delimited_var_list("add_expression", self.compile_term, OPERATORS_ADD) 508 | def compile_term (self): # term => negated_typecheck [mult_op negated_typecheck]* 509 | return self.compile_delimited_var_list("term_expression", self.compile_negated_typecheck, OPERATORS_MULT) 510 | def compile_negated_typecheck (self): # negated_typecheck => ['!'] typecheck 511 | fn, ln = self.cur_filename_line() 512 | negated = False 513 | if self.cur_token()[0] == '!': 514 | negated = True 515 | self.skip_to_next() 516 | tchk = self.compile_typecheck() 517 | if negated: 518 | return TreeNode('negated', [tchk]) 519 | else: 520 | return tchk 521 | def compile_typecheck (self): # typecheck => factor ['instanceof' typedef] 522 | fn, ln = self.cur_filename_line() 523 | factor = self.compile_factor() 524 | tok = self.cur_token() 525 | if tok[0] == KEYWORD_INSTANCEOF: 526 | self.skip_to_next() 527 | td = self.compile_typedef() 528 | return TreeNode('typecheck', [factor, td], fn, ln) 529 | 
return factor 530 | 531 | def compile_factor(self): # factor => [unary_operator]* followed_primitive 532 | args=[] 533 | unary_ops = [] 534 | while self.cur_token()[0] in parser.TOKEN_UNARY_OP: 535 | unary_ops.append(self.cur_token()) 536 | self.skip_to_next() 537 | if len(unary_ops) > 0: 538 | args.append(TreeNode("unary_op", [x[0] for x in unary_ops], unary_ops[0].filename, unary_ops[0].line_num)) 539 | args.append(self.compile_followed_primitive()) 540 | return TreeNode("factor", args, args[0].filename, args[0].line_num) if len(args) > 1 else args[0] 541 | 542 | def compile_followed_primitive(self): # followed_primitive => primitive [primitive_follower]* 543 | args=[] 544 | args.append(self.compile_primitive()) 545 | while True: 546 | try: 547 | follower = self.compile_primitive_follower() 548 | args.append(follower) 549 | except CompileError, ce: 550 | break 551 | return TreeNode("followed_primitive", args, args[0].filename, args[0].line_num) if len(args) > 1 else args[0] 552 | 553 | 554 | def compile_primitive(self): # primitive => string | number | identifier | variable | '(' expression ')' | NEW typedef argument_list | array_literal 555 | tok=self.cur_token() 556 | last_i = self.i 557 | if tok[0] == '&': 558 | ref_type = True 559 | self.skip_to_next() 560 | tok2=self.cur_token() 561 | if tok2[0] == parser.TOKEN_VARIABLE: 562 | self.skip_to_next() 563 | return TreeNode("primitive", [tok2, tok], tok.filename, tok.line_num) 564 | else: 565 | raise CompileError("Expected variable after & token"); 566 | else: 567 | ref_type = True 568 | if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_NEW: 569 | seq = self.expect_sequence( 570 | (parser.TOKEN_IDENTIFIER, KEYWORD_NEW), 571 | self.compile_typedef, 572 | self.compile_argument_list 573 | ) 574 | return TreeNode("primitive", [tok, seq[1], seq[2]], tok.filename, tok.line_num) 575 | elif tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_ARRAY: 576 | return TreeNode("primitive", 
[self.compile_array_literal()], tok.filename, tok.line_num) 577 | elif tok[0] in (parser.TOKEN_STRING, parser.TOKEN_NUMBER, parser.TOKEN_IDENTIFIER, parser.TOKEN_VARIABLE): 578 | self.skip_to_next() 579 | return TreeNode("primitive", [tok], tok.filename, tok.line_num) 580 | elif tok[0] == parser.TOKEN_INTERPOLATED_STRING: 581 | interpolations = tok[1] 582 | for i, interp in enumerate(interpolations): 583 | if isinstance(interp, parser.TokenList): 584 | if VERBOSE > VERBOSITY_NONE: 585 | print "==================================================" 586 | C = Compiler(interp) 587 | interpolations[i] = C.compile_factor() 588 | if VERBOSE > VERBOSITY_NONE: 589 | print "==================================================" 590 | self.skip_to_next() 591 | return TreeNode("primitive", [tok], tok.filename, tok.line_num) 592 | else: 593 | self.expect_token('(') 594 | self.skip_to_next() 595 | expr = self.compile_expression() 596 | self.expect_token(')') 597 | self.skip_to_next() 598 | return TreeNode("primitive", [expr], expr.filename, expr.line_num) 599 | 600 | def compile_array_literal(self): # array_literal => ARRAY '(' [array_element [',' array_element]*] ')' 601 | fn, ln = self.cur_filename_line() 602 | self.expect_sequence( 603 | (parser.TOKEN_IDENTIFIER, KEYWORD_ARRAY), 604 | ('(', ) 605 | ) 606 | args=[] 607 | if self.cur_token()[0] != ')': 608 | args.append(self.compile_array_element()) 609 | while self.cur_token()[0] == ',': 610 | self.skip_to_next() 611 | if self.cur_token()[0] == ')': 612 | break 613 | args.append(self.compile_array_element()) 614 | self.expect_sequence( (')', ) ) 615 | return TreeNode('array_literal', args, fn, ln) 616 | 617 | def compile_array_element(self): # array_element => expression '=>' expression 618 | fn, ln = self.cur_filename_line() 619 | expr = [self.compile_expression()]; 620 | if self.cur_token()[0] == '=>': 621 | self.skip_to_next() 622 | expr.append(self.compile_expression()) 623 | return TreeNode('array_element', expr, fn, ln) 624 | 
625 | 626 | def compile_typedef(self): # member_expression [static_member_access]* 627 | fn, ln = self.cur_filename_line() 628 | args=[] 629 | args.append(self.compile_member_expression()) 630 | while True: 631 | tok = self.cur_token() 632 | if tok[0] == '::': 633 | args.append(self.compile_static_member_access()) 634 | else: 635 | break 636 | return TreeNode("typedef", args, fn, ln) 637 | 638 | def compile_primitive_follower(self): # primitive_follower => static_member_access | member_access | fncall | array_indexing | unary_follower 639 | tok = self.cur_token() 640 | follower=None 641 | if tok[0] == '::': 642 | follower = self.compile_static_member_access() 643 | elif tok[0] == '->': 644 | follower = self.compile_member_access() 645 | elif tok[0] == '(': 646 | follower = self.compile_fncall() 647 | elif tok[0] == '[': 648 | follower = self.compile_array_indexing() 649 | elif tok[0] in ('++', '--'): 650 | follower = self.compile_unary_follower() 651 | else: 652 | raise CompileError("Expected -> [ or (, not %s"%self.cur_token()[0]) 653 | return follower # TreeNode('follower', follower) 654 | 655 | def compile_unary_follower(self): # unary_follower => '++' | '--' 656 | tok = self.cur_token() 657 | if tok[0] in ('++', '--'): 658 | self.skip_to_next() 659 | return TreeNode('unary_follower', [tok], tok.filename, tok.line_num) 660 | else: 661 | raise CompileError('Expected ++ or --') 662 | 663 | def compile_static_member_access(self): # static_member_access => '::' member_expression 664 | fn, ln = self.cur_filename_line() 665 | self.expect_token('::') 666 | self.skip_to_next() 667 | return TreeNode('static_member_access', [self.compile_member_expression()], fn, ln) 668 | 669 | def compile_member_access(self): # member_access => '->' member_expression 670 | fn, ln = self.cur_filename_line() 671 | self.expect_token('->') 672 | self.skip_to_next() 673 | return TreeNode('member_access', [self.compile_member_expression()], fn, ln) 674 | 675 | def 
compile_member_expression(self): # member_expression => IDENTIFIER | VARIABLE | '{' expression '}' 676 | tok = self.cur_token() 677 | if tok[0] == '{': 678 | self.skip_to_next() 679 | expr = self.compile_expression() 680 | self.expect_token('}') 681 | self.skip_to_next() 682 | return TreeNode('member_expression', [expr], expr.filename, expr.line_num) 683 | else: 684 | if tok[0] not in (parser.TOKEN_IDENTIFIER, parser.TOKEN_VARIABLE): 685 | raise CompileError("Expected identifier or variable not %s"%tok[0]) 686 | self.skip_to_next() 687 | return TreeNode('member_expression', [tok], tok.filename, tok.line_num) 688 | 689 | def compile_fncall(self): # fncall => argument_list 690 | al = self.compile_argument_list() 691 | return TreeNode('fncall', [al], al.filename, al.line_num) 692 | 693 | def compile_array_indexing(self): # array_indexing => '[' expression ']' 694 | fn, ln = self.cur_filename_line() 695 | self.expect_token('[') 696 | self.skip_to_next() 697 | expr= None 698 | if self.cur_token()[0] != ']' : 699 | expr = self.compile_expression() 700 | self.expect_token(']') 701 | self.skip_to_next() 702 | return TreeNode('array_indexing', [expr], fn, ln) 703 | 704 | def compile_if_stmt(self): # if_stmt => IF '(' expression ')' stmt 705 | args = [] 706 | self.expect_token(parser.TOKEN_IDENTIFIER, KEYWORD_IF) 707 | self.skip_to_next() 708 | self.expect_token('(') 709 | self.skip_to_next() 710 | args.append(self.compile_expression()) 711 | self.expect_token(')') 712 | self.skip_to_next() 713 | args.append(self.compile_stmt()) 714 | tok = self.cur_token() 715 | if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_ELSE: 716 | self.skip_to_next() 717 | args.append(self.compile_stmt()) 718 | return TreeNode("if", args, args[0].filename, args[0].line_num) 719 | 720 | def compile_classdef_stmt(self): # classdef_stmt => CLASS identifier [EXTENDS identifier] '{' [classdef_block] '}' 721 | fn, ln = self.cur_filename_line() 722 | self.expect_token(parser.TOKEN_IDENTIFIER, 
KEYWORD_CLASS) 723 | self.skip_to_next() 724 | self.expect_token(parser.TOKEN_IDENTIFIER) 725 | class_name = self.cur_token() 726 | self.skip_to_next() 727 | tok = self.cur_token() 728 | if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_EXTENDS: 729 | self.skip_to_next() 730 | self.expect_token(parser.TOKEN_IDENTIFIER) 731 | superclass_name = self.cur_token() 732 | self.skip_to_next() 733 | else: 734 | superclass_name = None 735 | self.expect_token('{') 736 | self.skip_to_next() 737 | classdef = self.compile_classdef_block() if self.cur_token()[0] != '}' else None 738 | self.expect_token('}') 739 | self.skip_to_next() 740 | return TreeNode('classdef', [class_name, superclass_name, classdef], fn, ln) 741 | 742 | def compile_classdef_block (self): # classdef_block => [const_vardef_stmt | methoddef_stmt | constdef_stmt]* 743 | fn, ln = self.cur_filename_line() 744 | args=[] 745 | tok = self.cur_token() 746 | while tok[0] != '}': 747 | stmt=None 748 | if tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_CONST: 749 | stmt = self.compile_const_vardef_stmt() 750 | else: 751 | start_i = self.i 752 | while tok[0] == parser.TOKEN_IDENTIFIER and tok[1] in VARDEF_DECORATORS: 753 | self.skip_to_next() 754 | tok = self.cur_token() 755 | 756 | if tok[0] == parser.TOKEN_VARIABLE: 757 | self.i = start_i 758 | stmt = self.compile_vardef_stmt() 759 | elif tok[0] == parser.TOKEN_IDENTIFIER and tok[1] == KEYWORD_FUNCTION: 760 | self.i = start_i 761 | stmt = self.compile_methoddef_stmt() 762 | else: 763 | raise CompileError("File:%s, line:%s, Expected stmt instead of %s"%(tok.filename, tok.line_num, ' '.join([str(x) for x in tok]))) 764 | args.append(stmt) 765 | # self.skip_to_next() 766 | tok = self.cur_token() 767 | return TreeNode('classdef_block', args, fn, ln) 768 | def compile_methoddef_stmt (self): # methoddef_stmt => [vardef_decorator]* 'function' IDENTIFIER parameter_list stmt_block 769 | fn, ln = self.cur_filename_line() 770 | decorators = [] 771 | tok = 
self.cur_token() 772 | while tok[0] == parser.TOKEN_IDENTIFIER and tok[1] in VARDEF_DECORATORS: 773 | decorators.append(tok[1]) 774 | self.skip_to_next() 775 | tok = self.cur_token() 776 | self.expect_token(parser.TOKEN_IDENTIFIER, KEYWORD_FUNCTION) 777 | self.skip_to_next() 778 | self.expect_token(parser.TOKEN_IDENTIFIER) 779 | func_name = self.cur_token() 780 | self.skip_to_next() 781 | params = self.compile_parameter_list() 782 | block = self.compile_stmt_block() 783 | return TreeNode('methoddef', [decorators, func_name, params, block], fn, ln) 784 | def compile_funcdef_stmt (self): # funcdef_stmt => 'function' IDENTIFIER parameter_list stmt_block 785 | fn, ln = self.cur_filename_line() 786 | self.expect_token(parser.TOKEN_IDENTIFIER, KEYWORD_FUNCTION) 787 | self.skip_to_next() 788 | self.expect_token(parser.TOKEN_IDENTIFIER) 789 | func_name = self.cur_token() 790 | self.skip_to_next() 791 | params = self.compile_parameter_list() 792 | block = self.compile_stmt_block() 793 | return TreeNode('funcdef', [func_name, params, block], fn, ln) 794 | def compile_parameter_list(self): # parameter_list => '(' [parameter [ ',' parameter ]] ')' 795 | fn, ln = self.cur_filename_line() 796 | param_list=[] 797 | self.expect_token('(') 798 | self.skip_to_next() 799 | while self.cur_token()[0] != ')' : 800 | param_list.append(self.compile_parameter()) 801 | if self.cur_token()[0] == ')': 802 | break 803 | else: 804 | self.expect_token(',') 805 | self.skip_to_next() 806 | self.skip_to_next() 807 | return TreeNode("parameter_list", param_list, fn, ln) 808 | def compile_parameter (self): # parameter => [IDENTIFIER] VARIABLE ['=' or_expression] 809 | fn, ln = self.cur_filename_line() 810 | typedef, varname, assign, is_ref = None, None, None, False 811 | tok = self.cur_token() 812 | if tok[0] == parser.TOKEN_IDENTIFIER: 813 | typedef = tok 814 | self.skip_to_next() 815 | if tok[0] == '&': 816 | is_ref = True 817 | self.skip_to_next() 818 | self.expect_token(parser.TOKEN_VARIABLE) 819 
| varname = self.cur_token() 820 | self.skip_to_next() 821 | if self.cur_token()[0] == '=': 822 | self.skip_to_next() 823 | assign = self.compile_or_expression() 824 | return TreeNode("parameter", [typedef, varname, assign, is_ref], fn, ln) 825 | 826 | def compile_vardef_stmt (self): # vardef_stmt => [vardef_decorator]+ VARIABLE ['=' or_expression] ';' 827 | fn, ln = self.cur_filename_line() 828 | decorators = [] 829 | tok = self.cur_token() 830 | while tok[0] == parser.TOKEN_IDENTIFIER and tok[1] in VARDEF_DECORATORS: 831 | decorators.append(tok[1]) 832 | self.skip_to_next() 833 | tok = self.cur_token() 834 | self.expect_token(parser.TOKEN_VARIABLE) 835 | variable = self.cur_token() 836 | self.skip_to_next() 837 | definition = None 838 | if self.cur_token()[0] == '=': 839 | self.skip_to_next() 840 | definition = self.compile_or_expression() 841 | self.expect_token(';') 842 | self.skip_to_next() 843 | return TreeNode('vardef_stmt', [decorators, variable, definition], fn, ln) 844 | def compile_const_vardef_stmt(self): # const_vardef_stmt=>const IDENTIFIER ['=' or_expression] ';' 845 | fn, ln = self.cur_filename_line() 846 | args=[] 847 | self.expect_token(parser.TOKEN_IDENTIFIER, KEYWORD_CONST) 848 | self.skip_to_next() 849 | self.expect_token(parser.TOKEN_IDENTIFIER) 850 | args.append(self.cur_token()) 851 | self.skip_to_next() 852 | if self.cur_token()[0] == '=': 853 | self.skip_to_next() 854 | args.append(self.compile_or_expression()) 855 | self.expect_token(';') 856 | self.skip_to_next() 857 | return TreeNode('const_vardef_stmt', args, fn, ln) 858 | def compile_vardef_decorator (self): # vardef_decorator => 'var' | 'static' | 'private' | 'public' 859 | pass 860 | 861 | 862 | def compile_file(php_file): 863 | if type(php_file) is str: 864 | php_file = parser.parse_file(php_file) 865 | return compile_php(php_file) 866 | 867 | def compile_php(php_tokens): 868 | if type(php_tokens) is str: 869 | php_tokens = parser.parse_php(php_tokens) 870 | if not 
isinstance(php_tokens, parser.TokenList): 871 | raise ArgumentError("Given argument is not php code, nor a list of tokens %r"%php_tokens) 872 | C = Compiler(php_tokens) 873 | return C.compile() 874 | 875 | 876 | def test(*args, **kw): 877 | global VERBOSE 878 | VERBOSE = 999 879 | if len(args) > 0: 880 | kw['code'] = args[0] 881 | if 'filename' in kw: 882 | filename = kw['filename'] 883 | print "parsing file : %r"%filename 884 | with file(filename) as finp: 885 | print finp.read() 886 | print "----" 887 | php_code = parser.parse_file(filename) 888 | elif 'code' in kw: 889 | code = kw['code'] 890 | print "parsing php code :\n%s"%code 891 | print "----" 892 | php_code = parser.parse_php(code) 893 | print "----" 894 | print "Parsed Code:\n", php_code 895 | 896 | compiled_code = compile_php(php_code) 897 | print 898 | def print_node(node, depth=0): 899 | print " "*depth, 900 | if isinstance(node, TreeNode): 901 | print node.name 902 | for c in node.children: 903 | print_node(c, depth+1) 904 | else: 905 | print node 906 | 907 | print "Compiled Code:\n" 908 | print compiled_code.prepr() 909 | 910 | 911 | if __name__ == '__main__': 912 | import sys 913 | if len(sys.argv) >= 2: 914 | test(filename=sys.argv[1]) 915 | else: 916 | test(code=parser.TEST_CODE) --------------------------------------------------------------------------------