├── .gitignore ├── pfpp └── __init__.py └── test └── test_pfpp.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | -------------------------------------------------------------------------------- /pfpp/__init__.py: -------------------------------------------------------------------------------- 1 | import inspect, ast, re, compiler 2 | from pprint import pprint 3 | from copy import copy 4 | 5 | def function_to_ast(fun): 6 | definition = inspect.getsource(fun) 7 | lines = definition.split("\n") 8 | # remove whitespace 9 | lines[0] = lines[0].rstrip() 10 | m = re.match('^\s*', lines[0]) 11 | if m: 12 | space_offset = m.span()[1] 13 | new_source = [] 14 | for line in lines: 15 | new_source.append(line[space_offset:]) 16 | return ast.parse("\n".join(new_source)) 17 | else: 18 | return ast.parse(definition) 19 | 20 | class FunctionalVisitor(ast.NodeVisitor): 21 | def __init__(self, func_name, globals): 22 | self.assigned_vars = [] 23 | self.globals = globals 24 | self.func_name = func_name 25 | self.problems = [] 26 | super(ast.NodeVisitor, self).__init__() 27 | 28 | def visit_Global(self, node): 29 | self.problems.append('accesses global variables') 30 | 31 | def visit_Print(self, node): 32 | self.problems.append('prints which is a side-effect') 33 | 34 | def visit_Call(self, node): 35 | if type(node.func) == ast.Name: 36 | func = self.globals[node.func.id] 37 | try: 38 | # catches recursive functions 39 | # which would cause an infinite loop 40 | if func.__name__ != self.func_name: 41 | if not is_functional(func): 42 | self.problems.append('calls %s which is not strictly functional' % func.__name__) 43 | except: 44 | pass 45 | 46 | if type(node.func) == ast.Attribute: 47 | if type(node.func.value) == ast.Name: 48 | self.problems.append('calling "%s.%s" may produce side-effects' % (node.func.value.id, node.func.attr)) 49 | 50 | def visit_Assign(self, node): 51 | for target in node.targets: 52 | target_names = [] 53 | if type(target) == ast.Tuple: 54 | for item in target.elts: 55 | if type(item) == ast.Name: 56 | target_names.append(item.id) 57 | if type(target) == ast.Name: 58 | target_names.append(target.id) 59 | if type(target) == ast.Subscript: 60 | if type(target.value) == ast.Name: 61 | target_names.append(target.value.id) 62 | for target_name in target_names: 63 | if target_name in self.assigned_vars: 64 | self.problems.append('variable "%s" is assigned to more than once' % target_name) 65 | else: 66 | self.assigned_vars.append(target_name) 67 | self.visit(target) 68 | self.visit(node.value) 69 | 70 | def is_functional(fun): 71 | fv = FunctionalVisitor(func_name=fun.__name__, globals=fun.func_globals) 72 | fv.visit(function_to_ast(fun)) 73 | if len(fv.problems): 74 | for problem in set(fv.problems): 75 | print('function %s: %s' % (fun.__name__, problem)) 76 | return False 77 | return True 78 | 79 | def functional(fun): 80 | ''' a wrapper that will stop execution if a function 81 | is not strictly functional''' 82 | if not is_functional(fun): 83 | quit('The function "%s" is not strictly functional.' % fun.__name__) 84 | ast_code = ast.fix_missing_locations(parallelize(fun)) 85 | code = compile(ast_code, '', 'exec') 86 | exec code in fun.func_globals 87 | responses = {} 88 | fun = fun.func_globals[fun.__name__] 89 | __rm__ = ResultsManager() 90 | fun.func_globals['__rm__'] = __rm__ 91 | def memoized_fun(*args): 92 | if args in responses: 93 | return responses[args] 94 | responses[args] = fun(*args) 95 | return responses[args] 96 | memoized_fun.__name__ = fun.__name__ 97 | memoized_fun.__doc__ = fun.__doc__ 98 | return memoized_fun 99 | 100 | import multiprocessing 101 | import multiprocessing.pool 102 | 103 | class ResultsManager(object): 104 | def __init__(self): 105 | self.results = {} 106 | self.pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() + 1) 107 | 108 | def __getitem__(self, item): 109 | if isinstance(self.results[item], multiprocessing.pool.ApplyResult): 110 | self.results[item] = self.results[item].get() 111 | return self.results[item] 112 | 113 | def __setitem__(self, item, value): 114 | self.results[item] = value 115 | 116 | def run(self, function, args): 117 | return self.pool.apply_async(function, args) 118 | 119 | def reset(self): 120 | self.results = {} 121 | 122 | class ParallelizingTransformer(ast.NodeTransformer): 123 | def __init__(self): 124 | self.seen_variables = {} 125 | super(ast.NodeTransformer, self).__init__() 126 | 127 | def visit_FunctionDef(self, node): 128 | new_node = copy(node) 129 | new_node.body = [] 130 | new_node.body.append(ast.Expr(value=ast.Call(func=ast.Attribute(value=ast.Name(id='__rm__', ctx=ast.Load()), attr='reset', ctx=ast.Load()), args=[], keywords=[], starargs=None, kwargs=None))) 131 | for item in node.body: 132 | new_node.body.append(self.visit(item)) 133 | return new_node 134 | 135 | def visit_Assign(self, node): 136 | # we only want to perform parallelization under certain conditions 137 | if isinstance(node.targets[0], ast.Name) and isinstance(node.value, ast.Call) and isinstance(node.value.func, ast.Name): 138 | 139 | original_target = node.targets[0].id 140 | self.seen_variables[original_target] = True 141 | original_function = node.value.func.id 142 | original_args = node.value.args 143 | new_node = copy(node) 144 | new_node.targets = [ast.Subscript(value=ast.Name(id='__rm__', ctx=ast.Load()), slice=ast.Index(value=ast.Str(s=original_target)), ctx=ast.Store())] 145 | new_node.value = ast.Call(func=ast.Attribute(value=ast.Name(id='__rm__', ctx=ast.Load()), attr='run', ctx=ast.Load()), args=[ast.Name(id=node.value.func.id, ctx=ast.Load()), ast.List(elts=[], ctx=ast.Load())], keywords=[], starargs=None, kwargs=None) 146 | return ast.copy_location(new_node, node) 147 | 148 | def visit_Name(self, node): 149 | if node.id in self.seen_variables: 150 | node = ast.Subscript(value=ast.Name(id='__rm__', ctx=ast.Load()), slice=ast.Index(value=ast.Str(s=node.id)), ctx=ast.Load()) 151 | return node 152 | 153 | def parallelize(fun): 154 | pt = ParallelizingTransformer() 155 | return pt.visit(function_to_ast(fun)) 156 | 157 | from time import sleep 158 | 159 | def x(): 160 | sleep(2) 161 | return 10 162 | 163 | def y(): 164 | sleep(2) 165 | return 20 166 | 167 | def z(): 168 | a = x() 169 | b = y() 170 | return a + b 171 | 172 | z = functional(z) 173 | 174 | if __name__ == '__main__': 175 | print z() 176 | -------------------------------------------------------------------------------- /test/test_pfpp.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.append(os.path.realpath(__file__ + '/../../')) 3 | from pfpp import is_functional, functional, parallelize, ast, function_to_ast 4 | 5 | def uses_globals(): 6 | global a 7 | 8 | def calling_a_method(): 9 | awesome.callmethod() 10 | 11 | def double_assign(): 12 | awesome = 1 13 | awesome = 2 14 | 15 | def some_function(): 16 | pass 17 | 18 | def calling_a_function(): 19 | some_function() 20 | 21 | def subscript_assignment(): 22 | lines = [] 23 | lines[0] = 1 24 | x = [] 25 | x[:] = [1,2,3] 26 | 27 | def tuple_assignment(): 28 | x = 0 29 | y = 0 30 | x, y = 1, 1 31 | 32 | def not_functional(): 33 | a = 10 34 | a = 20 35 | 36 | def print_is_a_side_effect(): 37 | print('I produce side effects') 38 | 39 | def calls_a_non_functional_function(): 40 | print_is_a_side_effect() 41 | 42 | def assigns_to_a_non_functional_function(): 43 | x = print_is_a_side_effect() 44 | 45 | def check(fun, expected): 46 | assert is_functional(fun) == expected, '%s %s supposed to be functional' % \ 47 | (fun.__name__, expected and 'was' or 'was NOT') 48 | 49 | def test_is_functional(): 50 | yield check, calling_a_method, False 51 | yield check, double_assign, False 52 | yield check, calling_a_function, True 53 | yield check, subscript_assignment, False 54 | yield check, tuple_assignment, False 55 | yield check, calls_a_non_functional_function, False 56 | yield check, print_is_a_side_effect, False 57 | yield check, assigns_to_a_non_functional_function, False 58 | yield check, uses_globals, False 59 | 60 | def the_simplest_function(): 61 | return 10 62 | 63 | def pre_simple_parallelization(): 64 | x = the_simplest_function() 65 | 66 | def simple_parallelization(): 67 | __rm__.reset() 68 | __rm__['x'] = __rm__.run(the_simplest_function, []) 69 | 70 | def pre_retrieve_results(): 71 | x = the_simplest_function() 72 | return x 73 | 74 | def retrieve_results(): 75 | __rm__.reset() 76 | __rm__['x'] = __rm__.run(the_simplest_function, []) 77 | return __rm__['x'] 78 | 79 | def pre_several_results(): 80 | x = the_simplest_function() 81 | y = the_simplest_function() 82 | return x + y 83 | 84 | def several_results(): 85 | __rm__.reset() 86 | __rm__['x'] = __rm__.run(the_simplest_function, []) 87 | __rm__['y'] = __rm__.run(the_simplest_function, []) 88 | return __rm__['x'] + __rm__['y'] 89 | 90 | def ast_dump_scrub(node): 91 | import re 92 | d = ast.dump(node) 93 | return re.sub("FunctionDef\(name='[^']*'", '', d) 94 | 95 | def test_parallelization(): 96 | print ast_dump_scrub(parallelize(pre_retrieve_results)) 97 | print ast_dump_scrub(function_to_ast(retrieve_results)) 98 | assert ast_dump_scrub(parallelize(pre_simple_parallelization))== \ 99 | ast_dump_scrub(function_to_ast(simple_parallelization)) 100 | assert ast_dump_scrub(parallelize(pre_retrieve_results))== \ 101 | ast_dump_scrub(function_to_ast(retrieve_results)) 102 | assert ast_dump_scrub(parallelize(pre_several_results))== \ 103 | ast_dump_scrub(function_to_ast(several_results)) 104 | --------------------------------------------------------------------------------