├── README.md
├── gevent_profiler
    └── __init__.py
├── setup.py
└── tests
    ├── attach.py
    └── attach_on_signal.py


/README.md:
--------------------------------------------------------------------------------
 1 | # gevent-profiler
 2 | 
 3 | This module provides a simple way to get detailed profiling information
 4 | about a Python process that uses the `gevent` library.  The normal Python
 5 | profilers are not nearly as useful in this context, due to `gevent`'s
 6 | greenlet threading model.
 7 | 
 8 | ## Installation
 9 | 
10 | ```bash
11 | $ sudo python setup.py install
12 | ```
13 | 
14 | ## Usage
15 | 
16 | To generate profiling information for a single function call:
17 | 
18 | ```python
19 | from gevent import monkey
20 | monkey.patch_all()
21 | import gevent_profiler
22 | 
23 | def my_func(a, b, c):
24 |     print a, b, c
25 | 
26 | gevent_profiler.profile(my_func, 1, 2, c=3)
27 | ```
28 | 
29 | To generate profiling information for an arbitrary section of code:
30 | 
31 | ```python
32 | from gevent import monkey
33 | monkey.patch_all()
34 | import gevent_profiler
35 | 
36 | gevent_profiler.attach()
37 | for x in range(42):
38 |     print pow(x, 2)
39 | gevent_profiler.detach()
40 | ```
41 | 
42 | To start generating profiling information when a specific signal is received,
43 | and to stop after a set amount of time has elapsed:
44 | 
45 | ```python
46 | from gevent import monkey
47 | monkey.patch_all()
import signal
import gevent_profiler
49 | 
50 | gevent_profiler.attach_on_signal(signum=signal.SIGUSR1, duration=60)
51 | 
52 | x = 2
53 | while True:
54 |     print pow(x, 50000)
55 | ```
56 | 
57 | To profile a Python app from the command line:
58 | 
59 | ```bash
60 | $ python gevent_profiler/__init__.py --help
61 | $ python gevent_profiler/__init__.py my_app.py
62 | ```
63 | 
64 | ## Options
65 | 
66 | Set the filename for the stats file.  Defaults to `sys.stdout`.  May be set to `None` to disable.
67 | 
68 | ```python
69 | gevent_profiler.set_stats_output('my-stats.txt')
70 | ```
71 | 
72 | Set the filename for the summary file.  Defaults to `sys.stdout`.  May be set to `None` to disable.
73 | 
74 | ```python
75 | gevent_profiler.set_summary_output('my-summary.txt')
76 | ```
77 | 
78 | Set the filename for the trace file.  Defaults to `sys.stdout`.  May be set to `None` to disable.
79 | 
80 | ```python
81 | gevent_profiler.set_trace_output('my-trace.txt')
82 | ```
83 | 
Print runtime statistics as percentages of total runtime alongside the absolute measurements in seconds:
85 | 
86 | ```python
87 | gevent_profiler.print_percentages(True)
88 | ```
89 | 
90 | Count time blocking on IO towards the execution totals for each function:
91 | 
92 | ```python
93 | gevent_profiler.time_blocking(True)
94 | ```
95 | 


--------------------------------------------------------------------------------
/gevent_profiler/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Profiler for gevent
  3 | """
  4 | import os
  5 | import sys
  6 | import time
  7 | import gevent
  8 | import signal
  9 | import inspect
 10 | 
# Reset to an empty dict by detach(); not otherwise used in the visible code.
_gls = {}
# Greenlet seen by the most recent _globaltrace call (None before the first one).
_curr_gl = None
# greenlet -> root _State of that greenlet's recorded call tree.
_states = {}
# greenlet -> _State of the call currently executing in that greenlet.
_curr_states = {}

# Destinations for the three kinds of output; None disables that output.
_stats_output_file = sys.stdout
_summary_output_file = sys.stdout
_trace_output_file = sys.stdout

# Toggled by print_percentages() and time_blocking().
_print_percentages = False
_time_blocking = False

# Wall-clock deadline after which the trace callbacks call detach();
# None means no deadline.
_attach_expiration = None

# Wall-clock time at which attach() started tracing; None when not attached.
_trace_began_at = None
 26 | 
 27 | class _State:
 28 | 	def __init__(self):
 29 | 		self.modulename = None
 30 | 		self.co_name = None
 31 | 		self.filename = None
 32 | 		self.line_no = None
 33 | 		self.start_time = None
 34 | 		self.full_class = None
 35 | 		self.elapsed = 0.0
 36 | 		self.depth = 0
 37 | 		self.calls = []
 38 | 		self.parent = None
 39 | 	def __str__(self):
 40 | 		first = self.modulename
 41 | 		if self.full_class:
 42 | 			true_class = self.full_class
 43 | 			# use inspect to find the method's true class
 44 | 			for cls in inspect.getmro(self.full_class):
 45 | 				if self.co_name in cls.__dict__:
 46 | 					fnc = cls.__dict__[self.co_name]
 47 | 					if hasattr(fnc, "func_code") and fnc.func_code.co_filename == self.filename and fnc.func_code.co_firstlineno == self.line_no:
 48 | 						true_class = cls
 49 | 			first = "%s.%s" % (true_class.__module__, true_class.__name__)
 50 | 		return "%s.%s" % (first, self.co_name)
 51 | 
 52 | def _modname(path):
 53 |     """Return a plausible module name for the path."""
 54 | 
 55 |     base = os.path.basename(path)
 56 |     filename, ext = os.path.splitext(base)
 57 |     return filename
 58 | 
 59 | def _globaltrace(frame, event, arg):
 60 | 	global _curr_gl
 61 | 
 62 | 	if _attach_expiration is not None and time.time() > _attach_expiration:
 63 | 		detach()
 64 | 		return
 65 | 
 66 | 	gl = gevent.greenlet.getcurrent()
 67 | 	if gl not in _states:
 68 | 		_states[gl] = _State()
 69 | 		_curr_states[gl] = _states[gl]
 70 | 
 71 | 	if _curr_gl is not gl:
 72 | 		if _curr_gl is not None:
 73 | 			_stop_timing(_curr_gl)
 74 | 		_curr_gl = gl
 75 | 		_start_timing(_curr_gl)
 76 | 
 77 | 	code = frame.f_code
 78 | 	filename = code.co_filename
 79 | 	if filename:
 80 | 		modulename = _modname(filename)
 81 | 		if modulename is not None:
 82 | 			_print_trace("[%s] call: %s: %s\n" % (gl, modulename, code.co_name))
 83 | 	state = _State()
 84 | 	_curr_states[gl].calls.append(state)
 85 | 	state.parent = _curr_states[gl]
 86 | 	_curr_states[gl] = state
 87 | 
 88 | 	state.modulename = modulename
 89 | 	state.filename = filename
 90 | 	state.line_no = code.co_firstlineno
 91 | 	state.co_name = code.co_name
 92 | 	state.start_time = time.time()
 93 | 	if 'self' in frame.f_locals:
 94 | 		state.full_class = type(frame.f_locals['self'])
 95 | 
 96 | 	tracefunc = _getlocaltrace(state)
 97 | 	state.localtracefunc = tracefunc
 98 | 
 99 | 	if modulename == 'hub' and code.co_name == 'switch' and not _time_blocking:
100 | 		_stop_timing(gl)
101 | 
102 | 	return tracefunc
103 | 
def _getlocaltrace(state):
	"""
	Build the local trace function for the call described by 'state'.

	On a 'return' event the returned function logs the return, adds the time
	since the state's last start to its elapsed total, and makes the state's
	parent the greenlet's current state.  For any other event it simply
	keeps itself installed.  Past the attach deadline it calls detach().
	"""
	def _localtrace(frame, event, arg):
		expired = _attach_expiration is not None and time.time() > _attach_expiration
		if expired:
			detach()
			return

		if event != 'return':
			# stay installed for the remaining events of this frame
			return state.localtracefunc

		greenlet = gevent.greenlet.getcurrent()
		f_code = frame.f_code
		path = f_code.co_filename
		module = _modname(path) if path else None
		if module is not None:
			_print_trace("[%s] return: %s: %s: %s\n" % (greenlet, module, f_code.co_name, f_code.co_firstlineno))
		if state.start_time is not None:
			state.elapsed += time.time() - state.start_time
		# pop back to the caller's node
		assert _curr_states[greenlet].parent is not None
		_curr_states[greenlet] = _curr_states[greenlet].parent
		return None
	return _localtrace
127 | 
def _stop_timing(gl):
	"""
	Pause the clocks of every call currently open in greenlet 'gl'.

	Walks from the greenlet's current state up through its parents; each
	state that is running has the time since its start added to 'elapsed'
	and its start_time cleared.  Does nothing for an unknown greenlet.
	"""
	# if we're reattaching later, it's possible to call stop_timing
	# without a full set of current state
	state = _curr_states.get(gl)
	if state is None:
		return

	# one timestamp for the whole chain, and a loop rather than recursion so
	# a deep profiled call stack does not add extra Python frames here
	now = time.time()
	while state is not None:
		if state.start_time is not None:
			state.elapsed += now - state.start_time
			state.start_time = None
		state = state.parent
143 | 
def _start_timing(gl):
	"""
	Resume the clocks of every call currently open in greenlet 'gl'.

	Walks from the greenlet's current state up through its parents and
	stamps each with the current time.  Does nothing for an unknown greenlet.
	"""
	# if we're reattaching later, it's possible to call start_timing
	# without a full set of current state
	state = _curr_states.get(gl)
	if state is None:
		return

	# one timestamp for the whole chain, and a loop rather than recursion so
	# a deep profiled call stack does not add extra Python frames here
	now = time.time()
	while state is not None:
		state.start_time = now
		state = state.parent
157 | 
class _CallSummary:
	"""Running totals for all recorded invocations that share one call name."""
	def __init__(self, name):
		# every counter starts at zero; _sum_calls adds to them
		self.count = 0
		self.cumulative = 0.0
		self.children_cumulative = 0.0
		self.own_cumulative = 0.0
		self.name = name
165 | 
def _sum_calls(state, call_summaries):
	"""
	Fold 'state' and all of its descendants into 'call_summaries', a dict
	mapping call name to _CallSummary.  Returns the elapsed time of 'state'.
	"""
	name = str(state)
	summary = call_summaries.get(name)
	if summary is None:
		summary = _CallSummary(name)
		call_summaries[name] = summary

	summary.count += 1

	# time spent inside the calls made directly by this one
	in_children = 0.0
	for callee in state.calls:
		in_children += _sum_calls(callee, call_summaries)

	total = state.elapsed
	summary.cumulative += total
	summary.own_cumulative += total - in_children
	summary.children_cumulative += in_children
	return total
184 | 
def _maybe_open_file(f):
	"""Open the path 'f' for writing and return the file, or return None when 'f' is None."""
	return None if f is None else open(f, 'w')
190 | 
def _maybe_write(output_file, message):
	"""Write 'message' to 'output_file'; a None file means that output is disabled."""
	if output_file is None:
		return
	output_file.write(message)
194 | 
def _maybe_flush(f):
	"""Flush 'f'; a None file means that output is disabled."""
	if f is None:
		return
	f.flush()
198 | 
def _print_trace(msg):
	"""Write 'msg' to the trace output, if one is configured."""
	_maybe_write(_trace_output_file, msg)
201 | 
def _print_stats_header(header):
	"""
	Write a fixed-width header row followed by a row of '=' to the stats
	output.  'header' must be a 5-tuple to satisfy the format string.
	NOTE(review): no caller appears in the visible module; _print_output
	builds its own table -- confirm whether this helper is still needed.
	"""
	_maybe_write(_stats_output_file, "%40s %5s %12s %12s %12s\n" % header)
	_maybe_write(_stats_output_file, "="*86 + "\n")
205 | 
def _print_stats(stats):
	"""
	Write one fixed-width row to the stats output.  'stats' must be a
	5-tuple (string, integer, three floats) to satisfy the format string.
	NOTE(review): no caller appears in the visible module -- confirm whether
	this helper is still needed.
	"""
	_maybe_write(_stats_output_file, "%40s %5d %12f %12f %12f\n" % stats)
208 | 
def _print_state(state, depth=0):
	"""
	Write 'state' and, recursively, its calls to the summary output, one per
	line, prefixed with 'depth' dots; each nesting level adds two dots.
	"""
	indent = "." * depth
	_maybe_write(_summary_output_file, "%s %s %f\n" % (indent, str(state), state.elapsed))
	child_depth = depth + 2
	for callee in state.calls:
		_print_state(callee, child_depth)
213 | 
def _print_output(duration):
	"""
	Write the results of the recorded trace.

	The stats output receives a table with one row per distinct call name,
	ordered by cumulative time (largest first).  The summary output receives
	the call tree of every greenlet.  'duration' is the total traced time in
	seconds; it is only used to compute percentages when those are enabled.
	"""
	call_summaries = {}
	for root in _states.values():
		_sum_calls(root, call_summaries)

	# Sort on the cumulative time only.  Sorting (time, summary) tuples would
	# compare the summary objects themselves whenever two times are equal.
	ranked = sorted(call_summaries.values(), key=lambda cs: cs.cumulative, reverse=True)

	col_names = ["Call Name", "Count", "Cumulative", "Own Cumul", "Child Cumul", "Per Call", "Own/Total"]
	output = [col_names]

	for c in ranked:
		cumulative = c.cumulative
		own_cumulative = c.own_cumulative
		children_cumulative = c.children_cumulative
		per_call = cumulative / c.count
		if cumulative == 0:
			own_ratio = "inf"
		else:
			own_ratio = "%6.2f" % (own_cumulative / cumulative * 100)

		col_data = [c.name, "%d" % c.count, "%12f" % cumulative, "%12f" % own_cumulative, "%12f" % children_cumulative, "%12f" % per_call, own_ratio]
		# a zero duration has no meaningful percentages (and would divide by
		# zero), so the columns are left as absolute values in that case
		if _print_percentages and duration > 0:
			col_data[2] += " (%6.2f)" % (cumulative * 100 / duration)
			col_data[3] += " (%6.2f)" % (own_cumulative * 100 / duration)
			col_data[4] += " (%6.2f)" % (children_cumulative * 100 / duration)

		output.append(col_data)

	# max widths
	widths = [max(len(row[i]) for row in output) for i in range(len(col_names))]
	# build row strings
	fmt_out = [" ".join(cell.ljust(widths[i]) for i, cell in enumerate(row)) for row in output]
	# insert col separation row
	fmt_out.insert(1, " ".join("=" * width for width in widths))
	# write them!  (an explicit loop: map() is lazy on Python 3 and would
	# write nothing)
	for line in fmt_out:
		_maybe_write(_stats_output_file, "%s\n" % line)

	_maybe_flush(_stats_output_file)

	for gl, root in _states.items():
		_maybe_write(_summary_output_file, "%s\n" % gl)
		_print_state(root)
		_maybe_write(_summary_output_file, "\n")
	_maybe_flush(_summary_output_file)
265 | 
def attach(duration=0):
	"""
	Start execution tracing
	Tracing will stop automatically once 'duration' seconds have passed (the
	deadline is checked by the trace callbacks).  If duration is zero, the
	trace won't stop until detach is called.
	Calling attach while a trace started by attach is still running does
	nothing.
	"""
	global _attach_expiration
	global _trace_began_at
	# _attach_expiration stays None for duration=0, so _trace_began_at must
	# be checked as well to detect an attach without a deadline
	if _trace_began_at is not None or _attach_expiration is not None:
		# already attached
		return
	now = time.time()
	if duration != 0:
		_attach_expiration = now + duration
	_trace_began_at = now
	sys.settrace(_globaltrace)
282 | 
def detach():
	"""
	Finish execution tracing, print the results and reset internal state
	Does nothing when no trace started by attach is running.
	"""
	global _gls
	global _curr_gl
	global _states
	global _curr_states
	global _attach_expiration
	global _trace_began_at

	# do we have a current trace?
	if not _trace_began_at:
		return

	duration = time.time() - _trace_began_at
	_attach_expiration = None
	sys.settrace(None)
	try:
		_maybe_flush(_trace_output_file)
		_print_output(duration)
	finally:
		# reset even if writing the results failed, so that a later attach
		# starts from a clean slate
		_gls = {}
		_curr_gl = None
		_states = {}
		_curr_states = {}
		_trace_began_at = None
309 | 
def profile(func, *args, **kwargs):
	"""
	Takes a function and the arguments to pass to that function and runs it
	with profiling enabled.  On completion of that function, the profiling
	results are printed.  The return value of the profiled method is then
	returned.
	If the function raises, tracing is still switched off and the results
	gathered so far are printed before the exception propagates.
	"""
	sys.settrace(_globaltrace)
	trace_began_at = time.time()
	try:
		return func(*args, **kwargs)
	finally:
		# never leave the trace hook installed, whatever func did
		sys.settrace(None)
		_maybe_flush(_trace_output_file)
		_print_output(time.time() - trace_began_at)
325 | 
def set_stats_output(f):
	"""
	Takes a filename and will write the call timing statistics there.
	The file is opened for writing immediately.  Passing None disables the
	statistics output.  A file opened by an earlier call is not closed here.
	"""
	global _stats_output_file
	_stats_output_file = _maybe_open_file(f)
332 | 
def set_summary_output(f):
	"""
	Takes a filename and will write the execution summary there.
	The file is opened for writing immediately.  Passing None disables the
	summary output.  A file opened by an earlier call is not closed here.
	"""
	global _summary_output_file
	_summary_output_file = _maybe_open_file(f)
339 | 
def set_trace_output(f):
	"""
	Takes a filename and writes the execution trace information there.
	The file is opened for writing immediately.  Passing None disables the
	trace output.  A file opened by an earlier call is not closed here.
	"""
	global _trace_output_file
	_trace_output_file = _maybe_open_file(f)
346 | 
def print_percentages(enabled=True):
	"""
	Pass True if you want the time columns of the statistics table to also
	show, in parentheses, their share of the total run time as a percentage.
	The absolute measurements are still printed.
	"""
	global _print_percentages
	_print_percentages = enabled
354 | 
def time_blocking(enabled=True):
	"""
	Pass True if you want to count time blocking on IO towards the execution
	totals for each function.  The default setting for this is False, which
	is probably what you're looking for in most cases.
	When False, the tracer pauses a greenlet's timers as it enters a function
	named 'switch' in a module named 'hub'.
	"""
	global _time_blocking
	_time_blocking = enabled
363 | 
def set_attach_duration(attach_duration=60):
	"""
	Store 'attach_duration' in the module-level _attach_duration.
	NOTE(review): nothing in the visible module reads _attach_duration;
	attach() and attach_on_signal() take their own 'duration' argument.
	Confirm whether this setter still has any effect.
	"""
	global _attach_duration
	_attach_duration = attach_duration
372 | 
def attach_on_signal(signum=signal.SIGUSR1, duration=60):
	"""
	Installs a handler for the given signal (SIGUSR1 by default) that starts
	execution tracing by calling attach with the given duration.  When that
	duration expires, a summary of all the greenlet activity recorded in the
	meantime is output.
	See set_summary_output and set_trace_output for information about how
	to configure where the output goes.
	"""
	def _start_profiling(received_signum, frame):
		attach(duration=duration)
	signal.signal(signum, _start_profiling)
385 | 
# Command-line entry point: profile the Python script named on the command line.
if __name__ == "__main__":
	from optparse import OptionParser
	parser = OptionParser()
	parser.add_option("-a", "--stats", dest="stats",
			help="write the stats to a file",
			metavar="STATS_FILE")
	parser.add_option("-s", "--summary", dest="summary",
			help="write the summary to a file",
			metavar="SUMMARY_FILE")
	parser.add_option("-t", "--trace", dest="trace",
			help="write the trace to a file",
			metavar="TRACE_FILE")
	# plain on/off switches: present means enabled
	parser.add_option("-p", "--percentages", dest="percentages",
			action='store_true', default=False,
			help="print stats as percentages of total runtime")
	parser.add_option("-b", "--blocking", dest="blocking",
			action='store_true', default=False,
			help="count blocked time toward execution totals")
	(options, args) = parser.parse_args()
	if options.stats is not None:
		set_stats_output(options.stats)
	if options.summary is not None:
		set_summary_output(options.summary)
	if options.trace is not None:
		set_trace_output(options.trace)
	if options.percentages:
		print_percentages()
	if options.blocking:
		time_blocking()
	if len(args) < 1:
		sys.stdout.write("what file should i be profiling?\n")
		sys.exit(1)
	# not named 'file': the profiled script runs in this namespace and would
	# otherwise see the builtin of that name shadowed by a string
	script_path = args[0]

	trace_began_at = time.time()
	sys.settrace(_globaltrace)
	try:
		# execfile is Python 2 only, like the rest of this script section
		execfile(script_path)
	finally:
		# stop tracing and report even if the profiled script raised
		sys.settrace(None)
		_maybe_flush(_trace_output_file)
		_print_output(time.time() - trace_began_at)
425 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup, Extension
 2 | 
# Passed to setup() below as the package's long description.
long_description = """Gevent profiler"""

# The only module listed for installation is gevent_profiler/__init__.py,
# named here by its dotted module path.
py_modules = [
		"gevent_profiler.__init__"
		#"gevent_profiler.gevent_profiler"
		]

# Distribution metadata and the module list handed to distutils.
setup(name = 'python-gevent-profiler',
	  version='0.2',
	  description='profiling utilities for gevent',
	  long_description = long_description,
	  author='meebo',
	  author_email='server@meebo.com',
	  url='http://random.meebo.com',
	  py_modules = py_modules
)
19 | 
20 | 


--------------------------------------------------------------------------------
/tests/attach.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import sys
 3 | 
 4 | sys.path = ['.'] + sys.path
 5 | 
 6 | import time
 7 | import gevent
 8 | import gevent_profiler
 9 | from gevent import monkey
10 | 
def eat_up_cpu():
	"""Perform 100 x 100 throwaway multiplications to give the profiler some work."""
	for outer in range(100):
		for inner in range(100):
			product = outer * inner
15 | 
def eat_up_some_more_cpu():
	"""Perform another 100 x 100 throwaway multiplications under a second function name."""
	for outer in range(100):
		for inner in range(100):
			product = outer * inner
20 | 
def task():
	"""Sleep for three seconds, run both CPU burners, then print a greeting."""
	# NOTE(review): main() in this file spawns eat_up_cpu directly and never
	# calls task() -- confirm whether it is still needed here.
	time.sleep(3)
	eat_up_cpu()
	eat_up_some_more_cpu()
	print "hi!"
26 | 
def main():
	"""Profile a single greenlet running eat_up_cpu between attach() and detach()."""
	monkey.patch_all()

	# not used below
	tasks = []

	gevent_profiler.attach()
	g = gevent.spawn(eat_up_cpu)
	# wait for the greenlet to finish before detaching, so its work is recorded
	g.join()
	gevent_profiler.detach()
36 | 
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
	main()
39 | 
40 | 


--------------------------------------------------------------------------------
/tests/attach_on_signal.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path = ['.'] + sys.path
 4 | 
 5 | import time
 6 | import gevent
 7 | import gevent_profiler
 8 | from gevent import monkey
 9 | 
def eat_up_cpu():
	"""Perform 100 x 100 throwaway multiplications to give the profiler some work."""
	for outer in range(100):
		for inner in range(100):
			product = outer * inner
14 | 
def eat_up_some_more_cpu():
	"""Perform another 100 x 100 throwaway multiplications under a second function name."""
	for outer in range(100):
		for inner in range(100):
			product = outer * inner
19 | 
def task():
	"""Sleep for three seconds, run both CPU burners, then print a greeting."""
	time.sleep(3)
	eat_up_cpu()
	eat_up_some_more_cpu()
	print "hi!"
25 | 
def main():
	"""
	Register the profiler's signal handler (5 second traces), then run
	batches of three task greenlets forever so there is always activity to
	trace when the signal arrives.
	"""
	monkey.patch_all()

	gevent_profiler.attach_on_signal(duration=5)

	while True:
		# a fresh list per batch: reusing one list would keep every finished
		# greenlet alive and make each joinall scan an ever longer list
		tasks = [gevent.spawn(task) for _ in range(3)]

		gevent.joinall(tasks)
39 | 
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
	main()
42 | 
43 | 


--------------------------------------------------------------------------------