├── tests ├── profilers │ ├── __init__.py │ ├── cpu_profiler_test.py │ ├── allocation_profiler_test.py │ └── block_profiler_test.py ├── reporters │ ├── __init__.py │ ├── profile_reporter_test.py │ ├── span_reporter_test.py │ ├── error_reporter_test.py │ └── process_reporter_test.py ├── config_test.py ├── config_loader_test.py ├── frame_cache_test.py ├── runtime_test.py ├── api_request_test.py ├── message_queue_test.py ├── test_server.py ├── metric_test.py └── agent_test.py ├── stackimpact ├── profilers │ ├── __init__.py │ ├── cpu_profiler.py │ ├── allocation_profiler.py │ └── block_profiler.py ├── reporters │ ├── __init__.py │ ├── span_reporter.py │ ├── process_reporter.py │ ├── error_reporter.py │ └── profile_reporter.py ├── __init__.py ├── utils.py ├── config.py ├── frame.py ├── frame_cache.py ├── config_loader.py ├── message_queue.py ├── api_request.py ├── runtime.py ├── metric.py └── agent.py ├── .gitignore ├── publish.sh ├── examples ├── focused │ └── app.py ├── manual │ └── app.py ├── aws-lambda │ └── aws_lambda.py └── demo │ └── flask_app.py ├── LICENSE.txt ├── setup.py ├── README.md └── README.rst /tests/profilers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/reporters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stackimpact/profilers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stackimpact/reporters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/reporters/profile_reporter_test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | ._ 3 | ._.* 4 | conf 5 | *.pyc 6 | build 7 | .bin 8 | log/* 9 | logs/* 10 | !.gitkeep 11 | tmp 12 | .tmp 13 | dist 14 | stackimpact.egg-info 15 | __pycache__ 16 | venv* 17 | env/ 18 | -------------------------------------------------------------------------------- /stackimpact/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | from .agent import Agent 4 | 5 | _agent = None 6 | 7 | def start(**kwargs): 8 | global _agent 9 | 10 | if not _agent: 11 | _agent = Agent() 12 | 13 | _agent.start(**kwargs) 14 | return _agent 15 | -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pandoc --from=markdown --to=rst --output=README.rst 'README.md' 6 | 7 | python3 -m unittest discover -v -s tests -p *_test.py 8 | 9 | rm -f dist/*.tar.gz 10 | python setup.py sdist 11 | 12 | for bundle in dist/*.tar.gz; do 13 | echo "Publishing $bundle..." 
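  # twine reads credentials from ~/.pypirc or the TWINE_USERNAME/TWINE_PASSWORD
  # environment variables; this script assumes they are already configured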
14 | twine upload $bundle 15 | done 16 | -------------------------------------------------------------------------------- /stackimpact/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import uuid 4 | import base64 5 | import hashlib 6 | 7 | 8 | 9 | def millis(): 10 | return int(round(time.time() * 1000)) 11 | 12 | 13 | def timestamp(): 14 | return int(time.time()) 15 | 16 | 17 | def base64_encode(s): 18 | return base64.b64encode(s.encode('utf-8')).decode('utf-8') 19 | 20 | 21 | def base64_decode(b): 22 | return base64.b64decode(b).decode('utf-8') 23 | 24 | 25 | def generate_uuid(): 26 | return str(uuid.uuid4()) 27 | 28 | 29 | def generate_sha1(text): 30 | sha1_hash = hashlib.sha1() 31 | sha1_hash.update(text.encode('utf-8')) 32 | return sha1_hash.hexdigest() 33 | 34 | -------------------------------------------------------------------------------- /tests/config_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | 4 | import stackimpact 5 | 6 | 7 | class ConfigTestCase(unittest.TestCase): 8 | 9 | def test_set_get_props(self): 10 | stackimpact._agent = None 11 | agent = stackimpact.start( 12 | dashboard_address = 'http://localhost:5001', 13 | agent_key = 'key1', 14 | app_name = 'TestPythonApp', 15 | debug = True 16 | ) 17 | 18 | self.assertFalse(agent.config.is_profiling_disabled()) 19 | agent.config.set_profiling_disabled(True) 20 | self.assertTrue(agent.config.is_profiling_disabled()) 21 | 22 | agent.destroy() 23 | 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /examples/focused/app.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import random 3 | import time 4 | import sys 5 | import threading 6 | sys.path.append(".") 7 | import stackimpact 8 | 9 | agent = stackimpact.start( 10 | agent_key = 'agent key here', 11 | app_name = 'MyPythonApp') 12 | 13 | 14 | def simulate_cpu_work(): 15 | for j in range(0, 100000): 16 | random.randint(1, 1000000) 17 | 18 | 19 | def handle_some_event(): 20 | span = agent.profile('some event') 21 | 22 | simulate_cpu_work() 23 | 24 | span.stop() 25 | 26 | response = { 27 | "statusCode": 200, 28 | "body": 'Done' 29 | } 30 | 31 | return response 32 | 33 | 34 | # Simulate events 35 | while True: 36 | handle_some_event() 37 | time.sleep(2) 38 | 39 | -------------------------------------------------------------------------------- /stackimpact/config.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | class Config(object): 4 | def __init__(self, agent): 5 | self.agent = agent 6 | self.agent_enabled = False 7 | self.profiling_disabled = False 8 | self.config_lock = threading.Lock() 9 | 10 | 11 | def set_agent_enabled(self, val): 12 | with self.config_lock: 13 | self.agent_enabled = val 14 | 15 | def is_agent_enabled(self): 16 | with self.config_lock: 17 | val = self.agent_enabled 18 | return val 19 | 20 | def set_profiling_disabled(self, val): 21 | with self.config_lock: 22 | self.profiling_disabled = val 23 | 24 | def is_profiling_disabled(self): 25 | with self.config_lock: 26 | val = self.profiling_disabled 27 | return val 28 | 29 | -------------------------------------------------------------------------------- /tests/config_loader_test.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import json 4 | 5 | import stackimpact 6 | from test_server import TestServer 7 | 8 | 9 | 10 | class ConfigLoaderTest(unittest.TestCase): 11 | 12 | def test_load(self): 13 | server = TestServer(5008) 14 | server.set_response_data('{"profiling_disabled":"yes"}') 15 | server.start() 16 | 17 | stackimpact._agent = None 18 | agent = stackimpact.start( 19 | dashboard_address = 'http://localhost:5008', 20 | agent_key = 'key1', 21 | app_name = 'TestPythonApp', 22 | debug = True 23 | ) 24 | 25 | agent.config_loader.load() 26 | 27 | self.assertTrue(agent.config.is_profiling_disabled()) 28 | 29 | agent.destroy() 30 | server.join() 31 | 32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /tests/frame_cache_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import threading 4 | import os 5 | 6 | import stackimpact 7 | 8 | 9 | class FrameCacheTestCase(unittest.TestCase): 10 | 11 | def test_skip_stack(self): 12 | stackimpact._agent = None 13 | agent = stackimpact.start( 14 | dashboard_address = 'http://localhost:5001', 15 | agent_key = 'key1', 16 | app_name = 'TestPythonApp', 17 | debug = True 18 | ) 19 | 20 | test_agent_file = os.path.realpath(stackimpact.__file__) 21 | self.assertTrue(agent.frame_cache.is_agent_frame(test_agent_file)) 22 | 23 | test_system_file = os.path.realpath(threading.__file__) 24 | self.assertTrue(agent.frame_cache.is_system_frame(test_system_file)) 25 | 26 | agent.destroy() 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /examples/manual/app.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import random 3 | import time 4 | import sys 5 | import threading 6 | sys.path.append(".") 7 | import stackimpact 8 | 9 | agent = stackimpact.start( 10 | agent_key = 'agent key here', 11 | app_name = 'MyPythonApp', 12 | auto_profiling = False) 13 | 14 | 15 | agent.start_cpu_profiler() 16 | 17 | for j in range(0, 1000000): 18 | random.randint(1, 1000000) 19 | 20 | agent.stop_cpu_profiler() 21 | 22 | 23 | ''' 24 | agent.start_allocation_profiler() 25 | 26 | mem1 = [] 27 | for i in range(0, 1000): 28 | obj1 = {'v': random.randint(0, 1000000)} 29 | mem1.append(obj1) 30 | 31 | agent.stop_allocation_profiler() 32 | ''' 33 | 34 | 35 | ''' 36 | agent.start_block_profiler() 37 | 38 | def blocking_call(): 39 | time.sleep(0.1) 40 | 41 | for i in range(5): 42 | blocking_call() 43 | 44 | agent.stop_block_profiler() 45 | ''' -------------------------------------------------------------------------------- /examples/aws-lambda/aws_lambda.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import stackimpact 3 | import random 4 | import threading 5 | import time 6 | import signal 7 | 8 | agent = stackimpact.start( 9 | agent_key = 'agent key here', 10 | app_name = 'LambdaDemoPython', 11 | app_environment = 'prod', 12 | block_profiler_disabled = True) 13 | 14 | 15 | def simulate_cpu_work(): 16 | for j in range(0, 100000): 17 | random.randint(1, 1000000) 18 | 19 | mem = [] 20 | def simulate_mem_leak(): 21 | for i in range(0, 1000): 22 | obj = {'v': random.randint(0, 1000000)} 
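        # mem is a module-level list, so appended objects stay reachable across
        # handler invocations, which is what simulates the leak here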
23 | mem.append(obj) 24 | 25 | def handler(event, context): 26 | span = agent.profile() 27 | 28 | simulate_cpu_work() 29 | simulate_mem_leak() 30 | 31 | span.stop() 32 | 33 | response = { 34 | "statusCode": 200, 35 | "body": 'Done' 36 | } 37 | 38 | return response 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/reporters/span_reporter_test.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import unittest 4 | import random 5 | import threading 6 | import sys 7 | import traceback 8 | 9 | import stackimpact 10 | from stackimpact.runtime import min_version 11 | 12 | 13 | class SpanReporterTestCase(unittest.TestCase): 14 | 15 | def test_record_span(self): 16 | stackimpact._agent = None 17 | agent = stackimpact.start( 18 | dashboard_address = 'http://localhost:5001', 19 | agent_key = 'key1', 20 | app_name = 'TestPythonApp', 21 | debug = True 22 | ) 23 | agent.span_reporter.start() 24 | 25 | for i in range(10): 26 | agent.span_reporter.record_span("span1", 10); 27 | 28 | span_counters = agent.span_reporter.span_counters; 29 | agent.span_reporter.report(); 30 | 31 | counter = span_counters['span1'] 32 | #print(counter) 33 | 34 | self.assertEqual(counter.name, 'span1') 35 | self.assertEqual(counter.measurement, 10000) 36 | 37 | agent.destroy() 38 | 39 | 40 | if __name__ == '__main__': 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/reporters/error_reporter_test.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import unittest 4 | import random 5 | import threading 6 | import sys 7 | import traceback 8 | 9 | import stackimpact 10 | from stackimpact.runtime import min_version 11 | 12 | 13 | class ErrorReporterTestCase(unittest.TestCase): 14 | 15 | def test_add_exception(self): 16 | stackimpact._agent = None 17 | agent = stackimpact.start( 18 | dashboard_address = 'http://localhost:5001', 19 | agent_key = 'key1', 20 | app_name = 'TestPythonApp', 21 | debug = True 22 | ) 23 | agent.error_reporter.start() 24 | 25 | try: 26 | raise ValueError('test_exc_1') 27 | except: 28 | traceback.print_exc() 29 | 30 | time.sleep(1.1) 31 | 32 | profile_handled_exc = agent.error_reporter.profile 33 | #print(profile_handled_exc) 34 | 35 | self.assertTrue('ValueError: test_exc_1' in str(profile_handled_exc)) 36 | self.assertTrue('test_add_exception' in str(profile_handled_exc)) 37 | 38 | agent.destroy() 39 | 40 | 41 | if __name__ == '__main__': 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /stackimpact/frame.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | 4 | class Frame(object): 5 | 6 | def __init__(self, func_name, filename, lineno): 7 | self.func_name = func_name 8 | self.filename = filename 9 | self.lineno = lineno 10 | 11 | self.cached_str = None 12 | 13 | self._skip = False 14 | 15 | 16 | def match(self, other): 17 | return ((other.func_name is None or other.func_name == self.func_name) and 18 | (other.filename is None or other.filename == self.filename) and 19 | (other.lineno is None or other.lineno == self.lineno)) 20 | 21 | 22 | def __eq__(self, other): 23 | return (self.func_name == other.func_name and 24 | self.filename == other.filename and 25 | self.lineno == other.lineno) 26 | 27 | def __str__(self): 28 | if not self.cached_str: 29 | if self.lineno is not 
None and self.lineno > 0: 30 | self.cached_str = '{0} ({1}:{2})'.format(self.func_name, self.filename, self.lineno) 31 | else: 32 | self.cached_str = '{0} ({1})'.format(self.func_name, self.filename) 33 | 34 | return self.cached_str 35 | -------------------------------------------------------------------------------- /tests/runtime_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import signal 3 | import os 4 | 5 | import stackimpact 6 | from stackimpact.runtime import runtime_info, register_signal 7 | 8 | 9 | class RuntimeTestCase(unittest.TestCase): 10 | 11 | def test_register_signal(self): 12 | if runtime_info.OS_WIN: 13 | return 14 | 15 | result = {'handler': 0} 16 | 17 | def _handler(signum, frame): 18 | result['handler'] += 1 19 | 20 | register_signal(signal.SIGUSR1, _handler) 21 | 22 | os.kill(os.getpid(), signal.SIGUSR1) 23 | os.kill(os.getpid(), signal.SIGUSR1) 24 | 25 | signal.signal(signal.SIGUSR1, signal.SIG_DFL) 26 | 27 | self.assertEqual(result['handler'], 2) 28 | 29 | 30 | '''def test_register_signal_default(self): 31 | result = {'handler': 0} 32 | 33 | def _handler(signum, frame): 34 | result['handler'] += 1 35 | 36 | register_signal(signal.SIGUSR1, _handler, once = True) 37 | 38 | os.kill(os.getpid(), signal.SIGUSR1) 39 | os.kill(os.getpid(), signal.SIGUSR1) 40 | 41 | self.assertEqual(result['handler'], 1)''' 42 | 43 | 44 | if __name__ == '__main__': 45 | unittest.main() 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/profilers/cpu_profiler_test.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import unittest 4 | import random 5 | import threading 6 | import sys 7 | import traceback 8 | 9 | import stackimpact 10 | from stackimpact.runtime import runtime_info 11 | 12 | 13 | class CPUProfilerTestCase(unittest.TestCase): 14 | 15 | def test_record_profile(self): 16 | if runtime_info.OS_WIN: 17 | return 18 | 19 | stackimpact._agent = None 20 | agent = stackimpact.start( 21 | dashboard_address = 'http://localhost:5001', 22 | agent_key = 'key1', 23 | app_name = 'TestPythonApp', 24 | auto_profiling = False, 25 | debug = True 26 | ) 27 | 28 | agent.cpu_reporter.profiler.reset() 29 | 30 | def record(): 31 | agent.cpu_reporter.profiler.start_profiler() 32 | time.sleep(2) 33 | agent.cpu_reporter.profiler.stop_profiler() 34 | 35 | 36 | record_t = threading.Thread(target=record) 37 | record_t.start() 38 | 39 | def cpu_work_main_thread(): 40 | for i in range(0, 1000000): 41 | text = "text1" + str(i) 42 | text = text + "text2" 43 | cpu_work_main_thread() 44 | 45 | record_t.join() 46 | 47 | profile = agent.cpu_reporter.profiler.build_profile(2)[0]['profile'].to_dict() 48 | #print(profile) 49 | 50 | self.assertTrue('cpu_work_main_thread' in str(profile)) 51 | 52 | agent.destroy() 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, StackImpact GmbH. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 
7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the name of the StackImpact GmbH nor the 11 | names of its contributors may be used to endorse or promote products 12 | derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | def read(fname): 5 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 6 | 7 | setup( 8 | name = 'stackimpact', 9 | version = '1.2.6', 10 | description = 'StackImpact Python Profiler', 11 | long_description = read('README.rst'), 12 | author = 'StackImpact', 13 | author_email = 'devops@stackimpact.com', 14 | url = 'https://stackimpact.com', 15 | license = 'BSD', 16 | keywords = [ 17 | "cpu profiler", 18 | "memory profiler", 19 | "blocking call profiler" 20 | "error monitoring" 21 | "health metrics" 22 | "garbage collection metrics" 23 | ], 24 | classifiers = [ 25 | 'Development Status :: 5 - Production/Stable', 26 | 'Environment :: Web Environment', 27 | 'Environment :: Console', 28 | 'Intended Audience :: Developers', 29 | 'Intended Audience :: System Administrators', 30 | 'License :: OSI Approved :: BSD License', 31 | 'Operating System :: OS Independent', 32 | 'Programming Language :: Python', 33 | 'Programming Language :: Python :: 2.7', 34 | 'Programming Language :: Python :: 3.4', 35 | 'Programming Language :: Python :: 3.5', 36 | 'Programming Language :: Python :: 3.6', 37 | 'Programming Language :: Python :: 3.7', 38 | 'Programming Language :: Python :: Implementation :: CPython', 39 | 'Topic :: Software Development', 40 | 'Topic :: System :: Monitoring', 41 | ], 42 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4', 43 | packages = find_packages(exclude=[ 44 | 'examples.*', 'examples', 45 | 'test.*', 'test']) 46 | ) 47 | -------------------------------------------------------------------------------- /tests/api_request_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import json 4 | import os 5 | import socket 6 | 7 | import stackimpact 8 | from stackimpact.api_request import APIRequest 9 | 10 | from test_server import TestServer 11 | 12 | 13 | class ApiRequestTestCase(unittest.TestCase): 14 | 15 | def test_post(self): 16 | server = TestServer(5001) 17 | server.set_response_data(json.dumps({'c': 3, 'd': 4})) 18 | server.start() 19 | 20 | 
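        # reset the module-level singleton so stackimpact.start() creates a
        # fresh Agent instance for this test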
stackimpact._agent = None 21 | agent = stackimpact.start( 22 | dashboard_address = 'http://localhost:5001', 23 | agent_key = 'key1', 24 | app_name = 'TestPythonApp', 25 | app_environment = 'test', 26 | app_version = '1.1.1', 27 | debug = True 28 | ) 29 | 30 | api_request = APIRequest(agent) 31 | 32 | api_request.post('test', {'a': 1, 'b': 2}) 33 | data = json.loads(server.get_request_data()) 34 | self.assertEqual(data['run_id'], agent.run_id) 35 | self.assertEqual(data['run_ts'], agent.run_ts) 36 | self.assertEqual(data['process_id'], os.getpid()) 37 | self.assertEqual(data['host_name'], socket.gethostname()) 38 | self.assertEqual(data['runtime_type'], 'python') 39 | self.assertEqual(data['runtime_version'], '{0.major}.{0.minor}.{0.micro}'.format(sys.version_info)) 40 | self.assertEqual(data['agent_version'], agent.AGENT_VERSION) 41 | self.assertEqual(data['app_name'], 'TestPythonApp') 42 | self.assertEqual(data['app_environment'], 'test') 43 | self.assertEqual(data['app_version'], '1.1.1') 44 | self.assertEqual(data['payload'], {'a': 1, 'b': 2}) 45 | 46 | agent.destroy() 47 | server.join() 48 | 49 | 50 | if __name__ == '__main__': 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /tests/profilers/allocation_profiler_test.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import unittest 4 | import random 5 | import threading 6 | 7 | import stackimpact 8 | from stackimpact.runtime import min_version, runtime_info 9 | 10 | 11 | class AllocationProfilerTestCase(unittest.TestCase): 12 | 13 | def test_record_allocation_profile(self): 14 | if runtime_info.OS_WIN or not min_version(3, 4): 15 | return 16 | 17 | stackimpact._agent = None 18 | agent = stackimpact.start( 19 | dashboard_address = 'http://localhost:5001', 20 | agent_key = 'key1', 21 | app_name = 'TestPythonApp', 22 | auto_profiling = False, 23 | debug = True 24 | ) 25 | 26 | agent.allocation_reporter.profiler.reset() 27 | 28 | mem1 = [] 29 | def mem_leak(n = 100000): 30 | mem2 = [] 31 | for i in range(0, n): 32 | mem1.append(random.randint(0, 1000)) 33 | mem2.append(random.randint(0, 1000)) 34 | 35 | def mem_leak2(): 36 | mem_leak() 37 | 38 | def mem_leak3(): 39 | mem_leak2() 40 | 41 | def mem_leak4(): 42 | mem_leak3() 43 | 44 | def mem_leak5(): 45 | mem_leak4() 46 | 47 | result = {} 48 | def record(): 49 | agent.allocation_reporter.profiler.start_profiler() 50 | time.sleep(2) 51 | agent.allocation_reporter.profiler.stop_profiler() 52 | 53 | t = threading.Thread(target=record) 54 | t.start() 55 | 56 | # simulate leak 57 | mem_leak5() 58 | 59 | t.join() 60 | 61 | profile = agent.allocation_reporter.profiler.build_profile(2)[0]['profile'].to_dict() 62 | #print(str(profile)) 63 | 64 | self.assertTrue('allocation_profiler_test.py' in str(profile)) 65 | 66 | agent.destroy() 67 | 68 | 69 | if __name__ == '__main__': 70 | unittest.main() 71 | -------------------------------------------------------------------------------- /tests/message_queue_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import json 4 | 5 | import stackimpact 6 | from stackimpact.utils import timestamp 7 | 8 | from test_server import TestServer 9 | 10 | 11 | 12 | class MessageQueueTest(unittest.TestCase): 13 | 14 | 15 | def test_flush(self): 16 | server = TestServer(5005) 17 | server.start() 18 | 19 | stackimpact._agent = None 20 | agent = stackimpact.start( 21 | dashboard_address 
= 'http://localhost:5005', 22 | agent_key = 'key1', 23 | app_name = 'TestPythonApp', 24 | debug = True 25 | ) 26 | 27 | m = { 28 | 'm1': 1 29 | } 30 | agent.message_queue.add('t1', m) 31 | 32 | m = { 33 | 'm2': 2 34 | } 35 | agent.message_queue.add('t1', m) 36 | 37 | agent.message_queue.queue[0]['added_at'] = timestamp() - 20 * 60 38 | 39 | agent.message_queue.flush() 40 | 41 | data = json.loads(server.get_request_data()) 42 | self.assertEqual(data['payload']['messages'][0]['content']['m2'], 2) 43 | 44 | agent.destroy() 45 | server.join() 46 | 47 | 48 | def test_flush_fail(self): 49 | server = TestServer(5006) 50 | server.set_response_data("unparsablejson") 51 | server.start() 52 | 53 | stackimpact._agent = None 54 | agent = stackimpact.start( 55 | dashboard_address = 'http://localhost:5006', 56 | agent_key = 'key1', 57 | app_name = 'TestPythonApp', 58 | debug = True 59 | ) 60 | 61 | m = { 62 | 'm1': 1 63 | } 64 | agent.message_queue.add('t1', m) 65 | 66 | m = { 67 | 'm2': 2 68 | } 69 | agent.message_queue.add('t1', m) 70 | 71 | agent.message_queue.flush() 72 | self.assertEqual(len(agent.message_queue.queue), 2) 73 | 74 | agent.destroy() 75 | server.join() 76 | 77 | if __name__ == '__main__': 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /stackimpact/reporters/span_reporter.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import threading 4 | import traceback 5 | import collections 6 | 7 | from ..runtime import runtime_info, patch, unpatch 8 | from ..metric import Metric 9 | from ..metric import Breakdown 10 | from ..frame import Frame 11 | 12 | 13 | class SpanReporter(object): 14 | 15 | MAX_QUEUED_EXC = 100 16 | 17 | 18 | def __init__(self, agent): 19 | self.agent = agent 20 | self.started = False 21 | self.report_timer = None 22 | self.span_counters = None 23 | self.span_lock = threading.Lock() 24 | 25 | 26 | def setup(self): 27 | pass 28 | 29 | 30 | def destroy(self): 31 | pass 32 | 33 | 34 | def reset(self): 35 | self.span_counters = dict() 36 | 37 | 38 | def start(self): 39 | if not self.agent.get_option('auto_profiling'): 40 | return 41 | 42 | if self.started: 43 | return 44 | self.started = True 45 | 46 | self.reset() 47 | 48 | self.report_timer = self.agent.schedule(60, 60, self.report) 49 | 50 | 51 | def stop(self): 52 | if not self.started: 53 | return 54 | self.started = False 55 | 56 | self.report_timer.cancel() 57 | self.report_timer = None 58 | 59 | 60 | def record_span(self, name, duration): 61 | if not self.started: 62 | return 63 | 64 | counter = None 65 | if name in self.span_counters: 66 | counter = self.span_counters[name] 67 | else: 68 | with self.span_lock: 69 | counter = Breakdown(name) 70 | self.span_counters[name] = counter 71 | 72 | counter.update_p95(duration * 1000) 73 | 74 | 75 | def report(self): 76 | for name, counter in self.span_counters.items(): 77 | counter.evaluate_p95(); 78 | 79 | metric = Metric(self.agent, Metric.TYPE_STATE, Metric.CATEGORY_SPAN, counter.name, Metric.UNIT_MILLISECOND) 80 | measurement = metric.create_measurement(Metric.TRIGGER_TIMER, counter.measurement, 60) 81 | self.agent.message_queue.add('metric', metric.to_dict()) 82 | 83 | self.reset() 84 | -------------------------------------------------------------------------------- /stackimpact/frame_cache.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | import os 4 | import re 5 | import importlib 6 | 7 | 
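# gevent is imported only when runtime detection (runtime_info.GEVENT) found it,
# so that greenlet frames can be classified as system frames alongside the
# stdlib threading ones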
from .runtime import runtime_info 8 | 9 | if runtime_info.GEVENT: 10 | import gevent 11 | 12 | class FrameCache(object): 13 | MAX_CACHE_SIZE = 2500 14 | 15 | def __init__(self, agent): 16 | self.agent = agent 17 | self.agent_frame_cache = None 18 | self.system_frame_cache = None 19 | 20 | self.include_agent_frames = None 21 | self.include_system_frames = None 22 | 23 | self.agent_dir = os.path.dirname(os.path.realpath(__file__)) 24 | self.system_dir = os.path.dirname(os.path.realpath(threading.__file__)) 25 | if runtime_info.GEVENT: 26 | self.gevent_dir = os.path.dirname(os.path.realpath(gevent.__file__)) 27 | 28 | 29 | def start(self): 30 | self.agent_frame_cache = dict() 31 | self.system_frame_cache = dict() 32 | 33 | self.include_agent_frames = self.agent.get_option('include_agent_frames') 34 | self.include_system_frames = self.agent.get_option('include_system_frames') 35 | 36 | def stop(self): 37 | pass 38 | 39 | 40 | def is_agent_frame(self, filename): 41 | if filename in self.agent_frame_cache: 42 | return self.agent_frame_cache[filename] 43 | 44 | agent_frame = False 45 | 46 | if not self.include_agent_frames: 47 | if filename.startswith(self.agent_dir): 48 | agent_frame = True 49 | 50 | if len(self.agent_frame_cache) < self.MAX_CACHE_SIZE: 51 | self.agent_frame_cache[filename] = agent_frame 52 | 53 | return agent_frame 54 | 55 | 56 | def is_system_frame(self, filename): 57 | if filename in self.system_frame_cache: 58 | return self.system_frame_cache[filename] 59 | 60 | system_frame = False 61 | 62 | if not self.include_system_frames: 63 | if (filename.startswith(self.system_dir) or 64 | (runtime_info.GEVENT and filename.startswith(self.gevent_dir))): 65 | system_frame = True 66 | 67 | if len(self.system_frame_cache) < self.MAX_CACHE_SIZE: 68 | self.system_frame_cache[filename] = system_frame 69 | 70 | return system_frame 71 | -------------------------------------------------------------------------------- /tests/test_server.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import threading 4 | import time 5 | 6 | from io import BytesIO 7 | 8 | 9 | try: 10 | # python 2 11 | from BaseHTTPServer import HTTPServer,BaseHTTPRequestHandler 12 | except: 13 | # python 3 14 | from http.server import HTTPServer,BaseHTTPRequestHandler 15 | 16 | import gzip 17 | 18 | 19 | class TestServer(threading.Thread): 20 | def __init__(self, port, delay = None, handler_func = None): 21 | self.port = port 22 | RequestHandler.delay = delay 23 | RequestHandler.handler_func = [handler_func] 24 | RequestHandler.response_data = '{}' 25 | RequestHandler.response_code = 200 26 | threading.Thread.__init__(self) 27 | self.server = HTTPServer(('localhost', self.port), RequestHandler) 28 | 29 | def get_request_data(self): 30 | return RequestHandler.request_data 31 | 32 | def set_response_data(self, response_data): 33 | RequestHandler.response_data = response_data 34 | 35 | def set_response_code(self, response_code): 36 | RequestHandler.response_code = response_code 37 | 38 | def run(self): 39 | self.server.handle_request() 40 | 41 | 42 | class RequestHandler(BaseHTTPRequestHandler): 43 | delay = None 44 | handler_func = None 45 | request_data = None 46 | response_data = '{}' 47 | response_code = 200 48 | 49 | 50 | def do_GET(self): 51 | if self.delay: 52 | time.sleep(self.delay) 53 | 54 | if RequestHandler.handler_func: 55 | RequestHandler.handler_func[0]() 56 | 57 | self.send_response(RequestHandler.response_code) 58 | 
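        # responses are always JSON; tests override the body via set_response_data()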
self.send_header('Content-Type', 'application/json') 59 | self.end_headers() 60 | self.wfile.write(RequestHandler.response_data.encode('utf-8')) 61 | 62 | 63 | def do_POST(self): 64 | if self.delay: 65 | time.sleep(self.delay) 66 | 67 | self.request_url = self.path 68 | content_len = int(self.headers.get('content-length')) 69 | 70 | decompressed_data = gzip.GzipFile(fileobj=BytesIO(self.rfile.read(content_len))).read() 71 | RequestHandler.request_data = decompressed_data.decode('utf-8') 72 | 73 | self.send_response(RequestHandler.response_code) 74 | self.send_header('Content-Type', 'application/json') 75 | self.end_headers() 76 | self.wfile.write(RequestHandler.response_data.encode('utf-8')) 77 | -------------------------------------------------------------------------------- /tests/reporters/process_reporter_test.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import unittest 4 | import sys 5 | 6 | import stackimpact 7 | from stackimpact.runtime import runtime_info, min_version 8 | from stackimpact.metric import Metric 9 | 10 | 11 | class ProcessReporterTestCase(unittest.TestCase): 12 | 13 | def test_report(self): 14 | stackimpact._agent = None 15 | agent = stackimpact.start( 16 | dashboard_address = 'http://localhost:5001', 17 | agent_key = 'key1', 18 | app_name = 'TestPythonApp', 19 | debug = True 20 | ) 21 | agent.process_reporter.start() 22 | 23 | agent.process_reporter.report() 24 | time.sleep(0.1) 25 | agent.process_reporter.report() 26 | 27 | metrics = agent.process_reporter.metrics 28 | 29 | if not runtime_info.OS_WIN: 30 | self.is_valid(metrics, Metric.TYPE_COUNTER, Metric.CATEGORY_CPU, Metric.NAME_CPU_TIME, 0, float("inf")) 31 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_CPU, Metric.NAME_CPU_USAGE, 0, float("inf")) 32 | 33 | if not runtime_info.OS_WIN: 34 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_MAX_RSS, 0, float("inf")) 35 | 36 | if runtime_info.OS_LINUX: 37 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_CURRENT_RSS, 0, float("inf")) 38 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_VM_SIZE, 0, float("inf")) 39 | 40 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_GC, Metric.NAME_GC_COUNT, 0, float("inf")) 41 | if min_version(3, 4): 42 | self.is_valid(metrics, Metric.TYPE_COUNTER, Metric.CATEGORY_GC, Metric.NAME_GC_COLLECTIONS, 0, float("inf")) 43 | self.is_valid(metrics, Metric.TYPE_COUNTER, Metric.CATEGORY_GC, Metric.NAME_GC_COLLECTED, 0, float("inf")) 44 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_GC, Metric.NAME_GC_UNCOLLECTABLE, 0, float("inf")) 45 | 46 | self.is_valid(metrics, Metric.TYPE_STATE, Metric.CATEGORY_RUNTIME, Metric.NAME_THREAD_COUNT, 0, float("inf")) 47 | 48 | agent.destroy() 49 | 50 | 51 | def is_valid(self, metrics, typ, category, name, min_value, max_value): 52 | key = typ + category + name 53 | 54 | self.assertTrue(key in metrics, key) 55 | 56 | m = metrics[key] 57 | if m.has_measurement(): 58 | #print(typ, category, name, m.measurement.value) 59 | self.assertTrue(m.measurement.value >= min_value and m.measurement.value <= max_value, key) 60 | 61 | 62 | if __name__ == '__main__': 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /stackimpact/config_loader.py: -------------------------------------------------------------------------------- 1 | from .api_request import APIRequest 2 | from .utils 
import timestamp 3 | 4 | 5 | class ConfigLoader(object): 6 | LOAD_DELAY = 2 7 | LOAD_INTERVAL = 120 8 | 9 | 10 | def __init__(self, agent): 11 | self.agent = agent 12 | self.load_timer = None 13 | self.last_load_ts = 0 14 | 15 | 16 | def start(self): 17 | if self.agent.get_option('auto_profiling'): 18 | self.load_timer = self.agent.schedule(self.LOAD_DELAY, self.LOAD_INTERVAL, self.load) 19 | 20 | 21 | def stop(self): 22 | if self.load_timer: 23 | self.load_timer.cancel() 24 | self.load_timer = None 25 | 26 | 27 | def load(self, with_interval=False): 28 | now = timestamp() 29 | if with_interval and self.last_load_ts > now - self.LOAD_INTERVAL: 30 | return 31 | 32 | self.last_load_ts = now; 33 | 34 | 35 | try: 36 | api_request = APIRequest(self.agent) 37 | config = api_request.post('config', {}) 38 | 39 | # agent_enabled yes|no 40 | if 'agent_enabled' in config: 41 | self.agent.config.set_agent_enabled(config['agent_enabled'] == 'yes') 42 | else: 43 | self.agent.config.set_agent_enabled(False) 44 | 45 | # profiling_disabled yes|no 46 | if 'profiling_disabled' in config: 47 | self.agent.config.set_profiling_disabled(config['profiling_disabled'] == 'yes') 48 | else: 49 | self.agent.config.set_profiling_disabled(False) 50 | 51 | 52 | if self.agent.config.is_agent_enabled() and not self.agent.config.is_profiling_disabled(): 53 | self.agent.cpu_reporter.start() 54 | self.agent.allocation_reporter.start() 55 | self.agent.block_reporter.start() 56 | self.agent.tf_reporter.start() 57 | else: 58 | self.agent.cpu_reporter.stop() 59 | self.agent.allocation_reporter.stop() 60 | self.agent.block_reporter.stop() 61 | self.agent.tf_reporter.stop() 62 | 63 | if self.agent.config.is_agent_enabled(): 64 | self.agent.error_reporter.start() 65 | self.agent.span_reporter.start() 66 | self.agent.process_reporter.start() 67 | self.agent.log('Agent activated') 68 | else: 69 | self.agent.error_reporter.stop() 70 | self.agent.span_reporter.stop() 71 | self.agent.process_reporter.stop() 72 | self.agent.log('Agent deactivated') 73 | 74 | 75 | except Exception: 76 | self.agent.log('Error loading config') 77 | self.agent.exception() 78 | -------------------------------------------------------------------------------- /stackimpact/message_queue.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import threading 3 | import copy 4 | 5 | from .api_request import APIRequest 6 | from .utils import timestamp, base64_encode 7 | 8 | 9 | class MessageQueue(object): 10 | FLUSH_INTERVAL = 5; 11 | MESSAGE_TTL = 10 * 60 12 | 13 | 14 | def __init__(self, agent): 15 | self.agent = agent 16 | self.queue = [] 17 | self.queue_lock = threading.Lock() 18 | self.flush_timer = None 19 | self.backoff_seconds = 0 20 | self.last_flush_ts = 0 21 | 22 | 23 | def start(self): 24 | if self.agent.get_option('auto_profiling'): 25 | self.flush_timer = self.agent.schedule(self.FLUSH_INTERVAL, self.FLUSH_INTERVAL, self.flush) 26 | 27 | 28 | def stop(self): 29 | if self.flush_timer: 30 | self.flush_timer.cancel() 31 | self.flush_timer = None 32 | 33 | 34 | def add(self, topic, message): 35 | entry = { 36 | 'topic': topic, 37 | 'content': message, 38 | 'added_at': timestamp() 39 | } 40 | 41 | with self.queue_lock: 42 | self.queue.append(entry) 43 | 44 | self.agent.log('Added message to the queue for topic: ' + topic) 45 | self.agent.log(message) 46 | 47 | 48 | def flush(self, with_interval=False): 49 | if len(self.queue) == 0: 50 | return 51 | 52 | now = timestamp() 53 | if with_interval and 
self.last_flush_ts > now - self.FLUSH_INTERVAL: 54 | return 55 | 56 | # flush only if backoff time is elapsed 57 | if self.last_flush_ts + self.backoff_seconds > now: 58 | return 59 | 60 | # expire old messages 61 | with self.queue_lock: 62 | self.queue = [m for m in self.queue if m['added_at'] >= now - self.MESSAGE_TTL] 63 | 64 | # read queue 65 | outgoing = None 66 | with self.queue_lock: 67 | outgoing = self.queue 68 | self.queue = [] 69 | 70 | # remove added_at 71 | outgoing_copy = copy.deepcopy(outgoing) 72 | for m in outgoing_copy: 73 | del m['added_at'] 74 | 75 | payload = { 76 | 'messages': outgoing_copy 77 | } 78 | 79 | self.last_flush_ts = now 80 | 81 | try: 82 | api_request = APIRequest(self.agent) 83 | api_request.post('upload', payload) 84 | 85 | # reset backoff 86 | self.backoff_seconds = 0 87 | except Exception: 88 | self.agent.log('Error uploading messages to dashboard, backing off next upload') 89 | self.agent.exception() 90 | 91 | self.queue_lock.acquire() 92 | self.queue[:0] = outgoing 93 | self.queue_lock.release() 94 | 95 | # increase backoff up to 1 minute 96 | if self.backoff_seconds == 0: 97 | self.backoff_seconds = 10 98 | elif self.backoff_seconds * 2 < 60: 99 | self.backoff_seconds *= 2 100 | 101 | -------------------------------------------------------------------------------- /stackimpact/api_request.py: -------------------------------------------------------------------------------- 1 | import json 2 | import gzip 3 | import sys 4 | import os 5 | import socket 6 | 7 | from io import BytesIO 8 | 9 | from .utils import timestamp, base64_encode 10 | from .runtime import runtime_info 11 | 12 | 13 | if runtime_info.PYTHON_2: 14 | from urllib2 import urlopen 15 | from urllib2 import Request 16 | from urllib import urlencode 17 | else: 18 | from urllib.request import urlopen 19 | from urllib.request import Request 20 | from urllib.parse import urlencode 21 | 22 | 23 | class APIRequest(object): 24 | def __init__(self, agent): 25 | self.agent = agent 26 | 27 | def post(self, endpoint, payload): 28 | agent_key_64 = base64_encode(self.agent.get_option('agent_key') + ':').replace('\n', '') 29 | headers = { 30 | 'Accept-Encoding': 'gzip', 31 | 'Authorization': "Basic %s" % agent_key_64, 32 | 'Content-Type': 'application/json', 33 | 'Content-Encoding': 'gzip' 34 | } 35 | 36 | host_name = 'undefined' 37 | try: 38 | host_name = socket.gethostname() 39 | except Exception: 40 | self.agent.exception() 41 | 42 | req_body = { 43 | 'runtime_type': 'python', 44 | 'runtime_version': '{0.major}.{0.minor}.{0.micro}'.format(sys.version_info), 45 | 'runtime_path': sys.prefix, 46 | 'agent_version': self.agent.AGENT_VERSION, 47 | 'app_name': self.agent.get_option('app_name'), 48 | 'app_version': self.agent.get_option('app_version'), 49 | 'app_environment': self.agent.get_option('app_environment'), 50 | 'host_name': self.agent.get_option('host_name', host_name), 51 | 'process_id': os.getpid(), 52 | 'run_id': self.agent.run_id, 53 | 'run_ts': self.agent.run_ts, 54 | 'sent_at': timestamp(), 55 | 'payload': payload, 56 | } 57 | 58 | gzip_out = BytesIO() 59 | with gzip.GzipFile(fileobj=gzip_out, mode="w") as out_file: 60 | out_file.write(json.dumps(req_body).encode('utf-8')) 61 | out_file.close() 62 | 63 | gzip_out_val = gzip_out.getvalue() 64 | if isinstance(gzip_out_val, str): 65 | req_body_gzip = bytearray(gzip_out.getvalue()) 66 | else: 67 | req_body_gzip = gzip_out.getvalue() 68 | 69 | request = Request( 70 | url = self.agent.get_option('dashboard_address') + '/agent/v1/' + endpoint, 71 | 
data = req_body_gzip, 72 | headers = headers) 73 | 74 | response = urlopen(request, timeout = 20) 75 | 76 | result_data = response.read() 77 | 78 | if response.info(): 79 | content_type = response.info().get('Content-Encoding') 80 | if content_type == 'gzip': 81 | result_data = gzip.GzipFile('', 'r', 0, BytesIO(result_data)).read() 82 | 83 | response.close() 84 | 85 | return json.loads(result_data.decode('utf-8')) 86 | 87 | 88 | def python_version(): 89 | [sys.version_info.major,'',sys.version_info.minor + sys.version_info.micro] 90 | -------------------------------------------------------------------------------- /tests/profilers/block_profiler_test.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import time 4 | import unittest 5 | import random 6 | import threading 7 | 8 | 9 | try: 10 | # python 2 11 | from urllib2 import urlopen 12 | except ImportError: 13 | # python 3 14 | from urllib.request import urlopen 15 | 16 | 17 | import stackimpact 18 | from stackimpact.runtime import min_version, runtime_info 19 | from test_server import TestServer 20 | 21 | 22 | class BlockProfilerTestCase(unittest.TestCase): 23 | def test_record_block_profile(self): 24 | if runtime_info.OS_WIN: 25 | return 26 | 27 | stackimpact._agent = None 28 | agent = stackimpact.start( 29 | dashboard_address = 'http://localhost:5001', 30 | agent_key = 'key1', 31 | app_name = 'TestPythonApp', 32 | auto_profiling = False, 33 | debug = True 34 | ) 35 | 36 | agent.block_reporter.profiler.reset() 37 | 38 | lock = threading.Lock() 39 | event = threading.Event() 40 | 41 | def lock_lock(): 42 | lock.acquire() 43 | time.sleep(0.5) 44 | lock.release() 45 | 46 | def lock_wait(): 47 | lock.acquire() 48 | lock.release() 49 | 50 | 51 | def event_lock(): 52 | time.sleep(0.5) 53 | event.set() 54 | 55 | 56 | def event_wait(): 57 | event.wait() 58 | 59 | 60 | def handler(): 61 | time.sleep(0.4) 62 | 63 | def url_wait(): 64 | server = TestServer(5010, 0.4, handler) 65 | server.start() 66 | urlopen('http://localhost:5010') 67 | server.join() 68 | 69 | 70 | result = {} 71 | def record(): 72 | agent.block_reporter.profiler.start_profiler() 73 | time.sleep(2) 74 | agent.block_reporter.profiler.stop_profiler() 75 | 76 | record_t = threading.Thread(target=record) 77 | record_t.start() 78 | 79 | # simulate lock 80 | t = threading.Thread(target=lock_lock) 81 | t.start() 82 | 83 | t = threading.Thread(target=lock_wait) 84 | t.start() 85 | 86 | # simulate event 87 | t = threading.Thread(target=event_lock) 88 | t.start() 89 | 90 | t = threading.Thread(target=event_wait) 91 | t.start() 92 | 93 | # simulate network 94 | t = threading.Thread(target=url_wait) 95 | t.start() 96 | 97 | # make sure signals are delivered in python 2, when main thread is waiting 98 | if runtime_info.PYTHON_2: 99 | while record_t.is_alive(): 100 | pass 101 | 102 | record_t.join() 103 | 104 | profile = agent.block_reporter.profiler.build_profile(2)[0]['profile'].to_dict() 105 | #print(profile) 106 | 107 | self.assertTrue('lock_wait' in str(profile)) 108 | self.assertTrue('event_wait' in str(profile)) 109 | self.assertTrue('url_wait' in str(profile)) 110 | 111 | agent.destroy() 112 | 113 | 114 | if __name__ == '__main__': 115 | unittest.main() 116 | -------------------------------------------------------------------------------- /tests/metric_test.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | import sys 4 | import random 5 | 6 | import stackimpact 7 | 
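# Breakdown is a tree of named measurement nodes; the tests below build small
# trees by hand, for example:
#   root = Breakdown('root'); child1 = Breakdown('child1'); root.add_child(child1)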
from stackimpact.metric import Metric,Breakdown 8 | 9 | 10 | class MetricTestCase(unittest.TestCase): 11 | 12 | def test_counter_metric(self): 13 | stackimpact._agent = None 14 | agent = stackimpact.start( 15 | dashboard_address = 'http://localhost:5001', 16 | agent_key = 'key1', 17 | app_name = 'TestPythonApp', 18 | debug = True 19 | ) 20 | 21 | m = Metric(agent, Metric.TYPE_COUNTER, Metric.CATEGORY_CPU, Metric.NAME_CPU_USAGE, Metric.UNIT_NONE) 22 | 23 | m.create_measurement(Metric.TRIGGER_TIMER, 100) 24 | self.assertFalse(m.has_measurement()) 25 | 26 | m.create_measurement(Metric.TRIGGER_TIMER, 110) 27 | self.assertEqual(m.measurement.value, 10) 28 | 29 | m.create_measurement(Metric.TRIGGER_TIMER, 115) 30 | self.assertEqual(m.measurement.value, 5) 31 | 32 | agent.destroy() 33 | 34 | 35 | 36 | def test_profile_filter(self): 37 | root = Breakdown('root') 38 | root.measurement = 10 39 | 40 | child1 = Breakdown('child1') 41 | child1.measurement = 9 42 | root.add_child(child1) 43 | 44 | child2 = Breakdown('child2') 45 | child2.measurement = 1 46 | root.add_child(child2) 47 | 48 | child2child1 = Breakdown('child2child1') 49 | child2child1.measurement = 1 50 | child2.add_child(child2child1) 51 | 52 | root.filter(2, 3, 100) 53 | 54 | self.assertTrue(root.find_child('child1')) 55 | self.assertTrue(root.find_child('child2')) 56 | self.assertFalse(child2.find_child('child2child1')) 57 | 58 | 59 | def test_profile_depth(self): 60 | root = Breakdown("root") 61 | 62 | child1 = Breakdown("child1") 63 | root.add_child(child1) 64 | 65 | child2 = Breakdown("child2") 66 | root.add_child(child2) 67 | 68 | child2child1 = Breakdown("child2child1") 69 | child2.add_child(child2child1) 70 | 71 | self.assertEqual(root.depth(), 3) 72 | self.assertEqual(child1.depth(), 1) 73 | self.assertEqual(child2.depth(), 2) 74 | 75 | 76 | def test_profile_p95(self): 77 | root = Breakdown("root") 78 | 79 | child1 = Breakdown("child1") 80 | root.add_child(child1) 81 | 82 | child2 = Breakdown("child2") 83 | root.add_child(child2) 84 | 85 | child2child1 = Breakdown("child2child1") 86 | child2.add_child(child2child1) 87 | 88 | child2child1.update_p95(6.5) 89 | child2child1.update_p95(4.2) 90 | child2child1.update_p95(5.0) 91 | child2child1.evaluate_p95() 92 | root.propagate() 93 | 94 | self.assertEqual(root.measurement, 6.5) 95 | 96 | 97 | def test_profile_p95_big(self): 98 | root = Breakdown("root") 99 | 100 | for i in range(0, 10000): 101 | root.update_p95(200.0 + random.randint(0, 50)) 102 | 103 | root.evaluate_p95() 104 | 105 | self.assertTrue(root.measurement >= 200 and root.measurement <= 250) 106 | 107 | 108 | 109 | if __name__ == '__main__': 110 | unittest.main() 111 | 112 | 113 | -------------------------------------------------------------------------------- /examples/demo/flask_app.py: -------------------------------------------------------------------------------- 1 | #env AGENT_KEY=agnetkeyhere FLASK_APP=examples/demo/flask_app.py flask run -p 5010 2 | 3 | from __future__ import print_function 4 | 5 | import os 6 | import time 7 | import sys 8 | import threading 9 | import subprocess 10 | import collections 11 | import random 12 | import traceback 13 | from flask import Flask 14 | 15 | 16 | try: 17 | # python 2 18 | from urllib2 import urlopen 19 | except ImportError: 20 | # python 3 21 | from urllib.request import urlopen 22 | 23 | sys.path.append(".") 24 | import stackimpact 25 | 26 | 27 | 28 | # StackImpact agent initialization 29 | agent = stackimpact.start( 30 | agent_key = os.environ['AGENT_KEY'], 31 | 
dashboard_address = os.environ['DASHBOARD_ADDRESS'], 32 | app_name = 'ExamplePythonFlaskApp', 33 | app_version = '1.0.0', 34 | debug = True) 35 | 36 | 37 | 38 | # Simulate CPU intensive work 39 | def simulate_cpu(): 40 | duration = 10 * 60 * 60 41 | usage = 10 42 | 43 | while True: 44 | for j in range(0, duration): 45 | for i in range(0, usage * 15000): 46 | text = "text1" + str(i) 47 | text = text + "text2" 48 | 49 | time.sleep(1 - usage/100) 50 | 51 | t = threading.Thread(target=simulate_cpu) 52 | t.start() 53 | 54 | 55 | # Simulate memory leak 56 | def simulate_mem_leak(): 57 | while True: 58 | mem1 = [] 59 | 60 | for j in range(0, 1800): 61 | mem2 = [] 62 | for i in range(0, 1000): 63 | obj1 = {'v': random.randint(0, 1000000)} 64 | mem1.append(obj1) 65 | 66 | obj2 = {'v': random.randint(0, 1000000)} 67 | mem2.append(obj2) 68 | 69 | time.sleep(1) 70 | 71 | t = threading.Thread(target=simulate_mem_leak) 72 | t.start() 73 | 74 | 75 | # Simulate lock 76 | def simulate_lock(): 77 | lock = threading.Lock() 78 | 79 | def lock_wait(): 80 | lock.acquire() 81 | lock.release() 82 | 83 | while True: 84 | lock.acquire() 85 | 86 | t = threading.Thread(target=lock_wait) 87 | t.start() 88 | 89 | time.sleep(1) 90 | 91 | lock.release() 92 | 93 | time.sleep(1) 94 | 95 | t = threading.Thread(target=simulate_lock) 96 | t.start() 97 | 98 | 99 | # Simulate exceptions 100 | def simulate_exceptions(): 101 | while True: 102 | try: 103 | raise ValueError('some error') 104 | except: 105 | traceback.print_exc() 106 | pass 107 | 108 | time.sleep(2) 109 | 110 | 111 | t = threading.Thread(target=simulate_exceptions) 112 | t.start() 113 | 114 | 115 | # Simulate http server 116 | def simulate_http_traffic(): 117 | while True: 118 | try: 119 | urlopen('http://localhost:5010', timeout=10) 120 | time.sleep(2) 121 | except: 122 | traceback.print_exc() 123 | pass 124 | 125 | 126 | t = threading.Thread(target=simulate_http_traffic) 127 | t.start() 128 | 129 | 130 | def cpu_work(): 131 | for i in range(0, 1000000): 132 | text = "text1" + str(i) 133 | text = text + "text2" 134 | 135 | 136 | app = Flask(__name__) 137 | 138 | @app.route('/') 139 | def hello_world(): 140 | time.sleep(0.5) 141 | 142 | cpu_work() 143 | 144 | return 'Hello' 145 | 146 | 147 | -------------------------------------------------------------------------------- /stackimpact/runtime.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import sys 4 | import re 5 | import os 6 | import signal 7 | from functools import wraps 8 | try: 9 | import resource 10 | except ImportError: 11 | pass 12 | 13 | 14 | class runtime_info(object): 15 | OS_LINUX = (sys.platform.startswith('linux')) 16 | OS_DARWIN = (sys.platform == 'darwin') 17 | OS_WIN = (sys.platform == 'win32') 18 | PYTHON_2 = (sys.version_info.major == 2) 19 | PYTHON_3 = (sys.version_info.major == 3) 20 | GEVENT = False 21 | 22 | try: 23 | import gevent 24 | if hasattr(gevent, '_threading'): 25 | runtime_info.GEVENT = True 26 | except ImportError: 27 | pass 28 | 29 | 30 | VM_RSS_REGEXP = re.compile('VmRSS:\s+(\d+)\s+kB') 31 | VM_SIZE_REGEXP = re.compile('VmSize:\s+(\d+)\s+kB') 32 | 33 | 34 | def min_version(major, minor=0): 35 | return (sys.version_info.major == major and sys.version_info.minor >= minor) 36 | 37 | 38 | def read_cpu_time(): 39 | rusage = resource.getrusage(resource.RUSAGE_SELF) 40 | return int((rusage.ru_utime + rusage.ru_stime) * 1e9) # nanoseconds 41 | 42 | 43 | def read_max_rss(): 44 | rusage = resource.getrusage(resource.RUSAGE_SELF) 
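    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS, hence
    # the Darwin-only division below (1000 is used as an approximation)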
45 | 46 | if runtime_info.OS_DARWIN: 47 | return int(rusage.ru_maxrss / 1000) # KB 48 | else: 49 | return rusage.ru_maxrss # KB 50 | 51 | 52 | def read_current_rss(): 53 | pid = os.getpid() 54 | 55 | output = None 56 | try: 57 | f = open('/proc/{0}/status'.format(os.getpid())) 58 | output = f.read() 59 | f.close() 60 | except Exception: 61 | return None 62 | 63 | match = VM_RSS_REGEXP.search(output) 64 | if match: 65 | return int(float(match.group(1))) 66 | 67 | return None 68 | 69 | 70 | def read_vm_size(): 71 | pid = os.getpid() 72 | 73 | output = None 74 | try: 75 | f = open('/proc/{0}/status'.format(os.getpid())) 76 | output = f.read() 77 | f.close() 78 | except Exception: 79 | return None 80 | 81 | match = VM_SIZE_REGEXP.search(output) 82 | if match: 83 | return int(float(match.group(1))) 84 | 85 | return None 86 | 87 | 88 | def patch(obj, func_name, before_func, after_func): 89 | if not hasattr(obj, func_name): 90 | return False 91 | 92 | target_func = getattr(obj, func_name) 93 | 94 | # already patched 95 | if hasattr(target_func, '__stackimpact_orig__'): 96 | return True 97 | 98 | @wraps(target_func) 99 | def wrapper(*args, **kwargs): 100 | data = None 101 | 102 | if before_func: 103 | args, kwargs, data = before_func(args, kwargs) 104 | 105 | ret = target_func(*args, **kwargs) 106 | 107 | if after_func: 108 | after_func(args, kwargs, ret, data) 109 | 110 | return ret 111 | 112 | wrapper.__orig__ = target_func 113 | setattr(obj, func_name, wrapper) 114 | 115 | return True 116 | 117 | 118 | def unpatch(obj, func_name): 119 | if not hasattr(obj, func_name): 120 | return 121 | 122 | wrapper = getattr(obj, func_name) 123 | if not hasattr(wrapper, '__stackimpact_orig__'): 124 | return 125 | 126 | setattr(obj, func_name, getattr(wrapper, '__stackimpact_orig__')) 127 | 128 | 129 | def register_signal(signal_number, handler_func, once=False): 130 | prev_handler = None 131 | 132 | def _handler(signum, frame): 133 | skip_prev = handler_func(signum, frame) 134 | 135 | if not skip_prev: 136 | if callable(prev_handler): 137 | if once: 138 | signal.signal(signum, prev_handler) 139 | prev_handler(signum, frame) 140 | elif prev_handler == signal.SIG_DFL and once: 141 | signal.signal(signum, signal.SIG_DFL) 142 | os.kill(os.getpid(), signum) 143 | 144 | prev_handler = signal.signal(signal_number, _handler) 145 | 146 | -------------------------------------------------------------------------------- /stackimpact/profilers/cpu_profiler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os 4 | import sys 5 | import time 6 | import threading 7 | import re 8 | import signal 9 | 10 | from ..runtime import min_version, runtime_info 11 | from ..metric import Metric 12 | from ..metric import Breakdown 13 | from ..frame import Frame 14 | 15 | 16 | 17 | class CPUProfiler(object): 18 | SAMPLING_RATE = 0.01 19 | MAX_TRACEBACK_SIZE = 25 # number of frames 20 | 21 | 22 | def __init__(self, agent): 23 | self.agent = agent 24 | self.ready = False 25 | self.profile = None 26 | self.profile_lock = threading.Lock() 27 | self.prev_signal_handler = None 28 | self.sampler_active = False 29 | 30 | 31 | def setup(self): 32 | if self.agent.get_option('cpu_profiler_disabled'): 33 | return 34 | 35 | if not runtime_info.OS_LINUX and not runtime_info.OS_DARWIN: 36 | self.agent.log('CPU profiler is only supported on Linux and OS X.') 37 | return 38 | 39 | def _sample(signum, signal_frame): 40 | if self.sampler_active: 41 | return 42 | 
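            # SIGPROF is delivered by setitimer(ITIMER_PROF) roughly every
            # SAMPLING_RATE seconds of CPU time consumed by the process; the
            # sampler_active flag prevents re-entrant sampling if a signal
            # arrives while a previous sample is still being processed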
self.sampler_active = True 43 | 44 | with self.profile_lock: 45 | try: 46 | self.process_sample(signal_frame) 47 | signal_frame = None 48 | except Exception: 49 | self.agent.exception() 50 | 51 | self.sampler_active = False 52 | 53 | self.prev_signal_handler = signal.signal(signal.SIGPROF, _sample) 54 | 55 | self.ready = True 56 | 57 | 58 | def reset(self): 59 | self.profile = Breakdown('Execution call graph', Breakdown.TYPE_CALLGRAPH) 60 | 61 | 62 | def start_profiler(self): 63 | self.agent.log('Activating CPU profiler.') 64 | 65 | signal.setitimer(signal.ITIMER_PROF, self.SAMPLING_RATE, self.SAMPLING_RATE) 66 | 67 | 68 | def stop_profiler(self): 69 | signal.setitimer(signal.ITIMER_PROF, 0) 70 | 71 | 72 | def destroy(self): 73 | if not self.ready: 74 | return 75 | 76 | signal.signal(signal.SIGPROF, self.prev_signal_handler) 77 | 78 | 79 | def build_profile(self, duration): 80 | with self.profile_lock: 81 | self.profile.propagate() 82 | self.profile.evaluate_percent(duration / self.SAMPLING_RATE) 83 | self.profile.filter(2, 1, 100) 84 | 85 | return [{ 86 | 'category': Metric.CATEGORY_CPU_PROFILE, 87 | 'name': Metric.NAME_MAIN_THREAD_CPU_USAGE, 88 | 'unit': Metric.UNIT_PERCENT, 89 | 'unit_interval': None, 90 | 'profile': self.profile 91 | }] 92 | 93 | 94 | def process_sample(self, signal_frame): 95 | if self.profile: 96 | start = time.clock() 97 | if signal_frame: 98 | stack = self.recover_stack(signal_frame) 99 | if stack: 100 | self.update_profile(self.profile, stack) 101 | 102 | stack = None 103 | 104 | 105 | def recover_stack(self, signal_frame): 106 | stack = [] 107 | 108 | depth = 0 109 | while signal_frame is not None and depth <= self.MAX_TRACEBACK_SIZE: 110 | if signal_frame.f_code and signal_frame.f_code.co_name and signal_frame.f_code.co_filename: 111 | func_name = signal_frame.f_code.co_name 112 | filename = signal_frame.f_code.co_filename 113 | lineno = signal_frame.f_lineno 114 | 115 | if self.agent.frame_cache.is_agent_frame(filename): 116 | return None 117 | 118 | frame = Frame(func_name, filename, lineno) 119 | stack.append(frame) 120 | 121 | signal_frame = signal_frame.f_back 122 | 123 | depth += 1 124 | 125 | if len(stack) == 0: 126 | return None 127 | else: 128 | return stack 129 | 130 | 131 | def update_profile(self, profile, stack): 132 | current_node = profile 133 | 134 | for frame in reversed(stack): 135 | current_node = current_node.find_or_add_child(str(frame)) 136 | current_node.set_type(Breakdown.TYPE_CALLSITE) 137 | 138 | current_node.increment(0, 1) 139 | -------------------------------------------------------------------------------- /stackimpact/profilers/allocation_profiler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys 4 | import time 5 | import re 6 | import threading 7 | 8 | from ..runtime import min_version, runtime_info, read_vm_size 9 | from ..utils import timestamp 10 | from ..metric import Metric 11 | from ..metric import Breakdown 12 | 13 | if min_version(3, 4): 14 | import tracemalloc 15 | 16 | 17 | class AllocationProfiler(object): 18 | MAX_TRACEBACK_SIZE = 25 # number of frames 19 | MAX_MEMORY_OVERHEAD = 10 * 1e6 # 10MB 20 | MAX_PROFILED_ALLOCATIONS = 25 21 | 22 | 23 | def __init__(self, agent): 24 | self.agent = agent 25 | self.ready = False 26 | self.profile = None 27 | self.profile_lock = threading.Lock() 28 | self.overhead_monitor = None 29 | self.start_ts = None 30 | 31 | 32 | def setup(self): 33 | if 
self.agent.get_option('allocation_profiler_disabled'): 34 | return 35 | 36 | if not runtime_info.OS_LINUX and not runtime_info.OS_DARWIN: 37 | self.agent.log('CPU profiler is only supported on Linux and OS X.') 38 | return 39 | 40 | if not min_version(3, 4): 41 | self.agent.log('Memory allocation profiling is available for Python 3.4 or higher') 42 | return 43 | 44 | self.ready = True 45 | 46 | 47 | def reset(self): 48 | self.profile = Breakdown('Allocation call graph', Breakdown.TYPE_CALLGRAPH) 49 | 50 | 51 | def start_profiler(self): 52 | self.agent.log('Activating memory allocation profiler.') 53 | 54 | def start(): 55 | tracemalloc.start(self.MAX_TRACEBACK_SIZE) 56 | self.agent.run_in_main_thread(start) 57 | 58 | self.start_ts = time.time() 59 | 60 | def monitor_overhead(): 61 | if tracemalloc.is_tracing() and tracemalloc.get_tracemalloc_memory() > self.MAX_MEMORY_OVERHEAD: 62 | self.agent.log('Allocation profiler memory overhead limit exceeded: {0} bytes'.format(tracemalloc.get_tracemalloc_memory())) 63 | self.stop_profiler() 64 | 65 | self.overhead_monitor = self.agent.schedule(0.5, 0.5, monitor_overhead) 66 | 67 | 68 | def stop_profiler(self): 69 | self.agent.log('Deactivating memory allocation profiler.') 70 | 71 | with self.profile_lock: 72 | if self.overhead_monitor: 73 | self.overhead_monitor.cancel() 74 | self.overhead_monitor = None 75 | 76 | if tracemalloc.is_tracing(): 77 | snapshot = tracemalloc.take_snapshot() 78 | self.agent.log('Allocation profiler memory overhead {0} bytes'.format(tracemalloc.get_tracemalloc_memory())) 79 | tracemalloc.stop() 80 | self.process_snapshot(snapshot, time.time() - self.start_ts) 81 | 82 | 83 | def build_profile(self, duration): 84 | with self.profile_lock: 85 | self.profile.normalize(duration) 86 | self.profile.propagate() 87 | self.profile.floor() 88 | self.profile.filter(2, 1000, float("inf")) 89 | 90 | return [{ 91 | 'category': Metric.CATEGORY_MEMORY_PROFILE, 92 | 'name': Metric.NAME_UNCOLLECTED_ALLOCATIONS, 93 | 'unit': Metric.UNIT_BYTE, 94 | 'unit_interval': 1, 95 | 'profile': self.profile 96 | }] 97 | 98 | 99 | def destroy(self): 100 | pass 101 | 102 | 103 | def process_snapshot(self, snapshot, duration): 104 | stats = snapshot.statistics('traceback') 105 | 106 | for stat in stats[:self.MAX_PROFILED_ALLOCATIONS]: 107 | if stat.traceback: 108 | skip_stack = False 109 | for frame in stat.traceback: 110 | if self.agent.frame_cache.is_agent_frame(frame.filename): 111 | skip_stack = True 112 | break 113 | if skip_stack: 114 | continue 115 | 116 | current_node = self.profile 117 | for frame in reversed(stat.traceback): 118 | if frame.filename == '': 119 | continue 120 | 121 | frame_name = '{0}:{1}'.format(frame.filename, frame.lineno) 122 | current_node = current_node.find_or_add_child(frame_name) 123 | current_node.set_type(Breakdown.TYPE_CALLSITE) 124 | current_node.increment(stat.size, stat.count) 125 | -------------------------------------------------------------------------------- /stackimpact/reporters/process_reporter.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import re 4 | import os 5 | import gc 6 | import threading 7 | import multiprocessing 8 | 9 | 10 | from ..runtime import runtime_info, min_version, read_cpu_time, read_max_rss, read_current_rss, read_vm_size 11 | from ..metric import Metric 12 | 13 | class ProcessReporter(object): 14 | 15 | def __init__(self, agent): 16 | self.agent = agent 17 | self.started = False 18 | self.metrics = None 19 
| self.report_timer = None 20 | 21 | 22 | def setup(self): 23 | pass 24 | 25 | 26 | def destroy(self): 27 | pass 28 | 29 | 30 | def reset(self): 31 | pass 32 | 33 | 34 | def start(self): 35 | if not self.agent.get_option('auto_profiling'): 36 | return 37 | 38 | if self.started: 39 | return 40 | self.started = True 41 | 42 | self.reset() 43 | 44 | self.report_timer = self.agent.schedule(60, 60, self.report) 45 | 46 | 47 | def stop(self): 48 | if not self.started: 49 | return 50 | self.started = False 51 | 52 | self.report_timer.cancel() 53 | self.report_timer = None 54 | 55 | 56 | def reset(self): 57 | self.metrics = {} 58 | 59 | 60 | def report(self): 61 | # CPU 62 | if not runtime_info.OS_WIN: 63 | cpu_time = read_cpu_time() 64 | if cpu_time != None: 65 | cpu_time_metric = self.report_metric(Metric.TYPE_COUNTER, Metric.CATEGORY_CPU, Metric.NAME_CPU_TIME, Metric.UNIT_NANOSECOND, cpu_time) 66 | if cpu_time_metric.has_measurement(): 67 | cpu_usage = (cpu_time_metric.measurement.value / (60 * 1e9)) * 100 68 | try: 69 | cpu_usage = cpu_usage / multiprocessing.cpu_count() 70 | except Exception: 71 | pass 72 | 73 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_CPU, Metric.NAME_CPU_USAGE, Metric.UNIT_PERCENT, cpu_usage) 74 | 75 | 76 | # Memory 77 | if not runtime_info.OS_WIN: 78 | max_rss = read_max_rss() 79 | if max_rss != None: 80 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_MAX_RSS, Metric.UNIT_KILOBYTE, max_rss) 81 | 82 | if runtime_info.OS_LINUX: 83 | current_rss = read_current_rss() 84 | if current_rss != None: 85 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_CURRENT_RSS, Metric.UNIT_KILOBYTE, current_rss) 86 | 87 | vm_size = read_vm_size() 88 | if vm_size != None: 89 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_MEMORY, Metric.NAME_VM_SIZE, Metric.UNIT_KILOBYTE, vm_size) 90 | 91 | 92 | # GC stats 93 | gc_count0, gc_count1, gc_count2 = gc.get_count() 94 | total_gc_count = gc_count0 + gc_count1 + gc_count2 95 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_GC, Metric.NAME_GC_COUNT, Metric.UNIT_NONE, total_gc_count) 96 | 97 | if min_version(3, 4): 98 | gc_stats = gc.get_stats() 99 | if gc_stats and gc_stats[0] and gc_stats[1] and gc_stats[2]: 100 | total_collections = gc_stats[0]['collections'] + gc_stats[1]['collections'] + gc_stats[2]['collections'] 101 | self.report_metric(Metric.TYPE_COUNTER, Metric.CATEGORY_GC, Metric.NAME_GC_COLLECTIONS, Metric.UNIT_NONE, total_collections) 102 | 103 | total_collected = gc_stats[0]['collected'] + gc_stats[1]['collected'] + gc_stats[2]['collected'] 104 | self.report_metric(Metric.TYPE_COUNTER, Metric.CATEGORY_GC, Metric.NAME_GC_COLLECTED, Metric.UNIT_NONE, total_collected) 105 | 106 | total_uncollectable = gc_stats[0]['uncollectable'] + gc_stats[1]['uncollectable'] + gc_stats[2]['uncollectable'] 107 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_GC, Metric.NAME_GC_UNCOLLECTABLE, Metric.UNIT_NONE, total_uncollectable) 108 | 109 | # Runtime 110 | thread_count = threading.active_count() 111 | self.report_metric(Metric.TYPE_STATE, Metric.CATEGORY_RUNTIME, Metric.NAME_THREAD_COUNT, Metric.UNIT_NONE, thread_count) 112 | 113 | 114 | def report_metric(self, typ, category, name, unit, value): 115 | key = typ + category + name 116 | metric = None 117 | if key not in self.metrics: 118 | metric = Metric(self.agent, typ, category, name, unit) 119 | self.metrics[key] = metric 120 | else: 121 | metric = self.metrics[key] 122 | 123 | 
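# State metrics record the value as-is; counter metrics record the delta against the
# previous reading, so the first counter reading only primes the metric and produces
# no measurement (hence the has_measurement() check below).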
metric.create_measurement(Metric.TRIGGER_TIMER, value) 124 | 125 | if metric.has_measurement(): 126 | self.agent.message_queue.add('metric', metric.to_dict()) 127 | 128 | return metric 129 | 130 | -------------------------------------------------------------------------------- /stackimpact/profilers/block_profiler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os 4 | import sys 5 | import time 6 | import threading 7 | import re 8 | import signal 9 | 10 | from ..runtime import min_version, runtime_info 11 | from ..metric import Metric 12 | from ..metric import Breakdown 13 | from ..frame import Frame 14 | 15 | if runtime_info.GEVENT: 16 | import gevent 17 | 18 | 19 | class BlockProfiler(object): 20 | SAMPLING_RATE = 0.05 21 | MAX_TRACEBACK_SIZE = 25 # number of frames 22 | 23 | 24 | def __init__(self, agent): 25 | self.agent = agent 26 | self.ready = False 27 | self.profile = None 28 | self.profile_lock = threading.Lock() 29 | self.prev_signal_handler = None 30 | self.sampler_active = False 31 | 32 | 33 | def setup(self): 34 | if self.agent.get_option('block_profiler_disabled'): 35 | return 36 | 37 | if not runtime_info.OS_LINUX and not runtime_info.OS_DARWIN: 38 | self.agent.log('CPU profiler is only supported on Linux and OS X.') 39 | return 40 | 41 | sample_time = self.SAMPLING_RATE * 1000 42 | 43 | main_thread_id = None 44 | if runtime_info.GEVENT: 45 | main_thread_id = gevent._threading.get_ident() 46 | else: 47 | main_thread_id = threading.current_thread().ident 48 | 49 | def _sample(signum, signal_frame): 50 | if self.sampler_active: 51 | return 52 | self.sampler_active = True 53 | 54 | with self.profile_lock: 55 | try: 56 | self.process_sample(signal_frame, sample_time, main_thread_id) 57 | signal_frame = None 58 | except Exception: 59 | self.agent.exception() 60 | 61 | self.sampler_active = False 62 | 63 | self.prev_signal_handler = signal.signal(signal.SIGALRM, _sample) 64 | 65 | self.ready = True 66 | 67 | 68 | def destroy(self): 69 | if not self.ready: 70 | return 71 | 72 | signal.signal(signal.SIGALRM, self.prev_signal_handler) 73 | 74 | 75 | def reset(self): 76 | self.profile = Breakdown('Execution call graph', Breakdown.TYPE_CALLGRAPH) 77 | 78 | 79 | def start_profiler(self): 80 | self.agent.log('Activating block profiler.') 81 | 82 | signal.setitimer(signal.ITIMER_REAL, self.SAMPLING_RATE, self.SAMPLING_RATE) 83 | 84 | 85 | def stop_profiler(self): 86 | signal.setitimer(signal.ITIMER_REAL, 0) 87 | 88 | self.agent.log('Deactivating block profiler.') 89 | 90 | 91 | def build_profile(self, duration): 92 | with self.profile_lock: 93 | self.profile.normalize(duration) 94 | self.profile.propagate() 95 | self.profile.floor() 96 | self.profile.filter(2, 1, float("inf")) 97 | 98 | return [{ 99 | 'category': Metric.CATEGORY_BLOCK_PROFILE, 100 | 'name': Metric.NAME_BLOCKING_CALL_TIMES, 101 | 'unit': Metric.UNIT_MILLISECOND, 102 | 'unit_interval': 1, 103 | 'profile': self.profile 104 | }] 105 | 106 | 107 | def process_sample(self, signal_frame, sample_time, main_thread_id): 108 | if self.profile: 109 | start = time.clock() 110 | 111 | current_frames = sys._current_frames() 112 | items = current_frames.items() 113 | for thread_id, thread_frame in items: 114 | if thread_id == main_thread_id: 115 | thread_frame = signal_frame 116 | 117 | stack = self.recover_stack(thread_frame) 118 | if stack: 119 | current_node = self.profile 120 | for frame in reversed(stack): 121 | current_node = 
current_node.find_or_add_child(str(frame)) 122 | current_node.set_type(Breakdown.TYPE_CALLSITE) 123 | current_node.increment(sample_time, 1) 124 | 125 | thread_id, thread_frame, stack = None, None, None 126 | 127 | items = None 128 | current_frames = None 129 | 130 | 131 | def recover_stack(self, thread_frame): 132 | stack = [] 133 | 134 | system_only = True 135 | depth = 0 136 | while thread_frame is not None and depth <= self.MAX_TRACEBACK_SIZE: 137 | if thread_frame.f_code and thread_frame.f_code.co_name and thread_frame.f_code.co_filename: 138 | func_name = thread_frame.f_code.co_name 139 | filename = thread_frame.f_code.co_filename 140 | lineno = thread_frame.f_lineno 141 | 142 | if self.agent.frame_cache.is_agent_frame(filename): 143 | return None 144 | 145 | if not self.agent.frame_cache.is_system_frame(filename): 146 | system_only = False 147 | 148 | frame = Frame(func_name, filename, lineno) 149 | stack.append(frame) 150 | 151 | thread_frame = thread_frame.f_back 152 | 153 | depth += 1 154 | 155 | if system_only: 156 | return None 157 | 158 | if len(stack) == 0: 159 | return None 160 | else: 161 | return stack 162 | -------------------------------------------------------------------------------- /tests/agent_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import threading 4 | import random 5 | import time 6 | 7 | import stackimpact 8 | from stackimpact.runtime import runtime_info, min_version 9 | 10 | 11 | # python3 -m unittest discover -v -s tests -p *_test.py 12 | 13 | class AgentTestCase(unittest.TestCase): 14 | 15 | def test_run_in_main_thread(self): 16 | if runtime_info.OS_WIN: 17 | return 18 | 19 | stackimpact._agent = None 20 | agent = stackimpact.start( 21 | dashboard_address = 'http://localhost:5001', 22 | agent_key = 'key1', 23 | app_name = 'TestPythonApp', 24 | debug = True 25 | ) 26 | 27 | result = {} 28 | 29 | def _run(): 30 | result['thread_id'] = threading.current_thread().ident 31 | 32 | def _thread(): 33 | agent.run_in_main_thread(_run) 34 | 35 | t = threading.Thread(target=_thread) 36 | t.start() 37 | t.join() 38 | 39 | self.assertEqual(result['thread_id'], threading.current_thread().ident) 40 | 41 | agent.destroy() 42 | 43 | 44 | def test_profile(self): 45 | if runtime_info.OS_WIN: 46 | return 47 | 48 | stackimpact._agent = None 49 | agent = stackimpact.start( 50 | dashboard_address = 'http://localhost:5001', 51 | agent_key = 'key1', 52 | app_name = 'TestPythonApp', 53 | debug = True 54 | ) 55 | 56 | agent.cpu_reporter.start() 57 | 58 | span = agent.profile() 59 | for i in range(0, 2000000): 60 | random.randint(1, 1000000) 61 | span.stop() 62 | 63 | agent.cpu_reporter.report() 64 | 65 | self.assertTrue('test_profile' in str(agent.message_queue.queue)) 66 | 67 | agent.destroy() 68 | 69 | 70 | def test_with_profile(self): 71 | if runtime_info.OS_WIN: 72 | return 73 | 74 | stackimpact._agent = None 75 | agent = stackimpact.start( 76 | dashboard_address = 'http://localhost:5001', 77 | agent_key = 'key1', 78 | app_name = 'TestPythonApp', 79 | debug = True 80 | ) 81 | 82 | agent.cpu_reporter.start() 83 | 84 | with agent.profile(): 85 | for i in range(0, 2000000): 86 | random.randint(1, 1000000) 87 | 88 | agent.cpu_reporter.report() 89 | 90 | self.assertTrue('test_with_profile' in str(agent.message_queue.queue)) 91 | 92 | agent.destroy() 93 | 94 | 95 | def test_cpu_profile(self): 96 | stackimpact._agent = None 97 | agent = stackimpact.start( 98 | dashboard_address = 
'http://localhost:5001', 99 | agent_key = 'key1', 100 | app_name = 'TestPythonApp', 101 | auto_profiling = False, 102 | debug = True 103 | ) 104 | 105 | messages = [] 106 | def add_mock(topic, message): 107 | messages.append(message) 108 | agent.message_queue.add = add_mock 109 | 110 | agent.start_cpu_profiler() 111 | 112 | for j in range(0, 2000000): 113 | random.randint(1, 1000000) 114 | 115 | agent.stop_cpu_profiler() 116 | 117 | self.assertTrue('test_cpu_profile' in str(messages)) 118 | 119 | agent.destroy() 120 | 121 | 122 | def test_allocation_profile(self): 123 | if runtime_info.OS_WIN or not min_version(3, 4): 124 | return 125 | 126 | stackimpact._agent = None 127 | agent = stackimpact.start( 128 | dashboard_address = 'http://localhost:5001', 129 | agent_key = 'key1', 130 | app_name = 'TestPythonApp', 131 | auto_profiling = False, 132 | debug = True 133 | ) 134 | 135 | messages = [] 136 | def add_mock(topic, message): 137 | messages.append(message) 138 | agent.message_queue.add = add_mock 139 | 140 | agent.start_allocation_profiler() 141 | 142 | mem1 = [] 143 | for i in range(0, 1000): 144 | obj1 = {'v': random.randint(0, 1000000)} 145 | mem1.append(obj1) 146 | 147 | agent.stop_allocation_profiler() 148 | 149 | self.assertTrue('agent_test.py' in str(messages)) 150 | 151 | agent.destroy() 152 | 153 | 154 | def test_block_profile(self): 155 | if runtime_info.OS_WIN or not min_version(3, 4): 156 | return 157 | 158 | stackimpact._agent = None 159 | agent = stackimpact.start( 160 | dashboard_address = 'http://localhost:5001', 161 | agent_key = 'key1', 162 | app_name = 'TestPythonApp', 163 | auto_profiling = False, 164 | debug = True 165 | ) 166 | 167 | messages = [] 168 | def add_mock(topic, message): 169 | messages.append(message) 170 | agent.message_queue.add = add_mock 171 | 172 | agent.start_block_profiler() 173 | 174 | def blocking_call(): 175 | time.sleep(0.1) 176 | 177 | for i in range(5): 178 | blocking_call() 179 | 180 | agent.stop_block_profiler() 181 | 182 | self.assertTrue('blocking_call' in str(messages)) 183 | 184 | agent.destroy() 185 | 186 | 187 | if __name__ == '__main__': 188 | unittest.main() 189 | -------------------------------------------------------------------------------- /stackimpact/reporters/error_reporter.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import threading 4 | import traceback 5 | import collections 6 | 7 | from ..runtime import runtime_info, patch, unpatch 8 | from ..metric import Metric 9 | from ..metric import Breakdown 10 | from ..frame import Frame 11 | 12 | 13 | class ErrorReporter(object): 14 | MAX_QUEUED_EXC = 100 15 | 16 | 17 | def __init__(self, agent): 18 | self.agent = agent 19 | self.ready = False 20 | self.started = False 21 | self.process_timer = None 22 | self.report_timer = None 23 | self.exc_queue = collections.deque() 24 | self.profile = None 25 | self.profile_lock = threading.Lock() 26 | self.added_exceptions = None 27 | 28 | 29 | def setup(self): 30 | if self.agent.get_option('error_profiler_disabled'): 31 | return 32 | 33 | def _exc_info(args, kwargs, ret, data): 34 | try: 35 | if not self.agent.agent_started or self.agent.agent_destroyed: 36 | return 37 | 38 | if len(self.exc_queue) <= self.MAX_QUEUED_EXC: 39 | self.exc_queue.append(ret) 40 | 41 | except Exception: 42 | self.agent.log('exc_info wrapper exception') 43 | 44 | patch(sys, 'exc_info', None, _exc_info) 45 | 46 | self.ready = True 47 | 48 | 49 | def destroy(self): 50 | if not self.ready: 51 | return 52 | 
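# Undo the sys.exc_info wrapper installed in setup(). Note that patch() stores the
# original function under __orig__ while unpatch() looks it up as __stackimpact_orig__,
# so the attribute names must match for the restore to take effect.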
53 | unpatch(sys, 'exc_info') 54 | 55 | 56 | def reset(self): 57 | with self.profile_lock: 58 | self.profile = Breakdown('Error call graph', Breakdown.TYPE_ERROR) 59 | self.added_exceptions = {} 60 | 61 | 62 | def start(self): 63 | if self.agent.get_option('error_profiler_disabled'): 64 | return 65 | 66 | if not self.agent.get_option('auto_profiling'): 67 | return 68 | 69 | if self.started: 70 | return 71 | self.started = True 72 | 73 | self.reset() 74 | 75 | 76 | self.process_timer = self.agent.schedule(1, 1, self.process) 77 | self.report_timer = self.agent.schedule(60, 60, self.report) 78 | 79 | 80 | def stop(self): 81 | if not self.started: 82 | return 83 | self.started = False 84 | 85 | self.process_timer.cancel() 86 | self.process_timer = None 87 | 88 | self.report_timer.cancel() 89 | self.report_timer = None 90 | 91 | 92 | def report(self): 93 | with self.profile_lock: 94 | self.profile.propagate() 95 | 96 | metric = Metric(self.agent, Metric.TYPE_PROFILE, Metric.CATEGORY_ERROR_PROFILE, Metric.NAME_HANDLED_EXCEPTIONS, Metric.UNIT_NONE) 97 | measurement = metric.create_measurement(Metric.TRIGGER_TIMER, self.profile.measurement, 60, self.profile) 98 | self.agent.message_queue.add('metric', metric.to_dict()) 99 | 100 | self.reset() 101 | 102 | 103 | def process(self): 104 | while True: 105 | try: 106 | exc = self.exc_queue.pop() 107 | self.update_profile(exc) 108 | except IndexError: 109 | return 110 | 111 | 112 | def recover_stack(self, exc): 113 | stack = [] 114 | 115 | _, _, tb = exc 116 | 117 | tb_stack = traceback.extract_tb(tb, 25) 118 | for tb_frame in tb_stack: 119 | func_name = tb_frame[2] 120 | filename = tb_frame[0] 121 | lineno = tb_frame[1] 122 | 123 | if self.agent.frame_cache.is_agent_frame(filename): 124 | return None 125 | 126 | if not self.agent.frame_cache.is_system_frame(filename): 127 | frame = Frame(func_name, filename, lineno) 128 | stack.append(frame) 129 | 130 | return stack 131 | 132 | 133 | def update_profile(self, exc): 134 | with self.profile_lock: 135 | exc_type, exc_obj, _ = exc 136 | if not exc_type or not exc_obj: 137 | return 138 | 139 | exc_id = str(id(exc_obj)) 140 | if exc_id in self.added_exceptions: 141 | return 142 | else: 143 | self.added_exceptions[exc_id] = True 144 | 145 | 146 | stack = self.recover_stack(exc) 147 | if not stack: 148 | return 149 | 150 | current_node = self.profile 151 | 152 | for frame in reversed(stack): 153 | current_node = current_node.find_or_add_child(str(frame)) 154 | current_node.set_type(Breakdown.TYPE_CALLSITE) 155 | 156 | message = '' 157 | if exc_type: 158 | message += exc_type.__name__ 159 | 160 | exc_msg = str(exc_obj) 161 | if exc_msg: 162 | message += ': ' + exc_msg 163 | 164 | if message == '': 165 | message = 'Undefined' 166 | 167 | message_node = current_node.find_child(message) 168 | if message_node == None: 169 | if len(current_node.children) < 5: 170 | message_node = current_node.find_or_add_child(message) 171 | else: 172 | message_node = current_node.find_or_add_child('Other') 173 | 174 | message_node.set_type(Breakdown.TYPE_ERROR) 175 | message_node.increment(1, 0) 176 | -------------------------------------------------------------------------------- /stackimpact/reporters/profile_reporter.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os 4 | import sys 5 | import time 6 | import threading 7 | import re 8 | import random 9 | 10 | from ..runtime import min_version, runtime_info 11 | from ..utils import timestamp 12 
| from ..metric import Metric 13 | from ..metric import Breakdown 14 | from ..frame import Frame 15 | 16 | 17 | class ProfilerConfig(object): 18 | 19 | def __init__(self): 20 | self.log_prefix = None 21 | self.max_profile_duration = None 22 | self.max_span_duration = None 23 | self.max_span_count = None 24 | self.span_interval = None 25 | self.report_interval = None 26 | self.report_only = False 27 | 28 | 29 | class ProfileReporter: 30 | 31 | def __init__(self, agent, profiler, config): 32 | self.agent = agent 33 | self.profiler = profiler 34 | self.config = config 35 | self.started = False 36 | self.span_timer = None 37 | self.span_timeout = None 38 | self.random_timer = None 39 | self.report_timer = None 40 | self.profile_start_ts = None 41 | self.profile_duration = None 42 | self.span_count = None 43 | self.span_active = False 44 | self.span_start_ts = None 45 | self.span_trigger = None 46 | 47 | 48 | def setup(self): 49 | self.profiler.setup() 50 | 51 | 52 | def start(self): 53 | if not self.profiler.ready: 54 | return 55 | 56 | if self.started: 57 | return 58 | self.started = True 59 | 60 | self.reset() 61 | 62 | if self.agent.get_option('auto_profiling'): 63 | if not self.config.report_only: 64 | def random_delay(): 65 | timeout = random.randint(0, round(self.config.span_interval - self.config.max_span_duration)) 66 | self.random_timer = self.agent.delay(timeout, self.start_profiling, False, True) 67 | 68 | self.span_timer = self.agent.schedule(0, self.config.span_interval, random_delay) 69 | 70 | self.report_timer = self.agent.schedule(self.config.report_interval, self.config.report_interval, self.report) 71 | 72 | 73 | def stop(self): 74 | if not self.started: 75 | return 76 | 77 | self.started = False 78 | 79 | if self.span_timer: 80 | self.span_timer.cancel() 81 | self.span_timer = None 82 | 83 | if self.random_timer: 84 | self.random_timer.cancel() 85 | self.random_timer = None 86 | 87 | if self.report_timer: 88 | self.report_timer.cancel() 89 | self.report_timer = None 90 | 91 | self.stop_profiling() 92 | 93 | 94 | def destroy(self): 95 | self.profiler.destroy() 96 | 97 | 98 | def reset(self): 99 | self.profiler.reset() 100 | self.profile_start_ts = timestamp() 101 | self.profile_duration = 0 102 | self.span_count = 0 103 | self.span_trigger = Metric.TRIGGER_TIMER 104 | 105 | 106 | def start_profiling(self, api_call, with_timeout): 107 | if not self.started: 108 | return False 109 | 110 | if self.profile_duration > self.config.max_profile_duration: 111 | self.agent.log(self.config.log_prefix + ': max profiling duration reached.') 112 | return False 113 | 114 | if api_call and self.span_count > self.config.max_span_count: 115 | self.agent.log(self.config.log_prefix + ': max recording count reached.') 116 | return False 117 | 118 | if self.agent.profiler_active: 119 | self.agent.log(self.config.log_prefix + ': profiler lock exists.') 120 | return False 121 | 122 | self.agent.profiler_active = True 123 | self.agent.log(self.config.log_prefix + ': started.') 124 | 125 | try: 126 | self.profiler.start_profiler() 127 | except Exception: 128 | self.agent.profiler_active = False 129 | self.exception() 130 | return False 131 | 132 | if with_timeout: 133 | self.span_timeout = self.agent.delay(self.config.max_span_duration, self.stop_profiling) 134 | 135 | self.span_count = self.span_count + 1 136 | self.span_active = True 137 | self.span_start_ts = time.time() 138 | 139 | if api_call: 140 | self.span_trigger = Metric.TRIGGER_API 141 | 142 | return True 143 | 144 | 145 | def 
stop_profiling(self): 146 | if not self.span_active: 147 | return 148 | self.span_active = False 149 | 150 | try: 151 | self.profile_duration = self.profile_duration + time.time() - self.span_start_ts 152 | self.profiler.stop_profiler() 153 | except Exception: 154 | self.exception() 155 | 156 | self.agent.profiler_active = False 157 | 158 | if self.span_timeout: 159 | self.span_timeout.cancel() 160 | 161 | self.agent.log(self.config.log_prefix + ': stopped.') 162 | 163 | 164 | def report(self, with_interval=False): 165 | if not self.started: 166 | return 167 | 168 | if with_interval: 169 | if self.profile_start_ts > timestamp() - self.config.report_interval: 170 | return 171 | elif self.profile_start_ts < timestamp() - 2 * self.config.report_interval: 172 | self.reset() 173 | return 174 | 175 | if not self.config.report_only and self.profile_duration == 0: 176 | return 177 | 178 | self.agent.log(self.config.log_prefix + ': reporting profile.') 179 | 180 | profile_data = self.profiler.build_profile(self.profile_duration) 181 | 182 | for data in profile_data: 183 | metric = Metric(self.agent, Metric.TYPE_PROFILE, data['category'], data['name'], data['unit']) 184 | metric.create_measurement(self.span_trigger, data['profile'].measurement, data['unit_interval'], data['profile']) 185 | self.agent.message_queue.add('metric', metric.to_dict()) 186 | 187 | self.reset() 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StackImpact Python Profiler 2 | 3 | ## Overview 4 | 5 | StackImpact is a production-grade performance profiler built for both production and development environments. It gives developers continuous and historical code-level view of application performance that is essential for locating CPU, memory allocation and I/O hot spots as well as latency bottlenecks. Included runtime metrics and error monitoring complement profiles for extensive performance analysis. Learn more at [stackimpact.com](https://stackimpact.com/). 6 | 7 | ![dashboard](https://stackimpact.com/img/readme/hotspots-cpu-1.4-python.png) 8 | 9 | #### Features 10 | 11 | * Continuous hot spot profiling of CPU usage, memory allocation and blocking calls. 12 | * Error and exception monitoring. 13 | * Health monitoring including CPU, memory, garbage collection and other runtime metrics. 14 | * Alerts on profile anomalies. 15 | * Team access. 16 | 17 | Learn more on the [features](https://stackimpact.com/features/) page (with screenshots). 18 | 19 | 20 | #### How it works 21 | 22 | The StackImpact profiler agent is imported into a program and used as a normal package. When the program runs, various sampling profilers are started and stopped automatically by the agent and/or programmatically using the agent methods. The agent periodically reports recorded profiles and metrics to the StackImpact Dashboard. The agent can also operate in manual mode, which should be used in development only. 23 | 24 | 25 | #### Documentation 26 | 27 | See full [documentation](https://stackimpact.com/docs/) for reference. 28 | 29 | 30 | 31 | ## Supported environment 32 | 33 | * Linux, OS X or Windows. Python version 2.7, 3.4 or higher. 34 | * Memory allocation profiler and some GC metrics are only available for Python 3. 35 | * Profilers only support Linux and OS X. 36 | * Time (blocking call) profiler supports threads and gevent. 
37 | * On unix systems the profilers use the following signals: SIGPROF, SIGALRM, SIGUSR2. Only SIGUSR2 is handled transparently, i.e. it should not conflict with previousely registered handlers. 38 | 39 | 40 | ## Getting started 41 | 42 | 43 | #### Create StackImpact account 44 | 45 | Sign up for a free trial account at [stackimpact.com](https://stackimpact.com) (also with GitHub login). 46 | 47 | 48 | #### Installing the agent 49 | 50 | Install the Python agent by running 51 | 52 | ``` 53 | pip install stackimpact 54 | ``` 55 | 56 | And import the package in your application 57 | 58 | ```python 59 | import stackimpact 60 | ``` 61 | 62 | 63 | #### Configuring the agent 64 | 65 | Start the agent in the main thread by specifying the agent key and application name. The agent key can be found in your account's Configuration section. 66 | 67 | ```python 68 | agent = stackimpact.start( 69 | agent_key = 'agent key here', 70 | app_name = 'MyPythonApp') 71 | ``` 72 | 73 | Add the agent initialization to the worker code, e.g. wsgi.py, if applicable. 74 | 75 | All initialization options: 76 | 77 | * `agent_key` (Required) The access key for communication with the StackImpact servers. 78 | * `app_name` (Required) A name to identify and group application data. Typically, a single codebase, deployable unit or executable module corresponds to one application. 79 | * `app_version` (Optional) Sets application version, which can be used to associate profiling information with the source code release. 80 | * `app_environment` (Optional) Used to differentiate applications in different environments. 81 | * `host_name` (Optional) By default, host name will be the OS hostname. 82 | * `auto_profiling` (Optional) If set to `False`, disables automatic profiling and reporting. Focused or manual profiling should be used instead. Useful for environments without support for timers or background tasks. 83 | * `debug` (Optional) Enables debug logging. 84 | * `cpu_profiler_disabled`, `allocation_profiler_disabled`, `block_profiler_disabled`, `error_profiler_disabled` (Optional) Disables respective profiler when `True`. 85 | * `include_agent_frames` (Optional) Set to `True` to not exclude agent stack frames from profile call graphs. 86 | * `auto_destroy` (Optional) Set to `False` to disable agent's exit handlers. If necessary, call `destroy()` to gracefully shutdown the agent. 87 | 88 | 89 | #### Focused profiling 90 | 91 | Use `agent.profile(name)` to instruct the agent when to start and stop profiling. The agent decides if and which profiler is activated. Normally, this method should be used in repeating code, such as request or event handlers. In addition to more precise profiling, timing information will also be reported for the profiled spans. Usage example: 92 | 93 | ```python 94 | span = agent.profile('span1'); 95 | 96 | # your code here 97 | 98 | span.stop(); 99 | ``` 100 | 101 | Alternatively, a `with` statement can be used: 102 | 103 | ```python 104 | with agent.profile('span1'): 105 | # your code ehere 106 | ``` 107 | 108 | 109 | #### Manual profiling 110 | 111 | *Manual profiling should not be used in production!* 112 | 113 | By default, the agent starts and stops profiling automatically. Manual profiling allows to start and stop profilers directly. It is suitable for profiling short-lived programs and should not be used for long-running production applications. Automatic profiling should be disabled with `auto_profiling: False`. 114 | 115 | ```python 116 | # Start CPU profiler. 
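# Manual profiling requires the agent to be started with auto_profiling = False (see above).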
117 | agent.start_cpu_profiler(); 118 | ``` 119 | 120 | ```python 121 | # Stop CPU profiler and report the recorded profile to the Dashboard. 122 | agent.stop_cpu_profiler(); 123 | ``` 124 | 125 | ```python 126 | # Start blocking call profiler. 127 | agent.start_block_profiler(); 128 | ``` 129 | 130 | ```python 131 | # Stop blocking call profiler and report the recorded profile to the Dashboard. 132 | agent.stop_block_profiler(); 133 | ``` 134 | 135 | ```python 136 | # Start heap allocation profiler. 137 | agent.start_allocation_profiler(); 138 | ``` 139 | 140 | ```python 141 | # Stop heap allocation profiler and report the recorded profile to the Dashboard. 142 | agent.stop_allocation_profiler(); 143 | ``` 144 | 145 | #### Analyzing performance data in the Dashboard 146 | 147 | Once your application is restarted, you can start observing continuous CPU, memory, I/O, and other hot spot profiles, execution bottlenecks as well as process metrics in the [Dashboard](https://dashboard.stackimpact.com/). 148 | 149 | 150 | #### Troubleshooting 151 | 152 | To enable debug logging, add `debug = True` to startup options. If the debug log doesn't give you any hints on how to fix a problem, please report it to our support team in your account's Support section. 153 | 154 | 155 | ## Overhead 156 | 157 | The agent overhead is measured to be less than 1% for applications under high load. 158 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | StackImpact Python Profiler 2 | =========================== 3 | 4 | Overview 5 | -------- 6 | 7 | StackImpact is a production-grade performance profiler built for both 8 | production and development environments. It gives developers continuous 9 | and historical code-level view of application performance that is 10 | essential for locating CPU, memory allocation and I/O hot spots as well 11 | as latency bottlenecks. Included runtime metrics and error monitoring 12 | complement profiles for extensive performance analysis. Learn more at 13 | `stackimpact.com `__. 14 | 15 | .. figure:: https://stackimpact.com/img/readme/hotspots-cpu-1.4-python.png 16 | :alt: dashboard 17 | 18 | dashboard 19 | 20 | Features 21 | ^^^^^^^^ 22 | 23 | - Continuous hot spot profiling of CPU usage, memory allocation and 24 | blocking calls. 25 | - Error and exception monitoring. 26 | - Health monitoring including CPU, memory, garbage collection and other 27 | runtime metrics. 28 | - Alerts on profile anomalies. 29 | - Team access. 30 | 31 | Learn more on the `features `__ page 32 | (with screenshots). 33 | 34 | How it works 35 | ^^^^^^^^^^^^ 36 | 37 | The StackImpact profiler agent is imported into a program and used as a 38 | normal package. When the program runs, various sampling profilers are 39 | started and stopped automatically by the agent and/or programmatically 40 | using the agent methods. The agent periodically reports recorded 41 | profiles and metrics to the StackImpact Dashboard. The agent can also 42 | operate in manual mode, which should be used in development only. 43 | 44 | Documentation 45 | ^^^^^^^^^^^^^ 46 | 47 | See full `documentation `__ for 48 | reference. 49 | 50 | Supported environment 51 | --------------------- 52 | 53 | - Linux, OS X or Windows. Python version 2.7, 3.4 or higher. 54 | - Memory allocation profiler and some GC metrics are only available for 55 | Python 3. 56 | - Profilers only support Linux and OS X. 
57 | - Time (blocking call) profiler supports threads and gevent. 58 | - On unix systems the profilers use the following signals: SIGPROF, 59 | SIGALRM, SIGUSR2. Only SIGUSR2 is handled transparently, i.e. it 60 | should not conflict with previousely registered handlers. 61 | 62 | Getting started 63 | --------------- 64 | 65 | Create StackImpact account 66 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 67 | 68 | Sign up for a free trial account at 69 | `stackimpact.com `__ (also with GitHub login). 70 | 71 | Installing the agent 72 | ^^^^^^^^^^^^^^^^^^^^ 73 | 74 | Install the Python agent by running 75 | 76 | :: 77 | 78 | pip install stackimpact 79 | 80 | And import the package in your application 81 | 82 | .. code:: python 83 | 84 | import stackimpact 85 | 86 | Configuring the agent 87 | ^^^^^^^^^^^^^^^^^^^^^ 88 | 89 | Start the agent in the main thread by specifying the agent key and 90 | application name. The agent key can be found in your account's 91 | Configuration section. 92 | 93 | .. code:: python 94 | 95 | agent = stackimpact.start( 96 | agent_key = 'agent key here', 97 | app_name = 'MyPythonApp') 98 | 99 | Add the agent initialization to the worker code, e.g. wsgi.py, if 100 | applicable. 101 | 102 | All initialization options: 103 | 104 | - ``agent_key`` (Required) The access key for communication with the 105 | StackImpact servers. 106 | - ``app_name`` (Required) A name to identify and group application 107 | data. Typically, a single codebase, deployable unit or executable 108 | module corresponds to one application. 109 | - ``app_version`` (Optional) Sets application version, which can be 110 | used to associate profiling information with the source code release. 111 | - ``app_environment`` (Optional) Used to differentiate applications in 112 | different environments. 113 | - ``host_name`` (Optional) By default, host name will be the OS 114 | hostname. 115 | - ``auto_profiling`` (Optional) If set to ``False``, disables automatic 116 | profiling and reporting. Focused or manual profiling should be used 117 | instead. Useful for environments without support for timers or 118 | background tasks. 119 | - ``debug`` (Optional) Enables debug logging. 120 | - ``cpu_profiler_disabled``, ``allocation_profiler_disabled``, 121 | ``block_profiler_disabled``, ``error_profiler_disabled`` (Optional) 122 | Disables respective profiler when ``True``. 123 | - ``include_agent_frames`` (Optional) Set to ``True`` to not exclude 124 | agent stack frames from profile call graphs. 125 | - ``auto_destroy`` (Optional) Set to ``False`` to disable agent's exit 126 | handlers. If necessary, call ``destroy()`` to gracefully shutdown the 127 | agent. 128 | 129 | Focused profiling 130 | ^^^^^^^^^^^^^^^^^ 131 | 132 | Use ``agent.profile(name)`` to instruct the agent when to start and stop 133 | profiling. The agent decides if and which profiler is activated. 134 | Normally, this method should be used in repeating code, such as request 135 | or event handlers. In addition to more precise profiling, timing 136 | information will also be reported for the profiled spans. Usage example: 137 | 138 | .. code:: python 139 | 140 | span = agent.profile('span1'); 141 | 142 | # your code here 143 | 144 | span.stop(); 145 | 146 | Alternatively, a ``with`` statement can be used: 147 | 148 | .. 
code:: python 149 | 150 | with agent.profile('span1'): 151 | # your code ehere 152 | 153 | Manual profiling 154 | ^^^^^^^^^^^^^^^^ 155 | 156 | *Manual profiling should not be used in production!* 157 | 158 | By default, the agent starts and stops profiling automatically. Manual 159 | profiling allows to start and stop profilers directly. It is suitable 160 | for profiling short-lived programs and should not be used for 161 | long-running production applications. Automatic profiling should be 162 | disabled with ``auto_profiling: False``. 163 | 164 | .. code:: python 165 | 166 | # Start CPU profiler. 167 | agent.start_cpu_profiler(); 168 | 169 | .. code:: python 170 | 171 | # Stop CPU profiler and report the recorded profile to the Dashboard. 172 | agent.stop_cpu_profiler(); 173 | 174 | .. code:: python 175 | 176 | # Start blocking call profiler. 177 | agent.start_block_profiler(); 178 | 179 | .. code:: python 180 | 181 | # Stop blocking call profiler and report the recorded profile to the Dashboard. 182 | agent.stop_block_profiler(); 183 | 184 | .. code:: python 185 | 186 | # Start heap allocation profiler. 187 | agent.start_allocation_profiler(); 188 | 189 | .. code:: python 190 | 191 | # Stop heap allocation profiler and report the recorded profile to the Dashboard. 192 | agent.stop_allocation_profiler(); 193 | 194 | Analyzing performance data in the Dashboard 195 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 196 | 197 | Once your application is restarted, you can start observing continuous 198 | CPU, memory, I/O, and other hot spot profiles, execution bottlenecks as 199 | well as process metrics in the 200 | `Dashboard `__. 201 | 202 | Troubleshooting 203 | ^^^^^^^^^^^^^^^ 204 | 205 | To enable debug logging, add ``debug = True`` to startup options. If the 206 | debug log doesn't give you any hints on how to fix a problem, please 207 | report it to our support team in your account's Support section. 208 | 209 | Overhead 210 | -------- 211 | 212 | The agent overhead is measured to be less than 1% for applications under 213 | high load. 
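As mentioned in the Troubleshooting section, debug logging is controlled by a
startup option; a minimal sketch with placeholder values (the agent key,
application name and environment below are examples only):

.. code:: python

    import stackimpact

    agent = stackimpact.start(
        agent_key = 'agent key here',       # placeholder key from the Configuration section
        app_name = 'MyPythonApp',           # placeholder application name
        app_environment = 'production',     # optional environment label
        debug = True)                       # enable debug logging for troubleshooting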
214 | -------------------------------------------------------------------------------- /stackimpact/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import threading 4 | import random 5 | import math 6 | 7 | from .utils import timestamp, generate_uuid, generate_sha1 8 | 9 | class Metric(object): 10 | 11 | TYPE_STATE = 'state' 12 | TYPE_COUNTER = 'counter' 13 | TYPE_PROFILE = 'profile' 14 | TYPE_TRACE = 'trace' 15 | 16 | CATEGORY_CPU = 'cpu' 17 | CATEGORY_MEMORY = 'memory' 18 | CATEGORY_GC = 'gc' 19 | CATEGORY_RUNTIME = 'runtime' 20 | CATEGORY_SPAN = 'span' 21 | CATEGORY_CPU_PROFILE = 'cpu-profile' 22 | CATEGORY_MEMORY_PROFILE = 'memory-profile' 23 | CATEGORY_BLOCK_PROFILE = 'block-profile' 24 | CATEGORY_ERROR_PROFILE = 'error-profile' 25 | 26 | NAME_CPU_TIME = 'CPU time' 27 | NAME_CPU_USAGE = 'CPU usage' 28 | NAME_MAIN_THREAD_CPU_USAGE = 'Main thread CPU usage' 29 | NAME_MAX_RSS = 'Max RSS' 30 | NAME_CURRENT_RSS = 'Current RSS' 31 | NAME_VM_SIZE = 'VM Size' 32 | NAME_GC_COUNT = 'Uncollected objects' 33 | NAME_GC_COLLECTIONS = 'Collections' 34 | NAME_GC_COLLECTED = 'Collected objects' 35 | NAME_GC_UNCOLLECTABLE = 'Uncollectable objects' 36 | NAME_THREAD_COUNT = 'Active threads' 37 | NAME_UNCOLLECTED_ALLOCATIONS = 'Uncollected allocations' 38 | NAME_BLOCKING_CALL_TIMES = 'Blocking call times' 39 | NAME_HANDLED_EXCEPTIONS = 'Handled exceptions' 40 | NAME_TF_OPERATION_TIMES = 'TensorFlow operation times' 41 | NAME_TF_OPERATION_ALLOCATION_RATE = 'TensorFlow operation allocation rate' 42 | 43 | UNIT_NONE = '' 44 | UNIT_MILLISECOND = 'millisecond' 45 | UNIT_MICROSECOND = 'microsecond' 46 | UNIT_NANOSECOND = 'nanosecond' 47 | UNIT_BYTE = 'byte' 48 | UNIT_KILOBYTE = 'kilobyte' 49 | UNIT_PERCENT = 'percent' 50 | 51 | TRIGGER_TIMER = 'timer' 52 | TRIGGER_API = 'api' 53 | 54 | 55 | def __init__(self, agent, typ, category, name, unit): 56 | metric_id = generate_sha1("{0}{1}{2}{3}{4}{5}{6}".format( 57 | agent.get_option('app_name'), 58 | agent.get_option('app_environment'), 59 | agent.get_option('host_name'), 60 | typ, category, name, unit)) 61 | 62 | self.agent = agent 63 | self.id = metric_id 64 | self.typ = typ 65 | self.category = category 66 | self.name = name 67 | self.unit = unit 68 | self.measurement = None 69 | self.has_last_value = False 70 | self.last_value = None 71 | 72 | 73 | def has_measurement(self): 74 | return self.measurement != None 75 | 76 | 77 | def create_measurement(self, trigger, value, duration = None, breakdown = None): 78 | ready = True 79 | 80 | if self.typ == Metric.TYPE_COUNTER: 81 | if not self.has_last_value: 82 | ready = False 83 | self.has_last_value = True 84 | self.last_value = value 85 | else: 86 | tmp_value = value 87 | value = value - self.last_value 88 | self.last_value = tmp_value 89 | 90 | if ready: 91 | self.measurement = Measurement( 92 | generate_uuid(), 93 | trigger, 94 | value, 95 | duration, 96 | breakdown, 97 | timestamp()) 98 | 99 | 100 | def to_dict(self): 101 | measurement_map = None 102 | if self.measurement: 103 | measurement_map = self.measurement.to_dict() 104 | 105 | metric_map = { 106 | 'id': self.id, 107 | 'type': self.typ, 108 | 'category': self.category, 109 | 'name': self.name, 110 | 'unit': self.unit, 111 | 'measurement': measurement_map, 112 | } 113 | 114 | return metric_map 115 | 116 | 117 | 118 | class Measurement: 119 | def __init__(self, id, trigger, value, duration, breakdown, timestamp): 120 | self.id = id 121 | self.trigger = trigger 122 | 
self.value = value 123 | self.duration = duration 124 | self.breakdown = breakdown 125 | self.timestamp = timestamp 126 | 127 | def to_dict(self): 128 | breakdown_map = None 129 | if self.breakdown: 130 | breakdown_map = self.breakdown.to_dict() 131 | 132 | measurement_map = { 133 | 'id': self.id, 134 | 'trigger': self.trigger, 135 | 'value': self.value, 136 | 'duration': self.duration, 137 | 'breakdown': breakdown_map, 138 | 'timestamp': self.timestamp, 139 | } 140 | 141 | return measurement_map 142 | 143 | 144 | class Breakdown: 145 | 146 | TYPE_CALLGRAPH = 'callgraph' 147 | TYPE_DEVICE = 'device' 148 | TYPE_CALLSITE = 'callsite' 149 | TYPE_OPERATION = 'operation' 150 | TYPE_ERROR = 'error' 151 | 152 | RESERVOIR_SIZE = 1000 153 | 154 | def __init__(self, name, typ = None): 155 | self.name = name 156 | self.type = typ 157 | self.metadata = dict() 158 | self.children = dict() 159 | self.measurement = 0 160 | self.num_samples = 0 161 | self.reservoir = [] 162 | 163 | 164 | def set_type(self, typ): 165 | self.type = typ 166 | 167 | 168 | def add_metadata(self, key, value): 169 | self.metadata[key] = value 170 | 171 | 172 | def get_metadata(self, key): 173 | if key in self.metadata: 174 | return self.metadata[key] 175 | else: 176 | return None 177 | 178 | 179 | def find_child(self, name): 180 | if name in self.children: 181 | return self.children[name] 182 | 183 | return None 184 | 185 | 186 | def max_child(self): 187 | max_ch = None 188 | for name, child in self.children.items(): 189 | if max_ch is None or child.measurement > max_ch.measurement: 190 | max_ch = child 191 | 192 | return max_ch 193 | 194 | 195 | def min_child(self): 196 | min_ch = None 197 | for name, child in self.children.items(): 198 | if min_ch == None or child.measurement < min_ch.measurement: 199 | min_ch = child 200 | 201 | return min_ch 202 | 203 | 204 | def add_child(self, child): 205 | self.children[child.name] = child 206 | 207 | 208 | def remove_child(self, child): 209 | del self.children[child.name] 210 | 211 | 212 | def find_or_add_child(self, name): 213 | child = self.find_child(name) 214 | if child == None: 215 | child = Breakdown(name) 216 | self.add_child(child) 217 | 218 | return child 219 | 220 | 221 | def filter(self, from_level, min_measurement, max_measurement): 222 | self.filter_level(1, from_level, min_measurement, max_measurement) 223 | 224 | 225 | def filter_level(self, current_level, from_level, min_measurement, max_measurement): 226 | for name in list(self.children.keys()): 227 | child = self.children[name] 228 | if current_level >= from_level and (child.measurement < min_measurement or child.measurement > max_measurement): 229 | del self.children[name] 230 | else: 231 | child.filter_level(current_level + 1, from_level, min_measurement, max_measurement) 232 | 233 | 234 | def depth(self): 235 | max_depth = 0 236 | 237 | for name, child in self.children.items(): 238 | child_depth = child.depth() 239 | if child_depth > max_depth: 240 | max_depth = child_depth 241 | 242 | return max_depth + 1 243 | 244 | 245 | def propagate(self): 246 | for name, child in self.children.items(): 247 | child.propagate() 248 | self.measurement += child.measurement 249 | self.num_samples += child.num_samples 250 | 251 | 252 | def increment(self, value, count): 253 | self.measurement += value 254 | self.num_samples += count 255 | 256 | 257 | def update_p95(self, value): 258 | r_len = 0 259 | r_exists = True 260 | 261 | if self.reservoir == None: 262 | r_exists = False 263 | else: 264 | r_len = len(self.reservoir) 265 | 266 | 
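# Reservoir sampling: keep at most RESERVOIR_SIZE values, replacing a random slot once
# the reservoir is full; evaluate_p95() later sorts the reservoir and picks the value at
# the 95th percentile.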
if not r_exists: 267 | self.reservoir = [] 268 | 269 | if r_len < self.RESERVOIR_SIZE: 270 | self.reservoir.append(value) 271 | else: 272 | self.reservoir[random.randint(0, self.RESERVOIR_SIZE - 1)] = value 273 | 274 | self.num_samples += 1 275 | 276 | 277 | def evaluate_p95(self): 278 | if self.reservoir != None and len(self.reservoir) > 0: 279 | self.reservoir.sort() 280 | index = int(len(self.reservoir) / 100 * 95) 281 | self.measurement = self.reservoir[index] 282 | 283 | self.reservoir = self.reservoir[:0] 284 | 285 | for name, child in self.children.items(): 286 | child.evaluate_p95() 287 | 288 | 289 | def evaluate_percent(self, total_samples): 290 | self.measurement = (self.num_samples / total_samples) * 100 291 | 292 | for name, child in self.children.items(): 293 | child.evaluate_percent(total_samples) 294 | 295 | 296 | def convert_to_percent(self, total): 297 | self.measurement = (self.measurement / total) * 100 298 | 299 | for name, child in self.children.items(): 300 | child.convert_to_percent(total) 301 | 302 | 303 | def normalize(self, factor): 304 | self.measurement = self.measurement / factor 305 | self.num_samples = int(math.ceil(self.num_samples / factor)) 306 | 307 | for name, child in self.children.items(): 308 | child.normalize(factor) 309 | 310 | 311 | def scale(self, factor): 312 | self.measurement = self.measurement * factor 313 | self.num_samples = int(math.ceil(self.num_samples * factor)) 314 | 315 | for name, child in self.children.items(): 316 | child.scale(factor) 317 | 318 | 319 | def round(self): 320 | self.measurement = round(self.measurement) 321 | 322 | for name, child in self.children.items(): 323 | child.round() 324 | 325 | 326 | def floor(self): 327 | self.measurement = int(self.measurement) 328 | 329 | for name, child in self.children.items(): 330 | child.floor() 331 | 332 | 333 | def to_dict(self): 334 | children_map = [] 335 | for name, child in self.children.items(): 336 | children_map.append(child.to_dict()) 337 | 338 | node_map = { 339 | "name": self.name, 340 | "metadata": self.metadata, 341 | "measurement": self.measurement, 342 | "num_samples": self.num_samples, 343 | "children": children_map, 344 | } 345 | 346 | return node_map 347 | 348 | 349 | def __str__(self): 350 | return self.dump_level(0) 351 | 352 | 353 | def dump_level(self, level): 354 | dump_str = '' 355 | 356 | for i in range(0, level): 357 | dump_str += ' ' 358 | 359 | dump_str += '{0} - {1} ({2})\n'.format(self.name, self.measurement, self.num_samples) 360 | for name, child in self.children.items(): 361 | dump_str += child.dump_level(level + 1) 362 | 363 | return dump_str 364 | -------------------------------------------------------------------------------- /stackimpact/agent.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import time 4 | import datetime 5 | import sys 6 | import traceback 7 | import socket 8 | import threading 9 | import os 10 | import signal 11 | import atexit 12 | import platform 13 | import random 14 | import math 15 | 16 | from .runtime import min_version, runtime_info, register_signal 17 | from .utils import timestamp, generate_uuid 18 | from .config import Config 19 | from .config_loader import ConfigLoader 20 | from .message_queue import MessageQueue 21 | from .frame_cache import FrameCache 22 | from .reporters.process_reporter import ProcessReporter 23 | from .reporters.profile_reporter import ProfileReporter, ProfilerConfig 24 | from 
.reporters.error_reporter import ErrorReporter 25 | from .reporters.span_reporter import SpanReporter 26 | from .profilers.cpu_profiler import CPUProfiler 27 | from .profilers.allocation_profiler import AllocationProfiler 28 | from .profilers.block_profiler import BlockProfiler 29 | 30 | 31 | class Span(object): 32 | 33 | def __init__(self, stop_func = None): 34 | if stop_func: 35 | self.stop_func = stop_func 36 | else: 37 | self.stop_func = None 38 | 39 | 40 | def stop(self): 41 | if self.stop_func: 42 | self.stop_func() 43 | 44 | 45 | def __enter__(self): 46 | pass 47 | 48 | 49 | def __exit__(self, exc_type, exc_value, traceback): 50 | self.stop() 51 | 52 | 53 | class Agent(object): 54 | 55 | AGENT_VERSION = "1.2.6" 56 | SAAS_DASHBOARD_ADDRESS = "https://agent-api.stackimpact.com" 57 | 58 | def __init__(self, **kwargs): 59 | self.agent_started = False 60 | self.agent_destroyed = False 61 | 62 | self.profiler_active = False 63 | self.span_active = False 64 | 65 | self.main_thread_func = None 66 | 67 | self.run_ts = None 68 | self.run_id = None 69 | self.config = Config(self) 70 | self.config_loader = ConfigLoader(self) 71 | self.message_queue = MessageQueue(self) 72 | self.frame_cache = FrameCache(self) 73 | self.process_reporter = ProcessReporter(self) 74 | self.error_reporter = ErrorReporter(self) 75 | self.span_reporter = SpanReporter(self) 76 | 77 | config = ProfilerConfig() 78 | config.log_prefix = 'CPU profiler' 79 | config.max_profile_duration = 20 80 | config.max_span_duration = 5 81 | config.max_span_count = 30 82 | config.span_interval = 20 83 | config.report_interval = 120 84 | self.cpu_reporter = ProfileReporter(self, CPUProfiler(self), config) 85 | 86 | config = ProfilerConfig() 87 | config.log_prefix = 'Allocation profiler' 88 | config.max_profile_duration = 20 89 | config.max_span_duration = 5 90 | config.max_span_count = 30 91 | config.span_interval = 20 92 | config.report_interval = 120 93 | self.allocation_reporter = ProfileReporter(self, AllocationProfiler(self), config) 94 | 95 | config = ProfilerConfig() 96 | config.log_prefix = 'Block profiler' 97 | config.max_profile_duration = 20 98 | config.max_span_duration = 5 99 | config.max_span_count = 30 100 | config.span_interval = 20 101 | config.report_interval = 120 102 | self.block_reporter = ProfileReporter(self, BlockProfiler(self), config) 103 | 104 | self.options = None 105 | 106 | 107 | def get_option(self, name, default_val=None): 108 | if name not in self.options: 109 | return default_val 110 | else: 111 | return self.options[name] 112 | 113 | 114 | def start(self, **kwargs): 115 | if not min_version(2, 7) and not min_version(3, 4): 116 | raise Exception('Supported Python versions 2.6 or higher and 3.4 or higher') 117 | 118 | if platform.python_implementation() != 'CPython': 119 | raise Exception('Supported Python interpreter is CPython') 120 | 121 | if self.agent_destroyed: 122 | self.log('Destroyed agent cannot be started') 123 | return 124 | 125 | if self.agent_started: 126 | return 127 | 128 | self.options = kwargs 129 | 130 | if 'auto_profiling' not in self.options: 131 | self.options['auto_profiling'] = True 132 | 133 | if 'dashboard_address' not in self.options: 134 | self.options['dashboard_address'] = self.SAAS_DASHBOARD_ADDRESS 135 | 136 | if 'agent_key' not in self.options: 137 | raise Exception('missing option: agent_key') 138 | 139 | if 'app_name' not in self.options: 140 | raise Exception('missing option: app_name') 141 | 142 | if 'host_name' not in self.options: 143 | self.options['host_name'] = 
socket.gethostname() 144 | 145 | self.run_id = generate_uuid() 146 | self.run_ts = timestamp() 147 | 148 | self.config_loader.start() 149 | self.message_queue.start() 150 | self.frame_cache.start() 151 | 152 | self.cpu_reporter.setup() 153 | self.allocation_reporter.setup() 154 | self.block_reporter.setup() 155 | self.span_reporter.setup() 156 | self.error_reporter.setup() 157 | self.process_reporter.setup() 158 | 159 | # execute main_thread_func in main thread on signal 160 | def _signal_handler(signum, frame): 161 | if(self.main_thread_func): 162 | func = self.main_thread_func 163 | self.main_thread_func = None 164 | try: 165 | func() 166 | except Exception: 167 | self.exception() 168 | 169 | return True 170 | 171 | if not runtime_info.OS_WIN: 172 | register_signal(signal.SIGUSR2, _signal_handler) 173 | 174 | if self.get_option('auto_destroy') is None or self.get_option('auto_destroy') is True: 175 | # destroy agent on exit 176 | def _exit_handler(*arg): 177 | if not self.agent_started or self.agent_destroyed: 178 | return 179 | 180 | try: 181 | self.message_queue.flush() 182 | self.destroy() 183 | except Exception: 184 | self.exception() 185 | 186 | 187 | atexit.register(_exit_handler) 188 | 189 | if not runtime_info.OS_WIN: 190 | register_signal(signal.SIGQUIT, _exit_handler, once = True) 191 | register_signal(signal.SIGINT, _exit_handler, once = True) 192 | register_signal(signal.SIGTERM, _exit_handler, once = True) 193 | register_signal(signal.SIGHUP, _exit_handler, once = True) 194 | 195 | 196 | self.agent_started = True 197 | self.log('Agent started') 198 | 199 | 200 | def enable(self): 201 | if not self.config.is_agent_enabled(): 202 | self.cpu_reporter.start() 203 | self.allocation_reporter.start() 204 | self.block_reporter.start() 205 | self.span_reporter.start() 206 | self.error_reporter.start() 207 | self.process_reporter.start() 208 | self.config.set_agent_enabled(True) 209 | 210 | 211 | def disable(self): 212 | if self.config.is_agent_enabled(): 213 | self.cpu_reporter.stop() 214 | self.allocation_reporter.stop() 215 | self.block_reporter.stop() 216 | self.span_reporter.stop() 217 | self.error_reporter.stop() 218 | self.process_reporter.stop() 219 | self.config.set_agent_enabled(False) 220 | 221 | 222 | def profile(self, name='Default'): 223 | if not self.agent_started or self.span_active: 224 | return Span(None) 225 | 226 | self.span_active = True 227 | 228 | selected_reporter = None 229 | active_reporters = [] 230 | if self.cpu_reporter.started: 231 | active_reporters.append(self.cpu_reporter) 232 | if self.allocation_reporter.started: 233 | active_reporters.append(self.allocation_reporter) 234 | if self.block_reporter.started: 235 | active_reporters.append(self.block_reporter) 236 | 237 | if len(active_reporters) > 0: 238 | selected_reporter = active_reporters[int(math.floor(random.random() * len(active_reporters)))] 239 | if not selected_reporter.start_profiling(True, True): 240 | selected_reporter = None 241 | 242 | start_timestamp = time.time() 243 | 244 | def stop_func(): 245 | if selected_reporter: 246 | selected_reporter.stop_profiling() 247 | 248 | duration = time.time() - start_timestamp 249 | self.span_reporter.record_span(name, duration) 250 | 251 | if not self.get_option('auto_profiling'): 252 | self.config_loader.load(True) 253 | if selected_reporter: 254 | selected_reporter.report(True); 255 | self.message_queue.flush(True) 256 | 257 | self.span_active = False 258 | 259 | return Span(stop_func) 260 | 261 | 262 | def _start_profiler(self, reporter): 263 | if 
263 |         if not self.agent_started or self.get_option('auto_profiling'):
264 |             return
265 |
266 |         self.span_active = True
267 |
268 |         reporter.start()
269 |         reporter.start_profiling(True, False)
270 |
271 |
272 |     def _stop_profiler(self, reporter):
273 |         if not self.agent_started or self.get_option('auto_profiling'):
274 |             return
275 |
276 |         reporter.stop_profiling()
277 |         reporter.report(False)
278 |         reporter.stop()
279 |         self.message_queue.flush(False)
280 |
281 |         self.span_active = False
282 |
283 |
284 |     def start_cpu_profiler(self):
285 |         self._start_profiler(self.cpu_reporter)
286 |
287 |
288 |     def stop_cpu_profiler(self):
289 |         self._stop_profiler(self.cpu_reporter)
290 |
291 |
292 |     def start_allocation_profiler(self):
293 |         self._start_profiler(self.allocation_reporter)
294 |
295 |
296 |     def stop_allocation_profiler(self):
297 |         self._stop_profiler(self.allocation_reporter)
298 |
299 |
300 |     def start_block_profiler(self):
301 |         self._start_profiler(self.block_reporter)
302 |
303 |
304 |     def stop_block_profiler(self):
305 |         self._stop_profiler(self.block_reporter)
306 |
307 |
308 |     def destroy(self):
309 |         if not self.agent_started:
310 |             self.log('Agent has not been started')
311 |             return
312 |
313 |         if self.agent_destroyed:
314 |             return
315 |
316 |         self.config_loader.stop()
317 |         self.message_queue.stop()
318 |         self.frame_cache.stop()
319 |         self.cpu_reporter.stop()
320 |         self.allocation_reporter.stop()
321 |         self.block_reporter.stop()
322 |         self.error_reporter.stop()
323 |         self.span_reporter.stop()
324 |         self.process_reporter.stop()
325 |
326 |         self.cpu_reporter.destroy()
327 |         self.allocation_reporter.destroy()
328 |         self.block_reporter.destroy()
329 |         self.error_reporter.destroy()
330 |         self.span_reporter.destroy()
331 |         self.process_reporter.destroy()
332 |
333 |         self.agent_destroyed = True
334 |         self.log('Agent destroyed')
335 |
336 |
337 |     def log_prefix(self):
338 |         return '[' + datetime.datetime.now().strftime('%H:%M:%S.%f') + '] StackImpact ' + self.AGENT_VERSION + ':'
339 |
340 |
341 |     def log(self, message):
342 |         if self.get_option('debug'):
343 |             print(self.log_prefix(), message)
344 |
345 |
346 |     def print_err(self, *args, **kwargs):
347 |         print(*args, file=sys.stderr, **kwargs)
348 |
349 |
350 |     def error(self, message):
351 |         if self.get_option('debug'):
352 |             self.print_err(self.log_prefix(), message)
353 |
354 |
355 |     def exception(self):
356 |         if self.get_option('debug'):
357 |             traceback.print_exc()
358 |
359 |
360 |     def delay(self, timeout, func, *args):
361 |         def func_wrapper():
362 |             try:
363 |                 func(*args)
364 |             except Exception:
365 |                 self.exception()
366 |
367 |         t = threading.Timer(timeout, func_wrapper, ())
368 |         t.start()
369 |
370 |         return t
371 |
372 |
373 |     def schedule(self, timeout, interval, func, *args):
374 |         tw = TimerWraper()
375 |
376 |         def func_wrapper():
377 |             start = time.time()
378 |
379 |             try:
380 |                 func(*args)
381 |             except Exception:
382 |                 self.exception()
383 |
384 |             with tw.cancel_lock:
385 |                 if not tw.canceled:
386 |                     tw.timer = threading.Timer(abs(interval - (time.time() - start)), func_wrapper, ())
387 |                     tw.timer.start()
388 |
389 |         tw.timer = threading.Timer(timeout, func_wrapper, ())
390 |         tw.timer.start()
391 |
392 |         return tw
393 |
394 |
395 |     def run_in_thread(self, func):
396 |         def func_wrapper():
397 |             try:
398 |                 func()
399 |             except Exception:
400 |                 self.exception()
401 |
402 |         t = threading.Thread(target=func_wrapper)
403 |         t.start()
404 |         return t
405 |
406 |
407 |     def run_in_main_thread(self, func):
408 |         if self.main_thread_func:
409 |             return False
410 |
411 |         self.main_thread_func = func
412 |         os.kill(os.getpid(), signal.SIGUSR2)  # handled by _signal_handler registered in start()
413 |
414 |         return True
415 |
416 |
417 |
418 | class TimerWraper(object):  # cancelable timer handle used by Agent.schedule()
419 |     def __init__(self):
420 |         self.timer = None
421 |         self.cancel_lock = threading.Lock()
422 |         self.canceled = False
423 |
424 |     def cancel(self):
425 |         with self.cancel_lock:
426 |             self.canceled = True
427 |             self.timer.cancel()
428 |
429 |
--------------------------------------------------------------------------------