├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── dive.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | dist 4 | *.egg-info 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Audrey Dutcher 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include README.md 3 | include LICENSE 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | god I hate that I had to write this 2 | 3 | # Dumpster Dive dot py 4 | 5 | This is a library for interactively exploring the heap of a python program to track down leaks. 6 | The basic intuition is that you _know_ that your program shouldn't be taking 2 gigabytes of memory at this point, and it's probably a bunch of objects of a single type taking up all that memory. 7 | But how do you figure out who's holding the references to them? 8 | 9 | Python's built in `gc` module gives you the tools to figure it out, but it's a tedious process and if you do it in an interactive session you're going to be generating a bunch of additional references that will complicate your search pretty drastically. 10 | This tool was built after several hours of frustration with this! 11 | 12 | Then, to detox, I learned `prompt_toolkit` and made the prompt pretty. 13 | 14 | # Installation 15 | 16 | `pip install dive` 17 | 18 | # Usage 19 | 20 | ```python 21 | import dive 22 | dive.start() 23 | ``` 24 | 25 | This will start an interactive session in which you will be empowered to traverse the heap, scanning for references to your least favorite objects. 26 | 27 | If you already know what objects are the culprit you can start with `dive.search(name)`, where name is a substring matching the type names you're interested in. 28 | 29 | ## Traversal 30 | 31 | At every point, you'll be shown a list of objects, and asked to pick a command to run on one of them. 32 | You are shown the first 10 items by default, you can show more by running `list` or `list n` to jump to position `n` in the list. 33 | 34 | - Running `return n` will return the object at index n from the initial call into dive. 35 | - Running `refs n` will let you explore a list of all the objects referring to the object at index n. 36 | - Running `down n` will let you explore all the objects that the object at index n refers to. 37 | - Running `up` will pop you up a level of exploration. 38 | 39 | Run `help` for more help. 40 | -------------------------------------------------------------------------------- /dive.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import traceback 3 | import types 4 | from pprint import pformat, pprint 5 | 6 | from prompt_toolkit import prompt 7 | from pygments.token import Token 8 | from prompt_toolkit import print_formatted_text 9 | from prompt_toolkit.formatted_text import PygmentsTokens 10 | from prompt_toolkit.styles import Style 11 | 12 | SEQUENCE_TYPES = [list, set, tuple] 13 | MAPPING_TYPES = [dict] 14 | MAGIC_HOLDER = [object()] 15 | # the magic object is a unique object that we toss into gc.get_referrers to 16 | # identify which object gc.get_referrer is returning which is actually the 17 | # tuple of objects that are the args to gc.get_referrer. we hold it in a list 18 | # so we can easily isolate which results need to be discarded by its inclusion. 19 | 20 | def search(ty_filter): 21 | all_objects = [x for x in gc.get_objects() if ty_filter in repr(type(x))] 22 | mark_mine(all_objects) 23 | return _main(SearchGarbage(all_objects, 'Objects of type like %s' % ty_filter)) 24 | 25 | def start(): 26 | hist = {} 27 | mark_mine(hist) 28 | for x in gc.get_objects(): 29 | if type(x) in hist: 30 | hist[type(x)].append(x) 31 | else: 32 | hist[type(x)] = [x] 33 | mark_mine(hist[type(x)]) 34 | return _main(SearchGarbage(hist, 'Histogram of types in memory', sort=lambda x: -len(hist[x]))) 35 | 36 | def _main(dumpster): 37 | try: 38 | dumpster.run() 39 | return None 40 | except ReturnException as e: 41 | return e.value 42 | except: # pylint: disable=bare-except 43 | #import ipdb, sys; ipdb.post_mortem(sys.exc_info()[2]) 44 | traceback.print_exc() 45 | return None 46 | finally: 47 | clear_mine() 48 | gc.collect() 49 | 50 | class ContinueException(Exception): 51 | pass 52 | class BreakException(Exception): 53 | pass 54 | class ReturnException(Exception): 55 | def __init__(self, v): 56 | super(ReturnException, self).__init__() 57 | self.value = v 58 | 59 | mine = [] 60 | 61 | def mark_mine(obj): 62 | mine.append(obj) 63 | return obj 64 | 65 | def is_mine(obj): 66 | if obj is mine or obj is MAGIC_HOLDER or obj is MAGIC_HOLDER[0]: 67 | return True 68 | if type(obj) is tuple and len(obj) > 0 and obj[-1] is MAGIC_HOLDER[0]: 69 | return True 70 | if type(obj) is types.FrameType and obj.f_code.co_filename.endswith('dive.py'): 71 | return True 72 | for m in mine: 73 | if m is obj: 74 | return True 75 | return False 76 | 77 | def clear_mine(): 78 | global mine 79 | mine = [] 80 | 81 | # styling stuff... why is this difficult 82 | our_style = Style.from_dict({ 83 | 'prompt': '#209000 bold', 84 | 'pygments.number': '#ff0000 bold', 85 | 'pygments.error': '#ffff00 bold', 86 | 'pygments.key': '#ff8888', 87 | }) 88 | 89 | def print_tokens(tokens): 90 | print_formatted_text(PygmentsTokens(tokens), style=our_style) 91 | 92 | 93 | class SearchGarbage(object): # pylint: disable=useless-object-inheritance 94 | def __init__(self, body, context, sort=None): 95 | self.idx_mode = type(body) in SEQUENCE_TYPES 96 | self.dict_mode = type(body) in MAPPING_TYPES 97 | self.dir_mode = not (self.idx_mode or self.dict_mode) 98 | self.last_offset = 0 99 | self.context = context 100 | 101 | if self.idx_mode: 102 | self.keys = None 103 | self.values = [x for x in body if not is_mine(x)] 104 | elif self.dict_mode: 105 | self.keys = sorted(body.keys(), key=sort) 106 | self.values = [body[x] for x in self.keys] 107 | else: 108 | self.keys = sorted((x for x in dir(body) if not isprop(body, x)), key=sort) 109 | self.values = [getattr(body, x) for x in self.keys] 110 | 111 | if self.keys is not None: 112 | mark_mine(self.keys) 113 | mark_mine(self.values) 114 | 115 | def list_items(self, offset=None, count=10): 116 | if offset is None: 117 | offset = self.last_offset 118 | for i in range(offset, min(offset + count, len(self.values))): 119 | if self.keys is not None: 120 | print_tokens([ 121 | (Token.Number, str(i)), 122 | (Token, ': '), 123 | (Token.Key, str(self.keys[i])), 124 | (Token, ' => '), 125 | (Token, meaningful_repr(self.values[i])), 126 | ]) 127 | else: 128 | print_tokens([ 129 | (Token.Number, str(i)), 130 | (Token, ': '), 131 | (Token, meaningful_repr(self.values[i])), 132 | ]) 133 | self.last_offset = offset + count 134 | 135 | def deeper(self, body, context, failure_advice): 136 | dumpster = SearchGarbage(body, context) 137 | if len(dumpster.values) > 0: 138 | dumpster.run() 139 | self.list_items(self.last_offset - 10) 140 | else: 141 | print_tokens([ 142 | (Token.Error, 'There are no objects in this view!\n'), 143 | (Token, failure_advice), 144 | ]) 145 | 146 | def validate_idx(self, idx): 147 | try: 148 | idx = int(idx) 149 | except ValueError: 150 | print('Not an index!') 151 | raise ContinueException() 152 | if idx < 0 or idx >= len(self.values): 153 | print('Not in range!') 154 | raise ContinueException() 155 | return idx 156 | 157 | def validate_key(self, key): 158 | if self.keys is None: 159 | return self.validate_idx(key) 160 | 161 | for i, maybe in enumerate(self.keys): 162 | if type(maybe) is str and maybe == key: 163 | return i 164 | if key.isdigit(): 165 | return self.validate_idx(key) 166 | else: 167 | print('Not a key!') 168 | raise ContinueException() 169 | 170 | def run(self): 171 | self.last_offset = 0 172 | self.list_items() 173 | last_cmd = None 174 | 175 | while True: 176 | try: 177 | opt = prompt([('class:prompt', u'%s: ' % self.context)], style=our_style) 178 | if opt == '': 179 | if last_cmd is None: 180 | continue 181 | opt = last_cmd 182 | else: 183 | last_cmd = opt 184 | 185 | args = opt.split() 186 | if len(args) == 0 or not hasattr(self, 'cmd_' + args[0]): 187 | print('Not a command. Type "help" for help.') 188 | else: 189 | getattr(self, 'cmd_' + args[0])(*args[1:]) 190 | 191 | except ContinueException: 192 | continue 193 | except BreakException: 194 | return True 195 | 196 | def cmd_help(self, *args): # pylint: disable=unused-argument,no-self-use 197 | print('help - show this message') 198 | print('list [n] - show 10 items from the list at index n') 199 | print('show n - show detail of item at index or key n') 200 | print('fullshow n - show ALL detail of item at index or key n') 201 | print('refs n - explore objects that refer to item at index or key n') 202 | print('down n - explore objects that the object at index or key n refers to') 203 | print('return n - pick item at index or key n') 204 | print('up - cancel selection') 205 | print('quit - abort everything') 206 | 207 | def cmd_list(self, *args): 208 | if len(args) == 1: 209 | idx = self.validate_idx(args[0]) 210 | elif len(args) == 0: 211 | idx = None 212 | else: 213 | print('Syntax: list [n]') 214 | raise ContinueException() 215 | 216 | self.list_items(idx) 217 | 218 | def cmd_show(self, *args): 219 | if len(args) == 1: 220 | idx = self.validate_key(args[0]) 221 | else: 222 | print('Syntax: show n') 223 | raise ContinueException() 224 | 225 | print(safe_repr(self.values[idx])) 226 | 227 | def cmd_fullshow(self, *args): 228 | if len(args) == 1: 229 | idx = self.validate_key(args[0]) 230 | else: 231 | print('Syntax: fullshow n') 232 | raise ContinueException() 233 | 234 | pprint(self.values[idx]) 235 | 236 | def cmd_refs(self, *args): 237 | if len(args) == 1: 238 | idx = self.validate_key(args[0]) 239 | else: 240 | print('Syntax: refs n') 241 | raise ContinueException() 242 | 243 | self.deeper(gc.get_referrers(self.values[idx], MAGIC_HOLDER[0]), 'Objects referring to %s' % meaningful_repr(self.values[idx]), 'The object may be interned and have some refs hidden.') 244 | 245 | def cmd_down(self, *args): 246 | if len(args) == 1: 247 | idx = self.validate_key(args[0]) 248 | else: 249 | print('Syntax: down n') 250 | raise ContinueException() 251 | 252 | self.deeper(self.values[idx], 'Objects referred to by %s' % meaningful_repr(self.values[idx]), 'Try looking at a non-empty object') 253 | 254 | def cmd_return(self, *args): 255 | if len(args) == 1: 256 | idx = self.validate_key(args[0]) 257 | else: 258 | print('Syntax: return n') 259 | raise ContinueException() 260 | 261 | raise ReturnException(self.values[idx]) 262 | 263 | def cmd_up(self, *args): # pylint: disable=unused-argument,no-self-use 264 | raise BreakException() 265 | 266 | def cmd_quit(self, *args): # pylint: disable=unused-argument,no-self-use 267 | raise ReturnException(None) 268 | 269 | def meaningful_repr(x): 270 | if type(x) is dict: 271 | return 'dict with %d keys %s' % (len(x), shorten(x.keys())) 272 | elif type(x) is tuple: 273 | return 'tuple with %d items %s' % (len(x), shorten(x)) 274 | elif type(x) is list: 275 | return 'list with %d items %s' % (len(x), shorten(x)) 276 | elif type(x) is set: 277 | return 'set with %d items %s' % (len(x), shorten(x)) 278 | else: 279 | r = repr(x) 280 | if '\n' in r: 281 | r = r.split('\n', 1)[0] 282 | if len(r) > 80: 283 | r = r[:80] + '...' 284 | return r 285 | 286 | def shorten(seq): 287 | out = '[' 288 | first = True 289 | for key in seq: 290 | if first: 291 | out += safe_repr(key) 292 | first = False 293 | else: 294 | out += ', ' + safe_repr(key) 295 | if len(out) > 50: 296 | return out[:50] + '...]' 297 | return out + ']' 298 | 299 | def safe_repr(x): 300 | try: 301 | r = pformat(x) 302 | except Exception: 303 | # Fall back to using only the class name 304 | r = "[{}] (repr failed)".format(x.__class__) 305 | if len(r) > 10**4: 306 | r = r[:10**4] + '...' 307 | if len(r.splitlines()) > 50: 308 | r = '\n'.join(r.splitlines()[:50]) + '\n...' 309 | return r 310 | 311 | def isprop(obj, attr): 312 | if attr not in dir(type(obj)): 313 | return False 314 | return type(getattr(type(obj), attr)) is property 315 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | long_description = ''' 2 | This is a library for interactively exploring the heap of a python program to track down leaks. 3 | The basic intuition is that you _know_ that your program shouldn't be taking 2 gigabytes of memory at this point, and it's probably a bunch of objects of a single type taking up all that memory. 4 | But how do you figure out who's holding the references to them? 5 | 6 | Python's built in `gc` module gives you the tools to figure it out, but it's a tedious process and if you do it in an interactive session you're going to be generating a bunch of additional references that will complicate your search pretty drastically. 7 | This tool was built after several hours of frustration with this! 8 | 9 | Then, to detox, I learned `prompt_toolkit` and made the prompt pretty. 10 | ''' 11 | 12 | from setuptools import setup 13 | setup(name='dive', 14 | version='0.3', 15 | py_modules=['dive'], 16 | install_requires=['prompt_toolkit, Pygments'], 17 | author_email='audrey@rhelmot.io', 18 | author='rhelmot', 19 | url='https://github.com/rhelmot/dumpsterdiver', 20 | description='A tool for interactively traversing the python heap to find memory leaks', 21 | license='MIT', 22 | keywords='heap memory leak interative garbage explore', 23 | ) 24 | --------------------------------------------------------------------------------