├── .gitignore ├── LICENSE ├── README.md ├── cortex_profiler.py ├── screenshot.png └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.egg-info 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jonatan Liljedahl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PC sampling profiler for Cortex-M MCUs 2 | 3 | This script uses a telnet connection to a running openocd server to sample the program counter. 4 | The PC sampling is fully non-intrusive and does not disturb the CPU while running. 
5 | It also works for the simpler Cortex-M models without SWO. 6 | 7 | A table of statistics is displayed that shows how often the CPU is executing inside each function. 8 | 9 | ![screenshot](screenshot.png) 10 | 11 | The script detects if the ELF symbol file is modified and resets itself, which is useful if you leave the profiler running while developing. 12 | 13 | ## More detail 14 | 15 | Functions can be split up in sections for further detail by the use of this GCC macro, which generates a FUNC symbol of size 0: 16 | 17 | ```c 18 | #define FUNC_SYMB(l) asm(".thumb_func\n" l "$uid%=:" :::) 19 | ``` 20 | 21 | Which can then be used like this: 22 | 23 | ```c 24 | void foo(void) { 25 | FUNC_SYMB("a"); 26 | // code here 27 | FUNC_SYMB("b"); 28 | // more code here 29 | // etc.. 30 | } 31 | ``` 32 | 33 | ## Installation 34 | 35 | ``` 36 | $ pip3 install git+https://github.com/lijon/cortex-profiler 37 | ``` 38 | 39 | Or download this repo and run `pip3 install .` in the directory. -------------------------------------------------------------------------------- /cortex_profiler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # (C)2023 Jonatan Liljedahl - http://kymatica.com 4 | # 5 | # based on https://gist.github.com/ynsta/7df418cb27b908391f86 6 | 7 | # TODO: 8 | # - keyboard input to reset? maybe: press ctrl-C again to quit, any other key to reset? 9 | # - instead of detecting ELF mtime change, it would be better to detect device reset via openocd if possible. maybe an option would be to have a separate "trigger file" that our vscode launch action can touch? 
"""PC sampling profiler for ARM Cortex-M MCUs.

The program counter is sampled through the telnet interface of a running
OpenOCD server, and a table showing how often the CPU executes inside each
function is printed periodically.
"""

import sys
import time
import os
import subprocess
from bisect import bisect_right
import operator
import argparse

try:
    import telnetlib
except ImportError:
    # telnetlib was deprecated by PEP 594 and removed from the standard
    # library in Python 3.13. Keep this module importable so the problem is
    # reported when a connection is attempted instead of at import time.
    telnetlib = None


class UltimateHelpFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that keeps raw text layout and also shows defaults."""
    pass


class OpenOCDCMSampler(object):
    """Sample the program counter via OpenOCD and map it to ELF symbols.

    Attributes:
        net: the telnet connection, or None while not connected.
        elfmtime: modification time of the ELF file when it was last read.
        table: address-sorted list of ``(addr, symbol, parent)`` tuples, where
            ``parent`` is the enclosing function name for a marked
            sub-section and None for a regular function.
        indexes: set of all addresses present in ``table``.
        addrs: the addresses of ``table``, in the same order, for bisecting.
    """

    # OpenOCD command that reads the 32-bit word at 0xE000101C, and the
    # prompt that terminates the reply.
    _PC_CMD = b'mrw 0xE000101C\r\n'
    _PROMPT = b'\r\n\r> '

    def __init__(self):
        self.net = None
        self.elfmtime = 0
        self.table = []
        self.indexes = set()
        self.addrs = []

    def __del__(self):
        # getattr: the attribute is missing if the object was created without
        # running __init__ or torn down half-constructed.
        net = getattr(self, 'net', None)
        if net is None:
            return
        cmd = b'exit\r\n'
        try:
            net.write(cmd)
            net.read_until(cmd, 1)
            net.close()
        except (EOFError, OSError):
            # Best-effort goodbye; the peer may already be gone.
            pass

    def connect(self, host='localhost', port=4444):
        """Open the telnet connection to the OpenOCD server.

        Raises:
            RuntimeError: if the telnetlib module is not available.
            OSError: if the connection cannot be established.
        """
        self.net = None
        if telnetlib is None:
            raise RuntimeError("the 'telnetlib' module is not available "
                               "(it was removed from the standard library in Python 3.13)")
        self.net = telnetlib.Telnet(host, port)
        # Discard whatever the server has already sent (banner, prompt).
        self.net.read_very_eager()

    def getpc(self):
        """Return one program counter sample, or 0 if no valid reply was read."""
        cmd = self._PC_CMD
        self.net.write(cmd)
        res = self.net.read_until(self._PROMPT, 1)

        # A valid reply is the echoed command, the hex value, then the prompt.
        # Anything else (timeout, partial data, error text) is not a sample.
        if not res.startswith(cmd) or not res.endswith(self._PROMPT):
            return 0
        try:
            return int(res[len(cmd):-len(self._PROMPT)], 16)
        except ValueError:
            return 0

    def initSymbols(self, elf, readelf, demangle=None):
        """(Re)build the symbol table from the FUNC symbols of an ELF file.

        Args:
            elf: path of the ELF file.
            readelf: readelf command to run.
            demangle: C++ demangle command; None or "" disables demangling.

        Raises:
            OSError: if the ELF file is missing or readelf cannot be started.
        """
        # Record the mtime before reading, so that a file rewritten while
        # readelf runs is detected as modified on the next check.
        self.elfmtime = os.path.getmtime(elf)
        proc = subprocess.run([readelf, '-sW', elf], stdout=subprocess.PIPE)
        symbols = self._parse_func_symbols(proc.stdout.decode('utf-8', errors='replace'))

        if demangle:
            symbols = self._demangle(symbols, demangle)

        self.indexes = {addr for addr, _, _ in symbols}
        self.table = self._mark_subsections(symbols)
        self.addrs = [addr for addr, _, _ in self.table]

    @staticmethod
    def _parse_func_symbols(readelf_output):
        """Extract unique ``(addr, name, size)`` FUNC entries from `readelf -sW` text."""
        symbols = []
        seen = set()
        for line in readelf_output.splitlines():
            field = line.split()
            if len(field) < 8 or field[3] != 'FUNC':
                continue
            try:
                addr = int(field[1], 16)
                # readelf prints large sizes in hex with a 0x prefix.
                if field[2].lower().startswith('0x'):
                    size = int(field[2], 16)
                else:
                    size = int(field[2])
            except ValueError:
                continue
            # Bit 0 of a function symbol's value flags Thumb code and is not
            # part of the address; sampled PC values do not carry it.
            addr &= ~1
            if addr not in seen:
                seen.add(addr)
                symbols.append((addr, field[7], size))
        return symbols

    @staticmethod
    def _demangle(symbols, demangle):
        """Return `symbols` with C++ names demangled; unchanged if that fails."""
        # NOTE: arm-none-eabi-c++filt expects prefix "_Z", while c++filt expects "__Z".
        # that's the only difference - prepending function name with extra underscore makes it work with x86 c++filt.
        mangled = [name for _, name, _ in symbols if name.startswith("_Z")]
        if not mangled:
            return symbols
        data = ('\n'.join(mangled) + '\n').encode('utf-8', errors='replace')
        try:
            proc = subprocess.run([demangle, '--no-params'], input=data, stdout=subprocess.PIPE)
        except OSError as err:
            print("Warning: could not run {}: {}".format(demangle, err))
            return symbols
        demangled = proc.stdout.decode('utf-8', errors='replace').splitlines()
        if len(demangled) != len(mangled):
            print("Warning: got {} demangled function names, expected {}".format(len(demangled), len(mangled)))
            return symbols
        return OpenOCDCMSampler._apply_demangled(symbols, demangled)

    @staticmethod
    def _apply_demangled(symbols, demangled):
        """Replace each "_Z" name in `symbols`, in order, by the next demangled name."""
        replacement = iter(demangled)
        result = []
        for addr, name, size in symbols:
            if name.startswith("_Z"):
                name = next(replacement)
            result.append((addr, name, size))
        return result

    @staticmethod
    def _mark_subsections(symbols):
        """Sort by address and turn ``(addr, name, size)`` into ``(addr, name, parent)``.

        A size-0 symbol that lies inside the preceding sized function is a
        marked sub-section: its "$uid..." suffix is stripped and the
        enclosing function becomes its parent. Every other symbol gets
        parent None.
        """
        table = []
        parent = ''
        parentend = 0
        for addr, symb, size in sorted(symbols):
            if size == 0 and addr < parentend:
                # split() leaves names without a "$uid" suffix untouched.
                table.append((addr, symb.split("$uid", 1)[0], parent))
            else:
                table.append((addr, symb, None))
                parent = symb
                parentend = addr + size
        return table

    def func(self, pc):
        """Return ``(symbol, addr, parent)`` for `pc`, or ``('', 0, None)`` if unknown."""
        if pc == 0 or pc == 0xFFFFFFFF:
            return ('', 0, None)

        # find where pc lands between addresses, ignoring size
        i = bisect_right(self.addrs, pc)
        if i:
            addr, symb, parent = self.table[i-1]
            return (symb, addr, parent)

        return ('', 0, None)


def _print_report(countmap, childmap, total, limit, elapsed):
    """Clear the terminal and print the `limit` most sampled functions."""
    print ('\x1b[2J\x1b[H')
    top = sorted(countmap.items(), key=operator.itemgetter(1), reverse=True)[:limit]
    for name, count in top:
        print ('\x1b[90m> \x1b[96m{:05.2f}% \x1b[92m{}'.format((count * 100.) / total, name))
        if name in childmap:
            children = sorted(childmap[name].items(), key=operator.itemgetter(1), reverse=True)
            for child_name, child_count in children:
                print (' \x1b[36m{:05.2f}% \x1b[90m- \x1b[34m{}'.format((child_count * 100.) / total, child_name))
    print ()
    rate = total / elapsed if elapsed > 0 else 0.0
    print ('\x1b[0m{} samples, {:05.2f} samples/second'.format(total, rate))


def cli():
    """Command line entry point: parse arguments, then sample until Ctrl-C."""

    epilog = '''A telnet connection to a running openocd server is used to sample the program counter.
A table of statistics is displayed that shows how often the CPU is executing inside each function.

Functions can be split up in sections for further detail by the use of this GCC macro,
which generates a FUNC symbol of size 0:

    #define FUNC_SYMB(l) asm(".thumb_func\\n" l "$uid%=:" :::)
'''

    ap = argparse.ArgumentParser(description = "PC sampling profiler for ARM Cortex-M.", epilog=epilog, formatter_class=UltimateHelpFormatter)
    ap.add_argument("filename", help = "ELF file with symbols")
    ap.add_argument("-r","--rate", default=0.005, type=float, help = "sampling rate limit (seconds)")
    ap.add_argument("-i","--interval", default=1, type=float, help = "display update interval (seconds)")
    ap.add_argument("-l","--limit", default=50, type=int, help = "display the top N functions")
    ap.add_argument("-H","--host", default='localhost', help = "openocd telnet host")
    ap.add_argument("-p","--port", default=4444, type=int, help = "openocd telnet port")
    ap.add_argument("-e","--readelf", default="arm-none-eabi-readelf", help = "readelf command")
    ap.add_argument("-d","--demangle", default="arm-none-eabi-c++filt", help = "C++ demangle command")
    args = ap.parse_args()

    sampler = OpenOCDCMSampler()
    elf = args.filename
    try:
        sampler.initSymbols(elf, args.readelf, args.demangle)
    except OSError as err:
        print("Error: Could not read symbols from", elf, "-", err)
        sys.exit(-1)

    try:
        sampler.connect(args.host, args.port)
    except RuntimeError as err:
        print("Error:", err)
        sys.exit(-1)
    except (EOFError, OSError):
        print("Error: Could not connect to openocd server at",args.host,"port",args.port)
        print("Make sure you have a running instance of openocd and that the port matches.")
        sys.exit(-1)

    ratelimit = args.rate
    interval = args.interval

    total = 0
    countmap = { }
    childmap = { }
    start = time.time()
    start0 = start

    try:
        while True:
            try:
                func, addr, parent = sampler.func(sampler.getpc())
            except (EOFError, OSError):
                # Drop the connection so that __del__ does not write to it.
                sampler.net = None
                print("Connection lost")
                sys.exit(-1)

            if addr:
                total += 1

                if parent:
                    # Count the sub-section under its parent, and credit the
                    # sample to the parent function as well.
                    children = childmap.setdefault(parent, { })
                    children[func] = children.get(func, 0) + 1
                    func = parent

                countmap[func] = countmap.get(func, 0) + 1

            cur = time.time()
            if cur - start > interval:
                start = cur
                try:
                    modified = os.path.getmtime(elf) > sampler.elfmtime
                except OSError:
                    # The ELF may be briefly missing while it is rebuilt.
                    modified = False

                if modified:
                    total = 0
                    start0 = cur
                    countmap = { }
                    childmap = { }
                    try:
                        sampler.initSymbols(elf, args.readelf, args.demangle)
                    except OSError:
                        # Keep the old table; the next mtime check retries.
                        pass
                else:
                    _print_report(countmap, childmap, total, args.limit, cur - start0)

            # Applied to invalid samples too, so a halted target does not
            # turn this loop into a busy loop against the server.
            time.sleep(ratelimit)

    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    cli()