├── .github └── workflows │ └── ccpp.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── _config.yml ├── build.sh ├── dep └── lua-5.3.6.tar.gz ├── src ├── CMakeLists.txt └── plua.cpp ├── test ├── call.png ├── mem_ALLOC_SIZE.png ├── test_cpu.lua └── test_mem.lua └── tools ├── flamegraph.pl ├── plua.go ├── png.go ├── pprof └── show.sh /.github/workflows/ccpp.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | 13 | - name: prepare-lua 14 | run: | 15 | sudo apt-get install libreadline-dev -y 16 | sudo wget https://www.lua.org/ftp/lua-5.3.4.tar.gz 17 | sudo tar -xf lua-5.3.4.tar.gz 18 | cd lua-5.3.4 19 | sudo make linux 20 | sudo make install 21 | 22 | - name: make 23 | run: | 24 | sudo ./build.sh 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | build 4 | cmake-build-debug 5 | debug 6 | bin 7 | obj 8 | tags 9 | .vscode 10 | init 11 | tatus 12 | v_init 13 | GPATH 14 | GRTAGS 15 | GTAGS 16 | proto_md5 17 | CMakeSettings.json 18 | 19 | *~ 20 | .*.swp 21 | .vs/ 22 | .a 23 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8...3.29) 2 | project(PLUA_PROJECT) 3 | # Compiler flags for the C++ sources, and one bin/ directory under this project's root for every build artifact. 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g3 -std=c++11 -DNDEBUG -O3 ") 5 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) 6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) 7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) 8 | add_subdirectory(src) 9 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 zhao xin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pLua 2 | 3 | [](https://github.com/esrrhs/pLua) 4 | [](https://github.com/esrrhs/pLua) 5 | [](https://github.com/esrrhs/pLua/actions) 6 | 7 | Lua 性能分析工具 8 | 9 | ## 简介 10 | 类似于gperftools,可分析Lua程序的热点及内存分配情况 11 | 12 | 13 | ## 特性 14 | - 简单,只需几行代码,即可输出结果,或通过[hookso](https://github.com/esrrhs/hookso)注入,不用修改代码 15 | - 准确,相比lua hook,定时采样的方式更能准确捕获lua执行的热点,使用ITIMER_PROF剔除sleep等堆栈 16 | - 轻量,因为是采样的,相比直接按行lua hook,能最小程度影响宿主程序的运行 17 | - 直观,输出调用图,能直观的看到热点和调用关系,可兼容[gperftools](https://github.com/gperftools/gperftools)的pprof工具,可生成[火焰图](https://github.com/brendangregg/FlameGraph) 18 | 19 | ## 编译 20 | * 安装lua 21 | * 编译插件libplua.so及tools 22 | ```shell 23 | # ./build.sh 24 | ``` 25 | * 安装火焰图的依赖项 26 | ```shell 27 | # yum install perl-open.noarch 28 | ``` 29 | 30 | ## 使用 31 | #### 获取CPU采样数据 32 | * 修改Lua code 33 | ``` lua 34 | -- 引入libplua.so 35 | local p = require "libplua" 36 | -- 开启采样 37 | -- 参数1:采样时间(秒),0表示一直采样 38 | -- 参数2:采样结果文件 39 | p.start(0, "call.pro") 40 | 41 | do_some_thing() 42 | 43 | -- 结束采样,输出结果文件 44 | p.stop() 45 | 46 | ``` 47 | * 或者用[hookso](https://github.com/esrrhs/hookso)注入 48 | ```shell 49 | a) 首先获取进程中的Lua_State指针,比如进程的xxx.so调用了lua_settop(L)函数,那么就取第一个参数 50 | # ./hookso arg $PID xxx.so lua_settop 1 51 | 123456 52 | 53 | b) 加载libplua.so 54 | # ./hookso dlopen $PID ./libplua.so 55 | 56 | c) 执行libplua.so的lrealstart手动开启,等价于lrealstart(L, 0, "./call.pro") 57 | # ./hookso call $PID libplua.so lrealstart i=123456 i=0 s="./call.pro" 58 | 59 | d) 执行libplua.so的lrealstop手动关闭,等价于lrealstop(L) 60 | # ./hookso call $PID libplua.so lrealstop i=123456 61 | ``` 62 | #### 获取内存采样数据 63 | * 修改Lua code 64 | ``` lua 65 | -- 引入libplua.so 66 | local p = require "libplua" 67 | -- 开启采样 68 | -- 参数1:采样次数,0表示一直采样 69 | -- 参数2:采样结果文件名,会生成2个采样文件,分别代表内存分配大小、内存占用大小 70 | p.start_mem(0, "mem.pro") 71 | 72 | do_some_thing() 
73 | 74 | -- 结束采样,输出结果文件 75 | p.stop_mem() 76 | 77 | ``` 78 | * 或者用[hookso](https://github.com/esrrhs/hookso)注入 79 | ```shell 80 | a) 首先获取进程中的Lua_State指针,比如进程的xxx.so调用了lua_settop(L)函数,那么就取第一个参数 81 | # ./hookso arg $PID xxx.so lua_settop 1 82 | 123456 83 | 84 | b) 加载libplua.so 85 | # ./hookso dlopen $PID ./libplua.so 86 | 87 | c) 执行libplua.so的lrealstartmem手动开启,等价于lrealstartmem(L, 0, "./call.pro") 88 | # ./hookso call $PID libplua.so lrealstartmem i=123456 i=0 s="./call.pro" 89 | 90 | d) 执行libplua.so的lrealstopmem手动关闭,等价于lrealstopmem(L) 91 | # ./hookso call $PID libplua.so lrealstopmem i=123456 92 | ``` 93 | 94 | ## 示例 95 | #### 运行test目录下的lua 96 | ```shell 97 | # lua test_cpu.lua 98 | # lua test_mem.lua 99 | ``` 100 | 101 | #### 生成可视化结果 102 | 使用tools目录下的show.sh脚本生成gperftools风格png及火焰图 103 | ```shell 104 | # cd tools 105 | # ./show.sh ../test 106 | ``` 107 | 108 | #### 查看test_cpu.lua的热点 109 | ![image](test/call.png) 110 | 111 | #### 查看test_mem.lua的内存采样 112 | * 内存分配大小 113 | ![image](test/mem_ALLOC_SIZE.png) 114 | 115 | ## 其他 116 | [lua全家桶](https://github.com/esrrhs/lua-family-bucket) 117 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | dir=$(cd `dirname $0`;pwd) 3 | projectdir=$dir 4 | builddir="$projectdir/build" 5 | rundir="$projectdir/bin" 6 | luadir="$projectdir/dep/lua-5.3.6" 7 | lua="$projectdir/dep/lua-5.3.6.tar.gz" 8 | 9 | if [ -f "$lua" ] && [ ! -d "$luadir" ]; then 10 | cd $projectdir/dep && tar zxvf $lua 11 | cd $projectdir && cp $projectdir/dep/lua-5.3.6/src/*.h $projectdir/src 12 | fi 13 | 14 | if [ ! 
-d "$rundir" ]; then 15 | mkdir -p $rundir && cd $rundir 16 | fi 17 | 18 | if [ -d "$builddir" ]; then 19 | rm $builddir -rf 20 | mkdir -p $builddir && cd $builddir 21 | else 22 | mkdir -p $builddir && cd $builddir 23 | fi 24 | 25 | cmake ../ 26 | make 27 | 28 | cd ../tools 29 | GO111MODULE=off go build plua.go 30 | GO111MODULE=off go build png.go 31 | 32 | chmod a+x pprof 33 | chmod a+x *.pl 34 | chmod a+x *.sh 35 | -------------------------------------------------------------------------------- /dep/lua-5.3.6.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/esrrhs/pLua/c734adc80b9cbc18136a0aa6ae9b47dec9f13e11/dep/lua-5.3.6.tar.gz -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(plua) 2 | aux_source_directory(./ plua_src) 3 | add_library(plua SHARED ${plua_src}) 4 | TARGET_LINK_LIBRARIES(plua) 5 | -------------------------------------------------------------------------------- /src/plua.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | extern "C" { 32 | #include "lua.h" 33 | #include "lualib.h" 34 | #include "lauxlib.h" 35 | } 36 | 37 | const int open_debug = 0; 38 | int gSampleCount = 0; 39 | std::string gFilename; 40 | lua_State *gL = 0; 41 | int gRunning = 0; 42 | 43 | #define LLOG(...) if (open_debug) {llog("[DEBUG] ", __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__);} 44 | #define LERR(...) 
if (open_debug) {llog("[ERROR] ", __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__);} 45 | 46 | void llog(const char *header, const char *file, const char *func, int pos, const char *fmt, ...) { 47 | FILE *pLog = NULL; 48 | time_t clock1; 49 | struct tm *tptr; 50 | va_list ap; 51 | 52 | pLog = fopen("plua.log", "a+"); 53 | if (pLog == NULL) { 54 | return; 55 | } 56 | 57 | clock1 = time(0); 58 | tptr = localtime(&clock1); 59 | 60 | struct timeval tv; 61 | gettimeofday(&tv, NULL); 62 | 63 | fprintf(pLog, "===========================[%d.%d.%d, %d.%d.%d %llu]%s:%d,%s:===========================\n%s", 64 | tptr->tm_year + 1990, tptr->tm_mon + 1, 65 | tptr->tm_mday, tptr->tm_hour, tptr->tm_min, 66 | tptr->tm_sec, (long long) ((tv.tv_sec) * 1000 + (tv.tv_usec) / 1000), file, pos, func, header); 67 | 68 | va_start(ap, fmt); 69 | vfprintf(pLog, fmt, ap); 70 | fprintf(pLog, "\n\n"); 71 | va_end(ap); 72 | 73 | va_start(ap, fmt); 74 | vprintf(fmt, ap); 75 | printf("\n\n"); 76 | va_end(ap); 77 | 78 | fclose(pLog); 79 | } 80 | 81 | static const int MAX_FUNC_NAME_SIZE = 127; 82 | 83 | static std::string get_funcname(lua_State *L, lua_Debug *ar) { 84 | char buf[MAX_FUNC_NAME_SIZE + 1] = {0}; 85 | if (*ar->namewhat != '\0') /* is there a name from code? */ { 86 | snprintf(buf, MAX_FUNC_NAME_SIZE, "%s '%s'", ar->namewhat, ar->name); /* use it */ 87 | } else if (*ar->what == 'm') /* main? */ { 88 | snprintf(buf, MAX_FUNC_NAME_SIZE, "main chunk"); 89 | } else if (*ar->what != 'C') /* for Lua functions, use */ { 90 | snprintf(buf, MAX_FUNC_NAME_SIZE, "function <%s:%d>", ar->short_src, ar->linedefined); 91 | } else /* nothing left... */ { 92 | snprintf(buf, MAX_FUNC_NAME_SIZE, "%s(%d): %s\n", ar->short_src, ar->currentline, ar->name ? ar->name : "?"); 93 | } 94 | return buf; 95 | } 96 | 97 | static int lastlevel(lua_State *L) { 98 | lua_Debug ar; 99 | int li = 1, le = 1; 100 | /* find the bottom index of the call stack. 
*/ 101 | while (lua_getstack(L, le, &ar)) { 102 | li = le; 103 | le *= 2; 104 | } 105 | /* do a binary search */ 106 | while (li < le) { 107 | int m = (li + le) / 2; 108 | if (lua_getstack(L, m, &ar)) li = m + 1; 109 | else le = m; 110 | } 111 | return le - 1; 112 | } 113 | 114 | std::unordered_map gString2Id; 115 | std::unordered_map gId2String; 116 | 117 | static const char *IGNORE_NAME[] = {"?", "function 'xpcall'", "upvalue 'xpcall'", "field 'xpcall'", 118 | "function 'pcall'", "upvalue 'pcall'", "field 'pcall'", 119 | "function", "local 'func'"}; 120 | static const int VALID_MIN_ID = sizeof(IGNORE_NAME) / sizeof(const char *); 121 | 122 | static const int MAX_STACK_SIZE = 64; 123 | 124 | static const int CPU_SAMPLE_ITER = 10; 125 | 126 | struct CallStack { 127 | int depth; 128 | int stack[MAX_STACK_SIZE]; 129 | }; 130 | 131 | struct CallStackHash { 132 | size_t operator()(const CallStack &cs) const { 133 | size_t hash = 0; 134 | for (int i = 0; i < cs.depth; i++) { 135 | int id = cs.stack[i]; 136 | hash = (hash << 8) | (hash >> (8 * (sizeof(hash) - 1))); 137 | hash += (id * 31) + (id * 7) + (id * 3); 138 | } 139 | return hash; 140 | } 141 | }; 142 | 143 | struct CallStackEqual { 144 | bool operator()(const CallStack &cs1, const CallStack &cs2) const { 145 | if (cs1.depth != cs2.depth) { 146 | return false; 147 | } 148 | return memcmp(cs1.stack, cs2.stack, sizeof(int) * cs1.depth) == 0; 149 | } 150 | }; 151 | 152 | struct ProfileData { 153 | std::unordered_map callstack; 154 | int total = 0; 155 | int fd = 0; 156 | }; 157 | 158 | ProfileData gProfileData; 159 | 160 | static void flush_file(int fd, const char *buf, size_t len) { 161 | while (len > 0) { 162 | ssize_t r = write(fd, buf, len); 163 | buf += r; 164 | len -= r; 165 | } 166 | } 167 | 168 | static void flush() { 169 | if (gProfileData.total <= 0) { 170 | return; 171 | } 172 | 173 | LLOG("flush..."); 174 | 175 | for (auto iter = gProfileData.callstack.begin(); iter != gProfileData.callstack.end(); 
iter++) { 176 | const CallStack &cs = iter->first; 177 | int count = iter->second; 178 | 179 | flush_file(gProfileData.fd, (const char *) &count, sizeof(count)); 180 | flush_file(gProfileData.fd, (const char *) &cs, sizeof(cs)); 181 | } 182 | 183 | int total_len = 0; 184 | for (auto iter = gString2Id.begin(); iter != gString2Id.end(); iter++) { 185 | const std::string &str = iter->first; 186 | int id = iter->second; 187 | 188 | if (id < VALID_MIN_ID) { 189 | continue; 190 | } 191 | 192 | int len = str.length(); 193 | len = len > MAX_FUNC_NAME_SIZE ? MAX_FUNC_NAME_SIZE : len; 194 | flush_file(gProfileData.fd, str.c_str(), len); 195 | flush_file(gProfileData.fd, (const char *) &len, sizeof(len)); 196 | 197 | flush_file(gProfileData.fd, (const char *) &id, sizeof(id)); 198 | total_len++; 199 | } 200 | 201 | flush_file(gProfileData.fd, (const char *) &total_len, sizeof(total_len)); 202 | 203 | int total = gProfileData.total; 204 | LLOG("flush ok %d", gProfileData.total); 205 | 206 | gProfileData.total = 0; 207 | gProfileData.callstack.clear(); 208 | 209 | if (gProfileData.fd != 0) { 210 | close(gProfileData.fd); 211 | gProfileData.fd = 0; 212 | } 213 | 214 | printf("pLua flush ok %d\n", total); 215 | } 216 | 217 | static int lrealstopsafe(lua_State *L) { 218 | gRunning = 0; 219 | 220 | struct itimerval timer; 221 | timer.it_interval.tv_sec = 0; 222 | timer.it_interval.tv_usec = 0; 223 | timer.it_value = timer.it_interval; 224 | int ret = setitimer(ITIMER_PROF, &timer, NULL); 225 | if (ret != 0) { 226 | LERR("setitimer fail %d", ret); 227 | return ret; 228 | } 229 | 230 | flush(); 231 | 232 | lua_sethook(L, 0, 0, 0); 233 | 234 | return 0; 235 | } 236 | 237 | extern "C" int lrealstop(lua_State *L) { 238 | gRunning = 0; 239 | return 0; 240 | } 241 | 242 | static void get_cur_callstack(lua_State *L, CallStack &cs) { 243 | lua_Debug ar; 244 | 245 | int last = lastlevel(L); 246 | int i = 0; 247 | 248 | cs.depth = 0; 249 | 250 | while (lua_getstack(L, last, &ar) && i < 
MAX_STACK_SIZE) { 251 | lua_getinfo(L, "Slnt", &ar); 252 | auto funcname = get_funcname(L, &ar); 253 | 254 | i++; 255 | last--; 256 | 257 | int id = 0; 258 | auto iter = gString2Id.find(funcname); 259 | if (iter == gString2Id.end()) { 260 | id = gString2Id.size(); 261 | gString2Id[funcname] = id; 262 | gId2String[id] = funcname; 263 | } else { 264 | id = iter->second; 265 | } 266 | 267 | if (id < VALID_MIN_ID) { 268 | continue; 269 | } 270 | /* funcname is a std::string; the printf-style logger needs a C string */ 271 | LLOG("%s %d %d", funcname.c_str(), id, last); 272 | 273 | cs.stack[cs.depth] = id; 274 | cs.depth++; 275 | } 276 | } 277 | 278 | static void SignalHandlerHook(lua_State *L, lua_Debug *par) { 279 | lua_sethook(L, 0, 0, 0); 280 | 281 | LLOG("Hook..."); 282 | 283 | if (gRunning == 0 || (gSampleCount != 0 && gSampleCount <= gProfileData.total)) { 284 | LLOG("lrealstop..."); 285 | lrealstopsafe(L); 286 | return; 287 | } 288 | 289 | CallStack cs; 290 | get_cur_callstack(L, cs); 291 | 292 | gProfileData.callstack[cs]++; 293 | gProfileData.total++; 294 | } 295 | 296 | static void SignalHandler(int sig, siginfo_t *sinfo, void *ucontext) { 297 | lua_sethook(gL, SignalHandlerHook, LUA_MASKCOUNT, 1); 298 | } 299 | // lrealstartsafe: register the ignored names, install the SIGPROF handler, open the output file and arm the ITIMER_PROF timer; returns 0 on success, -1 on failure. 300 | static int lrealstartsafe(lua_State *L) { 301 | if (gRunning) { 302 | LERR("start again, failed"); 303 | return -1; 304 | } 305 | gRunning = 1; 306 | 307 | for (int i = 0; i < VALID_MIN_ID; i++) { 308 | gString2Id[IGNORE_NAME[i]] = i; 309 | gId2String[i] = IGNORE_NAME[i]; 310 | } 311 | 312 | struct sigaction sa; 313 | sa.sa_sigaction = SignalHandler; 314 | sa.sa_flags = SA_RESTART | SA_SIGINFO; 315 | sigemptyset(&sa.sa_mask); 316 | 317 | if (sigaction(SIGPROF, &sa, NULL) == -1) { 318 | LERR("sigaction(SIGPROF) failed"); 319 | gRunning = 0; return -1; /* clear the running flag so a later start is not rejected */ 320 | } 321 | 322 | int fd = open(gFilename.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0666); 323 | if (fd < 0) { 324 | LERR("open file fail %s", gFilename.c_str()); 325 | gRunning = 0; return -1; /* clear the running flag so a later start is not rejected */ 326 | } 327 | 328 | gProfileData.fd = fd; 329 | 330 | struct itimerval timer; 331 | timer.it_interval.tv_sec = 0; 
332 | timer.it_interval.tv_usec = CPU_SAMPLE_ITER * 1000; 333 | timer.it_value = timer.it_interval; 334 | int ret = setitimer(ITIMER_PROF, &timer, NULL); 335 | if (ret != 0) { 336 | LERR("setitimer fail %d", ret); 337 | return -1; 338 | } 339 | 340 | gProfileData.total = 0; 341 | gProfileData.callstack.clear(); 342 | 343 | return 0; 344 | } 345 | 346 | static void StartHandlerHook(lua_State *L, lua_Debug *par) { 347 | lrealstartsafe(L); 348 | lua_sethook(L, 0, 0, 0); 349 | } 350 | 351 | extern "C" int lrealstart(lua_State *L, int second, const char *file) { 352 | if (gRunning) { 353 | LERR("start again, failed"); 354 | return -1; 355 | } 356 | 357 | gL = L; 358 | gSampleCount = second * 1000 / CPU_SAMPLE_ITER; 359 | gFilename = file; 360 | 361 | // lrealstart可能被注入调用,在StartHandlerHook里面执行具体的逻辑 362 | lua_sethook(gL, StartHandlerHook, LUA_MASKCOUNT, 1); 363 | 364 | LLOG("lstart %u %s", gSampleCount, file); 365 | 366 | return 0; 367 | } 368 | 369 | static int lstart(lua_State *L) { 370 | int second = (int) lua_tointeger(L, 1); 371 | const char *file = lua_tostring(L, 2); 372 | int ret = lrealstart(L, second, file); 373 | lua_pushinteger(L, ret); 374 | return 1; 375 | } 376 | 377 | static int lstop(lua_State *L) { 378 | LLOG("lstop %s", gFilename.c_str()); 379 | int ret = lrealstop(L); 380 | lrealstopsafe(L); 381 | lua_pushinteger(L, ret); 382 | return 1; 383 | } 384 | 385 | //////////////////////////////////mem profiler start//////////////////////////////////////// 386 | 387 | static const int MEM_PROFILE_RATE = 524288; // 512K 388 | 389 | struct MemInfo { 390 | MemInfo() {} 391 | 392 | MemInfo(uint32_t a, uint32_t f) { 393 | allocs = a; 394 | frees = f; 395 | } 396 | 397 | uint32_t allocs = 0; 398 | uint32_t frees = 0; 399 | }; 400 | 401 | struct CallStackPointerHash { 402 | size_t operator()(CallStack *cs) const { 403 | size_t hash = 0; 404 | for (int i = 0; i < cs->depth; i++) { 405 | int id = cs->stack[i]; 406 | hash = (hash << 8) | (hash >> (8 * (sizeof(hash) - 
1))); 407 | hash += (id * 31) + (id * 7) + (id * 3); 408 | } 409 | return hash; 410 | } 411 | }; 412 | 413 | struct CallStackPointerEqual { 414 | bool operator()(CallStack *cs1, CallStack *cs2) const { 415 | if (cs1->depth != cs2->depth) { 416 | return false; 417 | } 418 | return memcmp(cs1->stack, cs2->stack, sizeof(int) * cs1->depth) == 0; 419 | } 420 | }; 421 | 422 | struct MemProfileData { 423 | std::unordered_map callstack; 424 | std::unordered_map ptr2Callstack; 425 | int total = 0; 426 | lua_Alloc oldAlloc = NULL; 427 | ssize_t nextSample = 0; 428 | uint64_t rand = 0; 429 | int alloc_size_fd = 0; 430 | int usage_fd = 0; 431 | int is_in_hook = 0; 432 | void *hook_alloc_ptr = 0; 433 | size_t hook_alloc_sz = 0; 434 | }; 435 | 436 | MemProfileData gMemProfileData; 437 | 438 | // 移植自gperftools的Sampler::NextRandom 439 | static uint64_t next_random(uint64_t rnd) { 440 | const uint64_t prng_mult = 0x5DEECE66DULL; 441 | const uint64_t prng_add = 0xB; 442 | const uint64_t prng_mod_power = 48; 443 | const uint64_t prng_mod_mask = ~((~static_cast(0)) << prng_mod_power); 444 | return (prng_mult * rnd + prng_add) & prng_mod_mask; 445 | } 446 | 447 | #define MAX_SSIZE (static_cast(static_cast(static_cast(-1)) >> 1)) 448 | 449 | // 移植自gperftools的Sampler::PickNextSamplingPoint 450 | static ssize_t gen_next_sample() { 451 | gMemProfileData.rand = next_random(gMemProfileData.rand); 452 | // Take the top 26 bits as the random number 453 | // (This plus the 1<<58 sampling bound give a max possible step of 454 | // 5194297183973780480 bytes.) 455 | const uint64_t prng_mod_power = 48; // Number of bits in prng 456 | // The uint32_t cast is to prevent a (hard-to-reproduce) NAN 457 | // under piii debug for some binaries. 458 | double q = static_cast(gMemProfileData.rand >> (prng_mod_power - 26)) + 1.0; 459 | // Put the computed p-value through the CDF of a geometric. 
460 | double interval = (log2(q) - 26) * (-log(2.0) * MEM_PROFILE_RATE); 461 | 462 | // Very large values of interval overflow ssize_t. If we happen to 463 | // hit such improbable condition, we simply cheat and clamp interval 464 | // to largest supported value. 465 | return static_cast(std::min(interval, static_cast(MAX_SSIZE))); 466 | } 467 | 468 | static void flush_mem_left(int fd) { 469 | int total_len = 0; 470 | for (auto iter = gString2Id.begin(); iter != gString2Id.end(); iter++) { 471 | const std::string &str = iter->first; 472 | int id = iter->second; 473 | 474 | if (id < VALID_MIN_ID) { 475 | continue; 476 | } 477 | 478 | int len = str.length(); 479 | len = len > MAX_FUNC_NAME_SIZE ? MAX_FUNC_NAME_SIZE : len; 480 | flush_file(fd, str.c_str(), len); 481 | flush_file(fd, (const char *) &len, sizeof(len)); 482 | 483 | flush_file(fd, (const char *) &id, sizeof(id)); 484 | total_len++; 485 | } 486 | 487 | flush_file(fd, (const char *) &total_len, sizeof(total_len)); 488 | 489 | if (fd != 0) { 490 | close(fd); 491 | } 492 | } 493 | 494 | static void flush_mem_alloc_size() { 495 | int fd = gMemProfileData.alloc_size_fd; 496 | 497 | for (auto iter = gMemProfileData.callstack.begin(); iter != gMemProfileData.callstack.end(); iter++) { 498 | const CallStack &cs = *iter->first; 499 | auto &mem_info = iter->second; 500 | 501 | flush_file(fd, (const char *) &mem_info.allocs, sizeof(mem_info.allocs)); 502 | flush_file(fd, (const char *) &cs, sizeof(cs)); 503 | } 504 | 505 | flush_mem_left(fd); 506 | gMemProfileData.alloc_size_fd = 0; 507 | } 508 | 509 | static void flush_mem_usage() { 510 | int fd = gMemProfileData.usage_fd; 511 | 512 | for (auto iter = gMemProfileData.callstack.begin(); iter != gMemProfileData.callstack.end(); iter++) { 513 | const CallStack &cs = *iter->first; 514 | auto &mem_info = iter->second; 515 | auto usage = (int) mem_info.allocs - (int) mem_info.frees; 516 | if (usage <= 0) { 517 | continue; 518 | } 519 | 520 | flush_file(fd, (const char *) 
&usage, sizeof(usage)); 521 | flush_file(fd, (const char *) &cs, sizeof(cs)); 522 | } 523 | 524 | flush_mem_left(fd); 525 | gMemProfileData.usage_fd = 0; 526 | } 527 | 528 | static void flush_mem() { 529 | if (gMemProfileData.total <= 0) { 530 | return; 531 | } 532 | 533 | LLOG("flush..."); 534 | 535 | flush_mem_alloc_size(); 536 | flush_mem_usage(); 537 | 538 | int total = gMemProfileData.total; 539 | LLOG("flush ok %d", gMemProfileData.total); 540 | 541 | gMemProfileData.total = 0; 542 | for (auto iter = gMemProfileData.callstack.begin(); iter != gMemProfileData.callstack.end(); iter++) { 543 | delete iter->first; 544 | } 545 | gMemProfileData.callstack.clear(); 546 | gMemProfileData.ptr2Callstack.clear(); 547 | 548 | printf("pLua flush ok %d\n", total); 549 | } 550 | 551 | static int lrealstopmemsafe(lua_State *L) { 552 | 553 | lua_setallocf(L, gMemProfileData.oldAlloc, NULL); 554 | 555 | gRunning = 0; 556 | 557 | flush_mem(); 558 | 559 | lua_sethook(L, 0, 0, 0); 560 | 561 | return 0; 562 | } 563 | 564 | extern "C" int lrealstopmem(lua_State *L) { 565 | gRunning = 0; 566 | return 0; 567 | } 568 | 569 | static void my_lua_Alloc_safe() { 570 | gMemProfileData.is_in_hook++; 571 | 572 | auto hook_alloc_ptr = gMemProfileData.hook_alloc_ptr; 573 | auto hook_alloc_sz = gMemProfileData.hook_alloc_sz; 574 | 575 | CallStack cs; 576 | get_cur_callstack(gL, cs); // 内部可能触发再次分配内存 577 | 578 | auto it = gMemProfileData.callstack.find(&cs); 579 | CallStack *pointer_cs = 0; 580 | if (it == gMemProfileData.callstack.end()) { 581 | auto new_cs = new CallStack(); 582 | memcpy(new_cs, &cs, sizeof(cs)); 583 | gMemProfileData.callstack[new_cs] = MemInfo(1, 0); 584 | pointer_cs = new_cs; 585 | } else { 586 | auto &mem_info = it->second; 587 | mem_info.allocs++; 588 | pointer_cs = it->first; 589 | } 590 | 591 | // add new pointer 592 | gMemProfileData.ptr2Callstack[hook_alloc_ptr] = pointer_cs; 593 | 594 | LLOG("alloc %p %u", hook_alloc_ptr, hook_alloc_sz); 595 | 596 | 
gMemProfileData.is_in_hook--; 597 | } 598 | 599 | static void AllocMemHandlerHook(lua_State *L, lua_Debug *par) { 600 | my_lua_Alloc_safe(); 601 | lua_sethook(L, 0, 0, 0); 602 | } 603 | 604 | static void *my_lua_Alloc(void *ud, void *ptr, size_t osize, size_t nsize) { 605 | size_t realosize = (ptr) ? osize : 0; 606 | 607 | if (gRunning == 0) { 608 | lrealstopmemsafe(gL); 609 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 610 | } 611 | 612 | if (realosize < nsize) { 613 | LLOG("my_lua_Alloc alloc %p %p %u %u", ud, ptr, osize, nsize); 614 | 615 | if (gMemProfileData.is_in_hook) { 616 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 617 | } 618 | 619 | // check stop if set sample count 620 | if (gSampleCount != 0 && gSampleCount <= gMemProfileData.total) { 621 | LLOG("lrealstopmem..."); 622 | lrealstopmem(gL); 623 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 624 | } 625 | 626 | // is alloc 627 | size_t alloc_sz = nsize - realosize; 628 | 629 | if (realosize > 0 && ptr) { 630 | // remove old pointer 631 | gMemProfileData.ptr2Callstack.erase(ptr); 632 | } 633 | 634 | // 是否命中采样 635 | if (static_cast(alloc_sz) < gMemProfileData.nextSample) { 636 | LLOG("not hit %ld %u", gMemProfileData.nextSample, alloc_sz); 637 | gMemProfileData.nextSample -= static_cast(alloc_sz); 638 | LLOG("nextSample %ld", gMemProfileData.nextSample); 639 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 640 | } 641 | 642 | gMemProfileData.nextSample = gen_next_sample(); 643 | LLOG("gen_next_sample nextSample %ld", gMemProfileData.nextSample); 644 | 645 | // start profile 646 | gMemProfileData.total++; 647 | 648 | // 防止重入,get_cur_callstack是可能触发lua内存分配的。当再次重入,只是设置下hook,等原来hook退出后清空 649 | lua_sethook(gL, AllocMemHandlerHook, LUA_MASKCOUNT, 1); 650 | 651 | void *alloc_ptr = gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 652 | 653 | // 记录参数 654 | gMemProfileData.hook_alloc_ptr = alloc_ptr; 655 | gMemProfileData.hook_alloc_sz = alloc_sz; 656 | 657 | 
LLOG("prealloc %p %u", alloc_ptr, alloc_sz); 658 | 659 | return alloc_ptr; 660 | } else if (realosize > nsize) { 661 | LLOG("my_lua_Alloc free %p %p %u %u", ud, ptr, osize, nsize); 662 | 663 | // remove old pointer 664 | auto it = gMemProfileData.ptr2Callstack.find(ptr); 665 | if (it == gMemProfileData.ptr2Callstack.end()) { 666 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 667 | } 668 | 669 | CallStack *cs = it->second; 670 | gMemProfileData.ptr2Callstack.erase(it); 671 | 672 | auto it2 = gMemProfileData.callstack.find(cs); 673 | if (it2 != gMemProfileData.callstack.end()) { 674 | auto &mem_info = it2->second; 675 | mem_info.frees++; 676 | 677 | LLOG("free %p %u %u", ptr, realosize, nsize); 678 | } 679 | 680 | void *ret = gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 681 | if (nsize > 0 && ret) { 682 | // add new pointer 683 | gMemProfileData.ptr2Callstack[ret] = cs; 684 | } 685 | 686 | return ret; 687 | } else { 688 | return gMemProfileData.oldAlloc(ud, ptr, osize, nsize); 689 | } 690 | } 691 | // get_mem_filename: insert suffix before the extension of path's file name ("mem.pro" -> "mem_ALLOC_SIZE.pro"); when the file name has no extension the suffix is appended. 692 | static std::string get_mem_filename(std::string path, const std::string &suffix) { 693 | auto slash = path.find_last_of('/'); 694 | auto pos = path.find_last_of('.'); 695 | if (pos == std::string::npos || (slash != std::string::npos && pos < slash)) { 696 | pos = path.length(); /* no dot in the file name itself (a dot in a directory part does not count): append so the two output files get distinct names */ 697 | } 698 | return path.insert(pos, suffix); 699 | } 700 | 701 | static int lrealstartmemsafe(lua_State *L) { 702 | if (gRunning) { 703 | LERR("start again, failed"); 704 | return -1; 705 | } 706 | gRunning = 1; 707 | 708 | for (int i = 0; i < VALID_MIN_ID; i++) { 709 | gString2Id[IGNORE_NAME[i]] = i; 710 | gId2String[i] = IGNORE_NAME[i]; 711 | } 712 | 713 | int fd = open(get_mem_filename(gFilename, "_ALLOC_SIZE").c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0666); 714 | if (fd < 0) { 715 | LERR("open file fail %s", gFilename.c_str()); 716 | gRunning = 0; return -1; /* clear the running flag so a later start is not rejected */ 717 | } 718 | gMemProfileData.alloc_size_fd = fd; 719 | 720 | fd = open(get_mem_filename(gFilename, "_USAGE").c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0666); 721 | if (fd < 0) { 722 | LERR("open file fail %s", 
gFilename.c_str()); 723 | return -1; 724 | } 725 | gMemProfileData.usage_fd = fd; 726 | 727 | gMemProfileData.total = 0; 728 | for (auto iter = gMemProfileData.callstack.begin(); iter != gMemProfileData.callstack.end(); iter++) { 729 | delete iter->first; 730 | } 731 | gMemProfileData.callstack.clear(); 732 | gMemProfileData.ptr2Callstack.clear(); 733 | 734 | // replace the realloc 735 | gMemProfileData.oldAlloc = lua_getallocf(L, NULL); 736 | lua_setallocf(L, my_lua_Alloc, NULL); 737 | gMemProfileData.rand = time(NULL); 738 | // Step it forward 20 times for good measure 739 | for (int i = 0; i < 20; i++) { 740 | gMemProfileData.rand = next_random(gMemProfileData.rand); 741 | } 742 | gMemProfileData.nextSample = gen_next_sample(); 743 | gMemProfileData.is_in_hook = 0; 744 | gMemProfileData.hook_alloc_ptr = 0; 745 | gMemProfileData.hook_alloc_sz = 0; 746 | 747 | return 0; 748 | } 749 | 750 | static void StartMemHandlerHook(lua_State *L, lua_Debug *par) { 751 | lrealstartmemsafe(L); 752 | lua_sethook(L, 0, 0, 0); 753 | } 754 | 755 | extern "C" int lrealstartmem(lua_State *L, int count, const char *file) { 756 | if (gRunning) { 757 | LERR("start again, failed"); 758 | return -1; 759 | } 760 | 761 | gL = L; 762 | gSampleCount = count; 763 | gFilename = file; 764 | gMemProfileData.oldAlloc = NULL; 765 | gMemProfileData.nextSample = 0; 766 | 767 | // lrealstartmem可能被注入调用,在StartMemHandlerHook里面执行具体的逻辑 768 | lua_sethook(gL, StartMemHandlerHook, LUA_MASKCOUNT, 1); 769 | 770 | LLOG("lstart %u %s", gSampleCount, file); 771 | 772 | return 0; 773 | } 774 | 775 | static int lstart_mem(lua_State *L) { 776 | int count = (int) lua_tointeger(L, 1); 777 | const char *file = lua_tostring(L, 2); 778 | int ret = lrealstartmem(L, count, file); 779 | lua_pushinteger(L, ret); 780 | return 1; 781 | } 782 | 783 | static int lstop_mem(lua_State *L) { 784 | LLOG("lstop %s", gFilename.c_str()); 785 | int ret = lrealstopmem(L); 786 | lrealstopmemsafe(L); 787 | lua_pushinteger(L, ret); 788 | 
return 1; 789 | } 790 | 791 | //////////////////////////////////mem profiler end////////////////////////////////////////// 792 | 793 | extern "C" int luaopen_libplua(lua_State *L) { 794 | luaL_checkversion(L); 795 | luaL_Reg l[] = { 796 | // for cpu 797 | {"start", lstart}, 798 | {"stop", lstop}, 799 | 800 | // for memory 801 | {"start_mem", lstart_mem}, 802 | {"stop_mem", lstop_mem}, 803 | 804 | {NULL, NULL}, 805 | }; 806 | luaL_newlib(L, l); 807 | return 1; 808 | } 809 | -------------------------------------------------------------------------------- /test/call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/esrrhs/pLua/c734adc80b9cbc18136a0aa6ae9b47dec9f13e11/test/call.png -------------------------------------------------------------------------------- /test/mem_ALLOC_SIZE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/esrrhs/pLua/c734adc80b9cbc18136a0aa6ae9b47dec9f13e11/test/mem_ALLOC_SIZE.png -------------------------------------------------------------------------------- /test/test_cpu.lua: -------------------------------------------------------------------------------- 1 | package.cpath = "../bin/?.so;" .. 
package.cpath 2 | 3 | function test() 4 | 5 | for i = 1, 300 do 6 | test1(i) 7 | end 8 | 9 | end 10 | 11 | function test1(n) 12 | 13 | for i = 1, 1000 do 14 | local j = i*n 15 | local f = "test2"..((i % 5) + 1) 16 | _G[f](j) 17 | end 18 | 19 | end 20 | 21 | function test21(n) 22 | 23 | for i = 1, 1000 do 24 | local j = i*n 25 | end 26 | 27 | end 28 | 29 | function test22(n) 30 | 31 | for i = 1, 2000 do 32 | local j = i*n 33 | end 34 | 35 | end 36 | 37 | function test23(n) 38 | 39 | for i = 1, 3000 do 40 | local j = i*n 41 | end 42 | 43 | end 44 | 45 | function test24(n) 46 | 47 | for i = 1, 4000 do 48 | local j = i*n 49 | end 50 | 51 | end 52 | 53 | function test25(n) 54 | 55 | for i = 1, 5000 do 56 | local j = i*n 57 | end 58 | 59 | end 60 | 61 | local p = require "libplua" 62 | 63 | p.start(0, "call.pro") 64 | 65 | test() 66 | 67 | p.stop() 68 | 69 | -------------------------------------------------------------------------------- /test/test_mem.lua: -------------------------------------------------------------------------------- 1 | package.cpath = "../bin/?.so;" .. 
package.cpath 2 | 3 | gt = {} 4 | 5 | function table_insert1(n) 6 | local t = {} 7 | table.insert(t, n) 8 | end 9 | 10 | function table_insert2(n) 11 | local t = {} 12 | table.insert(t, n) 13 | table_insert1(n) 14 | end 15 | 16 | function table_insert3(n) 17 | local t = {} 18 | table.insert(t, n) 19 | table_insert2(n) 20 | end 21 | 22 | function table_insert_global(n) 23 | table.insert(gt, n) 24 | end 25 | 26 | function test() 27 | for i = 1, 1000000 do 28 | table_insert1(i) 29 | table_insert2(i) 30 | table_insert3(i) 31 | end 32 | 33 | for i = 1, 1000000 do 34 | table_insert_global(i) 35 | end 36 | 37 | end 38 | 39 | local p = require "libplua" 40 | 41 | p.start_mem(0, "mem.pro") 42 | 43 | test() 44 | 45 | collectgarbage("collect") 46 | 47 | p.stop_mem() 48 | 49 | -------------------------------------------------------------------------------- /tools/flamegraph.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # 3 | # flamegraph.pl flame stack grapher. 4 | # 5 | # This takes stack samples and renders a call graph, allowing hot functions 6 | # and codepaths to be quickly identified. Stack samples can be generated using 7 | # tools such as DTrace, perf, SystemTap, and Instruments. 8 | # 9 | # USAGE: ./flamegraph.pl [options] input.txt > graph.svg 10 | # 11 | # grep funcA input.txt | ./flamegraph.pl [options] > graph.svg 12 | # 13 | # Then open the resulting .svg in a web browser, for interactivity: mouse-over 14 | # frames for info, click to zoom, and ctrl-F to search. 15 | # 16 | # Options are listed in the usage message (--help). 17 | # 18 | # The input is stack frames and sample counts formatted as single lines. Each 19 | # frame in the stack is semicolon separated, with a space and count at the end 20 | # of the line. 
These can be generated for Linux perf script output using 21 | # stackcollapse-perf.pl, for DTrace using stackcollapse.pl, and for other tools 22 | # using the other stackcollapse programs. Example input: 23 | # 24 | # swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1 25 | # 26 | # An optional extra column of counts can be provided to generate a differential 27 | # flame graph of the counts, colored red for more, and blue for less. This 28 | # can be useful when using flame graphs for non-regression testing. 29 | # See the header comment in the difffolded.pl program for instructions. 30 | # 31 | # The input functions can optionally have annotations at the end of each 32 | # function name, following a precedent by some tools (Linux perf's _[k]): 33 | # _[k] for kernel 34 | # _[i] for inlined 35 | # _[j] for jit 36 | # _[w] for waker 37 | # Some of the stackcollapse programs support adding these annotations, eg, 38 | # stackcollapse-perf.pl --kernel --jit. They are used merely for colors by 39 | # some palettes, eg, flamegraph.pl --color=java. 40 | # 41 | # The output flame graph shows relative presence of functions in stack samples. 42 | # The ordering on the x-axis has no meaning; since the data is samples, time 43 | # order of events is not known. The order used sorts function names 44 | # alphabetically. 45 | # 46 | # While intended to process stack samples, this can also process stack traces. 47 | # For example, tracing stacks for memory allocation, or resource usage. You 48 | # can use --title to set the title to reflect the content, and --countname 49 | # to change "samples" to "bytes" etc. 50 | # 51 | # There are a few different palettes, selectable using --color. By default, 52 | # the colors are selected at random (except for differentials). Functions 53 | # called "-" will be printed gray, which can be used for stack separators (eg, 54 | # between user and kernel stacks). 
55 | # 56 | # HISTORY 57 | # 58 | # This was inspired by Neelakanth Nadgir's excellent function_call_graph.rb 59 | # program, which visualized function entry and return trace events. As Neel 60 | # wrote: "The output displayed is inspired by Roch's CallStackAnalyzer which 61 | # was in turn inspired by the work on vftrace by Jan Boerhout". See: 62 | # https://blogs.oracle.com/realneel/entry/visualizing_callstacks_via_dtrace_and 63 | # 64 | # Copyright 2016 Netflix, Inc. 65 | # Copyright 2011 Joyent, Inc. All rights reserved. 66 | # Copyright 2011 Brendan Gregg. All rights reserved. 67 | # 68 | # CDDL HEADER START 69 | # 70 | # The contents of this file are subject to the terms of the 71 | # Common Development and Distribution License (the "License"). 72 | # You may not use this file except in compliance with the License. 73 | # 74 | # You can obtain a copy of the license at docs/cddl1.txt or 75 | # http://opensource.org/licenses/CDDL-1.0. 76 | # See the License for the specific language governing permissions 77 | # and limitations under the License. 78 | # 79 | # When distributing Covered Code, include this CDDL HEADER in each 80 | # file and include the License file at docs/cddl1.txt. 81 | # If applicable, add the following below this CDDL HEADER, with the 82 | # fields enclosed by brackets "[]" replaced with your own identifying 83 | # information: Portions Copyright [yyyy] [name of copyright owner] 84 | # 85 | # CDDL HEADER END 86 | # 87 | # 11-Oct-2014 Adrien Mahieux Added zoom. 88 | # 21-Nov-2013 Shawn Sterling Added consistent palette file option 89 | # 17-Mar-2013 Tim Bunce Added options and more tunables. 90 | # 15-Dec-2011 Dave Pacheco Support for frames with whitespace. 91 | # 10-Sep-2011 Brendan Gregg Created this. 
92 | 93 | use strict; 94 | 95 | use Getopt::Long; 96 | 97 | use open qw(:std :utf8); 98 | 99 | # tunables 100 | my $encoding; 101 | my $fonttype = "Verdana"; 102 | my $imagewidth = 1200; # max width, pixels 103 | my $frameheight = 16; # max height is dynamic 104 | my $fontsize = 12; # base text size 105 | my $fontwidth = 0.59; # avg width relative to fontsize 106 | my $minwidth = 0.1; # min function width, pixels 107 | my $nametype = "Function:"; # what are the names in the data? 108 | my $countname = "samples"; # what are the counts in the data? 109 | my $colors = "hot"; # color theme 110 | my $bgcolors = ""; # background color theme 111 | my $nameattrfile; # file holding function attributes 112 | my $timemax; # (override the) sum of the counts 113 | my $factor = 1; # factor to scale counts by 114 | my $hash = 0; # color by function name 115 | my $palette = 0; # if we use consistent palettes (default off) 116 | my %palette_map; # palette map hash 117 | my $pal_file = "palette.map"; # palette map file name 118 | my $stackreverse = 0; # reverse stack order, switching merge end 119 | my $inverted = 0; # icicle graph 120 | my $flamechart = 0; # produce a flame chart (sort by time, do not merge stacks) 121 | my $negate = 0; # switch differential hues 122 | my $titletext = ""; # centered heading 123 | my $titledefault = "Flame Graph"; # overwritten by --title 124 | my $titleinverted = "Icicle Graph"; # " " 125 | my $searchcolor = "rgb(230,0,230)"; # color for search highlighting 126 | my $notestext = ""; # embedded notes in SVG 127 | my $subtitletext = ""; # second level title (optional) 128 | my $help = 0; 129 | 130 | sub usage { 131 | die < outfile.svg\n 133 | --title TEXT # change title text 134 | --subtitle TEXT # second level title (optional) 135 | --width NUM # width of image (default 1200) 136 | --height NUM # height of each frame (default 16) 137 | --minwidth NUM # omit smaller functions (default 0.1 pixels) 138 | --fonttype FONT # font type (default "Verdana") 
139 | --fontsize NUM # font size (default 12) 140 | --countname TEXT # count type label (default "samples") 141 | --nametype TEXT # name type label (default "Function:") 142 | --colors PALETTE # set color palette. choices are: hot (default), mem, 143 | # io, wakeup, chain, java, js, perl, red, green, blue, 144 | # aqua, yellow, purple, orange 145 | --bgcolors COLOR # set background colors. gradient choices are yellow 146 | # (default), blue, green, grey; flat colors use "#rrggbb" 147 | --hash # colors are keyed by function name hash 148 | --cp # use consistent palette (palette.map) 149 | --reverse # generate stack-reversed flame graph 150 | --inverted # icicle graph 151 | --flamechart # produce a flame chart (sort by time, do not merge stacks) 152 | --negate # switch differential hues (blue<->red) 153 | --notes TEXT # add notes comment in SVG (for debugging) 154 | --help # this message 155 | 156 | eg, 157 | $0 --title="Flame Graph: malloc()" trace.txt > graph.svg 158 | USAGE_END 159 | } 160 | 161 | GetOptions( 162 | 'fonttype=s' => \$fonttype, 163 | 'width=i' => \$imagewidth, 164 | 'height=i' => \$frameheight, 165 | 'encoding=s' => \$encoding, 166 | 'fontsize=f' => \$fontsize, 167 | 'fontwidth=f' => \$fontwidth, 168 | 'minwidth=f' => \$minwidth, 169 | 'title=s' => \$titletext, 170 | 'subtitle=s' => \$subtitletext, 171 | 'nametype=s' => \$nametype, 172 | 'countname=s' => \$countname, 173 | 'nameattr=s' => \$nameattrfile, 174 | 'total=s' => \$timemax, 175 | 'factor=f' => \$factor, 176 | 'colors=s' => \$colors, 177 | 'bgcolors=s' => \$bgcolors, 178 | 'hash' => \$hash, 179 | 'cp' => \$palette, 180 | 'reverse' => \$stackreverse, 181 | 'inverted' => \$inverted, 182 | 'flamechart' => \$flamechart, 183 | 'negate' => \$negate, 184 | 'notes=s' => \$notestext, 185 | 'help' => \$help, 186 | ) or usage(); 187 | $help && usage(); 188 | 189 | # internals 190 | my $ypad1 = $fontsize * 3; # pad top, include title 191 | my $ypad2 = $fontsize * 2 + 10; # pad bottom, include labels 
192 | my $ypad3 = $fontsize * 2; # pad top, include subtitle (optional) 193 | my $xpad = 10; # pad lefm and right 194 | my $framepad = 1; # vertical padding for frames 195 | my $depthmax = 0; 196 | my %Events; 197 | my %nameattr; 198 | 199 | if ($flamechart && $titletext eq "") { 200 | $titletext = "Flame Chart"; 201 | } 202 | 203 | if ($titletext eq "") { 204 | unless ($inverted) { 205 | $titletext = $titledefault; 206 | } else { 207 | $titletext = $titleinverted; 208 | } 209 | } 210 | 211 | if ($nameattrfile) { 212 | # The name-attribute file format is a function name followed by a tab then 213 | # a sequence of tab separated name=value pairs. 214 | open my $attrfh, $nameattrfile or die "Can't read $nameattrfile: $!\n"; 215 | while (<$attrfh>) { 216 | chomp; 217 | my ($funcname, $attrstr) = split /\t/, $_, 2; 218 | die "Invalid format in $nameattrfile" unless defined $attrstr; 219 | $nameattr{$funcname} = { map { split /=/, $_, 2 } split /\t/, $attrstr }; 220 | } 221 | } 222 | 223 | if ($notestext =~ /[<>]/) { 224 | die "Notes string can't contain < or >" 225 | } 226 | 227 | # background colors: 228 | # - yellow gradient: default (hot, java, js, perl) 229 | # - green gradient: mem 230 | # - blue gradient: io, wakeup, chain 231 | # - gray gradient: flat colors (red, green, blue, ...) 
232 | if ($bgcolors eq "") { 233 | # choose a default 234 | if ($colors eq "mem") { 235 | $bgcolors = "green"; 236 | } elsif ($colors =~ /^(io|wakeup|chain)$/) { 237 | $bgcolors = "blue"; 238 | } elsif ($colors =~ /^(red|green|blue|aqua|yellow|purple|orange)$/) { 239 | $bgcolors = "grey"; 240 | } else { 241 | $bgcolors = "yellow"; 242 | } 243 | } 244 | my ($bgcolor1, $bgcolor2); 245 | if ($bgcolors eq "yellow") { 246 | $bgcolor1 = "#eeeeee"; # background color gradient start 247 | $bgcolor2 = "#eeeeb0"; # background color gradient stop 248 | } elsif ($bgcolors eq "blue") { 249 | $bgcolor1 = "#eeeeee"; $bgcolor2 = "#e0e0ff"; 250 | } elsif ($bgcolors eq "green") { 251 | $bgcolor1 = "#eef2ee"; $bgcolor2 = "#e0ffe0"; 252 | } elsif ($bgcolors eq "grey") { 253 | $bgcolor1 = "#f8f8f8"; $bgcolor2 = "#e8e8e8"; 254 | } elsif ($bgcolors =~ /^#......$/) { 255 | $bgcolor1 = $bgcolor2 = $bgcolors; 256 | } else { 257 | die "Unrecognized bgcolor option \"$bgcolors\"" 258 | } 259 | 260 | # SVG functions 261 | { package SVG; 262 | sub new { 263 | my $class = shift; 264 | my $self = {}; 265 | bless ($self, $class); 266 | return $self; 267 | } 268 | 269 | sub header { 270 | my ($self, $w, $h) = @_; 271 | my $enc_attr = ''; 272 | if (defined $encoding) { 273 | $enc_attr = qq{ encoding="$encoding"}; 274 | } 275 | $self->{svg} .= < 277 | 278 | 279 | 280 | 281 | SVG 282 | } 283 | 284 | sub include { 285 | my ($self, $content) = @_; 286 | $self->{svg} .= $content; 287 | } 288 | 289 | sub colorAllocate { 290 | my ($self, $r, $g, $b) = @_; 291 | return "rgb($r,$g,$b)"; 292 | } 293 | 294 | sub group_start { 295 | my ($self, $attr) = @_; 296 | 297 | my @g_attr = map { 298 | exists $attr->{$_} ? 
sprintf(qq/$_="%s"/, $attr->{$_}) : () 299 | } qw(id class); 300 | push @g_attr, $attr->{g_extra} if $attr->{g_extra}; 301 | if ($attr->{href}) { 302 | my @a_attr; 303 | push @a_attr, sprintf qq/xlink:href="%s"/, $attr->{href} if $attr->{href}; 304 | # default target=_top else links will open within SVG 305 | push @a_attr, sprintf qq/target="%s"/, $attr->{target} || "_top"; 306 | push @a_attr, $attr->{a_extra} if $attr->{a_extra}; 307 | $self->{svg} .= sprintf qq/\n/, join(' ', (@a_attr, @g_attr)); 308 | } else { 309 | $self->{svg} .= sprintf qq/\n/, join(' ', @g_attr); 310 | } 311 | 312 | $self->{svg} .= sprintf qq/%s<\/title>/, $attr->{title} 313 | if $attr->{title}; # should be first element within g container 314 | } 315 | 316 | sub group_end { 317 | my ($self, $attr) = @_; 318 | $self->{svg} .= $attr->{href} ? qq/<\/a>\n/ : qq/<\/g>\n/; 319 | } 320 | 321 | sub filledRectangle { 322 | my ($self, $x1, $y1, $x2, $y2, $fill, $extra) = @_; 323 | $x1 = sprintf "%0.1f", $x1; 324 | $x2 = sprintf "%0.1f", $x2; 325 | my $w = sprintf "%0.1f", $x2 - $x1; 326 | my $h = sprintf "%0.1f", $y2 - $y1; 327 | $extra = defined $extra ? $extra : ""; 328 | $self->{svg} .= qq/\n/; 329 | } 330 | 331 | sub stringTTF { 332 | my ($self, $id, $x, $y, $str, $extra) = @_; 333 | $x = sprintf "%0.2f", $x; 334 | $id = defined $id ? qq/id="$id"/ : ""; 335 | $extra ||= ""; 336 | $self->{svg} .= qq/$str<\/text>\n/; 337 | } 338 | 339 | sub svg { 340 | my $self = shift; 341 | return "$self->{svg}\n"; 342 | } 343 | 1; 344 | } 345 | 346 | sub namehash { 347 | # Generate a vector hash for the name string, weighting early over 348 | # later characters. We want to pick the same colors for function 349 | # names across different flame graphs. 
# color($type, $hash, $name)
#
# Return an "rgb(r,g,b)" string for the frame called $name under palette
# $type.  When $hash is true the colour is derived from namehash($name) so
# it is stable between runs; otherwise three random values are drawn.
# The "java", "perl", "js", "wakeup" and "chain" palettes first map the
# frame name onto one of the plain colour palettes and then fall through.
# Unknown palettes yield black.
sub color {
	my ($type, $hash, $name) = @_;
	my ($v1, $v2, $v3);

	if ($hash) {
		$v1 = namehash($name);
		$v2 = $v3 = namehash(scalar reverse $name);
	} else {
		$v1 = rand(1);
		$v2 = rand(1);
		$v3 = rand(1);
	}

	# theme palettes
	if (defined $type and $type eq "hot") {
		my $r = 205 + int(50 * $v3);
		my $g = 0 + int(230 * $v1);
		my $b = 0 + int(55 * $v2);
		return "rgb($r,$g,$b)";
	}
	if (defined $type and $type eq "mem") {
		my $r = 0;
		my $g = 190 + int(50 * $v2);
		my $b = 0 + int(210 * $v1);
		return "rgb($r,$g,$b)";
	}
	if (defined $type and $type eq "io") {
		my $r = 80 + int(60 * $v1);
		my $g = $r;
		my $b = 190 + int(55 * $v2);
		return "rgb($r,$g,$b)";
	}

	# multi palettes
	if (defined $type and $type eq "java") {
		# Handle both annotations (_[j], _[i], ...; which are
		# accurate), as well as input that lacks any annotations, as
		# best as possible. Without annotations, we get a little hacky
		# and match on java|org|com, etc.
		if ($name =~ m:_\[j\]$:) { # jit annotation
			$type = "green";
		} elsif ($name =~ m:_\[i\]$:) { # inline annotation
			$type = "aqua";
		} elsif ($name =~ m:^L?(java|javax|jdk|net|org|com|io|sun)/:) { # Java
			$type = "green";
		} elsif ($name =~ /:::/) { # Java, typical perf-map-agent method separator
			$type = "green";
		} elsif ($name =~ /::/) { # C++
			$type = "yellow";
		} elsif ($name =~ m:_\[k\]$:) { # kernel annotation
			$type = "orange";
		} else { # system
			# (a second, unreachable /::/ test used to sit before this
			# branch; the C++ case is already handled above)
			$type = "red";
		}
		# fall-through to color palettes
	}
	if (defined $type and $type eq "perl") {
		if ($name =~ /::/) { # C++
			$type = "yellow";
		} elsif ($name =~ m:Perl: or $name =~ m:\.pl:) { # Perl
			$type = "green";
		} elsif ($name =~ m:_\[k\]$:) { # kernel
			$type = "orange";
		} else { # system
			$type = "red";
		}
		# fall-through to color palettes
	}
	if (defined $type and $type eq "js") {
		# Handle both annotations (_[j], _[i], ...; which are
		# accurate), as well as input that lacks any annotations, as
		# best as possible. Without annotations, we get a little hacky,
		# and match on a "/" with a ".js", etc.
		if ($name =~ m:_\[j\]$:) { # jit annotation
			if ($name =~ m:/:) {
				$type = "green"; # source
			} else {
				$type = "aqua"; # builtin
			}
		} elsif ($name =~ /::/) { # C++
			$type = "yellow";
		} elsif ($name =~ m:/.*\.js:) { # JavaScript (match "/" in path)
			$type = "green";
		} elsif ($name =~ m/:/) { # JavaScript (match ":" in builtin)
			$type = "aqua";
		} elsif ($name =~ m/^ $/) { # Missing symbol
			$type = "green";
		} elsif ($name =~ m:_\[k\]:) { # kernel
			$type = "orange";
		} else { # system
			$type = "red";
		}
		# fall-through to color palettes
	}
	if (defined $type and $type eq "wakeup") {
		$type = "aqua";
		# fall-through to color palettes
	}
	if (defined $type and $type eq "chain") {
		if ($name =~ m:_\[w\]:) { # waker
			$type = "aqua"
		} else { # off-CPU
			$type = "blue";
		}
		# fall-through to color palettes
	}

	# color palettes
	if (defined $type and $type eq "red") {
		my $r = 200 + int(55 * $v1);
		my $x = 50 + int(80 * $v1);
		return "rgb($r,$x,$x)";
	}
	if (defined $type and $type eq "green") {
		my $g = 200 + int(55 * $v1);
		my $x = 50 + int(60 * $v1);
		return "rgb($x,$g,$x)";
	}
	if (defined $type and $type eq "blue") {
		my $b = 205 + int(50 * $v1);
		my $x = 80 + int(60 * $v1);
		return "rgb($x,$x,$b)";
	}
	if (defined $type and $type eq "yellow") {
		my $x = 175 + int(55 * $v1);
		my $b = 50 + int(20 * $v1);
		return "rgb($x,$x,$b)";
	}
	if (defined $type and $type eq "purple") {
		my $x = 190 + int(65 * $v1);
		my $g = 80 + int(60 * $v1);
		return "rgb($x,$g,$x)";
	}
	if (defined $type and $type eq "aqua") {
		my $r = 50 + int(60 * $v1);
		my $g = 165 + int(55 * $v1);
		my $b = 165 + int(55 * $v1);
		return "rgb($r,$g,$b)";
	}
	if (defined $type and $type eq "orange") {
		my $r = 190 + int(65 * $v1);
		my $g = 90 + int(65 * $v1);
		return "rgb($r,$g,0)";
	}

	return "rgb(0,0,0)";
}

# color_scale($value, $max)
#
# Colour for a differential frame: white for no change, increasingly red
# for positive $value and increasingly blue for negative $value, scaled
# against $max.  The sign is flipped first when --negate is in effect.
sub color_scale {
	my ($value, $max) = @_;
	my ($r, $g, $b) = (255, 255, 255);
	$value = -$value if $negate;
	if ($value > 0) {
		$g = $b = int(210 * ($max - $value) / $max);
	} elsif ($value < 0) {
		$r = $g = int(210 * ($max + $value) / $max);
	}
	return "rgb($r,$g,$b)";
}

# color_map($colors, $func)
#
# Return the colour remembered for $func in %palette_map, computing and
# remembering one with color() on first use.
sub color_map {
	my ($colors, $func) = @_;
	if (exists $palette_map{$func}) {
		return $palette_map{$func};
	} else {
		$palette_map{$func} = color($colors, $hash, $func);
		return $palette_map{$func};
	}
}

# write_palette()
#
# Save %palette_map to $pal_file, one "name->rgb(...)" entry per line,
# sorted by name.  A file that cannot be written is reported with a warning
# instead of being silently ignored; the already generated SVG is unaffected.
sub write_palette {
	open(my $fh, '>', $pal_file) or do {
		warn "can't write file $pal_file: $!\n";
		return;
	};
	foreach my $key (sort keys %palette_map) {
		print $fh $key."->".$palette_map{$key}."\n";
	}
	close($fh) or warn "can't close file $pal_file: $!\n";
}

# read_palette()
#
# Load "name->rgb(...)" entries from $pal_file into %palette_map.  Does
# nothing when the file does not exist.
sub read_palette {
	return unless -e $pal_file;
	open(my $fh, '<', $pal_file) or die "can't open file $pal_file: $!";
	while (my $line = <$fh>) {
		chomp($line);
		# Split on the LAST "->": the colour never contains "->", but a
		# function name may (e.g. "operator->"), and splitting on the
		# first one would truncate the name and corrupt its colour.
		my ($key, $value) = $line =~ /^(.*)->(.*)$/ or next;
		$palette_map{$key} = $value;
	}
	close($fh);
}
581 | $Node{"$k;$v"}->{stime} = delete $Tmp{$k}->{stime}; 582 | if (defined $Tmp{$k}->{delta}) { 583 | $Node{"$k;$v"}->{delta} = delete $Tmp{$k}->{delta}; 584 | } 585 | delete $Tmp{$k}; 586 | } 587 | 588 | for ($i = $len_same; $i <= $len_b; $i++) { 589 | my $k = "$this->[$i];$i"; 590 | $Tmp{$k}->{stime} = $v; 591 | if (defined $d) { 592 | $Tmp{$k}->{delta} += $i == $len_b ? $d : 0; 593 | } 594 | } 595 | 596 | return $this; 597 | } 598 | 599 | # parse input 600 | my @Data; 601 | my @SortedData; 602 | my $last = []; 603 | my $time = 0; 604 | my $delta = undef; 605 | my $ignored = 0; 606 | my $line; 607 | my $maxdelta = 1; 608 | 609 | # reverse if needed 610 | foreach (<>) { 611 | chomp; 612 | $line = $_; 613 | if ($stackreverse) { 614 | # there may be an extra samples column for differentials 615 | # XXX todo: redo these REs as one. It's repeated below. 616 | my($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); 617 | my $samples2 = undef; 618 | if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { 619 | $samples2 = $samples; 620 | ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); 621 | unshift @Data, join(";", reverse split(";", $stack)) . " $samples $samples2"; 622 | } else { 623 | unshift @Data, join(";", reverse split(";", $stack)) . " $samples"; 624 | } 625 | } else { 626 | unshift @Data, $line; 627 | } 628 | } 629 | 630 | if ($flamechart) { 631 | # In flame chart mode, just reverse the data so time moves from left to right. 
632 | @SortedData = reverse @Data; 633 | } else { 634 | @SortedData = sort @Data; 635 | } 636 | 637 | # process and merge frames 638 | foreach (@SortedData) { 639 | chomp; 640 | # process: folded_stack count 641 | # eg: func_a;func_b;func_c 31 642 | my ($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); 643 | unless (defined $samples and defined $stack) { 644 | ++$ignored; 645 | next; 646 | } 647 | 648 | # there may be an extra samples column for differentials: 649 | my $samples2 = undef; 650 | if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { 651 | $samples2 = $samples; 652 | ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); 653 | } 654 | $delta = undef; 655 | if (defined $samples2) { 656 | $delta = $samples2 - $samples; 657 | $maxdelta = abs($delta) if abs($delta) > $maxdelta; 658 | } 659 | 660 | # for chain graphs, annotate waker frames with "_[w]", for later 661 | # coloring. This is a hack, but has a precedent ("_[k]" from perf). 662 | if ($colors eq "chain") { 663 | my @parts = split ";--;", $stack; 664 | my @newparts = (); 665 | $stack = shift @parts; 666 | $stack .= ";--;"; 667 | foreach my $part (@parts) { 668 | $part =~ s/;/_[w];/g; 669 | $part .= "_[w]"; 670 | push @newparts, $part; 671 | } 672 | $stack .= join ";--;", @parts; 673 | } 674 | 675 | # merge frames and populate %Node: 676 | $last = flow($last, [ '', split ";", $stack ], $time, $delta); 677 | 678 | if (defined $samples2) { 679 | $time += $samples2; 680 | } else { 681 | $time += $samples; 682 | } 683 | } 684 | flow($last, [], $time, $delta); 685 | 686 | warn "Ignored $ignored lines with invalid format\n" if $ignored; 687 | unless ($time) { 688 | warn "ERROR: No stack counts found\n"; 689 | my $im = SVG->new(); 690 | # emit an error message SVG, for tools automating flamegraph use 691 | my $imageheight = $fontsize * 5; 692 | $im->header($imagewidth, $imageheight); 693 | $im->stringTTF(undef, int($imagewidth / 2), $fontsize * 2, 694 | "ERROR: No valid input provided to flamegraph.pl."); 
695 | print $im->svg; 696 | exit 2; 697 | } 698 | if ($timemax and $timemax < $time) { 699 | warn "Specified --total $timemax is less than actual total $time, so ignored\n" 700 | if $timemax/$time > 0.02; # only warn is significant (e.g., not rounding etc) 701 | undef $timemax; 702 | } 703 | $timemax ||= $time; 704 | 705 | my $widthpertime = ($imagewidth - 2 * $xpad) / $timemax; 706 | my $minwidth_time = $minwidth / $widthpertime; 707 | 708 | # prune blocks that are too narrow and determine max depth 709 | while (my ($id, $node) = each %Node) { 710 | my ($func, $depth, $etime) = split ";", $id; 711 | my $stime = $node->{stime}; 712 | die "missing start for $id" if not defined $stime; 713 | 714 | if (($etime-$stime) < $minwidth_time) { 715 | delete $Node{$id}; 716 | next; 717 | } 718 | $depthmax = $depth if $depth > $depthmax; 719 | } 720 | 721 | # draw canvas, and embed interactive JavaScript program 722 | my $imageheight = (($depthmax + 1) * $frameheight) + $ypad1 + $ypad2; 723 | $imageheight += $ypad3 if $subtitletext ne ""; 724 | my $titlesize = $fontsize + 5; 725 | my $im = SVG->new(); 726 | my ($black, $vdgrey, $dgrey) = ( 727 | $im->colorAllocate(0, 0, 0), 728 | $im->colorAllocate(160, 160, 160), 729 | $im->colorAllocate(200, 200, 200), 730 | ); 731 | $im->header($imagewidth, $imageheight); 732 | my $inc = < 734 | 735 | 736 | 737 | 738 | 739 | 750 | 1151 | INC 1152 | $im->include($inc); 1153 | $im->filledRectangle(0, 0, $imagewidth, $imageheight, 'url(#background)'); 1154 | $im->stringTTF("title", int($imagewidth / 2), $fontsize * 2, $titletext); 1155 | $im->stringTTF("subtitle", int($imagewidth / 2), $fontsize * 4, $subtitletext) if $subtitletext ne ""; 1156 | $im->stringTTF("details", $xpad, $imageheight - ($ypad2 / 2), " "); 1157 | $im->stringTTF("unzoom", $xpad, $fontsize * 2, "Reset Zoom", 'class="hide"'); 1158 | $im->stringTTF("search", $imagewidth - $xpad - 100, $fontsize * 2, "Search"); 1159 | $im->stringTTF("ignorecase", $imagewidth - $xpad - 16, 
$fontsize * 2, "ic"); 1160 | $im->stringTTF("matched", $imagewidth - $xpad - 100, $imageheight - ($ypad2 / 2), " "); 1161 | 1162 | if ($palette) { 1163 | read_palette(); 1164 | } 1165 | 1166 | # draw frames 1167 | $im->group_start({id => "frames"}); 1168 | while (my ($id, $node) = each %Node) { 1169 | my ($func, $depth, $etime) = split ";", $id; 1170 | my $stime = $node->{stime}; 1171 | my $delta = $node->{delta}; 1172 | 1173 | $etime = $timemax if $func eq "" and $depth == 0; 1174 | 1175 | my $x1 = $xpad + $stime * $widthpertime; 1176 | my $x2 = $xpad + $etime * $widthpertime; 1177 | my ($y1, $y2); 1178 | unless ($inverted) { 1179 | $y1 = $imageheight - $ypad2 - ($depth + 1) * $frameheight + $framepad; 1180 | $y2 = $imageheight - $ypad2 - $depth * $frameheight; 1181 | } else { 1182 | $y1 = $ypad1 + $depth * $frameheight; 1183 | $y2 = $ypad1 + ($depth + 1) * $frameheight - $framepad; 1184 | } 1185 | 1186 | my $samples = sprintf "%.0f", ($etime - $stime) * $factor; 1187 | (my $samples_txt = $samples) # add commas per perlfaq5 1188 | =~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g; 1189 | 1190 | my $info; 1191 | if ($func eq "" and $depth == 0) { 1192 | $info = "all ($samples_txt $countname, 100%)"; 1193 | } else { 1194 | my $pct = sprintf "%.2f", ((100 * $samples) / ($timemax * $factor)); 1195 | my $escaped_func = $func; 1196 | # clean up SVG breaking characters: 1197 | $escaped_func =~ s/&/&/g; 1198 | $escaped_func =~ s//>/g; 1200 | $escaped_func =~ s/"/"/g; 1201 | $escaped_func =~ s/_\[[kwij]\]$//; # strip any annotation 1202 | unless (defined $delta) { 1203 | $info = "$escaped_func ($samples_txt $countname, $pct%)"; 1204 | } else { 1205 | my $d = $negate ? -$delta : $delta; 1206 | my $deltapct = sprintf "%.2f", ((100 * $d) / ($timemax * $factor)); 1207 | $deltapct = $d > 0 ? 
"+$deltapct" : $deltapct; 1208 | $info = "$escaped_func ($samples_txt $countname, $pct%; $deltapct%)"; 1209 | } 1210 | } 1211 | 1212 | my $nameattr = { %{ $nameattr{$func}||{} } }; # shallow clone 1213 | $nameattr->{title} ||= $info; 1214 | $im->group_start($nameattr); 1215 | 1216 | my $color; 1217 | if ($func eq "--") { 1218 | $color = $vdgrey; 1219 | } elsif ($func eq "-") { 1220 | $color = $dgrey; 1221 | } elsif (defined $delta) { 1222 | $color = color_scale($delta, $maxdelta); 1223 | } elsif ($palette) { 1224 | $color = color_map($colors, $func); 1225 | } else { 1226 | $color = color($colors, $hash, $func); 1227 | } 1228 | $im->filledRectangle($x1, $y1, $x2, $y2, $color, 'rx="2" ry="2"'); 1229 | 1230 | my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth)); 1231 | my $text = ""; 1232 | if ($chars >= 3) { # room for one char plus two dots 1233 | $func =~ s/_\[[kwij]\]$//; # strip any annotation 1234 | $text = substr $func, 0, $chars; 1235 | substr($text, -2, 2) = ".." if $chars < length $func; 1236 | $text =~ s/&/&/g; 1237 | $text =~ s//>/g; 1239 | } 1240 | $im->stringTTF(undef, $x1 + 3, 3 + ($y1 + $y2) / 2, $text); 1241 | 1242 | $im->group_end($nameattr); 1243 | } 1244 | $im->group_end(); 1245 | 1246 | print $im->svg; 1247 | 1248 | if ($palette) { 1249 | write_palette(); 1250 | } 1251 | 1252 | # vim: ts=8 sts=8 sw=8 noexpandtab 1253 | -------------------------------------------------------------------------------- /tools/plua.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/binary" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "math" 9 | "os" 10 | "strings" 11 | ) 12 | 13 | const ( 14 | MAX_FUNC_NAME_SIZE = 127 15 | MAX_STACK_SIZE = 64 16 | ) 17 | 18 | type CallStack struct { 19 | count int 20 | deps int 21 | stacks []int 22 | } 23 | 24 | type FileData struct { 25 | str2id map[string]int 26 | id2str map[int]string 27 | callstack []CallStack 28 | } 29 | 30 | func main() { 31 | input 
:= flag.String("i", "", "input file") 32 | pprof := flag.String("pprof", "", "gen pprof symbolized-profiles") 33 | 34 | flag.Parse() 35 | 36 | if len(*input) == 0 { 37 | flag.Usage() 38 | os.Exit(1) 39 | } 40 | 41 | filedata, ok := parse(*input) 42 | if !ok { 43 | os.Exit(1) 44 | } 45 | 46 | if *pprof != "" { 47 | showpprof(filedata, *pprof) 48 | } 49 | } 50 | 51 | func parse(filename string) (*FileData, bool) { 52 | 53 | data, err := ioutil.ReadFile(filename) 54 | if err != nil { 55 | fmt.Printf("ReadFile fail %v\n", err) 56 | return nil, false 57 | } 58 | 59 | if len(data) < 4 { 60 | fmt.Printf("data error too small\n") 61 | return nil, false 62 | } 63 | 64 | namemaplen := int(binary.LittleEndian.Uint32(data[len(data)-4 : len(data)])) 65 | if namemaplen < 0 { 66 | fmt.Printf("name map len fail %v\n", namemaplen) 67 | return nil, false 68 | } 69 | 70 | str2id := make(map[string]int) 71 | id2str := make(map[int]string) 72 | 73 | namenum := 0 74 | end := len(data) - 4 75 | for i := 0; i < len(data) && namenum < namemaplen; i++ { 76 | start := end - 4 77 | if start < 0 || end < 0 { 78 | fmt.Printf("get id fail %v %v\n", start, end) 79 | return nil, false 80 | } 81 | id := int(binary.LittleEndian.Uint32(data[start:end])) 82 | end -= 4 83 | 84 | start = end - 4 85 | if start < 0 || end < 0 { 86 | fmt.Printf("get name len fail %v %v\n", start, end) 87 | return nil, false 88 | } 89 | namelen := int(binary.LittleEndian.Uint32(data[start:end])) 90 | end -= 4 91 | 92 | if namelen <= 0 || namelen > MAX_FUNC_NAME_SIZE { 93 | fmt.Printf("name len error %v\n", namelen) 94 | return nil, false 95 | } 96 | 97 | start = end - namelen 98 | str := string(data[start:end]) 99 | end -= namelen 100 | 101 | str2id[str] = id 102 | id2str[id] = str 103 | 104 | namenum++ 105 | } 106 | 107 | callstack := make([]CallStack, 0) 108 | for end > 0 { 109 | stacks := make([]int, MAX_STACK_SIZE) 110 | for i := 0; i < MAX_STACK_SIZE; i++ { 111 | start := end - 4 112 | stack := 
int(binary.LittleEndian.Uint32(data[start:end])) 113 | end -= 4 114 | stacks[MAX_STACK_SIZE-i-1] = stack 115 | } 116 | 117 | start := end - 4 118 | deps := int(binary.LittleEndian.Uint32(data[start:end])) 119 | end -= 4 120 | 121 | if deps <= 0 || deps > MAX_STACK_SIZE { 122 | fmt.Printf("deps error %v\n", deps) 123 | return nil, false 124 | } 125 | 126 | start = end - 4 127 | count := int(binary.LittleEndian.Uint32(data[start:end])) 128 | end -= 4 129 | 130 | if count <= 0 { 131 | fmt.Printf("count error %v\n", count) 132 | return nil, false 133 | } 134 | 135 | cs := CallStack{ 136 | count: count, 137 | deps: deps, 138 | stacks: stacks[0:deps], 139 | } 140 | 141 | callstack = append(callstack, cs) 142 | } 143 | 144 | filedata := &FileData{ 145 | str2id: str2id, 146 | id2str: id2str, 147 | callstack: callstack, 148 | } 149 | 150 | return filedata, true 151 | } 152 | 153 | func showpprof(filedata *FileData, filename string) { 154 | 155 | var output []byte 156 | 157 | output = append(output, []byte("--- symbol\n")...) 158 | output = append(output, []byte("binary=pLua\n")...) 159 | 160 | for id, str := range filedata.id2str { 161 | name := strings.Replace(str, "<", "'", -1) 162 | name = strings.Replace(name, ">", "'", -1) 163 | name = strings.Replace(name, "\"", "\\\"", -1) 164 | name = strings.ToValidUTF8(name, "?") 165 | tmp := fmt.Sprintf("0x%016x %s\n", id+0xFF000000, name) 166 | output = append(output, []byte(tmp)...) 167 | } 168 | 169 | output = append(output, []byte("---\n")...) 170 | output = append(output, []byte("--- profile\n")...) 171 | 172 | pack32 := func(v uint32) { 173 | var buff [4]byte 174 | binary.LittleEndian.PutUint32(buff[:], v) 175 | output = append(output, buff[:]...) 
176 | } 177 | 178 | // print header (64-bit style) 179 | // (zero) (header-size) (version) (sample-period) (zero) 180 | header := []byte{0, 0, 3, 0, 0, 0, 1, 0, 0, 0} 181 | for _, h := range header { 182 | pack32(uint32(h)) 183 | } 184 | 185 | total := 0 186 | 187 | for _, cs := range filedata.callstack { 188 | pack32(uint32(cs.count)) 189 | pack32(uint32(cs.count / int(math.Pow(2, 32)))) 190 | total += cs.count 191 | pack32(uint32(cs.deps)) 192 | pack32(uint32(cs.deps / int(math.Pow(2, 32)))) 193 | for i := len(cs.stacks) - 1; i >= 0; i-- { 194 | csp := cs.stacks[i] 195 | pack32(uint32(csp + 0xFF000000)) 196 | pack32(uint32(0)) 197 | } 198 | } 199 | 200 | f, err := os.Create(filename) 201 | if err != nil { 202 | fmt.Println(err) 203 | os.Exit(1) 204 | } 205 | defer f.Close() 206 | f.Write(output) 207 | 208 | fmt.Printf("total sample %v\n", total) 209 | } 210 | -------------------------------------------------------------------------------- /tools/png.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "flag" 6 | "fmt" 7 | "github.com/goccy/go-graphviz" 8 | "io/ioutil" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | 14 | input := flag.String("i", "", "input file") 15 | png := flag.String("png", "", "gen png file") 16 | 17 | flag.Parse() 18 | 19 | if len(*input) == 0 { 20 | flag.Usage() 21 | os.Exit(1) 22 | } 23 | 24 | if *png != "" || *input != "" { 25 | showpng(*input, *png) 26 | } 27 | } 28 | 29 | func showpng(dot string, png string) { 30 | inputfile, err := os.Open(dot) 31 | if err != nil { 32 | fmt.Println(err) 33 | os.Exit(1) 34 | } 35 | 36 | inputdata, err := ioutil.ReadAll(inputfile) 37 | if err != nil { 38 | fmt.Println(err) 39 | os.Exit(1) 40 | } 41 | 42 | graph, err := graphviz.ParseBytes(inputdata) 43 | if err != nil { 44 | fmt.Println(err) 45 | os.Exit(1) 46 | } 47 | 48 | if png != "" { 49 | g := graphviz.New() 50 | 51 | var buf bytes.Buffer 52 | if err := g.Render(graph, 
#! /bin/sh
#
# Render every *.pro profile found in a directory.
#
# Usage: show.sh <dir>
#
# For each <dir>/<name>.pro the following steps run in order, and the script
# exits with status 1 at the first step that fails:
#   ./plua -i <name>.pro -pprof <name>.prof
#   ./pprof --dot <name>.prof        > <name>.dot
#   ./png -i <name>.dot -png <name>.png
#   ./pprof --collapsed <name>.prof  > <name>.fl
#   ./flamegraph.pl <name>.fl        > <name>.svg

INPUT=$1

if [ -z "$INPUT" ]; then
    echo "usage: $0 <dir>"
    exit 1
fi

# Glob directly instead of parsing `ls -l` output, so paths containing
# whitespace survive intact.
for FILE in "$INPUT"/*.pro; do

    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$FILE" ] || continue

    # Strip only the trailing ".pro" suffix.
    NAME=${FILE%.pro}

    echo "show  $NAME"
    if ! ./plua -i "$NAME.pro" -pprof "$NAME.prof"; then
        echo "$NAME pprof fail"
        exit 1
    fi
    if ! ./pprof --dot "$NAME.prof" > "$NAME.dot"; then
        echo "$NAME dot fail"
        exit 1
    fi
    if ! ./png -i "$NAME.dot" -png "$NAME.png"; then
        echo "$NAME png fail"
        exit 1
    fi
    if ! ./pprof --collapsed "$NAME.prof" > "$NAME.fl"; then
        echo "$NAME collapsed fail"
        exit 1
    fi
    if ! ./flamegraph.pl "$NAME.fl" > "$NAME.svg"; then
        echo "$NAME svg fail"
        exit 1
    fi
    echo "$NAME ok"
done

echo "done!"