├── .gitignore ├── profile.h ├── makefile ├── imap.h ├── README.md ├── profile.lua ├── rdtsc.h ├── imap.c └── profile.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.s 3 | test_imap 4 | test_imap.c 5 | old_profile.lua 6 | *.out 7 | test.lua 8 | *.dSYM -------------------------------------------------------------------------------- /profile.h: -------------------------------------------------------------------------------- 1 | #ifndef _PROFILE_H_ 2 | #define _PROFILE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #define prealloc realloc 8 | #define pmalloc malloc 9 | #define pfree free 10 | #define pcalloc calloc 11 | 12 | int luaopen_profile_c(lua_State* L); 13 | 14 | #endif -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | 2 | all: macosx 3 | 4 | macosx: 5 | clang -undefined dynamic_lookup --shared -Wall -DUSE_RDTSC -g -O2 \ 6 | -o profile.so \ 7 | imap.c profile.c 8 | 9 | linux: 10 | gcc -shared -fPIC -Wall -g -O2 -DUSE_RDTSC \ 11 | -o profile.so \ 12 | imap.c profile.c 13 | 14 | clean: 15 | rm -rf profile.so 16 | 17 | .PHONY : all clean macosx linux -------------------------------------------------------------------------------- /imap.h: -------------------------------------------------------------------------------- 1 | #ifndef _IMAP_H_ 2 | #define _IMAP_H_ 3 | 4 | 5 | #include 6 | #include 7 | struct imap_context; 8 | 9 | 10 | struct imap_context* imap_create(); 11 | void imap_free(struct imap_context* imap); 12 | 13 | // the value is no-null point 14 | void imap_set(struct imap_context* imap, uint64_t key, void* value); 15 | 16 | void* imap_remove(struct imap_context* imap, uint64_t key); 17 | void* imap_query(struct imap_context* imap, uint64_t key); 18 | 19 | typedef void(*observer)(uint64_t key, void* value, void* ud); 20 | void imap_dump(struct imap_context* imap, observer 
observer_cb, void* ud); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lua profile lib 2 | 3 | Profiles C and Lua functions, and supports coroutine yield. 4 | 5 | ~~~.lua 6 | local profile = require "profile" 7 | 8 | profile.start() 9 | 10 | -- your code 11 | 12 | profile.dstop(32) -- dump top 32 call info 13 | -- output example 14 | --[[ 15 | 16 | ------- dump profile ------- 17 | [1] userdata: 0x7fc7e0c09990 name:loop file:[L]@test.lua:48 count:3 total:0.272042s ave:0.090681s percent:55.05% 18 | [2] userdata: 0x7fc7e0c09970 name:call_func file:[L]@test.lua:37 count:3000000 total:0.222086s ave:0.000000s percent:44.95% 19 | [3] userdata: 0x7fc7e0c099e0 name:foo2 file:[L]@test.lua:62 count:1 total:0.000000s ave:0.000000s percent:0% 20 | [4] userdata: 0x7fc7e0c09a30 name:foo file:[L]@test.lua:68 count:1 total:0.000000s ave:0.000000s percent:0% 21 | 22 | ]] 23 | 24 | ~~~ 25 | -------------------------------------------------------------------------------- /profile.lua: -------------------------------------------------------------------------------- 1 | local c = require "profile.c" 2 | c.init() 3 | local mark = c.mark 4 | 5 | local M = { 6 | start = c.start, 7 | stop = c.stop, 8 | } 9 | 10 | local old_co_create = coroutine.create 11 | local old_co_wrap = coroutine.wrap 12 | 13 | 14 | function coroutine.create(f) 15 | return old_co_create(function (...) 16 | mark() 17 | return f(...) 
26 | end) 27 | end 28 | 29 | 30 | function M.dump(records) 31 | local ret = {"------- dump profile -------"} 32 | for i,v in ipairs(records) do 33 | local s = string.format("[%d] %s name:%s file:[%s]%s:%d count:%d total:%fs ave:%fs percent:%.4g%%", 34 | i, v.point, v.name, v.flag, v.source, v.line, v.count, v.all_cost, v.ave_cost, v.percent*100) 35 | ret[#ret+1] = s 36 | end 37 | return table.concat(ret, "\n") 38 | end 39 | 40 | 41 | function M.dstop(count) 42 | local records = c.stop(count) 43 | local s = M.dump(records) 44 | print(s) 45 | end 46 | 47 | 48 | return M 49 | -------------------------------------------------------------------------------- /rdtsc.h: -------------------------------------------------------------------------------- 1 | #ifndef __RDTSC_H_DEFINED__ 2 | #define __RDTSC_H_DEFINED__ 3 | 4 | 5 | #if defined(__i386__) 6 | 7 | static __inline__ unsigned long long rdtsc(void) 8 | { 9 | unsigned long long int x; 10 | __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); 11 | return x; 12 | } 13 | #elif defined(__x86_64__) 14 | 15 | static __inline__ unsigned long long rdtsc(void) 16 | { 17 | unsigned hi, lo; 18 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 19 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 20 | } 21 | 22 | #elif defined(__powerpc__) 23 | 24 | static __inline__ unsigned long long rdtsc(void) 25 | { 26 | unsigned long long int result=0; 27 | unsigned long int upper, lower,tmp; 28 | __asm__ volatile( 29 | "0: \n" 30 | "\tmftbu %0 \n" 31 | "\tmftb %1 \n" 32 | "\tmftbu %2 \n" 33 | "\tcmpw %2,%0 \n" 34 | "\tbne 0b \n" 35 | : "=r"(upper),"=r"(lower),"=r"(tmp) 36 | ); 37 | result = upper; 38 | result = result<<32; 39 | result = result|lower; 40 | 41 | return(result); 42 | } 43 | 44 | #else 45 | 46 | #error "No tick counter is available!" 
47 | 48 | #endif 49 | 50 | 51 | /* $RCSfile: $ $Author: kazutomo $ 52 | * $Revision: 1.6 $ $Date: 2005/04/13 18:49:58 $ 53 | */ 54 | 55 | #endif -------------------------------------------------------------------------------- /imap.c: -------------------------------------------------------------------------------- 1 | #include "imap.h" 2 | #include "profile.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum imap_status { 9 | IS_NONE, 10 | IS_EXIST, 11 | IS_REMOVE, 12 | }; 13 | 14 | struct imap_slot { 15 | uint64_t key; 16 | void* value; 17 | enum imap_status status; 18 | struct imap_slot* next; 19 | }; 20 | 21 | struct imap_context { 22 | struct imap_slot* slots; 23 | size_t size; 24 | size_t count; 25 | struct imap_slot* lastfree; 26 | }; 27 | 28 | #define DEFAULT_IMAP_SLOT_SIZE 8 29 | 30 | struct imap_context * 31 | imap_create() { 32 | struct imap_context* imap = (struct imap_context*)pmalloc(sizeof(*imap)); 33 | imap->slots = (struct imap_slot*)pcalloc(DEFAULT_IMAP_SLOT_SIZE, sizeof(struct imap_slot)); 34 | imap->size = DEFAULT_IMAP_SLOT_SIZE; 35 | imap->count = 0; 36 | imap->lastfree = imap->slots + imap->size; 37 | return imap; 38 | } 39 | 40 | 41 | void 42 | imap_free(struct imap_context* imap) { 43 | pfree(imap->slots); 44 | pfree(imap); 45 | } 46 | 47 | 48 | static inline uint64_t 49 | _imap_hash(struct imap_context* imap, uint64_t key) { 50 | uint64_t hash = key % (uint64_t)(imap->size); 51 | return hash; 52 | } 53 | 54 | 55 | static void 56 | _imap_rehash(struct imap_context* imap) { 57 | size_t new_sz = DEFAULT_IMAP_SLOT_SIZE; 58 | struct imap_slot* old_slots = imap->slots; 59 | size_t old_count = imap->count; 60 | size_t old_size = imap->size; 61 | while(new_sz <= imap->count) { 62 | new_sz *= 2; 63 | } 64 | 65 | struct imap_slot* new_slots = (struct imap_slot*)pcalloc(new_sz, sizeof(struct imap_slot)); 66 | imap->lastfree = new_slots + new_sz; 67 | imap->size = new_sz; 68 | imap->slots = new_slots; 69 | imap->count = 0; 70 | 71 | size_t i=0; 
72 | for(i=0; istatus; 75 | if(status == IS_EXIST) { 76 | imap_set(imap, p->key, p->value); 77 | } 78 | } 79 | 80 | assert(old_count == imap->count); 81 | pfree(old_slots); 82 | } 83 | 84 | 85 | static struct imap_slot * 86 | _imap_query(struct imap_context* imap, uint64_t key) { 87 | uint64_t hash = _imap_hash(imap, key); 88 | struct imap_slot* p = &(imap->slots[hash]); 89 | if(p->status != IS_NONE) { 90 | while(p) { 91 | if(p->key == key && p->status == IS_EXIST) { 92 | return p; 93 | } 94 | p = p->next; 95 | } 96 | } 97 | return NULL; 98 | } 99 | 100 | 101 | void * 102 | imap_query(struct imap_context* imap, uint64_t key) { 103 | struct imap_slot* p = _imap_query(imap, key); 104 | if(p) { 105 | return p->value; 106 | } 107 | return NULL; 108 | } 109 | 110 | 111 | 112 | static struct imap_slot * 113 | _imap_getfree(struct imap_context* imap) { 114 | while(imap->lastfree > imap->slots) { 115 | imap->lastfree--; 116 | if(imap->lastfree->status == IS_NONE) { 117 | return imap->lastfree; 118 | } 119 | } 120 | return NULL; 121 | } 122 | 123 | 124 | 125 | void 126 | imap_set(struct imap_context* imap, uint64_t key, void* value) { 127 | assert(value); 128 | uint64_t hash = _imap_hash(imap, key); 129 | struct imap_slot* p = &(imap->slots[hash]); 130 | if(p->status == IS_EXIST) { 131 | struct imap_slot* np = p; 132 | while(np) { 133 | if(np->key == key && np->status == IS_EXIST) { 134 | np->value = value; 135 | return; 136 | } 137 | np = np->next; 138 | } 139 | 140 | np = _imap_getfree(imap); 141 | if(np == NULL) { 142 | _imap_rehash(imap); 143 | imap_set(imap, key, value); 144 | return; 145 | } 146 | 147 | uint64_t main_hash = _imap_hash(imap, p->key); 148 | np->next = p->next; 149 | p->next = np; 150 | if(main_hash == hash) { 151 | p = np; 152 | }else { 153 | np->key = p->key; 154 | np->value = p->value; 155 | np->status = IS_EXIST; 156 | } 157 | } 158 | 159 | imap->count++; 160 | p->status = IS_EXIST; 161 | p->key = key; 162 | p->value = value; 163 | } 164 | 165 | 166 
| void * 167 | imap_remove(struct imap_context* imap, uint64_t key) { 168 | struct imap_slot* p = _imap_query(imap, key); 169 | if(p) { 170 | imap->count--; 171 | p->status = IS_REMOVE; 172 | return p->value; 173 | } 174 | return NULL; 175 | } 176 | 177 | 178 | void 179 | imap_dump(struct imap_context* imap, observer observer_cb, void* ud) { 180 | size_t i=0; 181 | for(i=0; isize; i++) { 182 | struct imap_slot* v = &imap->slots[i]; 183 | if(v->status == IS_EXIST) { 184 | observer_cb(v->key, v->value, ud); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /profile.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #include "profile.h" 11 | #include "imap.h" 12 | 13 | #define get_item(context, idx) &((context)->record_pool.pool[idx]) 14 | #define cap_item(context) ((context)->record_pool.cap) 15 | 16 | #define NANOSEC 1000000000 17 | #define MICROSEC 1000000 18 | #define MILLISEC 1000 19 | #define MAX_SOURCE_LEN 128 20 | #define MAX_NAME_LEN 32 21 | #define MAX_CALL_SIZE 1024 22 | #define MAX_CI_SIZE 256 23 | #define DEFAULT_POOL_ITEM_COUNT 64 24 | 25 | struct record_item { 26 | const void* point; 27 | int count; 28 | char source[MAX_SOURCE_LEN]; 29 | char name[MAX_NAME_LEN]; 30 | int line; 31 | char flag; 32 | uint64_t all_cost; 33 | double ave_cost; 34 | double percent; 35 | }; 36 | 37 | struct call_frame { 38 | const void* point; 39 | const char* source; 40 | const char* name; 41 | bool tail; 42 | char flag; 43 | int line; 44 | uint64_t record_time; 45 | uint64_t call_time; 46 | uint64_t ret_time; 47 | uint64_t sub_cost; 48 | uint64_t real_cost; 49 | }; 50 | 51 | 52 | struct call_state { 53 | int top; 54 | double leave_time; 55 | double enter_time; 56 | struct call_frame call_list[0]; 57 | }; 58 | 59 | struct call_info { 60 | struct call_state* cs; 61 | lua_State* co; 
62 | }; 63 | 64 | struct profile_context { 65 | struct { 66 | struct record_item* pool; 67 | size_t cap; 68 | size_t sz; 69 | } record_pool; 70 | struct imap_context* imap; 71 | 72 | bool start; 73 | struct imap_context* co_map; 74 | 75 | int ci_top; 76 | struct call_info ci_list[0]; 77 | }; 78 | 79 | static const char KEY = 'k'; 80 | 81 | static struct profile_context * 82 | profile_create() { 83 | struct profile_context* context = (struct profile_context*)pmalloc( 84 | sizeof(struct profile_context) + sizeof(struct call_info)*MAX_CI_SIZE); 85 | 86 | context->start = false; 87 | context->imap = imap_create(); 88 | context->co_map = imap_create(); 89 | context->ci_top = 0; 90 | context->record_pool.pool = (struct record_item*)pmalloc(sizeof(struct record_item)*DEFAULT_POOL_ITEM_COUNT); 91 | context->record_pool.sz = DEFAULT_POOL_ITEM_COUNT; 92 | context->record_pool.cap = 0; 93 | return context; 94 | } 95 | 96 | static void 97 | _ob_free_call_state(uint64_t key, void* value, void* ud) { 98 | pfree(value); 99 | } 100 | 101 | static void 102 | profile_free(struct profile_context* context) { 103 | pfree(context->record_pool.pool); 104 | imap_free(context->imap); 105 | 106 | imap_dump(context->co_map, _ob_free_call_state, NULL); 107 | imap_free(context->co_map); 108 | pfree(context); 109 | } 110 | 111 | static void 112 | _ob_reset_call_state(uint64_t key, void* value, void* ud) { 113 | struct call_state* cs = (struct call_state*)value; 114 | cs->top = 0; 115 | } 116 | 117 | 118 | static void 119 | profile_reset(struct profile_context* context) { 120 | context->record_pool.cap = 0; 121 | context->ci_top = 0; 122 | imap_dump(context->co_map, _ob_reset_call_state, NULL); 123 | imap_free(context->imap); 124 | context->imap = imap_create(); 125 | } 126 | 127 | 128 | static inline struct call_info * 129 | push_callinfo(struct profile_context* context) { 130 | if(context->ci_top >= MAX_CI_SIZE) { 131 | assert(false); 132 | } 133 | return &context->ci_list[context->ci_top++]; 
134 | } 135 | 136 | 137 | static inline struct call_info * 138 | pop_callinfo(struct profile_context* context) { 139 | if(context->ci_top<=0) { 140 | assert(false); 141 | } 142 | return &context->ci_list[--context->ci_top]; 143 | } 144 | 145 | 146 | static struct call_state * 147 | get_call_state(struct profile_context* context, lua_State* co, int* co_status) { 148 | int ci_top = context->ci_top; 149 | struct call_info* cur_co_info = NULL; 150 | struct call_info* pre_co_info = NULL; 151 | if(ci_top > 0) { 152 | cur_co_info = &context->ci_list[ci_top-1]; 153 | } 154 | if(ci_top >= 2) { 155 | pre_co_info = &context->ci_list[ci_top-2]; 156 | } 157 | if(cur_co_info && cur_co_info->co == co) { 158 | *co_status = 0; 159 | return cur_co_info->cs; 160 | } 161 | 162 | uint64_t key = (uint64_t)((uintptr_t)co); 163 | struct call_state* cs = imap_query(context->co_map, key); 164 | if(cs == NULL) { 165 | cs = (struct call_state*)pmalloc(sizeof(struct call_state) + sizeof(struct call_frame)*MAX_CALL_SIZE); 166 | cs->top = 0; 167 | cs->enter_time = 0.0; 168 | cs->leave_time = 0.0; 169 | imap_set(context->co_map, key, cs); 170 | } 171 | 172 | if(pre_co_info && cs == pre_co_info->cs) { // pop co 173 | *co_status = -1; 174 | }else { // push co 175 | struct call_info* ci = push_callinfo(context); 176 | ci->cs = cs; 177 | ci->co = co; 178 | *co_status = 1; 179 | } 180 | 181 | return cs; 182 | } 183 | 184 | 185 | static inline struct call_frame * 186 | push_callframe(struct call_state* cs) { 187 | if(cs->top >= MAX_CALL_SIZE) { 188 | assert(false); 189 | } 190 | return &cs->call_list[cs->top++]; 191 | } 192 | 193 | static inline struct call_frame * 194 | pop_callframe(struct call_state* cs) { 195 | if(cs->top<=0) { 196 | assert(false); 197 | } 198 | return &cs->call_list[--cs->top]; 199 | } 200 | 201 | static inline struct call_frame * 202 | cur_callframe(struct call_state* cs) { 203 | if(cs->top<=0) { 204 | return NULL; 205 | } 206 | 207 | uint64_t idx = cs->top-1; 208 | return 
&cs->call_list[idx]; 209 | } 210 | 211 | static struct record_item * 212 | record_item_new(struct profile_context* context) { 213 | if(context->record_pool.cap >= context->record_pool.sz) { 214 | size_t new_sz = context->record_pool.sz * 2; 215 | struct record_item* new_pool = (struct record_item*)prealloc(context->record_pool.pool, new_sz*sizeof(struct record_item)); 216 | assert(new_pool); 217 | context->record_pool.pool = new_pool; 218 | context->record_pool.sz = new_sz; 219 | } 220 | 221 | return &context->record_pool.pool[context->record_pool.cap++]; 222 | } 223 | 224 | 225 | static void 226 | record_item_add(struct profile_context* context, struct call_frame* frame) { 227 | uint64_t key = (uint64_t)((uintptr_t)frame->point); 228 | uint64_t record_pos = (uint64_t)((uintptr_t)imap_query(context->imap, key)); 229 | struct record_item* item = NULL; 230 | 231 | if(record_pos==0) { 232 | item = record_item_new(context); 233 | size_t pos = context->record_pool.cap; 234 | item->point = frame->point; 235 | item->count = 0; 236 | item->flag = frame->flag; 237 | strncpy(item->source, frame->source, sizeof(item->source)); 238 | item->source[MAX_SOURCE_LEN-1] = '\0'; // padding zero terimal 239 | strncpy(item->name, frame->name, sizeof(item->name)); 240 | item->name[MAX_NAME_LEN-1] = '\0'; // padding zero terimal 241 | item->line = frame->line; 242 | item->all_cost = 0; 243 | item->ave_cost = 0.0; 244 | item->percent = 0.0; 245 | imap_set(context->imap, key, (void*)(pos)); 246 | } else { 247 | item = get_item(context, record_pos-1); 248 | } 249 | 250 | item->count++; 251 | item->all_cost += frame->real_cost; 252 | } 253 | 254 | 255 | 256 | #ifdef USE_RDTSC 257 | #include "rdtsc.h" 258 | static inline uint64_t 259 | gettime() { 260 | return rdtsc(); 261 | } 262 | 263 | static inline double 264 | realtime(uint64_t t) { 265 | return (double) t / (2000000000); 266 | } 267 | #else 268 | #ifdef __EMSCRIPTEN__ 269 | #include 270 | static inline uint64_t 271 | gettime() { 272 | 
uint64_t n = (uint64_t)emscripten_get_now(); 273 | return n; 274 | } 275 | 276 | static inline double 277 | realtime(uint64_t t) { 278 | return (double)t / MILLISEC; 279 | } 280 | #else 281 | static inline uint64_t 282 | gettime() { 283 | struct timespec ti; 284 | // clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ti); 285 | // clock_gettime(CLOCK_MONOTONIC, &ti); 286 | clock_gettime(CLOCK_REALTIME, &ti); // would be faster 287 | 288 | long sec = ti.tv_sec & 0xffff; 289 | long nsec = ti.tv_nsec; 290 | 291 | return sec * NANOSEC + nsec; 292 | } 293 | 294 | static inline double 295 | realtime(uint64_t t) { 296 | return (double)t / NANOSEC; 297 | } 298 | #endif 299 | #endif 300 | 301 | 302 | static inline struct profile_context * 303 | _get_profile(lua_State* L) { 304 | lua_rawgetp(L, LUA_REGISTRYINDEX, (void *)&KEY); 305 | struct profile_context* addr = (struct profile_context*)lua_touserdata(L, -1); 306 | lua_pop(L, 1); 307 | return addr; 308 | } 309 | 310 | 311 | static void 312 | _resolve_hook(lua_State* L, lua_Debug* arv) { 313 | uint64_t cur_time = gettime(); 314 | struct profile_context* context = _get_profile(L); 315 | if(!context->start) { 316 | return; 317 | } 318 | 319 | int event = arv->event; 320 | lua_Debug ar; 321 | int ret = lua_getstack(L, 0, &ar); 322 | const void* point = NULL; 323 | const char* source = NULL; 324 | const char* name = NULL; 325 | char flag = 'L'; 326 | int line = -1; 327 | if(!ret) { 328 | return; 329 | } 330 | 331 | int co_status = 0; 332 | struct call_state* cs = get_call_state(context, L, &co_status); 333 | double co_cost = 0.0; 334 | if(co_status == 1) { 335 | cs->enter_time = cur_time; 336 | if(cs->leave_time > 0.0) { 337 | co_cost = cs->enter_time - cs->leave_time; 338 | assert(co_cost>=0.0); 339 | } 340 | 341 | }else if(co_status == -1) { 342 | struct call_info* ci = pop_callinfo(context); 343 | ci->cs->leave_time = cur_time; 344 | co_cost = ci->cs->leave_time - ci->cs->enter_time; 345 | assert(co_cost>=0.0); 346 | } 347 | 348 | 
#ifdef OPEN_DEBUG 349 | printf("hook L:%p ci_count:%d name:%s source:%s:%d event:%d\n", L, context->ci_top, name, source, line, event); 350 | #endif 351 | if(event == LUA_HOOKCALL || event == LUA_HOOKTAILCALL) { 352 | #ifdef USE_EXPORT_NAME 353 | lua_getinfo(L, "nSlf", &ar); 354 | name = ar.name; 355 | #else 356 | lua_getinfo(L, "Slf", &ar); 357 | #endif 358 | point = lua_topointer(L, -1); 359 | line = ar.linedefined; 360 | source = ar.source; 361 | if (ar.what[0] == 'C' && event == LUA_HOOKCALL) { 362 | lua_Debug ar2; 363 | int i=0; 364 | do { 365 | i++; 366 | ret = lua_getstack(L, i, &ar2); 367 | flag = 'C'; 368 | if(ret) { 369 | lua_getinfo(L, "Sl", &ar2); 370 | if(ar2.what[0] != 'C') { 371 | line = ar2.currentline; 372 | source = ar2.source; 373 | break; 374 | } 375 | } 376 | }while(ret); 377 | } 378 | 379 | struct call_frame* frame = push_callframe(cs); 380 | frame->point = point; 381 | frame->flag = flag; 382 | frame->tail = event == LUA_HOOKTAILCALL; 383 | frame->source = (source)?(source):("null"); 384 | frame->name = (name)?(name):("null"); 385 | frame->line = line; 386 | frame->record_time = cur_time; 387 | frame->sub_cost = 0; 388 | frame->call_time = gettime(); 389 | 390 | }else if(event == LUA_HOOKRET) { 391 | int len = cs->top; 392 | if(len <= 0) { 393 | return; 394 | } 395 | bool tail_call = false; 396 | do { 397 | struct call_frame* cur_frame = pop_callframe(cs); 398 | cur_frame->sub_cost += co_cost; 399 | uint64_t total_cost = cur_time - cur_frame->call_time; 400 | uint64_t real_cost = total_cost - cur_frame->sub_cost; 401 | cur_frame->ret_time = cur_time; 402 | cur_frame->real_cost = real_cost; 403 | record_item_add(context, cur_frame); 404 | struct call_frame* pre_frame = cur_callframe(cs); 405 | if(pre_frame) { 406 | tail_call = cur_frame->tail; 407 | cur_time = gettime(); 408 | uint64_t s = cur_time - cur_frame->record_time; 409 | pre_frame->sub_cost += s; 410 | }else { 411 | tail_call = false; 412 | } 413 | }while(tail_call); 414 | } 415 | } 
416 | 417 | 418 | static int 419 | _lstart(lua_State* L) { 420 | struct profile_context* context = _get_profile(L); 421 | context->start = true; 422 | lua_sethook(L, _resolve_hook, LUA_MASKCALL | LUA_MASKRET, 0); 423 | return 0; 424 | } 425 | 426 | 427 | static int 428 | _lmark(lua_State* L) { 429 | struct profile_context* context = _get_profile(L); 430 | lua_State* co = lua_tothread(L, 1); 431 | if(co == NULL) { 432 | co = L; 433 | } 434 | if(context->start) { 435 | lua_sethook(co, _resolve_hook, LUA_MASKCALL | LUA_MASKRET, 0); 436 | } 437 | lua_pushboolean(L, context->start); 438 | return 1; 439 | } 440 | 441 | static int 442 | _lunmark(lua_State* L) { 443 | lua_State* co = lua_tothread(L, 1); 444 | if(co == NULL) { 445 | co = L; 446 | } 447 | lua_sethook(co, NULL, 0, 0); 448 | return 0; 449 | } 450 | 451 | 452 | struct dump_arg { 453 | int stage; 454 | struct profile_context* context; 455 | uint64_t total; 456 | 457 | int cap; 458 | struct record_item** records; 459 | }; 460 | 461 | static void 462 | _observer(uint64_t key, void* value, void* ud) { 463 | struct dump_arg* args = (struct dump_arg*)ud; 464 | size_t pos = (size_t)((uintptr_t)value); 465 | struct record_item* item = get_item(args->context, pos-1); 466 | 467 | if(args->stage == 0) { 468 | args->total += item->all_cost; 469 | item->ave_cost = realtime(item->all_cost) / item->count; 470 | }else if(args->stage == 1) { 471 | item->percent = (double)(item->all_cost) / (double)(args->total); 472 | args->records[args->cap++] = item; 473 | } 474 | } 475 | 476 | 477 | static int 478 | _compar(const void* v1, const void* v2) { 479 | struct record_item* a = *(struct record_item**)v1; 480 | struct record_item* b = *(struct record_item**)v2; 481 | signed long long f = b->all_cost - a->all_cost; 482 | return (f<0)?(-1):(1); 483 | } 484 | 485 | 486 | static void 487 | _item2table(lua_State* L, struct record_item* v) { 488 | char s[2] = {0}; 489 | lua_newtable(L); 490 | lua_pushlightuserdata(L, (void*)v->point); 491 
| lua_setfield(L, -2, "point"); 492 | 493 | lua_pushstring(L, v->name); 494 | lua_setfield(L, -2, "name"); 495 | 496 | s[0] = v->flag; 497 | lua_pushstring(L, s); 498 | lua_setfield(L, -2, "flag"); 499 | 500 | lua_pushstring(L, v->source); 501 | lua_setfield(L, -2, "source"); 502 | 503 | lua_pushinteger(L, v->line); 504 | lua_setfield(L, -2, "line"); 505 | 506 | lua_pushinteger(L, v->count); 507 | lua_setfield(L, -2, "count"); 508 | 509 | lua_pushnumber(L, realtime(v->all_cost)); 510 | lua_setfield(L, -2, "all_cost"); 511 | 512 | lua_pushnumber(L, v->ave_cost); 513 | lua_setfield(L, -2, "ave_cost"); 514 | 515 | lua_pushnumber(L, v->percent); 516 | lua_setfield(L, -2, "percent"); 517 | } 518 | 519 | 520 | static void 521 | _ob_clear(uint64_t key, void* value, void* ud) { 522 | struct imap_context* co_map = (struct imap_context*)ud; 523 | struct call_state* cs = (struct call_state*)value; 524 | #ifdef OPEN_DEBUG 525 | int i; 526 | printf("---- lua_state:%llx ----\n", key); 527 | for(i=0; itop; i++) { 528 | struct call_frame* frame = &cs->call_list[i]; 529 | printf("[%d] name:%s source:%s:%d\n", i, frame->name, frame->source, frame->line); 530 | } 531 | #endif 532 | imap_remove(co_map, key); 533 | pfree(cs); 534 | } 535 | 536 | static void 537 | _clear_call_state(struct profile_context* context) { 538 | imap_dump(context->co_map, _ob_clear, context->co_map); 539 | } 540 | 541 | static void 542 | dump_record_items(lua_State *L, struct profile_context* context) { 543 | if (!context) { 544 | context = _get_profile(L); 545 | } 546 | 547 | size_t sz = context->record_pool.cap; 548 | size_t count = (size_t)luaL_optinteger(L, 1, sz); 549 | count = (count > sz)?(sz):(count); 550 | 551 | struct dump_arg arg; 552 | arg.context = context; 553 | arg.stage = 0; 554 | arg.total = 0.0; 555 | arg.cap = 0; 556 | arg.records = (struct record_item**)pmalloc(sz*sizeof(struct record_item*)); 557 | 558 | // calculate total and ave_cost 559 | imap_dump(context->imap, _observer, 
(void*)&arg); 560 | 561 | // calculate percent 562 | arg.stage = 1; 563 | imap_dump(context->imap, _observer, (void*)&arg); 564 | 565 | // sort record 566 | qsort((void*)arg.records, arg.cap, sizeof(struct record_item*), _compar); 567 | 568 | lua_newtable(L); 569 | int i=0; 570 | for(i=0; istart = false; 589 | // reset 590 | profile_reset(context); 591 | return 1; 592 | } 593 | 594 | 595 | static int 596 | _ldump(lua_State* L) { 597 | dump_record_items(L, NULL); 598 | return 1; 599 | } 600 | 601 | static int 602 | _linit(lua_State* L) { 603 | struct profile_context* context = _get_profile(L); 604 | if(context) { 605 | luaL_error(L, "profile context already initialized!"); 606 | } 607 | 608 | context = profile_create(); 609 | 610 | // init registry 611 | lua_pushlightuserdata(L, context); 612 | lua_rawsetp(L, LUA_REGISTRYINDEX, (void *)&KEY); 613 | return 0; 614 | } 615 | 616 | static int 617 | _ldestory(lua_State* L) { 618 | struct profile_context* context = _get_profile(L); 619 | if(context) { 620 | profile_free(context); 621 | 622 | // reset registry 623 | lua_pushlightuserdata(L, (void *)&KEY); 624 | lua_pushnil(L); 625 | lua_settable(L, LUA_REGISTRYINDEX); 626 | } 627 | return 0; 628 | } 629 | 630 | 631 | int 632 | luaopen_profile_c(lua_State* L) { 633 | luaL_checkversion(L); 634 | luaL_Reg l[] = { 635 | {"start", _lstart}, 636 | {"stop", _lstop}, 637 | {"mark", _lmark}, 638 | {"unmark", _lunmark}, 639 | {"init", _linit}, 640 | {"destory", _ldestory}, 641 | {"dump", _ldump}, 642 | {NULL, NULL}, 643 | }; 644 | luaL_newlib(L, l); 645 | return 1; 646 | } 647 | 648 | 649 | --------------------------------------------------------------------------------