├── .gitignore ├── Makefile ├── README.md ├── unit_perf.c └── unit_perf.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | obj-m := unit_perf.o 2 | 3 | KDIR := /lib/modules/`uname -r`/build 4 | 5 | modules: 6 | make -C $(KDIR) M=$(PWD) modules 7 | 8 | clean: 9 | @rm -rf *.ko *.o *.mod.o *.mod.c .symvers .tmp_versions modules.order Module.symvers 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # unit_perf 2 | 3 | ## Introduction 4 | It is a performance measurement tool as the perf supplement. 5 | 6 | As we know, the perf is one good tool to find the performance bottleneck. But sometimes it is not good enough to profile our own codes especially the codes is only one part of whole system. In this case, we may don't care about the cost of the whole system. As a result, the unwanted results from other parts of system may occupies the most of top list, even we could not get the result we care really. 7 | 8 | Now you cold use this unit_perf to get the performance of the codes you specify the aid codes clearly. Maybe they are hooks, functions, even some lines of codes. 9 | 10 | And it provides some other help macros to debug issues. 
For example, you could use UP_PID_INFO_LOG to print the log when the current pid equals the specific pid set by /proc/unit_perf/monitor_pid 11 | 12 | And you could get the result or reset it by proc 13 | 14 | ## Usage 15 | ### When you want to find the bottleneck in your codes, you could use the monitor point to check it. 16 | 1. Use up_add_monitor to add the monitor point name; 17 | ATTENTION: It should be invoked in process/thread context. Because it will allocate memory with GFP_KERNEL 18 | 2. Invoke the up_start_monitor when you reach the monitor point; 19 | 3. Invoke the up_end_monitor after the monitor point; 20 | ATTENTION: The monitor name is the index of unit perf, so you should keep it consistent. 21 | 4. Check the result: 22 | cat /proc/unit_perf/top_list; 23 | 5. Reset the result if necessary 24 | cat /proc/unit_perf/reset_result 25 | 6. Use up_remove_monitor to remove the monitor point name. 26 | 7. Use /proc/unit_perf/monitor_pid to set the monitor pid which could filter the logs. 27 | 28 | 29 | NOTE: You could use UP_AUTO_START_FUNC_MONITOR and UP_AUTO_END_FUNC_MONITOR to avoid filling in the function name by yourself. 30 | 31 | ### When you want to check the performance of one function, you could use up_func_once to get it. 32 | 1. Create one function whose signature is like up_test_func (Defined in unit_perf.h); 33 | 2. Invoke the up_func_once(_preempt/bh/irq) according to your requirement. 34 | 3. Check the result by dmesg 35 | 36 | ### Example 37 | Assume you want to check the performance of __nf_conntrack_alloc. 38 | 1. Invoke the up_add_monitor("__nf_conntrack_alloc") in nf_conntrack_init_net; 39 | 2. Invoke the up_start_monitor("__nf_conntrack_alloc") at the entry of __nf_conntrack_alloc; 40 | 3. Invoke the up_end_monitor("__nf_conntrack_alloc") at the exit of __nf_conntrack_alloc; 41 | 4. 
Invoke the up_remove_monitor("__nf_conntrack_alloc") in nf_conntrack_cleanup_net; 42 | 43 | Note: Actually you may use any name as the monitor name when checking the performance of __nf_conntrack_alloc. 44 | 45 | ## How to integrate it into kernel 46 | ### As a dynamic module 47 | It is very easy. Just make it and insmod it. 48 | But it means the kernel core codes could not use the unit_perf unless you modify the kernel codes. 49 | In this case, you can only use the unit_perf with another dynamic module 50 | 51 | ### As the kernel core 52 | 1. Because current unit_perf uses the x86 instruction "rdtscll", I put the unit_perf.c into the arch/x86/unit_perf/, and put the unit_perf.h into the include/linux/ 53 | 2. Modify the arch/x86/Kbuild 54 | 55 | +obj-$(CONFIG_UNIT_PERF) += unit_perf/ 56 | 3. Modify the arch/x86/Kconfig.debug 57 | 58 | +config UNIT_PERF 59 | + bool "Unit performance profile" 60 | + default n 61 | + ---help--- 62 | + Enable the unit perf function. You could specify the codes which you want to monitor. 63 | 4. Add one Makefile in arch/x86/unit_perf/ 64 | 65 | + 66 | +obj-$(CONFIG_UNIT_PERF) := unit_perf.o 67 | + 68 | Now you could use the unit_perf everywhere. 
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /unit_perf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | MODULE_LICENSE("GPL"); 13 | MODULE_AUTHOR("Feng Gao "); 14 | MODULE_DESCRIPTION("unit_perf: Used to profile the specific codes"); 15 | MODULE_ALIAS("Unit Perf"); 16 | 17 | /********************************* Platform Selection ********************************************/ 18 | #define UNIT_PERF_X86 19 | 20 | /**********************************************************************************************/ 21 | //#define TEST_UNIT_PERF 22 | 23 | #define UNIT_PERF_DIR_NAME "unit_perf" 24 | struct proc_dir_entry *unit_perf_dir = NULL; 25 | #define UNIT_PERF_TOP_LIST "top_list" 26 | struct proc_dir_entry *unit_perf_top_proc = NULL; 27 | #define UNIT_PERF_RESET_RESULT "reset_result" 28 | struct proc_dir_entry *unit_perf_reset_proc = NULL; 29 | #define UNIT_PERF_MONITOR_PID "monitor_pid" 30 | struct proc_dir_entry *unit_perf_mpid_proc = NULL; 31 | unsigned long g_up_monitor_pid __read_mostly = 0; 32 | 33 | struct cpu_cost_stats { 34 | unsigned long long start; 35 | unsigned long long cost; 36 | unsigned long long overflow; 37 | unsigned long long call_times; 38 | }; 39 | 40 | #define UNIT_PERF_MONITOR_NAME_SIZE (32) 41 | struct monitor_stats { 42 | char name[UNIT_PERF_MONITOR_NAME_SIZE]; 43 | struct cpu_cost_stats __percpu *cost_stats; 44 | }; 45 | 46 | struct monitor_result { 47 | char name[UNIT_PERF_MONITOR_NAME_SIZE]; 48 | unsigned long long overflow; 49 | unsigned long long cost; 50 | unsigned long long call_times; 51 | unsigned long long average; 52 | }; 53 | 54 | #define UNIT_PERF_SLOT_CNT (128) 55 | 56 | struct unit_perf_monitor { 57 | struct monitor_stats monitors[UNIT_PERF_SLOT_CNT]; 
58 | u32 monitor_cnt; 59 | spinlock_t lock; 60 | }; 61 | 62 | struct unit_perf_monitor *g_up_monitor; 63 | typedef void (*up_test_func) (void *); 64 | 65 | /**********************************************************************************************/ 66 | static int insert_monitor(struct unit_perf_monitor *monitor, const char *name); 67 | static void remove_monitor(struct unit_perf_monitor *monitor, int md); 68 | 69 | static struct unit_perf_monitor * unit_perf_monitor_alloc(void); 70 | static void unit_perf_monitor_free(struct unit_perf_monitor *monitor); 71 | 72 | 73 | #ifdef UNIT_PERF_X86 74 | #define UP_GET_CPU_CYCLES(x) rdtscll((x)) 75 | #endif 76 | 77 | /**********************************************************************************************/ 78 | int up_add_monitor(const char *name) 79 | { 80 | struct unit_perf_monitor *monitor; 81 | int ret = -1; 82 | 83 | rcu_read_lock(); 84 | monitor = rcu_dereference(g_up_monitor); 85 | if (monitor) { 86 | ret = insert_monitor(monitor, name); 87 | } else { 88 | pr_err("UnitPerf: g_up_monitor is null\n"); 89 | } 90 | 91 | rcu_read_unlock(); 92 | 93 | return ret; 94 | } 95 | EXPORT_SYMBOL(up_add_monitor); 96 | 97 | void up_remove_monitor(int md) 98 | { 99 | struct unit_perf_monitor *monitor; 100 | 101 | rcu_read_lock(); 102 | monitor = rcu_dereference(g_up_monitor); 103 | if (monitor) { 104 | remove_monitor(monitor, md); 105 | } 106 | rcu_read_unlock(); 107 | } 108 | EXPORT_SYMBOL(up_remove_monitor); 109 | 110 | void up_start_monitor(int md) 111 | { 112 | struct unit_perf_monitor *monitor; 113 | 114 | if (unlikely(md >= UNIT_PERF_SLOT_CNT || md < 0)) { 115 | return; 116 | } 117 | 118 | rcu_read_lock(); 119 | monitor = rcu_dereference(g_up_monitor); 120 | if (likely(monitor)) { 121 | struct cpu_cost_stats *cost_stats = per_cpu_ptr(monitor->monitors[md].cost_stats, 122 | smp_processor_id()); 123 | if (likely(cost_stats)) { 124 | UP_GET_CPU_CYCLES(cost_stats->start); 125 | } else { 126 | if (net_ratelimit()) { 127 | 
pr_err("UnitPerf: CPU(%d) md(%d) cost_stats is NULL\n", 128 | smp_processor_id(), md); 129 | } 130 | } 131 | } 132 | rcu_read_unlock(); 133 | } 134 | EXPORT_SYMBOL(up_start_monitor); 135 | 136 | void up_end_monitor(int md) 137 | { 138 | struct unit_perf_monitor *monitor; 139 | unsigned long long end_time; 140 | 141 | if (unlikely(md >= UNIT_PERF_SLOT_CNT || md < 0)) { 142 | return; 143 | } 144 | 145 | UP_GET_CPU_CYCLES(end_time); 146 | 147 | rcu_read_lock(); 148 | monitor = rcu_dereference(g_up_monitor); 149 | if (likely(monitor)) { 150 | struct cpu_cost_stats *cost_stats = per_cpu_ptr(monitor->monitors[md].cost_stats, 151 | smp_processor_id()); 152 | 153 | if (likely(cost_stats)) { 154 | if (likely(cost_stats->start)) { 155 | unsigned long long old_cost = cost_stats->cost; 156 | unsigned long long cost = end_time-cost_stats->start; 157 | 158 | cost_stats->cost += cost; 159 | cost_stats->start = 0; 160 | cost_stats->call_times++; 161 | 162 | if (cost_stats->cost < old_cost) { 163 | //Overflow happens 164 | cost_stats->overflow++; 165 | } 166 | } else { 167 | if (net_ratelimit()) { 168 | pr_err("UnitPerf: CPU(%d) md(%d) cost_stats->start is 0\n", 169 | smp_processor_id(), md); 170 | } 171 | } 172 | } else { 173 | if (net_ratelimit()) { 174 | pr_err("UnitPerf: CPU(%d) md(%d) cost_stats is NULL\n", 175 | smp_processor_id(), md); 176 | } 177 | } 178 | } 179 | rcu_read_unlock(); 180 | } 181 | EXPORT_SYMBOL(up_end_monitor); 182 | 183 | void up_func_once(const char *name, up_test_func cb, void *data) 184 | { 185 | unsigned long long start, end; 186 | 187 | UP_GET_CPU_CYCLES(start); 188 | cb(data); 189 | UP_GET_CPU_CYCLES(end); 190 | 191 | printk(KERN_INFO "%s costs %llu cycles\n", name, end-start); 192 | } 193 | EXPORT_SYMBOL(up_func_once); 194 | 195 | void up_func_once_preempt(const char *name, up_test_func cb, void *data) 196 | { 197 | preempt_disable(); 198 | up_func_once(name, cb, data); 199 | preempt_enable(); 200 | } 201 | EXPORT_SYMBOL(up_func_once_preempt); 202 | 
203 | void up_func_once_bh(const char *name, up_test_func cb, void *data) 204 | { 205 | local_bh_disable(); 206 | up_func_once(name, cb, data); 207 | local_bh_enable(); 208 | } 209 | EXPORT_SYMBOL(up_func_once_bh); 210 | 211 | void up_func_once_irq(const char *name, up_test_func cb, void *data) 212 | { 213 | local_irq_disable(); 214 | up_func_once(name, cb, data); 215 | local_irq_enable(); 216 | } 217 | EXPORT_SYMBOL(up_func_once_irq); 218 | 219 | static int insert_monitor(struct unit_perf_monitor *monitor, const char *name) 220 | { 221 | struct cpu_cost_stats *stats; 222 | u32 cpu, i; 223 | 224 | if (monitor->monitor_cnt >= UNIT_PERF_SLOT_CNT) { 225 | pr_err("UnitPerf: Slots are full\n"); 226 | return -1; 227 | } 228 | 229 | spin_lock(&monitor->lock); 230 | 231 | for (i = 0; i < UNIT_PERF_SLOT_CNT; ++i) { 232 | if ('\0' == monitor->monitors[i].name[0]) { 233 | break; 234 | } 235 | } 236 | if (i == UNIT_PERF_SLOT_CNT) { 237 | //it is full 238 | pr_err("UnitPerf: Slots are full\n"); 239 | spin_unlock(&monitor->lock); 240 | return -1; 241 | } 242 | 243 | memset(monitor->monitors[i].name, 0, sizeof(monitor->monitors[i].name)); 244 | for_each_online_cpu(cpu) { 245 | stats = per_cpu_ptr(monitor->monitors[i].cost_stats, cpu); 246 | memset(stats, 0, sizeof(*stats)); 247 | } 248 | strncpy(monitor->monitors[i].name, name, sizeof(monitor->monitors[i].name)-1); 249 | monitor->monitor_cnt++; 250 | spin_unlock(&monitor->lock); 251 | 252 | return i; 253 | } 254 | 255 | /* 256 | Should protected by rcu lock 257 | */ 258 | static void remove_monitor(struct unit_perf_monitor *monitor, int md) 259 | { 260 | if (md < 0 || md >= UNIT_PERF_SLOT_CNT) { 261 | pr_err("UnitPerf: Invalid md\n"); 262 | return; 263 | } 264 | 265 | spin_lock(&monitor->lock); 266 | if (monitor->monitors[md].name[0] != '\0') { 267 | monitor->monitors[md].name[0] = '\0'; 268 | monitor->monitor_cnt--; 269 | } else { 270 | pr_err("UnitPerf: The monitor is removed already\n"); 271 | } 272 | 
spin_unlock(&monitor->lock); 273 | } 274 | 275 | static void unit_perf_monitor_free(struct unit_perf_monitor *monitor) 276 | { 277 | if (monitor) { 278 | u32 i; 279 | 280 | for (i = 0; i < UNIT_PERF_SLOT_CNT; ++i) { 281 | free_percpu(monitor->monitors[i].cost_stats); 282 | } 283 | 284 | kfree(monitor); 285 | } 286 | } 287 | 288 | static struct unit_perf_monitor * unit_perf_monitor_alloc(void) 289 | { 290 | struct unit_perf_monitor *monitor; 291 | u32 i; 292 | 293 | monitor = kmalloc(sizeof(*monitor), GFP_KERNEL); 294 | if (!monitor) { 295 | return NULL; 296 | } 297 | memset(monitor, 0, sizeof(*monitor)); 298 | 299 | spin_lock_init(&monitor->lock); 300 | 301 | for (i = 0; i < UNIT_PERF_SLOT_CNT; ++i) { 302 | monitor->monitors[i].cost_stats = alloc_percpu(struct cpu_cost_stats); 303 | if (!monitor->monitors[i].cost_stats) { 304 | goto error; 305 | } 306 | } 307 | 308 | return monitor; 309 | 310 | error: 311 | unit_perf_monitor_free(monitor); 312 | return NULL; 313 | } 314 | 315 | static void *up_generic_seq_start(struct seq_file *s, loff_t *pos) 316 | { 317 | return 0 == *pos ? 
pos : NULL; 318 | } 319 | 320 | static void *up_generic_seq_next(struct seq_file *s, void *v, loff_t *pos) 321 | { 322 | return NULL; 323 | } 324 | 325 | static void up_generic_seq_stop(struct seq_file *s, void *v) 326 | { 327 | } 328 | 329 | static void get_total_cpu_stats(struct monitor_result *result, struct monitor_stats *stats) 330 | { 331 | struct cpu_cost_stats *cost_stats; 332 | u32 cpu; 333 | 334 | strcpy(result->name, stats->name); 335 | 336 | for_each_online_cpu(cpu) { 337 | unsigned long long old_cost = result->cost; 338 | 339 | cost_stats = per_cpu_ptr(stats->cost_stats, cpu); 340 | 341 | result->call_times += cost_stats->call_times; 342 | result->overflow += cost_stats->overflow; 343 | result->cost += cost_stats->cost; 344 | if (result->cost < old_cost) { 345 | result->overflow++; 346 | } 347 | 348 | if (result->call_times) { 349 | result->average = cost_stats->cost; 350 | do_div(result->average, result->call_times); 351 | } else { 352 | result->average = 0; 353 | } 354 | } 355 | } 356 | 357 | static int monitor_result_reverse_cmp(const void *a, const void *b) 358 | { 359 | const struct monitor_result *s1 = a; 360 | const struct monitor_result *s2 = b; 361 | 362 | if (s1->name[0] == '\0') { 363 | return 1; 364 | } else if (s2->name [0] == '\0') { 365 | return -1; 366 | } 367 | 368 | /* Compare the overflow firstly */ 369 | if (s1->overflow < s2->overflow) { 370 | return 1; 371 | } else if (s1->overflow > s2->overflow) { 372 | return -1; 373 | } else { 374 | if (s1->cost < s2->cost) { 375 | return 1; 376 | } else if (s1->cost > s2->cost) { 377 | return -1; 378 | } else { 379 | return 0; 380 | } 381 | } 382 | } 383 | 384 | static void monitor_result_swap(void *a, void *b, int size) 385 | { 386 | struct monitor_result *s1 = a; 387 | struct monitor_result *s2 = b; 388 | struct monitor_result tmp = *s1; 389 | 390 | *s1 = *s2; 391 | *s2 = tmp; 392 | } 393 | 394 | static int up_top_seq_show(struct seq_file *s, void *v) 395 | { 396 | struct unit_perf_monitor 
*monitor; 397 | 398 | rcu_read_lock(); 399 | monitor = rcu_dereference(g_up_monitor); 400 | if (monitor && monitor->monitor_cnt) { 401 | u32 result_cnt = monitor->monitor_cnt; 402 | struct monitor_result *result = kmalloc(sizeof(*result)*result_cnt, GFP_KERNEL); 403 | 404 | if (result) { 405 | u32 i; 406 | u32 copy = 0; 407 | 408 | memset(result, 0, sizeof(*result)*result_cnt); 409 | 410 | for (i = 0; i < UNIT_PERF_SLOT_CNT; ++i) { 411 | struct monitor_stats *pos; 412 | pos = monitor->monitors+i; 413 | 414 | if (pos->name[0] != '\0') { 415 | get_total_cpu_stats(result+copy, pos); 416 | ++copy; 417 | } 418 | 419 | if (copy >= result_cnt) { 420 | break; 421 | } 422 | } 423 | 424 | sort(result, result_cnt, sizeof(*result), 425 | monitor_result_reverse_cmp, 426 | monitor_result_swap); 427 | 428 | seq_printf(s, "%-32s %-10s %-8s %-22s %-22s\n", 429 | "monitor", "call_times", "overflow", "total_costs", "average_cost"); 430 | 431 | for (i = 0; i < result_cnt; ++i) { 432 | seq_printf(s, "%-32s %-10llu %-8llu %-22llu %-22llu\n", 433 | result[i].name, result[i].call_times, 434 | result[i].overflow, result[i].cost, result[i].average); 435 | } 436 | kfree(result); 437 | } else { 438 | printk(KERN_ERR "Fail to allocate result memory\n"); 439 | } 440 | } else { 441 | seq_printf(s, "No monitor point\n"); 442 | } 443 | 444 | rcu_read_unlock(); 445 | 446 | return 0; 447 | } 448 | 449 | static int up_reset_seq_show(struct seq_file *s, void *v) 450 | { 451 | struct unit_perf_monitor *monitor; 452 | 453 | seq_printf(s, "Reset the stats of monitor stats\n"); 454 | 455 | monitor = unit_perf_monitor_alloc(); 456 | if (monitor) { 457 | struct unit_perf_monitor *old_monitor; 458 | 459 | rcu_read_lock(); 460 | old_monitor = rcu_dereference(g_up_monitor); 461 | if (old_monitor) { 462 | u32 i; 463 | 464 | *monitor = *old_monitor; 465 | for (i = 0; i < UNIT_PERF_SLOT_CNT; ++i) { 466 | struct cpu_cost_stats *stats; 467 | u32 cpu; 468 | 469 | for_each_online_cpu(cpu) { 470 | stats = 
per_cpu_ptr(monitor->monitors[i].cost_stats, cpu); 471 | memset(stats, 0, sizeof(*stats)); 472 | } 473 | } 474 | } 475 | rcu_read_unlock(); 476 | 477 | //Replace the old one 478 | rcu_assign_pointer(g_up_monitor, monitor); 479 | synchronize_rcu(); 480 | 481 | unit_perf_monitor_free(old_monitor); 482 | } else { 483 | seq_printf(s, "Fail to allocate monitor"); 484 | } 485 | 486 | return 0; 487 | } 488 | 489 | 490 | static const struct seq_operations up_top_seq_ops = { 491 | .start = up_generic_seq_start, 492 | .next = up_generic_seq_next, 493 | .stop = up_generic_seq_stop, 494 | .show = up_top_seq_show 495 | }; 496 | 497 | static const struct seq_operations up_reset_seq_ops = { 498 | .start = up_generic_seq_start, 499 | .next = up_generic_seq_next, 500 | .stop = up_generic_seq_stop, 501 | .show = up_reset_seq_show 502 | }; 503 | 504 | static int up_top_proc_open(struct inode *inode, struct file *file) 505 | { 506 | return seq_open(file, &up_top_seq_ops); 507 | } 508 | 509 | static int up_reset_proc_open(struct inode *inode, struct file *file) 510 | { 511 | return seq_open(file, &up_reset_seq_ops); 512 | } 513 | 514 | static const struct file_operations up_top_proc_fops = { 515 | .owner = THIS_MODULE, 516 | .open = up_top_proc_open, 517 | .read = seq_read, 518 | .llseek = seq_lseek, 519 | .release = seq_release 520 | }; 521 | 522 | static const struct file_operations up_reset_proc_fops = { 523 | .owner = THIS_MODULE, 524 | .open = up_reset_proc_open, 525 | .read = seq_read, 526 | .llseek = seq_lseek, 527 | .release = seq_release 528 | }; 529 | 530 | static int up_show_monitor_pid(char *page, char **start, off_t offset, 531 | int count, int *eof, void *data) 532 | { 533 | return snprintf(page, count, "%lu\n", g_up_monitor_pid); 534 | } 535 | 536 | static int up_store_monitor_pid(struct file *file, const char *buffer, 537 | unsigned long count, void *data) 538 | { 539 | char buf[32] = {0}; 540 | unsigned long copy_bytes = sizeof(buf)-1; 541 | char *p = (char *)buf; 542 
| 543 | if (copy_bytes > count) { 544 | copy_bytes = count; 545 | } 546 | 547 | if (copy_from_user(buf, buffer, copy_bytes)) { 548 | return count; 549 | } 550 | 551 | g_up_monitor_pid = simple_strtoul(p, &p, 10); 552 | 553 | printk(KERN_INFO "Unit Perf: The monitor pid is updated to %lu\n", g_up_monitor_pid); 554 | 555 | return copy_bytes; 556 | } 557 | 558 | #ifdef TEST_UNIT_PERF 559 | static int g_test1, g_test2, g_test_monitor; 560 | static void test_monitor(void) 561 | { 562 | g_test1 = up_add_monitor("test1"); 563 | g_test2 = up_add_monitor("test2"); 564 | g_test_monitor = up_add_monitor("test_monitor"); 565 | up_start_monitor(g_test_monitor); 566 | //UP_AUTO_START_FUNC_MONITOR(); 567 | up_start_monitor(g_test1); 568 | up_start_monitor(g_test2); 569 | up_end_monitor(g_test1); 570 | up_end_monitor(g_test2); 571 | up_start_monitor(g_test1); 572 | up_start_monitor(g_test2); 573 | up_end_monitor(g_test1); 574 | up_end_monitor(g_test2); 575 | up_start_monitor(g_test1); 576 | up_start_monitor(g_test2); 577 | up_end_monitor(g_test1); 578 | up_end_monitor(g_test2); 579 | up_start_monitor(g_test1); 580 | up_start_monitor(g_test2); 581 | up_end_monitor(g_test1); 582 | up_end_monitor(g_test2); 583 | //UP_AUTO_END_FUNC_MONITOR(); 584 | } 585 | 586 | static void remove_test_monitor(void) 587 | { 588 | up_end_monitor(g_test_monitor); 589 | up_remove_monitor(g_test1); 590 | up_remove_monitor(g_test2); 591 | up_remove_monitor(g_test_monitor); 592 | } 593 | #endif 594 | 595 | 596 | static int __init unit_perf_init(void) 597 | { 598 | int ret = -ENOENT; 599 | 600 | printk(KERN_INFO "Unit Perf init\n"); 601 | 602 | unit_perf_dir = proc_mkdir(UNIT_PERF_DIR_NAME, NULL); 603 | if (!unit_perf_dir) { 604 | printk(KERN_ERR "Fail to create unit_perf proc dir\n"); 605 | goto err1; 606 | } 607 | 608 | unit_perf_top_proc = proc_create_data(UNIT_PERF_TOP_LIST, 0400, unit_perf_dir, 609 | &up_top_proc_fops, NULL); 610 | if (!unit_perf_top_proc) { 611 | printk(KERN_ERR "Fail to craete the 
unit_perf top file\n"); 612 | goto err2; 613 | } 614 | unit_perf_reset_proc = proc_create_data(UNIT_PERF_RESET_RESULT, 0400, unit_perf_dir, 615 | &up_reset_proc_fops, NULL); 616 | if (!unit_perf_reset_proc) { 617 | printk(KERN_ERR "Fail to create the unit_perf reset file\n"); 618 | goto err3; 619 | } 620 | unit_perf_mpid_proc = create_proc_entry(UNIT_PERF_MONITOR_PID, S_IFREG | S_IRUGO | S_IWUSR, unit_perf_dir); 621 | if (!unit_perf_mpid_proc) { 622 | printk(KERN_ERR "Fail to create the unit_perf monitor_pid file\n"); 623 | goto err4; 624 | } 625 | unit_perf_mpid_proc->read_proc = up_show_monitor_pid; 626 | unit_perf_mpid_proc->write_proc = up_store_monitor_pid; 627 | unit_perf_mpid_proc->data = NULL; 628 | 629 | g_up_monitor = unit_perf_monitor_alloc(); 630 | if (!g_up_monitor) { 631 | ret = -ENOMEM; 632 | printk(KERN_ERR "Fail to init unit_perf monitor\n"); 633 | goto err5; 634 | } 635 | 636 | printk(KERN_INFO "Unit Perf is ready now\n"); 637 | 638 | #ifdef TEST_UNIT_PERF 639 | test_monitor(); 640 | #endif 641 | return 0; 642 | 643 | err5: 644 | remove_proc_entry(UNIT_PERF_MONITOR_PID, unit_perf_dir); 645 | err4: 646 | remove_proc_entry(UNIT_PERF_RESET_RESULT, unit_perf_dir); 647 | err3: 648 | remove_proc_entry(UNIT_PERF_TOP_LIST, unit_perf_dir); 649 | err2: 650 | remove_proc_entry(UNIT_PERF_DIR_NAME, NULL); 651 | err1: 652 | return ret; 653 | } 654 | 655 | static void __exit unit_perf_exit(void) 656 | { 657 | struct unit_perf_monitor *monitor = g_up_monitor; 658 | 659 | #ifdef TEST_UNIT_PERF 660 | remove_test_monitor(); 661 | #endif 662 | 663 | rcu_assign_pointer(g_up_monitor, NULL); 664 | 665 | synchronize_rcu(); 666 | 667 | unit_perf_monitor_free(monitor); 668 | 669 | remove_proc_entry(UNIT_PERF_MONITOR_PID, unit_perf_dir); 670 | remove_proc_entry(UNIT_PERF_RESET_RESULT, unit_perf_dir); 671 | remove_proc_entry(UNIT_PERF_TOP_LIST, unit_perf_dir); 672 | remove_proc_entry(UNIT_PERF_DIR_NAME, NULL); 673 | printk(KERN_INFO "Unit Perf exit now\n"); 674 | } 675 | 676 
| 677 | module_init(unit_perf_init); 678 | module_exit(unit_perf_exit); 679 | 680 | -------------------------------------------------------------------------------- /unit_perf.h: -------------------------------------------------------------------------------- 1 | #ifndef UNIT_PERF_H_ 2 | #define UNIT_PERF_H_ 3 | 4 | /* 5 | Usage: 6 | When you want to find the bottleneck in you codes, you could use the monitor point to check it. 7 | 1. Use up_add_monitor to add the monitor point name; 8 | ATTENSION: It should be invoked in process/thread context. Because it will allocate memory with GFP_KERNEL 9 | 2. Invoke the up_start_monitor when reach the monitor point; 10 | 3. Invoke the up_end_monitor when reach the monitor point; 11 | ATTENTION: The monitor name is the index of unit perf. 12 | 4. Check the result: 13 | cat /proc/unit_perf/top_list; 14 | 5. Reset the result if necessary 15 | cat /proc/unit_perf/reset_result 16 | 6. Use up_remove_monitor to remove the monitor point name. 17 | 18 | When you want to check the performance of fixed codes, you could use up_func_once to get it. 19 | 1. Create one function whose signature is like up_test_func; 20 | 2. Invoke the up_func_once(_preempt/bh/irq) according to your requirement. 21 | 3. 
Check the result by dmesg 22 | 23 | */ 24 | 25 | typedef void (*up_test_func) (void *); 26 | 27 | #ifdef CONFIG_UNIT_PERF 28 | extern int up_add_monitor(const char *name); 29 | extern void up_remove_monitor(int md); 30 | extern void up_start_monitor(int md); 31 | extern void up_end_monitor(int md); 32 | 33 | /* Invoke the func directly */ 34 | extern void up_func_once(const char *name, up_test_func cb, void *data); 35 | /* Disable the preempt and enable again */ 36 | extern void up_func_once_preempt(const char *name, up_test_func cb, void *data); 37 | /* Disable the softirq and enable again */ 38 | extern void up_func_once_bh(const char *name, up_test_func cb, void *data); 39 | /* Disable the interrutp and enable again */ 40 | extern void up_func_once_irq(const char *name, up_test_func cb, void *data); 41 | #else 42 | 43 | static inline int up_add_monitor(const char *name) 44 | { 45 | return 0; 46 | } 47 | 48 | static inline void up_remove_monitor(int md) 49 | { 50 | } 51 | 52 | static inline void up_start_monitor(int md) 53 | { 54 | 55 | } 56 | 57 | static inline void up_end_monitor(int md) 58 | { 59 | } 60 | 61 | static inline void up_func_once(const char *name, up_test_func cb, void *data) 62 | { 63 | } 64 | static inline void up_func_once_preempt(const char *name, up_test_func cb, void *data) 65 | { 66 | } 67 | static inline void up_func_once_bh(const char *name, up_test_func cb, void *data) 68 | { 69 | } 70 | static void up_func_once_irq(const char *name, up_test_func cb, void *data) 71 | { 72 | } 73 | 74 | #endif 75 | 76 | #define UP_AUTO_START_FUNC_MONITOR() up_start_monitor(__FUNCTION__) 77 | #define UP_AUTO_END_FUNC_MONITOR() up_end_monitor(__FUNCTION__) 78 | 79 | 80 | 81 | extern unsigned long g_up_monitor_pid __read_mostly; 82 | 83 | #define UP_PID_INFO_LOG(...) 
\ 84 | if (g_up_monitor_pid == current->pid) { \ 85 | printk(KERN_INFO "[UnitPerf]:"__VA_ARGS__); \ 86 | } 87 | 88 | #endif 89 | 90 | 91 | --------------------------------------------------------------------------------