├── README.md ├── main.c ├── Makefile ├── backtrace.h ├── backtrace.c └── uallsyms.c /README.md: -------------------------------------------------------------------------------- 1 | # backtrace 2 | A user-space simulation dump_stack(), based on mips. 3 | 4 | It supplies two APIs: 5 | show_backtrace(): show backtrace of function caller tree. 6 | addr_to_name(): given an addr, get the function name it belongs to. 7 | 8 | Any problems, please contact casper10_zhen@hotmail.com 9 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "backtrace.h" 3 | 4 | int func2(int a, int b); 5 | int func1(int a, int b); 6 | int func0(int a, int b); 7 | 8 | int func2(int a, int b) 9 | { 10 | int c = a * b; 11 | printf("%s: c = %d\n", __FUNCTION__, c); 12 | show_backtrace(); 13 | return c; 14 | } 15 | 16 | int func1(int a, int b) 17 | { 18 | int c = func2(a, b); 19 | printf("%s: c = %d\n", __FUNCTION__, c); 20 | return c; 21 | } 22 | 23 | int func0(int a, int b) 24 | { 25 | int c = func1(a, b); 26 | printf("%s: c = %d\n", __FUNCTION__, c); 27 | return c; 28 | } 29 | 30 | int main() 31 | { 32 | int a = 4, b = 5; 33 | int (*funcptr)(int, int) = func0; 34 | 35 | int c = func0(a, b); 36 | printf("%s: c = %d\n", __FUNCTION__, c); 37 | 38 | printf("funcptr's name = %s\n", addr_to_name((unsigned long)funcptr)); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export TOOLCHAIN=/workshop/backtrace/build_dir/staging_dir/toolchain-mips_r2_gcc-4.6-linaro_uClibc-0.9.33.2/bin 2 | export CC=$(TOOLCHAIN)/mips-openwrt-linux-uclibc-gcc 3 | export LD=$(TOOLCHAIN)/mips-openwrt-linux-uclibc-ld 4 | export OBJDUMP=$(TOOLCHAIN)/mips-openwrt-linux-uclibc-objdump 5 | 6 | CFLAGS 
=-I/workshop/backtrace/build_dir/staging_dir/target-mips_r2_uClibc-0.9.33.2/usr/include -Os -Wall -Werror --std=gnu99 -Wmissing-declarations 7 | LDFLAGS=-L/workshop/backtrace/build_dir/staging_dir/target-mips_r2_uClibc-0.9.33.2/usr/lib 8 | export STAGING_DIR=/workshop/backtrace/build_dir/staging_dir 9 | 10 | OBJS = backtrace.o main.o 11 | 12 | testbt.o: $(OBJS) 13 | $(LD) -r -o $@ $(OBJS) 14 | 15 | %.o:%.c 16 | $(CC) $(CFLAGS) -c -o $@ $< 17 | 18 | uallsyms: uallsyms.c 19 | gcc uallsyms.c -o uallsyms 20 | 21 | all: testbt.o uallsyms 22 | @echo Compile $@ 23 | nm -n testbt.o | ./uallsyms > tmp_uallsyms1.S 24 | $(CC) -c tmp_uallsyms1.S -o uallsyms1.o 25 | $(CC) $(LDFLAGS) $(OBJS) uallsyms1.o -o testbt1.o 26 | nm -n testbt1.o | ./uallsyms > tmp_uallsyms2.S 27 | $(CC) -c tmp_uallsyms2.S -o uallsyms2.o 28 | $(LD) -n -r $(OBJS) uallsyms2.o -o testbt2.o 29 | $(CC) $(LDFLAGS) testbt2.o -o testbt 30 | ls -l testbt 31 | 32 | clean: 33 | rm -f *.S *.o testbt uallsyms 34 | 35 | -------------------------------------------------------------------------------- /backtrace.h: -------------------------------------------------------------------------------- 1 | #ifndef __BACKTRACE_H 2 | #define __BACKTRACE_H 3 | #include //for struct pt_regs 4 | 5 | /* 6 | * Major opcodes; before MIPS IV cop1x was called cop3. 
7 | */ 8 | enum major_op { 9 | spec_op, bcond_op, j_op, jal_op, 10 | beq_op, bne_op, blez_op, bgtz_op, 11 | addi_op, addiu_op, slti_op, sltiu_op, 12 | andi_op, ori_op, xori_op, lui_op, 13 | cop0_op, cop1_op, cop2_op, cop1x_op, 14 | beql_op, bnel_op, blezl_op, bgtzl_op, 15 | daddi_op, daddiu_op, ldl_op, ldr_op, 16 | spec2_op, jalx_op, mdmx_op, spec3_op, 17 | lb_op, lh_op, lwl_op, lw_op, 18 | lbu_op, lhu_op, lwr_op, lwu_op, 19 | sb_op, sh_op, swl_op, sw_op, 20 | sdl_op, sdr_op, swr_op, cache_op, 21 | ll_op, lwc1_op, lwc2_op, pref_op, 22 | lld_op, ldc1_op, ldc2_op, ld_op, 23 | sc_op, swc1_op, swc2_op, major_3b_op, 24 | scd_op, sdc1_op, sdc2_op, sd_op 25 | }; 26 | 27 | /* 28 | * func field of spec opcode. 29 | */ 30 | enum spec_op { 31 | sll_op, movc_op, srl_op, sra_op, 32 | sllv_op, pmon_op, srlv_op, srav_op, 33 | jr_op, jalr_op, movz_op, movn_op, 34 | syscall_op, break_op, spim_op, sync_op, 35 | mfhi_op, mthi_op, mflo_op, mtlo_op, 36 | dsllv_op, spec2_unused_op, dsrlv_op, dsrav_op, 37 | mult_op, multu_op, div_op, divu_op, 38 | dmult_op, dmultu_op, ddiv_op, ddivu_op, 39 | add_op, addu_op, sub_op, subu_op, 40 | and_op, or_op, xor_op, nor_op, 41 | spec3_unused_op, spec4_unused_op, slt_op, sltu_op, 42 | dadd_op, daddu_op, dsub_op, dsubu_op, 43 | tge_op, tgeu_op, tlt_op, tltu_op, 44 | teq_op, spec5_unused_op, tne_op, spec6_unused_op, 45 | dsll_op, spec7_unused_op, dsrl_op, dsra_op, 46 | dsll32_op, spec8_unused_op, dsrl32_op, dsra32_op 47 | }; 48 | 49 | /* 50 | * bitfields depend from byteorder. 51 | */ 52 | #ifdef __MIPSEB__ 53 | struct j_format { /* Jump format */ 54 | unsigned int opcode : 6; 55 | unsigned int target : 26; 56 | }; 57 | 58 | struct i_format { /* Immediate format (addi, lw, ...) */ 59 | unsigned int opcode : 6; 60 | unsigned int rs : 5; 61 | unsigned int rt : 5; 62 | signed int simmediate : 16; 63 | }; 64 | 65 | struct u_format { /* Unsigned immediate format (ori, xori, ...) 
*/ 66 | unsigned int opcode : 6; 67 | unsigned int rs : 5; 68 | unsigned int rt : 5; 69 | unsigned int uimmediate : 16; 70 | }; 71 | 72 | struct c_format { /* Cache (>= R6000) format */ 73 | unsigned int opcode : 6; 74 | unsigned int rs : 5; 75 | unsigned int c_op : 3; 76 | unsigned int cache : 2; 77 | unsigned int simmediate : 16; 78 | }; 79 | 80 | struct r_format { /* Register format */ 81 | unsigned int opcode : 6; 82 | unsigned int rs : 5; 83 | unsigned int rt : 5; 84 | unsigned int rd : 5; 85 | unsigned int re : 5; 86 | unsigned int func : 6; 87 | }; 88 | 89 | struct sp3_format { /* special-3 format */ 90 | unsigned int opcode : 6; 91 | unsigned int rs : 5; 92 | unsigned int rt : 5; 93 | unsigned int offset : 10; 94 | unsigned int sp3_opcode : 6; 95 | }; 96 | 97 | struct lx_format { /* LX format */ 98 | unsigned int opcode : 6; 99 | unsigned int base : 5; 100 | unsigned int index : 5; 101 | unsigned int rd: 5; 102 | unsigned int lx_opcode : 5; 103 | unsigned int sp3_opcode : 6; 104 | }; 105 | 106 | struct p_format { /* Performance counter format (R10000) */ 107 | unsigned int opcode : 6; 108 | unsigned int rs : 5; 109 | unsigned int rt : 5; 110 | unsigned int rd : 5; 111 | unsigned int re : 5; 112 | unsigned int func : 6; 113 | }; 114 | 115 | struct f_format { /* FPU register format */ 116 | unsigned int opcode : 6; 117 | unsigned int : 1; 118 | unsigned int fmt : 4; 119 | unsigned int rt : 5; 120 | unsigned int rd : 5; 121 | unsigned int re : 5; 122 | unsigned int func : 6; 123 | }; 124 | 125 | struct ma_format { /* FPU multipy and add format (MIPS IV) */ 126 | unsigned int opcode : 6; 127 | unsigned int fr : 5; 128 | unsigned int ft : 5; 129 | unsigned int fs : 5; 130 | unsigned int fd : 5; 131 | unsigned int func : 4; 132 | unsigned int fmt : 2; 133 | }; 134 | 135 | #elif defined(__MIPSEL__) 136 | 137 | struct j_format { /* Jump format */ 138 | unsigned int target : 26; 139 | unsigned int opcode : 6; 140 | }; 141 | 142 | struct i_format { /* Immediate 
format */ 143 | signed int simmediate : 16; 144 | unsigned int rt : 5; 145 | unsigned int rs : 5; 146 | unsigned int opcode : 6; 147 | }; 148 | 149 | struct u_format { /* Unsigned immediate format */ 150 | unsigned int uimmediate : 16; 151 | unsigned int rt : 5; 152 | unsigned int rs : 5; 153 | unsigned int opcode : 6; 154 | }; 155 | 156 | struct c_format { /* Cache (>= R6000) format */ 157 | unsigned int simmediate : 16; 158 | unsigned int cache : 2; 159 | unsigned int c_op : 3; 160 | unsigned int rs : 5; 161 | unsigned int opcode : 6; 162 | }; 163 | 164 | struct r_format { /* Register format */ 165 | unsigned int func : 6; 166 | unsigned int re : 5; 167 | unsigned int rd : 5; 168 | unsigned int rt : 5; 169 | unsigned int rs : 5; 170 | unsigned int opcode : 6; 171 | }; 172 | 173 | struct sp3_format { /* special-3 format */ 174 | unsigned int sp3_opcode : 6; 175 | unsigned int offset : 10; 176 | unsigned int rt : 5; 177 | unsigned int rs : 5; 178 | unsigned int opcode : 6; 179 | }; 180 | 181 | struct lx_format { /* LX format */ 182 | unsigned int sp3_opcode : 6; 183 | unsigned int lx_opcode : 5; 184 | unsigned int rd: 5; 185 | unsigned int index : 5; 186 | unsigned int base : 5; 187 | unsigned int opcode : 6; 188 | }; 189 | 190 | struct p_format { /* Performance counter format (R10000) */ 191 | unsigned int func : 6; 192 | unsigned int re : 5; 193 | unsigned int rd : 5; 194 | unsigned int rt : 5; 195 | unsigned int rs : 5; 196 | unsigned int opcode : 6; 197 | }; 198 | 199 | struct f_format { /* FPU register format */ 200 | unsigned int func : 6; 201 | unsigned int re : 5; 202 | unsigned int rd : 5; 203 | unsigned int rt : 5; 204 | unsigned int fmt : 4; 205 | unsigned int : 1; 206 | unsigned int opcode : 6; 207 | }; 208 | 209 | struct ma_format { /* FPU multipy and add format (MIPS IV) */ 210 | unsigned int fmt : 2; 211 | unsigned int func : 4; 212 | unsigned int fd : 5; 213 | unsigned int fs : 5; 214 | unsigned int ft : 5; 215 | unsigned int fr : 5; 216 | unsigned 
int opcode : 6; 217 | }; 218 | 219 | #else /* !defined (__MIPSEB__) && !defined (__MIPSEL__) */ 220 | #error "MIPS but neither __MIPSEL__ nor __MIPSEB__?" 221 | #endif 222 | 223 | union mips_instruction { 224 | unsigned int word; 225 | unsigned short halfword[2]; 226 | unsigned char byte[4]; 227 | struct j_format j_format; 228 | struct i_format i_format; 229 | struct u_format u_format; 230 | struct c_format c_format; 231 | struct r_format r_format; 232 | struct f_format f_format; 233 | struct ma_format ma_format; 234 | }; 235 | 236 | char * addr_to_name(unsigned long addr); 237 | void show_backtrace(); 238 | 239 | #endif 240 | 241 | -------------------------------------------------------------------------------- /backtrace.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include //for struct pt_regs 7 | #include 8 | #include "backtrace.h" 9 | 10 | typedef unsigned char u8; 11 | 12 | /* 13 | * These will be re-linked against their real values 14 | * during the second link stage. 15 | */ 16 | extern const unsigned long uallsyms_addresses[] __attribute__((weak)); 17 | extern const u8 uallsyms_names[] __attribute__((weak)); 18 | 19 | /* 20 | * Tell the compiler that the count isn't in the small data section if the arch 21 | * has one (eg: FRV). 
22 | */ 23 | extern const unsigned long uallsyms_num_syms 24 | __attribute__((weak, section(".rodata"))); 25 | 26 | #ifdef DEBUG_BACKTRACE 27 | static int nested_depth = 10; 28 | #define _CORE_SIZE (2* 1024 * 1024) 29 | #else 30 | #endif 31 | 32 | struct mips_frame_info { 33 | void *func; 34 | unsigned long * ra; 35 | unsigned long func_size; 36 | int frame_size; 37 | int pc_offset; 38 | }; 39 | 40 | static inline int is_jal_jalr_jr_ins(union mips_instruction *ip) 41 | { 42 | if (ip->j_format.opcode == jal_op) 43 | return 1; 44 | if (ip->r_format.opcode != spec_op) 45 | return 0; 46 | return ip->r_format.func == jalr_op || ip->r_format.func == jr_op; 47 | } 48 | 49 | static inline int is_sp_move_ins(union mips_instruction *ip) 50 | { 51 | /* addiu/daddiu sp,sp,-imm */ 52 | if (ip->i_format.rs != 29 || ip->i_format.rt != 29) 53 | return 0; 54 | if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op) 55 | return 1; 56 | return 0; 57 | } 58 | 59 | static inline int is_ra_save_ins(union mips_instruction *ip) 60 | { 61 | /* sw / sd $ra, offset($sp) */ 62 | return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) && 63 | ip->i_format.rs == 29 && 64 | ip->i_format.rt == 31; 65 | } 66 | 67 | /* address validation: addr must belong to symbol list - uallsyms_addresses[] */ 68 | static inline int is_usym_addr(unsigned long addr) 69 | { 70 | if (!uallsyms_addresses || uallsyms_num_syms <= 0) 71 | return 0; 72 | if (addr >= uallsyms_addresses[0] && addr < uallsyms_addresses[uallsyms_num_syms - 1]) 73 | return 1; 74 | return 0; 75 | } 76 | 77 | static pid_t hold_ra() 78 | { 79 | /* do nothing. */ 80 | return getpid(); 81 | } 82 | 83 | /* get general registers value.*/ 84 | static __always_inline void prepare_frametrace(struct pt_regs *regs) 85 | { 86 | pid_t _i = 0; 87 | _i = hold_ra(); //ensure ra is pointed to the function itself. 
88 | printf("%d\n", _i); 89 | 90 | __asm__ __volatile__( 91 | ".set noreorder\n\t" 92 | ".set push\n\t" 93 | ".set noat\n\t" 94 | 95 | "1: la $1, 1b\n\t" 96 | "sw $1, %0\n\t" 97 | "sw $29, %1\n\t" 98 | "sw $31, %2\n\t" 99 | 100 | ".set pop\n\t" 101 | ".set reorder\n\t" 102 | : "=m" (regs->cp0_epc), 103 | "=m" (regs->regs[29]), "=m" (regs->regs[31]) 104 | : : "memory"); 105 | } 106 | 107 | /* search for the position of target in ascending order base[]. 108 | return index in base[] if found target; 109 | return the index that 'target' should be inserted into base[] if not found target, if target should be inserted at the front, return -1. */ 110 | static int bsearch_index(const unsigned long base[], int len, unsigned long target) 111 | { 112 | int start = 0, end = len - 1; 113 | int middle; 114 | 115 | if (!base || base[0] > target) 116 | { 117 | return -1; 118 | } 119 | if (base[len - 1] < target) 120 | { 121 | return (len - 1); 122 | } 123 | 124 | /* binary search */ 125 | while (start <= end) 126 | { 127 | middle = (start + end) / 2; 128 | if (base[middle] == target) 129 | { 130 | return middle; 131 | } 132 | if (base[end] == target) 133 | { 134 | return end; 135 | } 136 | if ((start + 1 == end) || (start == end)) 137 | { 138 | return start; 139 | } 140 | 141 | /* always ensure start < target, and end > end */ 142 | if (base[middle] > target) 143 | { 144 | end = middle; 145 | } 146 | if (base[middle] < target) 147 | { 148 | start = middle; 149 | } 150 | } 151 | 152 | return -1; 153 | } 154 | 155 | /* get function name at index of symbol table. 
*/ 156 | static char * get_func_name(int index) 157 | { 158 | int pos = 0, len; 159 | char * name = NULL; 160 | 161 | if (index >= 0 && index < uallsyms_num_syms) 162 | { 163 | /* find the position of function name */ 164 | for (pos = 0; index > 0; index--) 165 | { 166 | pos += uallsyms_names[pos] + 1; 167 | } 168 | 169 | /* len(1)-type(1)-name */ 170 | len = uallsyms_names[pos] - 1; //len is include type, so name length is len-1 171 | name = (char *)malloc(sizeof(char) * (len + 1)); 172 | if (!name) 173 | return NULL; 174 | 175 | memset(name, 0, len + 1); 176 | pos = pos + 2; //skip len and type(t, T etc.) 177 | for (index = 0; index < len; index++, pos++) 178 | { 179 | name[index] = uallsyms_names[pos]; 180 | } 181 | } 182 | 183 | return name; 184 | } 185 | 186 | /* turn addr to function name. */ 187 | char * addr_to_name(unsigned long addr) 188 | { 189 | int index = 0; 190 | 191 | if (!is_usym_addr(addr)) 192 | { 193 | return NULL; 194 | } 195 | 196 | index = bsearch_index(uallsyms_addresses, uallsyms_num_syms, addr); 197 | 198 | return get_func_name(index); 199 | } 200 | 201 | /* recursively look for sp and ra. sp for stack address space, ra for text section. */ 202 | static void print_caller(unsigned long sp, unsigned long ra) 203 | { 204 | int index = 0; 205 | union mips_instruction * ip; 206 | unsigned int max_insns; 207 | unsigned int i; 208 | struct mips_frame_info info; 209 | char * caller_name = NULL; 210 | 211 | memset(&info, 0, sizeof(info)); 212 | 213 | if (!is_usym_addr(ra)) 214 | { 215 | return; 216 | } 217 | 218 | /* looking for the position of function where ra is pointed at. 
*/ 219 | #if 0 220 | while (index < uallsyms_num_syms) 221 | { 222 | if (uallsyms_addresses[index] <= ra && uallsyms_addresses[index + 1] > ra) 223 | { 224 | break; 225 | } 226 | index++; 227 | } 228 | //printf("index = %d\n", index); 229 | #else 230 | index = bsearch_index(uallsyms_addresses, uallsyms_num_syms, ra); 231 | #endif 232 | 233 | ip = (union mips_instruction * )uallsyms_addresses[index]; //the first instruction in function 234 | if (!ip) 235 | { 236 | return; 237 | } 238 | 239 | caller_name = get_func_name(index); 240 | printf("\t=>%s()+0x%lx\n", caller_name, ra - (unsigned long)ip); 241 | 242 | /* only search in instructions already executed. */ 243 | max_insns = (ra - (unsigned long)ip) / sizeof(union mips_instruction); 244 | if (max_insns == 0) 245 | { 246 | max_insns = 128U; /* unknown function size */ 247 | } 248 | max_insns = max_insns < 128U ? max_insns : 128U; 249 | 250 | info.func = ip; 251 | info.frame_size = 0; 252 | info.func_size = 0; //not used 253 | info.pc_offset = -1; //not used 254 | info.ra = NULL; 255 | 256 | /* find sp and ra (userspace functions use fp, so sp is not changed) */ 257 | for (i = 0; i < max_insns; i++, ip++) 258 | { 259 | if (is_jal_jalr_jr_ins(ip)) 260 | break; 261 | if (!info.frame_size) { 262 | if (is_sp_move_ins(ip)) 263 | { 264 | info.frame_size = - ip->i_format.simmediate; //size of function stack 265 | } 266 | continue; 267 | } 268 | if (info.pc_offset == -1 && is_ra_save_ins(ip)) { //find ra 269 | info.ra = (unsigned long *)(sp + ip->i_format.simmediate); 270 | break; 271 | } 272 | } 273 | 274 | /* jump to caller's stack. 
*/ 275 | sp = sp + info.frame_size; 276 | 277 | if (caller_name) 278 | free(caller_name); 279 | 280 | if (info.ra) 281 | { 282 | print_caller(sp, *(info.ra)); 283 | } 284 | } 285 | 286 | /* print back trace functions */ 287 | void show_backtrace() 288 | { 289 | struct pt_regs regs; 290 | prepare_frametrace(®s); 291 | 292 | printf("Call trace:\n"); 293 | print_caller(regs.regs[29], regs.regs[31]); 294 | printf("\n"); 295 | } 296 | 297 | -------------------------------------------------------------------------------- /uallsyms.c: -------------------------------------------------------------------------------- 1 | /* Generate assembler source containing symbol information 2 | * 3 | * Copyright 2002 by Kai Germaschewski 4 | * 5 | * This software may be used and distributed according to the terms 6 | * of the GNU General Public License, incorporated herein by reference. 7 | * 8 | * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S 9 | * 10 | * Table compression uses all the unused char codes on the symbols and 11 | * maps these to the most used substrings (tokens). For instance, it might 12 | * map char code 0xF7 to represent "write_" and then in every symbol where 13 | * "write_" appears it can be replaced by 0xF7, saving 5 bytes. 14 | * The used codes themselves are also placed in the table so that the 15 | * decompresion can work without "special cases". 16 | * Applied to kernel symbols, this usually produces a compression ratio 17 | * of about 50%. 
/* uallsyms: reads "nm -n" output on stdin and writes the uallsyms_* tables as assembler source. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* Longest symbol name accepted; keeps every record length within one byte. */
#define KSYM_NAME_LEN 128

struct sym_entry {
    unsigned long long addr;
    unsigned int len;        /* bytes in sym[]: type char + name */
    unsigned int start_pos;  /* position in the input, used as sort tie-break */
    unsigned char *sym;      /* type char followed by the NUL-terminated name */
};

struct text_range {
    const char *stext, *etext;
    unsigned long long start, end;
};

static unsigned long long _text;
static unsigned long long _end;
static int reach_fini = 0;

static struct text_range text_ranges[] = {
    { "_stext", "_etext" },
    { "_sinittext", "_einittext" },
    { "_stext_l1", "_etext_l1" }, /* Blackfin on-chip L1 inst SRAM */
    { "_stext_l2", "_etext_l2" }, /* Blackfin on-chip L2 SRAM */
};
#define text_range_text (&text_ranges[0])
#define text_range_inittext (&text_ranges[1])

/* Symbols this tool generates itself; they are never put into the table. */
static const char *const extra_symbols[] = {
    "uallsyms_addresses",
    "uallsyms_num_syms",
    "uallsyms_names",
    NULL,
};

static struct sym_entry *table;
static unsigned int table_size, table_cnt;
static int all_symbols = 0;
static char symbol_prefix_char = '\0';

#define IGNORE_FINI_SEC

int token_profit[0x10000];

/* the table that holds the result of the compression */
unsigned char best_table[256][2];
unsigned char best_table_len[256];


static void usage(void)
{
    fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=] < in.map > out.S\n");
    exit(1);
}

/*
 * This ignores the intensely annoying "mapping symbols" found
 * in ARM ELF files: $a, $t and $d (optionally followed by ".suffix").
 */
static inline int is_arm_mapping_symbol(const char *str)
{
    /* strchr() also matches the terminator, so a bare "$" has to be rejected
     * explicitly before str[2] may be looked at. */
    return str[0] == '$' && str[1] != '\0' && strchr("atd", str[1])
           && (str[2] == '\0' || str[2] == '.');
}

/*
 * If sym is the start or end marker of one of text_ranges[], record addr in
 * that range and return 0; otherwise return 1.
 */
static int read_symbol_tr(const char *sym, unsigned long long addr)
{
    size_t i;

    for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
        struct text_range *tr = &text_ranges[i];

        if (strcmp(sym, tr->stext) == 0) {
            tr->start = addr;
            return 0;
        }
        if (strcmp(sym, tr->etext) == 0) {
            tr->end = addr;
            return 0;
        }
    }

    return 1;
}

/* Return 1 if sym is one of the generated uallsyms_* symbols, else 0. */
static int is_extra_symbols(const char * sym)
{
    int i;

    for (i = 0; extra_symbols[i]; i++) {
        if (strcmp(sym, extra_symbols[i]) == 0)
            return 1;
    }

    return 0;
}

/* Name of the symbol that ends the region whose symbols are collected. */
#ifdef IGNORE_FINI_SEC
#define UALLSYMS_END_MARKER "_fini"
#else
#define UALLSYMS_END_MARKER "_end"
#endif

/*
 * Parse one "address type name" line of nm output into *s.
 *
 * Returns 0 when the symbol was stored (s->sym is malloc()ed and holds the
 * type char followed by the name), -1 when the line was skipped or the input
 * is exhausted.  Seeing the end marker sets reach_fini.
 */
static int read_symbol(FILE *in, struct sym_entry *s)
{
    char str[500];
    char *sym, stype;
    int rc;

    rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
    if (rc != 3) {
        /* Malformed line (e.g. an undefined symbol without an address):
         * drop the rest of it.  A failed fgets only means end of input. */
        if (rc != EOF && !fgets(str, sizeof(str), in))
            return -1;
        return -1;
    }

    /* The record length is emitted as a single byte and read back as one, so
     * over-long names would corrupt the name table. */
    if (strlen(str) > KSYM_NAME_LEN) {
        fprintf(stderr, "uallsyms: symbol %s too long (%lu > %d), skipped\n",
                str, (unsigned long)strlen(str), KSYM_NAME_LEN);
        return -1;
    }

    sym = str;
    /* skip prefix char */
    if (symbol_prefix_char && str[0] == symbol_prefix_char)
        sym++;

    /* Ignore most absolute/undefined (?) symbols. */
    if (strcmp(sym, "__start") == 0) {
        _text = s->addr;
    } else if (strcmp(sym, UALLSYMS_END_MARKER) == 0) {
        /* suppose all funcs we've defined are before the end marker. */
        reach_fini = 1;
        _end = s->addr;
        return -1;
    } else if (is_extra_symbols(sym)) {
        /* we don't need uallsyms_addresses, uallsyms_num_syms and uallsyms_names */
        return -1;
    } else if (read_symbol_tr(sym, s->addr) == 0) {
        /* range marker: recorded above, and kept as a symbol too */
    } else if (toupper((unsigned char)stype) == 'A') {
        /* Keep these useful absolute symbols */
        if (strcmp(sym, "__kernel_syscall_via_break") &&
            strcmp(sym, "__kernel_syscall_via_epc") &&
            strcmp(sym, "__kernel_sigtramp") &&
            strcmp(sym, "__gp"))
            return -1;
    } else if (toupper((unsigned char)stype) == 'U' ||
               is_arm_mapping_symbol(sym)) {
        return -1;
    } else if (str[0] == '$') {
        /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
        return -1;
    } else if (stype == 'N') {
        /* exclude debugging symbols */
        return -1;
    }

    /* include the type field in the symbol name, so that it gets
     * compressed together */
    s->len = strlen(str) + 1;
    s->sym = malloc(s->len + 1);
    if (!s->sym) {
        fprintf(stderr, "kallsyms failure: "
                "unable to allocate required amount of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy((char *)s->sym + 1, str);
    s->sym[0] = stype;

    return 0;
}

/* Return 1 if s lies inside one of the recorded text ranges, else 0. */
static int symbol_valid_tr(struct sym_entry *s)
{
    size_t i;

    for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
        const struct text_range *tr = &text_ranges[i];

        if (s->addr >= tr->start && s->addr <= tr->end)
            return 1;
    }

    return 0;
}

/* Return 1 if s should be kept in the table, 0 if it must be dropped. */
static int symbol_valid(struct sym_entry *s)
{
    /* Symbols which vary between passes.  Passes 1 and 2 must have
     * identical symbol lists.  The kallsyms_* symbols below are only added
     * after pass 1, they would be included in pass 2 when --all-symbols is
     * specified so exclude them to get a stable symbol list.
     */
    static const char *const special_symbols[] = {
        "uallsyms_addresses",
        "uallsyms_num_syms",
        "uallsyms_names",
        "kallsyms_markers",
        "kallsyms_token_table",
        "kallsyms_token_index",

        /* Exclude linker generated symbols which vary between passes */
        "_SDA_BASE_",       /* ppc */
        "_SDA2_BASE_",      /* ppc */
        NULL };
    int i;
    int offset = 1;     /* sym[0] is the type char */

    /* skip prefix char */
    if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
        offset++;

    /* if --all-symbols is not specified, then symbols outside the text
     * and inittext sections are discarded */
    if (!all_symbols) {
        if (symbol_valid_tr(s) == 0)
            return 0;
        /* Corner case.  Discard any symbols with the same value as
         * _etext _einittext; they can move between pass 1 and 2 when
         * the kallsyms data are added.  If these symbols move then
         * they may get dropped in pass 2, which breaks the kallsyms
         * rules.
         */
        if ((s->addr == text_range_text->end &&
                strcmp((char *)s->sym + offset, text_range_text->etext)) ||
            (s->addr == text_range_inittext->end &&
                strcmp((char *)s->sym + offset, text_range_inittext->etext)))
            return 0;
    }

    /* Exclude symbols which vary between passes. */
    if (strstr((char *)s->sym + offset, "_compiled."))
        return 0;

    for (i = 0; special_symbols[i]; i++)
        if (strcmp((char *)s->sym + offset, special_symbols[i]) == 0)
            return 0;

    return 1;
}

/*
 * Read every symbol from in into table[], growing the table as needed.
 * Stops at end of input, on a read error, or (with IGNORE_FINI_SEC) once the
 * end marker has been seen.
 */
static void read_map(FILE *in)
{
    /* Also test ferror(): after a read error feof() stays false and the loop
     * would otherwise never terminate. */
    while (!feof(in) && !ferror(in)) {
        if (table_cnt >= table_size) {
            struct sym_entry *grown;

            table_size += 10000;
            grown = realloc(table, sizeof(*table) * table_size);
            if (!grown) {
                fprintf(stderr, "out of memory\n");
                exit (1);
            }
            table = grown;
        }
        if (read_symbol(in, &table[table_cnt]) == 0) {
            table[table_cnt].start_pos = table_cnt;
            table_cnt++;
        }
#ifdef IGNORE_FINI_SEC
        if (1 == reach_fini)
            break;
#endif
    }
}

/* Emit ".globl label", the ALGN directive and "label:", honouring the prefix char. */
static void output_label(char *label)
{
    if (symbol_prefix_char)
        printf(".globl %c%s\n", symbol_prefix_char, label);
    else
        printf(".globl %s\n", label);
    printf("\tALGN\n");
    if (symbol_prefix_char)
        printf("%c%s:\n", symbol_prefix_char, label);
    else
        printf("%s:\n", label);
}

/* uncompress a compressed symbol. */
When this function is called, the best table 311 | * might still be compressed itself, so the function needs to be recursive */ 312 | static int expand_symbol(unsigned char *data, int len, char *result) 313 | { 314 | int c, rlen, total=0; 315 | 316 | while (len) { 317 | c = *data; 318 | /* if the table holds a single char that is the same as the one 319 | * we are looking for, then end the search */ 320 | if (best_table[c][0]==c && best_table_len[c]==1) { 321 | *result++ = c; 322 | total++; 323 | } else { 324 | /* if not, recurse and expand */ 325 | rlen = expand_symbol(best_table[c], best_table_len[c], result); 326 | total += rlen; 327 | result += rlen; 328 | } 329 | data++; 330 | len--; 331 | } 332 | *result=0; 333 | 334 | return total; 335 | } 336 | 337 | static void write_src(void) 338 | { 339 | unsigned int i, k, off; 340 | unsigned int best_idx[256]; 341 | unsigned int *markers; 342 | char buf[KSYM_NAME_LEN]; 343 | 344 | //printf("#include \n"); 345 | printf("#if BITS_PER_LONG == 64\n"); 346 | printf("#define PTR .quad\n"); 347 | printf("#define ALGN .align 8\n"); 348 | printf("#else\n"); 349 | printf("#define PTR .long\n"); 350 | printf("#define ALGN .align 4\n"); 351 | printf("#endif\n"); 352 | 353 | printf("\t.section .rodata, \"a\"\n"); 354 | 355 | /* Provide proper symbols relocatability by their '_text' 356 | * relativeness. The symbol names cannot be used to construct 357 | * normal symbol references as the list of symbols contains 358 | * symbols that are declared static and are private to their 359 | * .o files. This prevents .tmp_kallsyms.o or any other 360 | * object from referencing them. 
361 | */ 362 | output_label("uallsyms_addresses"); 363 | for (i = 0; i < table_cnt; i++) { 364 | if (toupper(table[i].sym[0]) != 'A') { 365 | if (_text <= table[i].addr) 366 | printf("\tPTR\t__start + %#llx\n", 367 | table[i].addr - _text); 368 | else 369 | printf("\tPTR\t__start - %#llx\n", 370 | _text - table[i].addr); 371 | } else { 372 | printf("\tPTR\t%#llx\n", table[i].addr); 373 | } 374 | } 375 | printf("\n"); 376 | 377 | output_label("uallsyms_num_syms"); 378 | printf("\tPTR\t%d\n", table_cnt); 379 | printf("\n"); 380 | 381 | /* table of offset markers, that give the offset in the compressed stream 382 | * every 256 symbols */ 383 | markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); 384 | if (!markers) { 385 | fprintf(stderr, "kallsyms failure: " 386 | "unable to allocate required memory\n"); 387 | exit(EXIT_FAILURE); 388 | } 389 | 390 | output_label("uallsyms_names"); 391 | off = 0; 392 | for (i = 0; i < table_cnt; i++) { 393 | if ((i & 0xFF) == 0) 394 | markers[i >> 8] = off; 395 | 396 | printf("\t.byte 0x%02x", table[i].len); 397 | for (k = 0; k < table[i].len; k++) 398 | printf(", 0x%02x", table[i].sym[k]); 399 | printf("\n"); 400 | 401 | off += table[i].len + 1; 402 | } 403 | printf("\n"); 404 | #if 0 405 | output_label("kallsyms_markers"); 406 | for (i = 0; i < ((table_cnt + 255) >> 8); i++) 407 | printf("\tPTR\t%d\n", markers[i]); 408 | printf("\n"); 409 | 410 | free(markers); 411 | 412 | output_label("kallsyms_token_table"); 413 | off = 0; 414 | for (i = 0; i < 256; i++) { 415 | best_idx[i] = off; 416 | expand_symbol(best_table[i], best_table_len[i], buf); 417 | printf("\t.asciz\t\"%s\"\n", buf); 418 | off += strlen(buf) + 1; 419 | } 420 | printf("\n"); 421 | 422 | output_label("kallsyms_token_index"); 423 | for (i = 0; i < 256; i++) 424 | printf("\t.short\t%d\n", best_idx[i]); 425 | printf("\n"); 426 | #endif 427 | } 428 | 429 | 430 | /* table lookup compression functions */ 431 | 432 | /* count all the possible tokens in a symbol 
*/ 433 | static void learn_symbol(unsigned char *symbol, int len) 434 | { 435 | int i; 436 | 437 | for (i = 0; i < len - 1; i++) 438 | token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; 439 | } 440 | 441 | /* decrease the count for all the possible tokens in a symbol */ 442 | static void forget_symbol(unsigned char *symbol, int len) 443 | { 444 | int i; 445 | 446 | for (i = 0; i < len - 1; i++) 447 | token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; 448 | } 449 | 450 | /* remove all the invalid symbols from the table and do the initial token count */ 451 | static void build_initial_tok_table(void) 452 | { 453 | unsigned int i, pos; 454 | 455 | pos = 0; 456 | for (i = 0; i < table_cnt; i++) { 457 | if ( symbol_valid(&table[i]) ) { 458 | if (pos != i) 459 | table[pos] = table[i]; 460 | learn_symbol(table[pos].sym, table[pos].len); 461 | pos++; 462 | } 463 | } 464 | table_cnt = pos; 465 | } 466 | 467 | static void *find_token(unsigned char *str, int len, unsigned char *token) 468 | { 469 | int i; 470 | 471 | for (i = 0; i < len - 1; i++) { 472 | if (str[i] == token[0] && str[i+1] == token[1]) 473 | return &str[i]; 474 | } 475 | return NULL; 476 | } 477 | 478 | /* replace a given token in all the valid symbols. 
Use the sampled symbols 479 | * to update the counts */ 480 | static void compress_symbols(unsigned char *str, int idx) 481 | { 482 | unsigned int i, len, size; 483 | unsigned char *p1, *p2; 484 | 485 | for (i = 0; i < table_cnt; i++) { 486 | 487 | len = table[i].len; 488 | p1 = table[i].sym; 489 | 490 | /* find the token on the symbol */ 491 | p2 = find_token(p1, len, str); 492 | if (!p2) continue; 493 | 494 | /* decrease the counts for this symbol's tokens */ 495 | forget_symbol(table[i].sym, len); 496 | 497 | size = len; 498 | 499 | do { 500 | *p2 = idx; 501 | p2++; 502 | size -= (p2 - p1); 503 | memmove(p2, p2 + 1, size); 504 | p1 = p2; 505 | len--; 506 | 507 | if (size < 2) break; 508 | 509 | /* find the token on the symbol */ 510 | p2 = find_token(p1, size, str); 511 | 512 | } while (p2); 513 | 514 | table[i].len = len; 515 | 516 | /* increase the counts for this symbol's new tokens */ 517 | learn_symbol(table[i].sym, len); 518 | } 519 | } 520 | 521 | /* search the token with the maximum profit */ 522 | static int find_best_token(void) 523 | { 524 | int i, best, bestprofit; 525 | 526 | bestprofit=-10000; 527 | best = 0; 528 | 529 | for (i = 0; i < 0x10000; i++) { 530 | if (token_profit[i] > bestprofit) { 531 | best = i; 532 | bestprofit = token_profit[i]; 533 | } 534 | } 535 | return best; 536 | } 537 | 538 | /* this is the core of the algorithm: calculate the "best" table */ 539 | static void optimize_result(void) 540 | { 541 | int i, best; 542 | 543 | /* using the '\0' symbol last allows compress_symbols to use standard 544 | * fast string functions */ 545 | for (i = 255; i >= 0; i--) { 546 | 547 | /* if this table slot is empty (it is not used by an actual 548 | * original char code */ 549 | if (!best_table_len[i]) { 550 | 551 | /* find the token with the breates profit value */ 552 | best = find_best_token(); 553 | 554 | /* place it in the "best" table */ 555 | best_table_len[i] = 2; 556 | best_table[i][0] = best & 0xFF; 557 | best_table[i][1] = (best >> 
8) & 0xFF; 558 | 559 | /* replace this token in all the valid symbols */ 560 | compress_symbols(best_table[i], i); 561 | } 562 | } 563 | } 564 | 565 | /* start by placing the symbols that are actually used on the table */ 566 | static void insert_real_symbols_in_table(void) 567 | { 568 | unsigned int i, j, c; 569 | 570 | memset(best_table, 0, sizeof(best_table)); 571 | memset(best_table_len, 0, sizeof(best_table_len)); 572 | 573 | for (i = 0; i < table_cnt; i++) { 574 | for (j = 0; j < table[i].len; j++) { 575 | c = table[i].sym[j]; 576 | best_table[c][0]=c; 577 | best_table_len[c]=1; 578 | } 579 | } 580 | } 581 | 582 | static void optimize_token_table(void) 583 | { 584 | build_initial_tok_table(); 585 | 586 | insert_real_symbols_in_table(); 587 | 588 | /* When valid symbol is not registered, exit to error */ 589 | if (!table_cnt) { 590 | fprintf(stderr, "No valid symbol.\n"); 591 | exit(1); 592 | } 593 | 594 | optimize_result(); 595 | } 596 | 597 | /* guess for "linker script provide" symbol */ 598 | static int may_be_linker_script_provide_symbol(const struct sym_entry *se) 599 | { 600 | const char *symbol = (char *)se->sym + 1; 601 | int len = se->len - 1; 602 | 603 | if (len < 8) 604 | return 0; 605 | 606 | if (symbol[0] != '_' || symbol[1] != '_') 607 | return 0; 608 | 609 | /* __start_XXXXX */ 610 | if (!memcmp(symbol + 2, "start_", 6)) 611 | return 1; 612 | 613 | /* __stop_XXXXX */ 614 | if (!memcmp(symbol + 2, "stop_", 5)) 615 | return 1; 616 | 617 | /* __end_XXXXX */ 618 | if (!memcmp(symbol + 2, "end_", 4)) 619 | return 1; 620 | 621 | /* __XXXXX_start */ 622 | if (!memcmp(symbol + len - 6, "_start", 6)) 623 | return 1; 624 | 625 | /* __XXXXX_end */ 626 | if (!memcmp(symbol + len - 4, "_end", 4)) 627 | return 1; 628 | 629 | return 0; 630 | } 631 | 632 | static int prefix_underscores_count(const char *str) 633 | { 634 | const char *tail = str; 635 | 636 | while (*tail != '_') 637 | tail++; 638 | 639 | return tail - str; 640 | } 641 | 642 | static int 
compare_symbols(const void *a, const void *b) 643 | { 644 | const struct sym_entry *sa; 645 | const struct sym_entry *sb; 646 | int wa, wb; 647 | 648 | sa = a; 649 | sb = b; 650 | 651 | /* sort by address first */ 652 | if (sa->addr > sb->addr) 653 | return 1; 654 | if (sa->addr < sb->addr) 655 | return -1; 656 | 657 | /* sort by "weakness" type */ 658 | wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); 659 | wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); 660 | if (wa != wb) 661 | return wa - wb; 662 | 663 | /* sort by "linker script provide" type */ 664 | wa = may_be_linker_script_provide_symbol(sa); 665 | wb = may_be_linker_script_provide_symbol(sb); 666 | if (wa != wb) 667 | return wa - wb; 668 | 669 | /* sort by the number of prefix underscores */ 670 | wa = prefix_underscores_count((const char *)sa->sym + 1); 671 | wb = prefix_underscores_count((const char *)sb->sym + 1); 672 | if (wa != wb) 673 | return wa - wb; 674 | 675 | /* sort by initial order, so that other symbols are left undisturbed */ 676 | return sa->start_pos - sb->start_pos; 677 | } 678 | 679 | static void sort_symbols(void) 680 | { 681 | qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols); 682 | } 683 | 684 | int main(int argc, char **argv) 685 | { 686 | if (argc >= 2) { 687 | int i; 688 | for (i = 1; i < argc; i++) { 689 | if(strcmp(argv[i], "--all-symbols") == 0) 690 | all_symbols = 1; 691 | else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) { 692 | char *p = &argv[i][16]; 693 | /* skip quote */ 694 | if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) 695 | p++; 696 | symbol_prefix_char = *p; 697 | } else 698 | usage(); 699 | } 700 | } else if (argc != 1) 701 | usage(); 702 | 703 | read_map(stdin); 704 | sort_symbols(); 705 | //optimize_token_table(); 706 | write_src(); 707 | 708 | return 0; 709 | } 710 | 711 | --------------------------------------------------------------------------------