├── LICENSE ├── Makefile ├── README ├── auto_crash.c ├── auto_test.c ├── crash.c ├── crash.h ├── crash_msg.h ├── crashd.c ├── naive.c └── test.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2010 Gilad Ben-Yossef 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Add debug information and asserts? 3 | DEBUG_BUILD ?= y 4 | 5 | # Build multi threaded version of the library? 6 | USE_THREADS ?= y 7 | 8 | # Location and prefix of cross compiler, Linux kernel style 9 | CROSS_COMPILE ?= 10 | 11 | AS = $(CROSS_COMPILE)as 12 | LD = $(CROSS_COMPILE)ld 13 | CC = $(CROSS_COMPILE)gcc 14 | CPP = $(CC) -E 15 | AR = $(CROSS_COMPILE)ar 16 | NM = $(CROSS_COMPILE)nm 17 | STRIP = $(CROSS_COMPILE)strip 18 | OBJCOPY = $(CROSS_COMPILE)objcopy 19 | OBJDUMP = $(CROSS_COMPILE)objdump 20 | 21 | 22 | CFLAGS := -Wall 23 | LDFLAGS := 24 | 25 | LIBS_CFLAGS := $(CFLAGS) 26 | LIBS_LDFLAGS := $(LDLAGS) -lrt -shared 27 | 28 | ifdef USE_THREADS 29 | LIBS_CFLAGS += -pthread -D_REENTRANT -DUSE_THREADS 30 | LIBS_LDFLAGS += -pthread 31 | endif 32 | 33 | ifdef DEBUG_BUILD 34 | LIBS_CFLAGS += -g 35 | else 36 | LIBS_CFLAGS += -DNDEBUG 37 | endif 38 | 39 | TESTS_CFLAGS = $(CFLAGS) -g -pthread -D_REENTRANT 40 | TESTS_LDFLAGS = $(LDFLAGS) -rdynamic -lrt 41 | 42 | LIB_API_VERSION = 1 43 | LIB_VERSION = 1.0.0 44 | 45 | .PHONY: all clean libs tests dist 46 | 47 | all: libs tests 48 | 49 | # The regular and automagic version of the library 50 | 51 | libs: libcrash.so.$(LIB_VERSION) libauto_crash.so.$(LIB_VERSION) 52 | 53 | libcrash.so.$(LIB_VERSION): crash.c crashd.c crash.h crash_msg.h Makefile 54 | $(CC) -fPIC crash.c crashd.c $(LIBS_CFLAGS) $(LIBS_LDFLAGS) \ 55 | -Wl,-soname,libcrash.so.$(LIB_API_VERSION) -o libcrash.so.$(LIB_VERSION) 56 | 57 | libauto_crash.so.$(LIB_VERSION): auto_crash.c libcrash.so.$(LIB_VERSION) crash.h Makefile 58 | $(CC) -fPIC auto_crash.c libcrash.so.$(LIB_VERSION) $(LIBS_CFLAGS) $(LIBS_LDFLAGS) \ 59 | -Wl,-soname,libauto_crash.so.$(LIB_API_VERSION) -o libauto_crash.so.$(LIB_VERSION) 60 | 61 | 62 | ifdef USE_THREADS 63 | 64 | # Unit tests. 65 | # Make sure to ldconfig -n `pwd` before running 66 | 67 | tests: test auto_test naive_test 68 | 69 | test: libcrash.so.$(LIB_VERSION) test.c crash.c crashd.c crash.h crash_msg.h Makefile 70 | $(CC) test.c crash.c crashd.c $(TESTS_CFLAGS) $(TESTS_LDFLAGS) -o test 71 | 72 | auto_test: libauto_crash.so.$(LIB_VERSION) auto_test.c crash.h crash_msg.h Makefile 73 | $(CC) auto_test.c $(TESTS_CFLAGS) $(TESTS_LDFLAGS) -o auto_test 74 | 75 | naive_test: naive.c Makefile 76 | $(CC) naive.c $(TESTS_CFLAGS) $(TESTS_LDFLAGS) -o naive_test 77 | 78 | else 79 | 80 | tests: 81 | @echo Tests not built due to lack of thread support. 82 | endif 83 | 84 | dist: clean 85 | rm -f libcrash-$(LIB_VERSION).tar.bz2 && \ 86 | rm -rf libcrash-$(LIB_VERSION) && \ 87 | mkdir libcrash-$(LIB_VERSION) && \ 88 | cp *.c *.h Makefile README libcrash-$(LIB_VERSION) && \ 89 | tar jcvf libcrash-$(LIB_VERSION).tar.bz2 libcrash-$(LIB_VERSION) 90 | rm -rf libcrash-$(LIB_VERSION) 91 | 92 | clean: 93 | rm -f *.o naive_test auto_test test *~ libcrash.so.$(LIB_VERSION) \ 94 | libauto_crash.so.$(LIB_VERSION) 95 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | libcrash 3 | 4 | Building 5 | ~~~~~~~~ 6 | 7 | # make 8 | 9 | or 10 | 11 | # CROSS_COMPILE=/path/to/toolchain/toolchain_refix- make 12 | 13 | Testing 14 | ~~~~~~~ 15 | 16 | Two tests and one utility program for testing are provided. 17 | 18 | test: normal applciation test (application calls 19 | registration function). 20 | 21 | auto_test: automagic usage test (library loads magically 22 | with unaltered program) 23 | 24 | To run the tests - 25 | 26 | for normal usage: 27 | 28 | # ./test 29 | 30 | for automagicall usage: 31 | 32 | # ldconfig -n . 33 | # LD_PRELOAD=libauto_crash.so.1 ./auto_test 34 | 35 | Usage 36 | ~~~~~ 37 | 38 | Have your program link with libcrash.so.1 and call register_crash_handler() once 39 | for each process (not thread): 40 | 41 | int register_crash_handler( 42 | const char * process_name, /* Name of Process, such as argv[0] */ 43 | unsigned char * assert_buf_ptr /* Pointer to program assert buffer */ 44 | ); 45 | 46 | Re-call after fork(). 47 | 48 | The registrtation functrion return 0 if all is well, and a negative number 49 | with the value of the errno function that cause it to fail otherwise. 50 | 51 | See test.c for usage example. 52 | 53 | Normally running the program should then use the library and we'll send crash info to 54 | tdout 55 | 56 | Alternatibly, you can use 57 | # LD_PRELOAD=libauto_crash.so.1 ./program 58 | 59 | To run the library with an unchanged (but non SUID) binary program with no source alterations. 60 | 61 | Happy hacking, 62 | gby 63 | 64 | -------------------------------------------------------------------------------- /auto_crash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "crash.h" 3 | 4 | /* OK, we want a way to call the register function (not to mention load the library) 5 | * without messing with source files and re-compiling. Here how it works: 6 | * 7 | * We create a library with a single init function that calls the registration for us 8 | * using a function constructor hook and we'll force the dynamic linker to loda it 9 | * in run time using LD_PRELOAD magic. 10 | * 11 | * Pretty, clever, isn't it? :-) 12 | */ 13 | 14 | void __attribute__ ((constructor)) debug_auto_init(void); 15 | 16 | /* Buffer for assert info that we need to supply */ 17 | static unsigned char buf[128]; 18 | 19 | void debug_auto_init(void) { 20 | 21 | /* Print some meaningfull message so they'll know that we're running */ 22 | fprintf(stderr, "\n*** libcrash automagically loaded. Registering..."); 23 | 24 | /* Blast us if we know what's the process name. We just use [AUTOMAGIC] */ 25 | 26 | if(register_crash_handler("[AUTOMAGIC]", buf)) 27 | fprintf(stderr, "Failed!\n"); 28 | else 29 | fprintf(stderr, "Success.\n"); 30 | 31 | return; 32 | } 33 | 34 | /* THE END */ 35 | -------------------------------------------------------------------------------- /auto_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void print_message_function (void *ptr); 8 | 9 | 10 | int 11 | main (void) 12 | { 13 | pthread_t thread1; 14 | pthread_t thread2; 15 | 16 | printf("Starting first run\n"); 17 | 18 | pthread_create (&thread1, NULL, (void *) &print_message_function, NULL); 19 | 20 | pthread_create (&thread2, NULL, (void *) &print_message_function, NULL); 21 | 22 | pthread_join (thread2, NULL); 23 | 24 | /* Not reached */ 25 | printf("This should never happen!\n"); 26 | pthread_join (thread1, NULL); 27 | 28 | return 0; 29 | } 30 | 31 | 32 | void 33 | croak (void) 34 | { 35 | int *ip = (int *) 17; 36 | char * p = sbrk(0); 37 | 38 | memset(p-1024, 42, 1024); 39 | write(3000, "xxx", 3); 40 | *ip = 7; 41 | } 42 | 43 | 44 | void 45 | die (void) 46 | { 47 | volatile int i= 12; 48 | croak (); 49 | i++; 50 | return; 51 | } 52 | 53 | 54 | void 55 | print_message_function(void *dummy) 56 | { 57 | 58 | printf ("Morituri te salutant!\n"); 59 | die (); 60 | 61 | pthread_exit (0); 62 | } 63 | -------------------------------------------------------------------------------- /crash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #define __USE_GNU /* Needed to get REG_EIP from ucontext.h */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #ifdef USE_THREADS 18 | #include 19 | #define _XOPEN_SOURCE 600 20 | #include 21 | #endif /* USE_THREADS */ 22 | 23 | #include "crash.h" 24 | #include "crash_msg.h" 25 | 26 | /* Global static crash message buffer */ 27 | static struct crash_message_struct g_crash_msg; 28 | 29 | /* Pipe file descriptor to crashd */ 30 | static int g_logfd = -1; 31 | 32 | /* Pointer to global assert info, supplied during registration */ 33 | static unsigned char * g_assert_buf_ptr = NULL; 34 | 35 | #ifdef USE_THREADS 36 | /* Spinlock protecting access to the fault handler in multi-threaded setups */ 37 | static pthread_spinlock_t g_thread_lock; 38 | #endif /* USE_THREADS */ 39 | 40 | /* gettid in non offical so not in glibc headers. This works though */ 41 | inline pid_t gettid (void) 42 | { 43 | return syscall(__NR_gettid); 44 | } 45 | 46 | /* Get a backtrace from a signal handler. 47 | * array is place to put array 48 | * size is it's size 49 | * context is a pointer to the mysterious signal ahndler 3rd parameter with the registers 50 | * distance is the distance is calls from the signal handler 51 | * 52 | */ 53 | inline unsigned int signal_backtrace(void ** array, unsigned int size, ucontext_t * context, unsigned int distance) { 54 | 55 | /* WARNING: If you ever remove the inline from the function prototype, 56 | * adjust this to match!!! 57 | */ 58 | #define IP_STACK_FRAME_NUMBER (3) 59 | 60 | unsigned int ret = backtrace(array, size); 61 | distance += IP_STACK_FRAME_NUMBER; 62 | 63 | assert(distance <= size); 64 | 65 | /* OK, here is the tricky part: 66 | * 67 | * Linux signal handling on some archs works by the kernel replacing, in situ, the 68 | * return address of the faulting function on the faulting thread user space stack with 69 | * that of the Glibc signal unwind handling routine and coercing user space to just to 70 | * glibc signal handler preamble. Later the signal unwind handling routine undo this. 71 | * 72 | * What this means for us is that the backtrace we get is missing the single most important 73 | * bit of information: the addres of the faulting function. 74 | * 75 | * We get it back using the undocumented 3rs parameter to the signal handler call back 76 | * with used in it's SA_SIGINFO form which contains access to the registers kept during 77 | * the fault. We grab the IP from there and 'fix' the backtrace. 78 | * 79 | * This needs to be different per arch, of course. 80 | */ 81 | 82 | #ifdef __i386__ 83 | array[distance] = (void *)(context->uc_mcontext.gregs[REG_EIP]); 84 | #endif /* __i386__ */ 85 | 86 | #ifdef __PPC__ 87 | array[distance] = (void *)(context->uc_mcontext.regs->nip); 88 | #endif /* __PPC__ */ 89 | 90 | return ret; 91 | } 92 | 93 | 94 | /* The fault handler function. 95 | * 96 | * OK. The rules of the battle are those: 97 | * 98 | * 1. Can't use any function that relies on malloc and friends working as the malloc arena may be corrupt. 99 | * 2. Can only use a the POSIX.1-2003 list of async-safe functions. 100 | * 3. Some of the functions on the list are not always safe (like fork when atfork() is used), 101 | * so need to avoid these also. 102 | * 4. No locking allowed. We don't know in what state the process/thread was when the exception 103 | * occured. 104 | */ 105 | void fault_handler (int signal, siginfo_t * siginfo, void *context) 106 | { 107 | int i, ret; 108 | 109 | #ifdef USE_THREADS 110 | 111 | ret = pthread_spin_trylock(&g_thread_lock); 112 | 113 | if (EBUSY == ret) { 114 | /* Think of the following as an async-signal safe super sched_yield that 115 | * yields even to threads with lower real-time priority */ 116 | sigset_t smask; 117 | sigemptyset(&smask); 118 | pselect(0, NULL, NULL, NULL, NULL, &smask); 119 | } 120 | 121 | #endif /* USE_THREADS */ 122 | 123 | /* Get the backtrace. See signal_backtrace for the parameters */ 124 | 125 | g_crash_msg.num_backtrace_frames = signal_backtrace(g_crash_msg.backtrace, 126 | CRASH_MAX_BACKTRACE_DEPTH, context, 0); 127 | 128 | /* Grab the kernel thread id. Because signal handler are shared between all 129 | * threads of the same process, this can only be doen in fault time. */ 130 | 131 | g_crash_msg.thread_id = gettid(); 132 | 133 | /* Grab the signal number */ 134 | g_crash_msg.signal_number = signal; 135 | 136 | /* Grab time stamp */ 137 | clock_gettime(CLOCK_REALTIME, &g_crash_msg.timestamp); 138 | 139 | /* Copy the assert buffer without using strings.h fucntions. */ 140 | for(i=0; i< CRASH_ASSERT_BUFFER_SIZE; ++i) { 141 | g_crash_msg.assert_buffer[i] = *(g_assert_buf_ptr++); 142 | } 143 | 144 | if (siginfo) /* No reasons for this to be NULL, but still... */ 145 | { 146 | /* See description of these in crash_msg.h */ 147 | g_crash_msg.signal_code = siginfo->si_code; 148 | g_crash_msg.fault_address = siginfo->si_addr; 149 | g_crash_msg.signal_errno = siginfo->si_errno; 150 | g_crash_msg.handler_errno = errno; 151 | } 152 | 153 | retry_write: 154 | 155 | ret = write(g_logfd, &g_crash_msg, sizeof(g_crash_msg)); 156 | 157 | /* If we got interrupt by a signal, retry the write. 158 | * This shouldn't really happen since we mask all signals 159 | * during the handler run via sigaction sa_mask field but 160 | * it can't hurt to test. 161 | * 162 | * It's useless to test for any other condition since we 163 | * can't do anything if we fail 164 | */ 165 | if(ret && EINTR==errno) goto retry_write; 166 | 167 | /* We use backtrace_symbols_fd rather then backtrace_symbols since 168 | * the latter uses malloc to allocate memory and if we got here 169 | * because of malloc arena curroption we'll double fault. 170 | */ 171 | backtrace_symbols_fd(g_crash_msg.backtrace, g_crash_msg.num_backtrace_frames, g_logfd); 172 | 173 | close(g_logfd); 174 | 175 | /* Produce a core dump for post morteum debugging */ 176 | abort(); 177 | 178 | assert(0 /* Not Reached */); 179 | 180 | return; 181 | } 182 | 183 | /* Set the FD_CLOEXEC flag of desc if value is nonzero, 184 | or clear the flag if value is 0. 185 | Return 0 on success, or -1 on error with errno set. */ 186 | 187 | int set_cloexec_flag (int desc, int value) 188 | { 189 | int oldflags = fcntl(desc, F_GETFD, 0); 190 | 191 | /* If reading the flags failed, return error indication now. */ 192 | if (oldflags < 0) { 193 | return oldflags; 194 | } 195 | 196 | /* Set just the flag we want to set. */ 197 | if (value != 0) { 198 | oldflags |= FD_CLOEXEC; 199 | } else { 200 | oldflags &= ~FD_CLOEXEC; 201 | } 202 | 203 | /* Store modified flag word in the descriptor. */ 204 | return fcntl(desc, F_SETFD, oldflags); 205 | } 206 | 207 | /* Registration function. Needs to be called once by each process (not thread) 208 | * process_name is argv[0] or whatever you'd like. 209 | * assert_buf_ptr needs to point to the 128 byte assert buffer. 210 | * */ 211 | int register_crash_handler(const char * process_name, unsigned char * assert_buf_ptr) 212 | { 213 | struct sigaction act; /* Signal handler register struct */ 214 | int ret; /* Return value for various calls */ 215 | int pfd[2]; /* Pipe file descriptor array */ 216 | 217 | /* See ahead about these two: */ 218 | void * dummy_trace_array[1]; 219 | unsigned int dummy_trace_size; 220 | 221 | assert(sizeof(g_crash_msg) <= CRASH_MAX_MSG_SIZE); 222 | 223 | if(!process_name || !assert_buf_ptr) { 224 | return EINVAL; 225 | } 226 | 227 | #ifdef USE_THREADS 228 | 229 | ret = pthread_spin_init(&g_thread_lock, 0); 230 | if(ret) { 231 | return ret; 232 | } 233 | #endif /* USE_THREADS */ 234 | 235 | 236 | /* If we're called again (perhaps after a fork() ), the pipe is already open. 237 | * That's just fine with us */ 238 | 239 | if(-1 == g_logfd) { 240 | 241 | /* Grab us a pipe to communicate with our crash daemon */ 242 | ret = pipe(pfd); 243 | 244 | if(-1 == ret) { 245 | return errno; 246 | } 247 | 248 | g_logfd = pfd[1]; /* Grab the write end of the pipe */ 249 | 250 | /* If the caller program execs, we want the pipe to close, 251 | * because it's not likely a random program will have the 252 | * right signal handler set to use the crash daemon. */ 253 | ret = set_cloexec_flag(g_logfd, 1); 254 | 255 | if(-1 == ret) { 256 | return errno; 257 | } 258 | 259 | /* Set our daemon up */ 260 | crashd_main(1, process_name, pfd); 261 | 262 | close(pfd[0]); 263 | 264 | } 265 | 266 | /* This requires some explaining: 267 | * In theory, neither backtrace nor backtrace_symbold_fd call malloc and friends so 268 | * we are able to use them in a an exception handler safely. 269 | * 270 | * In practice recent glibc versions put these function in a seperate shared library 271 | * called libgcc_s.so when gets loaded automagically by the dynamic linker when any these 272 | * of these functions are first used and, you guessed it, the dynamic linker uses malloc 273 | * in the process to get some internal buffer. 274 | * 275 | * We therefore give these a dummy call here during registration to assure that the library 276 | * gets loaded where it's safe to malloc. 277 | */ 278 | 279 | dummy_trace_size = backtrace(dummy_trace_array, 1); 280 | backtrace_symbols_fd (dummy_trace_array, dummy_trace_size, -1); 281 | 282 | /* This data we can already grab during registration, not need to wait for crash */ 283 | g_crash_msg.magic = CRASH_MSG_MAGIC; 284 | memcpy(g_crash_msg.process_name, process_name, strnlen(process_name, CRASH_MAX_PROCESS_NAME_SIZE)+1); 285 | g_crash_msg.process_id = getpid(); 286 | 287 | g_assert_buf_ptr = assert_buf_ptr; 288 | 289 | /* Prepare a sigaction struct for exception handler registrations */ 290 | memset(&act, 0, sizeof (act)); 291 | act.sa_sigaction = fault_handler; 292 | /* No signals during handler run, please */ 293 | sigfillset (&act.sa_mask); 294 | /* We want the 3 parameter form of the handler with the siginfo_t addtional data */ 295 | act.sa_flags = SA_SIGINFO; 296 | 297 | 298 | /* Register the handler for all exception signals. */ 299 | ret = sigaction (SIGSEGV, &act, NULL); 300 | ret |= sigaction (SIGILL, &act, NULL); 301 | ret |= sigaction (SIGFPE, &act, NULL); 302 | ret |= sigaction (SIGBUS, &act, NULL); 303 | ret |= sigaction (SIGQUIT, &act, NULL); 304 | 305 | 306 | return ret; 307 | } 308 | -------------------------------------------------------------------------------- /crash.h: -------------------------------------------------------------------------------- 1 | #ifndef CRASH_H 2 | #define CRASH_H 3 | 4 | #include 5 | 6 | /* Main library registration function. 7 | * Call this once for each process (not thread). Recall after fork() if you want 8 | * process name and ID to be updated 9 | * 10 | */ 11 | int register_crash_handler( 12 | const char * process_name, /* Name of Process, such as argv[0] */ 13 | unsigned char * assert_buf_ptr /* Pointer to assert buffer */ 14 | ); 15 | 16 | /* Asks this thread to dump. You can use this for asserts. */ 17 | int static inline crash_dump(void) { 18 | asm volatile ("" : : : "memory"); 19 | return raise(SIGQUIT); 20 | } 21 | 22 | #ifdef USE_THREADS 23 | 24 | /* Ask some other thread to dump. You can use this for asserts. */ 25 | int static inline crash_dump_thread(pthread_t thread) { 26 | asm volatile ("" : : : "memory"); 27 | return pthread_kill(thread, SIGUSR1); 28 | } 29 | #endif /* USE_THREADS */ 30 | 31 | 32 | #endif /* CRASH_H */ 33 | 34 | -------------------------------------------------------------------------------- /crash_msg.h: -------------------------------------------------------------------------------- 1 | #ifndef CRASH_MSG_H_ 2 | #define CRASH_MSG_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | struct crash_message_struct; 9 | 10 | #define CRASH_MAX_PROCESS_NAME_SIZE (12) 11 | #define CRASH_MAX_BACKTRACE_DEPTH (25) 12 | #define CRASH_ASSERT_BUFFER_SIZE (128) 13 | #define CRASH_MAX_MSG_SIZE (4096) 14 | #define CRASH_MSG_MAGIC (0xdeadbeefUL) 15 | #define CRASH_ANCILLARY_DATA_SIZE (CRASH_MAX_MSG_SIZE - sizeof(struct crash_message_struct)) 16 | 17 | /* Dead man switch: we wait this many seconds since first being tickled 18 | * until we continue processing crash, whether full crash information 19 | * has been received or not 20 | * */ 21 | #define READ_TIMEOUT (10) 22 | 23 | void crashd_main(char daemonise_flag, const char * progname, int pfd[]); 24 | 25 | 26 | struct crash_message_struct { 27 | /* Magic number */ 28 | unsigned long magic; 29 | /* Process name as passed during registration */ 30 | char process_name[CRASH_MAX_PROCESS_NAME_SIZE]; 31 | /* The process PID */ 32 | pid_t process_id; 33 | /* The process kernel thread id (struct task -> pid), NOT pthread_self() */ 34 | pid_t thread_id; 35 | /* The number of the exception signal */ 36 | unsigned int signal_number; 37 | /* The signal code from siginfo_t. Provides exception reason */ 38 | unsigned int signal_code; 39 | /* Fault address, if relevant */ 40 | void * fault_address; 41 | /* The last error as reported via siginfo_t. Seems to be always 0 */ 42 | unsigned int signal_errno; 43 | /* The last error in errno when the exception handler got called. */ 44 | unsigned int handler_errno; 45 | /* Number of stack frames we got */ 46 | size_t num_backtrace_frames; 47 | /* A time stamp */ 48 | struct timespec timestamp; 49 | /* Buffer for assert data */ 50 | unsigned char assert_buffer[CRASH_ASSERT_BUFFER_SIZE]; 51 | /* Stack backtrace */ 52 | void *backtrace[CRASH_MAX_BACKTRACE_DEPTH]; 53 | /* Place holder for ancillary data, such as symbol trace sent as seperate message */ 54 | char ancillary_data[0]; 55 | }; 56 | 57 | #endif /* CRASH_MSG_H_*/ 58 | -------------------------------------------------------------------------------- /crashd.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "crash_msg.h" 18 | 19 | /* The buffer holds the message + ancillary data, such as symbol stack traced 20 | * The pointer is used to cash the buffer to the header */ 21 | static char crash_msg_buf[CRASH_MAX_MSG_SIZE]; 22 | static struct crash_message_struct * crash_msg = (struct crash_message_struct *)&crash_msg_buf; 23 | 24 | /* A simple compiler only memory barrier, both read and write */ 25 | #define mb(x) asm volatile ("" : : : "memory") 26 | 27 | /* When this is set from SIGTERM signal handler it's 28 | * time to terminate. 29 | * 30 | * NOTE: It's a very good idea to kill crashd before any process 31 | * relying on it for exception handling. 32 | */ 33 | static char terminate_flag = 0; 34 | 35 | /* When this is set from the SIGALRM signal handler 36 | * it means our time to read crash details is out 37 | */ 38 | 39 | static char timeout_flag = 0; 40 | 41 | /* This translates a signal code into a readable string */ 42 | static inline char * code2str(int code, int signal) { 43 | 44 | switch(code) { 45 | case SI_USER: 46 | return "kill, sigsend or raise "; 47 | case SI_KERNEL: 48 | return "kernel"; 49 | case SI_QUEUE: 50 | return "sigqueue"; 51 | } 52 | 53 | if(SIGILL==signal) switch(code) { 54 | case ILL_ILLOPC: 55 | return "illegal opcode"; 56 | case ILL_ILLOPN: 57 | return "illegal operand"; 58 | case ILL_ILLADR: 59 | return "illegal addressing mode"; 60 | case ILL_ILLTRP: 61 | return "illegal trap"; 62 | case ILL_PRVOPC: 63 | return "privileged register"; 64 | case ILL_COPROC: 65 | return "coprocessor error"; 66 | case ILL_BADSTK: 67 | return "internal stack error"; 68 | } 69 | 70 | if(SIGFPE==signal) switch(code) { 71 | case FPE_INTDIV: 72 | return "integer divide by zero"; 73 | case FPE_INTOVF: 74 | return "integer overflow"; 75 | case FPE_FLTDIV: 76 | return "floating point divide by zero"; 77 | case FPE_FLTOVF: 78 | return "floating point overflow"; 79 | case FPE_FLTUND: 80 | return "floating point underflow"; 81 | case FPE_FLTRES: 82 | return "floating point inexact result"; 83 | case FPE_FLTINV: 84 | return "floating point invalid operation"; 85 | case FPE_FLTSUB: 86 | return "subscript out of range"; 87 | } 88 | 89 | if(SIGSEGV==signal) switch(code) { 90 | case SEGV_MAPERR: 91 | return "address not mapped to object"; 92 | case SEGV_ACCERR: 93 | return "invalid permissions for mapped object"; 94 | } 95 | 96 | if(SIGBUS==signal) switch(code) { 97 | case BUS_ADRALN: 98 | return "invalid address alignment"; 99 | case BUS_ADRERR: 100 | return "non-existent physical address"; 101 | case BUS_OBJERR: 102 | return "object specific hardware error"; 103 | } 104 | 105 | if(SIGTRAP==signal) switch(code) { 106 | case TRAP_BRKPT: 107 | return "process breakpoint"; 108 | case TRAP_TRACE: 109 | return "process trace trap"; 110 | } 111 | 112 | return "Unhandled signal handler"; 113 | } 114 | 115 | /* Call this to reboot. Production version must be asaync-signal safe */ 116 | static void inline do_reboot(void) { 117 | 118 | #ifdef NDEBUG 119 | 120 | char * reboot_argv[] = { "reboot", NULL}; 121 | char * reboot_env[] = {NULL}; 122 | 123 | execve("/sbin/reboot", reboot_argv, reboot_env); 124 | 125 | #else /* NDEBUG */ 126 | 127 | fprintf(stderr, "Boo!!! would have rebooted but running in debug mode. Have a nice day.\n"); 128 | exit(3); 129 | 130 | #endif /* NDEBUG */ 131 | 132 | /* NOT REACHED */ 133 | return; 134 | } 135 | 136 | /* Handle the crash data 137 | * This is just an example: it speqs the entire message to stderr in human readable form 138 | */ 139 | static void handle_crash(void) { 140 | 141 | int i; 142 | 143 | assert(crash_msg != NULL); 144 | assert(sizeof(crash_msg->assert_buffer[0])==sizeof(unsigned char)); 145 | 146 | 147 | fprintf (stderr, 148 | "\n********************************" 149 | "\n* EXCEPTION CAUGHT *" 150 | "\n********************************\n" 151 | "Process name: %s\n" 152 | "Process ID: %d\n" 153 | "Thread ID: %d\n" 154 | "Exception: %s\n" 155 | "Reason: %s\n" 156 | "Fault Address: %p\n" 157 | "Signal error: %s\n" 158 | "Last error: %s\n" 159 | "Time stamp: %s" 160 | "Assert buffer: %s", 161 | crash_msg->process_name, 162 | crash_msg->process_id, 163 | crash_msg->thread_id, 164 | strsignal(crash_msg->signal_number), 165 | code2str(crash_msg->signal_code, crash_msg->signal_number), 166 | crash_msg->fault_address, 167 | strerror(crash_msg->signal_errno), 168 | strerror(crash_msg->handler_errno), 169 | ctime(&(crash_msg->timestamp.tv_sec)), 170 | crash_msg->assert_buffer 171 | 172 | ); 173 | 174 | fprintf(stderr, "\nStack trace addresses:\n"); 175 | for(i=0; i< crash_msg->num_backtrace_frames; ++i) { 176 | fprintf(stderr, "[%d] %p\n", i, crash_msg->backtrace[i]); 177 | } 178 | 179 | fprintf(stderr, "\nAncillary data follows:\n"); 180 | fprintf(stderr, "%s\n",crash_msg->ancillary_data); 181 | fflush(NULL); 182 | 183 | return; 184 | } 185 | 186 | static void term_sig_handler(int signal) { 187 | 188 | terminate_flag = 1; 189 | mb(); 190 | return; 191 | } 192 | 193 | /* Timeout reading crash data */ 194 | static void alarm_sig_handler(int signal) { 195 | 196 | timeout_flag = 1; 197 | mb(); 198 | return; 199 | } 200 | 201 | /* Our very own fault handler. 202 | * If we ever got it it means something is very very wrong. 203 | * Trying to save debug info is useless. We probably got here 204 | * because of a fault when processing some crash. The chances 205 | * are very slim that we'll be able to save any meaningfull 206 | * data and we risk getting stuck instead of resetting the system, 207 | * so we just reboot 208 | */ 209 | static void fault_sig_handler(int signal) { 210 | 211 | do_reboot(); 212 | return; 213 | } 214 | 215 | 216 | /* Utility function to register a simple signal handler with no flags 217 | * (as opposed to signal(2)) 218 | */ 219 | static int register_signal(int signo, sighandler_t handler) { 220 | 221 | struct sigaction act; 222 | 223 | memset(&act, 0, sizeof (act)); 224 | act.sa_handler = handler; 225 | sigemptyset (&act.sa_mask); 226 | act.sa_flags = 0; 227 | 228 | return sigaction (signo, &act, NULL); 229 | } 230 | 231 | /* The main deal */ 232 | void crashd_main(char daemonise_flag, const char * progname, int pfd[]) 233 | { 234 | int ret, fd; 235 | char * p = crash_msg_buf; 236 | int remaining_bytes = CRASH_MAX_MSG_SIZE; 237 | fd_set rfds; 238 | 239 | ret = fork(); 240 | 241 | if(ret) { 242 | return; 243 | } else { 244 | close(pfd[1]); 245 | fd = pfd[0]; 246 | } 247 | 248 | /* This forks again, closing stdin/out/err and loose our TTY, if asked to. */ 249 | if(daemonise_flag) { 250 | ret = daemon(0, 1); 251 | if(-1==ret) goto bail_out; 252 | } 253 | 254 | /* Register all signal handlers for timeout, kill and fault */ 255 | ret = register_signal(SIGTERM, term_sig_handler); 256 | if(-1 == ret) goto bail_out; 257 | 258 | ret = register_signal(SIGALRM, alarm_sig_handler); 259 | if(-1 == ret) goto bail_out; 260 | 261 | ret = register_signal(SIGSEGV, fault_sig_handler); 262 | if(-1 == ret) goto bail_out; 263 | 264 | ret = register_signal(SIGILL, fault_sig_handler); 265 | if(-1 == ret) goto bail_out; 266 | 267 | ret = register_signal(SIGFPE, fault_sig_handler); 268 | if(-1 == ret) goto bail_out; 269 | 270 | ret = register_signal(SIGBUS, fault_sig_handler); 271 | if(-1 == ret) goto bail_out; 272 | 273 | 274 | /* OK, wait for someone to tickle us */ 275 | 276 | FD_ZERO(&rfds); 277 | FD_SET(fd, &rfds); 278 | 279 | ret = select(fd+1, &rfds, NULL, NULL, NULL); 280 | 281 | /* Deal correctly with random harmless signals 282 | * Especially useful for when we run under debugger */ 283 | while(-1 == ret && EINTR == errno) { 284 | 285 | mb(); 286 | if(terminate_flag) exit(0); 287 | 288 | ret = select(fd+1, &rfds, NULL, NULL, NULL); 289 | } 290 | 291 | if(-1==ret) goto bail_out; 292 | 293 | /* OK, we have action. First thing arm the timer */ 294 | (void)alarm(READ_TIMEOUT); 295 | 296 | /* This crazy loop reads the message in, possbly in several parts. 297 | * We continue when we're done or when it's time to leave. 298 | */ 299 | 300 | do { 301 | 302 | ret = read(fd, p, remaining_bytes); 303 | 304 | /* We need to exit if the end closed the pipe or if we asked to terminate */ 305 | if((0 == ret) || terminate_flag) { 306 | break; 307 | } 308 | 309 | /* Oh oh... we're late. Time out. */ 310 | if(timeout_flag) break; 311 | 312 | /* Handle random signals nicely */ 313 | if(-1 == ret && EINTR == errno) continue; 314 | 315 | /* Read errors make us nervous. log and bail out */ 316 | if(-1 == ret) break; 317 | 318 | p += ret; 319 | remaining_bytes -= ret; 320 | 321 | } while (ret && (remaining_bytes > 0)); 322 | 323 | assert(CRASH_MSG_MAGIC == crash_msg->magic); 324 | 325 | /* Make sure the process name has an ending NULL */ 326 | crash_msg->process_name[CRASH_MAX_PROCESS_NAME_SIZE-1] = '\0'; 327 | 328 | close(fd); 329 | 330 | /* Go process the crash */ 331 | handle_crash(); 332 | 333 | exit(0); 334 | 335 | bail_out: 336 | /* Oy very... if we got here it means that the crash daemon has itself 337 | * encountered some error. We simply record it in the usuall format and 338 | * initaite a crash as normal. 339 | * 340 | * We don't bother with the backtrace symbols since there is only main here 341 | * but we do put a meaningful error message as ancillary data. 342 | * 343 | * Hope this never happens... 344 | */ 345 | 346 | strncpy(crash_msg->process_name, progname, CRASH_MAX_PROCESS_NAME_SIZE-1); 347 | crash_msg->process_id = getpid(); 348 | crash_msg->thread_id = getpid(); 349 | crash_msg->handler_errno = errno; 350 | clock_gettime(CLOCK_REALTIME, &crash_msg->timestamp); 351 | crash_msg->num_backtrace_frames=backtrace(crash_msg->backtrace, CRASH_MAX_BACKTRACE_DEPTH); 352 | snprintf(crash_msg->ancillary_data, CRASH_ANCILLARY_DATA_SIZE-1, 353 | "crashd bailing out due to %s\n",strerror(errno)); 354 | 355 | handle_crash(); 356 | 357 | 358 | /* THE END */ 359 | return; 360 | } 361 | 362 | 363 | 364 | -------------------------------------------------------------------------------- /naive.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #define __USE_GNU /* Needed to get REG_EIP from ucontext.h */ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | 21 | /* gettid in non offical so not in glibc headers. This works though */ 22 | inline pid_t gettid (void) 23 | { 24 | return syscall(__NR_gettid); 25 | } 26 | 27 | /* This translates a signal code into a readable string */ 28 | static inline char * code2str(int code, int signal) { 29 | 30 | switch(code) { 31 | case SI_USER: 32 | return "kill, sigsend or raise "; 33 | case SI_KERNEL: 34 | return "kernel"; 35 | case SI_QUEUE: 36 | return "sigqueue"; 37 | } 38 | 39 | if(SIGILL==signal) switch(code) { 40 | case ILL_ILLOPC: 41 | return "illegal opcode"; 42 | case ILL_ILLOPN: 43 | return "illegal operand"; 44 | case ILL_ILLADR: 45 | return "illegal addressing mode"; 46 | case ILL_ILLTRP: 47 | return "illegal trap"; 48 | case ILL_PRVOPC: 49 | return "privileged register"; 50 | case ILL_COPROC: 51 | return "coprocessor error"; 52 | case ILL_BADSTK: 53 | return "internal stack error"; 54 | } 55 | 56 | if(SIGFPE==signal) switch(code) { 57 | case FPE_INTDIV: 58 | return "integer divide by zero"; 59 | case FPE_INTOVF: 60 | return "integer overflow"; 61 | case FPE_FLTDIV: 62 | return "floating point divide by zero"; 63 | case FPE_FLTOVF: 64 | return "floating point overflow"; 65 | case FPE_FLTUND: 66 | return "floating point underflow"; 67 | case FPE_FLTRES: 68 | return "floating point inexact result"; 69 | case FPE_FLTINV: 70 | return "floating point invalid operation"; 71 | case FPE_FLTSUB: 72 | return "subscript out of range"; 73 | } 74 | 75 | if(SIGSEGV==signal) switch(code) { 76 | case SEGV_MAPERR: 77 | return "address not mapped to object"; 78 | case SEGV_ACCERR: 79 | return "invalid permissions for mapped object"; 80 | } 81 | 82 | if(SIGBUS==signal) switch(code) { 83 | case BUS_ADRALN: 84 | return "invalid address alignment"; 85 | case BUS_ADRERR: 86 | return "non-existent physical address"; 87 | case BUS_OBJERR: 88 | return "object specific hardware error"; 89 | } 90 | 91 | if(SIGTRAP==signal) switch(code) { 92 | case TRAP_BRKPT: 93 | return "process breakpoint"; 94 | case TRAP_TRACE: 95 | return "process trace trap"; 96 | } 97 | 98 | return "Unhandled signal handler"; 99 | } 100 | 101 | void fault_handler (int signal, siginfo_t * siginfo, void *context) 102 | { 103 | 104 | #define MAX_FRAMES 25 105 | 106 | void * frames[MAX_FRAMES]; 107 | int num_frames; 108 | struct timespec timestamp; 109 | char ** symbols; 110 | int i; 111 | 112 | /* Grab time stamp */ 113 | clock_gettime(CLOCK_REALTIME, ×tamp); 114 | 115 | fprintf (stderr, 116 | "\n********************************" 117 | "\n* EXCEPTION CAUGHT *" 118 | "\n********************************\n" 119 | "Process ID: %d\n" 120 | "Thread ID: %d\n" 121 | "Exception: %s\n" 122 | "Reason: %s\n" 123 | "Fault Address: %p\n" 124 | "Signal error: %s\n" 125 | "Last error: %s\n" 126 | "Time stamp: %s", 127 | getpid(), 128 | gettid(), 129 | strsignal(signal), 130 | code2str(siginfo->si_code, signal), 131 | siginfo->si_addr, 132 | strerror(siginfo->si_errno), 133 | strerror(errno), 134 | ctime(×tamp.tv_sec) 135 | ); 136 | 137 | /* Get the backtrace. */ 138 | num_frames = backtrace(frames, MAX_FRAMES); 139 | 140 | symbols = backtrace_symbols(frames, num_frames); 141 | 142 | if(symbols) { 143 | for(i=0; i< num_frames; i++) { 144 | fprintf(stderr, "%s\n", symbols[i]); 145 | } 146 | } 147 | 148 | fflush(NULL); 149 | 150 | free(symbols); 151 | 152 | /* Produce a core dump for in depth debugging */ 153 | abort(); 154 | 155 | return; 156 | } 157 | 158 | 159 | 160 | 161 | void print_message_function (void *ptr); 162 | unsigned char buf[128]; 163 | 164 | int 165 | main ( int argc, char * argv[]) 166 | { 167 | pthread_t thread1; 168 | pthread_t thread2; 169 | int ret; 170 | struct sigaction act; /* Signal handler register struct */ 171 | 172 | /* Prepare a sigaction struct for exception handler registrations */ 173 | memset(&act, 0, sizeof (act)); 174 | act.sa_sigaction = fault_handler; 175 | /* No signals during handler run, please */ 176 | sigfillset (&act.sa_mask); 177 | /* We want the 3 parameter form of the handler with the siginfo_t addtional data */ 178 | act.sa_flags = SA_SIGINFO; 179 | 180 | 181 | /* Register the handler for all exception signals. */ 182 | ret = sigaction (SIGSEGV, &act, NULL); 183 | ret |= sigaction (SIGILL, &act, NULL); 184 | ret |= sigaction (SIGFPE, &act, NULL); 185 | ret |= sigaction (SIGBUS, &act, NULL); 186 | ret |= sigaction (SIGQUIT, &act, NULL); 187 | 188 | printf("Starting first run\n"); 189 | fflush(NULL); 190 | 191 | /* create two threads and let them race */ 192 | pthread_create (&thread1, NULL, (void *) &print_message_function, NULL); 193 | 194 | pthread_create (&thread2, NULL, (void *) &print_message_function, NULL); 195 | 196 | pthread_join (thread2, NULL); 197 | 198 | /* Not reached */ 199 | printf("This should never happen!\n"); 200 | fflush(NULL); 201 | assert(0); 202 | pthread_join (thread1, NULL); 203 | 204 | return 0; 205 | } 206 | 207 | 208 | /* This function generaters a fault. 209 | * We try to REALLY be nasty and screw things up bad. 210 | */ 211 | 212 | void 213 | croak (void) 214 | { 215 | int *ip = (int *) 17; 216 | 217 | /* Do a simple system that fails so that errno has some interesting 218 | * value to check 219 | */ 220 | write(3000, "xxx", 3); 221 | 222 | /* Try to put 7 in address 17. This is an illegal memory access. 223 | * Sit back and watch the fire works... 224 | */ 225 | *ip = 7; 226 | } 227 | 228 | 229 | 230 | /* A filler function so that we'll have a meanigful stack. 231 | * The volatile int is used to keep the compiler from optimizing 232 | * this function away 233 | */ 234 | 235 | void 236 | die (void) 237 | { 238 | volatile int i= 12; 239 | croak (); 240 | i++; 241 | return; 242 | } 243 | 244 | 245 | /* The test thread function */ 246 | void 247 | print_message_function(void *dummy) 248 | { 249 | 250 | /* Latin: "those who about to die sallute you". */ 251 | printf ("Morituri te salutant!\n"); 252 | fflush(NULL); 253 | 254 | /* Call the crasher functions */ 255 | die (); 256 | 257 | pthread_exit (0); 258 | } 259 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "crash.h" 11 | 12 | void print_message_function (void *ptr); 13 | char buf[128]; 14 | 15 | int 16 | main ( int argc, char * argv[]) 17 | { 18 | pthread_t thread1; 19 | pthread_t thread2; 20 | int ret = 0; 21 | 22 | /* Prepare asser buffer */ 23 | strcat(buf, (char *)"We're doomed!"); 24 | 25 | printf("Starting first run\n"); 26 | fflush(NULL); 27 | 28 | /* register */ 29 | ret = register_crash_handler(argv[0],(unsigned char *)&buf); 30 | assert(ret==0); 31 | 32 | /* create two threads and let them race */ 33 | pthread_create (&thread1, NULL, (void *) &print_message_function, NULL); 34 | 35 | pthread_create (&thread2, NULL, (void *) &print_message_function, NULL); 36 | 37 | pthread_join (thread2, NULL); 38 | 39 | /* Not reached */ 40 | printf("This should never happen!\n"); 41 | fflush(NULL); 42 | assert(0); 43 | pthread_join (thread1, NULL); 44 | 45 | return 0; 46 | } 47 | 48 | 49 | void * kill_malloc(size_t size, const void *caller) { 50 | printf("Malloc called from %p\n", caller); 51 | abort(); 52 | } 53 | 54 | void * kill_realloc(void *ptr, size_t size, const void *caller) { 55 | printf("Realloc called from %p\n", caller); 56 | abort(); 57 | } 58 | 59 | void kill_free(void *ptr, const void *caller) { 60 | printf("Free called from %p\n", caller); 61 | abort(); 62 | } 63 | 64 | void * kill_memalign(size_t alignment, size_t size, const void *caller) { 65 | printf("Memalign called from %p\n", caller); 66 | abort(); 67 | } 68 | 69 | /* This function generaters a fault. 70 | * We try to REALLY be nasty and screw things up bad. 71 | */ 72 | 73 | void 74 | croak (void) 75 | { 76 | int *ip = (int *) 17; 77 | char * p = sbrk(0); 78 | 79 | /* try to catch implicit malloc calls */ 80 | __malloc_hook = kill_malloc; 81 | __realloc_hook = kill_realloc; 82 | __free_hook = kill_free; 83 | __memalign_hook = kill_memalign; 84 | 85 | 86 | /* Corrupt the malloc arena as a real fault would do. 87 | */ 88 | memset(p-1024, 42, 1024); 89 | 90 | /* Do a simple system that fails so that errno has some interesting 91 | * value to check 92 | */ 93 | write(3000, "xxx", 3); 94 | 95 | /* Try to put 7 in address 17. This is an illegal memory access. 96 | * Sit back and watch the fire works... 97 | */ 98 | *ip = 7; 99 | } 100 | 101 | 102 | 103 | /* A filler function so that we'll have a meanigful stack. 104 | * The volatile int is used to keep the compiler from optimizing 105 | * this function away 106 | */ 107 | 108 | void 109 | die (void) 110 | { 111 | volatile int i= 12; 112 | croak (); 113 | i++; 114 | return; 115 | } 116 | 117 | 118 | /* The test thread function */ 119 | void 120 | print_message_function(void *dummy) 121 | { 122 | 123 | /* Latin: "those who about to die sallute you". */ 124 | printf ("Morituri te salutant!\n"); 125 | fflush(NULL); 126 | 127 | /* Call the crasher functions */ 128 | die (); 129 | 130 | pthread_exit (0); 131 | } 132 | 133 | /* THE END */ 134 | --------------------------------------------------------------------------------