├── .gitignore ├── CHANGELOG.md ├── Gemfile ├── Manifest.txt ├── README.md ├── Rakefile ├── allocation_sampler.gemspec ├── ext └── allocation_sampler │ ├── allocation_sampler.c │ ├── extconf.rb │ └── sort_r.h ├── lib ├── allocation_sampler.rb └── allocation_sampler │ └── version.rb └── test └── test_allocation_sampler.rb /.gitignore: -------------------------------------------------------------------------------- 1 | /tmp 2 | *.so 3 | *.bundle 4 | Gemfile.lock 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | === 1.0.0 / 2018-08-06 2 | 3 | * 1 major enhancement 4 | 5 | * Birthday! 6 | 7 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | gem 'rake' 6 | gem 'rake-compiler' 7 | gem 'minitest' 8 | -------------------------------------------------------------------------------- /Manifest.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | ext/allocation_sampler/allocation_sampler.c 6 | ext/allocation_sampler/sort_r.h 7 | ext/allocation_sampler/extconf.rb 8 | lib/allocation_sampler.rb 9 | lib/allocation_sampler/version.rb 10 | test/test_allocation_sampler.rb 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # allocation_sampler 2 | 3 | * https://github.com/tenderlove/allocation_sampler 4 | 5 | ## DESCRIPTION: 6 | 7 | A sampling allocation profiler. This keeps track of allocations, but only on 8 | specified intervals. Useful for profiling allocations in programs where there 9 | is a time limit on completion of the program. 
10 | 11 | ## SYNOPSIS: 12 | 13 | ```ruby 14 | as = ObjectSpace::AllocationSampler.new(interval: 1) 15 | as.enable 16 | 10.times { Object.new } 17 | as.disable 18 | 19 | as.result.allocations_with_top_frame # => {"Object"=>{""=>{1=>10}}} 20 | ``` 21 | 22 | ## LICENSE: 23 | 24 | (The MIT License) 25 | 26 | Copyright (c) 2018 Aaron Patterson 27 | 28 | Permission is hereby granted, free of charge, to any person obtaining 29 | a copy of this software and associated documentation files (the 30 | 'Software'), to deal in the Software without restriction, including 31 | without limitation the rights to use, copy, modify, merge, publish, 32 | distribute, sublicense, and/or sell copies of the Software, and to 33 | permit persons to whom the Software is furnished to do so, subject to 34 | the following conditions: 35 | 36 | The above copyright notice and this permission notice shall be 37 | included in all copies or substantial portions of the Software. 38 | 39 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 40 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 41 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 42 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 43 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 44 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 45 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
46 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require "bundler/gem_tasks" 3 | require "rake/testtask" 4 | require "rake/extensiontask" 5 | 6 | Rake::TestTask.new(:test) do |t| 7 | t.libs << "test" 8 | t.libs << "lib" 9 | t.test_files = FileList["test/**/test_*.rb"] 10 | end 11 | 12 | Rake::ExtensionTask.new("allocation_sampler") 13 | 14 | task default: %i(compile test) 15 | -------------------------------------------------------------------------------- /allocation_sampler.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "lib/allocation_sampler/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = %q{allocation_sampler} 7 | s.version = ObjectSpace::AllocationSampler::VERSION 8 | s.authors = ['Aaron Patterson'] 9 | s.email = ['aaron@tenderlovemaking.com'] 10 | s.summary = 'A sampling allocation profiler.' 11 | s.description = "This keeps track of allocations, but only on specified intervals. Useful for profiling allocations in programs where there is a time limit on completion of the program." 
12 | s.extensions = %w(ext/allocation_sampler/extconf.rb) 13 | s.files = Dir.chdir(File.expand_path('..', __FILE__)) do 14 | %x(git ls-files -z).split("\x0").reject { |f| f.match(%r{^(test|spec|features|bin|\.)/}) } 15 | end 16 | 17 | s.homepage = 'https://github.com/tenderlove/allocation_sampler' 18 | s.licenses = ['MIT'] 19 | end 20 | -------------------------------------------------------------------------------- /ext/allocation_sampler/allocation_sampler.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sort_r.h" 5 | 6 | typedef struct { 7 | char frames; 8 | size_t capa; 9 | size_t next_free; 10 | size_t prev_free; 11 | size_t record_count; 12 | union { 13 | VALUE *frames; 14 | int *lines; 15 | } as; 16 | } sample_buffer_t; 17 | 18 | typedef struct { 19 | size_t interval; 20 | size_t allocation_count; 21 | size_t overall_samples; 22 | sample_buffer_t * stack_samples; 23 | sample_buffer_t * lines_samples; 24 | VALUE newobj_hook; 25 | } trace_stats_t; 26 | 27 | typedef struct { 28 | sample_buffer_t * frames; 29 | sample_buffer_t * lines; 30 | } compare_data_t; 31 | 32 | static void 33 | free_sample_buffer(sample_buffer_t *buffer) 34 | { 35 | if (buffer->frames) { 36 | xfree(buffer->as.lines); 37 | } else { 38 | xfree(buffer->as.frames); 39 | } 40 | xfree(buffer); 41 | } 42 | 43 | static sample_buffer_t * 44 | alloc_lines_buffer(size_t size) 45 | { 46 | sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1); 47 | samples->as.lines = xcalloc(sizeof(int), size); 48 | samples->capa = size; 49 | samples->frames = 0; 50 | return samples; 51 | } 52 | 53 | static sample_buffer_t * 54 | alloc_frames_buffer(size_t size) 55 | { 56 | sample_buffer_t * samples = xcalloc(sizeof(sample_buffer_t), 1); 57 | samples->as.frames = xcalloc(sizeof(VALUE), size); 58 | samples->capa = size; 59 | samples->frames = 1; 60 | return samples; 61 | } 62 | 63 | static void 64 | 
ensure_sample_buffer_capa(sample_buffer_t * buffer, size_t size) 65 | { 66 | /* If we can't fit all the samples in the buffer, double the buffer size. */ 67 | while (buffer->capa <= (buffer->next_free - 1) + (size + 2)) { 68 | buffer->capa *= 2; 69 | if (buffer->frames) { 70 | buffer->as.frames = xrealloc(buffer->as.frames, sizeof(VALUE) * buffer->capa); 71 | } else { 72 | buffer->as.lines = xrealloc(buffer->as.lines, sizeof(int) * buffer->capa); 73 | } 74 | } 75 | } 76 | 77 | static void 78 | dealloc(void *ptr) 79 | { 80 | trace_stats_t * stats = (trace_stats_t *)ptr; 81 | sample_buffer_t * frames; 82 | sample_buffer_t * lines; 83 | 84 | frames = stats->stack_samples; 85 | lines = stats->lines_samples; 86 | 87 | if (frames && lines) { 88 | free_sample_buffer(frames); 89 | free_sample_buffer(lines); 90 | } 91 | xfree(stats); 92 | } 93 | 94 | static VALUE 95 | make_frame_info(VALUE *frames, int *lines) 96 | { 97 | size_t count, i; 98 | VALUE rb_frames; 99 | 100 | count = *frames; 101 | frames++; 102 | lines++; 103 | 104 | rb_frames = rb_ary_new_capa(count); 105 | 106 | for(i = 0; i < count; i++, frames++, lines++) { 107 | VALUE line = INT2NUM(*lines); 108 | rb_ary_push(rb_frames, rb_ary_new3(2, ULL2NUM(*frames), line)); 109 | } 110 | 111 | return rb_frames; 112 | } 113 | 114 | static int 115 | compare(const void* l, const void* r, void* ctx) 116 | { 117 | compare_data_t *compare_data = (compare_data_t *)ctx; 118 | sample_buffer_t *stacks = compare_data->frames; 119 | sample_buffer_t *lines = compare_data->lines; 120 | 121 | size_t left_offset = *(const size_t*)l; 122 | size_t right_offset = *(const size_t*)r; 123 | 124 | size_t lstack = *(stacks->as.frames + left_offset); 125 | size_t rstack = *(stacks->as.frames + right_offset); 126 | 127 | if (lstack == rstack) { 128 | /* Compare the stack plus type info */ 129 | int stack_cmp = memcmp(stacks->as.frames + left_offset, 130 | stacks->as.frames + right_offset, 131 | (lstack + 3) * sizeof(VALUE *)); 132 | 133 | if 
(stack_cmp == 0) { 134 | /* If the stacks are the same, check the line numbers */ 135 | int line_cmp = memcmp(lines->as.lines + left_offset + 1, 136 | lines->as.lines + right_offset + 1, 137 | lstack * sizeof(int)); 138 | 139 | return line_cmp; 140 | } else { 141 | return stack_cmp; 142 | } 143 | } else { 144 | if (lstack < rstack) { 145 | return -1; 146 | } else { 147 | return 1; 148 | } 149 | } 150 | } 151 | 152 | static void 153 | mark(void * ptr) 154 | { 155 | trace_stats_t * stats = (trace_stats_t *)ptr; 156 | sample_buffer_t * stacks; 157 | 158 | stacks = stats->stack_samples; 159 | 160 | if (stacks) { 161 | VALUE * frame = stacks->as.frames; 162 | 163 | while(frame < stacks->as.frames + stacks->next_free) { 164 | size_t stack_size; 165 | VALUE * head; 166 | 167 | stack_size = *frame; 168 | frame++; /* First element is the stack size */ 169 | head = frame; 170 | 171 | for(; frame < (head + stack_size); frame++) { 172 | rb_gc_mark(*frame); 173 | } 174 | frame++; /* Frame info */ 175 | rb_gc_mark(*frame); 176 | frame++; /* Next Head */ 177 | } 178 | } 179 | 180 | if (stats->newobj_hook) { 181 | rb_gc_mark(stats->newobj_hook); 182 | } 183 | } 184 | 185 | static const rb_data_type_t trace_stats_type = { 186 | "ObjectSpace/AllocationSampler", 187 | {mark, dealloc, 0,}, 188 | 0, 0, 189 | #ifdef RUBY_TYPED_FREE_IMMEDIATELY 190 | RUBY_TYPED_FREE_IMMEDIATELY, 191 | #endif 192 | }; 193 | 194 | static VALUE 195 | user_class(VALUE klass, VALUE obj) 196 | { 197 | if (RTEST(klass) && !(RB_TYPE_P(obj, T_IMEMO) || RB_TYPE_P(obj, T_NODE)) && BUILTIN_TYPE(klass) == T_CLASS) { 198 | return rb_class_path_cached(rb_class_real(klass)); 199 | } else { 200 | return Qnil; 201 | } 202 | } 203 | 204 | #define BUF_SIZE 2048 205 | 206 | static void 207 | newobj(VALUE tpval, void *ptr) 208 | { 209 | trace_stats_t * stats = (trace_stats_t *)ptr; 210 | 211 | if (!(stats->allocation_count % stats->interval)) { 212 | rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval); 213 | VALUE obj 
= rb_tracearg_object(tparg); 214 | VALUE klass = RBASIC_CLASS(obj); 215 | VALUE uc = user_class(klass, obj); 216 | 217 | if (!NIL_P(uc)) { 218 | VALUE frames_buffer[BUF_SIZE]; 219 | int lines_buffer[BUF_SIZE]; 220 | 221 | VALUE path = rb_tracearg_path(tparg); 222 | 223 | if (RTEST(path)) { 224 | sample_buffer_t * stack_samples; 225 | sample_buffer_t * lines_samples; 226 | 227 | int num = rb_profile_frames(0, sizeof(frames_buffer) / sizeof(VALUE), frames_buffer, lines_buffer); 228 | if (!stats->stack_samples) { 229 | stats->stack_samples = alloc_frames_buffer(num * 100); 230 | stats->lines_samples = alloc_lines_buffer(num * 100); 231 | } 232 | stack_samples = stats->stack_samples; 233 | lines_samples = stats->lines_samples; 234 | 235 | ensure_sample_buffer_capa(stack_samples, num + 2); 236 | ensure_sample_buffer_capa(lines_samples, num + 2); 237 | 238 | stack_samples->prev_free = stack_samples->next_free; 239 | lines_samples->prev_free = lines_samples->next_free; 240 | 241 | stack_samples->as.frames[stack_samples->next_free] = (VALUE)num; 242 | lines_samples->as.lines[lines_samples->next_free] = (VALUE)num; 243 | 244 | memcpy(stack_samples->as.frames + stack_samples->next_free + 1, frames_buffer, num * sizeof(VALUE *)); 245 | memcpy(lines_samples->as.lines + lines_samples->next_free + 1, lines_buffer, num * sizeof(int)); 246 | 247 | /* We're not doing de-duping right now, so just set the stack count to 0xdeadbeef */ 248 | stack_samples->as.frames[stack_samples->next_free + num + 1] = 0xdeadbeef; 249 | stack_samples->as.frames[stack_samples->next_free + num + 2] = uc; 250 | 251 | lines_samples->as.lines[stack_samples->next_free + num + 1] = 0xdeadbeef; 252 | lines_samples->as.lines[stack_samples->next_free + num + 2] = uc; 253 | 254 | stack_samples->next_free += (num + 3); 255 | lines_samples->next_free += (num + 3); 256 | 257 | stack_samples->record_count++; 258 | lines_samples->record_count++; 259 | 260 | stats->overall_samples++; 261 | } 262 | } 263 | } 264 | 
stats->allocation_count++; 265 | } 266 | 267 | static VALUE 268 | allocate(VALUE klass) 269 | { 270 | trace_stats_t * stats; 271 | stats = xcalloc(sizeof(trace_stats_t), 1); 272 | stats->interval = 1; 273 | stats->newobj_hook = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj, stats); 274 | 275 | return TypedData_Wrap_Struct(klass, &trace_stats_type, stats); 276 | } 277 | 278 | VALUE rb_cAllocationSampler; 279 | 280 | static VALUE 281 | enable(VALUE self) 282 | { 283 | trace_stats_t * stats; 284 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 285 | rb_tracepoint_enable(stats->newobj_hook); 286 | return Qnil; 287 | } 288 | 289 | static VALUE 290 | disable(VALUE self) 291 | { 292 | trace_stats_t * stats; 293 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 294 | rb_tracepoint_disable(stats->newobj_hook); 295 | return Qnil; 296 | } 297 | 298 | static int 299 | sort_frames(const void *left, const void *right) 300 | { 301 | const VALUE *vleft = (const VALUE *)left; 302 | const VALUE *vright = (const VALUE *)right; 303 | /* Sort so that 0 is always at the right */ 304 | if (*vleft == *vright) { 305 | return 0; 306 | } else { 307 | if (*vleft == 0) { 308 | return 1; 309 | } else if (*vright == 0) { 310 | return -1; 311 | } 312 | } 313 | return *vleft - *vright; 314 | } 315 | 316 | static VALUE 317 | frames(VALUE self) 318 | { 319 | trace_stats_t * stats; 320 | sample_buffer_t * frame_buffer; 321 | VALUE frames; 322 | VALUE *samples; 323 | VALUE *head; 324 | VALUE rb_cFrame; 325 | 326 | size_t buffer_size; 327 | 328 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 329 | 330 | frame_buffer = stats->stack_samples; 331 | 332 | if (!frame_buffer) { 333 | return Qnil; 334 | } 335 | 336 | buffer_size = frame_buffer->next_free; 337 | 338 | samples = xcalloc(sizeof(VALUE), buffer_size); 339 | memcpy(samples, frame_buffer->as.frames, buffer_size * sizeof(VALUE)); 340 | 341 | /* Clear anything that's not a 
frame */ 342 | for(head = samples; head < (samples + buffer_size - 1); head++) { 343 | size_t frame_count; 344 | frame_count = *head; 345 | 346 | *head = 0; 347 | head++; /* Skip the count */ 348 | head += frame_count; /* Skip the stack */ 349 | *head = 0; /* Set the de-dup count to 0 */ 350 | head++; 351 | *head = 0; /* Set the type to 0 */ 352 | } 353 | 354 | qsort(samples, buffer_size, sizeof(VALUE *), sort_frames); 355 | 356 | frames = rb_hash_new(); 357 | 358 | rb_cFrame = rb_const_get(rb_cAllocationSampler, rb_intern("Frame")); 359 | 360 | for(head = samples; head < (samples + buffer_size); ) { 361 | if (*head == 0) 362 | break; 363 | 364 | VALUE file; 365 | VALUE frame; 366 | 367 | file = rb_profile_frame_absolute_path(*(VALUE *)head); 368 | if (NIL_P(file)) 369 | file = rb_profile_frame_path(*head); 370 | 371 | VALUE args[3]; 372 | 373 | args[0] = ULL2NUM(*head); 374 | args[1] = rb_profile_frame_full_label(*head); 375 | args[2] = file; 376 | 377 | frame = rb_class_new_instance(3, args, rb_cFrame); 378 | 379 | rb_hash_aset(frames, ULL2NUM(*head), frame); 380 | 381 | /* Skip duplicates */ 382 | VALUE *cmp; 383 | for (cmp = head + 1; cmp < (samples + buffer_size); cmp++) { 384 | if (*cmp != *head) { 385 | break; 386 | } 387 | } 388 | head = cmp; 389 | } 390 | 391 | xfree(samples); 392 | 393 | return frames; 394 | } 395 | 396 | static VALUE 397 | samples(VALUE self) 398 | { 399 | trace_stats_t * stats; 400 | sample_buffer_t * frames; 401 | sample_buffer_t * lines; 402 | size_t *record_offsets; 403 | VALUE result = Qnil; 404 | 405 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 406 | 407 | frames = stats->stack_samples; 408 | lines = stats->lines_samples; 409 | 410 | if (frames && lines) { 411 | size_t i, j; 412 | size_t * head; 413 | VALUE * frame = frames->as.frames; 414 | compare_data_t compare_ctx; 415 | compare_ctx.frames = frames; 416 | compare_ctx.lines = lines; 417 | 418 | record_offsets = xcalloc(sizeof(size_t), 
frames->record_count); 419 | head = record_offsets; 420 | 421 | i = 0; 422 | while(frame < frames->as.frames + frames->next_free) { 423 | *head = i; /* Store the frame start offset */ 424 | head++; /* Move to the next entry in record_offsets */ 425 | i += (*frame + 3); /* Increase the offset */ 426 | frame += (*frame + 3); /* Move to the next frame */ 427 | } 428 | 429 | sort_r(record_offsets, frames->record_count, sizeof(size_t), compare, &compare_ctx); 430 | 431 | VALUE unique_frames = rb_ary_new(); 432 | 433 | for(i = 0; i < frames->record_count; ) { 434 | size_t current = record_offsets[i]; 435 | size_t count = 0; 436 | 437 | /* Count any duplicate stacks ahead of us in the array */ 438 | for (j = i+1; j < frames->record_count; j++) { 439 | size_t next = record_offsets[j]; 440 | int same = compare(¤t, &next, &compare_ctx); 441 | 442 | if (same == 0) { 443 | count++; 444 | } else { 445 | break; 446 | } 447 | } 448 | 449 | i = j; 450 | 451 | size_t stack_size = *(frames->as.frames + current); 452 | 453 | VALUE type = *(frames->as.frames + current + stack_size + 2); 454 | 455 | rb_ary_push(unique_frames, 456 | rb_ary_new3(3, 457 | type, 458 | INT2NUM(count + 1), 459 | make_frame_info(frames->as.frames + current, lines->as.lines + current))); 460 | 461 | } 462 | 463 | xfree(record_offsets); 464 | 465 | result = unique_frames; 466 | } 467 | 468 | return result; 469 | } 470 | 471 | static VALUE 472 | initialize(int argc, VALUE *argv, VALUE self) 473 | { 474 | VALUE opts; 475 | trace_stats_t * stats; 476 | 477 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 478 | rb_scan_args(argc, argv, ":", &opts); 479 | if (!NIL_P(opts)) { 480 | ID ids[2]; 481 | VALUE args[2]; 482 | ids[0] = rb_intern("interval"); 483 | rb_get_kwargs(opts, ids, 0, 1, args); 484 | 485 | if (args[0] != Qundef) { 486 | stats->interval = NUM2INT(args[0]); 487 | } 488 | } 489 | 490 | return self; 491 | } 492 | 493 | static VALUE 494 | interval(VALUE self) 495 | { 496 | 
trace_stats_t * stats; 497 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 498 | return INT2NUM(stats->interval); 499 | } 500 | 501 | static VALUE 502 | allocation_count(VALUE self) 503 | { 504 | trace_stats_t * stats; 505 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 506 | return INT2NUM(stats->allocation_count); 507 | } 508 | 509 | static VALUE 510 | overall_samples(VALUE self) 511 | { 512 | trace_stats_t * stats; 513 | TypedData_Get_Struct(self, trace_stats_t, &trace_stats_type, stats); 514 | return INT2NUM(stats->overall_samples); 515 | } 516 | 517 | void 518 | Init_allocation_sampler(void) 519 | { 520 | VALUE rb_mObjSpace = rb_const_get(rb_cObject, rb_intern("ObjectSpace")); 521 | 522 | rb_cAllocationSampler = rb_define_class_under(rb_mObjSpace, "AllocationSampler", rb_cObject); 523 | rb_define_alloc_func(rb_cAllocationSampler, allocate); 524 | rb_define_method(rb_cAllocationSampler, "initialize", initialize, -1); 525 | rb_define_method(rb_cAllocationSampler, "enable", enable, 0); 526 | rb_define_method(rb_cAllocationSampler, "disable", disable, 0); 527 | rb_define_method(rb_cAllocationSampler, "frames", frames, 0); 528 | rb_define_method(rb_cAllocationSampler, "samples", samples, 0); 529 | rb_define_method(rb_cAllocationSampler, "interval", interval, 0); 530 | rb_define_method(rb_cAllocationSampler, "allocation_count", allocation_count, 0); 531 | rb_define_method(rb_cAllocationSampler, "overall_samples", overall_samples, 0); 532 | } 533 | -------------------------------------------------------------------------------- /ext/allocation_sampler/extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf' 2 | $CFLAGS << " -Wno-declaration-after-statement" 3 | create_makefile('allocation_sampler') 4 | -------------------------------------------------------------------------------- /ext/allocation_sampler/sort_r.h: 
-------------------------------------------------------------------------------- 1 | /* Obtained from https://github.com/noporpoise/sort_r on 2018-09-19 */ 2 | /* Isaac Turner 29 April 2014 Public Domain */ 3 | #ifndef SORT_R_H_ 4 | #define SORT_R_H_ 5 | 6 | #include 7 | #include 8 | 9 | /* 10 | 11 | sort_r function to be exported. 12 | 13 | Parameters: 14 | base is the array to be sorted 15 | nel is the number of elements in the array 16 | width is the size in bytes of each element of the array 17 | compar is the comparison function 18 | arg is a pointer to be passed to the comparison function 19 | 20 | void sort_r(void *base, size_t nel, size_t width, 21 | int (*compar)(const void *_a, const void *_b, void *_arg), 22 | void *arg); 23 | 24 | */ 25 | 26 | #define _SORT_R_INLINE inline 27 | 28 | #if (defined __APPLE__ || defined __MACH__ || defined __DARWIN__ || \ 29 | defined __FreeBSD__ || defined __DragonFly__) 30 | # define _SORT_R_BSD 31 | #elif (defined _GNU_SOURCE || defined __gnu_hurd__ || defined __GNU__ || \ 32 | defined __linux__ || defined __MINGW32__ || defined __GLIBC__) 33 | # define _SORT_R_LINUX 34 | #elif (defined _WIN32 || defined _WIN64 || defined __WINDOWS__) 35 | # define _SORT_R_WINDOWS 36 | # undef _SORT_R_INLINE 37 | # define _SORT_R_INLINE __inline 38 | #else 39 | /* Using our own recursive quicksort sort_r_simple() */ 40 | #endif 41 | 42 | #if (defined NESTED_QSORT && NESTED_QSORT == 0) 43 | # undef NESTED_QSORT 44 | #endif 45 | 46 | /* swap a, b iff a>b */ 47 | /* __restrict is same as restrict but better support on old machines */ 48 | static _SORT_R_INLINE int sort_r_cmpswap(char *__restrict a, char *__restrict b, size_t w, 49 | int (*compar)(const void *_a, const void *_b, 50 | void *_arg), 51 | void *arg) 52 | { 53 | char tmp, *end = a+w; 54 | if(compar(a, b, arg) > 0) { 55 | for(; a < end; a++, b++) { tmp = *a; *a = *b; *b = tmp; } 56 | return 1; 57 | } 58 | return 0; 59 | } 60 | 61 | /* Implement recursive quicksort ourselves */ 62 
| /* Note: quicksort is not stable, equivalent values may be swapped */ 63 | static _SORT_R_INLINE void sort_r_simple(void *base, size_t nel, size_t w, 64 | int (*compar)(const void *_a, const void *_b, 65 | void *_arg), 66 | void *arg) 67 | { 68 | char *b = (char *)base, *end = b + nel*w; 69 | if(nel < 7) { 70 | /* Insertion sort for arbitrarily small inputs */ 71 | char *pi, *pj; 72 | for(pi = b+w; pi < end; pi += w) { 73 | for(pj = pi; pj > b && sort_r_cmpswap(pj-w,pj,w,compar,arg); pj -= w) {} 74 | } 75 | } 76 | else 77 | { 78 | /* nel > 6; Quicksort */ 79 | 80 | /* Use median of first, middle and last items as pivot */ 81 | char *x, *y, *xend, ch; 82 | char *pl, *pr; 83 | char *last = b+w*(nel-1), *tmp; 84 | char *l[3]; 85 | l[0] = b; 86 | l[1] = b+w*(nel/2); 87 | l[2] = last; 88 | 89 | if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; } 90 | if(compar(l[1],l[2],arg) > 0) { 91 | tmp=l[1]; l[1]=l[2]; l[2]=tmp; /* swap(l[1],l[2]) */ 92 | if(compar(l[0],l[1],arg) > 0) { tmp=l[0]; l[0]=l[1]; l[1]=tmp; } 93 | } 94 | 95 | /* swap l[id], l[2] to put pivot as last element */ 96 | for(x = l[1], y = last, xend = x+w; xcompar)(a, b, ss->arg); 165 | } 166 | 167 | #endif 168 | 169 | #if defined _SORT_R_LINUX 170 | 171 | typedef int(* __compar_d_fn_t)(const void *, const void *, void *); 172 | extern void qsort_r(void *base, size_t nel, size_t width, 173 | __compar_d_fn_t __compar, void *arg) 174 | __attribute__((nonnull (1, 4))); 175 | 176 | #endif 177 | 178 | /* implementation */ 179 | 180 | static _SORT_R_INLINE void sort_r(void *base, size_t nel, size_t width, 181 | int (*compar)(const void *_a, const void *_b, void *_arg), 182 | void *arg) 183 | { 184 | #if defined _SORT_R_LINUX 185 | 186 | #if defined __GLIBC__ && ((__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8)) 187 | 188 | /* no qsort_r in glibc before 2.8, need to use nested qsort */ 189 | sort_r_simple(base, nel, width, compar, arg); 190 | 191 | #else 192 | 193 | qsort_r(base, nel, width, 
compar, arg); 194 | 195 | #endif 196 | 197 | #elif defined _SORT_R_BSD 198 | 199 | struct sort_r_data tmp; 200 | tmp.arg = arg; 201 | tmp.compar = compar; 202 | qsort_r(base, nel, width, &tmp, sort_r_arg_swap); 203 | 204 | #elif defined _SORT_R_WINDOWS 205 | 206 | struct sort_r_data tmp; 207 | tmp.arg = arg; 208 | tmp.compar = compar; 209 | qsort_s(base, nel, width, sort_r_arg_swap, &tmp); 210 | 211 | #else 212 | 213 | /* Fall back to our own quicksort implementation */ 214 | sort_r_simple(base, nel, width, compar, arg); 215 | 216 | #endif 217 | } 218 | 219 | #endif /* !NESTED_QSORT */ 220 | 221 | #undef _SORT_R_INLINE 222 | #undef _SORT_R_WINDOWS 223 | #undef _SORT_R_LINUX 224 | #undef _SORT_R_BSD 225 | 226 | #endif /* SORT_R_H_ */ 227 | -------------------------------------------------------------------------------- /lib/allocation_sampler.rb: -------------------------------------------------------------------------------- 1 | require 'allocation_sampler/version' 2 | require 'allocation_sampler.so' 3 | require 'delegate' 4 | require 'set' 5 | require 'cgi/escape' 6 | 7 | module ObjectSpace 8 | class AllocationSampler 9 | 10 | class Frame 11 | attr_reader :id, :name, :path, :children 12 | 13 | def initialize id, name, path 14 | @id = id 15 | @name = name 16 | @path = path 17 | end 18 | end 19 | 20 | class Result 21 | class Frame < DelegateClass(AllocationSampler::Frame) 22 | attr_reader :line, :children 23 | attr_accessor :samples, :total_samples 24 | 25 | include Enumerable 26 | 27 | def initialize frame, line, samples 28 | super(frame) 29 | @line = line 30 | @samples = samples 31 | @total_samples = 0 32 | @children = Set.new 33 | end 34 | 35 | def each 36 | seen = {} 37 | stack = [self] 38 | 39 | while node = stack.pop 40 | next if seen[node] 41 | seen[node] = true 42 | yield node 43 | stack.concat node.children.to_a 44 | end 45 | end 46 | 47 | def to_dot 48 | seen = {} 49 | "digraph allocations {\n" + 50 | " node[shape=record];\n" + print_edges(self, seen, 
total_samples) + "}\n" 51 | end 52 | 53 | private 54 | 55 | def print_edges node, seen, total_samples 56 | return '' if seen[node.id] 57 | seen[node.id] = node 58 | " #{node.id} [label=\"#{CGI.escapeHTML node.name}\"];\n" + 59 | node.children.map { |child| 60 | ratio = child.total_samples / total_samples.to_f 61 | width = (1 * ratio) + 1 62 | " #{node.id} -> #{child.id} [penwidth=#{width}];\n" + print_edges(child, seen, total_samples) 63 | }.join 64 | end 65 | end 66 | 67 | attr_reader :samples, :frames 68 | 69 | def initialize samples, frames 70 | @samples = samples.sort_by! { |s| s[1] }.reverse! 71 | @frames = frames 72 | end 73 | 74 | def allocations_by_type 75 | @samples.each_with_object(Hash.new(0)) do |(type, count, _), h| 76 | h[type] += count 77 | end 78 | end 79 | 80 | def allocations_with_top_frame 81 | @samples.each_with_object({}) do |(type, count, stack), h| 82 | top_frame_id, line = stack.first 83 | frame = @frames[top_frame_id] 84 | ((h[type] ||= {})[frame.path] ||= {})[line] = count 85 | end 86 | end 87 | 88 | def calltree 89 | frame_delegates = {} 90 | @samples.map { |type, count, stack| 91 | build_tree(stack, count, frame_delegates) 92 | }.uniq.first 93 | end 94 | 95 | def by_type_with_call_tree 96 | types_with_stacks = @samples.group_by(&:first) 97 | types_with_stacks.transform_values do |stacks| 98 | frame_delegates = {} 99 | stacks.map { |_, count, stack| 100 | build_tree(stack, count, frame_delegates) 101 | }.uniq.first 102 | end 103 | end 104 | 105 | private 106 | 107 | def build_tree stack, count, frame_delegates 108 | top_down = stack.reverse 109 | last_caller = nil 110 | seen = Set.new 111 | root = nil 112 | top_frame_id, top_line = stack.first 113 | top = frame_delegates[top_frame_id] ||= build_frame(top_frame_id, top_line, 0) 114 | top.samples += count 115 | top_down.each do |frame_id, line| 116 | frame = frame_delegates[frame_id] ||= build_frame(frame_id, line, 0) 117 | root ||= frame 118 | if last_caller 119 | last_caller.children << 
frame 120 | end 121 | last_caller = frame 122 | last_caller.total_samples += count unless seen.include?(frame_id) 123 | seen << frame_id 124 | end 125 | root 126 | end 127 | 128 | def build_frame id, line, samples 129 | Frame.new @frames[id], line, samples 130 | end 131 | end 132 | 133 | def result 134 | Result.new samples, frames 135 | end 136 | 137 | module Display 138 | class Stack < DelegateClass(IO) 139 | attr_reader :max_depth 140 | 141 | def initialize output: $stdout 142 | super(output) 143 | end 144 | 145 | def show frames 146 | max_width = max_width(frames, 0, {}) 147 | display(frames, 0, frames.total_samples, [], {}, max_width) 148 | end 149 | 150 | private 151 | 152 | def max_width frame, depth, seen 153 | if seen.key? frame 154 | return 0 155 | end 156 | 157 | seen[frame] = true 158 | 159 | my_length = (depth * 4) + frame.name.length 160 | 161 | frame.children.each do |caller| 162 | child_len = max_width caller, depth + 1, seen 163 | my_length = child_len if my_length < child_len 164 | end 165 | 166 | my_length 167 | end 168 | 169 | def display frame, depth, total_samples, last_stack, seen, max_width 170 | seen[frame] = true 171 | 172 | 173 | buffer = max_width - ((depth * 4) + frame.name.length) 174 | 175 | self_samples = frame.samples 176 | last_stack.each_with_index do |last, i| 177 | if i == last_stack.length - 1 178 | if last 179 | printf "`-- " 180 | else 181 | printf "|-- " 182 | end 183 | else 184 | if last 185 | printf " " 186 | else 187 | printf "| " 188 | end 189 | end 190 | end 191 | 192 | 193 | printf frame.name 194 | printf " " * buffer 195 | printf "% d % 8s", self_samples, "(%2.1f%%)" % (self_samples*100.0/total_samples) 196 | puts 197 | 198 | children = (frame.children || []).sort_by { |ie| 199 | -ie.total_samples 200 | }.reject { |f| seen[f] } 201 | 202 | children.each_with_index do |child, i| 203 | s = last_stack + [i == children.length - 1] 204 | display child, depth + 1, total_samples, s, seen, max_width 205 | end 206 | end 207 | 
end 208 | end 209 | end 210 | end 211 | -------------------------------------------------------------------------------- /lib/allocation_sampler/version.rb: -------------------------------------------------------------------------------- 1 | module ObjectSpace 2 | class AllocationSampler 3 | VERSION = '1.0.2' 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /test/test_allocation_sampler.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require 'allocation_sampler' 3 | 4 | class TestAllocationSampler < Minitest::Test 5 | def test_initialize 6 | assert ObjectSpace::AllocationSampler.new 7 | end 8 | 9 | def test_init_with_params 10 | as = ObjectSpace::AllocationSampler.new(interval: 10) 11 | assert_equal 10, as.interval 12 | end 13 | 14 | def test_init_with_location 15 | iseq = RubyVM::InstructionSequence.new <<-eoruby 16 | Object.new 17 | Object.new 18 | eoruby 19 | as = ObjectSpace::AllocationSampler.new(interval: 1) 20 | as.enable 21 | iseq.eval 22 | as.disable 23 | 24 | assert_equal({"Object"=>{""=>{1=>1, 2=>1}}}, filter(as.result)) 25 | end 26 | 27 | def test_location_same_line 28 | iseq = RubyVM::InstructionSequence.new <<-eoruby 29 | 10.times { Object.new } 30 | eoruby 31 | as = ObjectSpace::AllocationSampler.new(interval: 1) 32 | as.enable 33 | iseq.eval 34 | as.disable 35 | 36 | assert_equal({"Object"=>{""=>{1=>10}}}, filter(as.result)) 37 | end 38 | 39 | def test_location_mixed 40 | iseq = RubyVM::InstructionSequence.new <<-eoruby 41 | 10.times { Object.new } 42 | Object.new 43 | eoruby 44 | as = ObjectSpace::AllocationSampler.new(interval: 1) 45 | as.enable 46 | iseq.eval 47 | as.disable 48 | 49 | assert_equal({"Object"=>{""=>{1=>10, 2=>1}}}, filter(as.result)) 50 | end 51 | 52 | def test_location_from_method 53 | iseq = RubyVM::InstructionSequence.new <<-eoruby 54 | def foo 55 | 10.times { Object.new } 56 | Object.new 57 | end 58 | foo 
59 | eoruby 60 | as = ObjectSpace::AllocationSampler.new(interval: 1) 61 | as.enable 62 | iseq.eval 63 | as.disable 64 | 65 | assert_equal({"Object"=>{""=>{2=>10, 3=>1}}}, filter(as.result)) 66 | end 67 | 68 | def test_location_larger_interval 69 | iseq = RubyVM::InstructionSequence.new <<-eom 70 | 100.times { Object.new } 71 | 100.times { Object.new } 72 | eom 73 | as = ObjectSpace::AllocationSampler.new(interval: 10) 74 | as.enable 75 | iseq.eval 76 | as.disable 77 | 78 | assert_equal({"Object"=>{""=>{1=>10, 2=>10}}}, filter(as.result)) 79 | assert_equal 201, as.allocation_count 80 | end 81 | 82 | def test_interval_default 83 | as = ObjectSpace::AllocationSampler.new 84 | assert_equal 1, as.interval 85 | end 86 | 87 | def test_two_with_same_type 88 | as = ObjectSpace::AllocationSampler.new 89 | as.enable 90 | Object.new 91 | Object.new 92 | as.disable 93 | 94 | assert_equal(2, filter(as.result)[Object.name].values.flat_map(&:values).inject(:+)) 95 | end 96 | 97 | def test_two_with_same_type_same_line 98 | as = ObjectSpace::AllocationSampler.new 99 | as.enable 100 | Object.new; Object.new 101 | Object.new; Object.new 102 | as.disable 103 | 104 | assert_equal 4, as.result.allocations_by_type[Object.name] 105 | end 106 | 107 | class X 108 | end 109 | 110 | def test_expands 111 | as = ObjectSpace::AllocationSampler.new 112 | as.enable 113 | 500.times do 114 | Object.new 115 | X.new 116 | end 117 | Object.new 118 | as.disable 119 | 120 | result = as.result 121 | assert_equal 501, result.allocations_by_type[Object.name] 122 | assert_equal 500, result.allocations_by_type[TestAllocationSampler::X.name] 123 | end 124 | 125 | def d 126 | Object.new 127 | end 128 | def c; 5.times { d }; end 129 | def b; 5.times { c }; end 130 | def a; 5.times { b }; end 131 | 132 | def test_stack_trace 133 | as = ObjectSpace::AllocationSampler.new 134 | buffer = StringIO.new 135 | stack_printer = ObjectSpace::AllocationSampler::Display::Stack.new( 136 | output: buffer 137 | ) 138 | 
as.enable 139 | a 140 | as.disable 141 | 142 | as.result.by_type_with_call_tree.each do |class_name, tree| 143 | assert_equal Object.name, class_name 144 | root = tree.find { |node| node.name.include? __method__.to_s } 145 | stack_printer.show root 146 | end 147 | assert_equal <<-eoout, buffer.string 148 | TestAllocationSampler#test_stack_trace 0 (0.0%) 149 | `-- TestAllocationSampler#a 0 (0.0%) 150 | `-- TestAllocationSampler#b 0 (0.0%) 151 | `-- TestAllocationSampler#c 0 (0.0%) 152 | `-- TestAllocationSampler#d 125 (100.0%) 153 | eoout 154 | end 155 | 156 | def test_dot 157 | as = ObjectSpace::AllocationSampler.new 158 | as.enable 159 | a 160 | as.disable 161 | 162 | File.write 'out.dot', as.result.calltree.to_dot 163 | end 164 | 165 | private 166 | 167 | def filter result 168 | result.allocations_with_top_frame 169 | end 170 | end 171 | --------------------------------------------------------------------------------