├── .gitignore ├── CMakeLists.txt ├── README.md ├── conf_stress_tests.py ├── fillFlush.cpp ├── memsetVsMadvise.cpp ├── stress_test ├── Allocation.cpp ├── Allocation.h ├── Distribution.cpp ├── Distribution.h ├── Main.cpp ├── Mixer.cpp ├── Mixer.h ├── Producers.cpp ├── Producers.h ├── SizeConstants.h ├── ThreadObject.cpp ├── ThreadObject.h └── distributions │ ├── adfinder.txt │ ├── adindexer.txt │ └── multifeed.txt ├── summarize-internal-frag.py ├── summarize.py └── util.h /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | 3 | find_package(gflags REQUIRED) 4 | find_package(Threads) 5 | 6 | add_executable(memsetVsMadvise memsetVsMadvise.cpp) 7 | set_property(TARGET memsetVsMadvise PROPERTY CXX_STANDARD 11) 8 | target_link_libraries(memsetVsMadvise PRIVATE ${CMAKE_THREAD_LIBS_INIT} gflags) 9 | 10 | add_library(jemalloc STATIC IMPORTED) 11 | set_target_properties(jemalloc PROPERTIES IMPORTED_LOCATION /usr/local/lib/libjemalloc.a) 12 | target_link_libraries(jemalloc INTERFACE dl) 13 | 14 | add_executable(stress stress_test/Main.cpp stress_test/Producers.cpp stress_test/Mixer.cpp stress_test/ThreadObject.cpp stress_test/Distribution.cpp stress_test/Allocation.cpp) 15 | set_property(TARGET stress PROPERTY CXX_STANDARD 14) 16 | target_link_libraries(stress PRIVATE gflags jemalloc m ${CMAKE_THREAD_LIBS_INIT}) 17 | 18 | add_executable(fillFlush fillFlush.cpp) 19 | set_property(TARGET fillFlush PROPERTY CXX_STANDARD 11) 20 | target_link_libraries(fillFlush PRIVATE gflags jemalloc ${CMAKE_THREAD_LIBS_INIT}) 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
jemalloc-experiments 2 | Performance testing or system benchmarks for jemalloc 3 | 4 | This repository contains programs that are useful in jemalloc development, but 5 | not portable or polished enough to live in the main repo. Here we have some 6 | relaxed constraints on languages, dependencies, and the build environment. 7 | 8 | Libraries that need to be installed: 9 | - gflags 10 | -------------------------------------------------------------------------------- /conf_stress_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import subprocess 3 | import os 4 | 5 | def run_stress_test( 6 | malloc_conf, 7 | batch_size = 1000, 8 | batch_frees = 200, 9 | num_threads = 256, 10 | num_runs = 5000, 11 | malloc_size = 32): 12 | # The default setting takes ~2min for each test. 13 | env = os.environ.copy() 14 | env['MALLOC_CONF'] = malloc_conf 15 | print(f'MALLOC_CONF={malloc_conf}, ' 16 | f'batch_size={batch_size}, batch_frees={batch_frees}, ' 17 | f'num_threads={num_threads}, num_runs={num_runs}, ' 18 | f'malloc_size={malloc_size} ', 19 | flush=True) 20 | process = subprocess.Popen( 21 | [ 22 | './fillFlush', 23 | f'--batch_size={batch_size}', 24 | f'--batch_frees={batch_frees}', 25 | f'--num_threads={num_threads}', 26 | f'--num_runs={num_runs}', 27 | f'--malloc_size={malloc_size}' 28 | ], 29 | env=env, 30 | stdout=subprocess.PIPE, 31 | stderr=subprocess.PIPE 32 | ) 33 | stdout, stderr = process.communicate() 34 | print(f'Return code: {process.returncode}', flush=True) 35 | print(f'Standard Output:\n{stdout.decode("utf-8")}', flush=True) 36 | print(f'Standard Error:\n{stderr.decode("utf-8")}', flush=True) 37 | print('-' * 80, flush=True) 38 | 39 | return 40 | 41 | def main(): 42 | malloc_configs = [ 43 | 'tcache:false,prof:false', 44 | 'tcache:false,prof:true,lg_prof_sample:13', 45 | 'tcache:true,prof:false,lg_prof_sample:13,tcache_ncached_max:1-1024:100|1025-2048:1|2049-1000000:0,tcache_max:4096', 46 
| 'tcache:true,prof:true,lg_prof_sample:13,tcache_ncached_max:1-1024:100|1025-99999:1|2049-1000000:0,tcache_max:4096', 47 | 'tcache:true,prof:true,lg_prof_sample:13,tcache_ncached_max:1-1024:100|1025-99999:1|2049-1000000:0,tcache_max:4096,percpu_arena:percpu' 48 | ] 49 | 50 | for config in malloc_configs: 51 | run_stress_test(config, num_threads=500) 52 | run_stress_test(config, malloc_size=2000, batch_size=100,batch_frees=100, num_threads=500) 53 | run_stress_test(config, malloc_size=5000, batch_size=100,batch_frees=100, num_threads=500) 54 | 55 | if __name__ == '__main__': 56 | main() 57 | 58 | -------------------------------------------------------------------------------- /fillFlush.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "gflags/gflags.h" 10 | 11 | // Flags that control single-threaded behavior 12 | DEFINE_int32(batch_size, 1000, "Number of pointers owned by a thread at a time"); 13 | DEFINE_int32(batch_thread_migrations, 200, "Number of pointers to free in a run"); 14 | DEFINE_int32(batch_frees, 200, "Number of pointers in a batch to free"); 15 | DEFINE_int32(batch_sleep_ms, 1, "Number of milliseconds to sleep between batch free and alloc"); 16 | 17 | // Flags that control cross-thread behavior 18 | DEFINE_int32(num_threads, 1, "Number of threads to run the test"); 19 | DEFINE_int32(shared_buffer_size, 10 * 1000, "Shared buffer size"); 20 | 21 | // Control parameters 22 | DEFINE_int32(num_runs, -1, "Number of runs to perform (or -1 to loop forever)"); 23 | DEFINE_int32(malloc_size, 32, "Size of the allocations"); 24 | DEFINE_int32(randseed, 12345, "Random seed, for gesture in the direction of reproducibility"); 25 | 26 | typedef std::minstd_rand URNG; 27 | 28 | std::vector> createSharedBuffer(URNG& urng) { 29 | std::vector resultNonAtomic(FLAGS_shared_buffer_size); 30 | for (int i = 0; i < 
FLAGS_shared_buffer_size; ++i) { 31 | resultNonAtomic[i] = std::malloc(FLAGS_malloc_size); 32 | } 33 | std::shuffle(resultNonAtomic.begin(), resultNonAtomic.end(), urng); 34 | 35 | std::vector> result(resultNonAtomic.begin(), resultNonAtomic.end()); 36 | return result; 37 | } 38 | 39 | void doThreadMigrations( 40 | URNG& urng, 41 | std::vector& batch, 42 | std::vector>& sharedBuffer) { 43 | std::uniform_int_distribution sharedDist(0, sharedBuffer.size() - 1); 44 | std::uniform_int_distribution localDist(0, batch.size() - 1); 45 | 46 | for (int i = 0; i < FLAGS_batch_thread_migrations; ++i) { 47 | int localIndex = localDist(urng); 48 | int sharedIndex = sharedDist(urng); 49 | void* oldLocal = batch[localIndex]; 50 | void* newLocal = sharedBuffer[sharedIndex].exchange(oldLocal); 51 | batch[localIndex] = newLocal; 52 | } 53 | } 54 | 55 | void doFrees(URNG& urng, std::vector& batch) { 56 | for (int i = 0; i < FLAGS_batch_frees; ++i) { 57 | std::free(batch[i]); 58 | } 59 | std::this_thread::sleep_for(std::chrono::milliseconds(FLAGS_batch_sleep_ms)); 60 | for (int i = 0; i < FLAGS_batch_frees; ++i) { 61 | batch[i] = std::malloc(FLAGS_malloc_size); 62 | } 63 | std::shuffle(batch.begin(), batch.end(), urng); 64 | } 65 | 66 | void doThread(unsigned initSeed, std::vector>& sharedBuffer) { 67 | std::vector batch(FLAGS_batch_size); 68 | for (int i = 0; i < FLAGS_batch_size; ++i) { 69 | batch[i] = std::malloc(FLAGS_malloc_size); 70 | } 71 | URNG urng(initSeed); 72 | for (unsigned i = 0; i < (unsigned) FLAGS_num_runs || FLAGS_num_runs == -1; ++i) { 73 | doThreadMigrations(urng, batch, sharedBuffer); 74 | doFrees(urng, batch); 75 | } 76 | } 77 | 78 | int main(int argc, char** argv) { 79 | gflags::ParseCommandLineFlags(&argc, &argv, true); 80 | URNG urng(FLAGS_randseed); 81 | auto sharedBuffer = createSharedBuffer(urng); 82 | std::vector threads; 83 | for (unsigned i = 0; i < FLAGS_num_threads; ++i) { 84 | unsigned seed = (unsigned)urng() + i; 85 | threads.emplace_back([&, seed]() 
{ 86 | doThread(seed, sharedBuffer); 87 | }); 88 | } 89 | for (auto& thread : threads) { 90 | thread.join(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /memsetVsMadvise.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "util.h" 15 | 16 | DEFINE_int32(num_runs, 100, 17 | "Number of times to zero the pages (per page count)"); 18 | DEFINE_int32(num_pages_min, 1, "Minimum number of pages to zero"); 19 | DEFINE_int32(num_pages_max, 50, "Maximum number of pages to zero"); 20 | DEFINE_int32(num_threads, 1, 21 | "Number of threads on which to try the experiment at once."); 22 | DEFINE_bool(touch_after_zero, false, 23 | "Whether to actually try touching the pages we zero."); 24 | 25 | struct Result { 26 | std::uint64_t memsetCycles; 27 | std::uint64_t madviseDontneedCycles; 28 | std::uint64_t madviseDontneedWillneedCycles; 29 | 30 | Result() 31 | : memsetCycles(0), 32 | madviseDontneedCycles(0), 33 | madviseDontneedWillneedCycles(0) {} 34 | 35 | void accum(const Result& other) { 36 | memsetCycles += other.memsetCycles; 37 | madviseDontneedCycles += other.madviseDontneedCycles; 38 | madviseDontneedWillneedCycles += other.madviseDontneedWillneedCycles; 39 | } 40 | }; 41 | 42 | void maybeTouchPages(void* beginv, std::size_t length) { 43 | char* begin = static_cast(beginv); 44 | if (FLAGS_touch_after_zero) { 45 | for (char* ptr = begin; ptr != begin + length; ptr += 4096) { 46 | *ptr = 0; 47 | } 48 | } 49 | } 50 | 51 | void zeroMemset(void* ptr, std::size_t size) { 52 | std::memset(ptr, 0, size); 53 | } 54 | 55 | void zeroMadviseDontneed(void* ptr, std::size_t size) { 56 | int err = madvise(ptr, size, MADV_DONTNEED); 57 | if (err != 0) { 58 | std::cerr << "Couldn't madvise(... 
MADV_DONTNEED); error was " 59 | << err << std::endl; 60 | exit(1); 61 | } 62 | } 63 | 64 | void zeroMadviseDontneedWillneed(void* ptr, std::size_t size) { 65 | int err = madvise(ptr, size, MADV_DONTNEED); 66 | if (err != 0) { 67 | std::cerr << "Couldn't madvise(..., MADV_DONTNEED); error was " 68 | << err << std::endl; 69 | exit(1); 70 | } 71 | err = madvise(ptr, size, MADV_WILLNEED); 72 | if (err != 0) { 73 | std::cerr << "Couldn't madvise(..., MAP_POPULATE); error was " 74 | << err << std::endl; 75 | exit(1); 76 | } 77 | } 78 | 79 | Result runTest(std::size_t size) { 80 | Result result; 81 | void *ptr; 82 | int err = posix_memalign(&ptr, 4096, size); 83 | if (err != 0) { 84 | std::cerr << "Couldn't allocate; error was " << err << std::endl; 85 | exit(1); 86 | } 87 | // Touch all the pages from this thread. 88 | std::memset(ptr, 0, size); 89 | // Touch all the pages from another thread. 90 | std::async(std::launch::async, std::memset, ptr, 0, size).get(); 91 | 92 | // We'll probably be dealing with uncached memory here; we care about this 93 | // difference when pulling memory out of an inactive state. 
94 | util::flushCache(ptr, size); 95 | result.memsetCycles = util::runTimed([&]() { 96 | zeroMemset(ptr, size); 97 | maybeTouchPages(ptr, size); 98 | }); 99 | util::flushCache(ptr, size); 100 | result.madviseDontneedCycles = util::runTimed([&]() { 101 | zeroMadviseDontneed(ptr, size); 102 | maybeTouchPages(ptr, size); 103 | }); 104 | util::flushCache(ptr, size); 105 | result.madviseDontneedWillneedCycles = util::runTimed([&]() { 106 | zeroMadviseDontneedWillneed(ptr, size); 107 | maybeTouchPages(ptr, size); 108 | }); 109 | 110 | return result; 111 | } 112 | 113 | int main(int argc, char** argv) { 114 | std::string usage = 115 | "This program benchmarks memset vs madvise for zeroing memory.\n" 116 | "Sample usage:\n"; 117 | usage += argv[0]; 118 | usage += " --num_pages_min=20 --num_pagse_max=50 --num_runs=30 "; 119 | usage += "--num_threads=4 --touch_after_zero=true"; 120 | 121 | gflags::SetUsageMessage(usage); 122 | gflags::ParseCommandLineFlags(&argc, &argv, true); 123 | 124 | for (int i = FLAGS_num_pages_min; i <= FLAGS_num_pages_max; ++i) { 125 | Result sum; 126 | for (int j = 0; j < FLAGS_num_runs; ++j) { 127 | std::vector> results; 128 | for (int k = 0; k < FLAGS_num_threads; ++k) { 129 | results.push_back(std::async(std::launch::async, runTest, 4096 * i)); 130 | } 131 | for (int k = 0; k < FLAGS_num_threads; ++k) { 132 | sum.accum(results[k].get()); 133 | } 134 | } 135 | std::cout << "When zeroing " << i << " pages (averaging across " 136 | << FLAGS_num_runs << " runs of " << FLAGS_num_threads << " threads:\n" 137 | << " memset: " << sum.memsetCycles / FLAGS_num_runs << " cycles\n" 138 | << " madvise(..., MADV_DONTNEED): " 139 | << sum.madviseDontneedCycles / FLAGS_num_runs << " cycles\n" 140 | << " madvise(..., MADV_DONTNEED); madvise(..., MADV_WILLNEED): " 141 | << sum.madviseDontneedWillneedCycles / FLAGS_num_runs << " cycles\n"; 142 | } 143 | 144 | return 0; 145 | } 146 | -------------------------------------------------------------------------------- 
/* ---------------- /stress_test/Allocation.h ---------------- */
/* NOTE(review): this dump stripped angle-bracket include targets and
 * template arguments (e.g. "#include <vector>" became "#include",
 * "std::vector<void *>" became "std::vector"); they are restored below. */
#pragma once

#include <vector>

/* Simple wrapper that pairs a group of allocated blocks with the lifetime
 * (a number of mixer phases) after which they should be freed. */
class Allocation {
public:
  // Orders by [freeAfterAbsolute], so Allocations can be kept in a
  // priority structure keyed on when they become due to be freed.
  bool operator<(const Allocation &that) const;
  bool operator>(const Allocation &that) const;
  // true iff no pointers are owned ([toFree_] is empty)
  bool isEmpty() const;
  // free every owned pointer; the (now dangling) pointers remain stored,
  // so call at most once per Allocation
  void clear() const;

  Allocation() = default;

  /* [freeAfter] is a number of phases, according to the thread that is
   * responsible for this allocation. */
  Allocation(std::vector<void *> toFree, int freeAfter);

  // number of phases to live for, relative to the allocation time
  // (zero-initialized so a default-constructed Allocation is well-defined)
  int freeAfterRelative{0};
  // absolute phase number at which to free, based on a particular thread's
  // clock; filled in by the thread that takes ownership
  int freeAfterAbsolute{0};

private:
  // the allocated blocks owned by this Allocation
  std::vector<void *> toFree_;
};

/* ---------------- /stress_test/Allocation.cpp ---------------- */
/* (In the original tree this file begins with #include "Allocation.h";
 * the declarations are inlined above because this dump is a single file.) */
#include <cstdlib>
#include <utility>

bool Allocation::operator<(const Allocation &that) const {
  return this->freeAfterAbsolute < that.freeAfterAbsolute;
}

bool Allocation::operator>(const Allocation &that) const {
  return this->freeAfterAbsolute > that.freeAfterAbsolute;
}

bool Allocation::isEmpty() const { return this->toFree_.empty(); }

// Members are initialized in declaration order (the original init list was
// out of order, which triggers -Wreorder); the vector is moved, not copied.
Allocation::Allocation(std::vector<void *> toFree, int freeAfterArg)
    : freeAfterRelative(freeAfterArg), freeAfterAbsolute(0),
      toFree_(std::move(toFree)) {}

void Allocation::clear() const {
  for (void *ptr : this->toFree_) {
    std::free(ptr);
  }
}
-------------------------------------------------------------------------------- /stress_test/Distribution.cpp: -------------------------------------------------------------------------------- 1 | #include "Distribution.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | DEFINE_int64(max_size_class, 10 * k1KB, "max size class to allocate"); 12 | 13 | SizeClass parseSizeClass(const std::string &ln) { 14 | std::istringstream strStream(ln); 15 | size_t sizeClass; 16 | double freq; 17 | if (!(strStream >> sizeClass >> freq)) { 18 | std::cout << "File format invalid. Failed to following line:\n\e[0;31m" 19 | << ln << "\e[0m" << std::endl; 20 | exit(1); 21 | } 22 | if (freq > 1.0) { 23 | std::cout << "Warning: this looks off; frequency greater than 1.0" 24 | << std::endl; 25 | freq = 1.0; 26 | } 27 | return {sizeClass, freq}; 28 | } 29 | 30 | Distribution parseDistribution(const char *fileName) { 31 | std::string line; 32 | std::ifstream f(fileName); 33 | 34 | if (!f) { 35 | std::cout << "Specified file '" << fileName << "' not found." 
<< std::endl; 36 | exit(1); 37 | } 38 | 39 | Distribution d; 40 | 41 | while (std::getline(f, line)) { 42 | SizeClass sz = parseSizeClass(line); 43 | if (sz.size <= FLAGS_max_size_class) { 44 | d.push_back(sz); 45 | } 46 | } 47 | 48 | std::sort(begin(d), end(d)); 49 | return d; 50 | } 51 | -------------------------------------------------------------------------------- /stress_test/Distribution.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "SizeConstants.h" 6 | 7 | struct SizeClass { 8 | size_t size; 9 | double freq; 10 | bool operator<(const SizeClass &that) const { return this->freq < that.freq; } 11 | }; 12 | 13 | typedef std::vector Distribution; 14 | 15 | Distribution parseDistribution(const char *fileName); 16 | -------------------------------------------------------------------------------- /stress_test/Main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include "Distribution.h" 11 | #include "Mixer.h" 12 | 13 | DEFINE_int32(num_threads, 1, "number of threads to run"); 14 | DEFINE_bool(print_malloc_stats, false, "print out malloc stats after running"); 15 | DEFINE_string(distribution_file, "", "path to distribution file"); 16 | static bool validateDistributionFile(const char *flagName, 17 | const std::string &val) { 18 | return val.length() != 0; 19 | } 20 | DEFINE_validator(distribution_file, &validateDistributionFile); 21 | 22 | using std::shared_ptr; 23 | using std::vector; 24 | 25 | void createAndRunMixer(const Distribution *distr, int me, 26 | vector> threadObjects) { 27 | Mixer m(distr, me, threadObjects); 28 | m.run(); 29 | } 30 | 31 | double run() { 32 | initInstBurner(); 33 | Distribution distr = parseDistribution(FLAGS_distribution_file.c_str()); 34 | 35 | // Set up a work queue for each thread 36 | vector threads; 37 | 
vector> threadObjects; 38 | for (int i = 0; i < FLAGS_num_threads; i++) { 39 | auto threadObject = shared_ptr(new ThreadObject()); 40 | threadObjects.push_back(threadObject); 41 | } 42 | 43 | for (int i = 0; i < FLAGS_num_threads; i++) { 44 | // each thread gets an arbitrary id given by [i] 45 | threads.push_back(std::thread(createAndRunMixer, &distr, i, threadObjects)); 46 | } 47 | 48 | using namespace std::chrono; 49 | 50 | high_resolution_clock::time_point beginTime = high_resolution_clock::now(); 51 | for (auto &t : threads) { 52 | t.join(); 53 | } 54 | 55 | // Cleanup any remaining memory 56 | for (auto &t : threadObjects) { 57 | t->freeIgnoreLifetime(); 58 | } 59 | high_resolution_clock::time_point endTime = high_resolution_clock::now(); 60 | duration span = duration_cast>(endTime - beginTime); 61 | 62 | return span.count(); 63 | } 64 | 65 | int main(int argc, char **argv) { 66 | gflags::ParseCommandLineFlags(&argc, &argv, true); 67 | double time = run(); 68 | 69 | if (FLAGS_print_malloc_stats) { 70 | if (mallctl("thread.tcache.flush", NULL, NULL, NULL, 0)) { 71 | std::cout << "je_mallctl failed. Exiting..." 
<< std::endl; 72 | } 73 | malloc_stats_print(NULL, NULL, NULL); 74 | } 75 | 76 | std::cout << "Elapsed time: " << time << std::endl; 77 | } 78 | -------------------------------------------------------------------------------- /stress_test/Mixer.cpp: -------------------------------------------------------------------------------- 1 | #include "Mixer.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "SizeConstants.h" 14 | 15 | using std::shared_ptr; 16 | using std::unique_ptr; 17 | using std::vector; 18 | 19 | DEFINE_int32(max_producers, 5, "max producers per thread at one time"); 20 | DEFINE_int32(producer_scale_param, 100, 21 | "Vaguely scales the amount of stuff a " 22 | "single producer does, in a producer-defined way."); 23 | DEFINE_double(peak_priority, 100.0, "Priority for bursty producers"); 24 | DEFINE_double(ramp_priority, 1.0, "Priority for background producers"); 25 | 26 | Mixer::Mixer(const Distribution *distr, int me, 27 | vector> threadObjects) 28 | : distr_(distr), threadObjects_(threadObjects), me_(me), 29 | consumerIdPicker_(0, threadObjects.size() - 1) { 30 | std::vector distributionWeights; 31 | for (auto &sizeClass : *distr) { 32 | distributionWeights.push_back(sizeClass.freq); 33 | } 34 | sizeClassPicker_ = std::discrete_distribution(begin(distributionWeights), 35 | end(distributionWeights)); 36 | addProducers(); 37 | } 38 | 39 | ThreadObject &Mixer::myThread() { return *this->threadObjects_[this->me_]; } 40 | 41 | void Mixer::registerProducer(double weight, unique_ptr p) { 42 | this->producers_.push_back(std::move(p)); 43 | this->weightArray_.push_back(weight); 44 | this->producerPicker_ = std::discrete_distribution( 45 | begin(this->weightArray_), end(this->weightArray_)); 46 | } 47 | 48 | void Mixer::unregisterProducer(int index) { 49 | double weight = this->weightArray_[index]; 50 | this->weightArray_.erase(begin(this->weightArray_) + index); 51 | 
this->producers_.erase(begin(this->producers_) + index); 52 | this->producerPicker_ = std::discrete_distribution( 53 | begin(this->weightArray_), end(this->weightArray_)); 54 | } 55 | 56 | void Mixer::addProducer() { 57 | int sizeClassIndex = this->sizeClassPicker_(this->generator_); 58 | SizeClass sizeClass = (*this->distr_)[sizeClassIndex]; 59 | std::uniform_int_distribution initialSizeFuzz(1, sizeClass.size / 2); 60 | 61 | std::uniform_int_distribution strategyPicker(1, 8); 62 | int strategy = strategyPicker(this->generator_); 63 | 64 | double weight; 65 | Producer *p; 66 | int maxLifetime = this->myThread().maxPhase(); 67 | std::uniform_int_distribution longLifetime(maxLifetime / 10, 68 | maxLifetime); 69 | std::uniform_int_distribution shortLifetime(1, maxLifetime / 10); 70 | if (1 <= strategy && strategy <= 3) { 71 | /* allocate a ramp 72 | * - long lifetime 73 | * - low priority; slowly accumulates in the background */ 74 | weight = FLAGS_ramp_priority; 75 | int lifetime = longLifetime(this->generator_); 76 | // VectorProducer 77 | if (strategy == 1) { 78 | p = new VectorProducer(sizeClass.size, initialSizeFuzz(this->generator_), 79 | lifetime); 80 | } 81 | // LinkedListProducer 82 | if (strategy == 2) { 83 | p = new LinkedListProducer(sizeClass.size, FLAGS_producer_scale_param, 84 | lifetime); 85 | } 86 | // SimpleProducer 87 | if (strategy == 3) { 88 | p = new SimpleProducer(sizeClass.size, FLAGS_producer_scale_param); 89 | } 90 | } else if (4 <= strategy && strategy <= 5) { 91 | /* allocate a plateau 92 | * - finishes quickly 93 | * - long lifetime; stays for duration of program */ 94 | weight = FLAGS_peak_priority; 95 | int lifetime = shortLifetime(this->generator_); 96 | // VectorProducer 97 | if (strategy == 4) { 98 | p = new VectorProducer(sizeClass.size, initialSizeFuzz(this->generator_), 99 | lifetime); 100 | } 101 | // LinkedListProducer 102 | if (strategy == 5) { 103 | p = new LinkedListProducer(sizeClass.size, FLAGS_producer_scale_param, 104 | 
lifetime); 105 | } 106 | } else { 107 | weight = FLAGS_peak_priority; 108 | int lifetime = longLifetime(this->generator_); 109 | /* allocate a peak 110 | * - high priority 111 | * - finishes quickly 112 | * - short lifetime */ 113 | // VectorProducer 114 | if (strategy == 6) { 115 | p = new VectorProducer(sizeClass.size, initialSizeFuzz(this->generator_), 116 | lifetime); 117 | } 118 | // LinkedListProducer 119 | if (strategy == 7) { 120 | p = new LinkedListProducer(sizeClass.size, FLAGS_producer_scale_param, 121 | lifetime); 122 | } 123 | // SimpleProducer 124 | if (strategy == 8) { 125 | p = new SimpleProducer(sizeClass.size, FLAGS_producer_scale_param); 126 | } 127 | } 128 | 129 | assert(p != nullptr); 130 | 131 | this->registerProducer(weight, std::move(std::unique_ptr(p))); 132 | } 133 | 134 | void Mixer::addProducers() { 135 | while (this->producers_.size() < FLAGS_max_producers) { 136 | this->addProducer(); 137 | } 138 | } 139 | 140 | int Mixer::pickProducer() { return this->producerPicker_(this->generator_); } 141 | 142 | // Picks next producer for the mixer to run. Currently uniform random choice 143 | ThreadObject &Mixer::pickConsumer() { 144 | int consumerIndex = this->consumerIdPicker_(this->generator_); 145 | return *(this->threadObjects_[consumerIndex]); 146 | } 147 | 148 | constexpr size_t kMaxDataCacheSize = 8000000; 149 | constexpr size_t kMaxInstCacheSize = 32000; 150 | 151 | static char dataBurner[kMaxDataCacheSize] = {0}; 152 | static char *instBurner = nullptr; 153 | 154 | constexpr unsigned char instRet = {0xC3}; 155 | constexpr unsigned char instNop = {0x90}; 156 | 157 | void burnDataCache(size_t n) { 158 | // Do something slightly non-trivial so this doesn't get optimized away 159 | size_t nClipped = (n > kMaxDataCacheSize) ? 
kMaxDataCacheSize : n; 160 | char c = dataBurner[0]; 161 | for (int i = 0; i < nClipped; i++) { 162 | dataBurner[i] = c + 1; 163 | } 164 | } 165 | 166 | void initInstBurner() { 167 | size_t sz = kMaxInstCacheSize + 1; 168 | 169 | instBurner = (char *)mmap(NULL, sz, PROT_READ | PROT_WRITE, 170 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 171 | char *p = instBurner; 172 | for (int i = 0; i < sz - 1; ++i) { 173 | *(p++) = instNop; 174 | } 175 | *(p++) = instRet; 176 | if (mprotect(instBurner, sz, PROT_NONE) == -1) { 177 | std::cout << "mprotect failed" << std::endl; 178 | exit(1); 179 | } 180 | if (mprotect(instBurner, sz, PROT_EXEC | PROT_READ) == -1) { 181 | std::cout << "mprotect failed" << std::endl; 182 | exit(1); 183 | } 184 | } 185 | 186 | void burnInstCache(size_t n) { 187 | int nClipped = (n > kMaxInstCacheSize) ? kMaxInstCacheSize : n; 188 | int offset = kMaxInstCacheSize - nClipped; 189 | 190 | void (*f)() = (void (*)())(instBurner + offset); 191 | (*f)(); 192 | } 193 | 194 | void Mixer::run() { 195 | while (true) { 196 | this->myThread().free(); 197 | // otherwise run a random producer 198 | if (this->producers_.size() == 0) { 199 | std::cout << "ran out of producers" << std::endl; 200 | exit(0); 201 | } 202 | int producerIndex = this->pickProducer(); 203 | ProducerStatus st; 204 | Allocation a = 205 | this->producers_[producerIndex]->run(this->myThread(), 100000, st); 206 | if (st == ProducerStatus::AllocationFailed) { 207 | for (auto &producer : this->producers_) { 208 | producer->cleanup(); 209 | } 210 | break; 211 | } else if (st == ProducerStatus::Done) { 212 | this->unregisterProducer(producerIndex); 213 | } 214 | if (!a.isEmpty()) { 215 | this->pickConsumer().addToFree(std::move(a)); 216 | } 217 | 218 | addProducers(); 219 | 220 | burnInstCache(kMaxInstCacheSize); 221 | } 222 | if (mallctl("thread.tcache.flush", NULL, NULL, NULL, 0)) { 223 | std::cout << "je_mallctl failed. Exiting..." 
<< std::endl; 224 | } 225 | // Main loop will cleanup memory after all threads are done 226 | } 227 | -------------------------------------------------------------------------------- /stress_test/Mixer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "Distribution.h" 8 | #include "Producers.h" 9 | #include "ThreadObject.h" 10 | 11 | void initInstBurner(); 12 | 13 | class Mixer { 14 | public: 15 | void run(); 16 | Mixer(const Distribution *distr, int me, 17 | std::vector> threadObjects); 18 | 19 | private: 20 | // the thread id that this mixer is running on 21 | int me_; 22 | // work queues for each thread indexed by thread number 23 | std::vector> threadObjects_; 24 | /* Picks a consumer to free memory allocated by a producer. Currently uniform 25 | * random choice */ 26 | ThreadObject &pickConsumer(); 27 | 28 | std::uniform_int_distribution consumerIdPicker_; 29 | std::default_random_engine generator_; 30 | 31 | // for picking producer with weighted random choice 32 | std::vector weightArray_; 33 | std::vector> producers_; 34 | std::discrete_distribution producerPicker_; 35 | // Picks the index of the next producer for the mixer to run. Uses 36 | // [producerPicker_]. 37 | int pickProducer(); 38 | 39 | // [pickProducer] constructs producers using [distr_] as a guideline 40 | const Distribution *distr_; 41 | /* Generated from [distr_]; generates indexes into [distr_] randomly, weighted 42 | * by the frequency of the size classes in [distr_]. 
*/ 43 | std::discrete_distribution sizeClassPicker_; 44 | 45 | // add producers until [FLAGS_max_producers] 46 | void addProducers(); 47 | // randomly choose a producer and add it to the mixer 48 | void addProducer(); 49 | 50 | // get the thread object that this mixer is running on 51 | ThreadObject &myThread(); 52 | 53 | // register [p] to get scheduled by the mixer with priority [weight] 54 | void registerProducer(double weight, std::unique_ptr p); 55 | // unregister the producer indexed by [index] in [_producers] 56 | void unregisterProducer(int index); 57 | }; 58 | -------------------------------------------------------------------------------- /stress_test/Producers.cpp: -------------------------------------------------------------------------------- 1 | #include "Producers.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | void *allocateAndUse(ThreadObject &myThread, size_t &memUsed, size_t sz) { 8 | void *ptr = myThread.allocate(sz); 9 | memUsed += sz; 10 | if (ptr != nullptr) { 11 | memset(ptr, 0, sz); 12 | } 13 | return ptr; 14 | } 15 | 16 | // Simple Producer 17 | 18 | SimpleProducer::SimpleProducer(int allocSize, int numAllocs) 19 | : allocSize_(allocSize), allocsLeft_(numAllocs) {} 20 | Allocation SimpleProducer::run(ThreadObject &myThread, size_t memUsageHint, 21 | ProducerStatus &retStatus) { 22 | size_t memUsed = 0; 23 | while (true) { 24 | if (this->allocsLeft_ <= 0) { 25 | retStatus = ProducerStatus::Done; 26 | return Allocation(); 27 | } 28 | if (memUsed >= memUsageHint) { 29 | retStatus = ProducerStatus::Yield; 30 | return Allocation(); 31 | } 32 | void *ptr = allocateAndUse(myThread, memUsed, this->allocSize_); 33 | if (ptr == nullptr) { 34 | retStatus = ProducerStatus::AllocationFailed; 35 | return Allocation(); 36 | } 37 | this->allocsLeft_ -= 1; 38 | free(ptr); 39 | } 40 | } 41 | 42 | void SimpleProducer::cleanup() {} 43 | 44 | // Vector Producer 45 | 46 | VectorProducer::VectorProducer(size_t maxSize, size_t initialSize, int lifetime) 47 
| : maxSize_(maxSize), initialSize_(initialSize), currentSize_(0), 48 | ptr_(nullptr), lifetime_(lifetime) {} 49 | 50 | Allocation VectorProducer::run(ThreadObject &myThread, size_t memUsageHint, 51 | ProducerStatus &retStatus) { 52 | size_t memUsed = 0; 53 | 54 | if (this->currentSize_ == 0) { 55 | this->ptr_ = allocateAndUse(myThread, memUsed, this->initialSize_); 56 | if (this->ptr_ == nullptr) { 57 | retStatus = ProducerStatus::AllocationFailed; 58 | return Allocation(); 59 | } 60 | this->currentSize_ = this->initialSize_; 61 | } 62 | 63 | while (true) { 64 | if (this->currentSize_ >= this->maxSize_) { 65 | retStatus = ProducerStatus::Done; 66 | return Allocation({this->ptr_}, this->lifetime_); 67 | } 68 | if (memUsed >= memUsageHint) { 69 | retStatus = ProducerStatus::Yield; 70 | return Allocation(); 71 | } 72 | 73 | free(this->ptr_); 74 | this->currentSize_ *= 2; 75 | this->ptr_ = allocateAndUse(myThread, memUsed, this->currentSize_); 76 | if (ptr_ == nullptr) { 77 | retStatus = ProducerStatus::AllocationFailed; 78 | return Allocation(); 79 | } 80 | } 81 | } 82 | 83 | void VectorProducer::cleanup() { 84 | if (this->ptr_ != nullptr) { 85 | free(this->ptr_); 86 | } 87 | } 88 | 89 | // LinkedList Producer 90 | 91 | Allocation LinkedListProducer::run(ThreadObject &myThread, size_t memUsageHint, 92 | ProducerStatus &retStatus) { 93 | size_t memUsed = 0; 94 | 95 | while (true) { 96 | if (this->nodesRemaining_ <= 0) { 97 | retStatus = ProducerStatus::Done; 98 | return Allocation(std::move(this->toFree_), this->lifetime_); 99 | } 100 | if (memUsed >= memUsageHint) { 101 | retStatus = ProducerStatus::Yield; 102 | return Allocation(); 103 | } 104 | void *newNode = allocateAndUse(myThread, memUsed, this->nodeSize_); 105 | if (newNode == nullptr) { 106 | retStatus = ProducerStatus::AllocationFailed; 107 | return Allocation(); 108 | } 109 | nodesRemaining_ -= 1; 110 | this->toFree_.push_back(newNode); 111 | } 112 | } 113 | 114 | void LinkedListProducer::cleanup() { 115 | 
for (auto &ptr : this->toFree_) { 116 | free(ptr); 117 | } 118 | } 119 | 120 | // allocate [numNodes] blocks of size [nodeSize] with lifetime [lifetime] 121 | LinkedListProducer::LinkedListProducer(size_t nodeSize, int numNodes, 122 | int lifetime) 123 | : nodeSize_(nodeSize), nodesRemaining_(numNodes), lifetime_(lifetime), 124 | toFree_() { 125 | this->toFree_.reserve(numNodes); 126 | } 127 | -------------------------------------------------------------------------------- /stress_test/Producers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ThreadObject.h" 6 | 7 | enum class ProducerStatus { 8 | // the producer tried to allocate memory from the thread object, but it failed 9 | AllocationFailed, 10 | // the producer did everything it was supposed to do and can be unregistered 11 | Done, 12 | // the producer ran for some time, and deferred it's execution to the mixer 13 | Yield 14 | }; 15 | 16 | class Producer { 17 | public: 18 | /* Run the producer using [myThread] to allocate memory. The producer should 19 | * use approximately [memUsageHint] bytes of memory, but the accuracy to which 20 | * this is done is up to individual producers. Set [retStatus] to tell the 21 | * mixer what to do next. */ 22 | virtual Allocation run(ThreadObject &myThread, size_t memUsageHint, 23 | ProducerStatus &retStatus) = 0; 24 | /* Called on each Producer when this thread stops simulating. 25 | * Frees any memory stored in the producer. 
*/ 26 | virtual void cleanup() = 0; 27 | }; 28 | 29 | // allocates a vector of size [sz] 30 | class VectorProducer : public Producer { 31 | public: 32 | Allocation run(ThreadObject &myThread, size_t memUsageHint, 33 | ProducerStatus &retStatus); 34 | // allocate, and then free after [lifetime] has elapsed 35 | VectorProducer(size_t maxSize, size_t initialSize, int lifetime); 36 | void cleanup(); 37 | 38 | private: 39 | size_t maxSize_; 40 | size_t initialSize_; 41 | int lifetime_; 42 | size_t currentSize_; 43 | void *ptr_; 44 | }; 45 | 46 | /* allocates a block of size [alloc_sz], and then immediately frees it. Repeats 47 | * this [n_allocs] times. */ 48 | class SimpleProducer : public Producer { 49 | public: 50 | Allocation run(ThreadObject &myThread, size_t memUsageHint, 51 | ProducerStatus &retStatus); 52 | SimpleProducer(int allocSize, int numAllocs); 53 | void cleanup(); 54 | 55 | private: 56 | int allocSize_; 57 | int allocsLeft_; 58 | }; 59 | 60 | // Allocates many similarly sized blocks, and then frees them all at once later. 
61 | class LinkedListProducer : public Producer { 62 | public: 63 | Allocation run(ThreadObject &myThread, size_t memUsageHint, 64 | ProducerStatus &retStatus); 65 | // allocate [numNodes] blocks of size [nodeSize] with lifetime [lifetime] 66 | LinkedListProducer(size_t nodeSize, int numNodes, int lifetime); 67 | void cleanup(); 68 | 69 | private: 70 | size_t nodeSize_; 71 | int nodesRemaining_; 72 | int lifetime_; 73 | std::vector toFree_; 74 | }; 75 | -------------------------------------------------------------------------------- /stress_test/SizeConstants.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | using std::size_t; 6 | 7 | constexpr size_t k1KB = 1000; 8 | constexpr size_t k1MB = 1000000; 9 | constexpr size_t k1GB = 1000000000; 10 | -------------------------------------------------------------------------------- /stress_test/ThreadObject.cpp: -------------------------------------------------------------------------------- 1 | #include "ThreadObject.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "SizeConstants.h" 13 | 14 | DEFINE_int64(alloc_per_thread, k1GB, 15 | "stop each thread after allocating this amount of memory"); 16 | 17 | DEFINE_int64(bytes_per_phase, k1MB, "bytes allocated per clock 'tick'"); 18 | 19 | void ThreadObject::free() { 20 | std::lock_guard guard(this->lock_); 21 | 22 | while (!this->q_.empty() && 23 | this->q_.top().freeAfterAbsolute <= this->currentPhase()) { 24 | this->q_.top().clear(); 25 | this->q_.pop(); 26 | } 27 | } 28 | 29 | void ThreadObject::freeIgnoreLifetime() { 30 | std::lock_guard guard(this->lock_); 31 | 32 | while (!this->q_.empty()) { 33 | this->q_.top().clear(); 34 | this->q_.pop(); 35 | } 36 | } 37 | 38 | void ThreadObject::addToFree(Allocation a) { 39 | int absolutePhase = this->currentPhase() + a.freeAfterRelative; 40 | a.freeAfterAbsolute = absolutePhase; 41 | 
std::lock_guard guard(this->lock_); 42 | this->q_.push(a); 43 | } 44 | 45 | void *ThreadObject::allocate(size_t sz) { 46 | if (FLAGS_alloc_per_thread <= this->allocSoFar_) { 47 | return nullptr; 48 | } else { 49 | this->allocSoFar_ += sz; 50 | assert(sz > 0); 51 | void *r = malloc(sz); 52 | if (r == nullptr) { 53 | std::cout << "malloc failed." << std::endl; 54 | exit(1); 55 | } 56 | return r; 57 | } 58 | } 59 | 60 | int ThreadObject::currentPhase() const { 61 | return this->allocSoFar_ / FLAGS_bytes_per_phase; 62 | } 63 | 64 | int ThreadObject::maxPhase() const { 65 | return FLAGS_alloc_per_thread / FLAGS_bytes_per_phase; 66 | } 67 | 68 | ThreadObject::ThreadObject() : allocSoFar_(0) {} 69 | -------------------------------------------------------------------------------- /stress_test/ThreadObject.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Allocation.h" 9 | 10 | class ThreadObject { 11 | public: 12 | // frees all allocations whose lifetime has elapsed 13 | void free(); 14 | // free all allocations, even if the lifetime hasn't expired 15 | void freeIgnoreLifetime(); 16 | // Add an allocation to be freed after a particular time 17 | void addToFree(Allocation a); 18 | // calls malloc, or return [nullptr] if the simulation should be done 19 | void *allocate(size_t sz); 20 | 21 | // get the current time for this threads logical clock 22 | int currentPhase() const; 23 | 24 | // the time that the simulation will stop (according to this thread's logical 25 | // clock) 26 | int maxPhase() const; 27 | 28 | ThreadObject(); 29 | 30 | private: 31 | std::mutex lock_; 32 | std::priority_queue, 33 | std::greater> 34 | q_; 35 | size_t allocSoFar_; 36 | }; 37 | -------------------------------------------------------------------------------- /stress_test/distributions/adfinder.txt: 
-------------------------------------------------------------------------------- 1 | 8 0.0090475869034 2 | 16 0.0371816165473 3 | 32 0.249494030275 4 | 48 0.153471785857 5 | 64 0.0833917342719 6 | 80 0.0641320417645 7 | 96 0.0990601323096 8 | 112 0.0180347594562 9 | 128 0.0328361596507 10 | 160 0.0333902865652 11 | 192 0.0201117690268 12 | 224 0.058487415762 13 | 256 0.015514937901 14 | 320 0.00665660276036 15 | 384 0.00159740677832 16 | 448 0.000535682381663 17 | 512 0.0131783396516 18 | 640 0.0162427708289 19 | 768 0.00117251634497 20 | 896 0.00778532765506 21 | 1024 0.00863362190594 22 | 1280 0.000847846220479 23 | 1536 0.000696548098703 24 | 1792 0.000598641216258 25 | 2048 0.000544905067297 26 | 2560 0.0076125356438 27 | 3072 6.08895444935e-05 28 | 3584 0.0070287766828 29 | 4096 0.0464055654527 30 | 5120 0.000121436526822 31 | 6144 9.04721265046e-05 32 | 7168 7.72089511569e-05 33 | 8192 0.000207314161953 34 | 10240 0.000168671037045 35 | 12288 3.20615046956e-05 36 | 14336 6.19056613406e-05 37 | 16384 0.00440350366137 38 | 20480 0.000591156492855 39 | 24576 5.35877556583e-05 40 | 28672 1.96489499362e-05 41 | 32768 4.83389923471e-05 42 | 40960 1.6742788683e-05 43 | 49152 1.00914152355e-05 44 | 57344 8.35697420142e-06 45 | 65536 0.000132731185374 46 | 81920 8.65979714299e-06 47 | 98304 2.06259982643e-05 48 | 114688 8.53253102303e-06 49 | 131072 1.06447461919e-05 50 | 163840 7.04806144037e-05 51 | 196608 1.27425412207e-05 52 | 229376 8.70993228116e-06 53 | 262144 4.50411398472e-06 54 | 327680 8.57964799234e-06 55 | 393216 2.33304452325e-06 56 | 458752 1.8239800434e-06 57 | 524288 1.64054244757e-06 58 | 655360 2.64223915462e-05 59 | 786432 9.47537343745e-07 60 | 917504 1.43111506439e-06 61 | 1048576 2.09075263478e-06 62 | 1310720 1.13030423539e-06 63 | 1572864 1.17004345862e-06 64 | 1835008 1.46397956968e-06 65 | 2097152 1.67910793846e-06 66 | 2621440 2.84244435519e-06 67 | 3145728 1.63635054638e-06 68 | 3670016 2.23411565529e-06 69 | 4194304 6.9568792058e-07 70 | 
5242880 1.13181331982e-07 71 | 6291456 5.8686616583e-08 72 | 7340032 3.13554208601e-08 73 | 8388608 3.32836954049e-07 74 | 10485760 4.34280962714e-08 75 | 12582912 1.8947393354e-08 76 | 14680064 5.19795746878e-09 77 | 16777216 2.01211256856e-09 78 | 20971520 5.0302814214e-10 79 | 25165824 1.1904999364e-08 80 | 29360128 3.60168149772e-07 81 | 33554432 3.3535209476e-10 82 | 41943040 5.12418000794e-07 83 | 50331648 1.1904999364e-08 84 | 58720256 6.7070418952e-10 85 | 67108864 6.7070418952e-10 86 | 100663296 1.1904999364e-08 87 | 134217728 3.3535209476e-10 88 | 167772160 5.0302814214e-10 89 | 268435456 3.3535209476e-10 90 | 335544320 3.3535209476e-10 91 | 536870912 3.3535209476e-10 92 | -------------------------------------------------------------------------------- /stress_test/distributions/adindexer.txt: -------------------------------------------------------------------------------- 1 | 8 0.209085823124 2 | 16 0.029586297063 3 | 32 0.178274773997 4 | 48 0.241066858147 5 | 64 0.0177491726159 6 | 80 0.00316715233976 7 | 96 0.00172374180903 8 | 112 0.0242949424404 9 | 128 0.042802062118 10 | 160 0.017090771598 11 | 192 0.000478063296447 12 | 224 0.000410013192658 13 | 256 0.0151998573151 14 | 320 0.0839238412055 15 | 384 0.00558095305553 16 | 448 0.000161538552304 17 | 512 0.0249588592745 18 | 640 0.0142109771606 19 | 768 0.000120852257388 20 | 896 0.000232773650461 21 | 1024 0.0837722607808 22 | 1280 0.00493321069493 23 | 1536 6.13240332779e-05 24 | 1792 0.000189652124801 25 | 2048 0.000228215920232 26 | 2560 6.82940023995e-05 27 | 3072 8.79778177058e-05 28 | 3584 2.65205712767e-05 29 | 4096 0.000206021089145 30 | 5120 1.30274847089e-05 31 | 6144 9.15062452871e-06 32 | 7168 7.98717709207e-06 33 | 8192 3.07972326457e-05 34 | 10240 6.86707906904e-06 35 | 12288 3.15464911243e-05 36 | 14336 3.19897225942e-06 37 | 16384 2.24374225231e-05 38 | 20480 6.18179653499e-05 39 | 24576 4.39363122955e-06 40 | 28672 2.21741245744e-06 41 | 32768 8.47108491682e-06 42 | 40960 
1.73420518899e-05 43 | 49152 1.68619271135e-06 44 | 57344 6.30948211536e-07 45 | 65536 5.22531806653e-06 46 | 81920 1.55959850997e-06 47 | 98304 4.56552308675e-05 48 | 114688 5.5086722641e-07 49 | 131072 3.75851667986e-06 50 | 163840 2.80804434068e-06 51 | 196608 7.63295083494e-07 52 | 229376 5.44202857804e-07 53 | 262144 2.64950858272e-06 54 | 327680 1.04873189625e-06 55 | 393216 7.00811088945e-07 56 | 458752 1.19235480013e-06 57 | 524288 9.51394007181e-07 58 | 655360 4.59595507965e-07 59 | 786432 3.07117058434e-07 60 | 917504 1.508720664e-06 61 | 1048576 3.31441560735e-07 62 | 1310720 5.43033048422e-07 63 | 1572864 8.03828535503e-07 64 | 1835008 4.84449526256e-07 65 | 2097152 5.57797460785e-07 66 | 2621440 8.81598703988e-07 67 | 3145728 4.43335600651e-07 68 | 3670016 2.0883202256e-07 69 | 4194304 3.99407928993e-07 70 | 5242880 5.07401496526e-07 71 | 6291456 8.35394556684e-08 72 | 7340032 2.007491493e-06 73 | 8388608 4.8193155587e-07 74 | 10485760 1.79530291511e-07 75 | 12582912 4.34723765289e-08 76 | 14680064 1.64165908729e-06 77 | 16777216 1.51244389254e-08 78 | 20971520 2.98810968705e-08 79 | 25165824 2.87068563914e-08 80 | 29360128 1.63484849252e-06 81 | 33554432 1.2679581626e-08 82 | 41943040 2.08128586047e-08 83 | 50331648 1.92376039243e-08 84 | 58720256 1.62509232657e-06 85 | 67108864 1.57868877994e-08 86 | 83886080 1.5264018454e-08 87 | 100663296 1.15518676565e-08 88 | 117440512 1.61000001309e-06 89 | 134217728 4.67702198366e-09 90 | 167772160 7.21160898002e-10 91 | 201326592 1.48884830555e-09 92 | 234881024 4.34730411933e-07 93 | 268435456 5.50563696324e-10 94 | 335544320 3.93259783089e-10 95 | 402653184 5.0957605696e-10 96 | 469762048 1.80811986072e-07 97 | 536870912 4.29816326305e-10 98 | 671088640 3.48948821614e-10 99 | 805306368 2.20447033337e-10 100 | 939524096 1.08836583574e-07 101 | 1073741824 2.52023024484e-07 102 | 1342177280 2.65145715725e-07 103 | 1610612736 6.42508941384e-11 104 | 1879048192 5.49876876421e-08 105 | 
-------------------------------------------------------------------------------- /stress_test/distributions/multifeed.txt: -------------------------------------------------------------------------------- 1 | 8 0.152825841152 2 | 16 0.0624298739386 3 | 32 0.32177883898 4 | 48 0.140391266756 5 | 64 0.0892268301422 6 | 80 0.0176315647257 7 | 96 0.0155377318314 8 | 112 0.0427224149156 9 | 128 0.0184088020922 10 | 160 0.0206062740843 11 | 192 0.0291457125272 12 | 224 0.00786826861338 13 | 256 0.00732744710948 14 | 320 0.0117055888817 15 | 384 0.00516131905986 16 | 448 0.00477161311241 17 | 512 0.00656374951823 18 | 640 0.00290697088989 19 | 768 0.00171603025145 20 | 896 0.00366906821009 21 | 1024 0.00210134987502 22 | 1280 0.00295046170314 23 | 1536 0.00293633060676 24 | 1792 0.00689489817897 25 | 2048 0.00122428584786 26 | 2560 0.00265963158262 27 | 3072 0.00298039380778 28 | 3584 0.000856269037326 29 | 4096 0.00143912181768 30 | 5120 0.000835374998611 31 | 6144 0.00258876081079 32 | 7168 0.00154101997597 33 | 8192 0.000295868506902 34 | 10240 0.00163702411359 35 | 12288 0.000278552085022 36 | 14336 0.000362782132825 37 | 16384 0.00174070010633 38 | 20480 0.000662920620457 39 | 24576 0.000116024674674 40 | 28672 3.85176590106e-05 41 | 32768 5.46356467897e-05 42 | 40960 8.18061226398e-05 43 | 49152 3.48428326573e-05 44 | 57344 1.41138300222e-05 45 | 65536 7.18129801279e-05 46 | 81920 1.53603621636e-05 47 | 98304 1.42579182199e-05 48 | 114688 7.0013001868e-06 49 | 131072 8.86293956764e-06 50 | 163840 3.52361848708e-05 51 | 196608 2.87794766288e-05 52 | 229376 4.47638074602e-06 53 | 262144 4.40796598777e-06 54 | 327680 3.83673060359e-06 55 | 393216 2.98595200603e-06 56 | 458752 4.52403903561e-06 57 | 524288 1.31922777485e-06 58 | 655360 4.17891539287e-05 59 | 786432 2.52347685673e-06 60 | 917504 1.38350954372e-06 61 | 1048576 7.23194621828e-06 62 | 1310720 2.80836460687e-06 63 | 1572864 3.0161586559e-06 64 | 1835008 9.1152561694e-07 65 | 2097152 3.15808716293e-07 66 | 
2621440 3.23979769108e-06 67 | 3145728 3.79440345733e-07 68 | 3670016 2.79862423466e-07 69 | 4194304 2.65001296395e-07 70 | 5242880 2.3036700168e-07 71 | 6291456 9.68065983079e-08 72 | 7340032 1.34697728831e-07 73 | 8388608 7.00364794353e-10 74 | 10485760 5.18995425297e-10 75 | 12582912 8.66275912569e-08 76 | 14680064 7.81283435931e-11 77 | 16777216 1.35608482094e-10 78 | 20971520 2.33268911585e-10 79 | 25165824 8.43596370543e-08 80 | 29360128 3.23674566314e-11 81 | 33554432 1.31702064914e-10 82 | 41943040 1.1719251539e-10 83 | 50331648 1.31997278441e-07 84 | 58720256 3.34835758256e-12 85 | 67108864 5.58059597094e-12 86 | 83886080 8.59411779525e-11 87 | 100663296 8.25928203699e-11 88 | 117440512 1.11611919419e-12 89 | 16384 0.00174070010633 90 | 20480 0.000662920620457 91 | 24576 0.000116024674674 92 | 28672 3.85176590106e-05 93 | 32768 5.46356467897e-05 94 | 40960 8.18061226398e-05 95 | 49152 3.48428326573e-05 96 | 57344 1.41138300222e-05 97 | 65536 7.18129801279e-05 98 | 81920 1.53603621636e-05 99 | 98304 1.42579182199e-05 100 | 114688 7.0013001868e-06 101 | 131072 8.86293956764e-06 102 | 163840 3.52361848708e-05 103 | 196608 2.87794766288e-05 104 | 229376 4.47638074602e-06 105 | 262144 4.40796598777e-06 106 | 327680 3.83673060359e-06 107 | 393216 2.98595200603e-06 108 | 458752 4.52403903561e-06 109 | 524288 1.31922777485e-06 110 | 655360 4.17891539287e-05 111 | 786432 2.52347685673e-06 112 | 917504 1.38350954372e-06 113 | 1048576 7.23194621828e-06 114 | 1310720 2.80836460687e-06 115 | 1572864 3.0161586559e-06 116 | 1835008 9.1152561694e-07 117 | 2097152 3.15808716293e-07 118 | 2621440 3.23979769108e-06 119 | 3145728 3.79440345733e-07 120 | 3670016 2.79862423466e-07 121 | 4194304 2.65001296395e-07 122 | 5242880 2.3036700168e-07 123 | 6291456 9.68065983079e-08 124 | 7340032 1.34697728831e-07 125 | 8388608 7.00364794353e-10 126 | 10485760 5.18995425297e-10 127 | 12582912 8.66275912569e-08 128 | 14680064 7.81283435931e-11 129 | 16777216 1.35608482094e-10 130 | 
20971520 2.33268911585e-10 131 | 25165824 8.43596370543e-08 132 | 29360128 3.23674566314e-11 133 | 33554432 1.31702064914e-10 134 | 41943040 1.1719251539e-10 135 | 50331648 1.31997278441e-07 136 | 58720256 3.34835758256e-12 137 | 67108864 5.58059597094e-12 138 | 83886080 8.59411779525e-11 139 | 100663296 8.25928203699e-11 140 | 117440512 1.11611919419e-12 141 | 167772160 8.48250587583e-11 142 | 335544320 8.48250587583e-11 143 | -------------------------------------------------------------------------------- /summarize-internal-frag.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # See summarize.py for the input requirement and how to run the script. 4 | 5 | import json 6 | import sys 7 | 8 | def main(contents): 9 | bins = contents['jemalloc']['arenas']['bin'] 10 | lextents = contents['jemalloc']['arenas']['lextent'] 11 | 12 | binstats = contents['jemalloc']['stats.arenas']['merged']['bins'] 13 | lextentstats = contents['jemalloc']['stats.arenas']['merged']['lextents'] 14 | 15 | assert(len(bins) == len(binstats)) 16 | assert(len(lextents) == len(lextentstats)) 17 | 18 | nbins = len(bins) 19 | nsizes = nbins + len(lextents) 20 | bin_live_c_sum = 0 21 | 22 | print( "%4s" "%14s" "%14s" 23 | "%16s" "%12s" "%12s" "%18s" 24 | "%16s" "%12s" "%12s" "%18s" % 25 | ("ind", "size", "total", 26 | "live_requested", "live_count", "live_frag", "total_live_frag", 27 | "accum_requested", "accum_count", "accum_frag", "total_accum_frag")) 28 | 29 | for ind in range(nsizes): 30 | if ind < nbins: 31 | meta = bins[ind] 32 | stats = binstats[ind] 33 | count = stats['curregs'] 34 | else: 35 | ilextent = ind - nbins 36 | meta = lextents[ilextent] 37 | stats = lextentstats[ilextent] 38 | count = stats['curlextents'] 39 | 40 | live_r = stats['prof_live_requested'] 41 | live_c = stats['prof_live_count'] 42 | accum_r = stats['prof_accum_requested'] 43 | accum_c = stats['prof_accum_count'] 44 | 45 | if ind < nbins: 46 | count 
+= live_c 47 | bin_live_c_sum += live_c 48 | elif ind == nbins: 49 | count -= bin_live_c_sum 50 | 51 | if not count: 52 | continue 53 | 54 | size = meta['size'] 55 | total = count * size 56 | 57 | live_frag = 0.0 if live_c == 0 else 1.0 - live_r / (live_c * size) 58 | total_live_frag = int(total * live_frag) 59 | 60 | accum_frag = 0.0 if accum_c == 0 else 1.0 - accum_r / (accum_c * size) 61 | total_accum_frag = int(total * accum_frag) 62 | 63 | print( "%4d" "%14d" "%14d" "%16d" "%12d" "%12.4f" "%18d" 64 | "%16d" "%12d" "%12.4f" "%18d" % 65 | (ind, size, total, live_r, live_c, live_frag, total_live_frag, 66 | accum_r, accum_c, accum_frag, total_accum_frag)) 67 | 68 | if __name__ == "__main__": 69 | if len(sys.argv) != 2: 70 | print("Usage:", sys.argv[0], "my_stats.json") 71 | sys.exit(1) 72 | with open(sys.argv[1]) as f: 73 | contents = json.load(f) 74 | main(contents) 75 | -------------------------------------------------------------------------------- /summarize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # First, export stats to a file with at least the options JM. 
We only need 4 | # merged arena stats, so if you expose stats output on some webserver status 5 | # path, the right command is something like: 6 | # 7 | # wget -O my_stats.json server:port/pprof/mallocstats?opts=JMa 8 | # summarize.py my_stats.json 9 | 10 | import json 11 | import sys 12 | 13 | def human_size_str(size): 14 | strings = ["bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"] 15 | ind = 0 16 | while size >= 1024 and ind < len(strings): 17 | ind += 1 18 | size /= 1024 19 | return "{:0.1f} {}".format(size, strings[ind]) 20 | 21 | def avg_size_str(nbytes, n): 22 | if n == 0: 23 | return "n/a" 24 | return human_size_str(nbytes / n) 25 | 26 | def waste_in_bin(binstat, bin): 27 | # *Live* objects 28 | curregs = binstat['curregs'] 29 | nslabs = binstat['curslabs'] 30 | # *all objects* 31 | nregs = nslabs * bin['nregs'] 32 | waste = (nregs - curregs) * bin['size'] 33 | return waste 34 | 35 | def live_in_bin(binstat, bin): 36 | # *Live* objects 37 | curregs = binstat['curregs'] 38 | live = curregs * bin['size'] 39 | return live 40 | 41 | def main(contents): 42 | # Bins 43 | print("-----------------------------------------------------") 44 | print("BINS") 45 | print("-----------------------------------------------------") 46 | bins = contents['jemalloc']['arenas']['bin'] 47 | binstats = contents['jemalloc']['stats.arenas']['merged']['bins'] 48 | sum_bin_waste = 0 49 | sum_bin_live = 0 50 | for i in range(len(binstats)): 51 | binstat = binstats[i] 52 | bin = bins[i] 53 | waste = waste_in_bin(binstat, bin) 54 | live = live_in_bin(binstat, bin) 55 | print("In bin {} ({} bytes), wasted memory is {}, live is {}".format(i, 56 | bin['size'], human_size_str(waste), human_size_str(live))) 57 | sum_bin_waste += waste 58 | sum_bin_live += live 59 | 60 | # Extents 61 | print("-----------------------------------------------------") 62 | print("EXTENTS") 63 | print("-----------------------------------------------------") 64 | sum_extent_waste = 0 65 | extents = 
contents['jemalloc']['stats.arenas']['merged']['extents'] 66 | sum_allocated = 0 67 | sum_dirty = 0 68 | sum_muzzy = 0 69 | for i, extent in enumerate(extents): 70 | dirty = extent['dirty_bytes'] 71 | ndirty = extent['ndirty'] 72 | muzzy = extent['muzzy_bytes'] 73 | nmuzzy = extent['nmuzzy'] 74 | sum_dirty += dirty 75 | sum_muzzy += muzzy 76 | if ndirty == 0 and nmuzzy == 0: 77 | continue 78 | print("In pszind {}, dirty: {}, muzzy: {}. avgdirty: {}. avgmuzzy: {}" 79 | .format(i, human_size_str(dirty), human_size_str(muzzy), 80 | avg_size_str(dirty, ndirty), avg_size_str(muzzy, nmuzzy))) 81 | # One-offs 82 | tcache_bytes = contents['jemalloc']['stats.arenas']['merged']['tcache_bytes'] 83 | metadata_bytes = contents['jemalloc']['stats']['metadata'] 84 | user_bytes = contents['jemalloc']['stats']['allocated'] 85 | 86 | print("-----------------------------------------------------") 87 | print("TOTALS") 88 | print("-----------------------------------------------------") 89 | print("Total waste across bins: {}".format(human_size_str(sum_bin_waste))) 90 | print("Total waste across extents: {} dirty, {} muzzy".format( 91 | human_size_str(sum_dirty), human_size_str(sum_muzzy))) 92 | print("Total waste in tcaches: {}".format(human_size_str(tcache_bytes))) 93 | print("Total waste in metadata: {}".format(human_size_str(metadata_bytes))) 94 | print("Total user bin bytes: {}".format(human_size_str(sum_bin_live))) 95 | print("Total user bytes: {}".format(human_size_str(user_bytes))) 96 | print("Waste / user bytes ratio: {:0.3f}".format((sum_bin_waste 97 | + sum_dirty + tcache_bytes + metadata_bytes)/user_bytes)) 98 | 99 | if __name__ == "__main__": 100 | if len(sys.argv) != 2: 101 | print("Usage:", sys.argv[0], "my_stats.json") 102 | sys.exit(1) 103 | with open(sys.argv[1]) as f: 104 | contents = json.load(f) 105 | main(contents) 106 | -------------------------------------------------------------------------------- /util.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace util { 4 | 5 | inline void compilerBarrier() { 6 | asm volatile("" : : : "memory"); 7 | } 8 | 9 | inline std::uint64_t rdtsc() noexcept { 10 | std::uint32_t lo, hi; 11 | asm volatile("rdtsc" : "=a"(lo), "=d"(hi)); 12 | return ((uint64_t)lo) | ((uint64_t)hi << 32); 13 | } 14 | 15 | // begin and end must be a multiple of 64. 16 | inline void flushCache(void* beginv, std::size_t size) { 17 | char* begin = static_cast(beginv); 18 | char* end = begin + size; 19 | 20 | for (char* ptr = begin; ptr != end; ptr += 64) { 21 | __builtin_ia32_clflush(static_cast(ptr)); 22 | } 23 | } 24 | 25 | // Returns time to execute func, in cycles. 26 | template 27 | std::uint64_t runTimed(Func func) { 28 | std::uint64_t begin = rdtsc(); 29 | compilerBarrier(); 30 | func(); 31 | compilerBarrier(); 32 | std::uint64_t end = rdtsc(); 33 | return end - begin; 34 | } 35 | 36 | } 37 | --------------------------------------------------------------------------------